| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- package ast
- import (
- "fmt"
- multierror "github.com/hashicorp/go-multierror"
- )
- // ============================================================================
- // This file contains:
- // Lexing (string -> []token) for V2 of allocation filters
- // ============================================================================
- //
- // See parser.go for a formal grammar and external links.
// tokenKind enumerates every category of token the lexer can emit.
type tokenKind int

// The token kinds, numbered consecutively from zero. The trailing comment on
// each entry shows the source text (or an example of it) for that kind.
const (
	colon               tokenKind = iota // ':'
	comma                                // ','
	plus                                 // '+'
	or                                   // '|'
	bangColon                            // '!:'
	tildeColon                           // '~:'
	bangTildeColon                       // '!~:'
	startTildeColon                      // '<~:'
	bangStartTildeColon                  // '!<~:'
	tildeEndColon                        // '~>:'
	bangTildeEndColon                    // '!~>:'
	parenOpen                            // '('
	parenClose                           // ')'
	str                                  // '"foo"'
	filterField                          // 'namespace', 'cluster'
	mapField                             // 'label', 'annotation'
	keyedAccess                          // '[app]', '[foo]', etc.
	identifier                           // K8s valid name + sanitized Prom: 'app', 'abc_label'
	eof
)

// tokenKindNames holds the display name of each tokenKind, indexed by the
// kind's numeric value.
//
// NOTE(review): filterField and mapField render as "filterField1" and
// "filterField2" rather than their constant names; presumably historical, so
// they are kept as-is in case other code or tests rely on the exact text.
var tokenKindNames = [...]string{
	colon:               "colon",
	comma:               "comma",
	plus:                "plus",
	or:                  "or",
	bangColon:           "bangColon",
	tildeColon:          "tildeColon",
	bangTildeColon:      "bangTildeColon",
	startTildeColon:     "startTildeColon",
	bangStartTildeColon: "bangStartTildeColon",
	tildeEndColon:       "tildeEndColon",
	bangTildeEndColon:   "bangTildeEndColon",
	parenOpen:           "parenOpen",
	parenClose:          "parenClose",
	str:                 "str",
	filterField:         "filterField1",
	mapField:            "filterField2",
	keyedAccess:         "keyedAccess",
	identifier:          "identifier",
	eof:                 "eof",
}

// String returns the display name of tk. Values outside the declared range
// are rendered as "Unspecified: <n>", where <n> is the numeric value.
func (tk tokenKind) String() string {
	if tk < 0 || int(tk) >= len(tokenKindNames) {
		return fmt.Sprintf("Unspecified: %d", tk)
	}
	return tokenKindNames[tk]
}
- // ============================================================================
- // Lexer/Scanner
- //
- // Based on the Scanner class in Chapter 4: Scanning of Crafting Interpreters by
- // Robert Nystrom
- // ============================================================================
- type token struct {
- kind tokenKind
- s string
- }
- func (t token) String() string {
- return fmt.Sprintf("%s:%s", t.kind, t.s)
- }
- type scanner struct {
- source string
- tokens []token
- errors []error
- fields map[string]*Field
- mapFields map[string]*Field
- lexemeStartByte int
- nextByte int
- }
- func (s *scanner) scanTokens() {
- for !s.atEnd() {
- s.lexemeStartByte = s.nextByte
- s.scanToken()
- }
- s.tokens = append(s.tokens, token{kind: eof})
- }
- func (s scanner) atEnd() bool {
- return s.nextByte >= len(s.source)
- }
- // advance returns a byte because we only accept ASCII, which has to fit in a
- // byte
- //
- // TODO: If we add unicode support, advance() will probably have to return a
- // rune.
- func (s *scanner) advance() byte {
- b := s.source[s.nextByte]
- s.nextByte += 1
- return b
- }
- func (s *scanner) match(expected byte) bool {
- if s.atEnd() {
- return false
- }
- if s.source[s.nextByte] != expected {
- return false
- }
- s.nextByte += 1
- return true
- }
- func (s *scanner) addToken(kind tokenKind) {
- lexemeString := s.source[s.lexemeStartByte:s.nextByte]
- switch kind {
- // Eliminate surrounding characters like " and []
- case str, keyedAccess:
- lexemeString = lexemeString[1 : len(lexemeString)-1]
- }
- s.tokens = append(s.tokens, token{
- kind: kind,
- s: lexemeString,
- })
- }
- func (s *scanner) peek() byte {
- if s.atEnd() {
- return 0
- }
- return s.source[s.nextByte]
- }
- func (s *scanner) scanToken() {
- c := s.advance()
- switch c {
- case ':':
- s.addToken(colon)
- case ',':
- s.addToken(comma)
- case '+':
- s.addToken(plus)
- case '|':
- s.addToken(or)
- case '!':
- if s.match(':') {
- s.addToken(bangColon)
- } else if s.match('~') {
- if s.match(':') {
- s.addToken(bangTildeColon)
- } else if s.match('>') {
- if s.match(':') {
- s.addToken(bangTildeEndColon)
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '>'", s.nextByte-1))
- }
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
- }
- } else if s.match('<') {
- if s.match('~') {
- if s.match(':') {
- s.addToken(bangStartTildeColon)
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
- }
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '<'", s.nextByte-1))
- }
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '!'", s.nextByte-1))
- }
- case '(':
- s.addToken(parenOpen)
- case ')':
- s.addToken(parenClose)
- case '<':
- if s.match('~') {
- if s.match(':') {
- s.addToken(startTildeColon)
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
- }
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '<'", s.nextByte-1))
- }
- case '~':
- if s.match(':') {
- s.addToken(tildeColon)
- } else if s.match('>') {
- if s.match(':') {
- s.addToken(tildeEndColon)
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '>'", s.nextByte-1))
- }
- } else {
- s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
- }
- // strings
- case '"':
- s.string()
- // keyed access
- case '[':
- s.keyedAccess()
- // Ignore whitespace chars outside of "" and [].
- case ' ', '\t', '\n', '\r':
- break
- default:
- // identifiers
- //
- // We can keep it simple and not _force_ the first character to be a
- // non-number because we don't need numbers in this language. If we need
- // to extend the language to support numbers, this has to become just
- // isAlpha() and then s.identifier() will use isIdentifierChar() in
- // its main loop.
- if isIdentifierChar(c) {
- s.identifier()
- break
- }
- // TODO: We could return a more exact error message for Unicode chars if
- // we added extra handling:
- // https://stackoverflow.com/questions/53069040/checking-a-string-contains-only-ascii-characters
- s.errors = append(s.errors, fmt.Errorf("unexpected character/byte at position %d. Please avoid Unicode.", s.nextByte-1))
- }
- }
// isIdentifierChar reports whether b may appear in an identifier: an ASCII
// letter, an ASCII digit, '-' or '_'. The set is meant to match
// Kubernetes-supported name characters.
//
// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
//
// TODO: This may not match all characters we support for cluster IDs (it may be
// the case that cluster IDs can contain UTF-8 characters).
func isIdentifierChar(b byte) bool {
	switch {
	case '0' <= b && b <= '9':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case 'a' <= b && b <= 'z':
		return true
	case b == '-':
		// hyphens are allowed according to K8s spec
		return true
	case b == '_':
		// underscores are allowed because of Prometheus sanitization
		return true
	default:
		return false
	}
}
- func (s *scanner) string() {
- for s.peek() != '"' && !s.atEnd() {
- s.advance()
- }
- if s.atEnd() {
- s.errors = append(s.errors, fmt.Errorf("unterminated string starting at %d", s.lexemeStartByte))
- return
- }
- // Consume closing '"'
- s.advance()
- s.addToken(str)
- }
- func (s *scanner) keyedAccess() {
- for s.peek() != ']' && !s.atEnd() {
- s.advance()
- }
- if s.atEnd() {
- s.errors = append(s.errors, fmt.Errorf("unterminated access starting at %d", s.lexemeStartByte))
- return
- }
- // Consume closing ']'
- s.advance()
- s.addToken(keyedAccess)
- }
- func (s *scanner) identifier() {
- for isIdentifierChar(s.peek()) {
- s.advance()
- }
- tokenText := s.source[s.lexemeStartByte:s.nextByte]
- if _, ok := s.fields[tokenText]; ok {
- s.addToken(filterField)
- } else if _, ok := s.mapFields[tokenText]; ok {
- s.addToken(mapField)
- } else {
- s.addToken(identifier)
- }
- }
- // lex will generate a slice of tokens provided a raw string and the filter field definitions
- func lex(raw string, fields map[string]*Field, mapFields map[string]*Field) ([]token, error) {
- s := scanner{
- source: raw,
- fields: fields,
- mapFields: mapFields,
- }
- s.scanTokens()
- if len(s.errors) > 0 {
- return s.tokens, multierror.Append(nil, s.errors...)
- }
- return s.tokens, nil
- }
|