parser.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
// Package ast provides functionality for parsing V2 of the Kubecost
// filter language for Allocation types.
  3. //
  4. // e.g. "filter=namespace:kubecost+controllerkind:deployment"
  5. package ast
  6. import (
  7. "fmt"
  8. "github.com/hashicorp/go-multierror"
  9. )
  10. // The grammar is approximately as follows:
  11. //
  12. // <filter> ::= <filter-element> (<group-op> <filter-element>)*
  13. // <filter-element> ::= <comparison> | <group-filter>
  14. // <group-filter> ::= '(' <filter> ')'
  15. // <group-op> ::= '+' | '|'
  16. // <comparison> ::= <filter-key> <filter-op> <filter-value>
  17. // <filter-key> ::= <map-field> <keyed-access> | <filter-field>
  18. // <filter-op> ::= ':' | '!:' | '~:' | '!~:' | '<~:' | '!<~:' | '~>:' | '!~>:'
  19. // <filter-value> ::= '"' <identifier> '"' (',' <filter-value>)*
  20. // <keyed-access> ::= '[' <identifier> ']'
  21. // <map-field> ::= --- (fields passed into lexer)
  22. // <filter-field> ::= --- (fields passed into lexer)
  23. // <identifier> ::= --- valid K8s name or Prom-sanitized K8s name
  24. // ============================================================================
  25. // Parser
  26. //
  27. // Based on the Parser class in Chapter 6: Parsing Expressions of Crafting
  28. // Interpreters by Robert Nystrom
  29. // ============================================================================
  30. // parseError produces error messages tailored to the needs of the parser
  31. func parseError(t token, message string) error {
  32. if t.kind == eof {
  33. return fmt.Errorf("at end: %s", message)
  34. }
  35. return fmt.Errorf("at '%s': %s", t.s, message)
  36. }
// parser consumes a token stream produced by the lexer and builds a
// FilterNode tree via recursive descent.
type parser struct {
	// tokens is the token stream to parse; presumably the lexer terminates
	// it with an eof token (atEnd/peek rely on that) — confirm in lexer.
	tokens []token
	// current is the index of the next token to be consumed.
	current int
	// fields maps plain filter field names (e.g. "namespace") to their
	// definitions.
	fields map[string]*Field
	// mapFields maps key-accessible field names (e.g. "label") to their
	// definitions.
	mapFields map[string]*Field
}
  43. // ----------------------------------------------------------------------------
  44. // Parser helper methods for token handling
  45. // ----------------------------------------------------------------------------
  46. func (p *parser) atEnd() bool {
  47. return p.peek().kind == eof
  48. }
  49. func (p *parser) advance() token {
  50. if !p.atEnd() {
  51. p.current += 1
  52. }
  53. return p.previous()
  54. }
// previous returns the most recently consumed token.
func (p *parser) previous() token {
	return p.tokens[p.current-1]
}
  58. // match return true and advances the parser by one token if the next token has
  59. // a kind that matches one of the arguments. Otherwise, it returns false and
  60. // DOES NOT advance the parser.
  61. func (p *parser) match(tokenKinds ...tokenKind) bool {
  62. for _, kind := range tokenKinds {
  63. if p.check(kind) {
  64. p.advance()
  65. return true
  66. }
  67. }
  68. return false
  69. }
  70. // check returns true iff the next token matches the provided kind.
  71. func (p *parser) check(tk tokenKind) bool {
  72. if p.atEnd() {
  73. return false
  74. }
  75. return p.peek().kind == tk
  76. }
// peek returns the next token without consuming it.
func (p *parser) peek() token {
	return p.tokens[p.current]
}
  80. // consume is a "next token must be this kind" method. If the next token is of
  81. // the correct kind, the parser is advanced and that token is returned. If it
  82. // is not of the correct kind, a parse error is returned and the parser is NOT
  83. // advanced.
  84. func (p *parser) consume(tk tokenKind, message string) (token, error) {
  85. if p.check(tk) {
  86. return p.advance(), nil
  87. }
  88. return token{}, parseError(p.peek(), message)
  89. }
  90. // synchronize attempts to skip forward until the next tokenKind, indicating the
  91. // start of a new (plus, or, or parenClose).
  92. func (p *parser) synchronize(tokens ...tokenKind) {
  93. if len(tokens) == 0 {
  94. return
  95. }
  96. for !p.atEnd() {
  97. kind := p.peek().kind
  98. for _, token := range tokens {
  99. if kind == token {
  100. return
  101. }
  102. }
  103. p.advance()
  104. }
  105. }
  106. // ----------------------------------------------------------------------------
  107. // Parser grammar rules as recursive descent methods
  108. // ----------------------------------------------------------------------------
// filter is the main method of the parser. It turns the token stream into a
// FilterNode tree, reporting parse errors that occurred along the way. The
// depth parameter is the number of edges from the node to the tree's root
// node, which is initially 0. As we recurse into parenthesized groups the
// depth increases; it is used to detect mismatched parentheses.
func (p *parser) filter(depth int) (FilterNode, error) {
	// errs accumulates every parse error found in this scope. A nil
	// *multierror.Error is safe to Append to and to call ErrorOrNil() on.
	var errs *multierror.Error
	// ------------------------------------------------------------------------
	// Capture Starting Op
	// ------------------------------------------------------------------------
	// Since every valid filter starts with an operand, this is always our first
	// step. Depending on the _next_ token, we can either stop here or use a
	// grouping operator (+ or |).
	var top FilterNode
	// If we determine after parsing the first op that we have a group op, we'll
	// create the group based on the operator and push top into the group.
	var f FilterGroup = nil
	// Special Case: an empty filter at depth = 0 (first token is eof) parses
	// to the no-op VoidOp rather than an error.
	if depth == 0 && p.peek().kind == eof {
		return &VoidOp{}, errs.ErrorOrNil()
	}
	// An open paren indicates a new filter depth, so we recursively call filter
	// with depth+1.
	if p.match(parenOpen) {
		node, err := p.filter(depth + 1)
		if err != nil {
			errs = multierror.Append(errs, err)
		} else {
			top = node
		}
	} else {
		comparison, err := p.comparison()
		if err != nil {
			errs = multierror.Append(errs, err)
			// Skip ahead to the next operator or scope close so we can keep
			// collecting errors instead of stopping at the first one.
			p.synchronize(plus, or, parenClose)
		} else {
			top = comparison
		}
	}
	// Handles case `( <comparison> )` with no grouping ops.
	if p.match(parenClose) {
		// A close paren at depth 0 has no matching open paren.
		if depth <= 0 {
			errs = multierror.Append(errs, fmt.Errorf("Found ')' without matching '('"))
		}
		return top, errs.ErrorOrNil()
	}
	// ------------------------------------------------------------------------
	// Determine Group Operator
	// ------------------------------------------------------------------------
	// Once we land here, we expect an operator as the next token. This operator
	// will determine the group for this scope and be used to continue parsing
	// as long as the operators following the initial are _the same_.
	//
	// For instance:
	//   ( <comparison> | <comparison> | <comparison> ) is allowed
	//   ( <comparison> + <comparison> + (<comparison> | <comparison>)) is allowed
	//   ( <comparison> | <comparison> + <comparison> ) is _NOT_ allowed
	//
	// Create the proper grouping operator based on the current token kind, then
	// loop to capture each repetition of the _same_ operator.
	selectedOp := p.peek().kind
	if selectedOp == plus || selectedOp == or {
		if selectedOp == plus {
			f = &AndOp{}
		} else if selectedOp == or {
			f = &OrOp{}
		}
		// Once we determine we are using a group operator, it's safe to push
		// the current top level operand into the group.
		f.Add(top)
		// Capture each repetition of selectedOp.
		for p.match(selectedOp) {
			if p.match(parenOpen) {
				node, err := p.filter(depth + 1)
				if err != nil {
					errs = multierror.Append(errs, err)
				} else {
					f.Add(node)
				}
			} else {
				right, err := p.comparison()
				if err != nil {
					errs = multierror.Append(errs, err)
					p.synchronize(plus, or, parenClose)
				} else {
					f.Add(right)
				}
			}
			// A close paren ends this group's scope.
			if p.match(parenClose) {
				if depth <= 0 {
					errs = multierror.Append(errs, fmt.Errorf("Found ')' without matching '('"))
				}
				return f, errs.ErrorOrNil()
			}
			// The following code enforces continued use of a single operator
			// within a scope, i.e.: (a | b + c) is disallowed.
			//
			// In order to continue parsing (to continue to collect parse
			// errors), we need to fast-forward to the next instance of an
			// operator or scope close.
			nextOp := p.peek().kind
			if nextOp != eof && nextOp != selectedOp {
				errs = multierror.Append(errs, fmt.Errorf("Found \"%s\", Expected \"%s\"", nextOp.String(), selectedOp.String()))
				// Since we were peeking for this check, to correctly
				// synchronize we must advance at least once.
				p.advance()
				p.synchronize(plus, or, parenClose)
				// Since it's possible to synchronize to a paren close, we need
				// to ensure we correctly pop the current scope if that's the
				// case.
				if p.match(parenClose) {
					return f, errs.ErrorOrNil()
				}
			}
		}
	}
	// It should not be possible to reach this point on a non-zero depth, so we
	// must have a () mismatch.
	if depth > 0 {
		errs = multierror.Append(errs, fmt.Errorf("Found '(' without matching ')'"))
	}
	// If we didn't have a grouping operator, we simply return the single op.
	if f == nil {
		return top, errs.ErrorOrNil()
	}
	return f, errs.ErrorOrNil()
}
  230. func (p *parser) comparison() (FilterNode, error) {
  231. field, key, err := p.filterKey()
  232. if err != nil {
  233. return nil, err
  234. }
  235. opToken, err := p.filterOp()
  236. if err != nil {
  237. return nil, err
  238. }
  239. var op FilterOp
  240. switch opToken.kind {
  241. case colon:
  242. // for ':' using a slice or key-less map, treat as '~:'
  243. if field.IsSlice() || (field.IsMap() && key == "") {
  244. op = FilterOpContains
  245. } else {
  246. op = FilterOpEquals
  247. }
  248. case bangColon:
  249. // for '!:' using a slice or key-less map, treat as '!~:'
  250. if field.IsSlice() || (field.IsMap() && key == "") {
  251. op = FilterOpNotContains
  252. } else {
  253. op = FilterOpNotEquals
  254. }
  255. case tildeColon:
  256. op = FilterOpContains
  257. case bangTildeColon:
  258. op = FilterOpNotContains
  259. case startTildeColon:
  260. op = FilterOpContainsPrefix
  261. case bangStartTildeColon:
  262. op = FilterOpNotContainsPrefix
  263. case tildeEndColon:
  264. op = FilterOpContainsSuffix
  265. case bangTildeEndColon:
  266. op = FilterOpNotContainsSuffix
  267. default:
  268. return nil, parseError(opToken, "implementation problem: unhandled op token")
  269. }
  270. values, err := p.filterValues()
  271. if err != nil {
  272. return nil, err
  273. }
  274. switch opToken.kind {
  275. // In the != case, a sequence of filter values is ANDed
  276. // Example:
  277. // namespace!:"foo","bar" -> (and (notequals namespace foo)
  278. // (notequals namespace bar))
  279. case bangColon, bangTildeColon, bangStartTildeColon, bangTildeEndColon:
  280. // Only a single filter value, don't need to wrap in AND
  281. if len(values) == 1 {
  282. node, err := toFilterNode(field, key, op, values[0])
  283. if err != nil {
  284. return nil, fmt.Errorf("Parse Error: %s", err)
  285. }
  286. return node, nil
  287. }
  288. // Multiple filter values, wrap in AND
  289. baseFilter := &AndOp{}
  290. for _, v := range values {
  291. node, err := toFilterNode(field, key, op, v)
  292. if err != nil {
  293. return nil, fmt.Errorf("Parse Error: %s", err)
  294. }
  295. baseFilter.Operands = append(baseFilter.Operands, node)
  296. }
  297. return baseFilter, nil
  298. default:
  299. // Only a single filter value, don't need to wrap in OR
  300. if len(values) == 1 {
  301. node, err := toFilterNode(field, key, op, values[0])
  302. if err != nil {
  303. return nil, fmt.Errorf("Parse Error: %s", err)
  304. }
  305. return node, nil
  306. }
  307. // Multiple filter values, wrap in OR
  308. baseFilter := &OrOp{}
  309. for _, v := range values {
  310. node, err := toFilterNode(field, key, op, v)
  311. if err != nil {
  312. return nil, fmt.Errorf("Parse Error: %s", err)
  313. }
  314. baseFilter.Operands = append(baseFilter.Operands, node)
  315. }
  316. return baseFilter, nil
  317. }
  318. }
  319. // filterKey parses a series of tokens that represent a "filter key", returning
  320. // an error if a filter key cannot be constructed.
  321. //
  322. // Examples:
  323. // tokens = [filterField2:label keyedAccess:app] -> FilterLabel, app, nil
  324. // tokens = [filterField1:namespace] -> FilterNamespace, "", nil
  325. func (p *parser) filterKey() (field *Field, key string, err error) {
  326. if p.match(mapField) {
  327. rawField := p.previous().s
  328. mappedField, ok := p.mapFields[rawField]
  329. if !ok {
  330. return nil, "", parseError(p.previous(), "expect key-mapped filter field, like 'label' or 'annotation'")
  331. }
  332. // keyed-access is optional after a map field
  333. if p.match(keyedAccess) {
  334. key = p.previous().s
  335. } else {
  336. key = ""
  337. }
  338. return mappedField, key, nil
  339. }
  340. _, err = p.consume(filterField, "expect filter field")
  341. if err != nil {
  342. return nil, "", err
  343. }
  344. rawField := p.previous().s
  345. mappedField, ok := p.fields[rawField]
  346. if !ok {
  347. return nil, "", parseError(p.previous(), "expect known filter field, like 'cluster' or 'namespace'")
  348. }
  349. return mappedField, "", nil
  350. }
  351. func (p *parser) filterOp() (token, error) {
  352. if p.match(colon, bangColon, tildeColon, bangTildeColon, startTildeColon, bangStartTildeColon, tildeEndColon, bangTildeEndColon) {
  353. return p.previous(), nil
  354. }
  355. return token{}, parseError(p.peek(), "expect filter op like ':', '!:', '~:', or '!~:'")
  356. }
  357. func (p *parser) filterValues() ([]string, error) {
  358. vals := []string{}
  359. _, err := p.consume(str, "expect string as filter value")
  360. if err != nil {
  361. return nil, err
  362. }
  363. vals = append(vals, p.previous().s)
  364. for p.match(comma) {
  365. _, err := p.consume(str, "expect string as filter value")
  366. if err != nil {
  367. return nil, err
  368. }
  369. vals = append(vals, p.previous().s)
  370. }
  371. return vals, nil
  372. }
  373. func toFilterNode(field *Field, key string, op FilterOp, value string) (FilterNode, error) {
  374. switch op {
  375. case FilterOpEquals:
  376. return &EqualOp{
  377. Left: Identifier{
  378. Field: field,
  379. Key: key,
  380. },
  381. Right: value,
  382. }, nil
  383. case FilterOpNotEquals:
  384. return &NotOp{
  385. Operand: &EqualOp{
  386. Left: Identifier{
  387. Field: field,
  388. Key: key,
  389. },
  390. Right: value,
  391. },
  392. }, nil
  393. case FilterOpContains:
  394. return &ContainsOp{
  395. Left: Identifier{
  396. Field: field,
  397. Key: key,
  398. },
  399. Right: value,
  400. }, nil
  401. case FilterOpNotContains:
  402. return &NotOp{
  403. Operand: &ContainsOp{
  404. Left: Identifier{
  405. Field: field,
  406. Key: key,
  407. },
  408. Right: value,
  409. },
  410. }, nil
  411. case FilterOpContainsPrefix:
  412. return &ContainsPrefixOp{
  413. Left: Identifier{
  414. Field: field,
  415. Key: key,
  416. },
  417. Right: value,
  418. }, nil
  419. case FilterOpNotContainsPrefix:
  420. return &NotOp{
  421. Operand: &ContainsPrefixOp{
  422. Left: Identifier{
  423. Field: field,
  424. Key: key,
  425. },
  426. Right: value,
  427. },
  428. }, nil
  429. case FilterOpContainsSuffix:
  430. return &ContainsSuffixOp{
  431. Left: Identifier{
  432. Field: field,
  433. Key: key,
  434. },
  435. Right: value,
  436. }, nil
  437. case FilterOpNotContainsSuffix:
  438. return &NotOp{
  439. Operand: &ContainsSuffixOp{
  440. Left: Identifier{
  441. Field: field,
  442. Key: key,
  443. },
  444. Right: value,
  445. },
  446. }, nil
  447. default:
  448. return nil, fmt.Errorf("Failed to parse op: %s", op)
  449. }
  450. }
// FilterParser is an object capable of parsing a filter string into a
// FilterNode AST.
type FilterParser interface {
	// Parse parses a filter string into a FilterNode AST, returning an error
	// if the string cannot be lexed or parsed.
	Parse(filter string) (FilterNode, error)
}
// defaultFilterParser is the default implementation of FilterParser. It
// carries the field definitions shared by the lexer and the parser.
type defaultFilterParser struct {
	// fields maps plain filter field names to their definitions.
	fields map[string]*Field
	// mapFields maps key-accessible (map-style) field names to their
	// definitions.
	mapFields map[string]*Field
}
  462. // Parse parses a filter string into a FilterNode AST.
  463. func (dfp *defaultFilterParser) Parse(filter string) (FilterNode, error) {
  464. tokens, err := lex(filter, dfp.fields, dfp.mapFields)
  465. if err != nil {
  466. return nil, fmt.Errorf("lexing filter: %w", err)
  467. }
  468. p := parser{
  469. tokens: tokens,
  470. fields: dfp.fields,
  471. mapFields: dfp.mapFields,
  472. }
  473. parsedFilter, err := p.filter(0)
  474. if err != nil {
  475. return nil, fmt.Errorf("parsing filter: %w", err)
  476. }
  477. return parsedFilter, nil
  478. }
  479. // splits a slice of Field instances into a map of fields (key'd by name) that have no key-based
  480. // access and those that have key-based access.
  481. func fieldsToMaps(fs []*Field) (fields map[string]*Field, mapFields map[string]*Field) {
  482. fields = make(map[string]*Field)
  483. mapFields = make(map[string]*Field)
  484. for _, f := range fs {
  485. if f.IsMap() {
  486. mapFields[f.Name] = f
  487. } else {
  488. fields[f.Name] = f
  489. }
  490. }
  491. return
  492. }
  493. // NewFilterParser creates a new `FilterParser` instance with the provided `Field` definitions to
  494. // use when lexing and parsing.
  495. func NewFilterParser(fields []*Field) FilterParser {
  496. f, m := fieldsToMaps(fields)
  497. return &defaultFilterParser{
  498. fields: f,
  499. mapFields: m,
  500. }
  501. }