Procházet zdrojové kódy

Lexer and Parser for AllocationFilter v2 language

Michael Dresser před 4 roky
rodič
revize
8ab25511ea

+ 2 - 0
go.mod

@@ -25,6 +25,7 @@ require (
 	github.com/getsentry/sentry-go v0.6.1
 	github.com/goccy/go-json v0.9.4
 	github.com/google/uuid v1.3.0
+	github.com/hashicorp/go-multierror v1.0.0
 	github.com/json-iterator/go v1.1.12
 	github.com/jszwec/csvutil v1.2.1
 	github.com/julienschmidt/httprouter v1.3.0
@@ -87,6 +88,7 @@ require (
 	github.com/googleapis/gax-go/v2 v2.0.5 // indirect
 	github.com/googleapis/gnostic v0.4.1 // indirect
 	github.com/gorilla/css v1.0.0 // indirect
+	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/hashicorp/golang-lru v0.5.1 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/imdario/mergo v0.3.5 // indirect

+ 2 - 0
go.sum

@@ -320,10 +320,12 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb
 github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
 github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q=
 github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
+github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
 github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
 github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
 github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
 github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
+github.com/hashicorp/go-multierror v1.0.0 h1:iVjPR7a6H0tWELX5NxNe7bYopibicUzc7uPribsnS6o=
 github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
 github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU=
 github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=

+ 24 - 0
pkg/kubecost/allocationfilter.go

@@ -52,6 +52,10 @@ const (
 	// ["a", "b", "c"] FilterContains "a" = true
 	FilterContains = "contains"
 
+	// FilterNotContains is an array/slice non-membership operator
+	// ["a", "b", "c"] FilterNotContains "d" = true
+	FilterNotContains = "notcontains"
+
 	// FilterStartsWith matches strings with the given prefix.
 	// "kube-system" StartsWith "kube" = true
 	//
@@ -244,6 +248,26 @@ func (filter AllocationFilterCondition) Matches(a *Allocation) bool {
 		} else {
 			log.Warnf("Allocation Filter: invalid 'contains' call for non-list filter value")
 		}
+	case FilterNotContains:
+		if stringSlice, ok := valueToCompare.([]string); ok {
+			// services!:"__unallocated__" should match
+			// len(a.Properties.Services) > 0
+			//
+			// TODO: is this true?
+			if filter.Value == UnallocatedSuffix {
+				return len(stringSlice) > 0
+			}
+
+			for _, s := range stringSlice {
+				if s == filter.Value {
+					return false
+				}
+			}
+
+			return true
+		} else {
+			log.Warnf("Allocation Filter: invalid 'notcontains' call for non-list filter value")
+		}
 	case FilterStartsWith:
 		if toCompareMissing {
 			return false

+ 60 - 0
pkg/kubecost/allocationfilter_test.go

@@ -381,6 +381,66 @@ func Test_AllocationFilterCondition_Matches(t *testing.T) {
 
 			expected: false,
 		},
+		{
+			name: `services notcontains -> true`,
+			a: &Allocation{
+				Properties: &AllocationProperties{
+					Services: []string{"serv1", "serv2"},
+				},
+			},
+			filter: AllocationFilterCondition{
+				Field: FilterServices,
+				Op:    FilterNotContains,
+				Value: "serv3",
+			},
+
+			expected: true,
+		},
+		{
+			name: `services notcontains -> false`,
+			a: &Allocation{
+				Properties: &AllocationProperties{
+					Services: []string{"serv1", "serv2"},
+				},
+			},
+			filter: AllocationFilterCondition{
+				Field: FilterServices,
+				Op:    FilterNotContains,
+				Value: "serv2",
+			},
+
+			expected: false,
+		},
+		{
+			name: `services notcontains unallocated -> true`,
+			a: &Allocation{
+				Properties: &AllocationProperties{
+					Services: []string{"serv1", "serv2"},
+				},
+			},
+			filter: AllocationFilterCondition{
+				Field: FilterServices,
+				Op:    FilterNotContains,
+				Value: UnallocatedSuffix,
+			},
+
+			expected: true,
+		},
+		{
+			name: `services notcontains unallocated -> false`,
+			a: &Allocation{
+				Properties: &AllocationProperties{
+					Services: []string{},
+				},
+			},
+			filter: AllocationFilterCondition{
+				Field: FilterServices,
+				Op:    FilterNotContains,
+				Value: UnallocatedSuffix,
+			},
+
+			expected: false,
+		},
 		{
 			name: `services containsprefix -> true`,
 			a: &Allocation{

+ 8 - 1
pkg/util/filterutil/allocationfilters.go → pkg/util/allocationfilterutil/queryfilters.go

@@ -1,4 +1,4 @@
-package filterutil
+package allocationfilterutil
 
 import (
 	"strings"
@@ -10,6 +10,13 @@ import (
 	"github.com/kubecost/opencost/pkg/util/httputil"
 )
 
+// ============================================================================
+// This file contains:
+// Parsing (HTTP query params -> AllocationFilter) for V1 of filters
+//
+// e.g. "filterNamespaces=ku&filterControllers=deployment:kc"
+// ============================================================================
+
 // parseWildcardEnd checks if the given filter value is wildcarded, meaning
 // it ends in "*". If it does, it removes the suffix and returns the cleaned
 // string and true. Otherwise, it returns the same filter and false.

+ 1 - 1
pkg/util/filterutil/allocationfilters_test.go → pkg/util/allocationfilterutil/queryfilters_test.go

@@ -1,4 +1,4 @@
-package filterutil
+package allocationfilterutil
 
 import (
 	"testing"

+ 272 - 0
pkg/util/allocationfilterutil/v2/lexer.go

@@ -0,0 +1,272 @@
+package allocationfilterutil
+
+import (
+	"fmt"
+
+	multierror "github.com/hashicorp/go-multierror"
+
+	"github.com/kubecost/opencost/pkg/kubecost"
+)
+
+// ============================================================================
+// This file contains:
+// Lexing (string -> []token) for V2 of allocation filters
+// ============================================================================
+//
+// See parser.go for a formal grammar and external links.
+
+// tokenKind enumerates the kinds of token the lexer can emit. The values are
+// assigned by iota in declaration order, so the zero value of tokenKind is
+// colon.
+type tokenKind int
+
+const (
+	colon tokenKind = iota // ':'
+	comma                  // ','
+	plus                   // '+'
+
+	bangColon // '!:'
+
+	str // '"foo"'
+
+	filterField1 // 'namespace', 'cluster'
+	filterField2 // 'label', 'annotation'
+	keyedAccess  // '[app]', '[foo]', etc.
+	identifier   // K8s valid name + sanitized Prom: 'app', 'abc_label'
+
+	// eof is appended by the scanner after the last real token.
+	eof
+)
+
+// Lookup tables shared by the lexer and the parser:
+//
+//   - the lexer consults them to decide whether a run of identifier
+//     characters is a filterField1/filterField2 token or a plain identifier;
+//   - the parser consults them to turn such a token into the
+//     kubecost.FilterField used in an AllocationFilterCondition.
+//
+// TODO: Should the lexer _also_ attach the FilterField to help the parser out?
+var (
+	// ff1ToKCFilterField holds the fields that are compared directly.
+	ff1ToKCFilterField = map[string]kubecost.FilterField{
+		"cluster":        kubecost.FilterClusterID,
+		"node":           kubecost.FilterNode,
+		"namespace":      kubecost.FilterNamespace,
+		"controllerName": kubecost.FilterControllerName,
+		"controllerKind": kubecost.FilterControllerKind,
+		"container":      kubecost.FilterContainer,
+		"pod":            kubecost.FilterPod,
+		"services":       kubecost.FilterServices,
+	}
+
+	// ff2ToKCFilterField holds the fields that need a keyed access, e.g.
+	// label[app].
+	ff2ToKCFilterField = map[string]kubecost.FilterField{
+		"label":      kubecost.FilterLabel,
+		"annotation": kubecost.FilterAnnotation,
+	}
+)
+
+// String returns the name of the token kind as declared in the const block.
+// Values outside the declared range are rendered as "Unspecified: <n>".
+func (tk tokenKind) String() string {
+	// Indexed by tokenKind so the table cannot drift out of order.
+	names := [...]string{
+		colon:        "colon",
+		comma:        "comma",
+		plus:         "plus",
+		bangColon:    "bangColon",
+		str:          "str",
+		filterField1: "filterField1",
+		filterField2: "filterField2",
+		keyedAccess:  "keyedAccess",
+		identifier:   "identifier",
+		eof:          "eof",
+	}
+
+	if tk >= 0 && int(tk) < len(names) {
+		return names[tk]
+	}
+	return fmt.Sprintf("Unspecified: %d", tk)
+}
+
+// ============================================================================
+// Lexer/Scanner
+//
+// Based on the Scanner class in Chapter 4: Scanning of Crafting Interpreters by
+// Robert Nystrom
+// ============================================================================
+
+// token is a single lexeme produced by the scanner. kind classifies the
+// lexeme and s holds its text. For str and keyedAccess tokens, addToken has
+// already removed the surrounding delimiters from s; the eof token appended by
+// scanTokens has an empty s.
+type token struct {
+	kind tokenKind
+	s    string
+}
+
+// String renders the token as "<kind>:<text>" for logs and test output.
+func (t token) String() string {
+	return t.kind.String() + ":" + t.s
+}
+
+// scanner holds the state of a single lexing pass over source.
+//
+// tokens and errors accumulate as scanning proceeds. lexemeStartByte is the
+// byte offset at which the lexeme currently being scanned begins (reset by
+// scanTokens before each scanToken call), and nextByte is the byte offset of
+// the next unread byte of source.
+type scanner struct {
+	source string
+	tokens []token
+	errors []error
+
+	lexemeStartByte int
+	nextByte        int
+}
+
+// scanTokens lexes the whole source, one lexeme per iteration, and then
+// terminates the token list with an eof token. Lexing errors are collected in
+// s.errors rather than stopping the scan.
+func (s *scanner) scanTokens() {
+	for {
+		if s.atEnd() {
+			break
+		}
+
+		// Every lexeme starts where the previous one stopped.
+		s.lexemeStartByte = s.nextByte
+		s.scanToken()
+	}
+
+	end := token{kind: eof}
+	s.tokens = append(s.tokens, end)
+}
+
+// atEnd reports whether every byte of the source has been consumed.
+//
+// It uses a pointer receiver like every other scanner method, so the scanner
+// struct is not copied on each call.
+func (s *scanner) atEnd() bool {
+	return s.nextByte >= len(s.source)
+}
+
+// advance consumes and returns the next byte of the source. Working on bytes
+// is sufficient because the language only accepts ASCII, and every ASCII
+// character fits in one byte. The caller must ensure the scanner is not at the
+// end of the source.
+func (s *scanner) advance() byte {
+	pos := s.nextByte
+	current := s.source[pos]
+	s.nextByte = pos + 1
+	return current
+}
+
+// match consumes the next byte and returns true only if that byte equals
+// expected. At end of input, or on any other byte, nothing is consumed and
+// false is returned.
+func (s *scanner) match(expected byte) bool {
+	if s.atEnd() || s.source[s.nextByte] != expected {
+		return false
+	}
+
+	s.nextByte++
+	return true
+}
+
+// addToken appends a token of the given kind whose text is the lexeme
+// currently spanned by [lexemeStartByte, nextByte). For str and keyedAccess
+// tokens the first and last byte (the quotes or brackets) are dropped.
+func (s *scanner) addToken(kind tokenKind) {
+	text := s.source[s.lexemeStartByte:s.nextByte]
+
+	if kind == str || kind == keyedAccess {
+		// Strip the delimiters: "foo" -> foo, [foo] -> foo
+		text = text[1 : len(text)-1]
+	}
+
+	s.tokens = append(s.tokens, token{kind: kind, s: text})
+}
+
+// peek returns the next byte without consuming it, or 0 when the whole source
+// has already been consumed.
+func (s *scanner) peek() byte {
+	if !s.atEnd() {
+		return s.source[s.nextByte]
+	}
+	return 0
+}
+
+// scanToken consumes exactly one lexeme starting at the next byte. On success
+// a token is appended; otherwise an error is recorded in s.errors and the
+// offending byte is skipped.
+func (s *scanner) scanToken() {
+	// TODO: DON'T ACCEPT NON-ASCII
+	// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+
+	c := s.advance()
+	// Offset of c itself, used for error reporting.
+	pos := s.nextByte - 1
+
+	switch {
+	case c == ':':
+		s.addToken(colon)
+	case c == ',':
+		s.addToken(comma)
+	case c == '+':
+		s.addToken(plus)
+	case c == '!':
+		// '!' is only meaningful as the first half of '!:'
+		if !s.match(':') {
+			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '!'", pos))
+			return
+		}
+		s.addToken(bangColon)
+	case c == '"':
+		// strings
+		// TODO: reject whitespace chars
+		s.string()
+	case c == '[':
+		// keyed access
+		s.keyedAccess()
+	case isIdentifierChar(c):
+		// identifiers
+		// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
+		// The first character is not forced to be a non-number because this
+		// language has no numbers. If numbers are ever added, this check has
+		// to become alphabetic-only while s.identifier() keeps accepting
+		// alphanumerics in its main loop.
+		//
+		// TODO: does this match all characters we support for cluster IDs?
+		s.identifier()
+	default:
+		// TODO: how to phrase for unicode?
+		// extra handling to check unicode? https://stackoverflow.com/questions/53069040/checking-a-string-contains-only-ascii-characters
+		s.errors = append(s.errors, fmt.Errorf("unexpected character/byte at position %d. Please avoid Unicode.", pos))
+	}
+}
+
+// isIdentifierChar reports whether b may appear in an identifier: an ASCII
+// letter or digit, a hyphen (allowed by the K8s naming spec), or an
+// underscore (produced by Prometheus label sanitization).
+//
+// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
+func isIdentifierChar(b byte) bool {
+	switch {
+	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
+		return true
+	case b == '-', b == '_':
+		return true
+	default:
+		return false
+	}
+}
+
+// string scans the remainder of a double-quoted string whose opening quote
+// has already been consumed. It emits a str token on success, or records an
+// "unterminated string" error if the source ends before a closing quote.
+func (s *scanner) string() {
+	for {
+		if s.atEnd() {
+			s.errors = append(s.errors, fmt.Errorf("unterminated string starting at %d", s.lexemeStartByte))
+			return
+		}
+
+		// Consuming the closing '"' ends the lexeme.
+		if s.advance() == '"' {
+			break
+		}
+	}
+
+	s.addToken(str)
+}
+
+// keyedAccess scans the remainder of a bracketed key such as [app] whose
+// opening bracket has already been consumed. It emits a keyedAccess token on
+// success, or records an "unterminated access" error if the source ends
+// before a closing bracket.
+func (s *scanner) keyedAccess() {
+	// Skip everything up to (not including) the closing bracket.
+	for !s.atEnd() && s.peek() != ']' {
+		s.advance()
+	}
+
+	if !s.atEnd() {
+		// Consume closing ']'
+		s.advance()
+		s.addToken(keyedAccess)
+		return
+	}
+
+	s.errors = append(s.errors, fmt.Errorf("unterminated access starting at %d", s.lexemeStartByte))
+}
+
+// identifier scans the rest of a run of identifier characters (the first one
+// has already been consumed) and emits it as filterField1, filterField2 or a
+// plain identifier, depending on whether the text is a known filter field.
+func (s *scanner) identifier() {
+	for !s.atEnd() && isIdentifierChar(s.source[s.nextByte]) {
+		s.nextByte++
+	}
+
+	text := s.source[s.lexemeStartByte:s.nextByte]
+
+	kind := identifier
+	if _, isField1 := ff1ToKCFilterField[text]; isField1 {
+		kind = filterField1
+	} else if _, isField2 := ff2ToKCFilterField[text]; isField2 {
+		kind = filterField2
+	}
+
+	s.addToken(kind)
+}
+
+// lexAllocationFilterV2 turns a raw V2 filter string into a token slice that
+// always ends with an eof token. If any lexing errors occurred they are
+// returned together as a single multierror, alongside the tokens that could
+// be scanned.
+//
+// TODO: For the time being, disallow whitespace
+func lexAllocationFilterV2(raw string) ([]token, error) {
+	s := &scanner{source: raw}
+	s.scanTokens()
+
+	if len(s.errors) == 0 {
+		return s.tokens, nil
+	}
+
+	return s.tokens, multierror.Append(nil, s.errors...)
+}

+ 89 - 0
pkg/util/allocationfilterutil/v2/lexer_test.go

@@ -0,0 +1,89 @@
+package allocationfilterutil
+
+import (
+	"testing"
+)
+
+// TestLexer checks the token stream (and, where expected, the presence of an
+// error) produced by lexAllocationFilterV2 for a table of inputs. Tokens are
+// compared even when an error is expected, because the lexer keeps scanning
+// after an error and still returns what it managed to lex.
+func TestLexer(t *testing.T) {
+	cases := []struct {
+		name string
+
+		input       string
+		expectError bool
+		expected    []token
+	}{
+		{
+			name:     "Empty string",
+			input:    "",
+			expected: []token{{kind: eof}},
+		},
+		{
+			name:     "colon",
+			input:    ":",
+			expected: []token{{kind: colon, s: ":"}, {kind: eof}},
+		},
+		{
+			name:     "comma",
+			input:    ",",
+			expected: []token{{kind: comma, s: ","}, {kind: eof}},
+		},
+		{
+			name:     "plus",
+			input:    "+",
+			expected: []token{{kind: plus, s: "+"}, {kind: eof}},
+		},
+		{
+			name:     "bangColon",
+			input:    "!:",
+			expected: []token{{kind: bangColon, s: "!:"}, {kind: eof}},
+		},
+		{
+			name: "multiple symbols",
+			// This is a valid string to lex but not to parse.
+			input:    "!::,+",
+			expected: []token{{kind: bangColon, s: "!:"}, {kind: colon, s: ":"}, {kind: comma, s: ","}, {kind: plus, s: "+"}, {kind: eof}},
+		},
+		{
+			name:     "string",
+			input:    `"test"`,
+			expected: []token{{kind: str, s: `test`}, {kind: eof}},
+		},
+		{
+			name:     "keyed access",
+			input:    "[app]",
+			expected: []token{{kind: keyedAccess, s: "app"}, {kind: eof}},
+		},
+		{
+			name:     "identifier pure alpha",
+			input:    "abc",
+			expected: []token{{kind: identifier, s: "abc"}, {kind: eof}},
+		},
+		{
+			name:     "identifier with digits, hyphen and underscore",
+			input:    "123-abc_foo",
+			expected: []token{{kind: identifier, s: "123-abc_foo"}, {kind: eof}},
+		},
+		{
+			name:     "label access",
+			input:    "app[kubecost]",
+			expected: []token{{kind: identifier, s: "app"}, {kind: keyedAccess, s: "kubecost"}, {kind: eof}},
+		},
+		{
+			name:     "filter field 1",
+			input:    "namespace",
+			expected: []token{{kind: filterField1, s: "namespace"}, {kind: eof}},
+		},
+		{
+			name:     "filter field 2 with keyed access",
+			input:    "label[app]",
+			expected: []token{{kind: filterField2, s: "label"}, {kind: keyedAccess, s: "app"}, {kind: eof}},
+		},
+		{
+			name:     "full comparison",
+			input:    `namespace:"kubecost"`,
+			expected: []token{{kind: filterField1, s: "namespace"}, {kind: colon, s: ":"}, {kind: str, s: "kubecost"}, {kind: eof}},
+		},
+		{
+			name:        "unterminated string",
+			input:       `"abc`,
+			expectError: true,
+			expected:    []token{{kind: eof}},
+		},
+		{
+			name:        "unterminated keyed access",
+			input:       "[app",
+			expectError: true,
+			expected:    []token{{kind: eof}},
+		},
+		{
+			name:        "lone bang",
+			input:       "!",
+			expectError: true,
+			expected:    []token{{kind: eof}},
+		},
+		{
+			name: "whitespace is rejected",
+			// The offending byte is skipped and lexing continues.
+			input:       "a b",
+			expectError: true,
+			expected:    []token{{kind: identifier, s: "a"}, {kind: identifier, s: "b"}, {kind: eof}},
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			t.Logf("Input: '%s'", c.input)
+			result, err := lexAllocationFilterV2(c.input)
+			if c.expectError && err == nil {
+				t.Fatalf("expected error but got nil")
+			}
+			if !c.expectError && err != nil {
+				t.Fatalf("unexpected error: %s", err)
+			}
+
+			if len(c.expected) != len(result) {
+				t.Fatalf("Token slices don't match in length.\nExpected: %+v\nGot: %+v", c.expected, result)
+			}
+			for i := range c.expected {
+				if c.expected[i] != result[i] {
+					t.Fatalf("Incorrect token at position %d.\nExpected: %+v\nGot: %+v", i, c.expected, result)
+				}
+			}
+		})
+	}
+}

+ 329 - 0
pkg/util/allocationfilterutil/v2/parser.go

@@ -0,0 +1,329 @@
+// Package allocationfilterutil provides functionality for parsing V2 of the
+// Kubecost filter language for Allocation types.
+//
+// e.g. `filter=namespace:"kubecost"+controllerKind:"deployment"`
+package allocationfilterutil
+
+import (
+	"fmt"
+
+	"github.com/hashicorp/go-multierror"
+	"github.com/kubecost/opencost/pkg/kubecost"
+)
+
+// ParseAllocationFilter converts a string of the V2 Allocation Filter language
+// into a kubecost.AllocationFilter.
+//
+// The input is first lexed and then parsed. If either stage fails, a nil
+// filter is returned together with an error that wraps the underlying lexing
+// or parsing error(s), so callers can inspect the cause with errors.Is /
+// errors.As.
+//
+// Example queries:
+//   namespace:"kubecost"
+//   label[app]:"cost-analyzer"
+//   node!:"node1","node2"
+//   cluster:"cluster-one"+namespace!:"kube-system"
+//
+// The grammar is approximately as follows:
+//
+// Original design doc [1] contains first grammar. This is a slight modification
+// of that grammar to help guide the implementation of the parser.
+//
+// [1] https://docs.google.com/document/d/1HKkp2bv3mnvfQoBZlpHjfZwQ0FzDLOHKpnwV9gQ_KgU/edit?pli=1
+//
+// <filter> ::= <comparison> ('+' <comparison>)*
+//
+// <comparison> ::= <filter-key> <filter-op> <filter-value>
+//
+// <filter-key> ::= <filter-field-2> <keyed-access>
+//                | <filter-field-1>
+//
+// <filter-op> ::= ':' | '!:'
+//
+// <filter-value> ::= '"' <identifier> '"' (',' <filter-value>)*
+//
+// <filter-field-2> ::= 'label' | 'annotation'
+//
+// <filter-field-1> ::= 'cluster' | 'node' | 'namespace'
+//                    | 'controllerName' | 'controllerKind'
+//                    | 'container' | 'pod' | 'services'
+//
+// <keyed-access> ::= '[' <identifier> ']'
+//
+// <identifier> ::= --- valid K8s name or Prom-sanitized K8s name
+func ParseAllocationFilter(filter string) (kubecost.AllocationFilter, error) {
+	tokens, err := lexAllocationFilterV2(filter)
+	if err != nil {
+		// %w keeps the error chain intact; the message text is unchanged.
+		return nil, fmt.Errorf("lexing filter: %w", err)
+	}
+
+	p := parser{tokens: tokens}
+
+	parsedFilter, err := p.filter()
+	if err != nil {
+		return nil, fmt.Errorf("parsing filter: %w", err)
+	}
+
+	return parsedFilter, nil
+}
+
+// ============================================================================
+// Parser
+//
+// Based on the Parser class in Chapter 6: Parsing Expressions of Crafting
+// Interpreters by Robert Nystrom
+// ============================================================================
+
+// parseError builds a parser error that says where the problem was found:
+// either at the text of token t or, for the eof token, "at end".
+func parseError(t token, message string) error {
+	if t.kind != eof {
+		return fmt.Errorf("at '%s': %s", t.s, message)
+	}
+
+	return fmt.Errorf("at end: %s", message)
+}
+
+// parser is a recursive descent parser over a token slice. current is the
+// index into tokens of the next token to be consumed (the one peek returns).
+// The helper methods rely on tokens ending with an eof token, as produced by
+// lexAllocationFilterV2.
+type parser struct {
+	tokens  []token
+	current int
+}
+
+// ----------------------------------------------------------------------------
+// Parser helper methods for token handling
+// ----------------------------------------------------------------------------
+
+// atEnd reports whether the next token to be consumed is the eof token.
+func (p *parser) atEnd() bool {
+	next := p.peek()
+	return next.kind == eof
+}
+
+// advance consumes the next token (unless the parser is already at the eof
+// token) and returns the most recently consumed token.
+//
+// If nothing has been consumed yet and the parser is already at the end
+// (i.e. the token stream is just [eof]), there is no previous token; the eof
+// token itself is returned instead of indexing tokens[-1], which used to
+// panic when synchronize() was called on an empty filter.
+func (p *parser) advance() token {
+	if !p.atEnd() {
+		p.current++
+	}
+
+	if p.current == 0 {
+		return p.peek()
+	}
+
+	return p.previous()
+}
+
+// previous returns the most recently consumed token. It must only be called
+// after at least one token has been consumed.
+func (p *parser) previous() token {
+	last := p.current - 1
+	return p.tokens[last]
+}
+
+// match returns true and advances the parser by one token if the next token
+// has a kind equal to any of the arguments. Otherwise, it returns false and
+// DOES NOT advance the parser.
+func (p *parser) match(tokenKinds ...tokenKind) bool {
+	for i := range tokenKinds {
+		if !p.check(tokenKinds[i]) {
+			continue
+		}
+
+		p.advance()
+		return true
+	}
+
+	return false
+}
+
+// check reports whether the next token is of the provided kind, without
+// consuming it. It is always false once the parser has reached the eof token.
+func (p *parser) check(tk tokenKind) bool {
+	return !p.atEnd() && p.peek().kind == tk
+}
+
+// peek returns the next token to be consumed without advancing the parser.
+func (p *parser) peek() token {
+	next := p.tokens[p.current]
+	return next
+}
+
+// consume requires the next token to be of kind tk. When it is, the parser
+// advances and the consumed token is returned. When it is not, the parser
+// stays where it is and a parse error built from message is returned.
+func (p *parser) consume(tk tokenKind, message string) (token, error) {
+	if !p.check(tk) {
+		return token{}, parseError(p.peek(), message)
+	}
+
+	return p.advance(), nil
+}
+
+// synchronize skips tokens until one of them was a '+' (the separator that
+// precedes a new <comparison>) or the end of the token stream is reached.
+// This lets us do best-effort reporting of multiple parse errors.
+func (p *parser) synchronize() {
+	for p.advance(); !p.atEnd(); p.advance() {
+		if p.previous().kind == plus {
+			return
+		}
+	}
+}
+
+// ----------------------------------------------------------------------------
+// Parser grammar rules as recursive descent methods
+// ----------------------------------------------------------------------------
+
+// filter is the main method of the parser. It turns the token stream into an
+// AllocationFilter, reporting parse errors that occurred along the way.
+//
+// A filter is a '+'-separated sequence of comparisons, all of which are ANDed
+// together. When a comparison fails to parse, its error is recorded and the
+// parser skips ahead past the next '+' so that later comparisons can still be
+// checked and their errors reported too. Any tokens left over after the last
+// comparison are reported as an error rather than silently ignored.
+//
+// The returned error is nil iff no parse error occurred.
+func (p *parser) filter() (kubecost.AllocationFilter, error) {
+	var errs *multierror.Error
+
+	// Currently, a filter is only a sequence of AND operations
+	f := kubecost.AllocationFilterAnd{}
+
+	for {
+		comparison, err := p.comparison()
+		if err != nil {
+			errs = multierror.Append(errs, err)
+
+			// Nothing left to skip over; this also avoids synchronizing on
+			// a token stream that consists of eof only.
+			if p.atEnd() {
+				break
+			}
+
+			// synchronize stops either at the end or just after a '+', in
+			// which case the next comparison starts right here.
+			p.synchronize()
+			if p.atEnd() {
+				break
+			}
+			continue
+		}
+
+		f.Filters = append(f.Filters, comparison)
+
+		if !p.match(plus) {
+			break
+		}
+	}
+
+	// A well-formed filter is fully consumed at this point.
+	if !p.atEnd() {
+		errs = multierror.Append(errs, parseError(p.peek(), "expect '+' or end of filter"))
+	}
+
+	return f, errs.ErrorOrNil()
+}
+
+// comparison parses one <comparison>: a filter key, a filter op and one or
+// more comma-separated filter values.
+//
+// Each value becomes one AllocationFilterCondition. For ':' the conditions
+// are ORed (match any value); for '!:' they are ANDed (match none of them):
+//
+//   namespace:"foo","bar"  -> (or  (equals namespace foo)
+//                                  (equals namespace bar))
+//   namespace!:"foo","bar" -> (and (notequals namespace foo)
+//                                  (notequals namespace bar))
+//
+// The services field holds a list, so it uses contains/notcontains instead of
+// equals/notequals.
+func (p *parser) comparison() (kubecost.AllocationFilter, error) {
+	field, key, err := p.filterKey()
+	if err != nil {
+		return nil, err
+	}
+
+	opToken, err := p.filterOp()
+	if err != nil {
+		return nil, err
+	}
+
+	// Compare against the kubecost constant rather than a string literal so
+	// this keeps working if the constant's value ever changes.
+	isServices := field == kubecost.FilterServices
+
+	var op kubecost.FilterOp
+	switch {
+	case opToken.kind == colon && isServices:
+		op = kubecost.FilterContains
+	case opToken.kind == colon:
+		op = kubecost.FilterEquals
+	case opToken.kind == bangColon && isServices:
+		op = kubecost.FilterNotContains
+	case opToken.kind == bangColon:
+		op = kubecost.FilterNotEquals
+	case isServices:
+		return nil, parseError(opToken, "implementation problem: unhandled op token for services filter")
+	default:
+		return nil, parseError(opToken, "implementation problem: unhandled op token")
+	}
+
+	values, err := p.filterValues()
+	if err != nil {
+		return nil, err
+	}
+
+	conditions := make([]kubecost.AllocationFilter, 0, len(values))
+	for _, v := range values {
+		conditions = append(conditions, kubecost.AllocationFilterCondition{
+			Field: field,
+			Key:   key,
+			Op:    op,
+			Value: v,
+		})
+	}
+
+	// In the != case, a sequence of filter values is ANDed; otherwise ORed.
+	if opToken.kind == bangColon {
+		return kubecost.AllocationFilterAnd{Filters: conditions}, nil
+	}
+
+	return kubecost.AllocationFilterOr{Filters: conditions}, nil
+}
+
+// filterKey parses a series of tokens that represent a "filter key", returning
+// an error if a filter key cannot be constructed.
+//
+// Examples:
+// tokens = [filterField2:label keyedAccess:app] -> FilterLabel, app, nil
+// tokens = [filterField1:namespace] -> FilterNamespace, "", nil
+func (p *parser) filterKey() (field kubecost.FilterField, key string, err error) {
+
+	if p.match(filterField2) {
+		rawField := p.previous().s
+		mappedField, ok := ff2ToKCFilterField[rawField]
+		if !ok {
+			return "", "", parseError(p.previous(), "expect key-mapped filter field, like 'label' or 'annotation'")
+		}
+
+		_, err := p.consume(keyedAccess, "expect keyed access like '[app]' after a mapped field")
+		if err != nil {
+			return "", "", err
+		}
+
+		key = p.previous().s
+		return mappedField, key, nil
+	}
+
+	_, err = p.consume(filterField1, "expect filter field")
+	if err != nil {
+		return "", "", err
+	}
+
+	rawField := p.previous().s
+	mappedField, ok := ff1ToKCFilterField[rawField]
+	if !ok {
+		return "", "", parseError(p.previous(), "expect known filter field, like 'cluster' or 'namespace'")
+	}
+
+	return mappedField, "", nil
+}
+
+// filterOp consumes and returns the next token if it is a filter operator
+// (':' or '!:'). Otherwise nothing is consumed and a parse error is returned.
+func (p *parser) filterOp() (token, error) {
+	if !p.match(bangColon, colon) {
+		return token{}, parseError(p.peek(), "expect filter op like ':' or '!:'")
+	}
+
+	return p.previous(), nil
+}
+
+// filterValues parses one or more comma-separated str tokens and returns
+// their contents in order. A missing string, at the start or after a comma,
+// is a parse error.
+func (p *parser) filterValues() ([]string, error) {
+	first, err := p.consume(str, "expect string as filter value")
+	if err != nil {
+		return nil, err
+	}
+
+	vals := []string{first.s}
+
+	// Every comma must be followed by another string.
+	for p.match(comma) {
+		next, err := p.consume(str, "expect string as filter value")
+		if err != nil {
+			return nil, err
+		}
+
+		vals = append(vals, next.s)
+	}
+
+	return vals, nil
+}

+ 298 - 0
pkg/util/allocationfilterutil/v2/parser_test.go

@@ -0,0 +1,298 @@
+package allocationfilterutil
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"github.com/kubecost/opencost/pkg/kubecost"
+)
+
+func allocGenerator(props kubecost.AllocationProperties) kubecost.Allocation {
+	a := kubecost.Allocation{
+		Properties: &props,
+	}
+
+	a.Name = a.Properties.String()
+	return a
+}
+
+// TestParse runs ParseAllocationFilter over a table of filter strings. For
+// each input it checks that parsing succeeds, that the resulting filter tree
+// is deeply equal to the expected one and, where Allocations are listed, that
+// the filter matches every entry of shouldMatch and none of shouldNotMatch.
+func TestParse(t *testing.T) {
+	// TODO: unallocated cases
+	cases := []struct {
+		input          string
+		expected       kubecost.AllocationFilter
+		shouldMatch    []kubecost.Allocation
+		shouldNotMatch []kubecost.Allocation
+	}{
+		{
+			input: `namespace:"kubecost"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterEquals,
+						Value: "kubecost",
+					},
+				}},
+			}},
+			shouldMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kubecost"}),
+			},
+			shouldNotMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kube-system"}),
+			},
+		},
+		{
+			input: `namespace!:"kubecost","kube-system"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterNotEquals,
+						Value: "kubecost",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterNotEquals,
+						Value: "kube-system",
+					},
+				}},
+			}},
+			shouldMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "abc"}),
+			},
+			shouldNotMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kubecost"}),
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kube-system"}),
+			},
+		},
+		{
+			input: `namespace:"kubecost","kube-system"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterEquals,
+						Value: "kubecost",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterEquals,
+						Value: "kube-system",
+					},
+				}},
+			}},
+			shouldMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kubecost"}),
+				allocGenerator(kubecost.AllocationProperties{Namespace: "kube-system"}),
+			},
+			shouldNotMatch: []kubecost.Allocation{
+				allocGenerator(kubecost.AllocationProperties{Namespace: "abc"}),
+			},
+		},
+		{
+			input: `label[app_abc]:"cost_analyzer"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterLabel,
+						Key:   "app_abc",
+						Op:    kubecost.FilterEquals,
+						Value: "cost_analyzer",
+					},
+				}},
+			}},
+		},
+		{
+			input: `services:"123","abc"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterServices,
+						Op:    kubecost.FilterContains,
+						Value: "123",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterServices,
+						Op:    kubecost.FilterContains,
+						Value: "abc",
+					},
+				}},
+			}},
+		},
+		{
+			input: `services!:"123","abc"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterServices,
+						Op:    kubecost.FilterNotContains,
+						Value: "123",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterServices,
+						Op:    kubecost.FilterNotContains,
+						Value: "abc",
+					},
+				}},
+			}},
+		},
+		// NOTE(review): this case has the same input and expectation as the
+		// label[app_abc] case above.
+		{
+			input: `label[app_abc]:"cost_analyzer"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterLabel,
+						Key:   "app_abc",
+						Op:    kubecost.FilterEquals,
+						Value: "cost_analyzer",
+					},
+				}},
+			}},
+		},
+		{
+			input: `label[app_abc]:"cost_analyzer"+label[foo]:"bar"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterLabel,
+						Key:   "app_abc",
+						Op:    kubecost.FilterEquals,
+						Value: "cost_analyzer",
+					},
+				}},
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterLabel,
+						Key:   "foo",
+						Op:    kubecost.FilterEquals,
+						Value: "bar",
+					},
+				}},
+			}},
+		},
+		// One comparison for every supported filter field, mixing ':' and '!:'.
+		{
+			input: `namespace:"kubecost"+label[app]:"cost_analyzer"+annotation[a1]:"b2"+cluster:"cluster-one"+node!:"node-123","node-456"+controllerName:"kubecost-cost-analyzer","kubecost-prometheus-server"+controllerKind!:"daemonset","statefulset","job"+container!:"123-abc_foo"+pod!:"aaaaaaaaaaaaaaaaaaaaaaaaa"+services!:"abc123"`,
+			expected: kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNamespace,
+						Op:    kubecost.FilterEquals,
+						Value: "kubecost",
+					},
+				}},
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterLabel,
+						Key:   "app",
+						Op:    kubecost.FilterEquals,
+						Value: "cost_analyzer",
+					},
+				}},
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterAnnotation,
+						Key:   "a1",
+						Op:    kubecost.FilterEquals,
+						Value: "b2",
+					},
+				}},
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterClusterID,
+						Op:    kubecost.FilterEquals,
+						Value: "cluster-one",
+					},
+				}},
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNode,
+						Op:    kubecost.FilterNotEquals,
+						Value: "node-123",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterNode,
+						Op:    kubecost.FilterNotEquals,
+						Value: "node-456",
+					},
+				}},
+				kubecost.AllocationFilterOr{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterControllerName,
+						Op:    kubecost.FilterEquals,
+						Value: "kubecost-cost-analyzer",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterControllerName,
+						Op:    kubecost.FilterEquals,
+						Value: "kubecost-prometheus-server",
+					},
+				}},
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterControllerKind,
+						Op:    kubecost.FilterNotEquals,
+						Value: "daemonset",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterControllerKind,
+						Op:    kubecost.FilterNotEquals,
+						Value: "statefulset",
+					},
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterControllerKind,
+						Op:    kubecost.FilterNotEquals,
+						Value: "job",
+					},
+				}},
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterContainer,
+						Op:    kubecost.FilterNotEquals,
+						Value: "123-abc_foo",
+					},
+				}},
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterPod,
+						Op:    kubecost.FilterNotEquals,
+						Value: "aaaaaaaaaaaaaaaaaaaaaaaaa",
+					},
+				}},
+				kubecost.AllocationFilterAnd{[]kubecost.AllocationFilter{
+					kubecost.AllocationFilterCondition{
+						Field: kubecost.FilterServices,
+						Op:    kubecost.FilterNotContains,
+						Value: "abc123",
+					},
+				}},
+			}},
+		},
+	}
+
+	// Subtests are named after the index of the case in the table.
+	for i, c := range cases {
+		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+			t.Logf("Query: %s", c.input)
+			result, err := ParseAllocationFilter(c.input)
+			t.Logf("Result: %s", result)
+			if err != nil {
+				t.Fatalf("Unexpected parse error: %s", err)
+			}
+			// Structural check: the parsed tree must equal the expected tree.
+			if !reflect.DeepEqual(result, c.expected) {
+				t.Fatalf("Expected:\n%s\nGot:\n%s", c.expected, result)
+			}
+
+			// Behavioral check: run the parsed filter against sample
+			// Allocations.
+			for _, shouldMatch := range c.shouldMatch {
+				if !result.Matches(&shouldMatch) {
+					t.Errorf("Failed to match %s", shouldMatch.Name)
+				}
+			}
+			for _, shouldNotMatch := range c.shouldNotMatch {
+				if result.Matches(&shouldNotMatch) {
+					t.Errorf("Incorrectly matched %s", shouldNotMatch.Name)
+				}
+			}
+		})
+	}
+}