package ast

import (
	"fmt"

	multierror "github.com/hashicorp/go-multierror"
)

// ============================================================================
// This file contains:
// Lexing (string -> []token) for V2 of allocation filters
// ============================================================================
//
// See parser.go for a formal grammar and external links.

// tokenKind identifies the category of a token produced by the scanner.
type tokenKind int

// The kinds are numbered sequentially by iota, starting with colon at 0.
const (
	// Single-character separators and operators.
	colon tokenKind = iota // ':'
	comma                  // ','
	plus                   // '+'
	or                     // '|'

	// Multi-character comparison operators; every one ends in ':'.
	bangColon           // '!:'
	tildeColon          // '~:'
	bangTildeColon      // '!~:'
	startTildeColon     // '<~:'
	bangStartTildeColon // '!<~:'
	tildeEndColon       // '~>:'
	bangTildeEndColon   // '!~>:'

	// Grouping.
	parenOpen  // '('
	parenClose // ')'

	// Double-quoted string literal. The token text excludes the quotes.
	str // '"foo"'

	// Words. An identifier-shaped lexeme is classified as filterField or
	// mapField when it is found in the scanner's corresponding lookup map,
	// and as a plain identifier otherwise.
	filterField // 'namespace', 'cluster'
	mapField    // 'label', 'annotation'
	keyedAccess // '[app]', '[foo]', etc.
	identifier  // K8s valid name + sanitized Prom: 'app', 'abc_label'

	// eof is appended once, after the whole input has been scanned.
	eof
)

// String returns the name of the kind's constant, which is what shows up in
// diagnostics that print a token. Values that do not correspond to a declared
// kind are rendered as "Unspecified: <n>".
func (tk tokenKind) String() string {
	switch tk {
	case colon:
		return "colon"
	case comma:
		return "comma"
	case plus:
		return "plus"
	case or:
		return "or"
	case bangColon:
		return "bangColon"
	case tildeColon:
		return "tildeColon"
	case bangTildeColon:
		return "bangTildeColon"
	case startTildeColon:
		return "startTildeColon"
	case bangStartTildeColon:
		return "bangStartTildeColon"
	case tildeEndColon:
		return "tildeEndColon"
	case bangTildeEndColon:
		return "bangTildeEndColon"
	case parenOpen:
		return "parenOpen"
	case parenClose:
		return "parenClose"
	case str:
		return "str"
	case filterField:
		// Previously "filterField1", which did not match the constant name.
		return "filterField"
	case mapField:
		// Previously "filterField2", which hid the fact that this is a map
		// field (e.g. 'label') rather than a plain filter field.
		return "mapField"
	case keyedAccess:
		return "keyedAccess"
	case identifier:
		return "identifier"
	case eof:
		return "eof"
	default:
		return fmt.Sprintf("Unspecified: %d", tk)
	}
}

// ============================================================================
// Lexer/Scanner
//
// Based on the Scanner class in Chapter 4: Scanning of Crafting Interpreters by
// Robert Nystrom
// ============================================================================

// token is one lexeme produced by the scanner.
//
// kind is the lexeme's category. s is the lexeme text sliced from the source;
// for str and keyedAccess tokens the surrounding '"' / '[' ']' delimiters are
// stripped by addToken, and for the trailing eof token s is empty.
type token struct {
	kind tokenKind
	s    string
}

// String renders the token as "<kind>:<text>", using the kind's String form.
func (t token) String() string {
	return t.kind.String() + ":" + t.s
}

// scanner holds the state of a single lexing pass over source.
//
//   - source is the raw input being lexed.
//   - tokens accumulates the tokens recognised so far (appended by addToken
//     and, for the final eof token, by scanTokens).
//   - errors accumulates lexing errors; scanning continues after an error so
//     that several problems can be reported at once.
//   - fields and mapFields are consulted by identifier() with the lexeme text
//     as key: a hit in fields yields a filterField token, a hit in mapFields
//     yields a mapField token, and anything else is a plain identifier. Only
//     key presence is checked; the *Field values are not read here.
//   - lexemeStartByte is the byte offset in source where the lexeme currently
//     being scanned begins.
//   - nextByte is the byte offset in source of the next byte to be consumed.
type scanner struct {
	source string
	tokens []token
	errors []error

	fields    map[string]*Field
	mapFields map[string]*Field

	lexemeStartByte int
	nextByte        int
}

// scanTokens lexes the whole source, one lexeme at a time, and finishes by
// appending a single eof token. Tokens and errors are accumulated on the
// scanner itself.
func (s *scanner) scanTokens() {
	for {
		if s.atEnd() {
			// Input exhausted: terminate the token stream.
			s.tokens = append(s.tokens, token{kind: eof})
			return
		}

		// Every lexeme starts where the previous one stopped.
		s.lexemeStartByte = s.nextByte
		s.scanToken()
	}
}

// atEnd reports whether every byte of the source has been consumed.
//
// It takes a pointer receiver, like every other scanner method, so that the
// scanner struct is not copied on each call (atEnd is invoked for nearly
// every byte of input).
func (s *scanner) atEnd() bool {
	return s.nextByte >= len(s.source)
}

// advance consumes the next byte of the source and returns it. The caller is
// responsible for making sure the scanner is not at the end of the input.
//
// The lexer only accepts ASCII, so a single byte is always a full character.
//
// TODO: If we add unicode support, advance() will probably have to return a
// rune.
func (s *scanner) advance() byte {
	current := s.source[s.nextByte]
	s.nextByte++
	return current
}

// match consumes the next byte only if it equals expected, and reports whether
// it did so. At the end of the input nothing is consumed and false is
// returned.
func (s *scanner) match(expected byte) bool {
	matched := !s.atEnd() && s.source[s.nextByte] == expected
	if matched {
		s.nextByte++
	}
	return matched
}

// addToken appends a token of the given kind whose text is the current lexeme,
// i.e. the source bytes from lexemeStartByte up to (not including) nextByte.
//
// For str and keyedAccess tokens the first and last byte of the lexeme (the
// enclosing quotes or brackets) are dropped from the token text.
func (s *scanner) addToken(kind tokenKind) {
	text := s.source[s.lexemeStartByte:s.nextByte]
	if kind == str || kind == keyedAccess {
		text = text[1 : len(text)-1]
	}

	s.tokens = append(s.tokens, token{kind: kind, s: text})
}

// peek returns the next byte of the source without consuming it, or 0 when the
// whole input has already been consumed.
func (s *scanner) peek() byte {
	if !s.atEnd() {
		return s.source[s.nextByte]
	}
	return 0
}

// scanToken consumes one lexeme starting at the current position. Depending on
// what it finds it appends a token, records an error, or (for whitespace)
// does nothing.
//
// Multi-byte operators are recognised one byte at a time with match(). When
// an operator is only partially matched, an error naming the last byte that
// was accepted is recorded and no token is emitted for the bytes consumed so
// far.
func (s *scanner) scanToken() {
	switch c := s.advance(); c {
	// Single-byte tokens.
	case ':':
		s.addToken(colon)
	case ',':
		s.addToken(comma)
	case '+':
		s.addToken(plus)
	case '|':
		s.addToken(or)
	case '(':
		s.addToken(parenOpen)
	case ')':
		s.addToken(parenClose)

	// '!:', '!~:', '!~>:' and '!<~:'
	case '!':
		switch {
		case s.match(':'):
			s.addToken(bangColon)
		case s.match('~'):
			// Seen "!~"; expect ':' or ">:".
			switch {
			case s.match(':'):
				s.addToken(bangTildeColon)
			case !s.match('>'):
				s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
			case s.match(':'):
				s.addToken(bangTildeEndColon)
			default:
				s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '>'", s.nextByte-1))
			}
		case s.match('<'):
			// Seen "!<"; expect "~:".
			switch {
			case !s.match('~'):
				s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '<'", s.nextByte-1))
			case !s.match(':'):
				s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
			default:
				s.addToken(bangStartTildeColon)
			}
		default:
			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '!'", s.nextByte-1))
		}

	// '<~:'
	case '<':
		switch {
		case !s.match('~'):
			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '<'", s.nextByte-1))
		case !s.match(':'):
			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
		default:
			s.addToken(startTildeColon)
		}

	// '~:' and '~>:'
	case '~':
		switch {
		case s.match(':'):
			s.addToken(tildeColon)
		case !s.match('>'):
			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '~'", s.nextByte-1))
		case s.match(':'):
			s.addToken(tildeEndColon)
		default:
			s.errors = append(s.errors, fmt.Errorf("Position %d: Unexpected '>'", s.nextByte-1))
		}

	// strings
	case '"':
		s.string()

	// keyed access
	case '[':
		s.keyedAccess()

	case ' ', '\t', '\n', '\r':
		// Whitespace outside of "" and [] carries no meaning; skip it.

	default:
		// identifiers
		//
		// The first character is deliberately not restricted to non-digits:
		// this language has no numbers. If numbers are ever added, this
		// check has to become an isAlpha()-style test, with s.identifier()
		// still using isIdentifierChar() in its main loop.
		if !isIdentifierChar(c) {
			// TODO: We could return a more exact error message for Unicode
			// chars if we added extra handling:
			// https://stackoverflow.com/questions/53069040/checking-a-string-contains-only-ascii-characters
			s.errors = append(s.errors, fmt.Errorf("unexpected character/byte at position %d. Please avoid Unicode.", s.nextByte-1))
			return
		}
		s.identifier()
	}
}

// isIdentifierChar reports whether b may appear in an identifier: an ASCII
// letter, an ASCII digit, '-' or '_'. The set is meant to line up with the
// characters Kubernetes allows in names, plus '_' for Prometheus-sanitized
// names.
//
// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
//
// TODO: This may not match all characters we support for cluster IDs (it may be
// the case that cluster IDs can contain UTF-8 characters).
func isIdentifierChar(b byte) bool {
	switch {
	case '0' <= b && b <= '9': // digits
		return true
	case 'A' <= b && b <= 'Z': // upper-case letters
		return true
	case 'a' <= b && b <= 'z': // lower-case letters
		return true
	case b == '-': // hyphens are allowed according to K8s spec
		return true
	case b == '_': // underscores are allowed because of Prometheus sanitization
		return true
	}
	return false
}

// string scans the remainder of a double-quoted string whose opening '"' has
// already been consumed. Everything up to the next '"' is taken verbatim
// (there is no escape handling). If the input ends before a closing quote is
// found, an error is recorded and no token is emitted.
func (s *scanner) string() {
	for !s.atEnd() && s.peek() != '"' {
		s.advance()
	}

	if !s.atEnd() {
		// Consume closing '"'
		s.advance()
		s.addToken(str)
		return
	}

	s.errors = append(s.errors, fmt.Errorf("unterminated string starting at %d", s.lexemeStartByte))
}

// keyedAccess scans the remainder of a bracketed key such as "[app]" whose
// opening '[' has already been consumed. Everything up to the next ']' is
// taken verbatim. If the input ends before a closing bracket is found, an
// error is recorded and no token is emitted.
func (s *scanner) keyedAccess() {
	for !s.atEnd() && s.peek() != ']' {
		s.advance()
	}

	if !s.atEnd() {
		// Consume closing ']'
		s.advance()
		s.addToken(keyedAccess)
		return
	}

	s.errors = append(s.errors, fmt.Errorf("unterminated access starting at %d", s.lexemeStartByte))
}

// identifier scans the rest of an identifier-shaped lexeme (its first byte has
// already been consumed) and emits it as a filterField, mapField or plain
// identifier token, depending on whether its text is a key of s.fields or
// s.mapFields. s.fields takes precedence when a name is present in both.
func (s *scanner) identifier() {
	for {
		if !isIdentifierChar(s.peek()) {
			break
		}
		s.advance()
	}

	name := s.source[s.lexemeStartByte:s.nextByte]

	kind := identifier
	if _, isField := s.fields[name]; isField {
		kind = filterField
	} else if _, isMapField := s.mapFields[name]; isMapField {
		kind = mapField
	}
	s.addToken(kind)
}

// lex turns raw into a slice of tokens, always terminated by an eof token.
// fields and mapFields supply the names that should be lexed as filterField
// and mapField tokens respectively.
//
// If any lexing errors occurred they are combined into a single multierror.
// The tokens that could be recognised are returned alongside that error.
func lex(raw string, fields map[string]*Field, mapFields map[string]*Field) ([]token, error) {
	sc := &scanner{
		source:    raw,
		fields:    fields,
		mapFields: mapFields,
	}
	sc.scanTokens()

	if len(sc.errors) == 0 {
		return sc.tokens, nil
	}
	return sc.tokens, multierror.Append(nil, sc.errors...)
}