| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- package parser
- import (
- "bufio"
- "io"
- "strings"
- "unicode"
- )
// lexer turns a stream of runes into tokens, one call to next at a time.
type lexer struct {
	// reader is the buffered rune source the lexer consumes.
	reader *bufio.Reader
	// errors collects read and unread failures encountered while lexing.
	errors []error
}
- func newLexer(r io.Reader) *lexer {
- return &lexer{
- reader: bufio.NewReader(r),
- }
- }
- func (l *lexer) nextChar() (r rune, isEof bool) {
- ch, _, err := l.reader.ReadRune()
- if err != nil {
- if err == io.EOF {
- return ch, true
- }
- l.errors = append(l.errors, err)
- }
- return ch, false
- }
- func (l *lexer) backup() {
- if err := l.reader.UnreadRune(); err != nil {
- l.errors = append(l.errors, err)
- }
- }
- func (l *lexer) next() token {
- for {
- ch, isEof := l.nextChar()
- if isEof {
- return token{Type: Eof}
- }
- switch ch {
- case '#':
- return token{Type: Comment, Value: l.comment()}
- case '{':
- return token{Type: OpenBracket, Value: "{"}
- case '}':
- return token{Type: CloseBracket, Value: "}"}
- case ',':
- return token{Type: Comma, Value: ","}
- case '=':
- return token{Type: Equal, Value: "="}
- case '"':
- return token{Type: String, Value: l.str()}
- default:
- if unicode.IsSpace(ch) {
- continue
- }
- if unicode.IsLetter(ch) {
- l.backup()
- // special handling for NaN and Inf without leading sign
- lit := l.literal()
- if lit == "NaN" || lit == "Inf" {
- return token{Type: Value, Value: lit}
- }
- return token{Type: Literal, Value: lit}
- }
- if unicode.IsDigit(ch) || ch == '.' || ch == '+' || ch == '-' {
- l.backup()
- return token{Type: Value, Value: l.float()}
- }
- }
- }
- }
- func (l *lexer) comment() string {
- var sb strings.Builder
- for {
- ch, isEof := l.nextChar()
- if isEof {
- return sb.String()
- }
- if ch == '\n' {
- return sb.String()
- }
- sb.WriteRune(ch)
- }
- }
- func (l *lexer) str() string {
- var sb strings.Builder
- for {
- r, isEof := l.nextChar()
- if isEof {
- return sb.String()
- }
- if r == '\\' {
- n, isEof := l.nextChar()
- if isEof {
- return sb.String()
- }
- if n == 'n' {
- sb.WriteRune('\n')
- continue
- }
- if n == '\\' {
- sb.WriteRune('\\')
- continue
- }
- if n == '"' {
- sb.WriteRune('"')
- continue
- }
- }
- if r != '"' {
- sb.WriteRune(r)
- } else {
- return sb.String()
- }
- }
- }
- func (l *lexer) literal() string {
- var sb strings.Builder
- for {
- r, isEof := l.nextChar()
- if isEof {
- return sb.String()
- }
- if isAlphaNumericUnderscore(r) {
- sb.WriteRune(r)
- } else {
- l.backup()
- return sb.String()
- }
- }
- }
- func (l *lexer) float() string {
- var sb strings.Builder
- for {
- r, isEof := l.nextChar()
- if isEof {
- return sb.String()
- }
- if isOneOf(r, "NaInf+-._eE") || unicode.IsDigit(r) {
- //if r == 'N' || r == 'a' || r == 'I' || r == 'n' || r == 'f' || r == '+' || r == '-' || r == '.' || r == 'e' || r == 'E' || r == '_' || unicode.IsDigit(r) {
- sb.WriteRune(r)
- } else {
- return sb.String()
- }
- }
- }
// isOneOf reports whether ch occurs among the runes of chars.
// An empty chars never matches.
func isOneOf(ch rune, chars string) bool {
	return strings.ContainsRune(chars, ch)
}
// isAlphaNumeric reports whether ch is a Unicode letter or a Unicode
// decimal digit.
func isAlphaNumeric(ch rune) bool {
	if unicode.IsLetter(ch) {
		return true
	}
	return unicode.IsDigit(ch)
}
- func isAlphaNumericUnderscore(ch rune) bool {
- return isAlphaNumeric(ch) || ch == '_'
- }
|