lexer.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. package parser
  2. import (
  3. "bufio"
  4. "io"
  5. "strings"
  6. "unicode"
  7. )
  // lexer turns a stream of runes into tokens.
  type lexer struct {
  	// reader is the buffered source the runes are read from; buffering is
  	// what makes the one-rune backup() possible.
  	reader *bufio.Reader

  	// errors accumulates read and unread failures met while scanning.
  	errors []error
  }
  12. func newLexer(r io.Reader) *lexer {
  13. return &lexer{
  14. reader: bufio.NewReader(r),
  15. }
  16. }
  17. func (l *lexer) nextChar() (r rune, isEof bool) {
  18. ch, _, err := l.reader.ReadRune()
  19. if err != nil {
  20. if err == io.EOF {
  21. return ch, true
  22. }
  23. l.errors = append(l.errors, err)
  24. }
  25. return ch, false
  26. }
  27. func (l *lexer) backup() {
  28. if err := l.reader.UnreadRune(); err != nil {
  29. l.errors = append(l.errors, err)
  30. }
  31. }
  32. func (l *lexer) next() token {
  33. for {
  34. ch, isEof := l.nextChar()
  35. if isEof {
  36. return token{Type: Eof}
  37. }
  38. switch ch {
  39. case '#':
  40. return token{Type: Comment, Value: l.comment()}
  41. case '{':
  42. return token{Type: OpenBracket, Value: "{"}
  43. case '}':
  44. return token{Type: CloseBracket, Value: "}"}
  45. case ',':
  46. return token{Type: Comma, Value: ","}
  47. case '=':
  48. return token{Type: Equal, Value: "="}
  49. case '"':
  50. return token{Type: String, Value: l.str()}
  51. default:
  52. if unicode.IsSpace(ch) {
  53. continue
  54. }
  55. if unicode.IsLetter(ch) {
  56. l.backup()
  57. // special handling for NaN and Inf without leading sign
  58. lit := l.literal()
  59. if lit == "NaN" || lit == "Inf" {
  60. return token{Type: Value, Value: lit}
  61. }
  62. return token{Type: Literal, Value: lit}
  63. }
  64. if unicode.IsDigit(ch) || ch == '.' || ch == '+' || ch == '-' {
  65. l.backup()
  66. return token{Type: Value, Value: l.float()}
  67. }
  68. }
  69. }
  70. }
  71. func (l *lexer) comment() string {
  72. var sb strings.Builder
  73. for {
  74. ch, isEof := l.nextChar()
  75. if isEof {
  76. return sb.String()
  77. }
  78. if ch == '\n' {
  79. return sb.String()
  80. }
  81. sb.WriteRune(ch)
  82. }
  83. }
  84. func (l *lexer) str() string {
  85. var sb strings.Builder
  86. for {
  87. r, isEof := l.nextChar()
  88. if isEof {
  89. return sb.String()
  90. }
  91. if r == '\\' {
  92. n, isEof := l.nextChar()
  93. if isEof {
  94. return sb.String()
  95. }
  96. if n == 'n' {
  97. sb.WriteRune('\n')
  98. continue
  99. }
  100. if n == '\\' {
  101. sb.WriteRune('\\')
  102. continue
  103. }
  104. if n == '"' {
  105. sb.WriteRune('"')
  106. continue
  107. }
  108. }
  109. if r != '"' {
  110. sb.WriteRune(r)
  111. } else {
  112. return sb.String()
  113. }
  114. }
  115. }
  116. func (l *lexer) literal() string {
  117. var sb strings.Builder
  118. for {
  119. r, isEof := l.nextChar()
  120. if isEof {
  121. return sb.String()
  122. }
  123. if isAlphaNumericUnderscore(r) {
  124. sb.WriteRune(r)
  125. } else {
  126. l.backup()
  127. return sb.String()
  128. }
  129. }
  130. }
  131. func (l *lexer) float() string {
  132. var sb strings.Builder
  133. for {
  134. r, isEof := l.nextChar()
  135. if isEof {
  136. return sb.String()
  137. }
  138. if isOneOf(r, "NaInf+-._eE") || unicode.IsDigit(r) {
  139. //if r == 'N' || r == 'a' || r == 'I' || r == 'n' || r == 'f' || r == '+' || r == '-' || r == '.' || r == 'e' || r == 'E' || r == '_' || unicode.IsDigit(r) {
  140. sb.WriteRune(r)
  141. } else {
  142. return sb.String()
  143. }
  144. }
  145. }
  // isOneOf reports whether ch occurs among the runes of chars.
  func isOneOf(ch rune, chars string) bool {
  	return strings.ContainsRune(chars, ch)
  }
  // isAlphaNumeric reports whether ch is a Unicode letter or a Unicode
  // decimal digit.
  func isAlphaNumeric(ch rune) bool {
  	if unicode.IsLetter(ch) {
  		return true
  	}
  	return unicode.IsDigit(ch)
  }
  157. func isAlphaNumericUnderscore(ch rune) bool {
  158. return isAlphaNumeric(ch) || ch == '_'
  159. }