lexer.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. package pattern
  2. import (
  3. "fmt"
  4. "go/token"
  5. "unicode"
  6. "unicode/utf8"
  7. )
  8. type lexer struct {
  9. f *token.File
  10. input string
  11. start int
  12. pos int
  13. width int
  14. items chan item
  15. }
  16. type itemType int
  17. const eof = -1
  18. const (
  19. itemError itemType = iota
  20. itemLeftParen
  21. itemRightParen
  22. itemLeftBracket
  23. itemRightBracket
  24. itemTypeName
  25. itemVariable
  26. itemAt
  27. itemColon
  28. itemBlank
  29. itemString
  30. itemEOF
  31. )
  32. func (typ itemType) String() string {
  33. switch typ {
  34. case itemError:
  35. return "ERROR"
  36. case itemLeftParen:
  37. return "("
  38. case itemRightParen:
  39. return ")"
  40. case itemLeftBracket:
  41. return "["
  42. case itemRightBracket:
  43. return "]"
  44. case itemTypeName:
  45. return "TYPE"
  46. case itemVariable:
  47. return "VAR"
  48. case itemAt:
  49. return "@"
  50. case itemColon:
  51. return ":"
  52. case itemBlank:
  53. return "_"
  54. case itemString:
  55. return "STRING"
  56. case itemEOF:
  57. return "EOF"
  58. default:
  59. return fmt.Sprintf("itemType(%d)", typ)
  60. }
  61. }
  62. type item struct {
  63. typ itemType
  64. val string
  65. pos int
  66. }
  67. type stateFn func(*lexer) stateFn
  68. func (l *lexer) run() {
  69. for state := lexStart; state != nil; {
  70. state = state(l)
  71. }
  72. close(l.items)
  73. }
  74. func (l *lexer) emitValue(t itemType, value string) {
  75. l.items <- item{t, value, l.start}
  76. l.start = l.pos
  77. }
  78. func (l *lexer) emit(t itemType) {
  79. l.items <- item{t, l.input[l.start:l.pos], l.start}
  80. l.start = l.pos
  81. }
  82. func lexStart(l *lexer) stateFn {
  83. switch r := l.next(); {
  84. case r == eof:
  85. l.emit(itemEOF)
  86. return nil
  87. case unicode.IsSpace(r):
  88. l.ignore()
  89. case r == '(':
  90. l.emit(itemLeftParen)
  91. case r == ')':
  92. l.emit(itemRightParen)
  93. case r == '[':
  94. l.emit(itemLeftBracket)
  95. case r == ']':
  96. l.emit(itemRightBracket)
  97. case r == '@':
  98. l.emit(itemAt)
  99. case r == ':':
  100. l.emit(itemColon)
  101. case r == '_':
  102. l.emit(itemBlank)
  103. case r == '"':
  104. l.backup()
  105. return lexString
  106. case unicode.IsUpper(r):
  107. l.backup()
  108. return lexType
  109. case unicode.IsLower(r):
  110. l.backup()
  111. return lexVariable
  112. default:
  113. return l.errorf("unexpected character %c", r)
  114. }
  115. return lexStart
  116. }
  117. func (l *lexer) next() (r rune) {
  118. if l.pos >= len(l.input) {
  119. l.width = 0
  120. return eof
  121. }
  122. r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
  123. if r == '\n' {
  124. l.f.AddLine(l.pos)
  125. }
  126. l.pos += l.width
  127. return r
  128. }
  129. func (l *lexer) ignore() {
  130. l.start = l.pos
  131. }
  132. func (l *lexer) backup() {
  133. l.pos -= l.width
  134. }
  135. func (l *lexer) errorf(format string, args ...interface{}) stateFn {
  136. // TODO(dh): emit position information in errors
  137. l.items <- item{
  138. itemError,
  139. fmt.Sprintf(format, args...),
  140. l.start,
  141. }
  142. return nil
  143. }
  144. func isAlphaNumeric(r rune) bool {
  145. return r >= '0' && r <= '9' ||
  146. r >= 'a' && r <= 'z' ||
  147. r >= 'A' && r <= 'Z'
  148. }
  149. func lexString(l *lexer) stateFn {
  150. l.next() // skip quote
  151. escape := false
  152. var runes []rune
  153. for {
  154. switch r := l.next(); r {
  155. case eof:
  156. return l.errorf("unterminated string")
  157. case '"':
  158. if !escape {
  159. l.emitValue(itemString, string(runes))
  160. return lexStart
  161. } else {
  162. runes = append(runes, '"')
  163. escape = false
  164. }
  165. case '\\':
  166. if escape {
  167. runes = append(runes, '\\')
  168. escape = false
  169. } else {
  170. escape = true
  171. }
  172. default:
  173. runes = append(runes, r)
  174. }
  175. }
  176. }
  177. func lexType(l *lexer) stateFn {
  178. l.next()
  179. for {
  180. if !isAlphaNumeric(l.next()) {
  181. l.backup()
  182. l.emit(itemTypeName)
  183. return lexStart
  184. }
  185. }
  186. }
  187. func lexVariable(l *lexer) stateFn {
  188. l.next()
  189. for {
  190. if !isAlphaNumeric(l.next()) {
  191. l.backup()
  192. l.emit(itemVariable)
  193. return lexStart
  194. }
  195. }
  196. }