| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225 |
- package toml
- import (
- "fmt"
- "reflect"
- "runtime"
- "strings"
- "unicode"
- "unicode/utf8"
- )
// itemType identifies the kind of an item produced by the lexer.
type itemType int

// The item kinds. Their numeric values are assigned by iota, so the order of
// this list is significant and must not change.
const (
	itemError itemType = iota // lexing failed; the item value is the message
	itemNIL                   // used in the parser to indicate no type
	itemEOF

	// Text and scalar values.
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime

	// Structural markers.
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemKeyEnd
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)
// Characters with a special meaning to the lexer. eof is the value next
// returns once the input is exhausted.
const (
	eof = 0

	// Separators.
	comma    = ','
	tableSep = '.'
	keySep   = '='

	// Table and array-of-tables headers, and array values.
	tableStart      = '['
	tableEnd        = ']'
	arrayTableStart = '['
	arrayTableEnd   = ']'
	arrayStart      = '['
	arrayEnd        = ']'

	// Comments.
	commentStart = '#'

	// Basic and raw (literal) strings.
	stringStart    = '"'
	stringEnd      = '"'
	rawStringStart = '\''
	rawStringEnd   = '\''

	// Inline tables.
	inlineTableStart = '{'
	inlineTableEnd   = '}'
)
- type stateFn func(lx *lexer) stateFn
- type lexer struct {
- input string
- start int
- pos int
- line int
- state stateFn
- items chan item
- // Allow for backing up up to four runes.
- // This is necessary because TOML contains 3-rune tokens (""" and ''').
- prevWidths [4]int
- nprev int // how many of prevWidths are in use
- // If we emit an eof, we can still back up, but it is not OK to call
- // next again.
- atEOF bool
- // A stack of state functions used to maintain context.
- // The idea is to reuse parts of the state machine in various places.
- // For example, values can appear at the top level or within arbitrarily
- // nested arrays. The last state on the stack is used after a value has
- // been lexed. Similarly for comments.
- stack []stateFn
- }
- type item struct {
- typ itemType
- val string
- line int
- }
- func (lx *lexer) nextItem() item {
- for {
- select {
- case item := <-lx.items:
- return item
- default:
- lx.state = lx.state(lx)
- //fmt.Printf(" STATE %-24s current: %-10q stack: %s\n", lx.state, lx.current(), lx.stack)
- }
- }
- }
- func lex(input string) *lexer {
- lx := &lexer{
- input: input,
- state: lexTop,
- line: 1,
- items: make(chan item, 10),
- stack: make([]stateFn, 0, 10),
- }
- return lx
- }
- func (lx *lexer) push(state stateFn) {
- lx.stack = append(lx.stack, state)
- }
- func (lx *lexer) pop() stateFn {
- if len(lx.stack) == 0 {
- return lx.errorf("BUG in lexer: no states to pop")
- }
- last := lx.stack[len(lx.stack)-1]
- lx.stack = lx.stack[0 : len(lx.stack)-1]
- return last
- }
- func (lx *lexer) current() string {
- return lx.input[lx.start:lx.pos]
- }
- func (lx *lexer) emit(typ itemType) {
- lx.items <- item{typ, lx.current(), lx.line}
- lx.start = lx.pos
- }
- func (lx *lexer) emitTrim(typ itemType) {
- lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
- lx.start = lx.pos
- }
- func (lx *lexer) next() (r rune) {
- if lx.atEOF {
- panic("BUG in lexer: next called after EOF")
- }
- if lx.pos >= len(lx.input) {
- lx.atEOF = true
- return eof
- }
- if lx.input[lx.pos] == '\n' {
- lx.line++
- }
- lx.prevWidths[3] = lx.prevWidths[2]
- lx.prevWidths[2] = lx.prevWidths[1]
- lx.prevWidths[1] = lx.prevWidths[0]
- if lx.nprev < 4 {
- lx.nprev++
- }
- r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
- if r == utf8.RuneError {
- lx.errorf("invalid UTF-8 byte at position %d (line %d): 0x%02x", lx.pos, lx.line, lx.input[lx.pos])
- return utf8.RuneError
- }
- lx.prevWidths[0] = w
- lx.pos += w
- return r
- }
- // ignore skips over the pending input before this point.
- func (lx *lexer) ignore() {
- lx.start = lx.pos
- }
- // backup steps back one rune. Can be called 4 times between calls to next.
- func (lx *lexer) backup() {
- if lx.atEOF {
- lx.atEOF = false
- return
- }
- if lx.nprev < 1 {
- panic("BUG in lexer: backed up too far")
- }
- w := lx.prevWidths[0]
- lx.prevWidths[0] = lx.prevWidths[1]
- lx.prevWidths[1] = lx.prevWidths[2]
- lx.prevWidths[2] = lx.prevWidths[3]
- lx.nprev--
- lx.pos -= w
- if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
- lx.line--
- }
- }
- // accept consumes the next rune if it's equal to `valid`.
- func (lx *lexer) accept(valid rune) bool {
- if lx.next() == valid {
- return true
- }
- lx.backup()
- return false
- }
- // peek returns but does not consume the next rune in the input.
- func (lx *lexer) peek() rune {
- r := lx.next()
- lx.backup()
- return r
- }
- // skip ignores all input that matches the given predicate.
- func (lx *lexer) skip(pred func(rune) bool) {
- for {
- r := lx.next()
- if pred(r) {
- continue
- }
- lx.backup()
- lx.ignore()
- return
- }
- }
- // errorf stops all lexing by emitting an error and returning `nil`.
- // Note that any value that is a character is escaped if it's a special
- // character (newlines, tabs, etc.).
- func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
- lx.items <- item{
- itemError,
- fmt.Sprintf(format, values...),
- lx.line,
- }
- return nil
- }
- // lexTop consumes elements at the top level of TOML data.
- func lexTop(lx *lexer) stateFn {
- r := lx.next()
- if isWhitespace(r) || isNL(r) {
- return lexSkip(lx, lexTop)
- }
- switch r {
- case commentStart:
- lx.push(lexTop)
- return lexCommentStart
- case tableStart:
- return lexTableStart
- case eof:
- if lx.pos > lx.start {
- return lx.errorf("unexpected EOF")
- }
- lx.emit(itemEOF)
- return nil
- }
- // At this point, the only valid item can be a key, so we back up
- // and let the key lexer do the rest.
- lx.backup()
- lx.push(lexTopEnd)
- return lexKeyStart
- }
- // lexTopEnd is entered whenever a top-level item has been consumed. (A value
- // or a table.) It must see only whitespace, and will turn back to lexTop
- // upon a newline. If it sees EOF, it will quit the lexer successfully.
- func lexTopEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == commentStart:
- // a comment will read to a newline for us.
- lx.push(lexTop)
- return lexCommentStart
- case isWhitespace(r):
- return lexTopEnd
- case isNL(r):
- lx.ignore()
- return lexTop
- case r == eof:
- lx.emit(itemEOF)
- return nil
- }
- return lx.errorf(
- "expected a top-level item to end with a newline, comment, or EOF, but got %q instead",
- r)
- }
- // lexTable lexes the beginning of a table. Namely, it makes sure that
- // it starts with a character other than '.' and ']'.
- // It assumes that '[' has already been consumed.
- // It also handles the case that this is an item in an array of tables.
- // e.g., '[[name]]'.
- func lexTableStart(lx *lexer) stateFn {
- if lx.peek() == arrayTableStart {
- lx.next()
- lx.emit(itemArrayTableStart)
- lx.push(lexArrayTableEnd)
- } else {
- lx.emit(itemTableStart)
- lx.push(lexTableEnd)
- }
- return lexTableNameStart
- }
- func lexTableEnd(lx *lexer) stateFn {
- lx.emit(itemTableEnd)
- return lexTopEnd
- }
- func lexArrayTableEnd(lx *lexer) stateFn {
- if r := lx.next(); r != arrayTableEnd {
- return lx.errorf(
- "expected end of table array name delimiter %q, but got %q instead",
- arrayTableEnd, r)
- }
- lx.emit(itemArrayTableEnd)
- return lexTopEnd
- }
- func lexTableNameStart(lx *lexer) stateFn {
- lx.skip(isWhitespace)
- switch r := lx.peek(); {
- case r == tableEnd || r == eof:
- return lx.errorf("unexpected end of table name (table names cannot be empty)")
- case r == tableSep:
- return lx.errorf("unexpected table separator (table names cannot be empty)")
- case r == stringStart || r == rawStringStart:
- lx.ignore()
- lx.push(lexTableNameEnd)
- return lexQuotedName
- default:
- lx.push(lexTableNameEnd)
- return lexBareName
- }
- }
- // lexTableNameEnd reads the end of a piece of a table name, optionally
- // consuming whitespace.
- func lexTableNameEnd(lx *lexer) stateFn {
- lx.skip(isWhitespace)
- switch r := lx.next(); {
- case isWhitespace(r):
- return lexTableNameEnd
- case r == tableSep:
- lx.ignore()
- return lexTableNameStart
- case r == tableEnd:
- return lx.pop()
- default:
- return lx.errorf("expected '.' or ']' to end table name, but got %q instead", r)
- }
- }
- // lexBareName lexes one part of a key or table.
- //
- // It assumes that at least one valid character for the table has already been
- // read.
- //
- // Lexes only one part, e.g. only 'a' inside 'a.b'.
- func lexBareName(lx *lexer) stateFn {
- r := lx.next()
- if isBareKeyChar(r) {
- return lexBareName
- }
- lx.backup()
- lx.emit(itemText)
- return lx.pop()
- }
- // lexBareName lexes one part of a key or table.
- //
- // It assumes that at least one valid character for the table has already been
- // read.
- //
- // Lexes only one part, e.g. only '"a"' inside '"a".b'.
- func lexQuotedName(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r):
- return lexSkip(lx, lexValue)
- case r == stringStart:
- lx.ignore() // ignore the '"'
- return lexString
- case r == rawStringStart:
- lx.ignore() // ignore the "'"
- return lexRawString
- case r == eof:
- return lx.errorf("unexpected EOF; expected value")
- default:
- return lx.errorf("expected value but found %q instead", r)
- }
- }
- // lexKeyStart consumes all key parts until a '='.
- func lexKeyStart(lx *lexer) stateFn {
- lx.skip(isWhitespace)
- switch r := lx.peek(); {
- case r == '=' || r == eof:
- return lx.errorf("unexpected '=': key name appears blank")
- case r == '.':
- return lx.errorf("unexpected '.': keys cannot start with a '.'")
- case r == stringStart || r == rawStringStart:
- lx.ignore()
- fallthrough
- default: // Bare key
- lx.emit(itemKeyStart)
- return lexKeyNameStart
- }
- }
- func lexKeyNameStart(lx *lexer) stateFn {
- lx.skip(isWhitespace)
- switch r := lx.peek(); {
- case r == '=' || r == eof:
- return lx.errorf("unexpected '='")
- case r == '.':
- return lx.errorf("unexpected '.'")
- case r == stringStart || r == rawStringStart:
- lx.ignore()
- lx.push(lexKeyEnd)
- return lexQuotedName
- default:
- lx.push(lexKeyEnd)
- return lexBareName
- }
- }
- // lexKeyEnd consumes the end of a key and trims whitespace (up to the key
- // separator).
- func lexKeyEnd(lx *lexer) stateFn {
- lx.skip(isWhitespace)
- switch r := lx.next(); {
- case isWhitespace(r):
- return lexSkip(lx, lexKeyEnd)
- case r == eof:
- return lx.errorf("unexpected EOF; expected key separator %q", keySep)
- case r == '.':
- lx.ignore()
- return lexKeyNameStart
- case r == '=':
- lx.emit(itemKeyEnd)
- return lexSkip(lx, lexValue)
- default:
- return lx.errorf("expected '.' or '=', but got %q instead", r)
- }
- }
- // lexValue starts the consumption of a value anywhere a value is expected.
- // lexValue will ignore whitespace.
- // After a value is lexed, the last state on the next is popped and returned.
- func lexValue(lx *lexer) stateFn {
- // We allow whitespace to precede a value, but NOT newlines.
- // In array syntax, the array states are responsible for ignoring newlines.
- r := lx.next()
- switch {
- case isWhitespace(r):
- return lexSkip(lx, lexValue)
- case isDigit(r):
- lx.backup() // avoid an extra state and use the same as above
- return lexNumberOrDateStart
- }
- switch r {
- case arrayStart:
- lx.ignore()
- lx.emit(itemArray)
- return lexArrayValue
- case inlineTableStart:
- lx.ignore()
- lx.emit(itemInlineTableStart)
- return lexInlineTableValue
- case stringStart:
- if lx.accept(stringStart) {
- if lx.accept(stringStart) {
- lx.ignore() // Ignore """
- return lexMultilineString
- }
- lx.backup()
- }
- lx.ignore() // ignore the '"'
- return lexString
- case rawStringStart:
- if lx.accept(rawStringStart) {
- if lx.accept(rawStringStart) {
- lx.ignore() // Ignore """
- return lexMultilineRawString
- }
- lx.backup()
- }
- lx.ignore() // ignore the "'"
- return lexRawString
- case '.': // special error case, be kind to users
- return lx.errorf("floats must start with a digit, not '.'")
- case 'i', 'n':
- if (lx.accept('n') && lx.accept('f')) || (lx.accept('a') && lx.accept('n')) {
- lx.emit(itemFloat)
- return lx.pop()
- }
- case '-', '+':
- return lexDecimalNumberStart
- }
- if unicode.IsLetter(r) {
- // Be permissive here; lexBool will give a nice error if the
- // user wrote something like
- // x = foo
- // (i.e. not 'true' or 'false' but is something else word-like.)
- lx.backup()
- return lexBool
- }
- if r == eof {
- return lx.errorf("unexpected EOF; expected value")
- }
- return lx.errorf("expected value but found %q instead", r)
- }
- // lexArrayValue consumes one value in an array. It assumes that '[' or ','
- // have already been consumed. All whitespace and newlines are ignored.
- func lexArrayValue(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r) || isNL(r):
- return lexSkip(lx, lexArrayValue)
- case r == commentStart:
- lx.push(lexArrayValue)
- return lexCommentStart
- case r == comma:
- return lx.errorf("unexpected comma")
- case r == arrayEnd:
- // NOTE(caleb): The spec isn't clear about whether you can have
- // a trailing comma or not, so we'll allow it.
- return lexArrayEnd
- }
- lx.backup()
- lx.push(lexArrayValueEnd)
- return lexValue
- }
- // lexArrayValueEnd consumes everything between the end of an array value and
- // the next value (or the end of the array): it ignores whitespace and newlines
- // and expects either a ',' or a ']'.
- func lexArrayValueEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r) || isNL(r):
- return lexSkip(lx, lexArrayValueEnd)
- case r == commentStart:
- lx.push(lexArrayValueEnd)
- return lexCommentStart
- case r == comma:
- lx.ignore()
- return lexArrayValue // move on to the next value
- case r == arrayEnd:
- return lexArrayEnd
- }
- return lx.errorf(
- "expected a comma or array terminator %q, but got %s instead",
- arrayEnd, runeOrEOF(r))
- }
- // lexArrayEnd finishes the lexing of an array.
- // It assumes that a ']' has just been consumed.
- func lexArrayEnd(lx *lexer) stateFn {
- lx.ignore()
- lx.emit(itemArrayEnd)
- return lx.pop()
- }
- // lexInlineTableValue consumes one key/value pair in an inline table.
- // It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
- func lexInlineTableValue(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r):
- return lexSkip(lx, lexInlineTableValue)
- case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
- lx.push(lexInlineTableValue)
- return lexCommentStart
- case r == comma:
- return lx.errorf("unexpected comma")
- case r == inlineTableEnd:
- return lexInlineTableEnd
- }
- lx.backup()
- lx.push(lexInlineTableValueEnd)
- return lexKeyStart
- }
- // lexInlineTableValueEnd consumes everything between the end of an inline table
- // key/value pair and the next pair (or the end of the table):
- // it ignores whitespace and expects either a ',' or a '}'.
- func lexInlineTableValueEnd(lx *lexer) stateFn {
- switch r := lx.next(); {
- case isWhitespace(r):
- return lexSkip(lx, lexInlineTableValueEnd)
- case isNL(r):
- return lx.errorf("newlines not allowed within inline tables")
- case r == commentStart:
- lx.push(lexInlineTableValueEnd)
- return lexCommentStart
- case r == comma:
- lx.ignore()
- lx.skip(isWhitespace)
- if lx.peek() == '}' {
- return lx.errorf("trailing comma not allowed in inline tables")
- }
- return lexInlineTableValue
- case r == inlineTableEnd:
- return lexInlineTableEnd
- default:
- return lx.errorf(
- "expected a comma or an inline table terminator %q, but got %s instead",
- inlineTableEnd, runeOrEOF(r))
- }
- }
- func runeOrEOF(r rune) string {
- if r == eof {
- return "end of file"
- }
- return "'" + string(r) + "'"
- }
- // lexInlineTableEnd finishes the lexing of an inline table.
- // It assumes that a '}' has just been consumed.
- func lexInlineTableEnd(lx *lexer) stateFn {
- lx.ignore()
- lx.emit(itemInlineTableEnd)
- return lx.pop()
- }
- // lexString consumes the inner contents of a string. It assumes that the
- // beginning '"' has already been consumed and ignored.
- func lexString(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == eof:
- return lx.errorf(`unexpected EOF; expected '"'`)
- case isControl(r) || r == '\r':
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- case isNL(r):
- return lx.errorf("strings cannot contain newlines")
- case r == '\\':
- lx.push(lexString)
- return lexStringEscape
- case r == stringEnd:
- lx.backup()
- lx.emit(itemString)
- lx.next()
- lx.ignore()
- return lx.pop()
- }
- return lexString
- }
- // lexMultilineString consumes the inner contents of a string. It assumes that
- // the beginning '"""' has already been consumed and ignored.
- func lexMultilineString(lx *lexer) stateFn {
- r := lx.next()
- switch r {
- case eof:
- return lx.errorf(`unexpected EOF; expected '"""'`)
- case '\r':
- if lx.peek() != '\n' {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineString
- case '\\':
- return lexMultilineStringEscape
- case stringEnd:
- /// Found " → try to read two more "".
- if lx.accept(stringEnd) {
- if lx.accept(stringEnd) {
- /// Peek ahead: the string can contain " and "", including at the
- /// end: """str"""""
- /// 6 or more at the end, however, is an error.
- if lx.peek() == stringEnd {
- /// Check if we already lexed 5 's; if so we have 6 now, and
- /// that's just too many man!
- if strings.HasSuffix(lx.current(), `"""""`) {
- return lx.errorf(`unexpected '""""""'`)
- }
- lx.backup()
- lx.backup()
- return lexMultilineString
- }
- lx.backup() /// backup: don't include the """ in the item.
- lx.backup()
- lx.backup()
- lx.emit(itemMultilineString)
- lx.next() /// Read over ''' again and discard it.
- lx.next()
- lx.next()
- lx.ignore()
- return lx.pop()
- }
- lx.backup()
- }
- }
- if isControl(r) {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineString
- }
- // lexRawString consumes a raw string. Nothing can be escaped in such a string.
- // It assumes that the beginning "'" has already been consumed and ignored.
- func lexRawString(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == eof:
- return lx.errorf(`unexpected EOF; expected "'"`)
- case isControl(r) || r == '\r':
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- case isNL(r):
- return lx.errorf("strings cannot contain newlines")
- case r == rawStringEnd:
- lx.backup()
- lx.emit(itemRawString)
- lx.next()
- lx.ignore()
- return lx.pop()
- }
- return lexRawString
- }
- // lexMultilineRawString consumes a raw string. Nothing can be escaped in such
- // a string. It assumes that the beginning "'''" has already been consumed and
- // ignored.
- func lexMultilineRawString(lx *lexer) stateFn {
- r := lx.next()
- switch r {
- case eof:
- return lx.errorf(`unexpected EOF; expected "'''"`)
- case '\r':
- if lx.peek() != '\n' {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineRawString
- case rawStringEnd:
- /// Found ' → try to read two more ''.
- if lx.accept(rawStringEnd) {
- if lx.accept(rawStringEnd) {
- /// Peek ahead: the string can contain ' and '', including at the
- /// end: '''str'''''
- /// 6 or more at the end, however, is an error.
- if lx.peek() == rawStringEnd {
- /// Check if we already lexed 5 's; if so we have 6 now, and
- /// that's just too many man!
- if strings.HasSuffix(lx.current(), "'''''") {
- return lx.errorf(`unexpected "''''''"`)
- }
- lx.backup()
- lx.backup()
- return lexMultilineRawString
- }
- lx.backup() /// backup: don't include the ''' in the item.
- lx.backup()
- lx.backup()
- lx.emit(itemRawMultilineString)
- lx.next() /// Read over ''' again and discard it.
- lx.next()
- lx.next()
- lx.ignore()
- return lx.pop()
- }
- lx.backup()
- }
- }
- if isControl(r) {
- return lx.errorf("control characters are not allowed inside strings: '0x%02x'", r)
- }
- return lexMultilineRawString
- }
- // lexMultilineStringEscape consumes an escaped character. It assumes that the
- // preceding '\\' has already been consumed.
- func lexMultilineStringEscape(lx *lexer) stateFn {
- // Handle the special case first:
- if isNL(lx.next()) {
- return lexMultilineString
- }
- lx.backup()
- lx.push(lexMultilineString)
- return lexStringEscape(lx)
- }
- func lexStringEscape(lx *lexer) stateFn {
- r := lx.next()
- switch r {
- case 'b':
- fallthrough
- case 't':
- fallthrough
- case 'n':
- fallthrough
- case 'f':
- fallthrough
- case 'r':
- fallthrough
- case '"':
- fallthrough
- case ' ', '\t':
- // Inside """ .. """ strings you can use \ to escape newlines, and any
- // amount of whitespace can be between the \ and \n.
- fallthrough
- case '\\':
- return lx.pop()
- case 'u':
- return lexShortUnicodeEscape
- case 'U':
- return lexLongUnicodeEscape
- }
- return lx.errorf("invalid escape character %q; only the following escape characters are allowed: "+
- `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
- }
- func lexShortUnicodeEscape(lx *lexer) stateFn {
- var r rune
- for i := 0; i < 4; i++ {
- r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf(
- `expected four hexadecimal digits after '\u', but got %q instead`,
- lx.current())
- }
- }
- return lx.pop()
- }
- func lexLongUnicodeEscape(lx *lexer) stateFn {
- var r rune
- for i := 0; i < 8; i++ {
- r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf(
- `expected eight hexadecimal digits after '\U', but got %q instead`,
- lx.current())
- }
- }
- return lx.pop()
- }
- // lexNumberOrDateStart processes the first character of a value which begins
- // with a digit. It exists to catch values starting with '0', so that
- // lexBaseNumberOrDate can differentiate base prefixed integers from other
- // types.
- func lexNumberOrDateStart(lx *lexer) stateFn {
- r := lx.next()
- switch r {
- case '0':
- return lexBaseNumberOrDate
- }
- if !isDigit(r) {
- // The only way to reach this state is if the value starts
- // with a digit, so specifically treat anything else as an
- // error.
- return lx.errorf("expected a digit but got %q", r)
- }
- return lexNumberOrDate
- }
- // lexNumberOrDate consumes either an integer, float or datetime.
- func lexNumberOrDate(lx *lexer) stateFn {
- r := lx.next()
- if isDigit(r) {
- return lexNumberOrDate
- }
- switch r {
- case '-', ':':
- return lexDatetime
- case '_':
- return lexDecimalNumber
- case '.', 'e', 'E':
- return lexFloat
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexDatetime consumes a Datetime, to a first approximation.
- // The parser validates that it matches one of the accepted formats.
- func lexDatetime(lx *lexer) stateFn {
- r := lx.next()
- if isDigit(r) {
- return lexDatetime
- }
- switch r {
- case '-', ':', 'T', 't', ' ', '.', 'Z', 'z', '+':
- return lexDatetime
- }
- lx.backup()
- lx.emitTrim(itemDatetime)
- return lx.pop()
- }
- // lexHexInteger consumes a hexadecimal integer after seeing the '0x' prefix.
- func lexHexInteger(lx *lexer) stateFn {
- r := lx.next()
- if isHexadecimal(r) {
- return lexHexInteger
- }
- switch r {
- case '_':
- return lexHexInteger
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexOctalInteger consumes an octal integer after seeing the '0o' prefix.
- func lexOctalInteger(lx *lexer) stateFn {
- r := lx.next()
- if isOctal(r) {
- return lexOctalInteger
- }
- switch r {
- case '_':
- return lexOctalInteger
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexBinaryInteger consumes a binary integer after seeing the '0b' prefix.
- func lexBinaryInteger(lx *lexer) stateFn {
- r := lx.next()
- if isBinary(r) {
- return lexBinaryInteger
- }
- switch r {
- case '_':
- return lexBinaryInteger
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexDecimalNumber consumes a decimal float or integer.
- func lexDecimalNumber(lx *lexer) stateFn {
- r := lx.next()
- if isDigit(r) {
- return lexDecimalNumber
- }
- switch r {
- case '.', 'e', 'E':
- return lexFloat
- case '_':
- return lexDecimalNumber
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexDecimalNumber consumes the first digit of a number beginning with a sign.
- // It assumes the sign has already been consumed. Values which start with a sign
- // are only allowed to be decimal integers or floats.
- //
- // The special "nan" and "inf" values are also recognized.
- func lexDecimalNumberStart(lx *lexer) stateFn {
- r := lx.next()
- // Special error cases to give users better error messages
- switch r {
- case 'i':
- if !lx.accept('n') || !lx.accept('f') {
- return lx.errorf("invalid float: '%s'", lx.current())
- }
- lx.emit(itemFloat)
- return lx.pop()
- case 'n':
- if !lx.accept('a') || !lx.accept('n') {
- return lx.errorf("invalid float: '%s'", lx.current())
- }
- lx.emit(itemFloat)
- return lx.pop()
- case '0':
- p := lx.peek()
- switch p {
- case 'b', 'o', 'x':
- return lx.errorf("cannot use sign with non-decimal numbers: '%s%c'", lx.current(), p)
- }
- case '.':
- return lx.errorf("floats must start with a digit, not '.'")
- }
- if isDigit(r) {
- return lexDecimalNumber
- }
- return lx.errorf("expected a digit but got %q", r)
- }
- // lexBaseNumberOrDate differentiates between the possible values which
- // start with '0'. It assumes that before reaching this state, the initial '0'
- // has been consumed.
- func lexBaseNumberOrDate(lx *lexer) stateFn {
- r := lx.next()
- // Note: All datetimes start with at least two digits, so we don't
- // handle date characters (':', '-', etc.) here.
- if isDigit(r) {
- return lexNumberOrDate
- }
- switch r {
- case '_':
- // Can only be decimal, because there can't be an underscore
- // between the '0' and the base designator, and dates can't
- // contain underscores.
- return lexDecimalNumber
- case '.', 'e', 'E':
- return lexFloat
- case 'b':
- r = lx.peek()
- if !isBinary(r) {
- lx.errorf("not a binary number: '%s%c'", lx.current(), r)
- }
- return lexBinaryInteger
- case 'o':
- r = lx.peek()
- if !isOctal(r) {
- lx.errorf("not an octal number: '%s%c'", lx.current(), r)
- }
- return lexOctalInteger
- case 'x':
- r = lx.peek()
- if !isHexadecimal(r) {
- lx.errorf("not a hexidecimal number: '%s%c'", lx.current(), r)
- }
- return lexHexInteger
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexFloat consumes the elements of a float. It allows any sequence of
- // float-like characters, so floats emitted by the lexer are only a first
- // approximation and must be validated by the parser.
- func lexFloat(lx *lexer) stateFn {
- r := lx.next()
- if isDigit(r) {
- return lexFloat
- }
- switch r {
- case '_', '.', '-', '+', 'e', 'E':
- return lexFloat
- }
- lx.backup()
- lx.emit(itemFloat)
- return lx.pop()
- }
- // lexBool consumes a bool string: 'true' or 'false.
- func lexBool(lx *lexer) stateFn {
- var rs []rune
- for {
- r := lx.next()
- if !unicode.IsLetter(r) {
- lx.backup()
- break
- }
- rs = append(rs, r)
- }
- s := string(rs)
- switch s {
- case "true", "false":
- lx.emit(itemBool)
- return lx.pop()
- }
- return lx.errorf("expected value but found %q instead", s)
- }
- // lexCommentStart begins the lexing of a comment. It will emit
- // itemCommentStart and consume no characters, passing control to lexComment.
- func lexCommentStart(lx *lexer) stateFn {
- lx.ignore()
- lx.emit(itemCommentStart)
- return lexComment
- }
- // lexComment lexes an entire comment. It assumes that '#' has been consumed.
- // It will consume *up to* the first newline character, and pass control
- // back to the last state on the stack.
- func lexComment(lx *lexer) stateFn {
- switch r := lx.next(); {
- case isNL(r) || r == eof:
- lx.backup()
- lx.emit(itemText)
- return lx.pop()
- case isControl(r):
- return lx.errorf("control characters are not allowed inside comments: '0x%02x'", r)
- default:
- return lexComment
- }
- }
- // lexSkip ignores all slurped input and moves on to the next state.
- func lexSkip(lx *lexer, nextState stateFn) stateFn {
- lx.ignore()
- return nextState
- }
// isWhitespace reports whether r is a tab or a space, the only whitespace
// characters this lexer recognises. Newlines are not included.
func isWhitespace(r rune) bool {
	switch r {
	case '\t', ' ':
		return true
	}
	return false
}
// isNL reports whether r is a line feed or a carriage return.
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
// isControl reports whether r is a control character: anything in the range
// 0x00-0x1f, or 0x7f, with the exception of \t, \r and \n.
func isControl(r rune) bool {
	if r == '\t' || r == '\r' || r == '\n' {
		return false
	}
	if r == 0x7f {
		return true
	}
	return 0x00 <= r && r <= 0x1f
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
// isHexadecimal reports whether r is an ASCII hexadecimal digit, in either
// letter case.
func isHexadecimal(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}
// isOctal reports whether r is an octal digit ('0' through '7').
func isOctal(r rune) bool {
	return '0' <= r && r <= '7'
}
// isBinary reports whether r is a binary digit ('0' or '1').
func isBinary(r rune) bool {
	switch r {
	case '0', '1':
		return true
	}
	return false
}
// isBareKeyChar reports whether r may appear in a bare (unquoted) key: ASCII
// letters, ASCII digits, '_' and '-'.
func isBareKeyChar(r rune) bool {
	switch {
	case r == '_' || r == '-':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	}
	return false
}
- func (s stateFn) String() string {
- name := runtime.FuncForPC(reflect.ValueOf(s).Pointer()).Name()
- if i := strings.LastIndexByte(name, '.'); i > -1 {
- name = name[i+1:]
- }
- if s == nil {
- name = "<nil>"
- }
- return name + "()"
- }
- func (itype itemType) String() string {
- switch itype {
- case itemError:
- return "Error"
- case itemNIL:
- return "NIL"
- case itemEOF:
- return "EOF"
- case itemText:
- return "Text"
- case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
- return "String"
- case itemBool:
- return "Bool"
- case itemInteger:
- return "Integer"
- case itemFloat:
- return "Float"
- case itemDatetime:
- return "DateTime"
- case itemTableStart:
- return "TableStart"
- case itemTableEnd:
- return "TableEnd"
- case itemKeyStart:
- return "KeyStart"
- case itemKeyEnd:
- return "KeyEnd"
- case itemArray:
- return "Array"
- case itemArrayEnd:
- return "ArrayEnd"
- case itemCommentStart:
- return "CommentStart"
- case itemInlineTableStart:
- return "InlineTableStart"
- case itemInlineTableEnd:
- return "InlineTableEnd"
- }
- panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
- }
- func (item item) String() string {
- return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
- }
|