// parse.go (20 KB)
  1. package toml
  2. import (
  3. "errors"
  4. "fmt"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "unicode/utf8"
  9. "github.com/BurntSushi/toml/internal"
  10. )
  11. type parser struct {
  12. mapping map[string]interface{}
  13. types map[string]tomlType
  14. lx *lexer
  15. ordered []Key // List of keys in the order that they appear in the TOML data.
  16. context Key // Full key for the current hash in scope.
  17. currentKey string // Base key name for everything except hashes.
  18. approxLine int // Rough approximation of line number
  19. implicits map[string]bool // Record implied keys (e.g. 'key.group.names').
  20. }
  // ParseError is reported when a document cannot be parsed, e.g. because of an
// invalid integer literal or a duplicate key.
type ParseError struct {
	Message string // Description of what went wrong.
	Line    int    // Line number near which the problem was found.
	LastKey string // Last key that was being parsed.
}

// Error implements the error interface; the text includes the line number, the
// last key parsed and the message.
func (pe ParseError) Error() string {
	const layout = "Near line %d (last key parsed '%s'): %s"
	return fmt.Sprintf(layout, pe.Line, pe.LastKey, pe.Message)
}
  32. func parse(data string) (p *parser, err error) {
  33. defer func() {
  34. if r := recover(); r != nil {
  35. var ok bool
  36. if err, ok = r.(ParseError); ok {
  37. return
  38. }
  39. panic(r)
  40. }
  41. }()
  42. // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
  43. // which mangles stuff.
  44. if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
  45. data = data[2:]
  46. }
  47. // Examine first few bytes for NULL bytes; this probably means it's a UTF-16
  48. // file (second byte in surrogate pair being NULL). Again, do this here to
  49. // avoid having to deal with UTF-8/16 stuff in the lexer.
  50. ex := 6
  51. if len(data) < 6 {
  52. ex = len(data)
  53. }
  54. if strings.ContainsRune(data[:ex], 0) {
  55. return nil, errors.New("files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8")
  56. }
  57. p = &parser{
  58. mapping: make(map[string]interface{}),
  59. types: make(map[string]tomlType),
  60. lx: lex(data),
  61. ordered: make([]Key, 0),
  62. implicits: make(map[string]bool),
  63. }
  64. for {
  65. item := p.next()
  66. if item.typ == itemEOF {
  67. break
  68. }
  69. p.topLevel(item)
  70. }
  71. return p, nil
  72. }
  73. func (p *parser) panicf(format string, v ...interface{}) {
  74. msg := fmt.Sprintf(format, v...)
  75. panic(ParseError{
  76. Message: msg,
  77. Line: p.approxLine,
  78. LastKey: p.current(),
  79. })
  80. }
  81. func (p *parser) next() item {
  82. it := p.lx.nextItem()
  83. //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.line, it.val)
  84. if it.typ == itemError {
  85. p.panicf("%s", it.val)
  86. }
  87. return it
  88. }
  89. func (p *parser) bug(format string, v ...interface{}) {
  90. panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
  91. }
  92. func (p *parser) expect(typ itemType) item {
  93. it := p.next()
  94. p.assertEqual(typ, it.typ)
  95. return it
  96. }
  97. func (p *parser) assertEqual(expected, got itemType) {
  98. if expected != got {
  99. p.bug("Expected '%s' but got '%s'.", expected, got)
  100. }
  101. }
  102. func (p *parser) topLevel(item item) {
  103. switch item.typ {
  104. case itemCommentStart: // # ..
  105. p.approxLine = item.line
  106. p.expect(itemText)
  107. case itemTableStart: // [ .. ]
  108. name := p.next()
  109. p.approxLine = name.line
  110. var key Key
  111. for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
  112. key = append(key, p.keyString(name))
  113. }
  114. p.assertEqual(itemTableEnd, name.typ)
  115. p.addContext(key, false)
  116. p.setType("", tomlHash)
  117. p.ordered = append(p.ordered, key)
  118. case itemArrayTableStart: // [[ .. ]]
  119. name := p.next()
  120. p.approxLine = name.line
  121. var key Key
  122. for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
  123. key = append(key, p.keyString(name))
  124. }
  125. p.assertEqual(itemArrayTableEnd, name.typ)
  126. p.addContext(key, true)
  127. p.setType("", tomlArrayHash)
  128. p.ordered = append(p.ordered, key)
  129. case itemKeyStart: // key = ..
  130. outerContext := p.context
  131. /// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
  132. k := p.next()
  133. p.approxLine = k.line
  134. var key Key
  135. for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
  136. key = append(key, p.keyString(k))
  137. }
  138. p.assertEqual(itemKeyEnd, k.typ)
  139. /// The current key is the last part.
  140. p.currentKey = key[len(key)-1]
  141. /// All the other parts (if any) are the context; need to set each part
  142. /// as implicit.
  143. context := key[:len(key)-1]
  144. for i := range context {
  145. p.addImplicitContext(append(p.context, context[i:i+1]...))
  146. }
  147. /// Set value.
  148. val, typ := p.value(p.next(), false)
  149. p.set(p.currentKey, val, typ)
  150. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  151. /// Remove the context we added (preserving any context from [tbl] lines).
  152. p.context = outerContext
  153. p.currentKey = ""
  154. default:
  155. p.bug("Unexpected type at top level: %s", item.typ)
  156. }
  157. }
  158. // Gets a string for a key (or part of a key in a table name).
  159. func (p *parser) keyString(it item) string {
  160. switch it.typ {
  161. case itemText:
  162. return it.val
  163. case itemString, itemMultilineString,
  164. itemRawString, itemRawMultilineString:
  165. s, _ := p.value(it, false)
  166. return s.(string)
  167. default:
  168. p.bug("Unexpected key type: %s", it.typ)
  169. }
  170. panic("unreachable")
  171. }
  // datetimeRepl upper-cases the 'z' and 't' markers of a datetime and replaces a
// space with 'T'.
var datetimeRepl = strings.NewReplacer(
	"z", "Z", // lower-case zone marker
	"t", "T", // lower-case date/time separator
	" ", "T", // space used as date/time separator
)
  176. // value translates an expected value from the lexer into a Go value wrapped
  177. // as an empty interface.
  178. func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
  179. switch it.typ {
  180. case itemString:
  181. return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
  182. case itemMultilineString:
  183. return p.replaceEscapes(stripFirstNewline(stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
  184. case itemRawString:
  185. return it.val, p.typeOfPrimitive(it)
  186. case itemRawMultilineString:
  187. return stripFirstNewline(it.val), p.typeOfPrimitive(it)
  188. case itemInteger:
  189. return p.valueInteger(it)
  190. case itemFloat:
  191. return p.valueFloat(it)
  192. case itemBool:
  193. switch it.val {
  194. case "true":
  195. return true, p.typeOfPrimitive(it)
  196. case "false":
  197. return false, p.typeOfPrimitive(it)
  198. default:
  199. p.bug("Expected boolean value, but got '%s'.", it.val)
  200. }
  201. case itemDatetime:
  202. return p.valueDatetime(it)
  203. case itemArray:
  204. return p.valueArray(it)
  205. case itemInlineTableStart:
  206. return p.valueInlineTable(it, parentIsArray)
  207. default:
  208. p.bug("Unexpected value type: %s", it.typ)
  209. }
  210. panic("unreachable")
  211. }
  212. func (p *parser) valueInteger(it item) (interface{}, tomlType) {
  213. if !numUnderscoresOK(it.val) {
  214. p.panicf("Invalid integer %q: underscores must be surrounded by digits", it.val)
  215. }
  216. if numHasLeadingZero(it.val) {
  217. p.panicf("Invalid integer %q: cannot have leading zeroes", it.val)
  218. }
  219. num, err := strconv.ParseInt(it.val, 0, 64)
  220. if err != nil {
  221. // Distinguish integer values. Normally, it'd be a bug if the lexer
  222. // provides an invalid integer, but it's possible that the number is
  223. // out of range of valid values (which the lexer cannot determine).
  224. // So mark the former as a bug but the latter as a legitimate user
  225. // error.
  226. if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
  227. p.panicf("Integer '%s' is out of the range of 64-bit signed integers.", it.val)
  228. } else {
  229. p.bug("Expected integer value, but got '%s'.", it.val)
  230. }
  231. }
  232. return num, p.typeOfPrimitive(it)
  233. }
  234. func (p *parser) valueFloat(it item) (interface{}, tomlType) {
  235. parts := strings.FieldsFunc(it.val, func(r rune) bool {
  236. switch r {
  237. case '.', 'e', 'E':
  238. return true
  239. }
  240. return false
  241. })
  242. for _, part := range parts {
  243. if !numUnderscoresOK(part) {
  244. p.panicf("Invalid float %q: underscores must be surrounded by digits", it.val)
  245. }
  246. }
  247. if len(parts) > 0 && numHasLeadingZero(parts[0]) {
  248. p.panicf("Invalid float %q: cannot have leading zeroes", it.val)
  249. }
  250. if !numPeriodsOK(it.val) {
  251. // As a special case, numbers like '123.' or '1.e2',
  252. // which are valid as far as Go/strconv are concerned,
  253. // must be rejected because TOML says that a fractional
  254. // part consists of '.' followed by 1+ digits.
  255. p.panicf("Invalid float %q: '.' must be followed by one or more digits", it.val)
  256. }
  257. val := strings.Replace(it.val, "_", "", -1)
  258. if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
  259. val = "nan"
  260. }
  261. num, err := strconv.ParseFloat(val, 64)
  262. if err != nil {
  263. if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
  264. p.panicf("Float '%s' is out of the range of 64-bit IEEE-754 floating-point numbers.", it.val)
  265. } else {
  266. p.panicf("Invalid float value: %q", it.val)
  267. }
  268. }
  269. return num, p.typeOfPrimitive(it)
  270. }
  271. var dtTypes = []struct {
  272. fmt string
  273. zone *time.Location
  274. }{
  275. {time.RFC3339Nano, time.Local},
  276. {"2006-01-02T15:04:05.999999999", internal.LocalDatetime},
  277. {"2006-01-02", internal.LocalDate},
  278. {"15:04:05.999999999", internal.LocalTime},
  279. }
  280. func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
  281. it.val = datetimeRepl.Replace(it.val)
  282. var (
  283. t time.Time
  284. ok bool
  285. err error
  286. )
  287. for _, dt := range dtTypes {
  288. t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
  289. if err == nil {
  290. ok = true
  291. break
  292. }
  293. }
  294. if !ok {
  295. p.panicf("Invalid TOML Datetime: %q.", it.val)
  296. }
  297. return t, p.typeOfPrimitive(it)
  298. }
  299. func (p *parser) valueArray(it item) (interface{}, tomlType) {
  300. p.setType(p.currentKey, tomlArray)
  301. // p.setType(p.currentKey, typ)
  302. var (
  303. array []interface{}
  304. types []tomlType
  305. )
  306. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  307. if it.typ == itemCommentStart {
  308. p.expect(itemText)
  309. continue
  310. }
  311. val, typ := p.value(it, true)
  312. array = append(array, val)
  313. types = append(types, typ)
  314. }
  315. return array, tomlArray
  316. }
  317. func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
  318. var (
  319. hash = make(map[string]interface{})
  320. outerContext = p.context
  321. outerKey = p.currentKey
  322. )
  323. p.context = append(p.context, p.currentKey)
  324. prevContext := p.context
  325. p.currentKey = ""
  326. p.addImplicit(p.context)
  327. p.addContext(p.context, parentIsArray)
  328. /// Loop over all table key/value pairs.
  329. for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
  330. if it.typ == itemCommentStart {
  331. p.expect(itemText)
  332. continue
  333. }
  334. /// Read all key parts.
  335. k := p.next()
  336. p.approxLine = k.line
  337. var key Key
  338. for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
  339. key = append(key, p.keyString(k))
  340. }
  341. p.assertEqual(itemKeyEnd, k.typ)
  342. /// The current key is the last part.
  343. p.currentKey = key[len(key)-1]
  344. /// All the other parts (if any) are the context; need to set each part
  345. /// as implicit.
  346. context := key[:len(key)-1]
  347. for i := range context {
  348. p.addImplicitContext(append(p.context, context[i:i+1]...))
  349. }
  350. /// Set the value.
  351. val, typ := p.value(p.next(), false)
  352. p.set(p.currentKey, val, typ)
  353. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  354. hash[p.currentKey] = val
  355. /// Restore context.
  356. p.context = prevContext
  357. }
  358. p.context = outerContext
  359. p.currentKey = outerKey
  360. return hash, tomlHash
  361. }
  362. // numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
  363. // +/- signs, and base prefixes.
  364. func numHasLeadingZero(s string) bool {
  365. if len(s) > 1 && s[0] == '0' && isDigit(rune(s[1])) { // >1 to allow "0" and isDigit to allow 0x
  366. return true
  367. }
  368. if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
  369. return true
  370. }
  371. return false
  372. }
  373. // numUnderscoresOK checks whether each underscore in s is surrounded by
  374. // characters that are not underscores.
  375. func numUnderscoresOK(s string) bool {
  376. switch s {
  377. case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
  378. return true
  379. }
  380. accept := false
  381. for _, r := range s {
  382. if r == '_' {
  383. if !accept {
  384. return false
  385. }
  386. }
  387. // isHexadecimal is a superset of all the permissable characters
  388. // surrounding an underscore.
  389. accept = isHexadecimal(r)
  390. }
  391. return accept
  392. }
  393. // numPeriodsOK checks whether every period in s is followed by a digit.
  394. func numPeriodsOK(s string) bool {
  395. period := false
  396. for _, r := range s {
  397. if period && !isDigit(r) {
  398. return false
  399. }
  400. period = r == '.'
  401. }
  402. return !period
  403. }
  404. // Set the current context of the parser, where the context is either a hash or
  405. // an array of hashes, depending on the value of the `array` parameter.
  406. //
  407. // Establishing the context also makes sure that the key isn't a duplicate, and
  408. // will create implicit hashes automatically.
  409. func (p *parser) addContext(key Key, array bool) {
  410. var ok bool
  411. // Always start at the top level and drill down for our context.
  412. hashContext := p.mapping
  413. keyContext := make(Key, 0)
  414. // We only need implicit hashes for key[0:-1]
  415. for _, k := range key[0 : len(key)-1] {
  416. _, ok = hashContext[k]
  417. keyContext = append(keyContext, k)
  418. // No key? Make an implicit hash and move on.
  419. if !ok {
  420. p.addImplicit(keyContext)
  421. hashContext[k] = make(map[string]interface{})
  422. }
  423. // If the hash context is actually an array of tables, then set
  424. // the hash context to the last element in that array.
  425. //
  426. // Otherwise, it better be a table, since this MUST be a key group (by
  427. // virtue of it not being the last element in a key).
  428. switch t := hashContext[k].(type) {
  429. case []map[string]interface{}:
  430. hashContext = t[len(t)-1]
  431. case map[string]interface{}:
  432. hashContext = t
  433. default:
  434. p.panicf("Key '%s' was already created as a hash.", keyContext)
  435. }
  436. }
  437. p.context = keyContext
  438. if array {
  439. // If this is the first element for this array, then allocate a new
  440. // list of tables for it.
  441. k := key[len(key)-1]
  442. if _, ok := hashContext[k]; !ok {
  443. hashContext[k] = make([]map[string]interface{}, 0, 4)
  444. }
  445. // Add a new table. But make sure the key hasn't already been used
  446. // for something else.
  447. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  448. hashContext[k] = append(hash, make(map[string]interface{}))
  449. } else {
  450. p.panicf("Key '%s' was already created and cannot be used as an array.", keyContext)
  451. }
  452. } else {
  453. p.setValue(key[len(key)-1], make(map[string]interface{}))
  454. }
  455. p.context = append(p.context, key[len(key)-1])
  456. }
  457. // set calls setValue and setType.
  458. func (p *parser) set(key string, val interface{}, typ tomlType) {
  459. p.setValue(p.currentKey, val)
  460. p.setType(p.currentKey, typ)
  461. }
  462. // setValue sets the given key to the given value in the current context.
  463. // It will make sure that the key hasn't already been defined, account for
  464. // implicit key groups.
  465. func (p *parser) setValue(key string, value interface{}) {
  466. var (
  467. tmpHash interface{}
  468. ok bool
  469. hash = p.mapping
  470. keyContext Key
  471. )
  472. for _, k := range p.context {
  473. keyContext = append(keyContext, k)
  474. if tmpHash, ok = hash[k]; !ok {
  475. p.bug("Context for key '%s' has not been established.", keyContext)
  476. }
  477. switch t := tmpHash.(type) {
  478. case []map[string]interface{}:
  479. // The context is a table of hashes. Pick the most recent table
  480. // defined as the current hash.
  481. hash = t[len(t)-1]
  482. case map[string]interface{}:
  483. hash = t
  484. default:
  485. p.panicf("Key '%s' has already been defined.", keyContext)
  486. }
  487. }
  488. keyContext = append(keyContext, key)
  489. if _, ok := hash[key]; ok {
  490. // Normally redefining keys isn't allowed, but the key could have been
  491. // defined implicitly and it's allowed to be redefined concretely. (See
  492. // the `valid/implicit-and-explicit-after.toml` in toml-test)
  493. //
  494. // But we have to make sure to stop marking it as an implicit. (So that
  495. // another redefinition provokes an error.)
  496. //
  497. // Note that since it has already been defined (as a hash), we don't
  498. // want to overwrite it. So our business is done.
  499. if p.isArray(keyContext) {
  500. p.removeImplicit(keyContext)
  501. hash[key] = value
  502. return
  503. }
  504. if p.isImplicit(keyContext) {
  505. p.removeImplicit(keyContext)
  506. return
  507. }
  508. // Otherwise, we have a concrete key trying to override a previous
  509. // key, which is *always* wrong.
  510. p.panicf("Key '%s' has already been defined.", keyContext)
  511. }
  512. hash[key] = value
  513. }
  514. // setType sets the type of a particular value at a given key.
  515. // It should be called immediately AFTER setValue.
  516. //
  517. // Note that if `key` is empty, then the type given will be applied to the
  518. // current context (which is either a table or an array of tables).
  519. func (p *parser) setType(key string, typ tomlType) {
  520. keyContext := make(Key, 0, len(p.context)+1)
  521. for _, k := range p.context {
  522. keyContext = append(keyContext, k)
  523. }
  524. if len(key) > 0 { // allow type setting for hashes
  525. keyContext = append(keyContext, key)
  526. }
  527. p.types[keyContext.String()] = typ
  528. }
  529. // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
  530. // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
  531. func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = true }
  532. func (p *parser) removeImplicit(key Key) { p.implicits[key.String()] = false }
  533. func (p *parser) isImplicit(key Key) bool { return p.implicits[key.String()] }
  534. func (p *parser) isArray(key Key) bool { return p.types[key.String()] == tomlArray }
  535. func (p *parser) addImplicitContext(key Key) {
  536. p.addImplicit(key)
  537. p.addContext(key, false)
  538. }
  539. // current returns the full key name of the current context.
  540. func (p *parser) current() string {
  541. if len(p.currentKey) == 0 {
  542. return p.context.String()
  543. }
  544. if len(p.context) == 0 {
  545. return p.currentKey
  546. }
  547. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  548. }
  // stripFirstNewline removes a single leading newline ("\n" or "\r\n") from s,
// if there is one.
func stripFirstNewline(s string) string {
	switch {
	case strings.HasPrefix(s, "\n"):
		return s[1:]
	case strings.HasPrefix(s, "\r\n"):
		return s[2:]
	}
	return s
}
  // stripEscapedNewlines handles line-ending backslashes inside triple-quoted
// strings: when the last non-whitespace character of a line is an unescaped
// "\", that backslash is removed together with all whitespace (newlines
// included) up to the next non-whitespace character.
//
// A backslash is unescaped when the line ends in an odd number of
// backslashes; an even number is a run of escaped backslashes and is left for
// replaceEscapes. On every line that is kept, trailing "\r" characters are
// dropped, so "\r\n" line endings come out as "\n".
func stripEscapedNewlines(s string) string {
	lines := strings.Split(s, "\n")
	last := len(lines) - 1

	var b strings.Builder
	b.Grow(len(s))

	// joining is true while the whitespace following a line-ending backslash
	// is being swallowed; it ends at the first non-blank line.
	joining := false
	for i, raw := range lines {
		if joining {
			raw = strings.TrimLeft(raw, " \t\r")
			if raw == "" {
				// Whitespace-only line: swallowed entirely.
				continue
			}
		}

		// Trailing whitespace may sit between the backslash and the newline.
		trimmed := strings.TrimRight(raw, " \t\r")
		backslashes := 0
		for j := len(trimmed) - 1; j >= 0 && trimmed[j] == '\\'; j-- {
			backslashes++
		}

		if backslashes%2 == 1 {
			// Line-ending backslash: drop it and the rest of the line, and
			// glue the next non-blank content directly onto this line.
			b.WriteString(trimmed[:len(trimmed)-1])
			joining = true
			continue
		}

		// Ordinary line. Split removed the "\n", so put it back on every
		// line except the final one.
		joining = false
		b.WriteString(strings.TrimRight(raw, "\r"))
		if i != last {
			b.WriteByte('\n')
		}
	}
	return b.String()
}
  593. func (p *parser) replaceEscapes(str string) string {
  594. var replaced []rune
  595. s := []byte(str)
  596. r := 0
  597. for r < len(s) {
  598. if s[r] != '\\' {
  599. c, size := utf8.DecodeRune(s[r:])
  600. r += size
  601. replaced = append(replaced, c)
  602. continue
  603. }
  604. r += 1
  605. if r >= len(s) {
  606. p.bug("Escape sequence at end of string.")
  607. return ""
  608. }
  609. switch s[r] {
  610. default:
  611. p.bug("Expected valid escape code after \\, but got %q.", s[r])
  612. return ""
  613. case ' ', '\t':
  614. p.panicf("invalid escape: '\\%c'", s[r])
  615. return ""
  616. case 'b':
  617. replaced = append(replaced, rune(0x0008))
  618. r += 1
  619. case 't':
  620. replaced = append(replaced, rune(0x0009))
  621. r += 1
  622. case 'n':
  623. replaced = append(replaced, rune(0x000A))
  624. r += 1
  625. case 'f':
  626. replaced = append(replaced, rune(0x000C))
  627. r += 1
  628. case 'r':
  629. replaced = append(replaced, rune(0x000D))
  630. r += 1
  631. case '"':
  632. replaced = append(replaced, rune(0x0022))
  633. r += 1
  634. case '\\':
  635. replaced = append(replaced, rune(0x005C))
  636. r += 1
  637. case 'u':
  638. // At this point, we know we have a Unicode escape of the form
  639. // `uXXXX` at [r, r+5). (Because the lexer guarantees this
  640. // for us.)
  641. escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
  642. replaced = append(replaced, escaped)
  643. r += 5
  644. case 'U':
  645. // At this point, we know we have a Unicode escape of the form
  646. // `uXXXX` at [r, r+9). (Because the lexer guarantees this
  647. // for us.)
  648. escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
  649. replaced = append(replaced, escaped)
  650. r += 9
  651. }
  652. }
  653. return string(replaced)
  654. }
  655. func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
  656. s := string(bs)
  657. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  658. if err != nil {
  659. p.bug("Could not parse '%s' as a hexadecimal number, but the "+
  660. "lexer claims it's OK: %s", s, err)
  661. }
  662. if !utf8.ValidRune(rune(hex)) {
  663. p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
  664. }
  665. return rune(hex)
  666. }