unquote.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. package logfmt
  2. import (
  3. "strconv"
  4. "unicode"
  5. "unicode/utf16"
  6. "unicode/utf8"
  7. )
// Taken from Go's encoding/json
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

  12. // getu4 decodes \uXXXX from the beginning of s, returning the hex value,
  13. // or it returns -1.
  14. func getu4(s []byte) rune {
  15. if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
  16. return -1
  17. }
  18. r, err := strconv.ParseUint(string(s[2:6]), 16, 64)
  19. if err != nil {
  20. return -1
  21. }
  22. return rune(r)
  23. }
  24. // unquote converts a quoted JSON string literal s into an actual string t.
  25. // The rules are different than for Go, so cannot use strconv.Unquote.
  26. func unquote(s []byte) (t string, ok bool) {
  27. s, ok = unquoteBytes(s)
  28. t = string(s)
  29. return
  30. }
  31. func unquoteBytes(s []byte) (t []byte, ok bool) {
  32. if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
  33. return
  34. }
  35. s = s[1 : len(s)-1]
  36. // Check for unusual characters. If there are none,
  37. // then no unquoting is needed, so return a slice of the
  38. // original bytes.
  39. r := 0
  40. for r < len(s) {
  41. c := s[r]
  42. if c == '\\' || c == '"' || c < ' ' {
  43. break
  44. }
  45. if c < utf8.RuneSelf {
  46. r++
  47. continue
  48. }
  49. rr, size := utf8.DecodeRune(s[r:])
  50. if rr == utf8.RuneError && size == 1 {
  51. break
  52. }
  53. r += size
  54. }
  55. if r == len(s) {
  56. return s, true
  57. }
  58. b := make([]byte, len(s)+2*utf8.UTFMax)
  59. w := copy(b, s[0:r])
  60. for r < len(s) {
  61. // Out of room? Can only happen if s is full of
  62. // malformed UTF-8 and we're replacing each
  63. // byte with RuneError.
  64. if w >= len(b)-2*utf8.UTFMax {
  65. nb := make([]byte, (len(b)+utf8.UTFMax)*2)
  66. copy(nb, b[0:w])
  67. b = nb
  68. }
  69. switch c := s[r]; {
  70. case c == '\\':
  71. r++
  72. if r >= len(s) {
  73. return
  74. }
  75. switch s[r] {
  76. default:
  77. return
  78. case '"', '\\', '/', '\'':
  79. b[w] = s[r]
  80. r++
  81. w++
  82. case 'b':
  83. b[w] = '\b'
  84. r++
  85. w++
  86. case 'f':
  87. b[w] = '\f'
  88. r++
  89. w++
  90. case 'n':
  91. b[w] = '\n'
  92. r++
  93. w++
  94. case 'r':
  95. b[w] = '\r'
  96. r++
  97. w++
  98. case 't':
  99. b[w] = '\t'
  100. r++
  101. w++
  102. case 'u':
  103. r--
  104. rr := getu4(s[r:])
  105. if rr < 0 {
  106. return
  107. }
  108. r += 6
  109. if utf16.IsSurrogate(rr) {
  110. rr1 := getu4(s[r:])
  111. if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
  112. // A valid pair; consume.
  113. r += 6
  114. w += utf8.EncodeRune(b[w:], dec)
  115. break
  116. }
  117. // Invalid surrogate; fall back to replacement rune.
  118. rr = unicode.ReplacementChar
  119. }
  120. w += utf8.EncodeRune(b[w:], rr)
  121. }
  122. // Quote, control characters are invalid.
  123. case c == '"', c < ' ':
  124. return
  125. // ASCII
  126. case c < utf8.RuneSelf:
  127. b[w] = c
  128. r++
  129. w++
  130. // Coerce to well-formed UTF-8.
  131. default:
  132. rr, size := utf8.DecodeRune(s[r:])
  133. r += size
  134. w += utf8.EncodeRune(b[w:], rr)
  135. }
  136. }
  137. return b[0:w], true
  138. }