// File: decode.go

  1. package logfmt
  2. import (
  3. "bufio"
  4. "bytes"
  5. "fmt"
  6. "io"
  7. "unicode/utf8"
  8. )
  9. // A Decoder reads and decodes logfmt records from an input stream.
  10. type Decoder struct {
  11. pos int
  12. key []byte
  13. value []byte
  14. lineNum int
  15. s *bufio.Scanner
  16. err error
  17. }
  18. // NewDecoder returns a new decoder that reads from r.
  19. //
  20. // The decoder introduces its own buffering and may read data from r beyond
  21. // the logfmt records requested.
  22. func NewDecoder(r io.Reader) *Decoder {
  23. dec := &Decoder{
  24. s: bufio.NewScanner(r),
  25. }
  26. return dec
  27. }
  28. // ScanRecord advances the Decoder to the next record, which can then be
  29. // parsed with the ScanKeyval method. It returns false when decoding stops,
  30. // either by reaching the end of the input or an error. After ScanRecord
  31. // returns false, the Err method will return any error that occurred during
  32. // decoding, except that if it was io.EOF, Err will return nil.
  33. func (dec *Decoder) ScanRecord() bool {
  34. if dec.err != nil {
  35. return false
  36. }
  37. if !dec.s.Scan() {
  38. dec.err = dec.s.Err()
  39. return false
  40. }
  41. dec.lineNum++
  42. dec.pos = 0
  43. return true
  44. }
  45. // ScanKeyval advances the Decoder to the next key/value pair of the current
  46. // record, which can then be retrieved with the Key and Value methods. It
  47. // returns false when decoding stops, either by reaching the end of the
  48. // current record or an error.
  49. func (dec *Decoder) ScanKeyval() bool {
  50. dec.key, dec.value = nil, nil
  51. if dec.err != nil {
  52. return false
  53. }
  54. line := dec.s.Bytes()
  55. // garbage
  56. for p, c := range line[dec.pos:] {
  57. if c > ' ' {
  58. dec.pos += p
  59. goto key
  60. }
  61. }
  62. dec.pos = len(line)
  63. return false
  64. key:
  65. const invalidKeyError = "invalid key"
  66. start, multibyte := dec.pos, false
  67. for p, c := range line[dec.pos:] {
  68. switch {
  69. case c == '=':
  70. dec.pos += p
  71. if dec.pos > start {
  72. dec.key = line[start:dec.pos]
  73. if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 {
  74. dec.syntaxError(invalidKeyError)
  75. return false
  76. }
  77. }
  78. if dec.key == nil {
  79. dec.unexpectedByte(c)
  80. return false
  81. }
  82. goto equal
  83. case c == '"':
  84. dec.pos += p
  85. dec.unexpectedByte(c)
  86. return false
  87. case c <= ' ':
  88. dec.pos += p
  89. if dec.pos > start {
  90. dec.key = line[start:dec.pos]
  91. if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 {
  92. dec.syntaxError(invalidKeyError)
  93. return false
  94. }
  95. }
  96. return true
  97. case c >= utf8.RuneSelf:
  98. multibyte = true
  99. }
  100. }
  101. dec.pos = len(line)
  102. if dec.pos > start {
  103. dec.key = line[start:dec.pos]
  104. if multibyte && bytes.IndexRune(dec.key, utf8.RuneError) != -1 {
  105. dec.syntaxError(invalidKeyError)
  106. return false
  107. }
  108. }
  109. return true
  110. equal:
  111. dec.pos++
  112. if dec.pos >= len(line) {
  113. return true
  114. }
  115. switch c := line[dec.pos]; {
  116. case c <= ' ':
  117. return true
  118. case c == '"':
  119. goto qvalue
  120. }
  121. // value
  122. start = dec.pos
  123. for p, c := range line[dec.pos:] {
  124. switch {
  125. case c == '=' || c == '"':
  126. dec.pos += p
  127. dec.unexpectedByte(c)
  128. return false
  129. case c <= ' ':
  130. dec.pos += p
  131. if dec.pos > start {
  132. dec.value = line[start:dec.pos]
  133. }
  134. return true
  135. }
  136. }
  137. dec.pos = len(line)
  138. if dec.pos > start {
  139. dec.value = line[start:dec.pos]
  140. }
  141. return true
  142. qvalue:
  143. const (
  144. untermQuote = "unterminated quoted value"
  145. invalidQuote = "invalid quoted value"
  146. )
  147. hasEsc, esc := false, false
  148. start = dec.pos
  149. for p, c := range line[dec.pos+1:] {
  150. switch {
  151. case esc:
  152. esc = false
  153. case c == '\\':
  154. hasEsc, esc = true, true
  155. case c == '"':
  156. dec.pos += p + 2
  157. if hasEsc {
  158. v, ok := unquoteBytes(line[start:dec.pos])
  159. if !ok {
  160. dec.syntaxError(invalidQuote)
  161. return false
  162. }
  163. dec.value = v
  164. } else {
  165. start++
  166. end := dec.pos - 1
  167. if end > start {
  168. dec.value = line[start:end]
  169. }
  170. }
  171. return true
  172. }
  173. }
  174. dec.pos = len(line)
  175. dec.syntaxError(untermQuote)
  176. return false
  177. }
  178. // Key returns the most recent key found by a call to ScanKeyval. The returned
  179. // slice may point to internal buffers and is only valid until the next call
  180. // to ScanRecord. It does no allocation.
  181. func (dec *Decoder) Key() []byte {
  182. return dec.key
  183. }
  184. // Value returns the most recent value found by a call to ScanKeyval. The
  185. // returned slice may point to internal buffers and is only valid until the
  186. // next call to ScanRecord. It does no allocation when the value has no
  187. // escape sequences.
  188. func (dec *Decoder) Value() []byte {
  189. return dec.value
  190. }
  191. // Err returns the first non-EOF error that was encountered by the Scanner.
  192. func (dec *Decoder) Err() error {
  193. return dec.err
  194. }
  195. func (dec *Decoder) syntaxError(msg string) {
  196. dec.err = &SyntaxError{
  197. Msg: msg,
  198. Line: dec.lineNum,
  199. Pos: dec.pos + 1,
  200. }
  201. }
  202. func (dec *Decoder) unexpectedByte(c byte) {
  203. dec.err = &SyntaxError{
  204. Msg: fmt.Sprintf("unexpected %q", c),
  205. Line: dec.lineNum,
  206. Pos: dec.pos + 1,
  207. }
  208. }
  209. // A SyntaxError represents a syntax error in the logfmt input stream.
  210. type SyntaxError struct {
  211. Msg string
  212. Line int
  213. Pos int
  214. }
  215. func (e *SyntaxError) Error() string {
  216. return fmt.Sprintf("logfmt syntax error at pos %d on line %d: %s", e.Pos, e.Line, e.Msg)
  217. }