text_parse.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933
  1. // Copyright 2014 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package expfmt
  14. import (
  15. "bufio"
  16. "bytes"
  17. "errors"
  18. "fmt"
  19. "io"
  20. "math"
  21. "strconv"
  22. "strings"
  23. "unicode/utf8"
  24. dto "github.com/prometheus/client_model/go"
  25. "google.golang.org/protobuf/proto"
  26. "github.com/prometheus/common/model"
  27. )
// A stateFn is one state of the parser's state machine. Calling it advances
// the parse and returns the stateFn for the state that follows. A nil return
// value is the end state: the driving loop in TextToMetricFamilies stops as
// soon as it sees nil.
type stateFn func() stateFn
  33. // ParseError signals errors while parsing the simple and flat text-based
  34. // exchange format.
  35. type ParseError struct {
  36. Line int
  37. Msg string
  38. }
  39. // Error implements the error interface.
  40. func (e ParseError) Error() string {
  41. return fmt.Sprintf("text format parsing error in line %d: %s", e.Line, e.Msg)
  42. }
// TextParser is used to parse the simple and flat text-based exchange format. Its
// zero value is ready to use.
//
// All parsing state lives in the fields below. TextToMetricFamilies resets
// part of that state and then drives the state functions (the stateFn
// methods), which communicate exclusively through these fields.
type TextParser struct {
	// metricFamiliesByName is the result of the current parse; it is replaced
	// by a fresh map at the start of every TextToMetricFamilies call.
	metricFamiliesByName map[string]*dto.MetricFamily
	buf                  *bufio.Reader // Where the parsed input is read through.
	err                  error         // Most recent error.
	lineCount            int           // Tracks the line count for error messages.
	currentByte          byte          // The most recent byte read.
	currentToken         bytes.Buffer  // Re-used each time a token has to be gathered from multiple bytes.
	// currentMF, currentMetric and currentLabelPair are the metric family,
	// metric and label pair that the state functions are currently working on.
	currentMF         *dto.MetricFamily
	currentMetric     *dto.Metric
	currentLabelPair  *dto.LabelPair
	currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line.

	// The remaining member variables are only used for summaries/histograms.
	currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
	// Summary specific.
	summaries       map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentQuantile float64                // NaN unless the current line carries a 'quantile' label.
	// Histogram specific.
	histograms    map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentBucket float64                // NaN unless the current line carries an 'le' label.
	// These tell us if the currently processed line ends on '_count' or
	// '_sum' respectively and belong to a summary/histogram, representing the sample
	// count and sum of that summary/histogram.
	currentIsSummaryCount, currentIsSummarySum     bool
	currentIsHistogramCount, currentIsHistogramSum bool
	// These indicate if the metric name from the current line being parsed is inside
	// braces and if that metric name was found respectively. Both are cleared
	// at the start of every line.
	currentMetricIsInsideBraces, currentMetricInsideBracesIsPresent bool
	// scheme sets the desired ValidationScheme for names. Defaults to the invalid
	// UnsetValidation.
	scheme model.ValidationScheme
}
  76. // NewTextParser returns a new TextParser with the provided nameValidationScheme.
  77. func NewTextParser(nameValidationScheme model.ValidationScheme) TextParser {
  78. return TextParser{scheme: nameValidationScheme}
  79. }
  80. // TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
  81. // format and creates MetricFamily proto messages. It returns the MetricFamily
  82. // proto messages in a map where the metric names are the keys, along with any
  83. // error encountered.
  84. //
  85. // If the input contains duplicate metrics (i.e. lines with the same metric name
  86. // and exactly the same label set), the resulting MetricFamily will contain
  87. // duplicate Metric proto messages. Similar is true for duplicate label
  88. // names. Checks for duplicates have to be performed separately, if required.
  89. // Also note that neither the metrics within each MetricFamily are sorted nor
  90. // the label pairs within each Metric. Sorting is not required for the most
  91. // frequent use of this method, which is sample ingestion in the Prometheus
  92. // server. However, for presentation purposes, you might want to sort the
  93. // metrics, and in some cases, you must sort the labels, e.g. for consumption by
  94. // the metric family injection hook of the Prometheus registry.
  95. //
  96. // Summaries and histograms are rather special beasts. You would probably not
  97. // use them in the simple text format anyway. This method can deal with
  98. // summaries and histograms if they are presented in exactly the way the
  99. // text.Create function creates them.
  100. //
  101. // This method must not be called concurrently. If you want to parse different
  102. // input concurrently, instantiate a separate Parser for each goroutine.
  103. func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) {
  104. p.reset(in)
  105. for nextState := p.startOfLine; nextState != nil; nextState = nextState() {
  106. // Magic happens here...
  107. }
  108. // Get rid of empty metric families.
  109. for k, mf := range p.metricFamiliesByName {
  110. if len(mf.GetMetric()) == 0 {
  111. delete(p.metricFamiliesByName, k)
  112. }
  113. }
  114. // If p.err is io.EOF now, we have run into a premature end of the input
  115. // stream. Turn this error into something nicer and more
  116. // meaningful. (io.EOF is often used as a signal for the legitimate end
  117. // of an input stream.)
  118. if p.err != nil && errors.Is(p.err, io.EOF) {
  119. p.parseError("unexpected end of input stream")
  120. }
  121. return p.metricFamiliesByName, p.err
  122. }
  123. func (p *TextParser) reset(in io.Reader) {
  124. p.metricFamiliesByName = map[string]*dto.MetricFamily{}
  125. p.currentLabelPairs = nil
  126. if p.buf == nil {
  127. p.buf = bufio.NewReader(in)
  128. } else {
  129. p.buf.Reset(in)
  130. }
  131. p.err = nil
  132. p.lineCount = 0
  133. if p.summaries == nil || len(p.summaries) > 0 {
  134. p.summaries = map[uint64]*dto.Metric{}
  135. }
  136. if p.histograms == nil || len(p.histograms) > 0 {
  137. p.histograms = map[uint64]*dto.Metric{}
  138. }
  139. p.currentQuantile = math.NaN()
  140. p.currentBucket = math.NaN()
  141. p.currentMF = nil
  142. }
  143. // startOfLine represents the state where the next byte read from p.buf is the
  144. // start of a line (or whitespace leading up to it).
  145. func (p *TextParser) startOfLine() stateFn {
  146. p.lineCount++
  147. p.currentMetricIsInsideBraces = false
  148. p.currentMetricInsideBracesIsPresent = false
  149. if p.skipBlankTab(); p.err != nil {
  150. // This is the only place that we expect to see io.EOF,
  151. // which is not an error but the signal that we are done.
  152. // Any other error that happens to align with the start of
  153. // a line is still an error.
  154. if errors.Is(p.err, io.EOF) {
  155. p.err = nil
  156. }
  157. return nil
  158. }
  159. switch p.currentByte {
  160. case '#':
  161. return p.startComment
  162. case '\n':
  163. return p.startOfLine // Empty line, start the next one.
  164. case '{':
  165. p.currentMetricIsInsideBraces = true
  166. return p.readingLabels
  167. }
  168. return p.readingMetricName
  169. }
  170. // startComment represents the state where the next byte read from p.buf is the
  171. // start of a comment (or whitespace leading up to it).
  172. func (p *TextParser) startComment() stateFn {
  173. if p.skipBlankTab(); p.err != nil {
  174. return nil // Unexpected end of input.
  175. }
  176. if p.currentByte == '\n' {
  177. return p.startOfLine
  178. }
  179. if p.readTokenUntilWhitespace(); p.err != nil {
  180. return nil // Unexpected end of input.
  181. }
  182. // If we have hit the end of line already, there is nothing left
  183. // to do. This is not considered a syntax error.
  184. if p.currentByte == '\n' {
  185. return p.startOfLine
  186. }
  187. keyword := p.currentToken.String()
  188. if keyword != "HELP" && keyword != "TYPE" {
  189. // Generic comment, ignore by fast forwarding to end of line.
  190. for p.currentByte != '\n' {
  191. if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
  192. return nil // Unexpected end of input.
  193. }
  194. }
  195. return p.startOfLine
  196. }
  197. // There is something. Next has to be a metric name.
  198. if p.skipBlankTab(); p.err != nil {
  199. return nil // Unexpected end of input.
  200. }
  201. if p.readTokenAsMetricName(); p.err != nil {
  202. return nil // Unexpected end of input.
  203. }
  204. if p.currentByte == '\n' {
  205. // At the end of the line already.
  206. // Again, this is not considered a syntax error.
  207. return p.startOfLine
  208. }
  209. if !isBlankOrTab(p.currentByte) {
  210. p.parseError("invalid metric name in comment")
  211. return nil
  212. }
  213. p.setOrCreateCurrentMF()
  214. if p.err != nil {
  215. return nil
  216. }
  217. if p.skipBlankTab(); p.err != nil {
  218. return nil // Unexpected end of input.
  219. }
  220. if p.currentByte == '\n' {
  221. // At the end of the line already.
  222. // Again, this is not considered a syntax error.
  223. return p.startOfLine
  224. }
  225. switch keyword {
  226. case "HELP":
  227. return p.readingHelp
  228. case "TYPE":
  229. return p.readingType
  230. }
  231. panic(fmt.Sprintf("code error: unexpected keyword %q", keyword))
  232. }
  233. // readingMetricName represents the state where the last byte read (now in
  234. // p.currentByte) is the first byte of a metric name.
  235. func (p *TextParser) readingMetricName() stateFn {
  236. if p.readTokenAsMetricName(); p.err != nil {
  237. return nil
  238. }
  239. if p.currentToken.Len() == 0 {
  240. p.parseError("invalid metric name")
  241. return nil
  242. }
  243. p.setOrCreateCurrentMF()
  244. if p.err != nil {
  245. return nil
  246. }
  247. // Now is the time to fix the type if it hasn't happened yet.
  248. if p.currentMF.Type == nil {
  249. p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
  250. }
  251. p.currentMetric = &dto.Metric{}
  252. // Do not append the newly created currentMetric to
  253. // currentMF.Metric right now. First wait if this is a summary,
  254. // and the metric exists already, which we can only know after
  255. // having read all the labels.
  256. if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
  257. return nil // Unexpected end of input.
  258. }
  259. return p.readingLabels
  260. }
  261. // readingLabels represents the state where the last byte read (now in
  262. // p.currentByte) is either the first byte of the label set (i.e. a '{'), or the
  263. // first byte of the value (otherwise).
  264. func (p *TextParser) readingLabels() stateFn {
  265. // Summaries/histograms are special. We have to reset the
  266. // currentLabels map, currentQuantile and currentBucket before starting to
  267. // read labels.
  268. if p.currentMF.GetType() == dto.MetricType_SUMMARY || p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
  269. p.currentLabels = map[string]string{}
  270. p.currentLabels[string(model.MetricNameLabel)] = p.currentMF.GetName()
  271. p.currentQuantile = math.NaN()
  272. p.currentBucket = math.NaN()
  273. }
  274. if p.currentByte != '{' {
  275. return p.readingValue
  276. }
  277. return p.startLabelName
  278. }
  279. // startLabelName represents the state where the next byte read from p.buf is
  280. // the start of a label name (or whitespace leading up to it).
  281. func (p *TextParser) startLabelName() stateFn {
  282. if p.skipBlankTab(); p.err != nil {
  283. return nil // Unexpected end of input.
  284. }
  285. if p.currentByte == '}' {
  286. p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
  287. p.currentLabelPairs = nil
  288. if p.skipBlankTab(); p.err != nil {
  289. return nil // Unexpected end of input.
  290. }
  291. return p.readingValue
  292. }
  293. if p.readTokenAsLabelName(); p.err != nil {
  294. return nil // Unexpected end of input.
  295. }
  296. if p.currentToken.Len() == 0 {
  297. p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
  298. return nil
  299. }
  300. if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
  301. return nil // Unexpected end of input.
  302. }
  303. if p.currentByte != '=' {
  304. if p.currentMetricIsInsideBraces {
  305. if p.currentMetricInsideBracesIsPresent {
  306. p.parseError(fmt.Sprintf("multiple metric names for metric %q", p.currentMF.GetName()))
  307. return nil
  308. }
  309. switch p.currentByte {
  310. case ',':
  311. p.setOrCreateCurrentMF()
  312. if p.err != nil {
  313. return nil
  314. }
  315. if p.currentMF.Type == nil {
  316. p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
  317. }
  318. p.currentMetric = &dto.Metric{}
  319. p.currentMetricInsideBracesIsPresent = true
  320. return p.startLabelName
  321. case '}':
  322. p.setOrCreateCurrentMF()
  323. if p.err != nil {
  324. p.currentLabelPairs = nil
  325. return nil
  326. }
  327. if p.currentMF.Type == nil {
  328. p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
  329. }
  330. p.currentMetric = &dto.Metric{}
  331. p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
  332. p.currentLabelPairs = nil
  333. if p.skipBlankTab(); p.err != nil {
  334. return nil // Unexpected end of input.
  335. }
  336. return p.readingValue
  337. default:
  338. p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte))
  339. return nil
  340. }
  341. }
  342. p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
  343. p.currentLabelPairs = nil
  344. return nil
  345. }
  346. p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
  347. if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
  348. p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
  349. p.currentLabelPairs = nil
  350. return nil
  351. }
  352. if !p.scheme.IsValidLabelName(p.currentLabelPair.GetName()) {
  353. p.parseError(fmt.Sprintf("invalid label name %q", p.currentLabelPair.GetName()))
  354. p.currentLabelPairs = nil
  355. return nil
  356. }
  357. // Special summary/histogram treatment. Don't add 'quantile' and 'le'
  358. // labels to 'real' labels.
  359. if (p.currentMF.GetType() != dto.MetricType_SUMMARY || p.currentLabelPair.GetName() != model.QuantileLabel) &&
  360. (p.currentMF.GetType() != dto.MetricType_HISTOGRAM || p.currentLabelPair.GetName() != model.BucketLabel) {
  361. p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair)
  362. }
  363. // Check for duplicate label names.
  364. labels := make(map[string]struct{})
  365. for _, l := range p.currentLabelPairs {
  366. lName := l.GetName()
  367. if _, exists := labels[lName]; exists {
  368. p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
  369. p.currentLabelPairs = nil
  370. return nil
  371. }
  372. labels[lName] = struct{}{}
  373. }
  374. return p.startLabelValue
  375. }
  376. // startLabelValue represents the state where the next byte read from p.buf is
  377. // the start of a (quoted) label value (or whitespace leading up to it).
  378. func (p *TextParser) startLabelValue() stateFn {
  379. if p.skipBlankTab(); p.err != nil {
  380. return nil // Unexpected end of input.
  381. }
  382. if p.currentByte != '"' {
  383. p.parseError(fmt.Sprintf("expected '\"' at start of label value, found %q", p.currentByte))
  384. return nil
  385. }
  386. if p.readTokenAsLabelValue(); p.err != nil {
  387. return nil
  388. }
  389. if !model.LabelValue(p.currentToken.String()).IsValid() {
  390. p.parseError(fmt.Sprintf("invalid label value %q", p.currentToken.String()))
  391. return nil
  392. }
  393. p.currentLabelPair.Value = proto.String(p.currentToken.String())
  394. // Special treatment of summaries:
  395. // - Quantile labels are special, will result in dto.Quantile later.
  396. // - Other labels have to be added to currentLabels for signature calculation.
  397. if p.currentMF.GetType() == dto.MetricType_SUMMARY {
  398. if p.currentLabelPair.GetName() == model.QuantileLabel {
  399. if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
  400. // Create a more helpful error message.
  401. p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
  402. p.currentLabelPairs = nil
  403. return nil
  404. }
  405. } else {
  406. p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
  407. }
  408. }
  409. // Similar special treatment of histograms.
  410. if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
  411. if p.currentLabelPair.GetName() == model.BucketLabel {
  412. if p.currentBucket, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
  413. // Create a more helpful error message.
  414. p.parseError(fmt.Sprintf("expected float as value for 'le' label, got %q", p.currentLabelPair.GetValue()))
  415. return nil
  416. }
  417. } else {
  418. p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
  419. }
  420. }
  421. if p.skipBlankTab(); p.err != nil {
  422. return nil // Unexpected end of input.
  423. }
  424. switch p.currentByte {
  425. case ',':
  426. return p.startLabelName
  427. case '}':
  428. if p.currentMF == nil {
  429. p.parseError("invalid metric name")
  430. return nil
  431. }
  432. p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
  433. p.currentLabelPairs = nil
  434. if p.skipBlankTab(); p.err != nil {
  435. return nil // Unexpected end of input.
  436. }
  437. return p.readingValue
  438. default:
  439. p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
  440. p.currentLabelPairs = nil
  441. return nil
  442. }
  443. }
  444. // readingValue represents the state where the last byte read (now in
  445. // p.currentByte) is the first byte of the sample value (i.e. a float).
  446. func (p *TextParser) readingValue() stateFn {
  447. // When we are here, we have read all the labels, so for the
  448. // special case of a summary/histogram, we can finally find out
  449. // if the metric already exists.
  450. switch p.currentMF.GetType() {
  451. case dto.MetricType_SUMMARY:
  452. signature := model.LabelsToSignature(p.currentLabels)
  453. if summary := p.summaries[signature]; summary != nil {
  454. p.currentMetric = summary
  455. } else {
  456. p.summaries[signature] = p.currentMetric
  457. p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
  458. }
  459. case dto.MetricType_HISTOGRAM:
  460. signature := model.LabelsToSignature(p.currentLabels)
  461. if histogram := p.histograms[signature]; histogram != nil {
  462. p.currentMetric = histogram
  463. } else {
  464. p.histograms[signature] = p.currentMetric
  465. p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
  466. }
  467. default:
  468. p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
  469. }
  470. if p.readTokenUntilWhitespace(); p.err != nil {
  471. return nil // Unexpected end of input.
  472. }
  473. value, err := parseFloat(p.currentToken.String())
  474. if err != nil {
  475. // Create a more helpful error message.
  476. p.parseError(fmt.Sprintf("expected float as value, got %q", p.currentToken.String()))
  477. return nil
  478. }
  479. switch p.currentMF.GetType() {
  480. case dto.MetricType_COUNTER:
  481. p.currentMetric.Counter = &dto.Counter{Value: proto.Float64(value)}
  482. case dto.MetricType_GAUGE:
  483. p.currentMetric.Gauge = &dto.Gauge{Value: proto.Float64(value)}
  484. case dto.MetricType_UNTYPED:
  485. p.currentMetric.Untyped = &dto.Untyped{Value: proto.Float64(value)}
  486. case dto.MetricType_SUMMARY:
  487. // *sigh*
  488. if p.currentMetric.Summary == nil {
  489. p.currentMetric.Summary = &dto.Summary{}
  490. }
  491. switch {
  492. case p.currentIsSummaryCount:
  493. p.currentMetric.Summary.SampleCount = proto.Uint64(uint64(value))
  494. case p.currentIsSummarySum:
  495. p.currentMetric.Summary.SampleSum = proto.Float64(value)
  496. case !math.IsNaN(p.currentQuantile):
  497. p.currentMetric.Summary.Quantile = append(
  498. p.currentMetric.Summary.Quantile,
  499. &dto.Quantile{
  500. Quantile: proto.Float64(p.currentQuantile),
  501. Value: proto.Float64(value),
  502. },
  503. )
  504. }
  505. case dto.MetricType_HISTOGRAM:
  506. // *sigh*
  507. if p.currentMetric.Histogram == nil {
  508. p.currentMetric.Histogram = &dto.Histogram{}
  509. }
  510. switch {
  511. case p.currentIsHistogramCount:
  512. p.currentMetric.Histogram.SampleCount = proto.Uint64(uint64(value))
  513. case p.currentIsHistogramSum:
  514. p.currentMetric.Histogram.SampleSum = proto.Float64(value)
  515. case !math.IsNaN(p.currentBucket):
  516. p.currentMetric.Histogram.Bucket = append(
  517. p.currentMetric.Histogram.Bucket,
  518. &dto.Bucket{
  519. UpperBound: proto.Float64(p.currentBucket),
  520. CumulativeCount: proto.Uint64(uint64(value)),
  521. },
  522. )
  523. }
  524. default:
  525. p.err = fmt.Errorf("unexpected type for metric name %q", p.currentMF.GetName())
  526. }
  527. if p.currentByte == '\n' {
  528. return p.startOfLine
  529. }
  530. return p.startTimestamp
  531. }
  532. // startTimestamp represents the state where the next byte read from p.buf is
  533. // the start of the timestamp (or whitespace leading up to it).
  534. func (p *TextParser) startTimestamp() stateFn {
  535. if p.skipBlankTab(); p.err != nil {
  536. return nil // Unexpected end of input.
  537. }
  538. if p.readTokenUntilWhitespace(); p.err != nil {
  539. return nil // Unexpected end of input.
  540. }
  541. timestamp, err := strconv.ParseInt(p.currentToken.String(), 10, 64)
  542. if err != nil {
  543. // Create a more helpful error message.
  544. p.parseError(fmt.Sprintf("expected integer as timestamp, got %q", p.currentToken.String()))
  545. return nil
  546. }
  547. p.currentMetric.TimestampMs = proto.Int64(timestamp)
  548. if p.readTokenUntilNewline(false); p.err != nil {
  549. return nil // Unexpected end of input.
  550. }
  551. if p.currentToken.Len() > 0 {
  552. p.parseError(fmt.Sprintf("spurious string after timestamp: %q", p.currentToken.String()))
  553. return nil
  554. }
  555. return p.startOfLine
  556. }
  557. // readingHelp represents the state where the last byte read (now in
  558. // p.currentByte) is the first byte of the docstring after 'HELP'.
  559. func (p *TextParser) readingHelp() stateFn {
  560. if p.currentMF.Help != nil {
  561. p.parseError(fmt.Sprintf("second HELP line for metric name %q", p.currentMF.GetName()))
  562. return nil
  563. }
  564. // Rest of line is the docstring.
  565. if p.readTokenUntilNewline(true); p.err != nil {
  566. return nil // Unexpected end of input.
  567. }
  568. p.currentMF.Help = proto.String(p.currentToken.String())
  569. return p.startOfLine
  570. }
  571. // readingType represents the state where the last byte read (now in
  572. // p.currentByte) is the first byte of the type hint after 'HELP'.
  573. func (p *TextParser) readingType() stateFn {
  574. if p.currentMF.Type != nil {
  575. p.parseError(fmt.Sprintf("second TYPE line for metric name %q, or TYPE reported after samples", p.currentMF.GetName()))
  576. return nil
  577. }
  578. // Rest of line is the type.
  579. if p.readTokenUntilNewline(false); p.err != nil {
  580. return nil // Unexpected end of input.
  581. }
  582. metricType, ok := dto.MetricType_value[strings.ToUpper(p.currentToken.String())]
  583. if !ok {
  584. p.parseError(fmt.Sprintf("unknown metric type %q", p.currentToken.String()))
  585. return nil
  586. }
  587. p.currentMF.Type = dto.MetricType(metricType).Enum()
  588. return p.startOfLine
  589. }
  590. // parseError sets p.err to a ParseError at the current line with the given
  591. // message.
  592. func (p *TextParser) parseError(msg string) {
  593. p.err = ParseError{
  594. Line: p.lineCount,
  595. Msg: msg,
  596. }
  597. }
  598. // skipBlankTab reads (and discards) bytes from p.buf until it encounters a byte
  599. // that is neither ' ' nor '\t'. That byte is left in p.currentByte.
  600. func (p *TextParser) skipBlankTab() {
  601. for {
  602. if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil || !isBlankOrTab(p.currentByte) {
  603. return
  604. }
  605. }
  606. }
  607. // skipBlankTabIfCurrentBlankTab works exactly as skipBlankTab but doesn't do
  608. // anything if p.currentByte is neither ' ' nor '\t'.
  609. func (p *TextParser) skipBlankTabIfCurrentBlankTab() {
  610. if isBlankOrTab(p.currentByte) {
  611. p.skipBlankTab()
  612. }
  613. }
  614. // readTokenUntilWhitespace copies bytes from p.buf into p.currentToken. The
  615. // first byte considered is the byte already read (now in p.currentByte). The
  616. // first whitespace byte encountered is still copied into p.currentByte, but not
  617. // into p.currentToken.
  618. func (p *TextParser) readTokenUntilWhitespace() {
  619. p.currentToken.Reset()
  620. for p.err == nil && !isBlankOrTab(p.currentByte) && p.currentByte != '\n' {
  621. p.currentToken.WriteByte(p.currentByte)
  622. p.currentByte, p.err = p.buf.ReadByte()
  623. }
  624. }
  625. // readTokenUntilNewline copies bytes from p.buf into p.currentToken. The first
  626. // byte considered is the byte already read (now in p.currentByte). The first
  627. // newline byte encountered is still copied into p.currentByte, but not into
  628. // p.currentToken. If recognizeEscapeSequence is true, two escape sequences are
  629. // recognized: '\\' translates into '\', and '\n' into a line-feed character.
  630. // All other escape sequences are invalid and cause an error.
  631. func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
  632. p.currentToken.Reset()
  633. escaped := false
  634. for p.err == nil {
  635. if recognizeEscapeSequence && escaped {
  636. switch p.currentByte {
  637. case '\\':
  638. p.currentToken.WriteByte(p.currentByte)
  639. case 'n':
  640. p.currentToken.WriteByte('\n')
  641. case '"':
  642. p.currentToken.WriteByte('"')
  643. default:
  644. p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
  645. return
  646. }
  647. escaped = false
  648. } else {
  649. switch p.currentByte {
  650. case '\n':
  651. return
  652. case '\\':
  653. escaped = true
  654. default:
  655. p.currentToken.WriteByte(p.currentByte)
  656. }
  657. }
  658. p.currentByte, p.err = p.buf.ReadByte()
  659. }
  660. }
  661. // readTokenAsMetricName copies a metric name from p.buf into p.currentToken.
  662. // The first byte considered is the byte already read (now in p.currentByte).
  663. // The first byte not part of a metric name is still copied into p.currentByte,
  664. // but not into p.currentToken.
  665. func (p *TextParser) readTokenAsMetricName() {
  666. p.currentToken.Reset()
  667. // A UTF-8 metric name must be quoted and may have escaped characters.
  668. quoted := false
  669. escaped := false
  670. if !isValidMetricNameStart(p.currentByte) {
  671. return
  672. }
  673. for p.err == nil {
  674. if escaped {
  675. switch p.currentByte {
  676. case '\\':
  677. p.currentToken.WriteByte(p.currentByte)
  678. case 'n':
  679. p.currentToken.WriteByte('\n')
  680. case '"':
  681. p.currentToken.WriteByte('"')
  682. default:
  683. p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
  684. return
  685. }
  686. escaped = false
  687. } else {
  688. switch p.currentByte {
  689. case '"':
  690. quoted = !quoted
  691. if !quoted {
  692. p.currentByte, p.err = p.buf.ReadByte()
  693. return
  694. }
  695. case '\n':
  696. p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String()))
  697. return
  698. case '\\':
  699. escaped = true
  700. default:
  701. p.currentToken.WriteByte(p.currentByte)
  702. }
  703. }
  704. p.currentByte, p.err = p.buf.ReadByte()
  705. if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') {
  706. return
  707. }
  708. }
  709. }
  710. // readTokenAsLabelName copies a label name from p.buf into p.currentToken.
  711. // The first byte considered is the byte already read (now in p.currentByte).
  712. // The first byte not part of a label name is still copied into p.currentByte,
  713. // but not into p.currentToken.
  714. func (p *TextParser) readTokenAsLabelName() {
  715. p.currentToken.Reset()
  716. // A UTF-8 label name must be quoted and may have escaped characters.
  717. quoted := false
  718. escaped := false
  719. if !isValidLabelNameStart(p.currentByte) {
  720. return
  721. }
  722. for p.err == nil {
  723. if escaped {
  724. switch p.currentByte {
  725. case '\\':
  726. p.currentToken.WriteByte(p.currentByte)
  727. case 'n':
  728. p.currentToken.WriteByte('\n')
  729. case '"':
  730. p.currentToken.WriteByte('"')
  731. default:
  732. p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
  733. return
  734. }
  735. escaped = false
  736. } else {
  737. switch p.currentByte {
  738. case '"':
  739. quoted = !quoted
  740. if !quoted {
  741. p.currentByte, p.err = p.buf.ReadByte()
  742. return
  743. }
  744. case '\n':
  745. p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String()))
  746. return
  747. case '\\':
  748. escaped = true
  749. default:
  750. p.currentToken.WriteByte(p.currentByte)
  751. }
  752. }
  753. p.currentByte, p.err = p.buf.ReadByte()
  754. if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') {
  755. return
  756. }
  757. }
  758. }
  759. // readTokenAsLabelValue copies a label value from p.buf into p.currentToken.
  760. // In contrast to the other 'readTokenAs...' functions, which start with the
  761. // last read byte in p.currentByte, this method ignores p.currentByte and starts
  762. // with reading a new byte from p.buf. The first byte not part of a label value
  763. // is still copied into p.currentByte, but not into p.currentToken.
  764. func (p *TextParser) readTokenAsLabelValue() {
  765. p.currentToken.Reset()
  766. escaped := false
  767. for {
  768. if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
  769. return
  770. }
  771. if escaped {
  772. switch p.currentByte {
  773. case '"', '\\':
  774. p.currentToken.WriteByte(p.currentByte)
  775. case 'n':
  776. p.currentToken.WriteByte('\n')
  777. default:
  778. p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
  779. p.currentLabelPairs = nil
  780. return
  781. }
  782. escaped = false
  783. continue
  784. }
  785. switch p.currentByte {
  786. case '"':
  787. return
  788. case '\n':
  789. p.parseError(fmt.Sprintf("label value %q contains unescaped new-line", p.currentToken.String()))
  790. return
  791. case '\\':
  792. escaped = true
  793. default:
  794. p.currentToken.WriteByte(p.currentByte)
  795. }
  796. }
  797. }
  798. func (p *TextParser) setOrCreateCurrentMF() {
  799. p.currentIsSummaryCount = false
  800. p.currentIsSummarySum = false
  801. p.currentIsHistogramCount = false
  802. p.currentIsHistogramSum = false
  803. name := p.currentToken.String()
  804. if !p.scheme.IsValidMetricName(name) {
  805. p.parseError(fmt.Sprintf("invalid metric name %q", name))
  806. return
  807. }
  808. if p.currentMF = p.metricFamiliesByName[name]; p.currentMF != nil {
  809. return
  810. }
  811. // Try out if this is a _sum or _count for a summary/histogram.
  812. summaryName := summaryMetricName(name)
  813. if p.currentMF = p.metricFamiliesByName[summaryName]; p.currentMF != nil {
  814. if p.currentMF.GetType() == dto.MetricType_SUMMARY {
  815. if isCount(name) {
  816. p.currentIsSummaryCount = true
  817. }
  818. if isSum(name) {
  819. p.currentIsSummarySum = true
  820. }
  821. return
  822. }
  823. }
  824. histogramName := histogramMetricName(name)
  825. if p.currentMF = p.metricFamiliesByName[histogramName]; p.currentMF != nil {
  826. if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
  827. if isCount(name) {
  828. p.currentIsHistogramCount = true
  829. }
  830. if isSum(name) {
  831. p.currentIsHistogramSum = true
  832. }
  833. return
  834. }
  835. }
  836. p.currentMF = &dto.MetricFamily{Name: proto.String(name)}
  837. p.metricFamiliesByName[name] = p.currentMF
  838. }
  839. func isValidLabelNameStart(b byte) bool {
  840. return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == '"'
  841. }
  842. func isValidLabelNameContinuation(b byte, quoted bool) bool {
  843. return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b)))
  844. }
  845. func isValidMetricNameStart(b byte) bool {
  846. return isValidLabelNameStart(b) || b == ':'
  847. }
  848. func isValidMetricNameContinuation(b byte, quoted bool) bool {
  849. return isValidLabelNameContinuation(b, quoted) || b == ':'
  850. }
  851. func isBlankOrTab(b byte) bool {
  852. return b == ' ' || b == '\t'
  853. }
  854. func isCount(name string) bool {
  855. return len(name) > 6 && name[len(name)-6:] == "_count"
  856. }
  857. func isSum(name string) bool {
  858. return len(name) > 4 && name[len(name)-4:] == "_sum"
  859. }
  860. func isBucket(name string) bool {
  861. return len(name) > 7 && name[len(name)-7:] == "_bucket"
  862. }
  863. func summaryMetricName(name string) string {
  864. switch {
  865. case isCount(name):
  866. return name[:len(name)-6]
  867. case isSum(name):
  868. return name[:len(name)-4]
  869. default:
  870. return name
  871. }
  872. }
  873. func histogramMetricName(name string) string {
  874. switch {
  875. case isCount(name):
  876. return name[:len(name)-6]
  877. case isSum(name):
  878. return name[:len(name)-4]
  879. case isBucket(name):
  880. return name[:len(name)-7]
  881. default:
  882. return name
  883. }
  884. }
  885. func parseFloat(s string) (float64, error) {
  886. if strings.ContainsAny(s, "pP_") {
  887. return 0, errors.New("unsupported character in float")
  888. }
  889. return strconv.ParseFloat(s, 64)
  890. }