metric.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. // Copyright 2013 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package model
  14. import (
  15. "encoding/json"
  16. "errors"
  17. "fmt"
  18. "regexp"
  19. "sort"
  20. "strconv"
  21. "strings"
  22. "unicode/utf8"
  23. dto "github.com/prometheus/client_model/go"
  24. "go.yaml.in/yaml/v2"
  25. "google.golang.org/protobuf/proto"
  26. )
  27. var (
  28. // NameValidationScheme determines the global default method of the name
  29. // validation to be used by all calls to IsValidMetricName() and LabelName
  30. // IsValid().
  31. //
  32. // Deprecated: This variable should not be used and might be removed in the
  33. // far future. If you wish to stick to the legacy name validation use
  34. // `IsValidLegacyMetricName()` and `LabelName.IsValidLegacy()` methods
  35. // instead. This variable is here as an escape hatch for emergency cases,
  36. // given the recent change from `LegacyValidation` to `UTF8Validation`, e.g.,
  37. // to delay UTF-8 migrations in time or aid in debugging unforeseen results of
  38. // the change. In such a case, a temporary assignment to `LegacyValidation`
  39. // value in the `init()` function in your main.go or so, could be considered.
  40. //
  41. // Historically we opted for a global variable for feature gating different
  42. // validation schemes in operations that were not otherwise easily adjustable
  43. // (e.g. Labels yaml unmarshaling). That could have been a mistake, a separate
  44. // Labels structure or package might have been a better choice. Given the
  45. // change was made and many upgraded the common already, we live this as-is
  46. // with this warning and learning for the future.
  47. NameValidationScheme = UTF8Validation
  48. // NameEscapingScheme defines the default way that names will be escaped when
  49. // presented to systems that do not support UTF-8 names. If the Content-Type
  50. // "escaping" term is specified, that will override this value.
  51. // NameEscapingScheme should not be set to the NoEscaping value. That string
  52. // is used in content negotiation to indicate that a system supports UTF-8 and
  53. // has that feature enabled.
  54. NameEscapingScheme = UnderscoreEscaping
  55. )
  56. // ValidationScheme is a Go enum for determining how metric and label names will
  57. // be validated by this library.
  58. type ValidationScheme int
  59. const (
  60. // UnsetValidation represents an undefined ValidationScheme.
  61. // Should not be used in practice.
  62. UnsetValidation ValidationScheme = iota
  63. // LegacyValidation is a setting that requires that all metric and label names
  64. // conform to the original Prometheus character requirements described by
  65. // MetricNameRE and LabelNameRE.
  66. LegacyValidation
  67. // UTF8Validation only requires that metric and label names be valid UTF-8
  68. // strings.
  69. UTF8Validation
  70. )
  71. var _ interface {
  72. yaml.Marshaler
  73. yaml.Unmarshaler
  74. json.Marshaler
  75. json.Unmarshaler
  76. fmt.Stringer
  77. } = new(ValidationScheme)
  78. // String returns the string representation of s.
  79. func (s ValidationScheme) String() string {
  80. switch s {
  81. case UnsetValidation:
  82. return "unset"
  83. case LegacyValidation:
  84. return "legacy"
  85. case UTF8Validation:
  86. return "utf8"
  87. default:
  88. panic(fmt.Errorf("unhandled ValidationScheme: %d", s))
  89. }
  90. }
  91. // MarshalYAML implements the yaml.Marshaler interface.
  92. func (s ValidationScheme) MarshalYAML() (any, error) {
  93. switch s {
  94. case UnsetValidation:
  95. return "", nil
  96. case LegacyValidation, UTF8Validation:
  97. return s.String(), nil
  98. default:
  99. panic(fmt.Errorf("unhandled ValidationScheme: %d", s))
  100. }
  101. }
  102. // UnmarshalYAML implements the yaml.Unmarshaler interface.
  103. func (s *ValidationScheme) UnmarshalYAML(unmarshal func(any) error) error {
  104. var scheme string
  105. if err := unmarshal(&scheme); err != nil {
  106. return err
  107. }
  108. return s.Set(scheme)
  109. }
  110. // MarshalJSON implements the json.Marshaler interface.
  111. func (s ValidationScheme) MarshalJSON() ([]byte, error) {
  112. switch s {
  113. case UnsetValidation:
  114. return json.Marshal("")
  115. case UTF8Validation, LegacyValidation:
  116. return json.Marshal(s.String())
  117. default:
  118. return nil, fmt.Errorf("unhandled ValidationScheme: %d", s)
  119. }
  120. }
  121. // UnmarshalJSON implements the json.Unmarshaler interface.
  122. func (s *ValidationScheme) UnmarshalJSON(bytes []byte) error {
  123. var repr string
  124. if err := json.Unmarshal(bytes, &repr); err != nil {
  125. return err
  126. }
  127. return s.Set(repr)
  128. }
  129. // Set implements the pflag.Value interface.
  130. func (s *ValidationScheme) Set(text string) error {
  131. switch text {
  132. case "":
  133. // Don't change the value.
  134. case LegacyValidation.String():
  135. *s = LegacyValidation
  136. case UTF8Validation.String():
  137. *s = UTF8Validation
  138. default:
  139. return fmt.Errorf("unrecognized ValidationScheme: %q", text)
  140. }
  141. return nil
  142. }
  143. // IsValidMetricName returns whether metricName is valid according to s.
  144. func (s ValidationScheme) IsValidMetricName(metricName string) bool {
  145. switch s {
  146. case LegacyValidation:
  147. if len(metricName) == 0 {
  148. return false
  149. }
  150. for i, b := range metricName {
  151. if !isValidLegacyRune(b, i) {
  152. return false
  153. }
  154. }
  155. return true
  156. case UTF8Validation:
  157. if len(metricName) == 0 {
  158. return false
  159. }
  160. return utf8.ValidString(metricName)
  161. default:
  162. panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s.String()))
  163. }
  164. }
  165. // IsValidLabelName returns whether labelName is valid according to s.
  166. func (s ValidationScheme) IsValidLabelName(labelName string) bool {
  167. switch s {
  168. case LegacyValidation:
  169. if len(labelName) == 0 {
  170. return false
  171. }
  172. for i, b := range labelName {
  173. // TODO: Apply De Morgan's law. Make sure there are tests for this.
  174. if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { //nolint:staticcheck
  175. return false
  176. }
  177. }
  178. return true
  179. case UTF8Validation:
  180. if len(labelName) == 0 {
  181. return false
  182. }
  183. return utf8.ValidString(labelName)
  184. default:
  185. panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s))
  186. }
  187. }
  188. // Type implements the pflag.Value interface.
  189. func (ValidationScheme) Type() string {
  190. return "validationScheme"
  191. }
  192. type EscapingScheme int
  193. const (
  194. // NoEscaping indicates that a name will not be escaped. Unescaped names that
  195. // do not conform to the legacy validity check will use a new exposition
  196. // format syntax that will be officially standardized in future versions.
  197. NoEscaping EscapingScheme = iota
  198. // UnderscoreEscaping replaces all legacy-invalid characters with underscores.
  199. UnderscoreEscaping
  200. // DotsEscaping is similar to UnderscoreEscaping, except that dots are
  201. // converted to `_dot_` and pre-existing underscores are converted to `__`.
  202. DotsEscaping
  203. // ValueEncodingEscaping prepends the name with `U__` and replaces all invalid
  204. // characters with the unicode value, surrounded by underscores. Single
  205. // underscores are replaced with double underscores.
  206. ValueEncodingEscaping
  207. )
  208. const (
  209. // EscapingKey is the key in an Accept or Content-Type header that defines how
  210. // metric and label names that do not conform to the legacy character
  211. // requirements should be escaped when being scraped by a legacy prometheus
  212. // system. If a system does not explicitly pass an escaping parameter in the
  213. // Accept header, the default NameEscapingScheme will be used.
  214. EscapingKey = "escaping"
  215. // Possible values for Escaping Key.
  216. AllowUTF8 = "allow-utf-8" // No escaping required.
  217. EscapeUnderscores = "underscores"
  218. EscapeDots = "dots"
  219. EscapeValues = "values"
  220. )
  221. // MetricNameRE is a regular expression matching valid metric
  222. // names. Note that the IsValidMetricName function performs the same
  223. // check but faster than a match with this regular expression.
  224. var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
  225. // A Metric is similar to a LabelSet, but the key difference is that a Metric is
  226. // a singleton and refers to one and only one stream of samples.
  227. type Metric LabelSet
  228. // Equal compares the metrics.
  229. func (m Metric) Equal(o Metric) bool {
  230. return LabelSet(m).Equal(LabelSet(o))
  231. }
  232. // Before compares the metrics' underlying label sets.
  233. func (m Metric) Before(o Metric) bool {
  234. return LabelSet(m).Before(LabelSet(o))
  235. }
  236. // Clone returns a copy of the Metric.
  237. func (m Metric) Clone() Metric {
  238. clone := make(Metric, len(m))
  239. for k, v := range m {
  240. clone[k] = v
  241. }
  242. return clone
  243. }
  244. func (m Metric) String() string {
  245. metricName, hasName := m[MetricNameLabel]
  246. numLabels := len(m) - 1
  247. if !hasName {
  248. numLabels = len(m)
  249. }
  250. labelStrings := make([]string, 0, numLabels)
  251. for label, value := range m {
  252. if label != MetricNameLabel {
  253. labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value))
  254. }
  255. }
  256. switch numLabels {
  257. case 0:
  258. if hasName {
  259. return string(metricName)
  260. }
  261. return "{}"
  262. default:
  263. sort.Strings(labelStrings)
  264. return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", "))
  265. }
  266. }
  267. // Fingerprint returns a Metric's Fingerprint.
  268. func (m Metric) Fingerprint() Fingerprint {
  269. return LabelSet(m).Fingerprint()
  270. }
  271. // FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
  272. // algorithm, which is, however, more susceptible to hash collisions.
  273. func (m Metric) FastFingerprint() Fingerprint {
  274. return LabelSet(m).FastFingerprint()
  275. }
  276. // IsValidMetricName returns true iff name matches the pattern of MetricNameRE
  277. // for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is
  278. // selected.
  279. //
  280. // Deprecated: This function should not be used and might be removed in the future.
  281. // Use [ValidationScheme.IsValidMetricName] instead.
  282. func IsValidMetricName(n LabelValue) bool {
  283. return NameValidationScheme.IsValidMetricName(string(n))
  284. }
  285. // IsValidLegacyMetricName is similar to IsValidMetricName but always uses the
  286. // legacy validation scheme regardless of the value of NameValidationScheme.
  287. // This function, however, does not use MetricNameRE for the check but a much
  288. // faster hardcoded implementation.
  289. //
  290. // Deprecated: This function should not be used and might be removed in the future.
  291. // Use [LegacyValidation.IsValidMetricName] instead.
  292. func IsValidLegacyMetricName(n string) bool {
  293. return LegacyValidation.IsValidMetricName(n)
  294. }
  295. // EscapeMetricFamily escapes the given metric names and labels with the given
  296. // escaping scheme. Returns a new object that uses the same pointers to fields
  297. // when possible and creates new escaped versions so as not to mutate the
  298. // input.
  299. func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily {
  300. if v == nil {
  301. return nil
  302. }
  303. if scheme == NoEscaping {
  304. return v
  305. }
  306. out := &dto.MetricFamily{
  307. Help: v.Help,
  308. Type: v.Type,
  309. Unit: v.Unit,
  310. }
  311. // If the name is nil, copy as-is, don't try to escape.
  312. if v.Name == nil || IsValidLegacyMetricName(v.GetName()) {
  313. out.Name = v.Name
  314. } else {
  315. out.Name = proto.String(EscapeName(v.GetName(), scheme))
  316. }
  317. for _, m := range v.Metric {
  318. if !metricNeedsEscaping(m) {
  319. out.Metric = append(out.Metric, m)
  320. continue
  321. }
  322. escaped := &dto.Metric{
  323. Gauge: m.Gauge,
  324. Counter: m.Counter,
  325. Summary: m.Summary,
  326. Untyped: m.Untyped,
  327. Histogram: m.Histogram,
  328. TimestampMs: m.TimestampMs,
  329. }
  330. for _, l := range m.Label {
  331. if l.GetName() == MetricNameLabel {
  332. if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) {
  333. escaped.Label = append(escaped.Label, l)
  334. continue
  335. }
  336. escaped.Label = append(escaped.Label, &dto.LabelPair{
  337. Name: proto.String(MetricNameLabel),
  338. Value: proto.String(EscapeName(l.GetValue(), scheme)),
  339. })
  340. continue
  341. }
  342. if l.Name == nil || IsValidLegacyMetricName(l.GetName()) {
  343. escaped.Label = append(escaped.Label, l)
  344. continue
  345. }
  346. escaped.Label = append(escaped.Label, &dto.LabelPair{
  347. Name: proto.String(EscapeName(l.GetName(), scheme)),
  348. Value: l.Value,
  349. })
  350. }
  351. out.Metric = append(out.Metric, escaped)
  352. }
  353. return out
  354. }
  355. func metricNeedsEscaping(m *dto.Metric) bool {
  356. for _, l := range m.Label {
  357. if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) {
  358. return true
  359. }
  360. if !IsValidLegacyMetricName(l.GetName()) {
  361. return true
  362. }
  363. }
  364. return false
  365. }
  366. // EscapeName escapes the incoming name according to the provided escaping
  367. // scheme. Depending on the rules of escaping, this may cause no change in the
  368. // string that is returned. (Especially NoEscaping, which by definition is a
  369. // noop). This function does not do any validation of the name.
  370. func EscapeName(name string, scheme EscapingScheme) string {
  371. if len(name) == 0 {
  372. return name
  373. }
  374. var escaped strings.Builder
  375. switch scheme {
  376. case NoEscaping:
  377. return name
  378. case UnderscoreEscaping:
  379. if IsValidLegacyMetricName(name) {
  380. return name
  381. }
  382. for i, b := range name {
  383. if isValidLegacyRune(b, i) {
  384. escaped.WriteRune(b)
  385. } else {
  386. escaped.WriteRune('_')
  387. }
  388. }
  389. return escaped.String()
  390. case DotsEscaping:
  391. // Do not early return for legacy valid names, we still escape underscores.
  392. for i, b := range name {
  393. switch {
  394. case b == '_':
  395. escaped.WriteString("__")
  396. case b == '.':
  397. escaped.WriteString("_dot_")
  398. case isValidLegacyRune(b, i):
  399. escaped.WriteRune(b)
  400. default:
  401. escaped.WriteString("__")
  402. }
  403. }
  404. return escaped.String()
  405. case ValueEncodingEscaping:
  406. if IsValidLegacyMetricName(name) {
  407. return name
  408. }
  409. escaped.WriteString("U__")
  410. for i, b := range name {
  411. switch {
  412. case b == '_':
  413. escaped.WriteString("__")
  414. case isValidLegacyRune(b, i):
  415. escaped.WriteRune(b)
  416. case !utf8.ValidRune(b):
  417. escaped.WriteString("_FFFD_")
  418. default:
  419. escaped.WriteRune('_')
  420. escaped.WriteString(strconv.FormatInt(int64(b), 16))
  421. escaped.WriteRune('_')
  422. }
  423. }
  424. return escaped.String()
  425. default:
  426. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  427. }
  428. }
  429. // lower function taken from strconv.atoi.
  430. func lower(c byte) byte {
  431. return c | ('x' - 'X')
  432. }
  433. // UnescapeName unescapes the incoming name according to the provided escaping
  434. // scheme if possible. Some schemes are partially or totally non-roundtripable.
  435. // If any error is enountered, returns the original input.
  436. func UnescapeName(name string, scheme EscapingScheme) string {
  437. if len(name) == 0 {
  438. return name
  439. }
  440. switch scheme {
  441. case NoEscaping:
  442. return name
  443. case UnderscoreEscaping:
  444. // It is not possible to unescape from underscore replacement.
  445. return name
  446. case DotsEscaping:
  447. name = strings.ReplaceAll(name, "_dot_", ".")
  448. name = strings.ReplaceAll(name, "__", "_")
  449. return name
  450. case ValueEncodingEscaping:
  451. escapedName, found := strings.CutPrefix(name, "U__")
  452. if !found {
  453. return name
  454. }
  455. var unescaped strings.Builder
  456. TOP:
  457. for i := 0; i < len(escapedName); i++ {
  458. // All non-underscores are treated normally.
  459. if escapedName[i] != '_' {
  460. unescaped.WriteByte(escapedName[i])
  461. continue
  462. }
  463. i++
  464. if i >= len(escapedName) {
  465. return name
  466. }
  467. // A double underscore is a single underscore.
  468. if escapedName[i] == '_' {
  469. unescaped.WriteByte('_')
  470. continue
  471. }
  472. // We think we are in a UTF-8 code, process it.
  473. var utf8Val uint
  474. for j := 0; i < len(escapedName); j++ {
  475. // This is too many characters for a utf8 value based on the MaxRune
  476. // value of '\U0010FFFF'.
  477. if j >= 6 {
  478. return name
  479. }
  480. // Found a closing underscore, convert to a rune, check validity, and append.
  481. if escapedName[i] == '_' {
  482. utf8Rune := rune(utf8Val)
  483. if !utf8.ValidRune(utf8Rune) {
  484. return name
  485. }
  486. unescaped.WriteRune(utf8Rune)
  487. continue TOP
  488. }
  489. r := lower(escapedName[i])
  490. utf8Val *= 16
  491. switch {
  492. case r >= '0' && r <= '9':
  493. utf8Val += uint(r) - '0'
  494. case r >= 'a' && r <= 'f':
  495. utf8Val += uint(r) - 'a' + 10
  496. default:
  497. return name
  498. }
  499. i++
  500. }
  501. // Didn't find closing underscore, invalid.
  502. return name
  503. }
  504. return unescaped.String()
  505. default:
  506. panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
  507. }
  508. }
  509. func isValidLegacyRune(b rune, i int) bool {
  510. return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9' && i > 0)
  511. }
  512. func (e EscapingScheme) String() string {
  513. switch e {
  514. case NoEscaping:
  515. return AllowUTF8
  516. case UnderscoreEscaping:
  517. return EscapeUnderscores
  518. case DotsEscaping:
  519. return EscapeDots
  520. case ValueEncodingEscaping:
  521. return EscapeValues
  522. default:
  523. panic(fmt.Sprintf("unknown format scheme %d", e))
  524. }
  525. }
  526. func ToEscapingScheme(s string) (EscapingScheme, error) {
  527. if s == "" {
  528. return NoEscaping, errors.New("got empty string instead of escaping scheme")
  529. }
  530. switch s {
  531. case AllowUTF8:
  532. return NoEscaping, nil
  533. case EscapeUnderscores:
  534. return UnderscoreEscaping, nil
  535. case EscapeDots:
  536. return DotsEscaping, nil
  537. case EscapeValues:
  538. return ValueEncodingEscaping, nil
  539. default:
  540. return NoEscaping, fmt.Errorf("unknown format scheme %s", s)
  541. }
  542. }