nodemetrics.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. package metrics
import (
	"fmt"
	"strings"

	"github.com/opencost/opencost/core/pkg/log"
	"github.com/opencost/opencost/core/pkg/util/promutil"
	"github.com/opencost/opencost/pkg/clustercache"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	v1 "k8s.io/api/core/v1"
)
  11. var (
  12. conditionStatuses = []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionUnknown}
  13. )
  14. //--------------------------------------------------------------------------
  15. // KubeNodeCollector
  16. //--------------------------------------------------------------------------
  17. // KubeNodeCollector is a prometheus collector that generates node sourced metrics.
  18. type KubeNodeCollector struct {
  19. KubeClusterCache clustercache.ClusterCache
  20. metricsConfig MetricsConfig
  21. }
  22. // Describe sends the super-set of all possible descriptors of metrics
  23. // collected by this Collector.
  24. func (nsac KubeNodeCollector) Describe(ch chan<- *prometheus.Desc) {
  25. disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
  26. if _, disabled := disabledMetrics["kube_node_status_capacity"]; !disabled {
  27. ch <- prometheus.NewDesc("kube_node_status_capacity", "Node resource capacity.", []string{}, nil)
  28. }
  29. if _, disabled := disabledMetrics["kube_node_status_capacity_memory_bytes"]; !disabled {
  30. ch <- prometheus.NewDesc("kube_node_status_capacity_memory_bytes", "node capacity memory bytes", []string{}, nil)
  31. }
  32. if _, disabled := disabledMetrics["kube_node_status_capacity_cpu_cores"]; !disabled {
  33. ch <- prometheus.NewDesc("kube_node_status_capacity_cpu_cores", "node capacity cpu cores", []string{}, nil)
  34. }
  35. if _, disabled := disabledMetrics["kube_node_status_allocatable"]; !disabled {
  36. ch <- prometheus.NewDesc("kube_node_status_allocatable", "The allocatable for different resources of a node that are available for scheduling.", []string{}, nil)
  37. }
  38. if _, disabled := disabledMetrics["kube_node_status_allocatable_cpu_cores"]; !disabled {
  39. ch <- prometheus.NewDesc("kube_node_status_allocatable_cpu_cores", "The allocatable cpu cores.", []string{}, nil)
  40. }
  41. if _, disabled := disabledMetrics["kube_node_status_allocatable_memory_bytes"]; !disabled {
  42. ch <- prometheus.NewDesc("kube_node_status_allocatable_memory_bytes", "The allocatable memory in bytes.", []string{}, nil)
  43. }
  44. if _, disabled := disabledMetrics["kube_node_labels"]; !disabled {
  45. ch <- prometheus.NewDesc("kube_node_labels", "all labels for each node prefixed with label_", []string{}, nil)
  46. }
  47. if _, disabled := disabledMetrics["kube_node_status_condition"]; !disabled {
  48. ch <- prometheus.NewDesc("kube_node_status_condition", "The condition of a cluster node.", []string{}, nil)
  49. }
  50. }
  51. // Collect is called by the Prometheus registry when collecting metrics.
  52. func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
  53. nodes := nsac.KubeClusterCache.GetAllNodes()
  54. disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
  55. for _, node := range nodes {
  56. nodeName := node.Name
  57. // Node Capacity
  58. for resourceName, quantity := range node.Status.Capacity {
  59. resource, unit, value := toResourceUnitValue(resourceName, quantity)
  60. // failed to parse the resource type
  61. if resource == "" {
  62. log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
  63. continue
  64. }
  65. // KSM v1 Emission
  66. if _, disabled := disabledMetrics["kube_node_status_capacity_cpu_cores"]; !disabled {
  67. if resource == "cpu" {
  68. ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, value)
  69. }
  70. }
  71. if _, disabled := disabledMetrics["kube_node_status_capacity_memory_bytes"]; !disabled {
  72. if resource == "memory" {
  73. ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, value)
  74. }
  75. }
  76. if _, disabled := disabledMetrics["kube_node_status_capacity"]; !disabled {
  77. ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, value)
  78. }
  79. }
  80. // Node Allocatable Resources
  81. for resourceName, quantity := range node.Status.Allocatable {
  82. resource, unit, value := toResourceUnitValue(resourceName, quantity)
  83. // failed to parse the resource type
  84. if resource == "" {
  85. log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
  86. continue
  87. }
  88. // KSM v1 Emission
  89. if _, disabled := disabledMetrics["kube_node_status_allocatable_cpu_cores"]; !disabled {
  90. if resource == "cpu" {
  91. ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value)
  92. }
  93. }
  94. if _, disabled := disabledMetrics["kube_node_status_allocatable_memory_bytes"]; !disabled {
  95. if resource == "memory" {
  96. ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value)
  97. }
  98. }
  99. if _, disabled := disabledMetrics["kube_node_status_allocatable"]; !disabled {
  100. ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value)
  101. }
  102. }
  103. // node labels
  104. if _, disabled := disabledMetrics["kube_node_labels"]; !disabled {
  105. labelNames, labelValues := promutil.KubePrependQualifierToLabels(promutil.SanitizeLabels(node.Labels), "label_")
  106. ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues)
  107. }
  108. // kube_node_status_condition
  109. // Collect node conditions and while default to false.
  110. if _, disabled := disabledMetrics["kube_node_status_condition"]; !disabled {
  111. for _, c := range node.Status.Conditions {
  112. conditions := getConditions(c.Status)
  113. for _, cond := range conditions {
  114. ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value)
  115. }
  116. }
  117. }
  118. }
  119. }
  120. //--------------------------------------------------------------------------
  121. // KubeNodeStatusCapacityMetric
  122. //--------------------------------------------------------------------------
  123. // KubeNodeStatusCapacityMetric is a prometheus.Metric
  124. type KubeNodeStatusCapacityMetric struct {
  125. fqName string
  126. help string
  127. resource string
  128. unit string
  129. node string
  130. value float64
  131. }
  132. // Creates a new KubeNodeStatusCapacityMetric, implementation of prometheus.Metric
  133. func newKubeNodeStatusCapacityMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusCapacityMetric {
  134. return KubeNodeStatusCapacityMetric{
  135. fqName: fqname,
  136. help: "kube_node_status_capacity node capacity",
  137. node: node,
  138. resource: resource,
  139. unit: unit,
  140. value: value,
  141. }
  142. }
  143. // Desc returns the descriptor for the Metric. This method idempotently
  144. // returns the same descriptor throughout the lifetime of the Metric.
  145. func (kpcrr KubeNodeStatusCapacityMetric) Desc() *prometheus.Desc {
  146. l := prometheus.Labels{
  147. "node": kpcrr.node,
  148. "resource": kpcrr.resource,
  149. "unit": kpcrr.unit,
  150. }
  151. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  152. }
  153. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  154. func (kpcrr KubeNodeStatusCapacityMetric) Write(m *dto.Metric) error {
  155. m.Gauge = &dto.Gauge{
  156. Value: &kpcrr.value,
  157. }
  158. m.Label = []*dto.LabelPair{
  159. {
  160. Name: toStringPtr("node"),
  161. Value: &kpcrr.node,
  162. },
  163. {
  164. Name: toStringPtr("resource"),
  165. Value: &kpcrr.resource,
  166. },
  167. {
  168. Name: toStringPtr("unit"),
  169. Value: &kpcrr.unit,
  170. },
  171. }
  172. return nil
  173. }
  174. //--------------------------------------------------------------------------
  175. // KubeNodeStatusCapacityMemoryBytesMetric
  176. //--------------------------------------------------------------------------
  177. // KubeNodeStatusCapacityMemoryBytesMetric is a prometheus.Metric used to encode
  178. // a duplicate of the deprecated kube-state-metrics metric
  179. // kube_node_status_capacity_memory_bytes
  180. type KubeNodeStatusCapacityMemoryBytesMetric struct {
  181. fqName string
  182. help string
  183. bytes float64
  184. node string
  185. }
  186. // Creates a new KubeNodeStatusCapacityMemoryBytesMetric, implementation of prometheus.Metric
  187. func newKubeNodeStatusCapacityMemoryBytesMetric(fqname string, node string, bytes float64) KubeNodeStatusCapacityMemoryBytesMetric {
  188. return KubeNodeStatusCapacityMemoryBytesMetric{
  189. fqName: fqname,
  190. help: "kube_node_status_capacity_memory_bytes Node Capacity Memory Bytes",
  191. node: node,
  192. bytes: bytes,
  193. }
  194. }
  195. // Desc returns the descriptor for the Metric. This method idempotently
  196. // returns the same descriptor throughout the lifetime of the Metric.
  197. func (nam KubeNodeStatusCapacityMemoryBytesMetric) Desc() *prometheus.Desc {
  198. l := prometheus.Labels{"node": nam.node}
  199. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  200. }
  201. // Write encodes the Metric into a "Metric" Protocol Buffer data
  202. // transmission object.
  203. func (nam KubeNodeStatusCapacityMemoryBytesMetric) Write(m *dto.Metric) error {
  204. m.Gauge = &dto.Gauge{
  205. Value: &nam.bytes,
  206. }
  207. m.Label = []*dto.LabelPair{
  208. {
  209. Name: toStringPtr("node"),
  210. Value: &nam.node,
  211. },
  212. }
  213. return nil
  214. }
  215. //--------------------------------------------------------------------------
  216. // KubeNodeStatusCapacityCPUCoresMetric
  217. //--------------------------------------------------------------------------
  218. // KubeNodeStatusCapacityCPUCoresMetric is a prometheus.Metric used to encode
  219. // a duplicate of the deprecated kube-state-metrics metric
  220. // kube_node_status_capacity_memory_bytes
  221. type KubeNodeStatusCapacityCPUCoresMetric struct {
  222. fqName string
  223. help string
  224. cores float64
  225. node string
  226. }
  227. // Creates a new KubeNodeStatusCapacityCPUCoresMetric, implementation of prometheus.Metric
  228. func newKubeNodeStatusCapacityCPUCoresMetric(fqname string, node string, cores float64) KubeNodeStatusCapacityCPUCoresMetric {
  229. return KubeNodeStatusCapacityCPUCoresMetric{
  230. fqName: fqname,
  231. help: "kube_node_status_capacity_cpu_cores Node Capacity CPU Cores",
  232. cores: cores,
  233. node: node,
  234. }
  235. }
  236. // Desc returns the descriptor for the Metric. This method idempotently
  237. // returns the same descriptor throughout the lifetime of the Metric.
  238. func (nam KubeNodeStatusCapacityCPUCoresMetric) Desc() *prometheus.Desc {
  239. l := prometheus.Labels{"node": nam.node}
  240. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  241. }
  242. // Write encodes the Metric into a "Metric" Protocol Buffer data
  243. // transmission object.
  244. func (nam KubeNodeStatusCapacityCPUCoresMetric) Write(m *dto.Metric) error {
  245. m.Gauge = &dto.Gauge{
  246. Value: &nam.cores,
  247. }
  248. m.Label = []*dto.LabelPair{
  249. {
  250. Name: toStringPtr("node"),
  251. Value: &nam.node,
  252. },
  253. }
  254. return nil
  255. }
  256. //--------------------------------------------------------------------------
  257. // KubeNodeLabelsMetric
  258. //--------------------------------------------------------------------------
  259. // KubeNodeLabelsMetric is a prometheus.Metric used to encode
  260. // a duplicate of the deprecated kube-state-metrics metric
  261. // kube_node_labels
  262. type KubeNodeLabelsMetric struct {
  263. fqName string
  264. help string
  265. labelNames []string
  266. labelValues []string
  267. node string
  268. }
  269. // Creates a new KubeNodeLabelsMetric, implementation of prometheus.Metric
  270. func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, labelValues []string) KubeNodeLabelsMetric {
  271. return KubeNodeLabelsMetric{
  272. fqName: fqname,
  273. labelNames: labelNames,
  274. labelValues: labelValues,
  275. help: "kube_node_labels all labels for each node prefixed with label_",
  276. node: node,
  277. }
  278. }
  279. // Desc returns the descriptor for the Metric. This method idempotently
  280. // returns the same descriptor throughout the lifetime of the Metric.
  281. func (nam KubeNodeLabelsMetric) Desc() *prometheus.Desc {
  282. l := prometheus.Labels{
  283. "node": nam.node,
  284. }
  285. return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
  286. }
  287. // Write encodes the Metric into a "Metric" Protocol Buffer data
  288. // transmission object.
  289. func (nam KubeNodeLabelsMetric) Write(m *dto.Metric) error {
  290. h := float64(1)
  291. m.Gauge = &dto.Gauge{
  292. Value: &h,
  293. }
  294. var labels []*dto.LabelPair
  295. for i := range nam.labelNames {
  296. labels = append(labels, &dto.LabelPair{
  297. Name: &nam.labelNames[i],
  298. Value: &nam.labelValues[i],
  299. })
  300. }
  301. nodeString := "node"
  302. labels = append(labels, &dto.LabelPair{Name: &nodeString, Value: &nam.node})
  303. m.Label = labels
  304. return nil
  305. }
  306. //--------------------------------------------------------------------------
  307. // KubeNodeStatusConditionMetric
  308. //--------------------------------------------------------------------------
  309. // KubeNodeStatusConditionMetric
  310. type KubeNodeStatusConditionMetric struct {
  311. fqName string
  312. help string
  313. node string
  314. condition string
  315. status string
  316. value float64
  317. }
  318. // Creates a new KubeNodeStatusConditionMetric, implementation of prometheus.Metric
  319. func newKubeNodeStatusConditionMetric(node, fqname, condition, status string, value float64) KubeNodeStatusConditionMetric {
  320. return KubeNodeStatusConditionMetric{
  321. fqName: fqname,
  322. help: "kube_node_status_condition condition status for nodes",
  323. node: node,
  324. condition: condition,
  325. status: status,
  326. value: value,
  327. }
  328. }
  329. // Desc returns the descriptor for the Metric. This method idempotently
  330. // returns the same descriptor throughout the lifetime of the Metric.
  331. func (nam KubeNodeStatusConditionMetric) Desc() *prometheus.Desc {
  332. l := prometheus.Labels{
  333. "node": nam.node,
  334. "condition": nam.condition,
  335. "status": nam.status,
  336. }
  337. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  338. }
  339. // Write encodes the Metric into a "Metric" Protocol Buffer data
  340. // transmission object.
  341. func (nam KubeNodeStatusConditionMetric) Write(m *dto.Metric) error {
  342. m.Gauge = &dto.Gauge{
  343. Value: &nam.value,
  344. }
  345. m.Label = []*dto.LabelPair{
  346. {
  347. Name: toStringPtr("node"),
  348. Value: &nam.node,
  349. },
  350. {
  351. Name: toStringPtr("condition"),
  352. Value: &nam.condition,
  353. },
  354. {
  355. Name: toStringPtr("status"),
  356. Value: &nam.status,
  357. },
  358. }
  359. return nil
  360. }
  361. // helper type for status condition reporting and metric rollup
  362. type statusCondition struct {
  363. status string
  364. value float64
  365. }
  366. // retrieves the total status conditions and the comparison to the provided condition
  367. func getConditions(cs v1.ConditionStatus) []*statusCondition {
  368. ms := make([]*statusCondition, len(conditionStatuses))
  369. for i, status := range conditionStatuses {
  370. ms[i] = &statusCondition{
  371. status: strings.ToLower(string(status)),
  372. value: boolFloat64(cs == status),
  373. }
  374. }
  375. return ms
  376. }
  377. //--------------------------------------------------------------------------
  378. // KubeNodeStatusAllocatableMetric
  379. //--------------------------------------------------------------------------
  380. // KubeNodeStatusAllocatableMetric is a prometheus.Metric
  381. type KubeNodeStatusAllocatableMetric struct {
  382. fqName string
  383. help string
  384. resource string
  385. unit string
  386. node string
  387. value float64
  388. }
  389. // Creates a new KubeNodeStatusAllocatableMetric, implementation of prometheus.Metric
  390. func newKubeNodeStatusAllocatableMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusAllocatableMetric {
  391. return KubeNodeStatusAllocatableMetric{
  392. fqName: fqname,
  393. help: "kube_node_status_allocatable node allocatable",
  394. node: node,
  395. resource: resource,
  396. unit: unit,
  397. value: value,
  398. }
  399. }
  400. // Desc returns the descriptor for the Metric. This method idempotently
  401. // returns the same descriptor throughout the lifetime of the Metric.
  402. func (kpcrr KubeNodeStatusAllocatableMetric) Desc() *prometheus.Desc {
  403. l := prometheus.Labels{
  404. "node": kpcrr.node,
  405. "resource": kpcrr.resource,
  406. "unit": kpcrr.unit,
  407. }
  408. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  409. }
  410. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  411. func (kpcrr KubeNodeStatusAllocatableMetric) Write(m *dto.Metric) error {
  412. m.Gauge = &dto.Gauge{
  413. Value: &kpcrr.value,
  414. }
  415. m.Label = []*dto.LabelPair{
  416. {
  417. Name: toStringPtr("node"),
  418. Value: &kpcrr.node,
  419. },
  420. {
  421. Name: toStringPtr("resource"),
  422. Value: &kpcrr.resource,
  423. },
  424. {
  425. Name: toStringPtr("unit"),
  426. Value: &kpcrr.unit,
  427. },
  428. }
  429. return nil
  430. }
  431. //--------------------------------------------------------------------------
  432. // KubeNodeStatusAllocatableCPUCoresMetric
  433. //--------------------------------------------------------------------------
  434. // KubeNodeStatusAllocatableCPUCoresMetric is a prometheus.Metric
  435. type KubeNodeStatusAllocatableCPUCoresMetric struct {
  436. fqName string
  437. help string
  438. resource string
  439. unit string
  440. node string
  441. value float64
  442. }
  443. // Creates a new KubeNodeStatusAllocatableCPUCoresMetric, implementation of prometheus.Metric
  444. func newKubeNodeStatusAllocatableCPUCoresMetric(fqname, node string, value float64) KubeNodeStatusAllocatableCPUCoresMetric {
  445. return KubeNodeStatusAllocatableCPUCoresMetric{
  446. fqName: fqname,
  447. help: "kube_node_status_allocatable_cpu_cores node allocatable cpu cores",
  448. node: node,
  449. value: value,
  450. }
  451. }
  452. // Desc returns the descriptor for the Metric. This method idempotently
  453. // returns the same descriptor throughout the lifetime of the Metric.
  454. func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Desc() *prometheus.Desc {
  455. l := prometheus.Labels{
  456. "node": kpcrr.node,
  457. }
  458. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  459. }
  460. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  461. func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Write(m *dto.Metric) error {
  462. m.Gauge = &dto.Gauge{
  463. Value: &kpcrr.value,
  464. }
  465. m.Label = []*dto.LabelPair{
  466. {
  467. Name: toStringPtr("node"),
  468. Value: &kpcrr.node,
  469. },
  470. }
  471. return nil
  472. }
  473. //--------------------------------------------------------------------------
  474. // KubeNodeStatusAllocatableMemoryBytesMetric
  475. //--------------------------------------------------------------------------
  476. // KubeNodeStatusAllocatableMemoryBytesMetric is a prometheus.Metric
  477. type KubeNodeStatusAllocatableMemoryBytesMetric struct {
  478. fqName string
  479. help string
  480. resource string
  481. unit string
  482. node string
  483. value float64
  484. }
  485. // Creates a new KubeNodeStatusAllocatableMemoryBytesMetric, implementation of prometheus.Metric
  486. func newKubeNodeStatusAllocatableMemoryBytesMetric(fqname, node string, value float64) KubeNodeStatusAllocatableMemoryBytesMetric {
  487. return KubeNodeStatusAllocatableMemoryBytesMetric{
  488. fqName: fqname,
  489. help: "kube_node_status_allocatable_memory_bytes node allocatable memory in bytes",
  490. node: node,
  491. value: value,
  492. }
  493. }
  494. // Desc returns the descriptor for the Metric. This method idempotently
  495. // returns the same descriptor throughout the lifetime of the Metric.
  496. func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Desc() *prometheus.Desc {
  497. l := prometheus.Labels{
  498. "node": kpcrr.node,
  499. }
  500. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  501. }
  502. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  503. func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Write(m *dto.Metric) error {
  504. m.Gauge = &dto.Gauge{
  505. Value: &kpcrr.value,
  506. }
  507. m.Label = []*dto.LabelPair{
  508. {
  509. Name: toStringPtr("node"),
  510. Value: &kpcrr.node,
  511. },
  512. }
  513. return nil
  514. }