nodemetrics.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. package metrics
  2. import (
  3. "strings"
  4. "github.com/kubecost/cost-model/pkg/clustercache"
  5. "github.com/kubecost/cost-model/pkg/log"
  6. "github.com/kubecost/cost-model/pkg/prom"
  7. "github.com/prometheus/client_golang/prometheus"
  8. dto "github.com/prometheus/client_model/go"
  9. v1 "k8s.io/api/core/v1"
  10. )
  11. var (
  12. conditionStatuses = []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionUnknown}
  13. )
  14. //--------------------------------------------------------------------------
  15. // KubeNodeCollector
  16. //--------------------------------------------------------------------------
  17. // KubeNodeCollector is a prometheus collector that generates node sourced metrics.
  18. type KubeNodeCollector struct {
  19. KubeClusterCache clustercache.ClusterCache
  20. }
  21. // Describe sends the super-set of all possible descriptors of metrics
  22. // collected by this Collector.
  23. func (nsac KubeNodeCollector) Describe(ch chan<- *prometheus.Desc) {
  24. ch <- prometheus.NewDesc("kube_node_status_capacity", "Node resource capacity.", []string{}, nil)
  25. ch <- prometheus.NewDesc("kube_node_status_capacity_memory_bytes", "node capacity memory bytes", []string{}, nil)
  26. ch <- prometheus.NewDesc("kube_node_status_capacity_cpu_cores", "node capacity cpu cores", []string{}, nil)
  27. ch <- prometheus.NewDesc("kube_node_status_allocatable", "The allocatable for different resources of a node that are available for scheduling.", []string{}, nil)
  28. ch <- prometheus.NewDesc("kube_node_status_allocatable_cpu_cores", "The allocatable cpu cores.", []string{}, nil)
  29. ch <- prometheus.NewDesc("kube_node_status_allocatable_memory_bytes", "The allocatable memory in bytes.", []string{}, nil)
  30. ch <- prometheus.NewDesc("kube_node_labels", "all labels for each node prefixed with label_", []string{}, nil)
  31. ch <- prometheus.NewDesc("kube_node_status_condition", "The condition of a cluster node.", []string{}, nil)
  32. }
  33. // Collect is called by the Prometheus registry when collecting metrics.
  34. func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
  35. nodes := nsac.KubeClusterCache.GetAllNodes()
  36. for _, node := range nodes {
  37. nodeName := node.GetName()
  38. // Node Capacity
  39. for resourceName, quantity := range node.Status.Capacity {
  40. resource, unit, value := toResourceUnitValue(resourceName, quantity)
  41. // failed to parse the resource type
  42. if resource == "" {
  43. log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
  44. continue
  45. }
  46. // KSM v1 Emission
  47. if resource == "cpu" {
  48. ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, value)
  49. }
  50. if resource == "memory" {
  51. ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, value)
  52. }
  53. ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, value)
  54. }
  55. // Node Allocatable Resources
  56. for resourceName, quantity := range node.Status.Allocatable {
  57. resource, unit, value := toResourceUnitValue(resourceName, quantity)
  58. // failed to parse the resource type
  59. if resource == "" {
  60. log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
  61. continue
  62. }
  63. // KSM v1 Emission
  64. if resource == "cpu" {
  65. ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value)
  66. }
  67. if resource == "memory" {
  68. ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value)
  69. }
  70. ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value)
  71. }
  72. // node labels
  73. labelNames, labelValues := prom.KubePrependQualifierToLabels(node.GetLabels(), "label_")
  74. ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues)
  75. // kube_node_status_condition
  76. // Collect node conditions and while default to false.
  77. for _, c := range node.Status.Conditions {
  78. conditions := getConditions(c.Status)
  79. for _, cond := range conditions {
  80. ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value)
  81. }
  82. }
  83. }
  84. }
  85. //--------------------------------------------------------------------------
  86. // KubeNodeStatusCapacityMetric
  87. //--------------------------------------------------------------------------
  88. // KubeNodeStatusCapacityMetric is a prometheus.Metric
  89. type KubeNodeStatusCapacityMetric struct {
  90. fqName string
  91. help string
  92. resource string
  93. unit string
  94. node string
  95. value float64
  96. }
  97. // Creates a new KubeNodeStatusCapacityMetric, implementation of prometheus.Metric
  98. func newKubeNodeStatusCapacityMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusCapacityMetric {
  99. return KubeNodeStatusCapacityMetric{
  100. fqName: fqname,
  101. help: "kube_node_status_capacity node capacity",
  102. node: node,
  103. resource: resource,
  104. unit: unit,
  105. value: value,
  106. }
  107. }
  108. // Desc returns the descriptor for the Metric. This method idempotently
  109. // returns the same descriptor throughout the lifetime of the Metric.
  110. func (kpcrr KubeNodeStatusCapacityMetric) Desc() *prometheus.Desc {
  111. l := prometheus.Labels{
  112. "node": kpcrr.node,
  113. "resource": kpcrr.resource,
  114. "unit": kpcrr.unit,
  115. }
  116. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  117. }
  118. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  119. func (kpcrr KubeNodeStatusCapacityMetric) Write(m *dto.Metric) error {
  120. m.Gauge = &dto.Gauge{
  121. Value: &kpcrr.value,
  122. }
  123. m.Label = []*dto.LabelPair{
  124. {
  125. Name: toStringPtr("node"),
  126. Value: &kpcrr.node,
  127. },
  128. {
  129. Name: toStringPtr("resource"),
  130. Value: &kpcrr.resource,
  131. },
  132. {
  133. Name: toStringPtr("unit"),
  134. Value: &kpcrr.unit,
  135. },
  136. }
  137. return nil
  138. }
  139. //--------------------------------------------------------------------------
  140. // KubeNodeStatusCapacityMemoryBytesMetric
  141. //--------------------------------------------------------------------------
  142. // KubeNodeStatusCapacityMemoryBytesMetric is a prometheus.Metric used to encode
  143. // a duplicate of the deprecated kube-state-metrics metric
  144. // kube_node_status_capacity_memory_bytes
  145. type KubeNodeStatusCapacityMemoryBytesMetric struct {
  146. fqName string
  147. help string
  148. bytes float64
  149. node string
  150. }
  151. // Creates a new KubeNodeStatusCapacityMemoryBytesMetric, implementation of prometheus.Metric
  152. func newKubeNodeStatusCapacityMemoryBytesMetric(fqname string, node string, bytes float64) KubeNodeStatusCapacityMemoryBytesMetric {
  153. return KubeNodeStatusCapacityMemoryBytesMetric{
  154. fqName: fqname,
  155. help: "kube_node_status_capacity_memory_bytes Node Capacity Memory Bytes",
  156. node: node,
  157. bytes: bytes,
  158. }
  159. }
  160. // Desc returns the descriptor for the Metric. This method idempotently
  161. // returns the same descriptor throughout the lifetime of the Metric.
  162. func (nam KubeNodeStatusCapacityMemoryBytesMetric) Desc() *prometheus.Desc {
  163. l := prometheus.Labels{"node": nam.node}
  164. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  165. }
  166. // Write encodes the Metric into a "Metric" Protocol Buffer data
  167. // transmission object.
  168. func (nam KubeNodeStatusCapacityMemoryBytesMetric) Write(m *dto.Metric) error {
  169. m.Gauge = &dto.Gauge{
  170. Value: &nam.bytes,
  171. }
  172. m.Label = []*dto.LabelPair{
  173. {
  174. Name: toStringPtr("node"),
  175. Value: &nam.node,
  176. },
  177. }
  178. return nil
  179. }
  180. //--------------------------------------------------------------------------
  181. // KubeNodeStatusCapacityCPUCoresMetric
  182. //--------------------------------------------------------------------------
  183. // KubeNodeStatusCapacityCPUCoresMetric is a prometheus.Metric used to encode
  184. // a duplicate of the deprecated kube-state-metrics metric
  185. // kube_node_status_capacity_memory_bytes
  186. type KubeNodeStatusCapacityCPUCoresMetric struct {
  187. fqName string
  188. help string
  189. cores float64
  190. node string
  191. }
  192. // Creates a new KubeNodeStatusCapacityCPUCoresMetric, implementation of prometheus.Metric
  193. func newKubeNodeStatusCapacityCPUCoresMetric(fqname string, node string, cores float64) KubeNodeStatusCapacityCPUCoresMetric {
  194. return KubeNodeStatusCapacityCPUCoresMetric{
  195. fqName: fqname,
  196. help: "kube_node_status_capacity_cpu_cores Node Capacity CPU Cores",
  197. cores: cores,
  198. node: node,
  199. }
  200. }
  201. // Desc returns the descriptor for the Metric. This method idempotently
  202. // returns the same descriptor throughout the lifetime of the Metric.
  203. func (nam KubeNodeStatusCapacityCPUCoresMetric) Desc() *prometheus.Desc {
  204. l := prometheus.Labels{"node": nam.node}
  205. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  206. }
  207. // Write encodes the Metric into a "Metric" Protocol Buffer data
  208. // transmission object.
  209. func (nam KubeNodeStatusCapacityCPUCoresMetric) Write(m *dto.Metric) error {
  210. m.Gauge = &dto.Gauge{
  211. Value: &nam.cores,
  212. }
  213. m.Label = []*dto.LabelPair{
  214. {
  215. Name: toStringPtr("node"),
  216. Value: &nam.node,
  217. },
  218. }
  219. return nil
  220. }
  221. //--------------------------------------------------------------------------
  222. // KubeNodeLabelsCollector
  223. //--------------------------------------------------------------------------
  224. //
  225. // We use this to emit kube_node_labels with all of a node's labels, regardless
  226. // of the whitelist setting introduced in KSM v2. See
  227. // https://github.com/kubernetes/kube-state-metrics/issues/1270#issuecomment-712986441
  228. //--------------------------------------------------------------------------
  229. // KubeNodeLabelsMetric
  230. //--------------------------------------------------------------------------
  231. // KubeNodeLabelsMetric is a prometheus.Metric used to encode
  232. // a duplicate of the deprecated kube-state-metrics metric
  233. // kube_node_labels
  234. type KubeNodeLabelsMetric struct {
  235. fqName string
  236. help string
  237. labelNames []string
  238. labelValues []string
  239. node string
  240. }
  241. // Creates a new KubeNodeLabelsMetric, implementation of prometheus.Metric
  242. func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, labelValues []string) KubeNodeLabelsMetric {
  243. return KubeNodeLabelsMetric{
  244. fqName: fqname,
  245. labelNames: labelNames,
  246. labelValues: labelValues,
  247. help: "kube_node_labels all labels for each node prefixed with label_",
  248. node: node,
  249. }
  250. }
  251. // Desc returns the descriptor for the Metric. This method idempotently
  252. // returns the same descriptor throughout the lifetime of the Metric.
  253. func (nam KubeNodeLabelsMetric) Desc() *prometheus.Desc {
  254. l := prometheus.Labels{
  255. "node": nam.node,
  256. }
  257. return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
  258. }
  259. // Write encodes the Metric into a "Metric" Protocol Buffer data
  260. // transmission object.
  261. func (nam KubeNodeLabelsMetric) Write(m *dto.Metric) error {
  262. h := float64(1)
  263. m.Gauge = &dto.Gauge{
  264. Value: &h,
  265. }
  266. var labels []*dto.LabelPair
  267. for i := range nam.labelNames {
  268. labels = append(labels, &dto.LabelPair{
  269. Name: &nam.labelNames[i],
  270. Value: &nam.labelValues[i],
  271. })
  272. }
  273. nodeString := "node"
  274. labels = append(labels, &dto.LabelPair{Name: &nodeString, Value: &nam.node})
  275. m.Label = labels
  276. return nil
  277. }
  278. //--------------------------------------------------------------------------
  279. // KubeNodeStatusConditionMetric
  280. //--------------------------------------------------------------------------
  281. // KubeNodeStatusConditionMetric
  282. type KubeNodeStatusConditionMetric struct {
  283. fqName string
  284. help string
  285. node string
  286. condition string
  287. status string
  288. value float64
  289. }
  290. // Creates a new KubeNodeStatusConditionMetric, implementation of prometheus.Metric
  291. func newKubeNodeStatusConditionMetric(node, fqname, condition, status string, value float64) KubeNodeStatusConditionMetric {
  292. return KubeNodeStatusConditionMetric{
  293. fqName: fqname,
  294. help: "kube_node_status_condition condition status for nodes",
  295. node: node,
  296. condition: condition,
  297. status: status,
  298. value: value,
  299. }
  300. }
  301. // Desc returns the descriptor for the Metric. This method idempotently
  302. // returns the same descriptor throughout the lifetime of the Metric.
  303. func (nam KubeNodeStatusConditionMetric) Desc() *prometheus.Desc {
  304. l := prometheus.Labels{
  305. "node": nam.node,
  306. "condition": nam.condition,
  307. "status": nam.status,
  308. }
  309. return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
  310. }
  311. // Write encodes the Metric into a "Metric" Protocol Buffer data
  312. // transmission object.
  313. func (nam KubeNodeStatusConditionMetric) Write(m *dto.Metric) error {
  314. m.Gauge = &dto.Gauge{
  315. Value: &nam.value,
  316. }
  317. m.Label = []*dto.LabelPair{
  318. {
  319. Name: toStringPtr("node"),
  320. Value: &nam.node,
  321. },
  322. {
  323. Name: toStringPtr("condition"),
  324. Value: &nam.condition,
  325. },
  326. {
  327. Name: toStringPtr("status"),
  328. Value: &nam.status,
  329. },
  330. }
  331. return nil
  332. }
  333. // helper type for status condition reporting and metric rollup
  334. type statusCondition struct {
  335. status string
  336. value float64
  337. }
  338. // retrieves the total status conditions and the comparison to the provided condition
  339. func getConditions(cs v1.ConditionStatus) []*statusCondition {
  340. ms := make([]*statusCondition, len(conditionStatuses))
  341. for i, status := range conditionStatuses {
  342. ms[i] = &statusCondition{
  343. status: strings.ToLower(string(status)),
  344. value: boolFloat64(cs == status),
  345. }
  346. }
  347. return ms
  348. }
  349. //--------------------------------------------------------------------------
  350. // KubeNodeStatusAllocatableMetric
  351. //--------------------------------------------------------------------------
  352. // KubeNodeStatusAllocatableMetric is a prometheus.Metric
  353. type KubeNodeStatusAllocatableMetric struct {
  354. fqName string
  355. help string
  356. resource string
  357. unit string
  358. node string
  359. value float64
  360. }
  361. // Creates a new KubeNodeStatusAllocatableMetric, implementation of prometheus.Metric
  362. func newKubeNodeStatusAllocatableMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusAllocatableMetric {
  363. return KubeNodeStatusAllocatableMetric{
  364. fqName: fqname,
  365. help: "kube_node_status_allocatable node allocatable",
  366. node: node,
  367. resource: resource,
  368. unit: unit,
  369. value: value,
  370. }
  371. }
  372. // Desc returns the descriptor for the Metric. This method idempotently
  373. // returns the same descriptor throughout the lifetime of the Metric.
  374. func (kpcrr KubeNodeStatusAllocatableMetric) Desc() *prometheus.Desc {
  375. l := prometheus.Labels{
  376. "node": kpcrr.node,
  377. "resource": kpcrr.resource,
  378. "unit": kpcrr.unit,
  379. }
  380. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  381. }
  382. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  383. func (kpcrr KubeNodeStatusAllocatableMetric) Write(m *dto.Metric) error {
  384. m.Gauge = &dto.Gauge{
  385. Value: &kpcrr.value,
  386. }
  387. m.Label = []*dto.LabelPair{
  388. {
  389. Name: toStringPtr("node"),
  390. Value: &kpcrr.node,
  391. },
  392. {
  393. Name: toStringPtr("resource"),
  394. Value: &kpcrr.resource,
  395. },
  396. {
  397. Name: toStringPtr("unit"),
  398. Value: &kpcrr.unit,
  399. },
  400. }
  401. return nil
  402. }
  403. //--------------------------------------------------------------------------
  404. // KubeNodeStatusAllocatableCPUCoresMetric
  405. //--------------------------------------------------------------------------
  406. // KubeNodeStatusAllocatableCPUCoresMetric is a prometheus.Metric
  407. type KubeNodeStatusAllocatableCPUCoresMetric struct {
  408. fqName string
  409. help string
  410. resource string
  411. unit string
  412. node string
  413. value float64
  414. }
  415. // Creates a new KubeNodeStatusAllocatableCPUCoresMetric, implementation of prometheus.Metric
  416. func newKubeNodeStatusAllocatableCPUCoresMetric(fqname, node string, value float64) KubeNodeStatusAllocatableCPUCoresMetric {
  417. return KubeNodeStatusAllocatableCPUCoresMetric{
  418. fqName: fqname,
  419. help: "kube_node_status_allocatable_cpu_cores node allocatable cpu cores",
  420. node: node,
  421. value: value,
  422. }
  423. }
  424. // Desc returns the descriptor for the Metric. This method idempotently
  425. // returns the same descriptor throughout the lifetime of the Metric.
  426. func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Desc() *prometheus.Desc {
  427. l := prometheus.Labels{
  428. "node": kpcrr.node,
  429. }
  430. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  431. }
  432. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  433. func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Write(m *dto.Metric) error {
  434. m.Gauge = &dto.Gauge{
  435. Value: &kpcrr.value,
  436. }
  437. m.Label = []*dto.LabelPair{
  438. {
  439. Name: toStringPtr("node"),
  440. Value: &kpcrr.node,
  441. },
  442. }
  443. return nil
  444. }
  445. //--------------------------------------------------------------------------
  446. // KubeNodeStatusAllocatableMemoryBytesMetric
  447. //--------------------------------------------------------------------------
  448. // KubeNodeStatusAllocatableMemoryBytesMetric is a prometheus.Metric
  449. type KubeNodeStatusAllocatableMemoryBytesMetric struct {
  450. fqName string
  451. help string
  452. resource string
  453. unit string
  454. node string
  455. value float64
  456. }
  457. // Creates a new KubeNodeStatusAllocatableMemoryBytesMetric, implementation of prometheus.Metric
  458. func newKubeNodeStatusAllocatableMemoryBytesMetric(fqname, node string, value float64) KubeNodeStatusAllocatableMemoryBytesMetric {
  459. return KubeNodeStatusAllocatableMemoryBytesMetric{
  460. fqName: fqname,
  461. help: "kube_node_status_allocatable_memory_bytes node allocatable memory in bytes",
  462. node: node,
  463. value: value,
  464. }
  465. }
  466. // Desc returns the descriptor for the Metric. This method idempotently
  467. // returns the same descriptor throughout the lifetime of the Metric.
  468. func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Desc() *prometheus.Desc {
  469. l := prometheus.Labels{
  470. "node": kpcrr.node,
  471. }
  472. return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
  473. }
  474. // Write encodes the Metric into a "Metric" Protocol Buffer data transmission object.
  475. func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Write(m *dto.Metric) error {
  476. m.Gauge = &dto.Gauge{
  477. Value: &kpcrr.value,
  478. }
  479. m.Label = []*dto.LabelPair{
  480. {
  481. Name: toStringPtr("node"),
  482. Value: &kpcrr.node,
  483. },
  484. }
  485. return nil
  486. }