kubemetrics.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. package metrics
  2. import (
  3. "fmt"
  4. "strings"
  5. "sync"
  6. "github.com/opencost/opencost/core/pkg/util/promutil"
  7. "github.com/opencost/opencost/pkg/clustercache"
  8. "github.com/prometheus/client_golang/prometheus"
  9. batchv1 "k8s.io/api/batch/v1"
  10. v1 "k8s.io/api/core/v1"
  11. "k8s.io/apimachinery/pkg/api/resource"
  12. "k8s.io/apimachinery/pkg/util/validation"
  13. )
  14. //--------------------------------------------------------------------------
  15. // Kube Metric Registration
  16. //--------------------------------------------------------------------------
// kubeMetricInit guards the registration logic in InitKubeMetrics so that it
// runs at most once, no matter how many times InitKubeMetrics is called.
var kubeMetricInit sync.Once
// KubeMetricsOpts represents our Kubernetes metrics emission options.
// InitKubeMetrics reads these flags to decide which collectors it registers.
type KubeMetricsOpts struct {
	// EmitKubecostControllerMetrics registers the Kubecost service,
	// deployment, and statefulset collectors.
	EmitKubecostControllerMetrics bool
	// EmitNamespaceAnnotations registers the Kubecost namespace collector.
	EmitNamespaceAnnotations bool
	// EmitPodAnnotations registers the Kubecost pod collector.
	EmitPodAnnotations bool
	// EmitKubeStateMetrics registers the full set of Kube* collectors (node,
	// namespace, deployment, pod, PV, PVC, and job). It takes precedence over
	// EmitKubeStateMetricsV1Only.
	EmitKubeStateMetrics bool
	// EmitKubeStateMetricsV1Only is consulted only when EmitKubeStateMetrics
	// is false; it registers the node, namespace, pod-labels, and PV
	// collectors.
	EmitKubeStateMetricsV1Only bool
	// EmitDeprecatedMetrics, when false, makes InitKubeMetrics append a fixed
	// list of metric names (for example kube_pod_status_phase) to the
	// DisabledMetrics of the supplied MetricsConfig.
	EmitDeprecatedMetrics bool
}
  28. // DefaultKubeMetricsOpts returns KubeMetricsOpts with default values set
  29. func DefaultKubeMetricsOpts() *KubeMetricsOpts {
  30. return &KubeMetricsOpts{
  31. EmitKubecostControllerMetrics: true,
  32. EmitNamespaceAnnotations: false,
  33. EmitPodAnnotations: false,
  34. EmitKubeStateMetrics: true,
  35. EmitKubeStateMetricsV1Only: false,
  36. EmitDeprecatedMetrics: false,
  37. }
  38. }
  39. // InitKubeMetrics initializes kubernetes metric emission using the provided options.
  40. func InitKubeMetrics(clusterCache clustercache.ClusterCache, metricsConfig *MetricsConfig, opts *KubeMetricsOpts) {
  41. if opts == nil {
  42. opts = DefaultKubeMetricsOpts()
  43. }
  44. kubeMetricInit.Do(func() {
  45. if !opts.EmitDeprecatedMetrics {
  46. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics,
  47. "kube_pod_container_resource_limits",
  48. "kube_pod_container_resource_limits_memory_bytes",
  49. "kube_pod_container_resource_limits_cpu_cores",
  50. "kube_pod_container_status_restarts_total",
  51. "kube_node_status_condition",
  52. "kube_deployment_status_replicas_available",
  53. "kube_deployment_spec_replicas",
  54. "kube_persistentvolume_status_phase",
  55. "kube_pod_status_phase",
  56. )
  57. }
  58. if opts.EmitKubecostControllerMetrics {
  59. prometheus.MustRegister(KubecostServiceCollector{
  60. KubeClusterCache: clusterCache,
  61. metricsConfig: *metricsConfig,
  62. })
  63. prometheus.MustRegister(KubecostDeploymentCollector{
  64. KubeClusterCache: clusterCache,
  65. metricsConfig: *metricsConfig,
  66. })
  67. prometheus.MustRegister(KubecostStatefulsetCollector{
  68. KubeClusterCache: clusterCache,
  69. metricsConfig: *metricsConfig,
  70. })
  71. }
  72. if opts.EmitPodAnnotations {
  73. prometheus.MustRegister(KubecostPodCollector{
  74. KubeClusterCache: clusterCache,
  75. metricsConfig: *metricsConfig,
  76. })
  77. }
  78. if opts.EmitNamespaceAnnotations {
  79. prometheus.MustRegister(KubecostNamespaceCollector{
  80. KubeClusterCache: clusterCache,
  81. metricsConfig: *metricsConfig,
  82. })
  83. }
  84. if opts.EmitKubeStateMetrics {
  85. prometheus.MustRegister(KubeNodeCollector{
  86. KubeClusterCache: clusterCache,
  87. metricsConfig: *metricsConfig,
  88. })
  89. prometheus.MustRegister(KubeNamespaceCollector{
  90. KubeClusterCache: clusterCache,
  91. metricsConfig: *metricsConfig,
  92. })
  93. prometheus.MustRegister(KubeDeploymentCollector{
  94. KubeClusterCache: clusterCache,
  95. metricsConfig: *metricsConfig,
  96. })
  97. prometheus.MustRegister(KubePodCollector{
  98. KubeClusterCache: clusterCache,
  99. metricsConfig: *metricsConfig,
  100. })
  101. prometheus.MustRegister(KubePVCollector{
  102. KubeClusterCache: clusterCache,
  103. metricsConfig: *metricsConfig,
  104. })
  105. prometheus.MustRegister(KubePVCCollector{
  106. KubeClusterCache: clusterCache,
  107. metricsConfig: *metricsConfig,
  108. })
  109. prometheus.MustRegister(KubeJobCollector{
  110. KubeClusterCache: clusterCache,
  111. metricsConfig: *metricsConfig,
  112. })
  113. } else if opts.EmitKubeStateMetricsV1Only {
  114. // We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
  115. forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
  116. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
  117. prometheus.MustRegister(KubeNodeCollector{
  118. KubeClusterCache: clusterCache,
  119. metricsConfig: *metricsConfig,
  120. })
  121. prometheus.MustRegister(KubeNamespaceCollector{
  122. KubeClusterCache: clusterCache,
  123. metricsConfig: *metricsConfig,
  124. })
  125. prometheus.MustRegister(KubePodLabelsCollector{
  126. KubeClusterCache: clusterCache,
  127. metricsConfig: *metricsConfig,
  128. })
  129. prometheus.MustRegister(KubePVCollector{
  130. KubeClusterCache: clusterCache,
  131. metricsConfig: *metricsConfig,
  132. })
  133. } else {
  134. // We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
  135. forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
  136. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
  137. prometheus.MustRegister(KubePVCollector{
  138. KubeClusterCache: clusterCache,
  139. metricsConfig: *metricsConfig,
  140. })
  141. }
  142. })
  143. }
  144. //--------------------------------------------------------------------------
  145. // Kube Metric Helpers
  146. //--------------------------------------------------------------------------
  147. // getPersistentVolumeClaimClass returns StorageClassName. If no storage class was
  148. // requested, it returns "".
  149. func getPersistentVolumeClaimClass(claim *clustercache.PersistentVolumeClaim) string {
  150. // Use beta annotation first
  151. if class, found := claim.Annotations[v1.BetaStorageClassAnnotation]; found {
  152. return class
  153. }
  154. if claim.Spec.StorageClassName != nil {
  155. return *claim.Spec.StorageClassName
  156. }
  157. // Special non-empty string to indicate absence of storage class.
  158. return ""
  159. }
  160. // toResourceUnitValue accepts a resource name and quantity and returns the sanitized resource, the unit, and the value in the units.
  161. // Returns an empty string for resource and unit if there was a failure.
  162. func toResourceUnitValue(resourceName v1.ResourceName, quantity resource.Quantity) (resource string, unit string, value float64) {
  163. resource = promutil.SanitizeLabelName(string(resourceName))
  164. switch resourceName {
  165. case v1.ResourceCPU:
  166. unit = "core"
  167. value = float64(quantity.MilliValue()) / 1000
  168. return
  169. case v1.ResourceStorage:
  170. fallthrough
  171. case v1.ResourceEphemeralStorage:
  172. fallthrough
  173. case v1.ResourceMemory:
  174. unit = "byte"
  175. value = float64(quantity.Value())
  176. return
  177. case v1.ResourcePods:
  178. unit = "integer"
  179. value = float64(quantity.Value())
  180. return
  181. default:
  182. if isHugePageResourceName(resourceName) || isAttachableVolumeResourceName(resourceName) {
  183. unit = "byte"
  184. value = float64(quantity.Value())
  185. return
  186. }
  187. if isExtendedResourceName(resourceName) {
  188. unit = "integer"
  189. value = float64(quantity.Value())
  190. return
  191. }
  192. }
  193. resource = ""
  194. unit = ""
  195. value = 0.0
  196. return
  197. }
  198. // isHugePageResourceName checks for a huge page container resource name
  199. func isHugePageResourceName(name v1.ResourceName) bool {
  200. return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
  201. }
  202. // isAttachableVolumeResourceName checks for attached volume container resource name
  203. func isAttachableVolumeResourceName(name v1.ResourceName) bool {
  204. return strings.HasPrefix(string(name), v1.ResourceAttachableVolumesPrefix)
  205. }
  206. // isExtendedResourceName checks for extended container resource name
  207. func isExtendedResourceName(name v1.ResourceName) bool {
  208. if isNativeResource(name) || strings.HasPrefix(string(name), v1.DefaultResourceRequestsPrefix) {
  209. return false
  210. }
  211. // Ensure it satisfies the rules in IsQualifiedName() after converted into quota resource name
  212. nameForQuota := fmt.Sprintf("%s%s", v1.DefaultResourceRequestsPrefix, string(name))
  213. if errs := validation.IsQualifiedName(nameForQuota); len(errs) != 0 {
  214. return false
  215. }
  216. return true
  217. }
  218. // isNativeResource checks for a kubernetes.io/ prefixed resource name
  219. func isNativeResource(name v1.ResourceName) bool {
  220. return !strings.Contains(string(name), "/") || isPrefixedNativeResource(name)
  221. }
  222. func isPrefixedNativeResource(name v1.ResourceName) bool {
  223. return strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
  224. }
  225. func failureReason(jc *batchv1.JobCondition, reason string) bool {
  226. if jc == nil {
  227. return false
  228. }
  229. return jc.Reason == reason
  230. }
  231. // boolFloat64 converts a boolean input into a 1 or 0
  232. func boolFloat64(b bool) float64 {
  233. if b {
  234. return 1
  235. }
  236. return 0
  237. }
  238. // toStringPtr is used to create a new string pointer from iteration vars
  239. func toStringPtr(s string) *string { return &s }