kubemetrics.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. package metrics
  2. import (
  3. "fmt"
  4. "strings"
  5. "sync"
  6. "github.com/opencost/opencost/core/pkg/clustercache"
  7. "github.com/opencost/opencost/core/pkg/clusters"
  8. "github.com/opencost/opencost/core/pkg/util/promutil"
  9. "github.com/prometheus/client_golang/prometheus"
  10. batchv1 "k8s.io/api/batch/v1"
  11. v1 "k8s.io/api/core/v1"
  12. "k8s.io/apimachinery/pkg/api/resource"
  13. "k8s.io/apimachinery/pkg/util/validation"
  14. )
  15. //--------------------------------------------------------------------------
  16. // Kube Metric Registration
  17. //--------------------------------------------------------------------------
// kubeMetricInit guards the collector registration performed by
// InitKubeMetrics so that it runs at most once, regardless of how many times
// InitKubeMetrics is called.
var kubeMetricInit sync.Once
// KubeMetricsOpts represents our Kubernetes metrics emission options.
// InitKubeMetrics reads these flags to decide which Prometheus collectors to
// register.
type KubeMetricsOpts struct {
	// EmitKubecostControllerMetrics registers the KubecostServiceCollector,
	// KubecostDeploymentCollector and KubecostStatefulsetCollector.
	EmitKubecostControllerMetrics bool
	// EmitNamespaceAnnotations registers the KubecostNamespaceCollector.
	EmitNamespaceAnnotations bool
	// EmitPodAnnotations registers the KubecostPodCollector.
	EmitPodAnnotations bool
	// EmitKubeStateMetrics registers the node, namespace, deployment, pod,
	// PV, PVC and job collectors.
	EmitKubeStateMetrics bool
	// EmitKubeStateMetricsV1Only is consulted only when EmitKubeStateMetrics
	// is false. It registers the node, namespace, pod-labels and PV
	// collectors, after adding kube_persistentvolume_capacity_bytes and
	// kube_persistentvolume_status_phase to the disabled metrics.
	EmitKubeStateMetricsV1Only bool
	// EmitDeprecatedMetrics, when false, makes InitKubeMetrics append a fixed
	// list of metric names to the MetricsConfig's DisabledMetrics.
	EmitDeprecatedMetrics bool
}
  29. // DefaultKubeMetricsOpts returns KubeMetricsOpts with default values set
  30. func DefaultKubeMetricsOpts() *KubeMetricsOpts {
  31. return &KubeMetricsOpts{
  32. EmitKubecostControllerMetrics: true,
  33. EmitNamespaceAnnotations: false,
  34. EmitPodAnnotations: false,
  35. EmitKubeStateMetrics: true,
  36. EmitKubeStateMetricsV1Only: false,
  37. EmitDeprecatedMetrics: false,
  38. }
  39. }
  40. // InitKubeMetrics initializes kubernetes metric emission using the provided options.
  41. func InitKubeMetrics(
  42. clusterInfo clusters.ClusterInfoProvider,
  43. clusterCache clustercache.ClusterCache,
  44. metricsConfig *MetricsConfig,
  45. opts *KubeMetricsOpts,
  46. ) {
  47. if opts == nil {
  48. opts = DefaultKubeMetricsOpts()
  49. }
  50. kubeMetricInit.Do(func() {
  51. if !opts.EmitDeprecatedMetrics {
  52. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics,
  53. "kube_pod_container_resource_limits",
  54. "kube_pod_container_resource_limits_memory_bytes",
  55. "kube_pod_container_resource_limits_cpu_cores",
  56. "kube_pod_container_status_restarts_total",
  57. "kube_node_status_condition",
  58. "kube_deployment_status_replicas_available",
  59. "kube_deployment_spec_replicas",
  60. "kube_persistentvolume_status_phase",
  61. "kube_pod_status_phase",
  62. )
  63. }
  64. prometheus.MustRegister(KubeModelCollector{
  65. KubeClusterCache: clusterCache,
  66. ClusterInfo: clusterInfo,
  67. metricsConfig: *metricsConfig,
  68. })
  69. if opts.EmitKubecostControllerMetrics {
  70. prometheus.MustRegister(KubecostServiceCollector{
  71. KubeClusterCache: clusterCache,
  72. metricsConfig: *metricsConfig,
  73. })
  74. prometheus.MustRegister(KubecostDeploymentCollector{
  75. KubeClusterCache: clusterCache,
  76. metricsConfig: *metricsConfig,
  77. })
  78. prometheus.MustRegister(KubecostStatefulsetCollector{
  79. KubeClusterCache: clusterCache,
  80. metricsConfig: *metricsConfig,
  81. })
  82. }
  83. if opts.EmitPodAnnotations {
  84. prometheus.MustRegister(KubecostPodCollector{
  85. KubeClusterCache: clusterCache,
  86. metricsConfig: *metricsConfig,
  87. })
  88. }
  89. if opts.EmitNamespaceAnnotations {
  90. prometheus.MustRegister(KubecostNamespaceCollector{
  91. KubeClusterCache: clusterCache,
  92. metricsConfig: *metricsConfig,
  93. })
  94. }
  95. if opts.EmitKubeStateMetrics {
  96. prometheus.MustRegister(KubeNodeCollector{
  97. KubeClusterCache: clusterCache,
  98. metricsConfig: *metricsConfig,
  99. })
  100. prometheus.MustRegister(KubeNamespaceCollector{
  101. KubeClusterCache: clusterCache,
  102. metricsConfig: *metricsConfig,
  103. })
  104. prometheus.MustRegister(KubeDeploymentCollector{
  105. KubeClusterCache: clusterCache,
  106. metricsConfig: *metricsConfig,
  107. })
  108. prometheus.MustRegister(KubePodCollector{
  109. KubeClusterCache: clusterCache,
  110. metricsConfig: *metricsConfig,
  111. })
  112. prometheus.MustRegister(KubePVCollector{
  113. KubeClusterCache: clusterCache,
  114. metricsConfig: *metricsConfig,
  115. })
  116. prometheus.MustRegister(KubePVCCollector{
  117. KubeClusterCache: clusterCache,
  118. metricsConfig: *metricsConfig,
  119. })
  120. prometheus.MustRegister(KubeJobCollector{
  121. KubeClusterCache: clusterCache,
  122. metricsConfig: *metricsConfig,
  123. })
  124. } else if opts.EmitKubeStateMetricsV1Only {
  125. // We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
  126. forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
  127. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
  128. prometheus.MustRegister(KubeNodeCollector{
  129. KubeClusterCache: clusterCache,
  130. metricsConfig: *metricsConfig,
  131. })
  132. prometheus.MustRegister(KubeNamespaceCollector{
  133. KubeClusterCache: clusterCache,
  134. metricsConfig: *metricsConfig,
  135. })
  136. prometheus.MustRegister(KubePodLabelsCollector{
  137. KubeClusterCache: clusterCache,
  138. metricsConfig: *metricsConfig,
  139. })
  140. prometheus.MustRegister(KubePVCollector{
  141. KubeClusterCache: clusterCache,
  142. metricsConfig: *metricsConfig,
  143. })
  144. } else {
  145. // We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
  146. forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
  147. metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
  148. prometheus.MustRegister(KubePVCollector{
  149. KubeClusterCache: clusterCache,
  150. metricsConfig: *metricsConfig,
  151. })
  152. }
  153. })
  154. }
  155. //--------------------------------------------------------------------------
  156. // Kube Metric Helpers
  157. //--------------------------------------------------------------------------
  158. // getPersistentVolumeClaimClass returns StorageClassName. If no storage class was
  159. // requested, it returns "".
  160. func getPersistentVolumeClaimClass(claim *clustercache.PersistentVolumeClaim) string {
  161. // Use beta annotation first
  162. if class, found := claim.Annotations[v1.BetaStorageClassAnnotation]; found {
  163. return class
  164. }
  165. if claim.Spec.StorageClassName != nil {
  166. return *claim.Spec.StorageClassName
  167. }
  168. // Special non-empty string to indicate absence of storage class.
  169. return ""
  170. }
  171. // toResourceUnitValue accepts a resource name and quantity and returns the sanitized resource, the unit, and the value in the units.
  172. // Returns an empty string for resource and unit if there was a failure.
  173. func toResourceUnitValue(resourceName v1.ResourceName, quantity resource.Quantity) (resource string, unit string, value float64) {
  174. resource = promutil.SanitizeLabelName(string(resourceName))
  175. switch resourceName {
  176. case v1.ResourceCPU:
  177. unit = "core"
  178. value = float64(quantity.MilliValue()) / 1000
  179. return
  180. case v1.ResourceStorage:
  181. fallthrough
  182. case v1.ResourceEphemeralStorage:
  183. fallthrough
  184. case v1.ResourceMemory:
  185. unit = "byte"
  186. value = float64(quantity.Value())
  187. return
  188. case v1.ResourcePods:
  189. unit = "integer"
  190. value = float64(quantity.Value())
  191. return
  192. default:
  193. if isHugePageResourceName(resourceName) || isAttachableVolumeResourceName(resourceName) {
  194. unit = "byte"
  195. value = float64(quantity.Value())
  196. return
  197. }
  198. if isExtendedResourceName(resourceName) {
  199. unit = "integer"
  200. value = float64(quantity.Value())
  201. return
  202. }
  203. }
  204. resource = ""
  205. unit = ""
  206. value = 0.0
  207. return
  208. }
  209. // isHugePageResourceName checks for a huge page container resource name
  210. func isHugePageResourceName(name v1.ResourceName) bool {
  211. return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
  212. }
  213. // isAttachableVolumeResourceName checks for attached volume container resource name
  214. func isAttachableVolumeResourceName(name v1.ResourceName) bool {
  215. return strings.HasPrefix(string(name), v1.ResourceAttachableVolumesPrefix)
  216. }
  217. // isExtendedResourceName checks for extended container resource name
  218. func isExtendedResourceName(name v1.ResourceName) bool {
  219. if isNativeResource(name) || strings.HasPrefix(string(name), v1.DefaultResourceRequestsPrefix) {
  220. return false
  221. }
  222. // Ensure it satisfies the rules in IsQualifiedName() after converted into quota resource name
  223. nameForQuota := fmt.Sprintf("%s%s", v1.DefaultResourceRequestsPrefix, string(name))
  224. if errs := validation.IsQualifiedName(nameForQuota); len(errs) != 0 {
  225. return false
  226. }
  227. return true
  228. }
  229. // isNativeResource checks for a kubernetes.io/ prefixed resource name
  230. func isNativeResource(name v1.ResourceName) bool {
  231. return !strings.Contains(string(name), "/") || isPrefixedNativeResource(name)
  232. }
  233. func isPrefixedNativeResource(name v1.ResourceName) bool {
  234. return strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
  235. }
  236. func failureReason(jc *batchv1.JobCondition, reason string) bool {
  237. if jc == nil {
  238. return false
  239. }
  240. return jc.Reason == reason
  241. }
  242. // boolFloat64 converts a boolean input into a 1 or 0
  243. func boolFloat64(b bool) float64 {
  244. if b {
  245. return 1
  246. }
  247. return 0
  248. }
  249. // toStringPtr is used to create a new string pointer from iteration vars
  250. func toStringPtr(s string) *string { return &s }