Просмотр исходного кода

Support disabling individual cost-model metrics

Kaelan Patel 4 лет назад
Родитель
Сommit
7bdda6b5af
2 измененных файлов с 156 добавлено и 90 удалено
  1. 152 86
      pkg/costmodel/metrics.go
  2. 4 4
      pkg/metrics/podlabelmetrics.go

+ 152 - 86
pkg/costmodel/metrics.go

@@ -32,22 +32,31 @@ import (
 
 // ClusterInfoCollector is a prometheus collector that generates ClusterInfoMetrics
 type ClusterInfoCollector struct {
-	ClusterInfo clusters.ClusterInfoProvider
+	ClusterInfo   clusters.ClusterInfoProvider
+	metricsConfig metrics.MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (cic ClusterInfoCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kubecost_cluster_info", "Kubecost Cluster Info", []string{}, nil)
+	disabledMetrics := cic.metricsConfig.GetDisabledMetricsMap()
+
+	if _, ok := disabledMetrics["kube_pod_annotations"]; !ok {
+		ch <- prometheus.NewDesc("kubecost_cluster_info", "Kubecost Cluster Info", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (cic ClusterInfoCollector) Collect(ch chan<- prometheus.Metric) {
-	clusterInfo := cic.ClusterInfo.GetClusterInfo()
-	labels := prom.MapToLabels(clusterInfo)
+	disabledMetrics := cic.metricsConfig.GetDisabledMetricsMap()
+
+	if _, ok := disabledMetrics["kube_pod_annotations"]; !ok {
+		clusterInfo := cic.ClusterInfo.GetClusterInfo()
+		labels := prom.MapToLabels(clusterInfo)
 
-	m := newClusterInfoMetric("kubecost_cluster_info", labels)
-	ch <- m
+		m := newClusterInfoMetric("kubecost_cluster_info", labels)
+		ch <- m
+	}
 }
 
 //--------------------------------------------------------------------------
@@ -126,96 +135,153 @@ var (
 
 // initCostModelMetrics uses a sync.Once to ensure that these metrics are only created once
 func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider cloud.Provider, clusterInfo clusters.ClusterInfoProvider, metricsConfig *metrics.MetricsConfig) {
+
+	disabledMetrics := metricsConfig.GetDisabledMetricsMap()
+	var toRegisterGV []*prometheus.GaugeVec
+	var toRegisterGage []prometheus.Gauge
+
 	metricsInit.Do(func() {
-		cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_cpu_hourly_cost",
-			Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_ram_hourly_cost",
-			Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_gpu_hourly_cost",
-			Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_gpu_count",
-			Help: "node_gpu_count count of gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "pv_hourly_cost",
-			Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
-		}, []string{"volumename", "persistentvolume", "provider_id"})
-
-		spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "kubecost_node_is_spot",
-			Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_total_hourly_cost",
-			Help: "node_total_hourly_cost Total node cost per hour",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_memory_allocation_bytes",
-			Help: "container_memory_allocation_bytes Bytes of RAM used",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_cpu_allocation",
-			Help: "container_cpu_allocation Percent of a single CPU used in a minute",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_gpu_allocation",
-			Help: "container_gpu_allocation GPU used",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "pod_pvc_allocation",
-			Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
-		}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
-
-		networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_zone_egress_cost",
-			Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
-		})
 
-		networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_region_egress_cost",
-			Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
-		})
+		if _, ok := disabledMetrics["node_cpu_hourly_cost"]; !ok {
+			cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_cpu_hourly_cost",
+				Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, cpuGv)
+		}
 
-		networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_internet_egress_cost",
-			Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
-		})
+		if _, ok := disabledMetrics["node_ram_hourly_cost"]; !ok {
+			ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_ram_hourly_cost",
+				Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, ramGv)
+		}
+
+		if _, ok := disabledMetrics["node_gpu_hourly_cost"]; !ok {
+			gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_gpu_hourly_cost",
+				Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, gpuGv)
+		}
+
+		if _, ok := disabledMetrics["node_gpu_count"]; !ok {
+			gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_gpu_count",
+				Help: "node_gpu_count count of gpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, gpuCountGv)
+		}
 
-		clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "kubecost_cluster_management_cost",
-			Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
-		}, []string{"provisioner_name"})
+		if _, ok := disabledMetrics["pv_hourly_cost"]; !ok {
+			pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "pv_hourly_cost",
+				Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
+			}, []string{"volumename", "persistentvolume", "provider_id"})
+			toRegisterGV = append(toRegisterGV, pvGv)
+		}
 
-		lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
-			Name: "kubecost_load_balancer_cost",
-			Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
-		}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
+		if _, ok := disabledMetrics["kubecost_node_is_spot"]; !ok {
+			spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "kubecost_node_is_spot",
+				Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, spotGv)
+		}
+
+		if _, ok := disabledMetrics["node_total_hourly_cost"]; !ok {
+			totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_total_hourly_cost",
+				Help: "node_total_hourly_cost Total node cost per hour",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, totalGv)
+		}
+
+		if _, ok := disabledMetrics["container_memory_allocation_bytes"]; !ok {
+			ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_memory_allocation_bytes",
+				Help: "container_memory_allocation_bytes Bytes of RAM used",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, ramAllocGv)
+		}
+
+		if _, ok := disabledMetrics["container_cpu_allocation"]; !ok {
+			cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_cpu_allocation",
+				Help: "container_cpu_allocation Percent of a single CPU used in a minute",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, cpuAllocGv)
+		}
+
+		if _, ok := disabledMetrics["container_gpu_allocation"]; !ok {
+			gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_gpu_allocation",
+				Help: "container_gpu_allocation GPU used",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, gpuAllocGv)
+		}
+
+		if _, ok := disabledMetrics["pod_pvc_allocation"]; !ok {
+			pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "pod_pvc_allocation",
+				Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
+			}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
+			toRegisterGV = append(toRegisterGV, pvAllocGv)
+		}
+
+		if _, ok := disabledMetrics["kubecost_network_zone_egress_cost"]; !ok {
+			networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_zone_egress_cost",
+				Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
+			})
+			toRegisterGage = append(toRegisterGage, networkZoneEgressCostG)
+		}
+
+		if _, ok := disabledMetrics["kubecost_network_region_egress_cost"]; !ok {
+			networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_region_egress_cost",
+				Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
+			})
+			toRegisterGage = append(toRegisterGage, networkRegionEgressCostG)
+		}
+
+		if _, ok := disabledMetrics["kubecost_network_internet_egress_cost"]; !ok {
+			networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_internet_egress_cost",
+				Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
+			})
+			toRegisterGage = append(toRegisterGage, networkInternetEgressCostG)
+		}
+
+		if _, ok := disabledMetrics["kubecost_cluster_management_cost"]; !ok {
+			clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "kubecost_cluster_management_cost",
+				Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
+			}, []string{"provisioner_name"})
+			toRegisterGV = append(toRegisterGV, clusterManagementCostGv)
+		}
+
+		if _, ok := disabledMetrics["kubecost_load_balancer_cost"]; !ok {
+			lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
+				Name: "kubecost_load_balancer_cost",
+				Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
+			}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
+			toRegisterGV = append(toRegisterGV, lbCostGv)
+		}
 
 		// Register cost-model metrics for emission
-		prometheus.MustRegister(cpuGv, ramGv, gpuGv, gpuCountGv, totalGv, pvGv, spotGv)
-		prometheus.MustRegister(ramAllocGv, cpuAllocGv, gpuAllocGv, pvAllocGv)
-		prometheus.MustRegister(networkZoneEgressCostG, networkRegionEgressCostG, networkInternetEgressCostG)
-		prometheus.MustRegister(clusterManagementCostGv, lbCostGv)
+		for i := range toRegisterGV {
+			prometheus.MustRegister(toRegisterGV[i])
+		}
+		for i := range toRegisterGage {
+			prometheus.MustRegister(toRegisterGage[i])
+		}
 
 		// General Metric Collectors
 		prometheus.MustRegister(ClusterInfoCollector{
-			ClusterInfo: clusterInfo,
+			ClusterInfo:   clusterInfo,
+			metricsConfig: *metricsConfig,
 		})
 	})
 }

+ 4 - 4
pkg/metrics/podlabelmetrics.go

@@ -51,10 +51,10 @@ type KubePodLabelsCollector struct {
 func (kpmc KubePodLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
 	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
 
-	if _, ok := disabledMetrics["kube_pod_labels"]; ok {
+	if _, ok := disabledMetrics["kube_pod_labels"]; !ok {
 		ch <- prometheus.NewDesc("kube_pod_labels", "All labels for each pod prefixed with label_", []string{}, nil)
 	}
-	if _, ok := disabledMetrics["kube_pod_owner"]; ok {
+	if _, ok := disabledMetrics["kube_pod_owner"]; !ok {
 		ch <- prometheus.NewDesc("kube_pod_owner", "Information about the Pod's owner", []string{}, nil)
 	}
 }
@@ -70,13 +70,13 @@ func (kpmc KubePodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
 		podUID := string(pod.GetUID())
 
 		// Pod Labels
-		if _, ok := disabledMetrics["kube_pod_labels"]; ok {
+		if _, ok := disabledMetrics["kube_pod_labels"]; !ok {
 			labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
 			ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
 		}
 
 		// Owner References
-		if _, ok := disabledMetrics["kube_pod_owner"]; ok {
+		if _, ok := disabledMetrics["kube_pod_owner"]; !ok {
 			for _, owner := range pod.OwnerReferences {
 				ch <- newKubePodOwnerMetric("kube_pod_owner", podNS, podName, owner.Name, owner.Kind, owner.Controller != nil)
 			}