Kaynağa Gözat

Merge pull request #1060 from kubecost/kaelan-disable-metrics

Add fine-grained control for disabling metrics
Kaelan Patel 4 yıl önce
ebeveyn
işleme
cf97a87e13

+ 165 - 84
pkg/costmodel/metrics.go

@@ -32,22 +32,35 @@ import (
 
 // ClusterInfoCollector is a prometheus collector that generates ClusterInfoMetrics
 type ClusterInfoCollector struct {
-	ClusterInfo clusters.ClusterInfoProvider
+	ClusterInfo   clusters.ClusterInfoProvider
+	metricsConfig metrics.MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (cic ClusterInfoCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := cic.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("kubecost_cluster_info", "Kubecost Cluster Info", []string{}, nil)
+
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (cic ClusterInfoCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := cic.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	clusterInfo := cic.ClusterInfo.GetClusterInfo()
 	labels := prom.MapToLabels(clusterInfo)
 
 	m := newClusterInfoMetric("kubecost_cluster_info", labels)
 	ch <- m
+
 }
 
 //--------------------------------------------------------------------------
@@ -125,97 +138,154 @@ var (
 )
 
 // initCostModelMetrics uses a sync.Once to ensure that these metrics are only created once
-func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider cloud.Provider, clusterInfo clusters.ClusterInfoProvider) {
+func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider cloud.Provider, clusterInfo clusters.ClusterInfoProvider, metricsConfig *metrics.MetricsConfig) {
+
+	disabledMetrics := metricsConfig.GetDisabledMetricsMap()
+	var toRegisterGV []*prometheus.GaugeVec
+	var toRegisterGauge []prometheus.Gauge
+
 	metricsInit.Do(func() {
-		cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_cpu_hourly_cost",
-			Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_ram_hourly_cost",
-			Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_gpu_hourly_cost",
-			Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_gpu_count",
-			Help: "node_gpu_count count of gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "pv_hourly_cost",
-			Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
-		}, []string{"volumename", "persistentvolume", "provider_id"})
-
-		spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "kubecost_node_is_spot",
-			Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "node_total_hourly_cost",
-			Help: "node_total_hourly_cost Total node cost per hour",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
-
-		ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_memory_allocation_bytes",
-			Help: "container_memory_allocation_bytes Bytes of RAM used",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_cpu_allocation",
-			Help: "container_cpu_allocation Percent of a single CPU used in a minute",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "container_gpu_allocation",
-			Help: "container_gpu_allocation GPU used",
-		}, []string{"namespace", "pod", "container", "instance", "node"})
-
-		pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "pod_pvc_allocation",
-			Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
-		}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
-
-		networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_zone_egress_cost",
-			Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
-		})
 
-		networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_region_egress_cost",
-			Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
-		})
+		if _, disabled := disabledMetrics["node_cpu_hourly_cost"]; !disabled {
+			cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_cpu_hourly_cost",
+				Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, cpuGv)
+		}
 
-		networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "kubecost_network_internet_egress_cost",
-			Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
-		})
+		if _, disabled := disabledMetrics["node_ram_hourly_cost"]; !disabled {
+			ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_ram_hourly_cost",
+				Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, ramGv)
+		}
+
+		if _, disabled := disabledMetrics["node_gpu_hourly_cost"]; !disabled {
+			gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_gpu_hourly_cost",
+				Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, gpuGv)
+		}
+
+		if _, disabled := disabledMetrics["node_gpu_count"]; !disabled {
+			gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_gpu_count",
+				Help: "node_gpu_count count of gpu on this node",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, gpuCountGv)
+		}
+
+		if _, disabled := disabledMetrics["pv_hourly_cost"]; !disabled {
+			pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "pv_hourly_cost",
+				Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
+			}, []string{"volumename", "persistentvolume", "provider_id"})
+			toRegisterGV = append(toRegisterGV, pvGv)
+		}
+
+		if _, disabled := disabledMetrics["kubecost_node_is_spot"]; !disabled {
+			spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "kubecost_node_is_spot",
+				Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, spotGv)
+		}
+
+		if _, disabled := disabledMetrics["node_total_hourly_cost"]; !disabled {
+			totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "node_total_hourly_cost",
+				Help: "node_total_hourly_cost Total node cost per hour",
+			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+			toRegisterGV = append(toRegisterGV, totalGv)
+		}
 
-		clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-			Name: "kubecost_cluster_management_cost",
-			Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
-		}, []string{"provisioner_name"})
+		if _, disabled := disabledMetrics["container_memory_allocation_bytes"]; !disabled {
+			ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_memory_allocation_bytes",
+				Help: "container_memory_allocation_bytes Bytes of RAM used",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, ramAllocGv)
+		}
+
+		if _, disabled := disabledMetrics["container_cpu_allocation"]; !disabled {
+			cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_cpu_allocation",
+				Help: "container_cpu_allocation Percent of a single CPU used in a minute",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, cpuAllocGv)
+		}
+
+		if _, disabled := disabledMetrics["container_gpu_allocation"]; !disabled {
+			gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "container_gpu_allocation",
+				Help: "container_gpu_allocation GPU used",
+			}, []string{"namespace", "pod", "container", "instance", "node"})
+			toRegisterGV = append(toRegisterGV, gpuAllocGv)
+		}
+
+		if _, disabled := disabledMetrics["pod_pvc_allocation"]; !disabled {
+			pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "pod_pvc_allocation",
+				Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
+			}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
+			toRegisterGV = append(toRegisterGV, pvAllocGv)
+		}
+
+		if _, disabled := disabledMetrics["kubecost_network_zone_egress_cost"]; !disabled {
+			networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_zone_egress_cost",
+				Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
+			})
+			toRegisterGauge = append(toRegisterGauge, networkZoneEgressCostG)
+		}
 
-		lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
-			Name: "kubecost_load_balancer_cost",
-			Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
-		}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
+		if _, disabled := disabledMetrics["kubecost_network_region_egress_cost"]; !disabled {
+			networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_region_egress_cost",
+				Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
+			})
+			toRegisterGauge = append(toRegisterGauge, networkRegionEgressCostG)
+		}
+
+		if _, disabled := disabledMetrics["kubecost_network_internet_egress_cost"]; !disabled {
+			networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+				Name: "kubecost_network_internet_egress_cost",
+				Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
+			})
+			toRegisterGauge = append(toRegisterGauge, networkInternetEgressCostG)
+		}
+
+		if _, disabled := disabledMetrics["kubecost_cluster_management_cost"]; !disabled {
+			clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+				Name: "kubecost_cluster_management_cost",
+				Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
+			}, []string{"provisioner_name"})
+			toRegisterGV = append(toRegisterGV, clusterManagementCostGv)
+		}
+
+		if _, disabled := disabledMetrics["kubecost_load_balancer_cost"]; !disabled {
+			lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
+				Name: "kubecost_load_balancer_cost",
+				Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
+			}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
+			toRegisterGV = append(toRegisterGV, lbCostGv)
+		}
 
 		// Register cost-model metrics for emission
-		prometheus.MustRegister(cpuGv, ramGv, gpuGv, gpuCountGv, totalGv, pvGv, spotGv)
-		prometheus.MustRegister(ramAllocGv, cpuAllocGv, gpuAllocGv, pvAllocGv)
-		prometheus.MustRegister(networkZoneEgressCostG, networkRegionEgressCostG, networkInternetEgressCostG)
-		prometheus.MustRegister(clusterManagementCostGv, lbCostGv)
+		for _, gv := range toRegisterGV {
+			prometheus.MustRegister(gv)
+		}
+		for _, g := range toRegisterGauge {
+			prometheus.MustRegister(g)
+		}
 
 		// General Metric Collectors
 		prometheus.MustRegister(ClusterInfoCollector{
-			ClusterInfo: clusterInfo,
+			ClusterInfo:   clusterInfo,
+			metricsConfig: *metricsConfig,
 		})
 	})
 }
@@ -256,10 +326,21 @@ type CostModelMetricsEmitter struct {
 
 // NewCostModelMetricsEmitter creates a new cost-model metrics emitter. Use Start() to begin metric emission.
 func NewCostModelMetricsEmitter(promClient promclient.Client, clusterCache clustercache.ClusterCache, provider cloud.Provider, clusterInfo clusters.ClusterInfoProvider, model *CostModel) *CostModelMetricsEmitter {
+
+	// Get metric configurations, if any
+	metricsConfig, err := metrics.GetMetricsConfig()
+	if err != nil {
+		log.Infof("Failed to get metrics config before init: %s", err)
+	}
+
+	if len(metricsConfig.DisabledMetrics) > 0 {
+		log.Infof("Starting metrics init with disabled metrics: %v", metricsConfig.DisabledMetrics)
+	}
+
 	// init will only actually execute once to register the custom gauges
-	initCostModelMetrics(clusterCache, provider, clusterInfo)
+	initCostModelMetrics(clusterCache, provider, clusterInfo, metricsConfig)
 
-	metrics.InitKubeMetrics(clusterCache, &metrics.KubeMetricsOpts{
+	metrics.InitKubeMetrics(clusterCache, metricsConfig, &metrics.KubeMetricsOpts{
 		EmitKubecostControllerMetrics: true,
 		EmitNamespaceAnnotations:      env.IsEmitNamespaceAnnotationsMetric(),
 		EmitPodAnnotations:            env.IsEmitPodAnnotationsMetric(),

+ 3 - 0
pkg/costmodel/router.go

@@ -13,6 +13,7 @@ import (
 	"time"
 
 	"github.com/kubecost/cost-model/pkg/config"
+	"github.com/kubecost/cost-model/pkg/metrics"
 	"github.com/kubecost/cost-model/pkg/services"
 	"github.com/kubecost/cost-model/pkg/util/httputil"
 	"github.com/kubecost/cost-model/pkg/util/timeutil"
@@ -1438,6 +1439,8 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 
 	// Append the pricing config watcher
 	configWatchers.AddWatcher(cloud.ConfigWatcherFor(cloudProvider))
+	configWatchers.AddWatcher(metrics.GetMetricsConfigWatcher())
+
 	watchConfigFunc := configWatchers.ToWatchFunc()
 	watchedConfigs := configWatchers.GetWatchedConfigs()
 

+ 5 - 0
pkg/env/costmodelenv.go

@@ -76,6 +76,7 @@ const (
 	PromClusterIDLabelEnvVar = "PROM_CLUSTER_ID_LABEL"
 
 	PricingConfigmapName  = "PRICING_CONFIGMAP_NAME"
+	MetricsConfigmapName  = "METRICS_CONFIGMAP_NAME"
 	KubecostJobNameEnvVar = "KUBECOST_JOB_NAME"
 
 	KubecostConfigBucketEnvVar    = "KUBECOST_CONFIG_BUCKET"
@@ -148,6 +149,10 @@ func GetPricingConfigmapName() string {
 	return Get(PricingConfigmapName, "pricing-configs")
 }
 
+func GetMetricsConfigmapName() string {
+	return Get(MetricsConfigmapName, "metrics-config")
+}
+
 // GetAWSAccessKeyID returns the environment variable value for AWSAccessKeyIDEnvVar which represents
 // the AWS access key for authentication
 func GetAppVersion() string {

+ 33 - 11
pkg/metrics/deploymentmetrics.go

@@ -16,18 +16,28 @@ import (
 // specific deployment metrics.
 type KubecostDeploymentCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kdc KubecostDeploymentCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := kdc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["deployment_match_labels"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("deployment_match_labels", "deployment match labels", []string{}, nil)
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kdc KubecostDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
-	ds := kdc.KubeClusterCache.GetAllDeployments()
+	disabledMetrics := kdc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["deployment_match_labels"]; disabled {
+		return
+	}
 
+	ds := kdc.KubeClusterCache.GetAllDeployments()
 	for _, deployment := range ds {
 		deploymentName := deployment.GetName()
 		deploymentNS := deployment.GetNamespace()
@@ -38,6 +48,7 @@ func (kdc KubecostDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- m
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------
@@ -109,19 +120,27 @@ func (dmlm DeploymentMatchLabelsMetric) Write(m *dto.Metric) error {
 // KubeDeploymentCollector is a prometheus collector that generates
 type KubeDeploymentCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kdc KubeDeploymentCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_deployment_spec_replicas", "Number of desired pods for a deployment.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_deployment_status_replicas_available", "The number of available replicas per deployment.", []string{}, nil)
+	disabledMetrics := kdc.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_deployment_spec_replicas"]; !disabled {
+		ch <- prometheus.NewDesc("kube_deployment_spec_replicas", "Number of desired pods for a deployment.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_deployment_status_replicas_available"]; !disabled {
+		ch <- prometheus.NewDesc("kube_deployment_status_replicas_available", "The number of available replicas per deployment.", []string{}, nil)
+	}
 
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kdc KubeDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 	deployments := kdc.KubeClusterCache.GetAllDeployments()
+	disabledMetrics := kdc.metricsConfig.GetDisabledMetricsMap()
 
 	for _, deployment := range deployments {
 		deploymentName := deployment.GetName()
@@ -135,14 +154,17 @@ func (kdc KubeDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 			replicas = *deployment.Spec.Replicas
 		}
 
-		ch <- newKubeDeploymentReplicasMetric("kube_deployment_spec_replicas", deploymentName, deploymentNS, replicas)
-
-		// Replicas Available
-		ch <- newKubeDeploymentStatusAvailableReplicasMetric(
-			"kube_deployment_status_replicas_available",
-			deploymentName,
-			deploymentNS,
-			deployment.Status.AvailableReplicas)
+		if _, disabled := disabledMetrics["kube_deployment_spec_replicas"]; !disabled {
+			ch <- newKubeDeploymentReplicasMetric("kube_deployment_spec_replicas", deploymentName, deploymentNS, replicas)
+		}
+		if _, disabled := disabledMetrics["kube_deployment_status_replicas_available"]; !disabled {
+			// Replicas Available
+			ch <- newKubeDeploymentStatusAvailableReplicasMetric(
+				"kube_deployment_status_replicas_available",
+				deploymentName,
+				deploymentNS,
+				deployment.Status.AvailableReplicas)
+		}
 	}
 }
 

+ 12 - 0
pkg/metrics/jobmetrics.go

@@ -18,16 +18,27 @@ var (
 // KubeJobCollector is a prometheus collector that generates job sourced metrics.
 type KubeJobCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kjc KubeJobCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := kjc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("kube_job_status_failed", "The number of pods which reached Phase Failed and the reason for failure.", []string{}, nil)
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kjc KubeJobCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := kjc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	jobs := kjc.KubeClusterCache.GetAllJobs()
 	for _, job := range jobs {
 		jobName := job.GetName()
@@ -53,6 +64,7 @@ func (kjc KubeJobCollector) Collect(ch chan<- prometheus.Metric) {
 			}
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------

+ 16 - 1
pkg/metrics/kubemetrics.go

@@ -43,7 +43,7 @@ func DefaultKubeMetricsOpts() *KubeMetricsOpts {
 }
 
 // InitKubeMetrics initializes kubernetes metric emission using the provided options.
-func InitKubeMetrics(clusterCache clustercache.ClusterCache, opts *KubeMetricsOpts) {
+func InitKubeMetrics(clusterCache clustercache.ClusterCache, metricsConfig *MetricsConfig, opts *KubeMetricsOpts) {
 	if opts == nil {
 		opts = DefaultKubeMetricsOpts()
 	}
@@ -52,58 +52,73 @@ func InitKubeMetrics(clusterCache clustercache.ClusterCache, opts *KubeMetricsOp
 		if opts.EmitKubecostControllerMetrics {
 			prometheus.MustRegister(KubecostServiceCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubecostDeploymentCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubecostStatefulsetCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 		}
 
 		if opts.EmitPodAnnotations {
 			prometheus.MustRegister(KubecostPodCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 		}
 
 		if opts.EmitNamespaceAnnotations {
 			prometheus.MustRegister(KubecostNamespaceCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 		}
 
 		if opts.EmitKubeStateMetrics {
 			prometheus.MustRegister(KubeNodeCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubeNamespaceCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubeDeploymentCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubePodCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubePVCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubePVCCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubeJobCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 		} else if opts.EmitKubeStateMetricsV1Only {
 			prometheus.MustRegister(KubeNodeCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubeNamespaceCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 			prometheus.MustRegister(KubePodLabelsCollector{
 				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
 			})
 		}
 	})

+ 104 - 0
pkg/metrics/metricsconfig.go

@@ -0,0 +1,104 @@
+package metrics
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"sync"
+
+	"github.com/kubecost/cost-model/pkg/env"
+	"github.com/kubecost/cost-model/pkg/util/watcher"
+)
+
// metricsConfigLock serializes reads and writes of the metrics config file.
var metricsConfigLock = new(sync.Mutex)

// MetricsConfig holds the user-supplied metrics configuration, currently the
// list of metric names whose emission should be suppressed.
type MetricsConfig struct {
	DisabledMetrics []string `json:"disabledMetrics"`
}

// GetDisabledMetricsMap returns the disabled metric names as a set keyed by
// metric name, for O(1) membership checks in the collectors.
func (mc MetricsConfig) GetDisabledMetricsMap() map[string]struct{} {
	disabled := make(map[string]struct{}, len(mc.DisabledMetrics))
	for _, name := range mc.DisabledMetrics {
		disabled[name] = struct{}{}
	}
	return disabled
}

// GetMetricsConfig unmarshals /var/configs/metrics.json into a MetricsConfig.
// A missing config file is not an error: it yields an empty (all metrics
// enabled) configuration.
func GetMetricsConfig() (*MetricsConfig, error) {
	metricsConfigLock.Lock()
	defer metricsConfigLock.Unlock()

	mc := &MetricsConfig{}
	body, err := ioutil.ReadFile("/var/configs/metrics.json")
	if os.IsNotExist(err) {
		return mc, nil
	} else if err != nil {
		return mc, fmt.Errorf("error reading metrics config file: %s", err)
	}

	if err := json.Unmarshal(body, mc); err != nil {
		return mc, fmt.Errorf("error decoding metrics config: %s", err)
	}

	return mc, nil
}
+
+// Writes MetricsConfig struct to json file
+func UpdateMetricsConfig(mc *MetricsConfig) (*MetricsConfig, error) {
+	metricsConfigLock.Lock()
+	defer metricsConfigLock.Unlock()
+
+	mcb, err := json.Marshal(mc)
+	if err != nil {
+		return nil, fmt.Errorf("error encoding metrics config struct: %s", err)
+	}
+
+	err = ioutil.WriteFile("/var/configs/metrics.json", mcb, 0644)
+	if err != nil {
+		return nil, fmt.Errorf("error writing to metrics config file: %s", err)
+	}
+
+	return mc, nil
+}
+
+// Updates metric config file from configmap
+func UpdateMetricsConfigFromConfigmap(data map[string]string) error {
+
+	mc := &MetricsConfig{}
+	key := "metrics.json"
+
+	cdata, ok := data[key]
+	if !ok {
+		return fmt.Errorf("error finding metrics config data")
+	}
+
+	err := json.Unmarshal([]byte(cdata), &mc)
+	if err != nil {
+		return fmt.Errorf("failed to unmarshal metrics configs: %s", err)
+	}
+
+	_, err = UpdateMetricsConfig(mc)
+	if err != nil {
+		return err
+	}
+
+	return nil
+
+}
+
+// Returns ConfigMapWatcher for metrics configuration configmap
+func GetMetricsConfigWatcher() *watcher.ConfigMapWatcher {
+	return &watcher.ConfigMapWatcher{
+		ConfigMapName: env.GetMetricsConfigmapName(),
+		WatchFunc: func(name string, data map[string]string) error {
+			err := UpdateMetricsConfigFromConfigmap(data)
+			return err
+		},
+	}
+}

+ 26 - 0
pkg/metrics/namespacemetrics.go

@@ -15,16 +15,28 @@ import (
 // KubecostNamespaceCollector is a prometheus collector that generates namespace sourced metrics
 type KubecostNamespaceCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (nsac KubecostNamespaceCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_namespace_annotations"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("kube_namespace_annotations", "namespace annotations", []string{}, nil)
+
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (nsac KubecostNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_namespace_annotations"]; disabled {
+		return
+	}
+
 	namespaces := nsac.KubeClusterCache.GetAllNamespaces()
 	for _, namespace := range namespaces {
 		nsName := namespace.GetName()
@@ -35,6 +47,7 @@ func (nsac KubecostNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- m
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------
@@ -100,16 +113,28 @@ func (nam NamespaceAnnotationsMetric) Write(m *dto.Metric) error {
 // KubeNamespaceCollector is a prometheus collector that generates namespace sourced metrics
 type KubeNamespaceCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (nsac KubeNamespaceCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_namespace_labels"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("kube_namespace_labels", "namespace labels", []string{}, nil)
+
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (nsac KubeNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_namespace_labels"]; disabled {
+		return
+	}
+
 	namespaces := nsac.KubeClusterCache.GetAllNamespaces()
 	for _, namespace := range namespaces {
 		nsName := namespace.GetName()
@@ -120,6 +145,7 @@ func (nsac KubeNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- m
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------

+ 61 - 35
pkg/metrics/nodemetrics.go

@@ -22,24 +22,45 @@ var (
 // KubeNodeCollector is a prometheus collector that generates node sourced metrics.
 type KubeNodeCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (nsac KubeNodeCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_node_status_capacity", "Node resource capacity.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_capacity_memory_bytes", "node capacity memory bytes", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_capacity_cpu_cores", "node capacity cpu cores", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_allocatable", "The allocatable for different resources of a node that are available for scheduling.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_allocatable_cpu_cores", "The allocatable cpu cores.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_allocatable_memory_bytes", "The allocatable memory in bytes.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_labels", "all labels for each node prefixed with label_", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_node_status_condition", "The condition of a cluster node.", []string{}, nil)
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_node_status_capacity"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_capacity", "Node resource capacity.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_capacity_memory_bytes"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_capacity_memory_bytes", "node capacity memory bytes", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_capacity_cpu_cores"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_capacity_cpu_cores", "node capacity cpu cores", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_allocatable"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_allocatable", "The allocatable for different resources of a node that are available for scheduling.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_allocatable_cpu_cores"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_allocatable_cpu_cores", "The allocatable cpu cores.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_allocatable_memory_bytes"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_allocatable_memory_bytes", "The allocatable memory in bytes.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_labels"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_labels", "all labels for each node prefixed with label_", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_node_status_condition"]; !disabled {
+		ch <- prometheus.NewDesc("kube_node_status_condition", "The condition of a cluster node.", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 	nodes := nsac.KubeClusterCache.GetAllNodes()
+	disabledMetrics := nsac.metricsConfig.GetDisabledMetricsMap()
+
 	for _, node := range nodes {
 		nodeName := node.GetName()
 
@@ -54,15 +75,21 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 			}
 
 			// KSM v1 Emission
-			if resource == "cpu" {
-				ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, value)
+			if _, disabled := disabledMetrics["kube_node_status_capacity_cpu_cores"]; !disabled {
+				if resource == "cpu" {
+					ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, value)
 
+				}
 			}
-			if resource == "memory" {
-				ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, value)
+			if _, disabled := disabledMetrics["kube_node_status_capacity_memory_bytes"]; !disabled {
+				if resource == "memory" {
+					ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, value)
+				}
 			}
 
-			ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, value)
+			if _, disabled := disabledMetrics["kube_node_status_capacity"]; !disabled {
+				ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, value)
+			}
 		}
 
 		// Node Allocatable Resources
@@ -76,31 +103,38 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 			}
 
 			// KSM v1 Emission
-			if resource == "cpu" {
-				ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value)
-
+			if _, disabled := disabledMetrics["kube_node_status_allocatable_cpu_cores"]; !disabled {
+				if resource == "cpu" {
+					ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value)
+				}
 			}
-			if resource == "memory" {
-				ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value)
+			if _, disabled := disabledMetrics["kube_node_status_allocatable_memory_bytes"]; !disabled {
+				if resource == "memory" {
+					ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value)
+				}
+			}
+			if _, disabled := disabledMetrics["kube_node_status_allocatable"]; !disabled {
+				ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value)
 			}
-
-			ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value)
 		}
 
 		// node labels
-		labelNames, labelValues := prom.KubePrependQualifierToLabels(node.GetLabels(), "label_")
-		ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues)
+		if _, disabled := disabledMetrics["kube_node_labels"]; !disabled {
+			labelNames, labelValues := prom.KubePrependQualifierToLabels(node.GetLabels(), "label_")
+			ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues)
+		}
 
 		// kube_node_status_condition
 		// Collect node conditions and while default to false.
-		for _, c := range node.Status.Conditions {
-			conditions := getConditions(c.Status)
+		if _, disabled := disabledMetrics["kube_node_status_condition"]; !disabled {
+			for _, c := range node.Status.Conditions {
+				conditions := getConditions(c.Status)
 
-			for _, cond := range conditions {
-				ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value)
+				for _, cond := range conditions {
+					ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value)
+				}
 			}
 		}
-
 	}
 }
 
@@ -256,14 +290,6 @@ func (nam KubeNodeStatusCapacityCPUCoresMetric) Write(m *dto.Metric) error {
 	return nil
 }
 
-//--------------------------------------------------------------------------
-//  KubeNodeLabelsCollector
-//--------------------------------------------------------------------------
-//
-// We use this to emit kube_node_labels with all of a node's labels, regardless
-// of the whitelist setting introduced in KSM v2. See
-// https://github.com/kubernetes/kube-state-metrics/issues/1270#issuecomment-712986441
-
 //--------------------------------------------------------------------------
 //  KubeNodeLabelsMetric
 //--------------------------------------------------------------------------

+ 19 - 6
pkg/metrics/podlabelmetrics.go

@@ -43,30 +43,43 @@ func (kpmc KubecostPodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
 // KubePodLabelsCollector is a prometheus collector that emits pod labels only
 type KubePodLabelsCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of pod labels only
 // collected by this Collector.
 func (kpmc KubePodLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_pod_labels", "All labels for each pod prefixed with label_", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_owner", "Information about the Pod's owner", []string{}, nil)
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_pod_labels"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_labels", "All labels for each pod prefixed with label_", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_owner"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_owner", "Information about the Pod's owner", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpmc KubePodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
 	pods := kpmc.KubeClusterCache.GetAllPods()
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+
 	for _, pod := range pods {
 		podName := pod.GetName()
 		podNS := pod.GetNamespace()
 		podUID := string(pod.GetUID())
 
 		// Pod Labels
-		labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
-		ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
+		if _, disabled := disabledMetrics["kube_pod_labels"]; !disabled {
+			labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
+			ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
+		}
 
 		// Owner References
-		for _, owner := range pod.OwnerReferences {
-			ch <- newKubePodOwnerMetric("kube_pod_owner", podNS, podName, owner.Name, owner.Kind, owner.Controller != nil)
+		if _, disabled := disabledMetrics["kube_pod_owner"]; !disabled {
+			for _, owner := range pod.OwnerReferences {
+				ch <- newKubePodOwnerMetric("kube_pod_owner", podNS, podName, owner.Name, owner.Kind, owner.Controller != nil)
+			}
 		}
 	}
 }

+ 135 - 77
pkg/metrics/podmetrics.go

@@ -18,16 +18,28 @@ import (
 // KubecostPodCollector is a prometheus collector that emits pod metrics
 type KubecostPodCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kpmc KubecostPodCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("kube_pod_annotations", "All annotations for each pod prefix with annotation_", []string{}, nil)
+
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpmc KubecostPodCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["kube_pod_annotations"]; disabled {
+		return
+	}
+
 	pods := kpmc.KubeClusterCache.GetAllPods()
 	for _, pod := range pods {
 		podName := pod.GetName()
@@ -39,6 +51,7 @@ func (kpmc KubecostPodCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- newPodAnnotationMetric("kube_pod_annotations", podNS, podName, labels, values)
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------
@@ -48,26 +61,51 @@ func (kpmc KubecostPodCollector) Collect(ch chan<- prometheus.Metric) {
 // KubePodMetricCollector is a prometheus collector that emits pod metrics
 type KubePodCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kpmc KubePodCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_pod_labels", "All labels for each pod prefixed with label_", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_owner", "Information about the Pod's owner", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_status_running", "Describes whether the container is currently in running state", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_status_terminated_reason", "Describes the reason the container is currently in terminated state.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_status_restarts_total", "The number of container restarts per container.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_resource_requests", "The number of requested resource by a container", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_resource_limits", "The number of requested limit resource by a container.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_resource_limits_cpu_cores", "The number of requested limit cpu core resource by a container.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_container_resource_limits_memory_bytes", "The number of requested limit memory resource by a container.", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_pod_status_phase", "The pods current phase.", []string{}, nil)
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_pod_labels"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_labels", "All labels for each pod prefixed with label_", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_owner"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_owner", "Information about the Pod's owner", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_status_running"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_status_running", "Describes whether the container is currently in running state", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_status_terminated_reason"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_status_terminated_reason", "Describes the reason the container is currently in terminated state.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_status_restarts_total"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_status_restarts_total", "The number of container restarts per container.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_resource_requests"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_resource_requests", "The number of requested resource by a container", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_resource_limits"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_resource_limits", "The number of requested limit resource by a container.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_resource_limits_cpu_cores"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_resource_limits_cpu_cores", "The number of requested limit cpu core resource by a container.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_container_resource_limits_memory_bytes"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_container_resource_limits_memory_bytes", "The number of requested limit memory resource by a container.", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_pod_status_phase"]; !disabled {
+		ch <- prometheus.NewDesc("kube_pod_status_phase", "The pods current phase.", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpmc KubePodCollector) Collect(ch chan<- prometheus.Metric) {
 	pods := kpmc.KubeClusterCache.GetAllPods()
+	disabledMetrics := kpmc.metricsConfig.GetDisabledMetricsMap()
+
 	for _, pod := range pods {
 		podName := pod.GetName()
 		podNS := pod.GetNamespace()
@@ -76,71 +114,86 @@ func (kpmc KubePodCollector) Collect(ch chan<- prometheus.Metric) {
 		phase := pod.Status.Phase
 
 		// Pod Status Phase
-		if phase != "" {
-			phases := []struct {
-				v bool
-				n string
-			}{
-				{phase == v1.PodPending, string(v1.PodPending)},
-				{phase == v1.PodSucceeded, string(v1.PodSucceeded)},
-				{phase == v1.PodFailed, string(v1.PodFailed)},
-				{phase == v1.PodUnknown, string(v1.PodUnknown)},
-				{phase == v1.PodRunning, string(v1.PodRunning)},
-			}
+		if _, disabled := disabledMetrics["kube_pod_status_phase"]; !disabled {
+			if phase != "" {
+				phases := []struct {
+					v bool
+					n string
+				}{
+					{phase == v1.PodPending, string(v1.PodPending)},
+					{phase == v1.PodSucceeded, string(v1.PodSucceeded)},
+					{phase == v1.PodFailed, string(v1.PodFailed)},
+					{phase == v1.PodUnknown, string(v1.PodUnknown)},
+					{phase == v1.PodRunning, string(v1.PodRunning)},
+				}
 
-			for _, p := range phases {
-				ch <- newKubePodStatusPhaseMetric("kube_pod_status_phase", podNS, podName, podUID, p.n, boolFloat64(p.v))
+				for _, p := range phases {
+					ch <- newKubePodStatusPhaseMetric("kube_pod_status_phase", podNS, podName, podUID, p.n, boolFloat64(p.v))
+				}
 			}
 		}
 
 		// Pod Labels
-		labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
-		ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
+		if _, disabled := disabledMetrics["kube_pod_labels"]; !disabled {
+			labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
+			ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
+		}
 
 		// Owner References
-		for _, owner := range pod.OwnerReferences {
-			ch <- newKubePodOwnerMetric("kube_pod_owner", podNS, podName, owner.Name, owner.Kind, owner.Controller != nil)
+		if _, disabled := disabledMetrics["kube_pod_owner"]; !disabled {
+			for _, owner := range pod.OwnerReferences {
+				ch <- newKubePodOwnerMetric("kube_pod_owner", podNS, podName, owner.Name, owner.Kind, owner.Controller != nil)
+			}
 		}
 
 		// Container Status
 		for _, status := range pod.Status.ContainerStatuses {
-			ch <- newKubePodContainerStatusRestartsTotalMetric("kube_pod_container_status_restarts_total", podNS, podName, podUID, status.Name, float64(status.RestartCount))
+			if _, disabled := disabledMetrics["kube_pod_container_status_restarts_total"]; !disabled {
+				ch <- newKubePodContainerStatusRestartsTotalMetric("kube_pod_container_status_restarts_total", podNS, podName, podUID, status.Name, float64(status.RestartCount))
+			}
 			if status.State.Running != nil {
-				ch <- newKubePodContainerStatusRunningMetric("kube_pod_container_status_running", podNS, podName, podUID, status.Name)
+				if _, disabled := disabledMetrics["kube_pod_container_status_running"]; !disabled {
+					ch <- newKubePodContainerStatusRunningMetric("kube_pod_container_status_running", podNS, podName, podUID, status.Name)
+				}
 			}
 
 			if status.State.Terminated != nil {
-				ch <- newKubePodContainerStatusTerminatedReasonMetric(
-					"kube_pod_container_status_terminated_reason",
-					podNS,
-					podName,
-					podUID,
-					status.Name,
-					status.State.Terminated.Reason)
+				if _, disabled := disabledMetrics["kube_pod_container_status_terminated_reason"]; !disabled {
+					ch <- newKubePodContainerStatusTerminatedReasonMetric(
+						"kube_pod_container_status_terminated_reason",
+						podNS,
+						podName,
+						podUID,
+						status.Name,
+						status.State.Terminated.Reason)
+				}
 			}
 		}
 
 		for _, container := range pod.Spec.Containers {
-			// Requests
-			for resourceName, quantity := range container.Resources.Requests {
-				resource, unit, value := toResourceUnitValue(resourceName, quantity)
 
-				// failed to parse the resource type
-				if resource == "" {
-					log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
-					continue
+			// Requests
+			if _, disabled := disabledMetrics["kube_pod_container_resource_requests"]; !disabled {
+				for resourceName, quantity := range container.Resources.Requests {
+					resource, unit, value := toResourceUnitValue(resourceName, quantity)
+
+					// failed to parse the resource type
+					if resource == "" {
+						log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
+						continue
+					}
+
+					ch <- newKubePodContainerResourceRequestsMetric(
+						"kube_pod_container_resource_requests",
+						podNS,
+						podName,
+						podUID,
+						container.Name,
+						node,
+						resource,
+						unit,
+						value)
 				}
-
-				ch <- newKubePodContainerResourceRequestsMetric(
-					"kube_pod_container_resource_requests",
-					podNS,
-					podName,
-					podUID,
-					container.Name,
-					node,
-					resource,
-					unit,
-					value)
 			}
 
 			// Limits
@@ -154,37 +207,42 @@ func (kpmc KubePodCollector) Collect(ch chan<- prometheus.Metric) {
 				}
 
 				// KSM v1 Emission
-				if resource == "cpu" {
-					ch <- newKubePodContainerResourceLimitsCPUCoresMetric(
-						"kube_pod_container_resource_limits_cpu_cores",
-						podNS,
-						podName,
-						podUID,
-						container.Name,
-						node,
-						value)
+				if _, disabled := disabledMetrics["kube_pod_container_resource_limits_cpu_cores"]; !disabled {
+					if resource == "cpu" {
+						ch <- newKubePodContainerResourceLimitsCPUCoresMetric(
+							"kube_pod_container_resource_limits_cpu_cores",
+							podNS,
+							podName,
+							podUID,
+							container.Name,
+							node,
+							value)
+					}
+				}
+				if _, disabled := disabledMetrics["kube_pod_container_resource_limits_memory_bytes"]; !disabled {
+					if resource == "memory" {
+						ch <- newKubePodContainerResourceLimitsMemoryBytesMetric(
+							"kube_pod_container_resource_limits_memory_bytes",
+							podNS,
+							podName,
+							podUID,
+							container.Name,
+							node,
+							value)
+					}
 				}
-				if resource == "memory" {
-					ch <- newKubePodContainerResourceLimitsMemoryBytesMetric(
-						"kube_pod_container_resource_limits_memory_bytes",
+				if _, disabled := disabledMetrics["kube_pod_container_resource_limits"]; !disabled {
+					ch <- newKubePodContainerResourceLimitsMetric(
+						"kube_pod_container_resource_limits",
 						podNS,
 						podName,
 						podUID,
 						container.Name,
 						node,
+						resource,
+						unit,
 						value)
 				}
-
-				ch <- newKubePodContainerResourceLimitsMetric(
-					"kube_pod_container_resource_limits",
-					podNS,
-					podName,
-					podUID,
-					container.Name,
-					node,
-					resource,
-					unit,
-					value)
 			}
 		}
 	}

+ 17 - 4
pkg/metrics/pvcmetrics.go

@@ -14,25 +14,38 @@ import (
 // KubePVCCollector is a prometheus collector that generates pvc sourced metrics
 type KubePVCCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics collected by this Collector.
 func (kpvc KubePVCCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_persistentvolumeclaim_resource_requests_storage_bytes", "The pvc storage resource requests in bytes", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_persistentvolumeclaim_info", "The pvc storage resource requests in bytes", []string{}, nil)
+	disabledMetrics := kpvc.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_persistentvolumeclaim_resource_requests_storage_bytes"]; !disabled {
+		ch <- prometheus.NewDesc("kube_persistentvolumeclaim_resource_requests_storage_bytes", "The pvc storage resource requests in bytes", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_persistentvolumeclaim_info"]; !disabled {
+		ch <- prometheus.NewDesc("kube_persistentvolumeclaim_info", "The pvc storage resource requests in bytes", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpvc KubePVCCollector) Collect(ch chan<- prometheus.Metric) {
 	pvcs := kpvc.KubeClusterCache.GetAllPersistentVolumeClaims()
+	disabledMetrics := kpvc.metricsConfig.GetDisabledMetricsMap()
+
 	for _, pvc := range pvcs {
 		storageClass := getPersistentVolumeClaimClass(pvc)
 		volume := pvc.Spec.VolumeName
 
-		ch <- newKubePVCInfoMetric("kube_persistentvolumeclaim_info", pvc.Name, pvc.Namespace, storageClass, volume)
+		if _, disabled := disabledMetrics["kube_persistentvolumeclaim_info"]; !disabled {
+			ch <- newKubePVCInfoMetric("kube_persistentvolumeclaim_info", pvc.Name, pvc.Namespace, storageClass, volume)
+		}
 
 		if storage, ok := pvc.Spec.Resources.Requests[v1.ResourceStorage]; ok {
-			ch <- newKubePVCResourceRequestsStorageBytesMetric("kube_persistentvolumeclaim_resource_requests_storage_bytes", pvc.Name, pvc.Namespace, float64(storage.Value()))
+			if _, disabled := disabledMetrics["kube_persistentvolumeclaim_resource_requests_storage_bytes"]; !disabled {
+				ch <- newKubePVCResourceRequestsStorageBytesMetric("kube_persistentvolumeclaim_resource_requests_storage_bytes", pvc.Name, pvc.Namespace, float64(storage.Value()))
+			}
 		}
 	}
 }

+ 33 - 20
pkg/metrics/pvmetrics.go

@@ -14,41 +14,54 @@ import (
 // KubePVCollector is a prometheus collector that generates PV metrics
 type KubePVCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (kpvcb KubePVCollector) Describe(ch chan<- *prometheus.Desc) {
-	ch <- prometheus.NewDesc("kube_persistentvolume_capacity_bytes", "The pv storage capacity in bytes", []string{}, nil)
-	ch <- prometheus.NewDesc("kube_persistentvolume_status_phase", "The phase indicates if a volume is available, bound to a claim, or released by a claim.", []string{}, nil)
+	disabledMetrics := kpvcb.metricsConfig.GetDisabledMetricsMap()
+
+	if _, disabled := disabledMetrics["kube_persistentvolume_capacity_bytes"]; !disabled {
+		ch <- prometheus.NewDesc("kube_persistentvolume_capacity_bytes", "The pv storage capacity in bytes", []string{}, nil)
+	}
+	if _, disabled := disabledMetrics["kube_persistentvolume_status_phase"]; !disabled {
+		ch <- prometheus.NewDesc("kube_persistentvolume_status_phase", "The phase indicates if a volume is available, bound to a claim, or released by a claim.", []string{}, nil)
+	}
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpvcb KubePVCollector) Collect(ch chan<- prometheus.Metric) {
 	pvs := kpvcb.KubeClusterCache.GetAllPersistentVolumes()
-	for _, pv := range pvs {
-		phase := pv.Status.Phase
-		if phase != "" {
-			phases := []struct {
-				v bool
-				n string
-			}{
-				{phase == v1.VolumePending, string(v1.VolumePending)},
-				{phase == v1.VolumeAvailable, string(v1.VolumeAvailable)},
-				{phase == v1.VolumeBound, string(v1.VolumeBound)},
-				{phase == v1.VolumeReleased, string(v1.VolumeReleased)},
-				{phase == v1.VolumeFailed, string(v1.VolumeFailed)},
-			}
+	disabledMetrics := kpvcb.metricsConfig.GetDisabledMetricsMap()
 
-			for _, p := range phases {
-				ch <- newKubePVStatusPhaseMetric("kube_persistentvolume_status_phase", pv.Name, p.n, boolFloat64(p.v))
+	for _, pv := range pvs {
+		if _, disabled := disabledMetrics["kube_persistentvolume_status_phase"]; !disabled {
+			phase := pv.Status.Phase
+			if phase != "" {
+				phases := []struct {
+					v bool
+					n string
+				}{
+					{phase == v1.VolumePending, string(v1.VolumePending)},
+					{phase == v1.VolumeAvailable, string(v1.VolumeAvailable)},
+					{phase == v1.VolumeBound, string(v1.VolumeBound)},
+					{phase == v1.VolumeReleased, string(v1.VolumeReleased)},
+					{phase == v1.VolumeFailed, string(v1.VolumeFailed)},
+				}
+
+				for _, p := range phases {
+					ch <- newKubePVStatusPhaseMetric("kube_persistentvolume_status_phase", pv.Name, p.n, boolFloat64(p.v))
+				}
 			}
 		}
 
-		storage := pv.Spec.Capacity[v1.ResourceStorage]
-		m := newKubePVCapacityBytesMetric("kube_persistentvolume_capacity_bytes", pv.Name, float64(storage.Value()))
+		if _, disabled := disabledMetrics["kube_persistentvolume_capacity_bytes"]; !disabled {
+			storage := pv.Spec.Capacity[v1.ResourceStorage]
+			m := newKubePVCapacityBytesMetric("kube_persistentvolume_capacity_bytes", pv.Name, float64(storage.Value()))
 
-		ch <- m
+			ch <- m
+		}
 	}
 }
 

+ 13 - 0
pkg/metrics/servicemetrics.go

@@ -15,16 +15,28 @@ import (
 // KubecostServiceCollector is a prometheus collector that generates service sourced metrics.
 type KubecostServiceCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (sc KubecostServiceCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := sc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["service_selector_labels"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("service_selector_labels", "service selector labels", []string{}, nil)
+
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (sc KubecostServiceCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := sc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["service_selector_labels"]; disabled {
+		return
+	}
+
 	svcs := sc.KubeClusterCache.GetAllServices()
 	for _, svc := range svcs {
 		serviceName := svc.GetName()
@@ -36,6 +48,7 @@ func (sc KubecostServiceCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- m
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------

+ 12 - 0
pkg/metrics/statefulsetmetrics.go

@@ -15,16 +15,27 @@ import (
 // StatefulsetCollector is a prometheus collector that generates StatefulsetMetrics
 type KubecostStatefulsetCollector struct {
 	KubeClusterCache clustercache.ClusterCache
+	metricsConfig    MetricsConfig
 }
 
 // Describe sends the super-set of all possible descriptors of metrics
 // collected by this Collector.
 func (sc KubecostStatefulsetCollector) Describe(ch chan<- *prometheus.Desc) {
+	disabledMetrics := sc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["statefulSet_match_labels"]; disabled {
+		return
+	}
+
 	ch <- prometheus.NewDesc("statefulSet_match_labels", "statfulSet match labels", []string{}, nil)
 }
 
 // Collect is called by the Prometheus registry when collecting metrics.
 func (sc KubecostStatefulsetCollector) Collect(ch chan<- prometheus.Metric) {
+	disabledMetrics := sc.metricsConfig.GetDisabledMetricsMap()
+	if _, disabled := disabledMetrics["statefulSet_match_labels"]; disabled {
+		return
+	}
+
 	ds := sc.KubeClusterCache.GetAllStatefulSets()
 	for _, statefulset := range ds {
 		statefulsetName := statefulset.GetName()
@@ -36,6 +47,7 @@ func (sc KubecostStatefulsetCollector) Collect(ch chan<- prometheus.Metric) {
 			ch <- m
 		}
 	}
+
 }
 
 //--------------------------------------------------------------------------