فهرست منبع

Data Model 2.0 - Add UID support and tests for all K8s resource metrics (Combined PR) (#3366)

Signed-off-by: Sparsh <sparsh.raj30@gmail.com>
Sparsh Raj 8 ماه پیش
والد
کامیت
0ea2743e1e

+ 20 - 0
core/pkg/clustercache/clustercache.go

@@ -15,6 +15,7 @@ import (
 )
 
 type Namespace struct {
+	UID         types.UID
 	Name        string
 	Labels      map[string]string
 	Annotations map[string]string
@@ -51,6 +52,7 @@ type Container struct {
 }
 
 type Node struct {
+	UID            types.UID
 	Name           string
 	Labels         map[string]string
 	Annotations    map[string]string
@@ -59,6 +61,7 @@ type Node struct {
 }
 
 type Service struct {
+	UID          types.UID
 	Name         string
 	Namespace    string
 	SpecSelector map[string]string
@@ -75,6 +78,7 @@ type DaemonSet struct {
 }
 
 type Deployment struct {
+	UID                     types.UID
 	Name                    string
 	Namespace               string
 	Labels                  map[string]string
@@ -88,6 +92,7 @@ type Deployment struct {
 }
 
 type StatefulSet struct {
+	UID          types.UID
 	Name         string
 	Namespace    string
 	Labels       map[string]string
@@ -98,6 +103,7 @@ type StatefulSet struct {
 }
 
 type PersistentVolumeClaim struct {
+	UID         types.UID
 	Name        string
 	Namespace   string
 	Spec        v1.PersistentVolumeClaimSpec
@@ -116,12 +122,14 @@ type StorageClass struct {
 }
 
// Job is the cluster-cache representation of a Kubernetes Job, carrying
// only the identifying metadata and the raw status needed by metrics.
type Job struct {
	UID       types.UID // unique identifier assigned by the API server
	Name      string
	Namespace string
	Status    batchv1.JobStatus // raw Job status (failed/succeeded counts, conditions)
}
 
 type PersistentVolume struct {
+	UID         types.UID
 	Name        string
 	Namespace   string
 	Labels      map[string]string
@@ -144,6 +152,7 @@ type PodDisruptionBudget struct {
 }
 
 type ReplicaSet struct {
+	UID             types.UID
 	Name            string
 	Namespace       string
 	OwnerReferences []metav1.OwnerReference
@@ -181,6 +190,7 @@ func GetControllerOfNoCopy(pod *Pod) *metav1.OwnerReference {
 
 func TransformNamespace(input *v1.Namespace) *Namespace {
 	return &Namespace{
+		UID:         input.UID,
 		Name:        input.Name,
 		Annotations: input.Annotations,
 		Labels:      input.Labels,
@@ -241,6 +251,7 @@ func TransformPod(input *v1.Pod) *Pod {
 
 func TransformNode(input *v1.Node) *Node {
 	return &Node{
+		UID:            input.UID,
 		Name:           input.Name,
 		Labels:         input.Labels,
 		Annotations:    input.Annotations,
@@ -251,6 +262,7 @@ func TransformNode(input *v1.Node) *Node {
 
 func TransformService(input *v1.Service) *Service {
 	return &Service{
+		UID:          input.UID,
 		Name:         input.Name,
 		Namespace:    input.Namespace,
 		SpecSelector: input.Spec.Selector,
@@ -271,6 +283,7 @@ func TransformDaemonSet(input *appsv1.DaemonSet) *DaemonSet {
 
 func TransformDeployment(input *appsv1.Deployment) *Deployment {
 	return &Deployment{
+		UID:                     input.UID,
 		Name:                    input.Name,
 		Namespace:               input.Namespace,
 		Labels:                  input.Labels,
@@ -290,11 +303,15 @@ func TransformStatefulSet(input *appsv1.StatefulSet) *StatefulSet {
 		SpecSelector: input.Spec.Selector,
 		SpecReplicas: input.Spec.Replicas,
 		PodSpec:      TransformPodSpec(input.Spec.Template.Spec),
+		Labels:       input.Labels,
+		Annotations:  input.Annotations,
+		UID:          input.UID,
 	}
 }
 
 func TransformPersistentVolume(input *v1.PersistentVolume) *PersistentVolume {
 	return &PersistentVolume{
+		UID:         input.UID,
 		Name:        input.Name,
 		Namespace:   input.Namespace,
 		Labels:      input.Labels,
@@ -306,6 +323,7 @@ func TransformPersistentVolume(input *v1.PersistentVolume) *PersistentVolume {
 
 func TransformPersistentVolumeClaim(input *v1.PersistentVolumeClaim) *PersistentVolumeClaim {
 	return &PersistentVolumeClaim{
+		UID:         input.UID,
 		Name:        input.Name,
 		Namespace:   input.Namespace,
 		Spec:        input.Spec,
@@ -328,6 +346,7 @@ func TransformStorageClass(input *stv1.StorageClass) *StorageClass {
 
 func TransformJob(input *batchv1.Job) *Job {
 	return &Job{
+		UID:       input.UID,
 		Name:      input.Name,
 		Namespace: input.Namespace,
 		Status:    input.Status,
@@ -353,6 +372,7 @@ func TransformPodDisruptionBudget(input *policyv1.PodDisruptionBudget) *PodDisru
 
 func TransformReplicaSet(input *appsv1.ReplicaSet) *ReplicaSet {
 	return &ReplicaSet{
+		UID:             input.UID,
 		Name:            input.Name,
 		Namespace:       input.Namespace,
 		OwnerReferences: input.OwnerReferences,

+ 30 - 6
pkg/metrics/deploymentmetrics.go

@@ -41,10 +41,11 @@ func (kdc KubecostDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, deployment := range ds {
 		deploymentName := deployment.Name
 		deploymentNS := deployment.Namespace
+		deploymentUID := string(deployment.UID)
 
 		labels, values := promutil.KubeLabelsToLabels(promutil.SanitizeLabels(deployment.MatchLabels))
 		if len(labels) > 0 {
-			m := newDeploymentMatchLabelsMetric(deploymentName, deploymentNS, "deployment_match_labels", labels, values)
+			m := newDeploymentMatchLabelsMetric(deploymentName, deploymentNS, "deployment_match_labels", labels, values, deploymentUID)
 			ch <- m
 		}
 	}
@@ -63,10 +64,11 @@ type DeploymentMatchLabelsMetric struct {
 	labelValues    []string
 	deploymentName string
 	namespace      string
+	uid            string
 }
 
 // Creates a new DeploymentMatchLabelsMetric, implementation of prometheus.Metric
-func newDeploymentMatchLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string) DeploymentMatchLabelsMetric {
+func newDeploymentMatchLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string, uid string) DeploymentMatchLabelsMetric {
 	return DeploymentMatchLabelsMetric{
 		fqName:         fqname,
 		labelNames:     labelNames,
@@ -74,6 +76,7 @@ func newDeploymentMatchLabelsMetric(name, namespace, fqname string, labelNames,
 		help:           "deployment_match_labels Deployment Match Labels",
 		deploymentName: name,
 		namespace:      namespace,
+		uid:            uid,
 	}
 }
 
@@ -83,6 +86,7 @@ func (dmlm DeploymentMatchLabelsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"deployment": dmlm.deploymentName,
 		"namespace":  dmlm.namespace,
+		"uid":        dmlm.uid,
 	}
 	return prometheus.NewDesc(dmlm.fqName, dmlm.help, dmlm.labelNames, l)
 }
@@ -109,6 +113,10 @@ func (dmlm DeploymentMatchLabelsMetric) Write(m *dto.Metric) error {
 		Name:  toStringPtr("deployment"),
 		Value: &dmlm.deploymentName,
 	})
+	labels = append(labels, &dto.LabelPair{
+		Name:  toStringPtr("uid"),
+		Value: &dmlm.uid,
+	})
 	m.Label = labels
 	return nil
 }
@@ -145,6 +153,7 @@ func (kdc KubeDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, deployment := range deployments {
 		deploymentName := deployment.Name
 		deploymentNS := deployment.Namespace
+		deploymentUID := string(deployment.UID)
 
 		// Replicas Defined
 		var replicas int32
@@ -155,7 +164,7 @@ func (kdc KubeDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 		}
 
 		if _, disabled := disabledMetrics["kube_deployment_spec_replicas"]; !disabled {
-			ch <- newKubeDeploymentReplicasMetric("kube_deployment_spec_replicas", deploymentName, deploymentNS, replicas)
+			ch <- newKubeDeploymentReplicasMetric("kube_deployment_spec_replicas", deploymentName, deploymentNS, replicas, deploymentUID)
 		}
 		if _, disabled := disabledMetrics["kube_deployment_status_replicas_available"]; !disabled {
 			// Replicas Available
@@ -163,7 +172,8 @@ func (kdc KubeDeploymentCollector) Collect(ch chan<- prometheus.Metric) {
 				"kube_deployment_status_replicas_available",
 				deploymentName,
 				deploymentNS,
-				deployment.StatusAvailableReplicas)
+				deployment.StatusAvailableReplicas,
+				deploymentUID)
 		}
 	}
 }
@@ -179,16 +189,18 @@ type KubeDeploymentReplicasMetric struct {
 	deployment string
 	namespace  string
 	replicas   float64
+	uid        string
 }
 
 // Creates a new DeploymentMatchLabelsMetric, implementation of prometheus.Metric
-func newKubeDeploymentReplicasMetric(fqname, deployment, namespace string, replicas int32) KubeDeploymentReplicasMetric {
+func newKubeDeploymentReplicasMetric(fqname, deployment, namespace string, replicas int32, uid string) KubeDeploymentReplicasMetric {
 	return KubeDeploymentReplicasMetric{
 		fqName:     fqname,
 		help:       "kube_deployment_spec_replicas Number of desired pods for a deployment.",
 		deployment: deployment,
 		namespace:  namespace,
 		replicas:   float64(replicas),
+		uid:        uid,
 	}
 }
 
@@ -198,6 +210,7 @@ func (kdr KubeDeploymentReplicasMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"deployment": kdr.deployment,
 		"namespace":  kdr.namespace,
+		"uid":        kdr.uid,
 	}
 	return prometheus.NewDesc(kdr.fqName, kdr.help, []string{}, l)
 }
@@ -217,6 +230,10 @@ func (kdr KubeDeploymentReplicasMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("deployment"),
 			Value: &kdr.deployment,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kdr.uid,
+		},
 	}
 
 	return nil
@@ -233,16 +250,18 @@ type KubeDeploymentStatusAvailableReplicasMetric struct {
 	deployment        string
 	namespace         string
 	replicasAvailable float64
+	uid               string
 }
 
 // Creates a new DeploymentMatchLabelsMetric, implementation of prometheus.Metric
-func newKubeDeploymentStatusAvailableReplicasMetric(fqname, deployment, namespace string, replicasAvailable int32) KubeDeploymentStatusAvailableReplicasMetric {
+func newKubeDeploymentStatusAvailableReplicasMetric(fqname, deployment, namespace string, replicasAvailable int32, uid string) KubeDeploymentStatusAvailableReplicasMetric {
 	return KubeDeploymentStatusAvailableReplicasMetric{
 		fqName:            fqname,
 		help:              "kube_deployment_status_replicas_available The number of available replicas per deployment.",
 		deployment:        deployment,
 		namespace:         namespace,
 		replicasAvailable: float64(replicasAvailable),
+		uid:               uid,
 	}
 }
 
@@ -252,6 +271,7 @@ func (kdr KubeDeploymentStatusAvailableReplicasMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"deployment": kdr.deployment,
 		"namespace":  kdr.namespace,
+		"uid":        kdr.uid,
 	}
 	return prometheus.NewDesc(kdr.fqName, kdr.help, []string{}, l)
 }
@@ -271,6 +291,10 @@ func (kdr KubeDeploymentStatusAvailableReplicasMetric) Write(m *dto.Metric) erro
 			Name:  toStringPtr("deployment"),
 			Value: &kdr.deployment,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kdr.uid,
+		},
 	}
 
 	return nil

+ 541 - 0
pkg/metrics/deploymentmetrics_test.go

@@ -0,0 +1,541 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func TestKubecostDeploymentCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectMetric    bool
+	}{
+		{
+			name:            "deployment_match_labels enabled",
+			disabledMetrics: []string{},
+			expectMetric:    true,
+		},
+		{
+			name:            "deployment_match_labels disabled",
+			disabledMetrics: []string{"deployment_match_labels"},
+			expectMetric:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kdc := KubecostDeploymentCollector{
+				KubeClusterCache: NewFakeDeploymentCache([]*clustercache.Deployment{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10)
+			kdc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if tt.expectMetric && count == 0 {
+				t.Error("Expected metric description but got none")
+			}
+			if !tt.expectMetric && count > 0 {
+				t.Error("Expected no metric description but got some")
+			}
+		})
+	}
+}
+
// TestKubecostDeploymentCollector_Collect verifies that Collect emits exactly
// one deployment_match_labels metric per deployment with a non-empty
// MatchLabels map, and emits nothing when the metric is disabled.
func TestKubecostDeploymentCollector_Collect(t *testing.T) {
	tests := []struct {
		name            string
		deployments     []*clustercache.Deployment
		disabledMetrics []string
		expectedCount   int // number of metrics Collect is expected to emit
	}{
		{
			name: "single deployment with match labels",
			deployments: []*clustercache.Deployment{
				{
					UID:         types.UID("test-uid-1"),
					Name:        "test-deployment",
					Namespace:   "default",
					MatchLabels: map[string]string{"app": "test", "version": "v1"},
				},
			},
			disabledMetrics: []string{},
			expectedCount:   1,
		},
		{
			// Empty MatchLabels should produce no metric for that deployment.
			name: "deployment without match labels",
			deployments: []*clustercache.Deployment{
				{
					UID:         types.UID("test-uid-2"),
					Name:        "empty-deployment",
					Namespace:   "default",
					MatchLabels: map[string]string{},
				},
			},
			disabledMetrics: []string{},
			expectedCount:   0,
		},
		{
			name: "multiple deployments with match labels",
			deployments: []*clustercache.Deployment{
				{
					UID:         types.UID("test-uid-3"),
					Name:        "deployment1",
					Namespace:   "ns1",
					MatchLabels: map[string]string{"app": "app1"},
				},
				{
					UID:         types.UID("test-uid-4"),
					Name:        "deployment2",
					Namespace:   "ns2",
					MatchLabels: map[string]string{"component": "frontend", "tier": "web"},
				},
			},
			disabledMetrics: []string{},
			expectedCount:   2,
		},
		{
			// Disabling deployment_match_labels suppresses emission entirely.
			name: "metric disabled",
			deployments: []*clustercache.Deployment{
				{
					UID:         types.UID("test-uid-5"),
					Name:        "test-deployment",
					Namespace:   "default",
					MatchLabels: map[string]string{"app": "test"},
				},
			},
			disabledMetrics: []string{"deployment_match_labels"},
			expectedCount:   0,
		},
		{
			// Only the deployment that carries labels contributes a metric.
			name: "mixed deployments with and without labels",
			deployments: []*clustercache.Deployment{
				{
					UID:         types.UID("test-uid-6"),
					Name:        "with-labels",
					Namespace:   "default",
					MatchLabels: map[string]string{"app": "test"},
				},
				{
					UID:         types.UID("test-uid-7"),
					Name:        "without-labels",
					Namespace:   "default",
					MatchLabels: map[string]string{},
				},
			},
			disabledMetrics: []string{},
			expectedCount:   1,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mc := MetricsConfig{
				DisabledMetrics: tt.disabledMetrics,
			}
			kdc := KubecostDeploymentCollector{
				KubeClusterCache: NewFakeDeploymentCache(tt.deployments),
				metricsConfig:    mc,
			}

			// Buffered channel is large enough for every case above; Collect
			// runs synchronously, so the buffer must hold all emitted metrics.
			ch := make(chan prometheus.Metric, 10)
			kdc.Collect(ch)
			close(ch)

			count := 0
			for range ch {
				count++
			}

			if count != tt.expectedCount {
				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
			}
		})
	}
}
+
+func TestDeploymentMatchLabelsMetric(t *testing.T) {
+	labelNames := []string{"app", "version", "tier"}
+	labelValues := []string{"myapp", "v2.0", "backend"}
+	uid := "test-deployment-uid"
+
+	metric := newDeploymentMatchLabelsMetric("test-deployment", "production", "deployment_match_labels", labelNames, labelValues, uid)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"app":        "myapp",
+		"version":    "v2.0",
+		"tier":       "backend",
+		"deployment": "test-deployment",
+		"namespace":  "production",
+		"uid":        uid,
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubeDeploymentCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name:            "all metrics enabled",
+			disabledMetrics: []string{},
+			expectedCount:   2,
+		},
+		{
+			name:            "spec replicas disabled",
+			disabledMetrics: []string{"kube_deployment_spec_replicas"},
+			expectedCount:   1,
+		},
+		{
+			name:            "status replicas disabled",
+			disabledMetrics: []string{"kube_deployment_status_replicas_available"},
+			expectedCount:   1,
+		},
+		{
+			name:            "all metrics disabled",
+			disabledMetrics: []string{"kube_deployment_spec_replicas", "kube_deployment_status_replicas_available"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kdc := KubeDeploymentCollector{
+				KubeClusterCache: NewFakeDeploymentCache([]*clustercache.Deployment{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10)
+			kdc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
// TestKubeDeploymentCollector_Collect verifies that Collect emits the spec
// and status replica metrics per deployment, that a nil SpecReplicas still
// yields a spec metric (defaulted to 1), and that disabled metrics are
// suppressed.
func TestKubeDeploymentCollector_Collect(t *testing.T) {
	replicas3 := int32(3)
	replicas0 := int32(0)

	tests := []struct {
		name            string
		deployments     []*clustercache.Deployment
		disabledMetrics []string
		expectedCount   int // total metrics across all deployments
	}{
		{
			name: "deployment with explicit replicas",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-1"),
					Name:                    "test-deployment",
					Namespace:               "default",
					SpecReplicas:            &replicas3,
					StatusAvailableReplicas: 2,
				},
			},
			disabledMetrics: []string{},
			expectedCount:   2, // spec replicas + status available replicas
		},
		{
			name: "deployment with nil replicas defaults to 1",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-2"),
					Name:                    "default-replicas",
					Namespace:               "default",
					SpecReplicas:            nil,
					StatusAvailableReplicas: 1,
				},
			},
			disabledMetrics: []string{},
			expectedCount:   2,
		},
		{
			// Zero is a legitimate replica count and must still be reported.
			name: "deployment with zero replicas",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-3"),
					Name:                    "zero-replicas",
					Namespace:               "default",
					SpecReplicas:            &replicas0,
					StatusAvailableReplicas: 0,
				},
			},
			disabledMetrics: []string{},
			expectedCount:   2,
		},
		{
			name: "multiple deployments",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-4"),
					Name:                    "deployment1",
					Namespace:               "ns1",
					SpecReplicas:            &replicas3,
					StatusAvailableReplicas: 3,
				},
				{
					UID:                     types.UID("test-uid-5"),
					Name:                    "deployment2",
					Namespace:               "ns2",
					SpecReplicas:            nil,
					StatusAvailableReplicas: 0,
				},
			},
			disabledMetrics: []string{},
			expectedCount:   4, // 2 metrics per deployment
		},
		{
			name: "spec replicas disabled",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-6"),
					Name:                    "test-deployment",
					Namespace:               "default",
					SpecReplicas:            &replicas3,
					StatusAvailableReplicas: 2,
				},
			},
			disabledMetrics: []string{"kube_deployment_spec_replicas"},
			expectedCount:   1, // only status available replicas
		},
		{
			name: "status replicas disabled",
			deployments: []*clustercache.Deployment{
				{
					UID:                     types.UID("test-uid-7"),
					Name:                    "test-deployment",
					Namespace:               "default",
					SpecReplicas:            &replicas3,
					StatusAvailableReplicas: 2,
				},
			},
			disabledMetrics: []string{"kube_deployment_status_replicas_available"},
			expectedCount:   1, // only spec replicas
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mc := MetricsConfig{
				DisabledMetrics: tt.disabledMetrics,
			}
			kdc := KubeDeploymentCollector{
				KubeClusterCache: NewFakeDeploymentCache(tt.deployments),
				metricsConfig:    mc,
			}

			// Collect runs synchronously; buffer of 10 covers the at-most-4
			// metrics any case above can emit.
			ch := make(chan prometheus.Metric, 10)
			kdc.Collect(ch)
			close(ch)

			count := 0
			for range ch {
				count++
			}

			if count != tt.expectedCount {
				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
			}
		})
	}
}
+
+func TestKubeDeploymentReplicasMetric(t *testing.T) {
+	metric := newKubeDeploymentReplicasMetric("kube_deployment_spec_replicas", "web-app", "production", 5, "deployment-uid")
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 5.0 {
+		t.Errorf("Expected gauge value 5.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"deployment": "web-app",
+		"namespace":  "production",
+		"uid":        "deployment-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubeDeploymentStatusAvailableReplicasMetric(t *testing.T) {
+	metric := newKubeDeploymentStatusAvailableReplicasMetric("kube_deployment_status_replicas_available", "api-server", "backend", 3, "api-uid")
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 3.0 {
+		t.Errorf("Expected gauge value 3.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"deployment": "api-server",
+		"namespace":  "backend",
+		"uid":        "api-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubeDeploymentCollector_DefaultReplicas(t *testing.T) {
+	// Test that nil replicas defaults to 1
+	deployment := &clustercache.Deployment{
+		UID:                     types.UID("test-uid"),
+		Name:                    "test-deployment",
+		Namespace:               "default",
+		SpecReplicas:            nil,
+		StatusAvailableReplicas: 0,
+	}
+
+	mc := MetricsConfig{
+		DisabledMetrics: []string{"kube_deployment_status_replicas_available"}, // Only test spec replicas
+	}
+	kdc := KubeDeploymentCollector{
+		KubeClusterCache: NewFakeDeploymentCache([]*clustercache.Deployment{deployment}),
+		metricsConfig:    mc,
+	}
+
+	ch := make(chan prometheus.Metric, 10)
+	kdc.Collect(ch)
+	close(ch)
+
+	for metric := range ch {
+		var dtoMetric dto.Metric
+		metric.Write(&dtoMetric)
+		if *dtoMetric.Gauge.Value != 1.0 {
+			t.Errorf("Expected default replicas value 1.0, got %f", *dtoMetric.Gauge.Value)
+		}
+	}
+}
+
// FakeDeploymentCache implements ClusterCache interface for testing.
// Only GetAllDeployments is overridden; every other ClusterCache method is
// inherited from the embedded nil interface and would panic if called.
type FakeDeploymentCache struct {
	clustercache.ClusterCache
	deployments []*clustercache.Deployment
}

// GetAllDeployments returns the canned deployment list supplied at construction.
func (f FakeDeploymentCache) GetAllDeployments() []*clustercache.Deployment {
	return f.deployments
}

// NewFakeDeploymentCache builds a FakeDeploymentCache serving the given deployments.
func NewFakeDeploymentCache(deployments []*clustercache.Deployment) FakeDeploymentCache {
	return FakeDeploymentCache{
		deployments: deployments,
	}
}

+ 12 - 4
pkg/metrics/jobmetrics.go

@@ -43,9 +43,10 @@ func (kjc KubeJobCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, job := range jobs {
 		jobName := job.Name
 		jobNS := job.Namespace
+		jobUID := string(job.UID)
 
 		if job.Status.Failed == 0 {
-			ch <- newKubeJobStatusFailedMetric(jobName, jobNS, "kube_job_status_failed", "", 0)
+			ch <- newKubeJobStatusFailedMetric(jobName, jobNS, jobUID, "kube_job_status_failed", "", 0)
 		} else {
 			for _, condition := range job.Status.Conditions {
 				if condition.Type == batchv1.JobFailed {
@@ -53,12 +54,12 @@ func (kjc KubeJobCollector) Collect(ch chan<- prometheus.Metric) {
 					for _, reason := range jobFailureReasons {
 						reasonKnown = reasonKnown || failureReason(&condition, reason)
 
-						ch <- newKubeJobStatusFailedMetric(jobName, jobNS, "kube_job_status_failed", reason, boolFloat64(failureReason(&condition, reason)))
+						ch <- newKubeJobStatusFailedMetric(jobName, jobNS, jobUID, "kube_job_status_failed", reason, boolFloat64(failureReason(&condition, reason)))
 					}
 
 					// for unknown reasons
 					if !reasonKnown {
-						ch <- newKubeJobStatusFailedMetric(jobName, jobNS, "kube_job_status_failed", "", float64(job.Status.Failed))
+						ch <- newKubeJobStatusFailedMetric(jobName, jobNS, jobUID, "kube_job_status_failed", "", float64(job.Status.Failed))
 					}
 				}
 			}
@@ -77,17 +78,19 @@ type KubeJobStatusFailedMetric struct {
 	help      string
 	job       string
 	namespace string
+	uid       string
 	reason    string
 	value     float64
 }
 
 // Creates a new KubeJobStatusFailedMetric, implementation of prometheus.Metric
-func newKubeJobStatusFailedMetric(job, namespace, fqName, reason string, value float64) KubeJobStatusFailedMetric {
+func newKubeJobStatusFailedMetric(job, namespace, uid, fqName, reason string, value float64) KubeJobStatusFailedMetric {
 	return KubeJobStatusFailedMetric{
 		fqName:    fqName,
 		help:      "kube_job_status_failed Failed job",
 		job:       job,
 		namespace: namespace,
+		uid:       uid,
 		reason:    reason,
 		value:     value,
 	}
@@ -99,6 +102,7 @@ func (kjsf KubeJobStatusFailedMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"job_name":  kjsf.job,
 		"namespace": kjsf.namespace,
+		"uid":       kjsf.uid,
 		"reason":    kjsf.reason,
 	}
 	return prometheus.NewDesc(kjsf.fqName, kjsf.help, []string{}, l)
@@ -119,6 +123,10 @@ func (kjsf KubeJobStatusFailedMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("namespace"),
 			Value: &kjsf.namespace,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kjsf.uid,
+		},
 		{
 			Name:  toStringPtr("reason"),
 			Value: &kjsf.reason,

+ 91 - 0
pkg/metrics/jobmetrics_test.go

@@ -0,0 +1,91 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	batchv1 "k8s.io/api/batch/v1"
+	"k8s.io/apimachinery/pkg/types"
+)
+
// mockJobCache is a ClusterCache test stub that serves a fixed job list.
// Only GetAllJobs is overridden; other methods come from the embedded nil
// interface and would panic if called.
type mockJobCache struct {
	clustercache.ClusterCache
	jobs []*clustercache.Job
}

// GetAllJobs returns the canned job list.
func (m mockJobCache) GetAllJobs() []*clustercache.Job {
	return m.jobs
}
+
+func TestKubeJobCollector_Collect(t *testing.T) {
+	// Test with job that has no failures
+	cache := mockJobCache{
+		jobs: []*clustercache.Job{
+			{
+				Name:      "test-job",
+				Namespace: "default",
+				UID:       types.UID("test-job-uid"),
+				Status:    batchv1.JobStatus{Failed: 0},
+			},
+		},
+	}
+
+	collector := KubeJobCollector{
+		KubeClusterCache: cache,
+		metricsConfig:    MetricsConfig{},
+	}
+
+	ch := make(chan prometheus.Metric, 10)
+	go func() {
+		collector.Collect(ch)
+		close(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 1 {
+		t.Errorf("Expected 1 metric, got %d", count)
+	}
+}
+
+func TestKubeJobStatusFailedMetric_Write(t *testing.T) {
+	metric := newKubeJobStatusFailedMetric(
+		"test-job",
+		"default",
+		"test-job-uid",
+		"kube_job_status_failed",
+		"",
+		0.0,
+	)
+
+	pbMetric := &dto.Metric{}
+	err := metric.Write(pbMetric)
+	if err != nil {
+		t.Fatalf("Write failed: %v", err)
+	}
+
+	if pbMetric.Gauge == nil || *pbMetric.Gauge.Value != 0.0 {
+		t.Error("Expected gauge value 0.0")
+	}
+
+	if len(pbMetric.Label) != 4 { // job_name + namespace + uid + reason
+		t.Errorf("Expected 4 labels, got %d", len(pbMetric.Label))
+	}
+
+	// Verify UID label is present
+	foundUID := false
+	for _, label := range pbMetric.Label {
+		if *label.Name == "uid" && *label.Value == "test-job-uid" {
+			foundUID = true
+			break
+		}
+	}
+	if !foundUID {
+		t.Error("Expected uid label not found")
+	}
+}

+ 29 - 12
pkg/metrics/namespacemetrics.go

@@ -40,10 +40,11 @@ func (nsac KubecostNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
 	namespaces := nsac.KubeClusterCache.GetAllNamespaces()
 	for _, namespace := range namespaces {
 		nsName := namespace.Name
+		nsUID := string(namespace.UID)
 
 		labels, values := promutil.KubeAnnotationsToLabels(namespace.Annotations)
 		if len(labels) > 0 {
-			m := newNamespaceAnnotationsMetric("kube_namespace_annotations", nsName, labels, values)
+			m := newNamespaceAnnotationsMetric("kube_namespace_annotations", nsName, nsUID, labels, values)
 			ch <- m
 		}
 	}
@@ -59,16 +60,18 @@ type NamespaceAnnotationsMetric struct {
 	fqName      string
 	help        string
 	namespace   string
+	uid         string
 	labelNames  []string
 	labelValues []string
 }
 
 // Creates a new NamespaceAnnotationsMetric, implementation of prometheus.Metric
-func newNamespaceAnnotationsMetric(fqname, namespace string, labelNames []string, labelValues []string) NamespaceAnnotationsMetric {
+func newNamespaceAnnotationsMetric(fqname, namespace string, uid string, labelNames []string, labelValues []string) NamespaceAnnotationsMetric {
 	return NamespaceAnnotationsMetric{
 		fqName:      fqname,
 		help:        "kube_namespace_annotations Namespace Annotations",
 		namespace:   namespace,
+		uid:         uid,
 		labelNames:  labelNames,
 		labelValues: labelValues,
 	}
@@ -79,6 +82,7 @@ func newNamespaceAnnotationsMetric(fqname, namespace string, labelNames []string
 func (nam NamespaceAnnotationsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"namespace": nam.namespace,
+		"uid":       nam.uid,
 	}
 	return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
 }
@@ -98,10 +102,15 @@ func (nam NamespaceAnnotationsMetric) Write(m *dto.Metric) error {
 			Value: &nam.labelValues[i],
 		})
 	}
-	labels = append(labels, &dto.LabelPair{
-		Name:  toStringPtr("namespace"),
-		Value: &nam.namespace,
-	})
+	labels = append(labels,
+		&dto.LabelPair{
+			Name:  toStringPtr("namespace"),
+			Value: &nam.namespace,
+		},
+		&dto.LabelPair{
+			Name:  toStringPtr("uid"),
+			Value: &nam.uid,
+		})
 	m.Label = labels
 	return nil
 }
@@ -138,10 +147,11 @@ func (nsac KubeNamespaceCollector) Collect(ch chan<- prometheus.Metric) {
 	namespaces := nsac.KubeClusterCache.GetAllNamespaces()
 	for _, namespace := range namespaces {
 		nsName := namespace.Name
+		nsUID := string(namespace.UID)
 
 		labels, values := promutil.KubeLabelsToLabels(promutil.SanitizeLabels(namespace.Labels))
 		if len(labels) > 0 {
-			m := newKubeNamespaceLabelsMetric("kube_namespace_labels", nsName, labels, values)
+			m := newKubeNamespaceLabelsMetric("kube_namespace_labels", nsName, nsUID, labels, values)
 			ch <- m
 		}
 	}
@@ -157,14 +167,16 @@ type KubeNamespaceLabelsMetric struct {
 	fqName      string
 	help        string
 	namespace   string
+	uid         string
 	labelNames  []string
 	labelValues []string
 }
 
 // Creates a new KubeNamespaceLabelsMetric, implementation of prometheus.Metric
-func newKubeNamespaceLabelsMetric(fqname, namespace string, labelNames []string, labelValues []string) KubeNamespaceLabelsMetric {
+func newKubeNamespaceLabelsMetric(fqname, namespace string, uid string, labelNames []string, labelValues []string) KubeNamespaceLabelsMetric {
 	return KubeNamespaceLabelsMetric{
 		namespace:   namespace,
+		uid:         uid,
 		fqName:      fqname,
 		labelNames:  labelNames,
 		labelValues: labelValues,
@@ -195,10 +207,15 @@ func (nam KubeNamespaceLabelsMetric) Write(m *dto.Metric) error {
 			Value: &nam.labelValues[i],
 		})
 	}
-	labels = append(labels, &dto.LabelPair{
-		Name:  toStringPtr("namespace"),
-		Value: &nam.namespace,
-	})
+	labels = append(labels,
+		&dto.LabelPair{
+			Name:  toStringPtr("namespace"),
+			Value: &nam.namespace,
+		},
+		&dto.LabelPair{
+			Name:  toStringPtr("uid"),
+			Value: &nam.uid,
+		})
 	m.Label = labels
 	return nil
 }

+ 195 - 0
pkg/metrics/namespacemetrics_test.go

@@ -0,0 +1,195 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+type mockNamespaceCache struct {
+	clustercache.ClusterCache // embedded interface so the mock satisfies clustercache.ClusterCache; unoverridden methods are nil and must not be called
+	namespaces []*clustercache.Namespace
+}
+
+func (m mockNamespaceCache) GetAllNamespaces() []*clustercache.Namespace { // returns the canned namespace fixtures
+	return m.namespaces
+}
+
+func TestKubecostNamespaceCollector_Collect(t *testing.T) { // one annotations metric expected per namespace with a non-empty annotation map
+	// Test with namespace that has annotations
+	cache := mockNamespaceCache{
+		namespaces: []*clustercache.Namespace{
+			{
+				Name:        "test-ns",
+				UID:         types.UID("test-uid"),
+				Annotations: map[string]string{"team": "backend"},
+			},
+		},
+	}
+
+	collector := KubecostNamespaceCollector{
+		KubeClusterCache: cache,
+		metricsConfig:    MetricsConfig{},
+	}
+
+	ch := make(chan prometheus.Metric, 10)
+	go func() { // close ch once Collect returns so the range loop below terminates
+		collector.Collect(ch)
+		close(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 1 {
+		t.Errorf("Expected 1 metric, got %d", count)
+	}
+}
+
+func TestKubeNamespaceCollector_Collect(t *testing.T) { // one labels metric expected per namespace with a non-empty label map
+	// Test with namespace that has labels
+	cache := mockNamespaceCache{
+		namespaces: []*clustercache.Namespace{
+			{
+				Name:   "test-ns",
+				UID:    types.UID("test-uid"),
+				Labels: map[string]string{"env": "prod"},
+			},
+		},
+	}
+
+	collector := KubeNamespaceCollector{
+		KubeClusterCache: cache,
+		metricsConfig:    MetricsConfig{},
+	}
+
+	ch := make(chan prometheus.Metric, 10)
+	go func() { // close ch once Collect returns so the range loop below terminates
+		collector.Collect(ch)
+		close(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 1 {
+		t.Errorf("Expected 1 metric, got %d", count)
+	}
+}
+
+func TestNamespaceAnnotationsMetric_Write(t *testing.T) { // verifies Write emits the gauge plus annotation, namespace and uid label pairs
+	metric := newNamespaceAnnotationsMetric(
+		"test_metric",
+		"test-ns",
+		"test-uid",
+		[]string{"team"},
+		[]string{"backend"},
+	)
+
+	pbMetric := &dto.Metric{}
+	err := metric.Write(pbMetric)
+	if err != nil {
+		t.Fatalf("Write failed: %v", err)
+	}
+
+	if pbMetric.Gauge == nil || *pbMetric.Gauge.Value != 1.0 { // the annotations metric is expected to be a gauge fixed at 1
+		t.Error("Expected gauge value 1.0")
+	}
+
+	if len(pbMetric.Label) != 3 { // team + namespace + uid
+		t.Errorf("Expected 3 labels, got %d", len(pbMetric.Label))
+	}
+
+	// Verify UID label exists and has correct value
+	foundUID := false
+	for _, label := range pbMetric.Label {
+		if *label.Name == "uid" && *label.Value == "test-uid" {
+			foundUID = true
+			break
+		}
+	}
+	if !foundUID {
+		t.Error("Expected uid label with value 'test-uid' not found")
+	}
+}
+
+func TestKubeNamespaceLabelsMetric_Write(t *testing.T) { // verifies Write emits the gauge plus label, namespace and uid label pairs
+	metric := newKubeNamespaceLabelsMetric(
+		"test_metric",
+		"test-ns",
+		"test-uid",
+		[]string{"env"},
+		[]string{"prod"},
+	)
+
+	pbMetric := &dto.Metric{}
+	err := metric.Write(pbMetric)
+	if err != nil {
+		t.Fatalf("Write failed: %v", err)
+	}
+
+	if pbMetric.Gauge == nil || *pbMetric.Gauge.Value != 1.0 { // the labels metric is expected to be a gauge fixed at 1
+		t.Error("Expected gauge value 1.0")
+	}
+
+	if len(pbMetric.Label) != 3 { // env + namespace + uid
+		t.Errorf("Expected 3 labels, got %d", len(pbMetric.Label))
+	}
+
+	// Verify UID label exists and has correct value
+	foundUID := false
+	for _, label := range pbMetric.Label {
+		if *label.Name == "uid" && *label.Value == "test-uid" {
+			foundUID = true
+			break
+		}
+	}
+	if !foundUID {
+		t.Error("Expected uid label with value 'test-uid' not found")
+	}
+}
+
+func TestKubecostNamespaceCollector_Describe(t *testing.T) { // Describe is expected to emit exactly one descriptor
+	collector := KubecostNamespaceCollector{metricsConfig: MetricsConfig{}}
+
+	ch := make(chan *prometheus.Desc, 1)
+	go func() {
+		collector.Describe(ch)
+		close(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 1 {
+		t.Errorf("Expected 1 descriptor, got %d", count)
+	}
+}
+
+func TestKubeNamespaceCollector_Describe(t *testing.T) { // Describe is expected to emit exactly one descriptor
+	collector := KubeNamespaceCollector{metricsConfig: MetricsConfig{}}
+
+	ch := make(chan *prometheus.Desc, 1)
+	go func() {
+		collector.Describe(ch)
+		close(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 1 {
+		t.Errorf("Expected 1 descriptor, got %d", count)
+	}
+}

+ 71 - 18
pkg/metrics/nodemetrics.go

@@ -63,6 +63,7 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 
 	for _, node := range nodes {
 		nodeName := node.Name
+		nodeUID := string(node.UID)
 
 		// Node Capacity
 		for resourceName, quantity := range node.Status.Capacity {
@@ -77,18 +78,18 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 			// KSM v1 Emission
 			if _, disabled := disabledMetrics["kube_node_status_capacity_cpu_cores"]; !disabled {
 				if resource == "cpu" {
-					ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, value)
+					ch <- newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", nodeName, nodeUID, value)
 
 				}
 			}
 			if _, disabled := disabledMetrics["kube_node_status_capacity_memory_bytes"]; !disabled {
 				if resource == "memory" {
-					ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, value)
+					ch <- newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", nodeName, nodeUID, value)
 				}
 			}
 
 			if _, disabled := disabledMetrics["kube_node_status_capacity"]; !disabled {
-				ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, value)
+				ch <- newKubeNodeStatusCapacityMetric("kube_node_status_capacity", nodeName, resource, unit, nodeUID, value)
 			}
 		}
 
@@ -105,23 +106,23 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 			// KSM v1 Emission
 			if _, disabled := disabledMetrics["kube_node_status_allocatable_cpu_cores"]; !disabled {
 				if resource == "cpu" {
-					ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value)
+					ch <- newKubeNodeStatusAllocatableCPUCoresMetric("kube_node_status_allocatable_cpu_cores", nodeName, value, nodeUID)
 				}
 			}
 			if _, disabled := disabledMetrics["kube_node_status_allocatable_memory_bytes"]; !disabled {
 				if resource == "memory" {
-					ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value)
+					ch <- newKubeNodeStatusAllocatableMemoryBytesMetric("kube_node_status_allocatable_memory_bytes", nodeName, value, nodeUID)
 				}
 			}
 			if _, disabled := disabledMetrics["kube_node_status_allocatable"]; !disabled {
-				ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value)
+				ch <- newKubeNodeStatusAllocatableMetric("kube_node_status_allocatable", nodeName, resource, unit, value, nodeUID)
 			}
 		}
 
 		// node labels
 		if _, disabled := disabledMetrics["kube_node_labels"]; !disabled {
 			labelNames, labelValues := promutil.KubePrependQualifierToLabels(promutil.SanitizeLabels(node.Labels), "label_")
-			ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues)
+			ch <- newKubeNodeLabelsMetric(nodeName, "kube_node_labels", labelNames, labelValues, nodeUID)
 		}
 
 		// kube_node_status_condition
@@ -131,7 +132,7 @@ func (nsac KubeNodeCollector) Collect(ch chan<- prometheus.Metric) {
 				conditions := getConditions(c.Status)
 
 				for _, cond := range conditions {
-					ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value)
+					ch <- newKubeNodeStatusConditionMetric(nodeName, "kube_node_status_condition", string(c.Type), cond.status, cond.value, nodeUID)
 				}
 			}
 		}
@@ -150,10 +151,11 @@ type KubeNodeStatusCapacityMetric struct {
 	unit     string
 	node     string
 	value    float64
+	uid      string
 }
 
 // Creates a new KubeNodeStatusCapacityMetric, implementation of prometheus.Metric
-func newKubeNodeStatusCapacityMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusCapacityMetric {
+func newKubeNodeStatusCapacityMetric(fqname, node, resource, unit, uid string, value float64) KubeNodeStatusCapacityMetric {
 	return KubeNodeStatusCapacityMetric{
 		fqName:   fqname,
 		help:     "kube_node_status_capacity node capacity",
@@ -161,6 +163,7 @@ func newKubeNodeStatusCapacityMetric(fqname, node, resource, unit string, value
 		resource: resource,
 		unit:     unit,
 		value:    value,
+		uid:      uid,
 	}
 }
 
@@ -171,6 +174,7 @@ func (kpcrr KubeNodeStatusCapacityMetric) Desc() *prometheus.Desc {
 		"node":     kpcrr.node,
 		"resource": kpcrr.resource,
 		"unit":     kpcrr.unit,
+		"uid":      kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -194,6 +198,10 @@ func (kpcrr KubeNodeStatusCapacityMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("unit"),
 			Value: &kpcrr.unit,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }
@@ -210,22 +218,24 @@ type KubeNodeStatusCapacityMemoryBytesMetric struct {
 	help   string
 	bytes  float64
 	node   string
+	uid    string
 }
 
 // Creates a new KubeNodeStatusCapacityMemoryBytesMetric, implementation of prometheus.Metric
-func newKubeNodeStatusCapacityMemoryBytesMetric(fqname string, node string, bytes float64) KubeNodeStatusCapacityMemoryBytesMetric {
+func newKubeNodeStatusCapacityMemoryBytesMetric(fqname string, node string, uid string, bytes float64) KubeNodeStatusCapacityMemoryBytesMetric {
 	return KubeNodeStatusCapacityMemoryBytesMetric{
 		fqName: fqname,
 		help:   "kube_node_status_capacity_memory_bytes Node Capacity Memory Bytes",
 		node:   node,
 		bytes:  bytes,
+		uid:    uid,
 	}
 }
 
 // Desc returns the descriptor for the Metric. This method idempotently
 // returns the same descriptor throughout the lifetime of the Metric.
 func (nam KubeNodeStatusCapacityMemoryBytesMetric) Desc() *prometheus.Desc {
-	l := prometheus.Labels{"node": nam.node}
+	l := prometheus.Labels{"node": nam.node, "uid": nam.uid}
 	return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
 }
 
@@ -240,6 +250,10 @@ func (nam KubeNodeStatusCapacityMemoryBytesMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("node"),
 			Value: &nam.node,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &nam.uid,
+		},
 	}
 	return nil
 }
@@ -256,22 +270,24 @@ type KubeNodeStatusCapacityCPUCoresMetric struct {
 	help   string
 	cores  float64
 	node   string
+	uid    string
 }
 
 // Creates a new KubeNodeStatusCapacityCPUCoresMetric, implementation of prometheus.Metric
-func newKubeNodeStatusCapacityCPUCoresMetric(fqname string, node string, cores float64) KubeNodeStatusCapacityCPUCoresMetric {
+func newKubeNodeStatusCapacityCPUCoresMetric(fqname string, node string, uid string, cores float64) KubeNodeStatusCapacityCPUCoresMetric {
 	return KubeNodeStatusCapacityCPUCoresMetric{
 		fqName: fqname,
 		help:   "kube_node_status_capacity_cpu_cores Node Capacity CPU Cores",
 		cores:  cores,
 		node:   node,
+		uid:    uid,
 	}
 }
 
 // Desc returns the descriptor for the Metric. This method idempotently
 // returns the same descriptor throughout the lifetime of the Metric.
 func (nam KubeNodeStatusCapacityCPUCoresMetric) Desc() *prometheus.Desc {
-	l := prometheus.Labels{"node": nam.node}
+	l := prometheus.Labels{"node": nam.node, "uid": nam.uid}
 	return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
 }
 
@@ -286,6 +302,10 @@ func (nam KubeNodeStatusCapacityCPUCoresMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("node"),
 			Value: &nam.node,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &nam.uid,
+		},
 	}
 	return nil
 }
@@ -303,16 +323,18 @@ type KubeNodeLabelsMetric struct {
 	labelNames  []string
 	labelValues []string
 	node        string
+	uid         string
 }
 
 // Creates a new KubeNodeLabelsMetric, implementation of prometheus.Metric
-func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, labelValues []string) KubeNodeLabelsMetric {
+func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, labelValues []string, uid string) KubeNodeLabelsMetric {
 	return KubeNodeLabelsMetric{
 		fqName:      fqname,
 		labelNames:  labelNames,
 		labelValues: labelValues,
 		help:        "kube_node_labels all labels for each node prefixed with label_",
 		node:        node,
+		uid:         uid,
 	}
 }
 
@@ -321,6 +343,7 @@ func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, la
 func (nam KubeNodeLabelsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"node": nam.node,
+		"uid":  nam.uid,
 	}
 	return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
 }
@@ -343,6 +366,8 @@ func (nam KubeNodeLabelsMetric) Write(m *dto.Metric) error {
 
 	nodeString := "node"
 	labels = append(labels, &dto.LabelPair{Name: &nodeString, Value: &nam.node})
+	uidString := "uid"
+	labels = append(labels, &dto.LabelPair{Name: &uidString, Value: &nam.uid})
 	m.Label = labels
 	return nil
 }
@@ -359,10 +384,11 @@ type KubeNodeStatusConditionMetric struct {
 	condition string
 	status    string
 	value     float64
+	uid       string
 }
 
 // Creates a new KubeNodeStatusConditionMetric, implementation of prometheus.Metric
-func newKubeNodeStatusConditionMetric(node, fqname, condition, status string, value float64) KubeNodeStatusConditionMetric {
+func newKubeNodeStatusConditionMetric(node, fqname, condition, status string, value float64, uid string) KubeNodeStatusConditionMetric {
 	return KubeNodeStatusConditionMetric{
 		fqName:    fqname,
 		help:      "kube_node_status_condition condition status for nodes",
@@ -370,6 +396,7 @@ func newKubeNodeStatusConditionMetric(node, fqname, condition, status string, va
 		condition: condition,
 		status:    status,
 		value:     value,
+		uid:       uid,
 	}
 }
 
@@ -380,6 +407,7 @@ func (nam KubeNodeStatusConditionMetric) Desc() *prometheus.Desc {
 		"node":      nam.node,
 		"condition": nam.condition,
 		"status":    nam.status,
+		"uid":       nam.uid,
 	}
 	return prometheus.NewDesc(nam.fqName, nam.help, []string{}, l)
 }
@@ -403,6 +431,10 @@ func (nam KubeNodeStatusConditionMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("status"),
 			Value: &nam.status,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &nam.uid,
+		},
 	}
 	return nil
 }
@@ -439,10 +471,11 @@ type KubeNodeStatusAllocatableMetric struct {
 	unit     string
 	node     string
 	value    float64
+	uid      string
 }
 
 // Creates a new KubeNodeStatusAllocatableMetric, implementation of prometheus.Metric
-func newKubeNodeStatusAllocatableMetric(fqname, node, resource, unit string, value float64) KubeNodeStatusAllocatableMetric {
+func newKubeNodeStatusAllocatableMetric(fqname, node, resource, unit string, value float64, uid string) KubeNodeStatusAllocatableMetric {
 	return KubeNodeStatusAllocatableMetric{
 		fqName:   fqname,
 		help:     "kube_node_status_allocatable node allocatable",
@@ -450,6 +483,7 @@ func newKubeNodeStatusAllocatableMetric(fqname, node, resource, unit string, val
 		resource: resource,
 		unit:     unit,
 		value:    value,
+		uid:      uid,
 	}
 }
 
@@ -460,6 +494,7 @@ func (kpcrr KubeNodeStatusAllocatableMetric) Desc() *prometheus.Desc {
 		"node":     kpcrr.node,
 		"resource": kpcrr.resource,
 		"unit":     kpcrr.unit,
+		"uid":      kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -483,6 +518,10 @@ func (kpcrr KubeNodeStatusAllocatableMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("unit"),
 			Value: &kpcrr.unit,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }
@@ -499,15 +538,17 @@ type KubeNodeStatusAllocatableCPUCoresMetric struct {
 	unit     string
 	node     string
 	value    float64
+	uid      string
 }
 
 // Creates a new KubeNodeStatusAllocatableCPUCoresMetric, implementation of prometheus.Metric
-func newKubeNodeStatusAllocatableCPUCoresMetric(fqname, node string, value float64) KubeNodeStatusAllocatableCPUCoresMetric {
+func newKubeNodeStatusAllocatableCPUCoresMetric(fqname, node string, value float64, uid string) KubeNodeStatusAllocatableCPUCoresMetric {
 	return KubeNodeStatusAllocatableCPUCoresMetric{
 		fqName: fqname,
 		help:   "kube_node_status_allocatable_cpu_cores node allocatable cpu cores",
 		node:   node,
 		value:  value,
+		uid:    uid,
 	}
 }
 
@@ -516,6 +557,7 @@ func newKubeNodeStatusAllocatableCPUCoresMetric(fqname, node string, value float
 func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"node": kpcrr.node,
+		"uid":  kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -531,6 +573,10 @@ func (kpcrr KubeNodeStatusAllocatableCPUCoresMetric) Write(m *dto.Metric) error
 			Name:  toStringPtr("node"),
 			Value: &kpcrr.node,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }
@@ -547,15 +593,17 @@ type KubeNodeStatusAllocatableMemoryBytesMetric struct {
 	unit     string
 	node     string
 	value    float64
+	uid      string
 }
 
 // Creates a new KubeNodeStatusAllocatableMemoryBytesMetric, implementation of prometheus.Metric
-func newKubeNodeStatusAllocatableMemoryBytesMetric(fqname, node string, value float64) KubeNodeStatusAllocatableMemoryBytesMetric {
+func newKubeNodeStatusAllocatableMemoryBytesMetric(fqname, node string, value float64, uid string) KubeNodeStatusAllocatableMemoryBytesMetric {
 	return KubeNodeStatusAllocatableMemoryBytesMetric{
 		fqName: fqname,
 		help:   "kube_node_status_allocatable_memory_bytes node allocatable memory in bytes",
 		node:   node,
 		value:  value,
+		uid:    uid,
 	}
 }
 
@@ -564,6 +612,7 @@ func newKubeNodeStatusAllocatableMemoryBytesMetric(fqname, node string, value fl
 func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"node": kpcrr.node,
+		"uid":  kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -579,6 +628,10 @@ func (kpcrr KubeNodeStatusAllocatableMemoryBytesMetric) Write(m *dto.Metric) err
 			Name:  toStringPtr("node"),
 			Value: &kpcrr.node,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }

+ 454 - 0
pkg/metrics/nodemetrics_test.go

@@ -0,0 +1,454 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func TestKubeNodeCollector_Describe(t *testing.T) { // verifies Describe emits one descriptor per enabled node metric
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name:            "all metrics enabled",
+			disabledMetrics: []string{},
+			expectedCount:   8, // one descriptor for each of the eight node metrics named in the case below
+		},
+		{
+			name:            "capacity metric disabled",
+			disabledMetrics: []string{"kube_node_status_capacity"},
+			expectedCount:   7,
+		},
+		{
+			name:            "all metrics disabled",
+			disabledMetrics: []string{"kube_node_status_capacity", "kube_node_status_capacity_memory_bytes", "kube_node_status_capacity_cpu_cores", "kube_node_status_allocatable", "kube_node_status_allocatable_cpu_cores", "kube_node_status_allocatable_memory_bytes", "kube_node_labels", "kube_node_status_condition"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			nc := KubeNodeCollector{
+				KubeClusterCache: NewFakeNodeCache([]*clustercache.Node{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10) // buffer must hold every descriptor: Describe runs synchronously here
+			nc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestKubeNodeCollector_Collect(t *testing.T) { // table-driven check of the total number of metrics Collect emits per node set
+	tests := []struct {
+		name            string
+		nodes           []*clustercache.Node
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name: "single node with resources",
+			nodes: []*clustercache.Node{
+				{
+					UID:  types.UID("node-uid-1"),
+					Name: "node-1",
+					Labels: map[string]string{
+						"app": "test",
+					},
+					Status: v1.NodeStatus{
+						Capacity: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("4"),
+							v1.ResourceMemory: resource.MustParse("8Gi"),
+						},
+						Allocatable: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("3.8"),
+							v1.ResourceMemory: resource.MustParse("7.5Gi"),
+						},
+						Conditions: []v1.NodeCondition{
+							{
+								Type:   v1.NodeReady,
+								Status: v1.ConditionTrue,
+							},
+						},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   12, // 2 capacity + 2 capacity specific + 2 allocatable + 2 allocatable specific + 1 labels + 3 conditions
+		},
+		{
+			name: "multiple_nodes",
+			nodes: []*clustercache.Node{
+				{
+					Name:   "node-1",
+					Labels: map[string]string{}, // kube_node_labels is still emitted (with only node/uid); only condition metrics are avoided here
+					Status: v1.NodeStatus{
+						Capacity: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("4"),
+							v1.ResourceMemory: resource.MustParse("8Gi"),
+						},
+						Allocatable: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("3"),
+							v1.ResourceMemory: resource.MustParse("7Gi"),
+						},
+						Conditions: []v1.NodeCondition{}, // Empty conditions to avoid condition metrics
+					},
+					UID: types.UID("test-node-1-uid"),
+				},
+				{
+					Name:   "node-2",
+					Labels: map[string]string{}, // kube_node_labels is still emitted (with only node/uid); only condition metrics are avoided here
+					Status: v1.NodeStatus{
+						Capacity: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("4"),
+							v1.ResourceMemory: resource.MustParse("8Gi"),
+						},
+						Allocatable: v1.ResourceList{
+							v1.ResourceCPU:    resource.MustParse("3"),
+							v1.ResourceMemory: resource.MustParse("7Gi"),
+						},
+						Conditions: []v1.NodeCondition{}, // Empty conditions to avoid condition metrics
+					},
+					UID: types.UID("test-node-2-uid"),
+				},
+			},
+
+			expectedCount: 18, // 9 metrics per node × 2 nodes (4 capacity + 4 allocatable + 1 labels each)
+		},
+		{
+			name:            "no nodes",
+			nodes:           []*clustercache.Node{},
+			disabledMetrics: []string{},
+			expectedCount:   0,
+		},
+		{
+			name: "metrics disabled",
+			nodes: []*clustercache.Node{
+				{
+					UID:  types.UID("node-uid-1"),
+					Name: "node-1",
+					Status: v1.NodeStatus{
+						Capacity: v1.ResourceList{
+							v1.ResourceCPU: resource.MustParse("2"),
+						},
+					},
+				},
+			},
+			disabledMetrics: []string{"kube_node_status_capacity", "kube_node_status_capacity_cpu_cores", "kube_node_labels"},
+			expectedCount:   0, // remaining metrics emit nothing: no memory capacity, no allocatable, no conditions
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			nc := KubeNodeCollector{
+				KubeClusterCache: NewFakeNodeCache(tt.nodes),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan prometheus.Metric, 20) // buffer must exceed the largest expectedCount: Collect runs synchronously here
+			nc.Collect(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestKubeNodeStatusCapacityMetric(t *testing.T) { // verifies Desc/Write and all four labels (node, resource, unit, uid)
+	metric := newKubeNodeStatusCapacityMetric("kube_node_status_capacity", "test-node", "cpu", "core", "test-uid", 4.0)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 4.0 { // gauge carries the capacity value passed to the constructor
+		t.Errorf("Expected gauge value 4.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"node":     "test-node",
+		"resource": "cpu",
+		"unit":     "core",
+		"uid":      "test-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Expected label %s=%s, got %s=%s", key, expectedValue, key, actualValue)
+		}
+	}
+}
+
+func TestKubeNodeLabelsMetric(t *testing.T) { // verifies Desc/Write and the prefixed labels plus node and uid labels
+	labelNames := []string{"app", "version"}
+	labelValues := []string{"test-app", "v1.0"}
+	uid := "test-uid"
+
+	metric := newKubeNodeLabelsMetric("test-node", "kube_node_labels", labelNames, labelValues, uid)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 { // the labels metric is expected to be a gauge fixed at 1
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"app":     "test-app",
+		"version": "v1.0",
+		"node":    "test-node",
+		"uid":     uid,
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Expected label %s=%s, got %s=%s", key, expectedValue, key, actualValue)
+		}
+	}
+}
+
+func TestKubeNodeStatusConditionMetric(t *testing.T) { // verifies Desc/Write and the node/condition/status/uid labels
+	metric := newKubeNodeStatusConditionMetric("test-node", "kube_node_status_condition", "Ready", "true", 1.0, "test-uid")
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 { // gauge carries the condition value passed to the constructor
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"node":      "test-node",
+		"condition": "Ready",
+		"status":    "true",
+		"uid":       "test-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Expected label %s=%s, got %s=%s", key, expectedValue, key, actualValue)
+		}
+	}
+}
+
+func TestKubeNodeStatusCapacityMemoryBytesMetric(t *testing.T) { // verifies Desc/Write; NOTE(review): node/uid labels are not asserted here, unlike the sibling capacity test
+	metric := newKubeNodeStatusCapacityMemoryBytesMetric("kube_node_status_capacity_memory_bytes", "test-node", "test-uid", 8589934592.0)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 8589934592.0 { // 8Gi expressed in bytes
+		t.Errorf("Expected gauge value 8589934592.0, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestKubeNodeStatusCapacityCPUCoresMetric(t *testing.T) { // verifies Desc/Write; NOTE(review): node/uid labels are not asserted here, unlike the sibling capacity test
+	metric := newKubeNodeStatusCapacityCPUCoresMetric("kube_node_status_capacity_cpu_cores", "test-node", "test-uid", 4.0)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 4.0 { // gauge carries the core count passed to the constructor
+		t.Errorf("Expected gauge value 4.0, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestGetConditions(t *testing.T) { // verifies getConditions fans one status out into true/false/unknown entries with a single 1.0
+	tests := []struct {
+		name           string
+		status         v1.ConditionStatus
+		expectedValues map[string]float64
+	}{
+		{
+			name:   "condition true",
+			status: v1.ConditionTrue,
+			expectedValues: map[string]float64{
+				"true":    1.0,
+				"false":   0.0,
+				"unknown": 0.0,
+			},
+		},
+		{
+			name:   "condition false",
+			status: v1.ConditionFalse,
+			expectedValues: map[string]float64{
+				"true":    0.0,
+				"false":   1.0,
+				"unknown": 0.0,
+			},
+		},
+		{
+			name:   "condition unknown",
+			status: v1.ConditionUnknown,
+			expectedValues: map[string]float64{
+				"true":    0.0,
+				"false":   0.0,
+				"unknown": 1.0,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			conditions := getConditions(tt.status)
+
+			if len(conditions) != 3 { // one entry per possible status value
+				t.Errorf("Expected 3 conditions, got %d", len(conditions))
+			}
+
+			actualValues := make(map[string]float64)
+			for _, cond := range conditions {
+				actualValues[cond.status] = cond.value
+			}
+
+			for status, expectedValue := range tt.expectedValues {
+				if actualValue, ok := actualValues[status]; !ok {
+					t.Errorf("Missing status %s", status)
+				} else if actualValue != expectedValue {
+					t.Errorf("Expected status %s=%f, got %f", status, expectedValue, actualValue)
+				}
+			}
+		})
+	}
+}
+
+// FakeNodeCache implements the ClusterCache interface for testing; only GetAllNodes returns data
+type FakeNodeCache struct {
+	clustercache.ClusterCache // embedded interface so the fake satisfies clustercache.ClusterCache; unoverridden methods are nil and must not be called
+	nodes []*clustercache.Node
+}
+
+func (f FakeNodeCache) GetAllNodes() []*clustercache.Node { // returns the canned node fixtures
+	return f.nodes
+}
+
+// NewFakeNodeCache builds a FakeNodeCache serving the given nodes.
+func NewFakeNodeCache(nodes []*clustercache.Node) FakeNodeCache {
+	return FakeNodeCache{
+		nodes: nodes,
+	}
+}

+ 10 - 2
pkg/metrics/podmetrics.go

@@ -44,11 +44,12 @@ func (kpmc KubecostPodCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, pod := range pods {
 		podName := pod.Name
 		podNS := pod.Namespace
+		podUID := string(pod.UID)
 
 		// Pod Annotations
 		labels, values := promutil.KubeAnnotationsToLabels(pod.Annotations)
 		if len(labels) > 0 {
-			ch <- newPodAnnotationMetric("kube_pod_annotations", podNS, podName, labels, values)
+			ch <- newPodAnnotationMetric("kube_pod_annotations", podNS, podName, podUID, labels, values)
 		}
 	}
 
@@ -258,17 +259,19 @@ type PodAnnotationsMetric struct {
 	help        string
 	namespace   string
 	pod         string
+	uid         string
 	labelNames  []string
 	labelValues []string
 }
 
 // Creates a new PodAnnotationsMetric, implementation of prometheus.Metric
-func newPodAnnotationMetric(fqname, namespace, pod string, labelNames, labelValues []string) PodAnnotationsMetric {
+func newPodAnnotationMetric(fqname, namespace, pod string, uid string, labelNames, labelValues []string) PodAnnotationsMetric {
 	return PodAnnotationsMetric{
 		fqName:      fqname,
 		help:        "kube_pod_annotations Pod Annotations",
 		namespace:   namespace,
 		pod:         pod,
+		uid:         uid,
 		labelNames:  labelNames,
 		labelValues: labelValues,
 	}
@@ -280,6 +283,7 @@ func (pam PodAnnotationsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"namespace": pam.namespace,
 		"pod":       pam.pod,
+		"uid":       pam.uid,
 	}
 	return prometheus.NewDesc(pam.fqName, pam.help, []string{}, l)
 }
@@ -307,6 +311,10 @@ func (pam PodAnnotationsMetric) Write(m *dto.Metric) error {
 		&dto.LabelPair{
 			Name:  toStringPtr("pod"),
 			Value: &pam.pod,
+		},
+		&dto.LabelPair{
+			Name:  toStringPtr("uid"),
+			Value: &pam.uid,
 		})
 	m.Label = labels
 	return nil

+ 862 - 0
pkg/metrics/podmetrics_test.go

@@ -0,0 +1,862 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func TestKubecostPodCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectMetric    bool
+	}{
+		{
+			name:            "annotations enabled",
+			disabledMetrics: []string{},
+			expectMetric:    true,
+		},
+		{
+			name:            "annotations disabled",
+			disabledMetrics: []string{"kube_pod_annotations"},
+			expectMetric:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kpc := KubecostPodCollector{
+				KubeClusterCache: NewFakePodCache([]*clustercache.Pod{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10)
+			kpc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if tt.expectMetric && count == 0 {
+				t.Error("Expected metric description but got none")
+			}
+			if !tt.expectMetric && count > 0 {
+				t.Error("Expected no metric description but got some")
+			}
+		})
+	}
+}
+
+func TestKubecostPodCollector_Collect(t *testing.T) {
+	tests := []struct {
+		name            string
+		pods            []*clustercache.Pod
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name: "pod with annotations",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-1"),
+					Name:      "test-pod",
+					Namespace: "default",
+					Annotations: map[string]string{
+						"prometheus.io/scrape": "true",
+						"prometheus.io/port":   "8080",
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   1,
+		},
+		{
+			name: "pod without annotations",
+			pods: []*clustercache.Pod{
+				{
+					UID:         types.UID("pod-uid-2"),
+					Name:        "empty-pod",
+					Namespace:   "default",
+					Annotations: map[string]string{},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   0,
+		},
+		{
+			name: "multiple pods with mixed annotations",
+			pods: []*clustercache.Pod{
+				{
+					UID:         types.UID("pod-uid-3"),
+					Name:        "pod1",
+					Namespace:   "ns1",
+					Annotations: map[string]string{"key": "value"},
+				},
+				{
+					UID:         types.UID("pod-uid-4"),
+					Name:        "pod2",
+					Namespace:   "ns1",
+					Annotations: map[string]string{},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   1,
+		},
+		{
+			name: "metric disabled",
+			pods: []*clustercache.Pod{
+				{
+					UID:         types.UID("pod-uid-5"),
+					Name:        "test-pod",
+					Namespace:   "default",
+					Annotations: map[string]string{"test": "annotation"},
+				},
+			},
+			disabledMetrics: []string{"kube_pod_annotations"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kpc := KubecostPodCollector{
+				KubeClusterCache: NewFakePodCache(tt.pods),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan prometheus.Metric, 10)
+			kpc.Collect(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestPodAnnotationMetric(t *testing.T) {
+	labelNames := []string{"annotation_key1", "annotation_key2"}
+	labelValues := []string{"value1", "value2"}
+
+	metric := newPodAnnotationMetric("kube_pod_annotations", "test-ns", "test-pod", "test-uid", labelNames, labelValues)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"annotation_key1": "value1",
+		"annotation_key2": "value2",
+		"namespace":       "test-ns",
+		"pod":             "test-pod",
+		"uid":             "test-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubePodCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name:            "all metrics enabled",
+			disabledMetrics: []string{},
+			expectedCount:   10,
+		},
+		{
+			name: "some metrics disabled",
+			disabledMetrics: []string{
+				"kube_pod_labels",
+				"kube_pod_owner",
+				"kube_pod_container_status_running",
+			},
+			expectedCount: 7,
+		},
+		{
+			name: "all metrics disabled",
+			disabledMetrics: []string{
+				"kube_pod_labels",
+				"kube_pod_owner",
+				"kube_pod_container_status_running",
+				"kube_pod_container_status_terminated_reason",
+				"kube_pod_container_status_restarts_total",
+				"kube_pod_container_resource_requests",
+				"kube_pod_container_resource_limits",
+				"kube_pod_container_resource_limits_cpu_cores",
+				"kube_pod_container_resource_limits_memory_bytes",
+				"kube_pod_status_phase",
+			},
+			expectedCount: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kpc := KubePodCollector{
+				KubeClusterCache: NewFakePodCache([]*clustercache.Pod{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 15)
+			kpc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestKubePodCollector_Collect(t *testing.T) {
+	boolTrue := true
+	tests := []struct {
+		name            string
+		pods            []*clustercache.Pod
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name: "pod with all features",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-1"),
+					Name:      "test-pod",
+					Namespace: "default",
+					Labels: map[string]string{
+						"app":     "test",
+						"version": "v1",
+					},
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							Name:       "test-deployment",
+							Kind:       "Deployment",
+							Controller: &boolTrue,
+						},
+					},
+					Status: clustercache.PodStatus{
+						Phase: v1.PodRunning,
+						ContainerStatuses: []v1.ContainerStatus{
+							{
+								Name:         "container1",
+								RestartCount: 2,
+								State: v1.ContainerState{
+									Running: &v1.ContainerStateRunning{},
+								},
+							},
+						},
+					},
+					Spec: clustercache.PodSpec{
+						NodeName: "node1",
+						Containers: []clustercache.Container{
+							{
+								Name: "container1",
+								Resources: v1.ResourceRequirements{
+									Requests: v1.ResourceList{
+										v1.ResourceCPU:    resource.MustParse("100m"),
+										v1.ResourceMemory: resource.MustParse("128Mi"),
+									},
+									Limits: v1.ResourceList{
+										v1.ResourceCPU:    resource.MustParse("200m"),
+										v1.ResourceMemory: resource.MustParse("256Mi"),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   15, // 5 phases + 1 labels + 1 owner + 1 restarts + 1 running + 2 requests + 4 limits
+		},
+		{
+			name: "pod without containers",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-2"),
+					Name:      "empty-pod",
+					Namespace: "default",
+					Labels:    map[string]string{"test": "label"},
+					Status: clustercache.PodStatus{
+						Phase: v1.PodPending,
+					},
+					Spec: clustercache.PodSpec{
+						Containers: []clustercache.Container{},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   6, // 5 phases + 1 labels
+		},
+		{
+			name: "pod with terminated container",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-3"),
+					Name:      "terminated-pod",
+					Namespace: "default",
+					Labels:    map[string]string{},
+					Status: clustercache.PodStatus{
+						Phase: v1.PodFailed,
+						ContainerStatuses: []v1.ContainerStatus{
+							{
+								Name:         "failed-container",
+								RestartCount: 5,
+								State: v1.ContainerState{
+									Terminated: &v1.ContainerStateTerminated{
+										Reason: "OOMKilled",
+									},
+								},
+							},
+						},
+					},
+					Spec: clustercache.PodSpec{
+						Containers: []clustercache.Container{
+							{
+								Name:      "failed-container",
+								Resources: v1.ResourceRequirements{},
+							},
+						},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   8, // 5 phases + 1 labels + 1 restarts + 1 terminated reason
+		},
+		{
+			name: "pod without phase",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-4"),
+					Name:      "no-phase-pod",
+					Namespace: "default",
+					Labels:    map[string]string{"app": "test"},
+					Status: clustercache.PodStatus{
+						Phase: "", // Empty phase
+					},
+					Spec: clustercache.PodSpec{},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   1, // Only labels
+		},
+		{
+			name: "multiple containers",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-5"),
+					Name:      "multi-container-pod",
+					Namespace: "default",
+					Labels:    map[string]string{},
+					Status: clustercache.PodStatus{
+						Phase: v1.PodRunning,
+						ContainerStatuses: []v1.ContainerStatus{
+							{
+								Name:         "container1",
+								RestartCount: 0,
+								State: v1.ContainerState{
+									Running: &v1.ContainerStateRunning{},
+								},
+							},
+							{
+								Name:         "container2",
+								RestartCount: 1,
+								State: v1.ContainerState{
+									Running: &v1.ContainerStateRunning{},
+								},
+							},
+						},
+					},
+					Spec: clustercache.PodSpec{
+						NodeName: "node2",
+						Containers: []clustercache.Container{
+							{
+								Name: "container1",
+								Resources: v1.ResourceRequirements{
+									Requests: v1.ResourceList{
+										v1.ResourceCPU: resource.MustParse("50m"),
+									},
+									Limits: v1.ResourceList{
+										v1.ResourceCPU: resource.MustParse("100m"),
+									},
+								},
+							},
+							{
+								Name: "container2",
+								Resources: v1.ResourceRequirements{
+									Requests: v1.ResourceList{
+										v1.ResourceMemory: resource.MustParse("64Mi"),
+									},
+									Limits: v1.ResourceList{
+										v1.ResourceMemory: resource.MustParse("128Mi"),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   16, // 5 phases + 1 labels + 2 restarts + 2 running + 2 requests + 4 limits
+		},
+		{
+			name: "metrics disabled",
+			pods: []*clustercache.Pod{
+				{
+					UID:       types.UID("pod-uid-6"),
+					Name:      "test-pod",
+					Namespace: "default",
+					Labels:    map[string]string{"app": "test"},
+					Status: clustercache.PodStatus{
+						Phase: v1.PodRunning,
+					},
+					Spec: clustercache.PodSpec{},
+				},
+			},
+			disabledMetrics: []string{"kube_pod_labels", "kube_pod_status_phase"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			kpc := KubePodCollector{
+				KubeClusterCache: NewFakePodCache(tt.pods),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan prometheus.Metric, 30)
+			kpc.Collect(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestKubePodLabelsMetric(t *testing.T) {
+	labelNames := []string{"label_app", "label_env"}
+	labelValues := []string{"webapp", "production"}
+
+	metric := newKubePodLabelsMetric("kube_pod_labels", "prod", "web-pod", "pod-uid", labelNames, labelValues)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"label_app": "webapp",
+		"label_env": "production",
+		"namespace": "prod",
+		"pod":       "web-pod",
+		"uid":       "pod-uid",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubePodContainerStatusRestartsTotalMetric(t *testing.T) {
+	metric := newKubePodContainerStatusRestartsTotalMetric("kube_pod_container_status_restarts_total", "default", "test-pod", "pod-uid", "app-container", 3.0)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Counter == nil {
+		t.Error("Expected counter metric")
+	}
+
+	if *dtoMetric.Counter.Value != 3.0 {
+		t.Errorf("Expected counter value 3.0, got %f", *dtoMetric.Counter.Value)
+	}
+}
+
+func TestKubePodContainerStatusTerminatedReasonMetric(t *testing.T) {
+	metric := newKubePodContainerStatusTerminatedReasonMetric("kube_pod_container_status_terminated_reason", "default", "crashed-pod", "pod-uid", "failing-container", "Error")
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Check for reason label
+	hasReason := false
+	for _, label := range dtoMetric.Label {
+		if *label.Name == "reason" && *label.Value == "Error" {
+			hasReason = true
+			break
+		}
+	}
+	if !hasReason {
+		t.Error("Expected reason label with value 'Error'")
+	}
+}
+
+func TestKubePodStatusPhaseMetric(t *testing.T) {
+	metric := newKubePodStatusPhaseMetric("kube_pod_status_phase", "default", "test-pod", "pod-uid", "Running", 1.0)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	// Check phase label
+	hasPhase := false
+	for _, label := range dtoMetric.Label {
+		if *label.Name == "phase" && *label.Value == "Running" {
+			hasPhase = true
+			break
+		}
+	}
+	if !hasPhase {
+		t.Error("Expected phase label with value 'Running'")
+	}
+}
+
+func TestKubePodContainerStatusRunningMetric(t *testing.T) {
+	metric := newKubePodContainerStatusRunningMetric("kube_pod_container_status_running", "default", "running-pod", "pod-uid", "web-container")
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestKubePodContainerResourceRequestsMetric(t *testing.T) {
+	metric := newKubePodContainerResourceRequestsMetric("kube_pod_container_resource_requests", "default", "test-pod", "pod-uid", "container1", "node1", "cpu", "core", 0.1)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 0.1 {
+		t.Errorf("Expected gauge value 0.1, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify all labels
+	expectedLabels := map[string]string{
+		"namespace": "default",
+		"pod":       "test-pod",
+		"container": "container1",
+		"uid":       "pod-uid",
+		"node":      "node1",
+		"resource":  "cpu",
+		"unit":      "core",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestKubePodContainerResourceLimitsMetric(t *testing.T) {
+	metric := newKubePodContainerResourceLimitsMetric("kube_pod_container_resource_limits", "default", "test-pod", "pod-uid", "container1", "node1", "memory", "byte", 268435456)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 268435456 {
+		t.Errorf("Expected gauge value 268435456, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestKubePodContainerResourceLimitsCPUCoresMetric(t *testing.T) {
+	metric := newKubePodContainerResourceLimitsCPUCoresMetric("kube_pod_container_resource_limits_cpu_cores", "default", "test-pod", "pod-uid", "container1", "node1", 2.0)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 2.0 {
+		t.Errorf("Expected gauge value 2.0, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestKubePodContainerResourceLimitsMemoryBytesMetric(t *testing.T) {
+	metric := newKubePodContainerResourceLimitsMemoryBytesMetric("kube_pod_container_resource_limits_memory_bytes", "default", "test-pod", "pod-uid", "container1", "node1", 536870912)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 536870912 {
+		t.Errorf("Expected gauge value 536870912, got %f", *dtoMetric.Gauge.Value)
+	}
+}
+
+func TestKubePodOwnerMetric(t *testing.T) {
+	metric := newKubePodOwnerMetric("kube_pod_owner", "default", "test-pod", "test-replicaset", "ReplicaSet", true)
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify owner-specific labels
+	expectedLabels := map[string]string{
+		"namespace":           "default",
+		"pod":                 "test-pod",
+		"owner_name":          "test-replicaset",
+		"owner_kind":          "ReplicaSet",
+		"owner_is_controller": "true",
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestPodPhaseMetrics(t *testing.T) {
+	// Test that all pod phases generate correct metrics
+	pod := &clustercache.Pod{
+		UID:       types.UID("phase-test-uid"),
+		Name:      "phase-test-pod",
+		Namespace: "default",
+		Labels:    map[string]string{},
+		Status: clustercache.PodStatus{
+			Phase: v1.PodRunning,
+		},
+		Spec: clustercache.PodSpec{},
+	}
+
+	mc := MetricsConfig{
+		DisabledMetrics: []string{"kube_pod_labels"}, // Only test phase metrics
+	}
+	kpc := KubePodCollector{
+		KubeClusterCache: NewFakePodCache([]*clustercache.Pod{pod}),
+		metricsConfig:    mc,
+	}
+
+	ch := make(chan prometheus.Metric, 10)
+	kpc.Collect(ch)
+	close(ch)
+
+	phaseMetrics := make(map[string]float64)
+	for metric := range ch {
+		var dtoMetric dto.Metric
+		metric.Write(&dtoMetric)
+
+		for _, label := range dtoMetric.Label {
+			if *label.Name == "phase" {
+				phaseMetrics[*label.Value] = *dtoMetric.Gauge.Value
+			}
+		}
+	}
+
+	// Verify all phases are emitted
+	expectedPhases := map[string]float64{
+		"Pending":   0.0,
+		"Succeeded": 0.0,
+		"Failed":    0.0,
+		"Unknown":   0.0,
+		"Running":   1.0, // Only Running should be 1
+	}
+
+	for phase, expectedValue := range expectedPhases {
+		if actualValue, ok := phaseMetrics[phase]; !ok {
+			t.Errorf("Missing phase metric for %s", phase)
+		} else if actualValue != expectedValue {
+			t.Errorf("Phase %s: expected value %f, got %f", phase, expectedValue, actualValue)
+		}
+	}
+}
+
+// FakePodCache implements ClusterCache interface for testing
+type FakePodCache struct {
+	clustercache.ClusterCache
+	pods []*clustercache.Pod
+}
+
+func (f FakePodCache) GetAllPods() []*clustercache.Pod {
+	return f.pods
+}
+
+func NewFakePodCache(pods []*clustercache.Pod) FakePodCache {
+	return FakePodCache{
+		pods: pods,
+	}
+}

+ 19 - 4
pkg/metrics/pvcmetrics.go

@@ -37,14 +37,15 @@ func (kpvc KubePVCCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, pvc := range pvcs {
 		storageClass := getPersistentVolumeClaimClass(pvc)
 		volume := pvc.Spec.VolumeName
+		pvcUID := string(pvc.UID)
 
 		if _, disabled := disabledMetrics["kube_persistentvolumeclaim_info"]; !disabled {
-			ch <- newKubePVCInfoMetric("kube_persistentvolumeclaim_info", pvc.Name, pvc.Namespace, storageClass, volume)
+			ch <- newKubePVCInfoMetric("kube_persistentvolumeclaim_info", pvc.Name, pvc.Namespace, pvcUID, storageClass, volume)
 		}
 
 		if storage, ok := pvc.Spec.Resources.Requests[v1.ResourceStorage]; ok {
 			if _, disabled := disabledMetrics["kube_persistentvolumeclaim_resource_requests_storage_bytes"]; !disabled {
-				ch <- newKubePVCResourceRequestsStorageBytesMetric("kube_persistentvolumeclaim_resource_requests_storage_bytes", pvc.Name, pvc.Namespace, float64(storage.Value()))
+				ch <- newKubePVCResourceRequestsStorageBytesMetric("kube_persistentvolumeclaim_resource_requests_storage_bytes", pvc.Name, pvc.Namespace, pvcUID, float64(storage.Value()))
 			}
 		}
 	}
@@ -61,16 +62,18 @@ type KubePVCResourceRequestsStorageBytesMetric struct {
 	namespace string
 	pvc       string
 	value     float64
+	uid       string
 }
 
 // Creates a new KubePVCResourceRequestsStorageBytesMetric, implementation of prometheus.Metric
-func newKubePVCResourceRequestsStorageBytesMetric(fqname, pvc, namespace string, value float64) KubePVCResourceRequestsStorageBytesMetric {
+func newKubePVCResourceRequestsStorageBytesMetric(fqname, pvc, namespace, uid string, value float64) KubePVCResourceRequestsStorageBytesMetric {
 	return KubePVCResourceRequestsStorageBytesMetric{
 		fqName:    fqname,
 		help:      "kube_persistentvolumeclaim_resource_requests_storage_bytes pvc storage resource requests in bytes",
 		pvc:       pvc,
 		namespace: namespace,
 		value:     value,
+		uid:       uid,
 	}
 }
 
@@ -80,6 +83,7 @@ func (kpvcrr KubePVCResourceRequestsStorageBytesMetric) Desc() *prometheus.Desc
 	l := prometheus.Labels{
 		"persistentvolumeclaim": kpvcrr.pvc,
 		"namespace":             kpvcrr.namespace,
+		"uid":                   kpvcrr.uid,
 	}
 	return prometheus.NewDesc(kpvcrr.fqName, kpvcrr.help, []string{}, l)
 }
@@ -100,6 +104,10 @@ func (kpvcrr KubePVCResourceRequestsStorageBytesMetric) Write(m *dto.Metric) err
 			Name:  toStringPtr("namespace"),
 			Value: &kpvcrr.namespace,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpvcrr.uid,
+		},
 	}
 	return nil
 }
@@ -116,10 +124,11 @@ type KubePVCInfoMetric struct {
 	pvc          string
 	storageclass string
 	volume       string
+	uid          string
 }
 
 // Creates a new KubePVCInfoMetric, implementation of prometheus.Metric
-func newKubePVCInfoMetric(fqname, pvc, namespace, storageclass, volume string) KubePVCInfoMetric {
+func newKubePVCInfoMetric(fqname, pvc, namespace, uid, storageclass, volume string) KubePVCInfoMetric {
 	return KubePVCInfoMetric{
 		fqName:       fqname,
 		help:         "kube_persistentvolumeclaim_info pvc storage resource requests in bytes",
@@ -127,6 +136,7 @@ func newKubePVCInfoMetric(fqname, pvc, namespace, storageclass, volume string) K
 		namespace:    namespace,
 		storageclass: storageclass,
 		volume:       volume,
+		uid:          uid,
 	}
 }
 
@@ -138,6 +148,7 @@ func (kpvcrr KubePVCInfoMetric) Desc() *prometheus.Desc {
 		"namespace":             kpvcrr.namespace,
 		"storageclass":          kpvcrr.storageclass,
 		"volumename":            kpvcrr.volume,
+		"uid":                   kpvcrr.uid,
 	}
 	return prometheus.NewDesc(kpvcrr.fqName, kpvcrr.help, []string{}, l)
 }
@@ -167,6 +178,10 @@ func (kpvci KubePVCInfoMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("volumename"),
 			Value: &kpvci.volume,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpvci.uid,
+		},
 	}
 	return nil
 }

+ 130 - 0
pkg/metrics/pvcmetrics_test.go

@@ -0,0 +1,130 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func collectMetrics(collector KubePVCCollector) []prometheus.Metric {
+	ch := make(chan prometheus.Metric, 10)
+	go func() {
+		defer close(ch)
+		collector.Collect(ch)
+	}()
+
+	var metrics []prometheus.Metric
+	for metric := range ch {
+		metrics = append(metrics, metric)
+	}
+	return metrics
+}
+
+
+func TestKubePVCCollector_Describe(t *testing.T) {
+	collector := KubePVCCollector{metricsConfig: MetricsConfig{}}
+	ch := make(chan *prometheus.Desc, 5)
+	go func() {
+		defer close(ch)
+		collector.Describe(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 2 {
+		t.Errorf("Expected 2 metrics described, got %d", count)
+	}
+}
+
+
+func TestKubePVCCollector_Collect(t *testing.T) {
+	storageSize := resource.MustParse("1Gi")
+	pvc := &clustercache.PersistentVolumeClaim{
+		UID:       types.UID("test-uid"),
+		Name:      "test-pvc",
+		Namespace: "default",
+		Spec: v1.PersistentVolumeClaimSpec{
+			Resources: v1.VolumeResourceRequirements{
+				Requests: v1.ResourceList{v1.ResourceStorage: storageSize},
+			},
+		},
+	}
+
+	cache := NewFakePVCCache([]*clustercache.PersistentVolumeClaim{pvc})
+	collector := KubePVCCollector{
+		KubeClusterCache: cache,
+		metricsConfig:    MetricsConfig{},
+	}
+
+	metrics := collectMetrics(collector)
+	if len(metrics) != 2 {
+		t.Errorf("Expected 2 metrics, got %d", len(metrics))
+	}
+
+	// Verify UID label exists in metrics
+	for _, metric := range metrics {
+		var m dto.Metric
+		if err := metric.Write(&m); err != nil {
+			t.Errorf("Error writing metric: %v", err)
+		}
+
+		hasUID := false
+		for _, label := range m.Label {
+			if *label.Name == "uid" && *label.Value == "test-uid" {
+				hasUID = true
+				break
+			}
+		}
+		if !hasUID {
+			t.Error("Metric missing UID label")
+		}
+	}
+}
+
+
+func TestKubePVCMetrics_UIDLabel(t *testing.T) {
+	metric := newKubePVCResourceRequestsStorageBytesMetric(
+		"test_metric", "test-pvc", "test-namespace", "test-uid", 1000.0,
+	)
+
+	var m dto.Metric
+	if err := metric.Write(&m); err != nil {
+		t.Fatalf("Error writing metric: %v", err)
+	}
+
+	// Verify UID label exists
+	for _, label := range m.Label {
+		if *label.Name == "uid" && *label.Value == "test-uid" {
+			return
+		}
+	}
+	t.Error("UID label not found in metric")
+}
+
+
+
+
+
+
+type FakePVCCache struct {
+	clustercache.ClusterCache
+	pvcs []*clustercache.PersistentVolumeClaim
+}
+
+func (f FakePVCCache) GetAllPersistentVolumeClaims() []*clustercache.PersistentVolumeClaim {
+	return f.pvcs
+}
+
+func NewFakePVCCache(pvcs []*clustercache.PersistentVolumeClaim) FakePVCCache {
+	return FakePVCCache{
+		pvcs: pvcs,
+	}
+}

+ 30 - 7
pkg/metrics/pvmetrics.go

@@ -39,6 +39,8 @@ func (kpvcb KubePVCollector) Collect(ch chan<- prometheus.Metric) {
 	disabledMetrics := kpvcb.metricsConfig.GetDisabledMetricsMap()
 
 	for _, pv := range pvs {
+		pvUID := string(pv.UID)
+
 		if _, disabled := disabledMetrics["kube_persistentvolume_status_phase"]; !disabled {
 			phase := pv.Status.Phase
 			if phase != "" {
@@ -54,14 +56,14 @@ func (kpvcb KubePVCollector) Collect(ch chan<- prometheus.Metric) {
 				}
 
 				for _, p := range phases {
-					ch <- newKubePVStatusPhaseMetric("kube_persistentvolume_status_phase", pv.Name, p.n, boolFloat64(p.v))
+					ch <- newKubePVStatusPhaseMetric("kube_persistentvolume_status_phase", pv.Name, pvUID, p.n, boolFloat64(p.v))
 				}
 			}
 		}
 
 		if _, disabled := disabledMetrics["kube_persistentvolume_capacity_bytes"]; !disabled {
 			storage := pv.Spec.Capacity[v1.ResourceStorage]
-			m := newKubePVCapacityBytesMetric("kube_persistentvolume_capacity_bytes", pv.Name, float64(storage.Value()))
+			m := newKubePVCapacityBytesMetric("kube_persistentvolume_capacity_bytes", pv.Name, pvUID, float64(storage.Value()))
 			ch <- m
 		}
 
@@ -72,7 +74,7 @@ func (kpvcb KubePVCollector) Collect(ch chan<- prometheus.Metric) {
 			if pv.Spec.CSI != nil && pv.Spec.CSI.VolumeHandle != "" {
 				providerID = pv.Spec.CSI.VolumeHandle
 			}
-			m := newKubecostPVInfoMetric("kubecost_pv_info", pv.Name, storageClass, providerID, float64(1))
+			m := newKubecostPVInfoMetric("kubecost_pv_info", pv.Name, pvUID, storageClass, providerID, float64(1))
 			ch <- m
 		}
 	}
@@ -88,15 +90,17 @@ type KubePVCapacityBytesMetric struct {
 	help   string
 	pv     string
 	value  float64
+	uid    string
 }
 
 // Creates a new KubePVCapacityBytesMetric, implementation of prometheus.Metric
-func newKubePVCapacityBytesMetric(fqname, pv string, value float64) KubePVCapacityBytesMetric {
+func newKubePVCapacityBytesMetric(fqname, pv, uid string, value float64) KubePVCapacityBytesMetric {
 	return KubePVCapacityBytesMetric{
 		fqName: fqname,
 		help:   "kube_persistentvolume_capacity_bytes pv storage capacity in bytes",
 		pv:     pv,
 		value:  value,
+		uid:    uid,
 	}
 }
 
@@ -105,6 +109,7 @@ func newKubePVCapacityBytesMetric(fqname, pv string, value float64) KubePVCapaci
 func (kpcrr KubePVCapacityBytesMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"persistentvolume": kpcrr.pv,
+		"uid":              kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -121,6 +126,10 @@ func (kpcrr KubePVCapacityBytesMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("persistentvolume"),
 			Value: &kpcrr.pv,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }
@@ -136,16 +145,18 @@ type KubePVStatusPhaseMetric struct {
 	pv     string
 	phase  string
 	value  float64
+	uid    string
 }
 
-// Creates a new KubePVCapacityBytesMetric, implementation of prometheus.Metric
-func newKubePVStatusPhaseMetric(fqname, pv, phase string, value float64) KubePVStatusPhaseMetric {
+// Creates a new KubePVStatusPhaseMetric, implementation of prometheus.Metric
+func newKubePVStatusPhaseMetric(fqname, pv, uid, phase string, value float64) KubePVStatusPhaseMetric {
 	return KubePVStatusPhaseMetric{
 		fqName: fqname,
 		help:   "kube_persistentvolume_status_phase pv status phase",
 		pv:     pv,
 		phase:  phase,
 		value:  value,
+		uid:    uid,
 	}
 }
 
@@ -155,6 +166,7 @@ func (kpcrr KubePVStatusPhaseMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"persistentvolume": kpcrr.pv,
 		"phase":            kpcrr.phase,
+		"uid":              kpcrr.uid,
 	}
 	return prometheus.NewDesc(kpcrr.fqName, kpcrr.help, []string{}, l)
 }
@@ -175,6 +187,10 @@ func (kpcrr KubePVStatusPhaseMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("phase"),
 			Value: &kpcrr.phase,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpcrr.uid,
+		},
 	}
 	return nil
 }
@@ -192,10 +208,11 @@ type KubecostPVInfoMetric struct {
 	storageClass string
 	value        float64
 	providerId   string
+	uid          string
 }
 
 // Creates a new newKubecostPVInfoMetric, implementation of prometheus.Metric
-func newKubecostPVInfoMetric(fqname, pv, storageClass, providerID string, value float64) KubecostPVInfoMetric {
+func newKubecostPVInfoMetric(fqname, pv, uid, storageClass, providerID string, value float64) KubecostPVInfoMetric {
 	return KubecostPVInfoMetric{
 		fqName:       fqname,
 		help:         "kubecost_pv_info pv info",
@@ -203,6 +220,7 @@ func newKubecostPVInfoMetric(fqname, pv, storageClass, providerID string, value
 		storageClass: storageClass,
 		value:        value,
 		providerId:   providerID,
+		uid:          uid,
 	}
 }
 
@@ -213,6 +231,7 @@ func (kpvim KubecostPVInfoMetric) Desc() *prometheus.Desc {
 		"persistentvolume": kpvim.pv,
 		"storageclass":     kpvim.storageClass,
 		"provider_id":      kpvim.providerId,
+		"uid":              kpvim.uid,
 	}
 	return prometheus.NewDesc(kpvim.fqName, kpvim.help, []string{}, l)
 }
@@ -237,6 +256,10 @@ func (kpvim KubecostPVInfoMetric) Write(m *dto.Metric) error {
 			Name:  toStringPtr("provider_id"),
 			Value: &kpvim.providerId,
 		},
+		{
+			Name:  toStringPtr("uid"),
+			Value: &kpvim.uid,
+		},
 	}
 	return nil
 }

+ 124 - 0
pkg/metrics/pvmetrics_test.go

@@ -0,0 +1,124 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func collectPVMetrics(collector KubePVCollector) []prometheus.Metric {
+	ch := make(chan prometheus.Metric, 10)
+	go func() {
+		defer close(ch)
+		collector.Collect(ch)
+	}()
+
+	var metrics []prometheus.Metric
+	for metric := range ch {
+		metrics = append(metrics, metric)
+	}
+	return metrics
+}
+
+func TestKubePVCollector_Describe(t *testing.T) {
+	collector := KubePVCollector{metricsConfig: MetricsConfig{}}
+	ch := make(chan *prometheus.Desc, 5)
+	go func() {
+		defer close(ch)
+		collector.Describe(ch)
+	}()
+
+	count := 0
+	for range ch {
+		count++
+	}
+
+	if count != 3 {
+		t.Errorf("Expected 3 metrics described, got %d", count)
+	}
+}
+
+func TestKubePVCollector_Collect(t *testing.T) {
+	storageSize := resource.MustParse("10Gi")
+	pv := &clustercache.PersistentVolume{
+		UID:  types.UID("test-pv-uid"),
+		Name: "test-pv",
+		Spec: v1.PersistentVolumeSpec{
+			Capacity: v1.ResourceList{
+				v1.ResourceStorage: storageSize,
+			},
+		},
+		Status: v1.PersistentVolumeStatus{
+			Phase: v1.VolumeBound,
+		},
+	}
+
+	cache := NewFakePVCache([]*clustercache.PersistentVolume{pv})
+	collector := KubePVCollector{
+		KubeClusterCache: cache,
+		metricsConfig:    MetricsConfig{},
+	}
+
+	metrics := collectPVMetrics(collector)
+	if len(metrics) != 7 { // 1 capacity + 5 phase + 1 info
+		t.Errorf("Expected 7 metrics, got %d", len(metrics))
+	}
+
+	// Verify UID label exists in metrics
+	for _, metric := range metrics {
+		var m dto.Metric
+		if err := metric.Write(&m); err != nil {
+			t.Errorf("Error writing metric: %v", err)
+		}
+
+		hasUID := false
+		for _, label := range m.Label {
+			if *label.Name == "uid" && *label.Value == "test-pv-uid" {
+				hasUID = true
+				break
+			}
+		}
+		if !hasUID {
+			t.Error("Metric missing UID label")
+		}
+	}
+}
+
+func TestKubePVMetrics_UIDLabel(t *testing.T) {
+	metric := newKubePVCapacityBytesMetric(
+		"test_metric", "test-pv", "test-uid", 1000.0,
+	)
+
+	var m dto.Metric
+	if err := metric.Write(&m); err != nil {
+		t.Fatalf("Error writing metric: %v", err)
+	}
+
+	// Verify UID label exists
+	for _, label := range m.Label {
+		if *label.Name == "uid" && *label.Value == "test-uid" {
+			return
+		}
+	}
+	t.Error("UID label not found in metric")
+}
+
+type FakePVCache struct {
+	clustercache.ClusterCache
+	pvs []*clustercache.PersistentVolume
+}
+
+func (f FakePVCache) GetAllPersistentVolumes() []*clustercache.PersistentVolume {
+	return f.pvs
+}
+
+func NewFakePVCache(pvs []*clustercache.PersistentVolume) FakePVCache {
+	return FakePVCache{
+		pvs: pvs,
+	}
+}

+ 10 - 2
pkg/metrics/servicemetrics.go

@@ -41,10 +41,11 @@ func (sc KubecostServiceCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, svc := range svcs {
 		serviceName := svc.Name
 		serviceNS := svc.Namespace
+		serviceUID := string(svc.UID)
 
 		labels, values := promutil.KubeLabelsToLabels(promutil.SanitizeLabels(svc.SpecSelector))
 		if len(labels) > 0 {
-			m := newServiceSelectorLabelsMetric(serviceName, serviceNS, "service_selector_labels", labels, values)
+			m := newServiceSelectorLabelsMetric(serviceName, serviceNS, "service_selector_labels", labels, values, serviceUID)
 			ch <- m
 		}
 	}
@@ -63,10 +64,11 @@ type ServiceSelectorLabelsMetric struct {
 	labelValues []string
 	serviceName string
 	namespace   string
+	uid         string
 }
 
 // Creates a new ServiceMetric, implementation of prometheus.Metric
-func newServiceSelectorLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string) ServiceSelectorLabelsMetric {
+func newServiceSelectorLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string, uid string) ServiceSelectorLabelsMetric {
 	return ServiceSelectorLabelsMetric{
 		fqName:      fqname,
 		labelNames:  labelNames,
@@ -74,6 +76,7 @@ func newServiceSelectorLabelsMetric(name, namespace, fqname string, labelNames,
 		help:        "service_selector_labels Service Selector Labels",
 		serviceName: name,
 		namespace:   namespace,
+		uid:         uid,
 	}
 }
 
@@ -83,6 +86,7 @@ func (s ServiceSelectorLabelsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"service":   s.serviceName,
 		"namespace": s.namespace,
+		"uid":       s.uid,
 	}
 	return prometheus.NewDesc(s.fqName, s.help, s.labelNames, l)
 }
@@ -109,6 +113,10 @@ func (s ServiceSelectorLabelsMetric) Write(m *dto.Metric) error {
 		Name:  toStringPtr("service"),
 		Value: &s.serviceName,
 	})
+	labels = append(labels, &dto.LabelPair{
+		Name:  toStringPtr("uid"),
+		Value: &s.uid,
+	})
 	m.Label = labels
 	return nil
 }

+ 233 - 0
pkg/metrics/servicemetrics_test.go

@@ -0,0 +1,233 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func TestKubecostServiceCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectMetric    bool
+	}{
+		{
+			name:            "service_selector_labels enabled",
+			disabledMetrics: []string{},
+			expectMetric:    true,
+		},
+		{
+			name:            "service_selector_labels disabled",
+			disabledMetrics: []string{"service_selector_labels"},
+			expectMetric:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			sc := KubecostServiceCollector{
+				KubeClusterCache: NewFakeServiceCache([]*clustercache.Service{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10)
+			sc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if tt.expectMetric && count == 0 {
+				t.Error("Expected metric description but got none")
+			}
+			if !tt.expectMetric && count > 0 {
+				t.Error("Expected no metric description but got some")
+			}
+		})
+	}
+}
+
+func TestKubecostServiceCollector_Collect(t *testing.T) {
+	tests := []struct {
+		name            string
+		services        []*clustercache.Service
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name: "single service with selector",
+			services: []*clustercache.Service{
+				{
+					UID:          types.UID("test-uid-1"),
+					Name:         "test-service",
+					Namespace:    "default",
+					SpecSelector: map[string]string{"app": "test", "version": "v1"},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   1,
+		},
+		{
+			name: "service without selector",
+			services: []*clustercache.Service{
+				{
+					UID:          types.UID("test-uid-2"),
+					Name:         "headless-service",
+					Namespace:    "default",
+					SpecSelector: map[string]string{},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   0,
+		},
+		{
+			name: "multiple services with selectors",
+			services: []*clustercache.Service{
+				{
+					UID:          types.UID("test-uid-3"),
+					Name:         "service1",
+					Namespace:    "ns1",
+					SpecSelector: map[string]string{"app": "app1"},
+				},
+				{
+					UID:          types.UID("test-uid-4"),
+					Name:         "service2",
+					Namespace:    "ns2",
+					SpecSelector: map[string]string{"component": "frontend"},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   2,
+		},
+		{
+			name: "metric disabled",
+			services: []*clustercache.Service{
+				{
+					UID:          types.UID("test-uid-5"),
+					Name:         "test-service",
+					Namespace:    "default",
+					SpecSelector: map[string]string{"app": "test"},
+				},
+			},
+			disabledMetrics: []string{"service_selector_labels"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			sc := KubecostServiceCollector{
+				KubeClusterCache: NewFakeServiceCache(tt.services),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan prometheus.Metric, 10)
+			sc.Collect(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestServiceSelectorLabelsMetric(t *testing.T) {
+	labelNames := []string{"app", "version"}
+	labelValues := []string{"test-app", "v1.0"}
+	uid := "test-uid"
+
+	metric := newServiceSelectorLabelsMetric("test-service", "default", "service_selector_labels", labelNames, labelValues, uid)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"app":       "test-app",
+		"version":   "v1.0",
+		"service":   "test-service",
+		"namespace": "default",
+		"uid":       uid,
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestServiceSelectorLabelsMetric_EmptyLabels(t *testing.T) {
+	metric := newServiceSelectorLabelsMetric("empty-service", "test-ns", "service_selector_labels", []string{}, []string{}, "empty-uid")
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	// Should still have the service metadata labels
+	expectedCount := 3 // service, namespace, uid
+	if len(dtoMetric.Label) != expectedCount {
+		t.Errorf("Expected %d labels, got %d", expectedCount, len(dtoMetric.Label))
+	}
+}
+
+// FakeServiceCache implements ClusterCache interface for testing
+type FakeServiceCache struct {
+	clustercache.ClusterCache
+	services []*clustercache.Service
+}
+
+func (f FakeServiceCache) GetAllServices() []*clustercache.Service {
+	return f.services
+}
+
+func NewFakeServiceCache(services []*clustercache.Service) FakeServiceCache {
+	return FakeServiceCache{
+		services: services,
+	}
+}

+ 15 - 5
pkg/metrics/statefulsetmetrics.go

@@ -40,11 +40,14 @@ func (sc KubecostStatefulsetCollector) Collect(ch chan<- prometheus.Metric) {
 	for _, statefulset := range ds {
 		statefulsetName := statefulset.Name
 		statefulsetNS := statefulset.Namespace
+		statefulsetUID := string(statefulset.UID)
 
-		labels, values := promutil.KubeLabelsToLabels(promutil.SanitizeLabels(statefulset.SpecSelector.MatchLabels))
-		if len(labels) > 0 {
-			m := newStatefulsetMatchLabelsMetric(statefulsetName, statefulsetNS, "statefulSet_match_labels", labels, values)
-			ch <- m
+		if statefulset.SpecSelector != nil {
+			labels, values := promutil.KubeLabelsToLabels(promutil.SanitizeLabels(statefulset.SpecSelector.MatchLabels))
+			if len(labels) > 0 {
+				m := newStatefulsetMatchLabelsMetric(statefulsetName, statefulsetNS, "statefulSet_match_labels", labels, values, statefulsetUID)
+				ch <- m
+			}
 		}
 	}
 
@@ -62,10 +65,11 @@ type StatefulsetMatchLabelsMetric struct {
 	labelValues     []string
 	statefulsetName string
 	namespace       string
+	uid             string
 }
 
 // Creates a new StatefulsetMetric, implementation of prometheus.Metric
-func newStatefulsetMatchLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string) StatefulsetMatchLabelsMetric {
+func newStatefulsetMatchLabelsMetric(name, namespace, fqname string, labelNames, labelvalues []string, uid string) StatefulsetMatchLabelsMetric {
 	return StatefulsetMatchLabelsMetric{
 		fqName:          fqname,
 		labelNames:      labelNames,
@@ -73,6 +77,7 @@ func newStatefulsetMatchLabelsMetric(name, namespace, fqname string, labelNames,
 		help:            "statefulSet_match_labels StatefulSet Match Labels",
 		statefulsetName: name,
 		namespace:       namespace,
+		uid:             uid,
 	}
 }
 
@@ -82,6 +87,7 @@ func (s StatefulsetMatchLabelsMetric) Desc() *prometheus.Desc {
 	l := prometheus.Labels{
 		"statefulSet": s.statefulsetName,
 		"namespace":   s.namespace,
+		"uid":         s.uid,
 	}
 	return prometheus.NewDesc(s.fqName, s.help, s.labelNames, l)
 }
@@ -108,6 +114,10 @@ func (s StatefulsetMatchLabelsMetric) Write(m *dto.Metric) error {
 		Name:  toStringPtr("statefulSet"),
 		Value: &s.statefulsetName,
 	})
+	labels = append(labels, &dto.LabelPair{
+		Name:  toStringPtr("uid"),
+		Value: &s.uid,
+	})
 	m.Label = labels
 	return nil
 }

+ 302 - 0
pkg/metrics/statefulsetmetrics_test.go

@@ -0,0 +1,302 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+func TestKubecostStatefulsetCollector_Describe(t *testing.T) {
+	tests := []struct {
+		name            string
+		disabledMetrics []string
+		expectMetric    bool
+	}{
+		{
+			name:            "statefulSet_match_labels enabled",
+			disabledMetrics: []string{},
+			expectMetric:    true,
+		},
+		{
+			name:            "statefulSet_match_labels disabled",
+			disabledMetrics: []string{"statefulSet_match_labels"},
+			expectMetric:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			sc := KubecostStatefulsetCollector{
+				KubeClusterCache: NewFakeStatefulsetCache([]*clustercache.StatefulSet{}),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan *prometheus.Desc, 10)
+			sc.Describe(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if tt.expectMetric && count == 0 {
+				t.Error("Expected metric description but got none")
+			}
+			if !tt.expectMetric && count > 0 {
+				t.Error("Expected no metric description but got some")
+			}
+		})
+	}
+}
+
+func TestKubecostStatefulsetCollector_Collect(t *testing.T) {
+	tests := []struct {
+		name            string
+		statefulsets    []*clustercache.StatefulSet
+		disabledMetrics []string
+		expectedCount   int
+	}{
+		{
+			name: "single statefulset with match labels",
+			statefulsets: []*clustercache.StatefulSet{
+				{
+					UID:       types.UID("test-uid-1"),
+					Name:      "test-statefulset",
+					Namespace: "default",
+					SpecSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"app": "test", "version": "v1"},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   1,
+		},
+		{
+			name: "statefulset without match labels",
+			statefulsets: []*clustercache.StatefulSet{
+				{
+					UID:       types.UID("test-uid-2"),
+					Name:      "empty-statefulset",
+					Namespace: "default",
+					SpecSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   0,
+		},
+		{
+			name: "statefulset with nil selector",
+			statefulsets: []*clustercache.StatefulSet{
+				{
+					UID:          types.UID("test-uid-3"),
+					Name:         "nil-selector-statefulset",
+					Namespace:    "default",
+					SpecSelector: nil,
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   0,
+		},
+		{
+			name: "multiple statefulsets with match labels",
+			statefulsets: []*clustercache.StatefulSet{
+				{
+					UID:       types.UID("test-uid-4"),
+					Name:      "statefulset1",
+					Namespace: "ns1",
+					SpecSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"app": "app1"},
+					},
+				},
+				{
+					UID:       types.UID("test-uid-5"),
+					Name:      "statefulset2",
+					Namespace: "ns2",
+					SpecSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"component": "database"},
+					},
+				},
+			},
+			disabledMetrics: []string{},
+			expectedCount:   2,
+		},
+		{
+			name: "metric disabled",
+			statefulsets: []*clustercache.StatefulSet{
+				{
+					UID:       types.UID("test-uid-6"),
+					Name:      "test-statefulset",
+					Namespace: "default",
+					SpecSelector: &metav1.LabelSelector{
+						MatchLabels: map[string]string{"app": "test"},
+					},
+				},
+			},
+			disabledMetrics: []string{"statefulSet_match_labels"},
+			expectedCount:   0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mc := MetricsConfig{
+				DisabledMetrics: tt.disabledMetrics,
+			}
+			sc := KubecostStatefulsetCollector{
+				KubeClusterCache: NewFakeStatefulsetCache(tt.statefulsets),
+				metricsConfig:    mc,
+			}
+
+			ch := make(chan prometheus.Metric, 10)
+			sc.Collect(ch)
+			close(ch)
+
+			count := 0
+			for range ch {
+				count++
+			}
+
+			if count != tt.expectedCount {
+				t.Errorf("Expected %d metrics, got %d", tt.expectedCount, count)
+			}
+		})
+	}
+}
+
+func TestStatefulsetMatchLabelsMetric(t *testing.T) {
+	labelNames := []string{"app", "version"}
+	labelValues := []string{"test-app", "v1.0"}
+	uid := "test-uid"
+
+	metric := newStatefulsetMatchLabelsMetric("test-statefulset", "default", "statefulSet_match_labels", labelNames, labelValues, uid)
+
+	// Test Desc method
+	desc := metric.Desc()
+	if desc == nil {
+		t.Error("Expected non-nil descriptor")
+	}
+
+	// Test Write method
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	if dtoMetric.Gauge == nil {
+		t.Error("Expected gauge metric")
+	}
+
+	if *dtoMetric.Gauge.Value != 1.0 {
+		t.Errorf("Expected gauge value 1.0, got %f", *dtoMetric.Gauge.Value)
+	}
+
+	// Verify labels
+	expectedLabels := map[string]string{
+		"app":         "test-app",
+		"version":     "v1.0",
+		"statefulSet": "test-statefulset",
+		"namespace":   "default",
+		"uid":         uid,
+	}
+
+	actualLabels := make(map[string]string)
+	for _, label := range dtoMetric.Label {
+		actualLabels[*label.Name] = *label.Value
+	}
+
+	for key, expectedValue := range expectedLabels {
+		if actualValue, ok := actualLabels[key]; !ok {
+			t.Errorf("Missing label %s", key)
+		} else if actualValue != expectedValue {
+			t.Errorf("Label %s: expected %s, got %s", key, expectedValue, actualValue)
+		}
+	}
+}
+
+func TestStatefulsetMatchLabelsMetric_EmptyLabels(t *testing.T) {
+	metric := newStatefulsetMatchLabelsMetric("empty-statefulset", "test-ns", "statefulSet_match_labels", []string{}, []string{}, "empty-uid")
+
+	var dtoMetric dto.Metric
+	err := metric.Write(&dtoMetric)
+	if err != nil {
+		t.Errorf("Expected no error, got %v", err)
+	}
+
+	// Should still have the statefulset metadata labels
+	expectedCount := 3 // statefulSet, namespace, uid
+	if len(dtoMetric.Label) != expectedCount {
+		t.Errorf("Expected %d labels, got %d", expectedCount, len(dtoMetric.Label))
+	}
+}
+
+func TestStatefulsetMatchLabelsMetric_MissingFields(t *testing.T) {
+	tests := []struct {
+		name            string
+		statefulsetName string
+		namespace       string
+		uid             string
+	}{
+		{
+			name:            "empty statefulset name",
+			statefulsetName: "",
+			namespace:       "test-ns",
+			uid:             "test-uid",
+		},
+		{
+			name:            "empty namespace",
+			statefulsetName: "test-statefulset",
+			namespace:       "",
+			uid:             "test-uid",
+		},
+		{
+			name:            "empty uid",
+			statefulsetName: "test-statefulset",
+			namespace:       "test-ns",
+			uid:             "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := newStatefulsetMatchLabelsMetric(tt.statefulsetName, tt.namespace, "statefulSet_match_labels", []string{}, []string{}, tt.uid)
+
+			var dtoMetric dto.Metric
+			err := metric.Write(&dtoMetric)
+			if err != nil {
+				t.Errorf("Expected no error, got %v", err)
+			}
+
+			// Should still create the metric with empty values
+			if len(dtoMetric.Label) != 3 {
+				t.Errorf("Expected 3 labels, got %d", len(dtoMetric.Label))
+			}
+		})
+	}
+}
+
+// FakeStatefulsetCache implements ClusterCache interface for testing
+type FakeStatefulsetCache struct {
+	clustercache.ClusterCache
+	statefulsets []*clustercache.StatefulSet
+}
+
+func (f FakeStatefulsetCache) GetAllStatefulSets() []*clustercache.StatefulSet {
+	return f.statefulsets
+}
+
+func NewFakeStatefulsetCache(statefulsets []*clustercache.StatefulSet) FakeStatefulsetCache {
+	return FakeStatefulsetCache{
+		statefulsets: statefulsets,
+	}
+}