Selaa lähdekoodia

Sth/kcm 3442 (#3110)

Signed-off-by: Sean Holcomb <seanholcomb@gmail.com>
Sean Holcomb 1 vuosi sitten
vanhempi
sitoutus
b3fbed6f3b
64 muutettua tiedostoa jossa 6408 lisäystä ja 3630 poistoa
  1. 12 8
      core/pkg/clustercache/clustercache.go
  2. 69 34
      core/pkg/source/decoders.go
  3. 9 9
      core/pkg/source/queryresult.go
  4. 13 13
      modules/collector-source/go.mod
  5. 30 26
      modules/collector-source/go.sum
  6. 86 0
      modules/collector-source/pkg/collector/clustermap.go
  7. 1781 198
      modules/collector-source/pkg/collector/collector.go
  8. 10 5
      modules/collector-source/pkg/collector/collector_test.go
  9. 0 57
      modules/collector-source/pkg/collector/collectordriver.go
  10. 71 0
      modules/collector-source/pkg/collector/collectorprovider.go
  11. 80 0
      modules/collector-source/pkg/collector/collectorprovider_test.go
  12. 39 0
      modules/collector-source/pkg/collector/config.go
  13. 108 0
      modules/collector-source/pkg/collector/datasource.go
  14. 0 122
      modules/collector-source/pkg/collector/metric.go
  15. 0 1348
      modules/collector-source/pkg/collector/metrics.go
  16. 178 238
      modules/collector-source/pkg/collector/metricsquerier.go
  17. 169 17
      modules/collector-source/pkg/collector/metricsquerier_test.go
  18. 0 87
      modules/collector-source/pkg/collector/mock.go
  19. 0 45
      modules/collector-source/pkg/collector/networktargetprovider.go
  20. 0 83
      modules/collector-source/pkg/collector/opencost.go
  21. 0 342
      modules/collector-source/pkg/collector/scraper.go
  22. 0 796
      modules/collector-source/pkg/collector/scraper_test.go
  23. 0 32
      modules/collector-source/pkg/collector/targetscraper.go
  24. 0 116
      modules/collector-source/pkg/collector/targetscraper_test.go
  25. 43 0
      modules/collector-source/pkg/env/collectorenv.go
  26. 1 1
      modules/collector-source/pkg/metric/aggregator/activeminutes.go
  27. 59 0
      modules/collector-source/pkg/metric/aggregator/aggregator.go
  28. 1 1
      modules/collector-source/pkg/metric/aggregator/avgovertime.go
  29. 1 1
      modules/collector-source/pkg/metric/aggregator/increase.go
  30. 1 1
      modules/collector-source/pkg/metric/aggregator/info.go
  31. 1 1
      modules/collector-source/pkg/metric/aggregator/maxovertime.go
  32. 155 0
      modules/collector-source/pkg/metric/collector.go
  33. 163 0
      modules/collector-source/pkg/metric/repository.go
  34. 100 0
      modules/collector-source/pkg/metric/store.go
  35. 69 0
      modules/collector-source/pkg/metric/updater.go
  36. 0 31
      modules/collector-source/pkg/metrics/scraper.go
  37. 0 13
      modules/collector-source/pkg/metrics/target/target.go
  38. 394 0
      modules/collector-source/pkg/scrape/clustercache.go
  39. 878 0
      modules/collector-source/pkg/scrape/clustercache_test.go
  40. 64 0
      modules/collector-source/pkg/scrape/dcgm.go
  41. 86 0
      modules/collector-source/pkg/scrape/network.go
  42. 60 0
      modules/collector-source/pkg/scrape/opencost.go
  43. 0 0
      modules/collector-source/pkg/scrape/parser/lexer.go
  44. 0 0
      modules/collector-source/pkg/scrape/parser/lexer_test.go
  45. 0 0
      modules/collector-source/pkg/scrape/parser/parser.go
  46. 0 0
      modules/collector-source/pkg/scrape/parser/parser_test.go
  47. 0 0
      modules/collector-source/pkg/scrape/parser/scrape.txt
  48. 0 0
      modules/collector-source/pkg/scrape/parser/token.go
  49. 85 0
      modules/collector-source/pkg/scrape/scrapecontroller.go
  50. 5 0
      modules/collector-source/pkg/scrape/scraper.go
  51. 175 0
      modules/collector-source/pkg/scrape/statsummary.go
  52. 360 0
      modules/collector-source/pkg/scrape/statsummary_test.go
  53. 0 0
      modules/collector-source/pkg/scrape/target/filetarget.go
  54. 0 0
      modules/collector-source/pkg/scrape/target/stringtarget.go
  55. 25 0
      modules/collector-source/pkg/scrape/target/target.go
  56. 0 0
      modules/collector-source/pkg/scrape/target/urltarget.go
  57. 52 0
      modules/collector-source/pkg/scrape/targetscraper.go
  58. 489 0
      modules/collector-source/pkg/scrape/targetscraper_test.go
  59. 11 5
      modules/collector-source/pkg/util/helper.go
  60. 80 0
      modules/collector-source/pkg/util/interval.go
  61. 320 0
      modules/collector-source/pkg/util/interval_test.go
  62. 60 0
      modules/collector-source/pkg/util/resolution.go
  63. 9 0
      modules/collector-source/pkg/util/statsummaryclient.go
  64. 6 0
      pkg/env/costmodelenv.go

+ 12 - 8
core/pkg/clustercache/clustercache.go

@@ -63,6 +63,7 @@ type Service struct {
 	SpecSelector map[string]string
 	Type         v1.ServiceType
 	Status       v1.ServiceStatus
+	ClusterIP    string
 }
 
 type DaemonSet struct {
@@ -142,10 +143,11 @@ type PodDisruptionBudget struct {
 }
 
 type ReplicaSet struct {
-	Name         string
-	Namespace    string
-	SpecSelector *metav1.LabelSelector
-	Spec         appsv1.ReplicaSetSpec
+	Name            string
+	Namespace       string
+	OwnerReferences []metav1.OwnerReference
+	SpecSelector    *metav1.LabelSelector
+	Spec            appsv1.ReplicaSetSpec
 }
 
 type Volume struct {
@@ -252,6 +254,7 @@ func TransformService(input *v1.Service) *Service {
 		SpecSelector: input.Spec.Selector,
 		Type:         input.Spec.Type,
 		Status:       input.Status,
+		ClusterIP:    input.Spec.ClusterIP,
 	}
 }
 
@@ -348,10 +351,11 @@ func TransformPodDisruptionBudget(input *policyv1.PodDisruptionBudget) *PodDisru
 
 func TransformReplicaSet(input *appsv1.ReplicaSet) *ReplicaSet {
 	return &ReplicaSet{
-		Name:         input.Name,
-		Namespace:    input.Namespace,
-		Spec:         input.Spec,
-		SpecSelector: input.Spec.Selector,
+		Name:            input.Name,
+		Namespace:       input.Namespace,
+		OwnerReferences: input.OwnerReferences,
+		Spec:            input.Spec,
+		SpecSelector:    input.Spec.Selector,
 	}
 }
 

+ 69 - 34
core/pkg/source/decoders.go

@@ -4,6 +4,41 @@ import (
 	"github.com/opencost/opencost/core/pkg/util"
 )
 
+const (
+	ClusterIDLabel       = "cluster_id"
+	NamespaceLabel       = "namespace"
+	NodeLabel            = "node"
+	InstanceLabel        = "instance"
+	InstanceTypeLabel    = "instance_type"
+	ContainerLabel       = "container"
+	PodLabel             = "pod"
+	ProviderIDLabel      = "provider_id"
+	DeviceLabel          = "device"
+	PVCLabel             = "persistentvolumeclaim"
+	PVLabel              = "persistentvolume"
+	StorageClassLabel    = "storageclass"
+	VolumeNameLabel      = "volumename"
+	ServiceLabel         = "service"
+	ServiceNameLabel     = "service_name"
+	IngressIPLabel       = "ingress_ip"
+	ProvisionerNameLabel = "provisioner_name"
+	UIDLabel             = "uid"
+	KubernetesNodeLabel  = "kubernetes_node"
+	ModeLabel            = "mode"
+	ModelNameLabel       = "modelName"
+	UUIDLabel            = "UUID"
+	ResourceLabel        = "resource"
+	DeploymentLabel      = "deployment"
+	StatefulSetLabel     = "statefulSet"
+	ReplicaSetLabel      = "replicaset"
+	OwnerNameLabel       = "owner_name"
+	OwnerKindLabel       = "owner_kind"
+	UnitLabel            = "unit"
+	InternetLabel        = "internet"
+	SameZoneLabel        = "same_zone"
+	SameRegionLabel      = "same_region"
+)
+
 type PVResult struct {
 	Cluster          string
 	PersistentVolume string
@@ -20,7 +55,7 @@ type PVUsedAvgResult struct {
 func DecodePVUsedAvgResult(result *QueryResult) *PVUsedAvgResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	pvc, _ := result.GetString("persistentvolumeclaim")
+	pvc, _ := result.GetString(PVCLabel)
 
 	return &PVUsedAvgResult{
 		Cluster:               cluster,
@@ -39,7 +74,7 @@ type PVActiveMinutesResult struct {
 
 func DecodePVActiveMinutesResult(result *QueryResult) *PVActiveMinutesResult {
 	cluster, _ := result.GetCluster()
-	pv, _ := result.GetString("persistentvolume")
+	pv, _ := result.GetString(PVLabel)
 
 	return &PVActiveMinutesResult{
 		Cluster:          cluster,
@@ -58,7 +93,7 @@ type PVUsedMaxResult struct {
 func DecodePVUsedMaxResult(result *QueryResult) *PVUsedMaxResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	pvc, _ := result.GetString("persistentvolumeclaim")
+	pvc, _ := result.GetString(PVCLabel)
 
 	return &PVUsedMaxResult{
 		Cluster:               cluster,
@@ -289,8 +324,8 @@ type NodeCPUModeTotalResult struct {
 
 func DecodeNodeCPUModeTotalResult(result *QueryResult) *NodeCPUModeTotalResult {
 	cluster, _ := result.GetCluster()
-	node, _ := result.GetString("kubernetes_node")
-	mode, _ := result.GetString("mode")
+	node, _ := result.GetString(KubernetesNodeLabel)
+	mode, _ := result.GetString(ModeLabel)
 
 	return &NodeCPUModeTotalResult{
 		Cluster: cluster,
@@ -355,8 +390,8 @@ type LBActiveMinutesResult struct {
 func DecodeLBActiveMinutesResult(result *QueryResult) *LBActiveMinutesResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	service, _ := result.GetString("service_name")
-	ingressIp, _ := result.GetString("ingress_ip")
+	service, _ := result.GetString(ServiceNameLabel)
+	ingressIp, _ := result.GetString(IngressIPLabel)
 
 	return &LBActiveMinutesResult{
 		Cluster:   cluster,
@@ -381,7 +416,7 @@ type ClusterManagementDurationResult struct {
 
 func DecodeClusterManagementDurationResult(result *QueryResult) *ClusterManagementDurationResult {
 	cluster, _ := result.GetCluster()
-	provisioner, _ := result.GetString("provisioner_name")
+	provisioner, _ := result.GetString(ProvisionerNameLabel)
 
 	return &ClusterManagementDurationResult{
 		Cluster:     cluster,
@@ -406,7 +441,7 @@ type PodsResult struct {
 }
 
 func DecodePodsResult(result *QueryResult) *PodsResult {
-	uid, _ := result.GetString("uid")
+	uid, _ := result.GetString(UIDLabel)
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
@@ -668,9 +703,9 @@ func DecodeGPUInfoResult(result *QueryResult) *GPUInfoResult {
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
 	container, _ := result.GetContainer()
-	device, _ := result.GetString("device")
-	modelName, _ := result.GetString("modelName")
-	uuid, _ := result.GetString("UUID")
+	device, _ := result.GetString(DeviceLabel)
+	modelName, _ := result.GetString(ModelNameLabel)
+	uuid, _ := result.GetString(UUIDLabel)
 
 	return &GPUInfoResult{
 		Cluster:   cluster,
@@ -698,7 +733,7 @@ func DecodeIsGPUSharedResult(result *QueryResult) *IsGPUSharedResult {
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
 	container, _ := result.GetContainer()
-	resource, _ := result.GetString("resource")
+	resource, _ := result.GetString(ResourceLabel)
 
 	return &IsGPUSharedResult{
 		Cluster:   cluster,
@@ -723,8 +758,8 @@ func DecodePodPVCAllocationResult(result *QueryResult) *PodPVCAllocationResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
-	pv, _ := result.GetString("persistentvolume")
-	pvc, _ := result.GetString("persistentvolumeclaim")
+	pv, _ := result.GetString(PVLabel)
+	pvc, _ := result.GetString(PVCLabel)
 
 	return &PodPVCAllocationResult{
 		Cluster:               cluster,
@@ -747,7 +782,7 @@ type PVCBytesRequestedResult struct {
 func DecodePVCBytesRequestedResult(result *QueryResult) *PVCBytesRequestedResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	pvc, _ := result.GetString("persistentvolumeclaim")
+	pvc, _ := result.GetString(PVCLabel)
 
 	return &PVCBytesRequestedResult{
 		Cluster:               cluster,
@@ -770,9 +805,9 @@ type PVCInfoResult struct {
 func DecodePVCInfoResult(result *QueryResult) *PVCInfoResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	volumeName, _ := result.GetString("volumename")
-	pvc, _ := result.GetString("persistentvolumeclaim")
-	storageClass, _ := result.GetString("storageclass")
+	volumeName, _ := result.GetString(VolumeNameLabel)
+	pvc, _ := result.GetString(PVCLabel)
+	storageClass, _ := result.GetString(StorageClassLabel)
 
 	return &PVCInfoResult{
 		Cluster:               cluster,
@@ -793,7 +828,7 @@ type PVBytesResult struct {
 
 func DecodePVBytesResult(result *QueryResult) *PVBytesResult {
 	cluster, _ := result.GetCluster()
-	pv, _ := result.GetString("persistentvolume")
+	pv, _ := result.GetString(PVLabel)
 
 	return &PVBytesResult{
 		Cluster:          cluster,
@@ -813,8 +848,8 @@ type PVPricePerGiBHourResult struct {
 
 func DecodePVPricePerGiBHourResult(result *QueryResult) *PVPricePerGiBHourResult {
 	cluster, _ := result.GetCluster()
-	volumeName, _ := result.GetString("volumename")
-	pv, _ := result.GetString("persistentvolume")
+	volumeName, _ := result.GetString(VolumeNameLabel)
+	pv, _ := result.GetString(PVLabel)
 	providerId, _ := result.GetProviderID()
 
 	return &PVPricePerGiBHourResult{
@@ -838,9 +873,9 @@ type PVInfoResult struct {
 
 func DecodePVInfoResult(result *QueryResult) *PVInfoResult {
 	cluster, _ := result.GetCluster()
-	storageClass, _ := result.GetString("storageclass")
+	storageClass, _ := result.GetString(StorageClassLabel)
 	providerId, _ := result.GetProviderID()
-	pv, _ := result.GetString("persistentvolume")
+	pv, _ := result.GetString(PVLabel)
 
 	return &PVInfoResult{
 		Cluster:          cluster,
@@ -865,7 +900,7 @@ func DecodeNetworkGiBResult(result *QueryResult) *NetworkGiBResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
-	service, _ := result.GetString("service")
+	service, _ := result.GetString(ServiceLabel)
 
 	return &NetworkGiBResult{
 		Cluster:   cluster,
@@ -1117,7 +1152,7 @@ type ServiceLabelsResult struct {
 func DecodeServiceLabelsResult(result *QueryResult) *ServiceLabelsResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	service, _ := result.GetString("service")
+	service, _ := result.GetString(ServiceLabel)
 	labels := result.GetLabels()
 
 	return &ServiceLabelsResult{
@@ -1140,7 +1175,7 @@ type DeploymentLabelsResult struct {
 func DecodeDeploymentLabelsResult(result *QueryResult) *DeploymentLabelsResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	deployment, _ := result.GetString("deployment")
+	deployment, _ := result.GetString(DeploymentLabel)
 	labels := result.GetLabels()
 
 	return &DeploymentLabelsResult{
@@ -1163,7 +1198,7 @@ type StatefulSetLabelsResult struct {
 func DecodeStatefulSetLabelsResult(result *QueryResult) *StatefulSetLabelsResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	statefulSet, _ := result.GetString("statefulSet")
+	statefulSet, _ := result.GetString(StatefulSetLabel)
 	labels := result.GetLabels()
 
 	return &StatefulSetLabelsResult{
@@ -1188,7 +1223,7 @@ func DecodeDaemonSetLabelsResult(result *QueryResult) *DaemonSetLabelsResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
-	daemonSet, _ := result.GetString("owner_name")
+	daemonSet, _ := result.GetString(OwnerNameLabel)
 	labels := result.GetLabels()
 
 	return &DaemonSetLabelsResult{
@@ -1214,7 +1249,7 @@ func DecodeJobLabelsResult(result *QueryResult) *JobLabelsResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
 	pod, _ := result.GetPod()
-	job, _ := result.GetString("owner_name")
+	job, _ := result.GetString(OwnerNameLabel)
 	labels := result.GetLabels()
 
 	return &JobLabelsResult{
@@ -1239,7 +1274,7 @@ type PodsWithReplicaSetOwnerResult struct {
 func DecodePodsWithReplicaSetOwnerResult(result *QueryResult) *PodsWithReplicaSetOwnerResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	replicaSet, _ := result.GetString("owner_name")
+	replicaSet, _ := result.GetString(OwnerNameLabel)
 	pod, _ := result.GetPod()
 
 	return &PodsWithReplicaSetOwnerResult{
@@ -1277,9 +1312,9 @@ type ReplicaSetsWithRolloutResult struct {
 func DecodeReplicaSetsWithRolloutResult(result *QueryResult) *ReplicaSetsWithRolloutResult {
 	cluster, _ := result.GetCluster()
 	namespace, _ := result.GetNamespace()
-	replicaSet, _ := result.GetString("replicaset")
-	ownerName, _ := result.GetString("owner_name")
-	ownerKind, _ := result.GetString("owner_kind")
+	replicaSet, _ := result.GetString(ReplicaSetLabel)
+	ownerName, _ := result.GetString(OwnerNameLabel)
+	ownerKind, _ := result.GetString(OwnerKindLabel)
 
 	return &ReplicaSetsWithRolloutResult{
 		Cluster:    cluster,

+ 9 - 9
core/pkg/source/queryresult.go

@@ -52,15 +52,15 @@ type ResultKeys struct {
 // DefaultResultKeys returns a new ResultKeys instance with typical default values.
 func DefaultResultKeys() *ResultKeys {
 	return &ResultKeys{
-		ClusterKey:      "cluster_id",
-		NamespaceKey:    "namespace",
-		NodeKey:         "node",
-		InstanceKey:     "instance",
-		InstanceTypeKey: "instance_type",
-		ContainerKey:    "container",
-		PodKey:          "pod",
-		ProviderIDKey:   "provider_id",
-		DeviceKey:       "device",
+		ClusterKey:      ClusterIDLabel,
+		NamespaceKey:    NamespaceLabel,
+		NodeKey:         NodeLabel,
+		InstanceKey:     InstanceLabel,
+		InstanceTypeKey: InstanceTypeLabel,
+		ContainerKey:    ContainerLabel,
+		PodKey:          PodLabel,
+		ProviderIDKey:   ProviderIDLabel,
+		DeviceKey:       DeviceLabel,
 	}
 }
 

+ 13 - 13
modules/collector-source/go.mod

@@ -5,11 +5,14 @@ replace github.com/opencost/opencost/core => ./../../core
 go 1.24.2
 
 require (
+	github.com/julienschmidt/httprouter v1.3.0
+	github.com/opencost/opencost v1.114.0
 	github.com/opencost/opencost/core v0.0.0-00010101000000-000000000000
 	golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0
-	k8s.io/api v0.32.3
-	k8s.io/apimachinery v0.32.3
-	k8s.io/client-go v0.32.3
+	k8s.io/api v0.33.0
+	k8s.io/apimachinery v0.33.0
+	k8s.io/client-go v0.33.0
+	k8s.io/kubelet v0.33.0
 )
 
 require (
@@ -23,15 +26,12 @@ require (
 	github.com/go-openapi/swag v0.23.0 // indirect
 	github.com/goccy/go-json v0.10.5 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
-	github.com/golang/protobuf v1.5.4 // indirect
-	github.com/google/gnostic-models v0.6.8 // indirect
+	github.com/google/gnostic-models v0.6.9 // indirect
 	github.com/google/go-cmp v0.7.0 // indirect
-	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/hashicorp/hcl v1.0.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/julienschmidt/httprouter v1.3.0 // indirect
 	github.com/magiconair/properties v1.8.5 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
@@ -46,15 +46,14 @@ require (
 	github.com/spf13/jwalterweatherman v1.1.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/spf13/viper v1.8.1 // indirect
-	github.com/stretchr/testify v1.10.0 // indirect
 	github.com/subosito/gotenv v1.2.0 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
-	golang.org/x/net v0.37.0 // indirect
-	golang.org/x/oauth2 v0.25.0 // indirect
+	golang.org/x/net v0.38.0 // indirect
+	golang.org/x/oauth2 v0.27.0 // indirect
 	golang.org/x/sys v0.31.0 // indirect
 	golang.org/x/term v0.30.0 // indirect
 	golang.org/x/text v0.23.0 // indirect
-	golang.org/x/time v0.7.0 // indirect
+	golang.org/x/time v0.9.0 // indirect
 	google.golang.org/protobuf v1.36.5 // indirect
 	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
@@ -62,9 +61,10 @@ require (
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
-	k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect
+	k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
 	k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e // indirect
 	sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
-	sigs.k8s.io/structured-merge-diff/v4 v4.4.2 // indirect
+	sigs.k8s.io/randfill v1.0.0 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
 	sigs.k8s.io/yaml v1.4.0 // indirect
 )

+ 30 - 26
modules/collector-source/go.sum

@@ -125,12 +125,10 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=
 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
-github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
-github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
-github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
-github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U=
+github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
+github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
 github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -146,8 +144,6 @@ github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
-github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
 github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
@@ -246,6 +242,8 @@ github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM
 github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
 github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
 github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
+github.com/opencost/opencost v1.114.0 h1:eR2RBzMhtfBGyco1z6ZofvCHfj+pzz9LgtKWZEKBGGo=
+github.com/opencost/opencost v1.114.0/go.mod h1:wjcc9kpG2jGjLnxsb5jDTNZWKF7waEQWeS4niMhpQO4=
 github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
 github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ=
 github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
@@ -253,15 +251,14 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
-github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
-github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
 github.com/rs/zerolog v1.26.1 h1:/ihwxqH+4z8UxyI70wM1z9yCvkWcfz/a3mj48k/Zngc=
 github.com/rs/zerolog v1.26.1/go.mod h1:/wSSJWX7lVrsOwlbyTRSOJvqRlc+WjWlfes+CiJ+tmc=
@@ -282,6 +279,8 @@ github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
@@ -398,8 +397,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
 golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
-golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c=
-golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
+golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
+golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -412,8 +411,8 @@ golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ
 golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
 golang.org/x/oauth2 v0.0.0-20210313182246-cd4f82c27b84/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
 golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70=
-golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
+golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M=
+golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -487,8 +486,8 @@ golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
-golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
-golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
+golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
@@ -679,16 +678,18 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
 honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
 honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
 honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
-k8s.io/api v0.32.3 h1:Hw7KqxRusq+6QSplE3NYG4MBxZw1BZnq4aP4cJVINls=
-k8s.io/api v0.32.3/go.mod h1:2wEDTXADtm/HA7CCMD8D8bK4yuBUptzaRhYcYEEYA3k=
-k8s.io/apimachinery v0.32.3 h1:JmDuDarhDmA/Li7j3aPrwhpNBA94Nvk5zLeOge9HH1U=
-k8s.io/apimachinery v0.32.3/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE=
-k8s.io/client-go v0.32.3 h1:RKPVltzopkSgHS7aS98QdscAgtgah/+zmpAogooIqVU=
-k8s.io/client-go v0.32.3/go.mod h1:3v0+3k4IcT9bXTc4V2rt+d2ZPPG700Xy6Oi0Gdl2PaY=
+k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU=
+k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM=
+k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ=
+k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
+k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98=
+k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
 k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
-k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y=
-k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4=
+k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
+k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
+k8s.io/kubelet v0.33.0 h1:4pJA2Ge6Rp0kDNV76KH7pTBiaV2T1a1874QHMcubuSU=
+k8s.io/kubelet v0.33.0/go.mod h1:iDnxbJQMy9DUNaML5L/WUlt3uJtNLWh7ZAe0JSp4Yi0=
 k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e h1:KqK5c/ghOm8xkHYhlodbp6i6+r+ChV2vuAuVRdFbLro=
 k8s.io/utils v0.0.0-20250321185631-1f6e0b77f77e/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
@@ -696,7 +697,10 @@ rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
 rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
 sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
 sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
-sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA=
-sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4=
+sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
+sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
+sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
+sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
 sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
 sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=

+ 86 - 0
modules/collector-source/pkg/collector/clustermap.go

@@ -0,0 +1,86 @@
+package collector
+
+import (
+	"fmt"
+
+	"github.com/opencost/opencost/core/pkg/clusters"
+	"github.com/opencost/opencost/core/pkg/log"
+)
+
+type collectorClusterMap struct {
+	clusterInfo clusters.ClusterInfoProvider
+}
+
+func newCollectorClusterMap(clusterInfo clusters.ClusterInfoProvider) *collectorClusterMap {
+	return &collectorClusterMap{
+		clusterInfo: clusterInfo,
+	}
+}
+
+// getLocalClusterInfo returns the local cluster info, for use when no cluster info metric is available.
+func (c *collectorClusterMap) getLocalClusterInfo() (*clusters.ClusterInfo, error) {
+	info := c.clusterInfo.GetClusterInfo()
+	clusterInfo, err := clusters.MapToClusterInfo(info)
+	if err != nil {
+		return nil, fmt.Errorf("parsing local cluster info failed: %w", err)
+	}
+
+	return clusterInfo, nil
+}
+
+func (c *collectorClusterMap) GetClusterIDs() []string {
+	info, err := c.getLocalClusterInfo()
+	if err != nil {
+		log.Errorf("%s", err.Error())
+		return nil
+	}
+	return []string{info.ID}
+}
+
+func (c *collectorClusterMap) AsMap() map[string]*clusters.ClusterInfo {
+	info, err := c.getLocalClusterInfo()
+	if err != nil {
+		log.Errorf("%s", err.Error())
+		return nil
+	}
+	return map[string]*clusters.ClusterInfo{
+		info.ID: info,
+	}
+}
+
+func (c *collectorClusterMap) InfoFor(clusterID string) *clusters.ClusterInfo {
+	info, err := c.getLocalClusterInfo()
+	if err != nil {
+		log.Errorf("%s", err.Error())
+		return nil
+	}
+
+	if info.ID == clusterID {
+		return info
+	}
+	return nil
+}
+
+func (c *collectorClusterMap) NameFor(clusterID string) string {
+	info, err := c.getLocalClusterInfo()
+	if err != nil {
+		log.Errorf("%s", err.Error())
+		return ""
+	}
+	if info.ID == clusterID {
+		return info.Name
+	}
+	return ""
+}
+
+func (c *collectorClusterMap) NameIDFor(clusterID string) string {
+	info, err := c.getLocalClusterInfo()
+	if err != nil {
+		log.Errorf("%s", err.Error())
+		return clusterID
+	}
+	if info.ID == clusterID {
+		return fmt.Sprintf("%s/%s", info.Name, clusterID)
+	}
+	return clusterID
+}

+ 1781 - 198
modules/collector-source/pkg/collector/collector.go

@@ -1,229 +1,1812 @@
 package collector
 
 import (
-	"fmt"
-	"slices"
-	"sync"
-	"time"
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric/aggregator"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape"
 )
 
-// Metric names
-const (
-	PVHourlyCost                                          = "pv_hourly_cost"
-	KubeletVolumeStatsUsedBytes                           = "kubelet_volume_stats_used_bytes"
-	KubePersistenVolumeClaimInfo                          = "kube_persistentvolumeclaim_info"
-	KubePersistentVolumeCapacityBytes                     = "kube_persistentvolume_capacity_bytes"
-	ContainerFSLimitBytes                                 = "container_fs_limit_bytes"
-	ContainerFSUsageBytes                                 = "container_fs_usage_bytes"
-	NodeTotalHourlyCost                                   = "node_total_hourly_cost"
-	KubeNodeStatusCapacityCPUCores                        = "kube_node_status_capacity_cpu_cores"
-	KubeNodeStatusCapacityMemoryBytes                     = "kube_node_status_capacity_memory_bytes"
-	KubeNodeStatusAllocatableCPUCores                     = "kube_node_status_allocatable_cpu_cores"
-	KubeNodeStatusAllocatableMemoryBytes                  = "kube_node_status_allocatable_memory_bytes"
-	NodeGPUCount                                          = "node_gpu_count"
-	KubeNodeLabels                                        = "kube_node_labels"
-	NodeCPUSecondsTotal                                   = "node_cpu_seconds_total"
-	KubecostLoadBalancerCost                              = "kubecost_load_balancer_cost"
-	KubecostClusterManagementCost                         = "kubecost_cluster_management_cost"
-	KubePodContainerStatusRunning                         = "kube_pod_container_status_running"
-	ContainerMemoryAllocationBytes                        = "container_memory_allocation_bytes"
-	KubePodContainerResourceRequests                      = "kube_pod_container_resource_requests"
-	ContainerMemoryWorkingSetBytes                        = "container_memory_working_set_bytes"
-	ContainerCPUAllocation                                = "container_cpu_allocation"
-	ContainerCPUUsageSecondsTotal                         = "container_cpu_usage_seconds_total"
-	KubecostContainerCPUUsageIrate                        = "kubecost_container_cpu_usage_irate"
-	DCGMFIPROFGRENGINEACTIVE                              = "DCGM_FI_PROF_GR_ENGINE_ACTIVE"
-	ContainerGPUAllocation                                = "container_gpu_allocation"
-	DCGMFIDEVDECUTIL                                      = "DCGM_FI_DEV_DEC_UTIL"
-	NodeCPUHourlyCost                                     = "node_cpu_hourly_cost"
-	NodeRAMHourlyCost                                     = "node_ram_hourly_cost"
-	NodeGPUHourlyCost                                     = "node_gpu_hourly_cost"
-	KubecostNodeIsSpot                                    = "kubecost_node_is_spot"
-	PodPVCAllocation                                      = "pod_pvc_allocation"
-	KubePersistentVolumeClaimResourceRequestsStorageBytes = "kube_persistentvolumeclaim_resource_requests_storage_bytes"
-	KubecostPVInfo                                        = "kubecost_pv_info"
-	KubecostPodNetworkEgressBytesTotal                    = "kubecost_pod_network_egress_bytes_total"
-	KubecostNetworkZoneEgressCost                         = "kubecost_network_zone_egress_cost"
-	KubecostNetworkRegionEgressCost                       = "kubecost_network_region_egress_cost"
-	KubecostNetworkInternetEgressCost                     = "kubecost_network_internet_egress_cost"
-	ContainerNetworkReceiveBytesTotal                     = "container_network_receive_bytes_total"
-	ContainerNetworkTransmitBytesTotal                    = "container_network_transmit_bytes_total"
-	KubeNamespaceLabels                                   = "kube_namespace_labels"
-	KubeNamespaceAnnotations                              = "kube_namespace_annotations"
-	KubePodLabels                                         = "kube_pod_labels"
-	KubePodAnnotations                                    = "kube_pod_annotations"
-	ServiceSelectorLabels                                 = "service_selector_labels"
-	DeploymentMatchLabels                                 = "deployment_match_labels"
-	StatefulSetMatchLabels                                = "statefulSet_match_labels"
-	KubePodOwner                                          = "kube_pod_owner"
-	KubeReplicasetOwner                                   = "kube_replicaset_owner"
-)
+// NewOpenCostMetricStore creates a new MetricStore that registers all MetricCollector instances
+// required for OpenCost.
+func NewOpenCostMetricStore() metric.MetricStore {
+	memStore := metric.NewInMemoryMetricStore()
 
-// MetricCollectorID is a unique identifier for a specific metric collector instance. We
-// use this identifier to register and unregister metric instances from the metrics collector
-// instead of the metric name and aggregation type to allow selectable cardinality (via labels)
-// across multiple instances of the same aggregation type and metric name.
-type MetricCollectorID string
-
-const (
-	PVPricePerGiBHourID             MetricCollectorID = "PVPricePerGiBHour"
-	PVUsedAverageID                 MetricCollectorID = "PVUsedAverage"
-	PVUsedMaxID                     MetricCollectorID = "PVUsedMax"
-	PVCInfoID                       MetricCollectorID = "PVCInfo"
-	PVActiveMinutesID               MetricCollectorID = "PVActiveMinutes"
-	LocalStorageCostID              MetricCollectorID = "LocalStorageCost"
-	LocalStorageUsedCostID          MetricCollectorID = "LocalStorageUsedCost"
-	LocalStorageUsedAverageID       MetricCollectorID = "LocalStorageUsedAverage"
-	LocalStorageUsedMaxID           MetricCollectorID = "LocalStorageUsedMax"
-	LocalStorageBytesID             MetricCollectorID = "LocalStorageBytesID"
-	LocalStorageActiveMinutesID     MetricCollectorID = "LocalStorageActiveMinutes"
-	NodeCPUCoresCapacityID          MetricCollectorID = "NodeCPUCoresCapacity"
-	NodeCPUCoresAllocatableID       MetricCollectorID = "NodeCPUCoresAllocatable"
-	NodeRAMBytesCapacityID          MetricCollectorID = "NodeRAMBytesCapacity"
-	NodeRAMBytesAllocatableID       MetricCollectorID = "NodeRAMBytesAllocatable"
-	NodeGPUCountID                  MetricCollectorID = "NodeGPUCount"
-	NodeLabelsID                    MetricCollectorID = "NodeLabels"
-	NodeActiveMinutesID             MetricCollectorID = "NodeActiveMinutes"
-	NodeCPUModeTotalID              MetricCollectorID = "NodeCPUModeTotal"
-	NodeRAMSystemUsageAverageID     MetricCollectorID = "NodeRAMSystemUsageAverage"
-	NodeRAMUserUsageAverageID       MetricCollectorID = "NodeRAMUserUsageAverage"
-	LBPricePerHourID                MetricCollectorID = "LBPricePerHour"
-	LBActiveMinutesID               MetricCollectorID = "LBActiveMinutes"
-	ClusterManagementDurationID     MetricCollectorID = "ClusterManagementDuration"
-	ClusterManagementPricePerHourID MetricCollectorID = "ClusterManagementPricePerHour"
-	PodActiveMinutesID              MetricCollectorID = "PodActiveMinutes"
-	RAMBytesAllocatedID             MetricCollectorID = "RAMBytesAllocated"
-	RAMRequestsID                   MetricCollectorID = "RAMRequests"
-	RAMUsageAverageID               MetricCollectorID = "RAMUsageAverage"
-	RAMUsageMaxID                   MetricCollectorID = "RAMUsageMax"
-	CPUCoresAllocatedID             MetricCollectorID = "CPUCoresAllocated"
-	CPURequestsID                   MetricCollectorID = "CPURequestsID"
-	CPUUsageAverageID               MetricCollectorID = "CPUUsageAverage"
-	CPUUsageMaxID                   MetricCollectorID = "CPUUsageMax"
-	GPUsRequestedID                 MetricCollectorID = "GPUsRequested"
-	GPUsUsageAverageID              MetricCollectorID = "GPUsUsageAverage"
-	GPUsUsageMaxID                  MetricCollectorID = "GPUsUsageMax"
-	GPUsAllocatedID                 MetricCollectorID = "GPUsAllocated"
-	IsGPUSharedID                   MetricCollectorID = "IsGPUShared"
-	GPUInfoID                       MetricCollectorID = "GPUInfo"
-	NodeCPUPricePerHourID           MetricCollectorID = "NodeCPUPricePerHour"
-	NodeRAMPricePerGiBHourID        MetricCollectorID = "NodeRAMPricePerGiBHour"
-	NodeGPUPricePerHourID           MetricCollectorID = "NodeGPUPricePerHour"
-	NodeIsSpotID                    MetricCollectorID = "NodeIsSpot"
-	PodPVCAllocationID              MetricCollectorID = "PodPVCAllocation"
-	PVCBytesRequestedID             MetricCollectorID = "PVCBytesRequested"
-	PVBytesID                       MetricCollectorID = "PVBytesID"
-	PVCostPerGiBHourID              MetricCollectorID = "PVCostPerGiBHour"
-	PVInfoID                        MetricCollectorID = "PVInfo"
-	NetZoneGiBID                    MetricCollectorID = "NetZoneGiB"
-	NetZonePricePerGiBID            MetricCollectorID = "NetZonePricePerGiB"
-	NetRegionGiBID                  MetricCollectorID = "NetRegionGiB"
-	NetRegionPricePerGiBID          MetricCollectorID = "NetRegionPricePerGiB"
-	NetInternetGiBID                MetricCollectorID = "NetInternetGiB"
-	NetInternetPricePerGiBID        MetricCollectorID = "NetInternetPricePerGiB"
-	NetReceiveBytesID               MetricCollectorID = "NetReceiveBytes"
-	NetTransferBytesID              MetricCollectorID = "NetTransferBytes"
-	NamespaceLabelsID               MetricCollectorID = "NamespaceLabels"
-	NamespaceAnnotationsID          MetricCollectorID = "NamespaceAnnotations"
-	PodLabelsID                     MetricCollectorID = "PodLabels"
-	PodAnnotationsID                MetricCollectorID = "PodAnnotations"
-	ServiceLabelsID                 MetricCollectorID = "ServiceLabels"
-	DeploymentLabelsID              MetricCollectorID = "DeploymentLabels"
-	StatefulSetLabelsID             MetricCollectorID = "StatefulSetLabels"
-	DaemonSetLabelsID               MetricCollectorID = "DaemonSetLabels"
-	JobLabelsID                     MetricCollectorID = "JobLabels"
-	PodsWithReplicaSetOwnerID       MetricCollectorID = "PodsWithReplicaSetOwner"
-	ReplicaSetsWithoutOwnersID      MetricCollectorID = "ReplicaSetsWithoutOwners"
-	ReplicaSetsWithRolloutID        MetricCollectorID = "ReplicaSetsWithRollout"
-)
+	// Register all the metrics
+	memStore.Register(NewPVPricePerGiBHourMetricCollector())
+	memStore.Register(NewPVUsedAverageMetricCollector())
+	memStore.Register(NewPVUsedMaxMetricCollector())
+	memStore.Register(NewPVCInfoMetricCollector())
+	memStore.Register(NewPVActiveMinutesMetricCollector())
+	memStore.Register(NewLocalStorageCostMetricCollector())
+	memStore.Register(NewLocalStorageUsedCostMetricCollector())
+	memStore.Register(NewLocalStorageUsedAverageMetricCollector())
+	memStore.Register(NewLocalStorageUsedMaxMetricCollector())
+	memStore.Register(NewLocalStorageBytesMetricCollector())
+	memStore.Register(NewLocalStorageActiveMinutesMetricCollector())
+	memStore.Register(NewNodeCPUCoresCapacityMetricCollector())
+	memStore.Register(NewNodeCPUCoresAllocatableMetricCollector())
+	memStore.Register(NewNodeRAMBytesCapacityMetricCollector())
+	memStore.Register(NewNodeRAMBytesAllocatableMetricCollector())
+	memStore.Register(NewNodeGPUCountMetricCollector())
+	memStore.Register(NewNodeLabelsMetricCollector())
+	memStore.Register(NewNodeActiveMinutesMetricCollector())
+	memStore.Register(NewNodeCPUModeTotalMetricCollector())
+	memStore.Register(NewNodeRAMSystemUsageAverageMetricCollector())
+	memStore.Register(NewNodeRAMUserUsageAverageMetricCollector())
+	memStore.Register(NewLBPricePerHourMetricCollector())
+	memStore.Register(NewLBActiveMinutesMetricCollector())
+	memStore.Register(NewClusterManagementDurationMetricCollector())
+	memStore.Register(NewClusterManagementPricePerHourMetricCollector())
+	memStore.Register(NewPodActiveMinutesMetricCollector())
+	memStore.Register(NewRAMBytesAllocatedMetricCollector())
+	memStore.Register(NewRAMRequestsMetricCollector())
+	memStore.Register(NewRAMUsageAverageMetricCollector())
+	memStore.Register(NewRAMUsageMaxMetricCollector())
+	memStore.Register(NewCPUCoresAllocatedMetricCollector())
+	memStore.Register(NewCPURequestsMetricCollector())
+	memStore.Register(NewCPUUsageAverageMetricCollector())
+	memStore.Register(NewCPUUsageMaxMetricCollector())
+	memStore.Register(NewGPUsRequestedMetricCollector())
+	memStore.Register(NewGPUsUsageAverageMetricCollector())
+	memStore.Register(NewGPUsUsageMaxMetricCollector())
+	memStore.Register(NewGPUsAllocatedMetricCollector())
+	memStore.Register(NewIsGPUSharedMetricCollector())
+	memStore.Register(NewGPUInfoMetricCollector())
+	memStore.Register(NewNodeCPUPricePerHourMetricCollector())
+	memStore.Register(NewNodeRAMPricePerGiBHourMetricCollector())
+	memStore.Register(NewNodeGPUPricePerHourMetricCollector())
+	memStore.Register(NewNodeIsSpotMetricCollector())
+	memStore.Register(NewPodPVCAllocationMetricCollector())
+	memStore.Register(NewPVCBytesRequestedMetricCollector())
+	memStore.Register(NewPVBytesMetricCollector())
+	memStore.Register(NewPVCostPerGiBHourMetricCollector())
+	memStore.Register(NewPVInfoMetricCollector())
+	memStore.Register(NewNetZoneGiBMetricCollector())
+	memStore.Register(NewNetZonePricePerGiBMetricCollector())
+	memStore.Register(NewNetRegionGiBMetricCollector())
+	memStore.Register(NewNetRegionPricePerGiBMetricCollector())
+	memStore.Register(NewNetInternetGiBMetricCollector())
+	memStore.Register(NewNetInternetPricePerGiBMetricCollector())
+	memStore.Register(NewNetInternetServiceGiBMetricCollector())
+	memStore.Register(NewNetReceiveBytesMetricCollector())
+	memStore.Register(NewNetZoneIngressGiBMetricCollector())
+	memStore.Register(NewNetRegionIngressGiBMetricCollector())
+	memStore.Register(NewNetInternetIngressGiBMetricCollector())
+	memStore.Register(NewNetInternetServiceIngressGiBMetricCollector())
+	memStore.Register(NewNetTransferBytesMetricCollector())
+	memStore.Register(NewNamespaceLabelsMetricCollector())
+	memStore.Register(NewNamespaceAnnotationsMetricCollector())
+	memStore.Register(NewPodLabelsMetricCollector())
+	memStore.Register(NewPodAnnotationsMetricCollector())
+	memStore.Register(NewServiceLabelsMetricCollector())
+	memStore.Register(NewDeploymentLabelsMetricCollector())
+	memStore.Register(NewStatefulSetLabelsMetricCollector())
+	memStore.Register(NewDaemonSetLabelsMetricCollector())
+	memStore.Register(NewJobLabelsMetricCollector())
+	memStore.Register(NewPodsWithReplicaSetOwnerMetricCollector())
+	memStore.Register(NewReplicaSetsWithoutOwnersMetricCollector())
+	memStore.Register(NewReplicaSetsWithRolloutMetricCollector())
+
+	return memStore
+}
+
+//	avg(
+//		avg_over_time(
+//			pv_hourly_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, persistentvolume, volumename, provider_id)
+
+func NewPVPricePerGiBHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVPricePerGiBHourID,
+		scrape.PVHourlyCost,
+		[]string{
+			source.VolumeNameLabel,
+			source.PVLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubelet_volume_stats_used_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, persistentvolumeclaim, namespace)
+
+func NewPVUsedAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVUsedAverageID,
+		scrape.KubeletVolumeStatsUsedBytes,
+		[]string{
+			source.NamespaceLabel,
+			source.PVCLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	max(
+//		max_over_time(
+//			kubelet_volume_stats_used_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, persistentvolumeclaim, namespace)
+
+func NewPVUsedMaxMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVUsedMaxID,
+		scrape.KubeletVolumeStatsUsedBytes,
+		[]string{
+			source.NamespaceLabel,
+			source.PVCLabel,
+		},
+		aggregator.MaxOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		kube_persistentvolumeclaim_info{
+//			volumename != "",
+//			<some_custom_filter>
+//		}
+//	) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[0:10m]
+
+func NewPVCInfoMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVCInfoID,
+		scrape.KubePersistentVolumeClaimInfo,
+		[]string{
+			source.NamespaceLabel,
+			source.VolumeNameLabel,
+			source.PVCLabel,
+			source.StorageClassLabel,
+		},
+		aggregator.Info,
+		nil, // TODO: missing filter — the query above excludes volumename == ""
+	)
+}
+
+//	avg(
+//		kube_persistentvolume_capacity_bytes{
+//			<some_custom_filter>
+//		}
+//	) by (cluster_id, persistentvolume)[0:10m]
+
+func NewPVActiveMinutesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVActiveMinutesID,
+		scrape.KubePersistentVolumeCapacityBytes,
+		[]string{
+			source.PVLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+// TODO: revisit this query translation
+//
+//	sum_over_time(
+//		sum(
+//			container_fs_limit_bytes{
+//				device=~"/dev/(nvme|sda).*",
+//				id="/",
+//				<some_custom_filter>
+//			}
+//		) by (instance, device, cluster_id)[%s:%dm]
+//	) / 1024 / 1024 / 1024 * %f * %f
+func NewLocalStorageCostMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageCostID,
+		scrape.NodeFSCapacityBytes,
+		[]string{
+			source.InstanceLabel,
+			source.DeviceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			// TODO: this filter needs a regex matching device=~"/dev/(nvme|sda).*" and id="/"
+			return true
+		},
+	)
+}
+
+// sum_over_time(
+//
+//	sum(
+//		container_fs_usage_bytes{
+//			device=~"/dev/(nvme|sda).*",
+//			id="/",
+//			<some_custom_filter>
+//		}
+//	) by (instance, device, cluster_id)[%s:%dm]
+//
+// ) / 1024 / 1024 / 1024 * %f * %f`
+func NewLocalStorageUsedCostMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageUsedCostID,
+		scrape.ContainerFSUsageBytes,
+		[]string{
+			source.InstanceLabel,
+			source.DeviceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			// TODO: this filter needs a regex matching device=~"/dev/(nvme|sda).*" and id="/"
+			return true
+		},
+	)
+}
+
+//	avg(
+//		sum(
+//			avg_over_time(
+//				container_fs_usage_bytes{
+//					device=~"/dev/(nvme|sda).*",
+//					id="/",
+//					<some_custom_filter>
+//				}[1h]
+//			)
+//		) by (instance, device, cluster_id, job)
+//	) by (instance, device, cluster_id)
+
+func NewLocalStorageUsedAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageUsedAverageID,
+		scrape.ContainerFSUsageBytes,
+		[]string{
+			source.InstanceLabel,
+			source.DeviceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			// TODO: this filter needs a regex matching device=~"/dev/(nvme|sda).*" and id="/"
+			return true
+		},
+	)
+}
+
+// max(
+//
+//	sum(
+//		max_over_time(
+//			container_fs_usage_bytes{
+//				device=~"/dev/(nvme|sda).*",
+//				id="/",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (instance, device, cluster_id, job)
+//
+// ) by (instance, device, cluster_id)
+func NewLocalStorageUsedMaxMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageUsedMaxID,
+		scrape.ContainerFSUsageBytes,
+		[]string{
+			source.InstanceLabel,
+			source.DeviceLabel,
+		},
+		aggregator.MaxOverTime,
+		func(labels map[string]string) bool {
+			// TODO: this filter needs a regex matching device=~"/dev/(nvme|sda).*" and id="/"
+			return true
+		},
+	)
+}
+
+// avg_over_time(
+//
+//	sum(
+//		container_fs_limit_bytes{
+//			device=~"/dev/(nvme|sda).*",
+//			id="/",
+//			<some_custom_filter>
+//		}
+//	) by (instance, device, cluster_id)[%s:%dm]
+//
+// )
+func NewLocalStorageBytesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageBytesID,
+		scrape.NodeFSCapacityBytes,
+		[]string{
+			source.InstanceLabel,
+			source.DeviceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			// TODO: this filter needs a regex matching device=~"/dev/(nvme|sda).*" and id="/"
+			return true
+		},
+	)
+}
+
+// count(
+//
+//	node_total_hourly_cost{
+//		<some_custom_filter>
+//	}
+//
+// ) by (cluster_id, node, instance, provider_id)[%s:%dm]
+func NewLocalStorageActiveMinutesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LocalStorageActiveMinutesID,
+		scrape.NodeTotalHourlyCost,
+		[]string{
+			source.NodeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+// avg(
+//
+//	avg_over_time(
+//		kube_node_status_capacity_cpu_cores{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+//
+// ) by (cluster_id, node)
+func NewNodeCPUCoresCapacityMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeCPUCoresCapacityID,
+		scrape.KubeNodeStatusCapacityCPUCores,
+		[]string{
+			source.NodeLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kube_node_status_allocatable_cpu_cores{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, node)
+
+func NewNodeCPUCoresAllocatableMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeCPUCoresAllocatableID,
+		scrape.KubeNodeStatusAllocatableCPUCores,
+		[]string{
+			source.NodeLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kube_node_status_capacity_memory_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, node)
+
+func NewNodeRAMBytesCapacityMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeRAMBytesCapacityID,
+		scrape.KubeNodeStatusCapacityMemoryBytes,
+		[]string{
+			source.NodeLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kube_node_status_allocatable_memory_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, node)
+
+func NewNodeRAMBytesAllocatableMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeRAMBytesAllocatableID,
+		scrape.KubeNodeStatusAllocatableMemoryBytes,
+		[]string{
+			source.NodeLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			node_gpu_count{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, node, provider_id)
+
+func NewNodeGPUCountMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeGPUCountID,
+		scrape.NodeGPUCount,
+		[]string{
+			source.NodeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		kube_node_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewNodeLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeLabelsID,
+		scrape.KubeNodeLabels,
+		[]string{
+			source.NodeLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg(
+//		node_total_hourly_cost{
+//			<some_custom_filter>
+//		}
+//	) by (node, cluster_id, provider_id)[%s:%dm]
+
+func NewNodeActiveMinutesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeActiveMinutesID,
+		scrape.NodeTotalHourlyCost,
+		[]string{
+			source.NodeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+//	sum(
+//		rate(
+//			node_cpu_seconds_total{
+//				<some_custom_filter>
+//			}[%s:%dm]
+//		)
+//	) by (kubernetes_node, cluster_id, mode)
+
+func NewNodeCPUModeTotalMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeCPUModeTotalID,
+		scrape.NodeCPUSecondsTotal,
+		[]string{
+			source.KubernetesNodeLabel,
+			source.ModeLabel,
+		},
+		aggregator.Increase,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			container_memory_working_set_bytes{
+//				container_name!="POD",
+//				container_name!="",
+//				namespace="kube-system",
+//				<some_custom_filter>
+//			}[%s:%dm]
+//		)
+//	) by (instance, cluster_id)
+
+func NewNodeRAMSystemUsageAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeRAMSystemUsageAverageID,
+		scrape.ContainerMemoryWorkingSetBytes,
+		[]string{
+			source.InstanceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NamespaceLabel] == "kube-system"
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			container_memory_working_set_bytes{
+//				container_name!="POD",
+//				container_name!="",
+//				namespace!="kube-system",
+//				<some_custom_filter>
+//			}[%s:%dm]
+//		)
+//	) by (instance, cluster_id)
+
+func NewNodeRAMUserUsageAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeRAMUserUsageAverageID,
+		scrape.ContainerMemoryWorkingSetBytes,
+		[]string{
+			source.InstanceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NamespaceLabel] != "kube-system"
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_load_balancer_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (namespace, service_name, ingress_ip, cluster_id)
+
+func NewLBPricePerHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LBPricePerHourID,
+		scrape.KubecostLoadBalancerCost,
+		[]string{
+			source.NamespaceLabel,
+			source.ServiceNameLabel,
+			source.IngressIPLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		kubecost_load_balancer_cost{
+//			<some_custom_filter>
+//		}
+//	) by (namespace, service_name, cluster_id, ingress_ip)[%s:%dm]
+
+func NewLBActiveMinutesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.LBActiveMinutesID,
+		scrape.KubecostLoadBalancerCost,
+		[]string{
+			source.NamespaceLabel,
+			source.ServiceNameLabel,
+			source.IngressIPLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+//	avg(
+//		kubecost_cluster_management_cost{
+//			<some_custom_filter>
+//		}
+//	) by (cluster_id, provisioner_name)[%s:%dm]
+
+func NewClusterManagementDurationMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.ClusterManagementDurationID,
+		scrape.KubecostClusterManagementCost,
+		[]string{
+			source.ProvisionerNameLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_cluster_management_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, provisioner_name)
+
+func NewClusterManagementPricePerHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.ClusterManagementPricePerHourID,
+		scrape.KubecostClusterManagementCost,
+		[]string{
+			source.ProvisionerNameLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		kube_pod_container_status_running{
+//			<some_custom_filter>
+//		} != 0
+//	) by (pod, namespace, uid, cluster_id)[%s:%s]
+
+func NewPodActiveMinutesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PodActiveMinutesID,
+		scrape.KubePodContainerStatusRunning,
+		[]string{
+			source.UIDLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.ActiveMinutes,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			container_memory_allocation_bytes{
+//				container!="",
+//				container!="POD",
+//				node!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, cluster_id, provider_id)
+
+func NewRAMBytesAllocatedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.RAMBytesAllocatedID,
+		scrape.ContainerMemoryAllocationBytes,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != ""
+		},
+	)
+}
+
+// avg(
+//	avg_over_time(
+//		kube_pod_container_resource_requests{
+//			resource="memory",
+//			unit="byte",
+//			container!="",
+//			container!="POD",
+//			node!="",
+//			<some_custom_filter>
+//		}[1h]
+//	)
+//) by (container, pod, namespace, node, cluster_id)
+
+func NewRAMRequestsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.RAMRequestsID,
+		scrape.KubePodContainerResourceRequests,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ResourceLabel] == "memory" && labels[source.UnitLabel] == "byte" && labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != ""
+		},
+	)
+}
+
+// avg(
+// 		avg_over_time(
+// 			container_memory_working_set_bytes{
+// 				container!="",
+// 				container!="POD",
+// 				<some_custom_filter>
+// 			}[1h]
+// 		)
+// ) by (container, pod, namespace, instance, cluster_id)
+
+func NewRAMUsageAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.RAMUsageAverageID,
+		scrape.ContainerMemoryWorkingSetBytes,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != ""
+		},
+	)
+}
+
+//	max(
+//		max_over_time(
+//			container_memory_working_set_bytes{
+//				container!="",
+//				container_name!="POD",
+//				container!="POD",
+//				<some_custom_filter>
+//			}[%s]
+//		)
+//	) by (container_name, container, pod_name, pod, namespace, node, instance, %s)
+
+func NewRAMUsageMaxMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.RAMUsageMaxID,
+		scrape.ContainerMemoryWorkingSetBytes,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.MaxOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "" && labels[source.ContainerLabel] != "POD" && labels[source.NodeLabel] != ""
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			container_cpu_allocation{
+//				container!="",
+//				container!="POD",
+//				node!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, cluster_id)
+
+func NewCPUCoresAllocatedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.CPUCoresAllocatedID,
+		scrape.ContainerCPUAllocation,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != ""
+		},
+	)
+}
 
-// MetricsCollector is an interface that defines an implementation capable of managing a collection
-// of metric instances, and exposes helper methods for routing metric updates and queries to the
-// proper collector instances.
-type MetricsCollector interface {
-	// Register accepts a `MetricCollector` instance and registers it for routing updates and querying.
-	Register(collector *MetricCollector) error
+//	avg(
+//		avg_over_time(
+//			kube_pod_container_resource_requests{
+//				resource="cpu",
+//				unit="core",
+//				container!="",
+//				container!="POD",
+//				node!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, cluster_id)
 
-	// Unregister accepts a `MetricCollectorID` and unregisters the metric collector instance from receiving metrics
-	// updates and query availability.
-	Unregister(collectorID MetricCollectorID) bool
+func NewCPURequestsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.CPURequestsID,
+		scrape.KubePodContainerResourceRequests,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ResourceLabel] == "cpu" && labels[source.UnitLabel] == "core" && labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != ""
+		},
+	)
+}
 
-	// Query accepts a `MetricCollectorID` and returns a slice of `MetricResult` instances for that collector.
-	Query(collectorID MetricCollectorID) ([]*MetricResult, error)
+//	avg(
+//		rate(
+//			container_cpu_usage_seconds_total{
+//				container!="",
+//				container_name!="POD",
+//				container!="POD",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container_name, container, pod_name, pod, namespace, node, instance, cluster_id)
 
-	// Update accepts the name of a metric, the label set and values to update the metric, the updated value, and a timestamp.
-	// This method does not accept a `MetricCollectorID` because it provides updates across many potential metric collector instances
-	// which utilize the same metric.
-	Update(metricName string, labels map[string]string, value float64, timestamp *time.Time, additionalInformation map[string]string)
+// NewCPUUsageAverageMetricCollector builds the collector for average container
+// CPU usage, fed by the container_cpu_usage_seconds_total counter and keyed by
+// node/instance/namespace/pod/container. Empty container labels and the pause
+// container ("POD") are filtered out before aggregation.
+//
+// NOTE(review): the query comment above uses rate(), but this collector uses
+// aggregator.Increase — presumably the per-window rate is derived downstream
+// from the increase; confirm before relying on the raw aggregated values.
+func NewCPUUsageAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.CPUUsageAverageID,
+		scrape.ContainerCPUUsageSecondsTotal,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "" && labels[source.ContainerLabel] != "POD"
+		},
+	)
}
 
-// InMemoryMetricsCollector is a thread-safe implementation of the `MetricsCollector` interface that stores metric instances
-// in memory.
-type InMemoryMetricsCollector struct {
-	lock          sync.Mutex
-	byMetricName  map[string][]*MetricCollector
-	byCollectorID map[MetricCollectorID]*MetricCollector
+// NewCPUUsageMaxMetricCollector builds the collector for the maximum observed
+// container CPU usage (MaxOverTime of container_cpu_usage_seconds_total),
+// keyed by node/instance/namespace/pod/container.
+//
+// NOTE(review): unlike NewRAMUsageMaxMetricCollector this passes a nil filter,
+// so pause ("POD") and empty-name containers are NOT excluded here — confirm
+// that is intentional.
+// TODO this is a special case
+func NewCPUUsageMaxMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.CPUUsageMaxID,
+		scrape.ContainerCPUUsageSecondsTotal,
+		[]string{
+			source.NodeLabel,
+			source.InstanceLabel,
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.MaxOverTime,
+		nil,
+	)
+}
 
-func NewInMemoryMetricsCollector() MetricsCollector {
-	return &InMemoryMetricsCollector{
-		byMetricName:  make(map[string][]*MetricCollector),
-		byCollectorID: make(map[MetricCollectorID]*MetricCollector),
-	}
+//	avg(
+//		avg_over_time(
+//			kube_pod_container_resource_requests{
+//				resource="nvidia_com_gpu",
+//				container!="",
+//				container!="POD",
+//				node!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, cluster_id)
+
+// NewGPUsRequestedMetricCollector builds the collector for requested GPU
+// counts, fed by kube_pod_container_resource_requests rows whose
+// resource="nvidia_com_gpu", keyed by namespace/pod/container.
+//
+// Note that the filter runs against the full scraped label set, so it may
+// legitimately reference labels (e.g. node) that are not part of the
+// aggregation key below.
+func NewGPUsRequestedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.GPUsRequestedID,
+		scrape.KubePodContainerResourceRequests,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ResourceLabel] == "nvidia_com_gpu" && labels[source.ContainerLabel] != "POD" && labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != ""
+		},
+	)
+}
 
-func (immc *InMemoryMetricsCollector) Register(collector *MetricCollector) error {
-	immc.lock.Lock()
-	defer immc.lock.Unlock()
+//	avg(
+//		avg_over_time(
+//			DCGM_FI_PROF_GR_ENGINE_ACTIVE{
+//				container!=""
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, cluster_id)
 
-	if _, ok := immc.byCollectorID[collector.id]; ok {
-		return fmt.Errorf("collector with ID: %s already exists", collector.id)
-	}
+func NewGPUsUsageAverageMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.GPUsUsageAverageID,
+		scrape.DCGMFIPROFGRENGINEACTIVE,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != ""
+		},
+	)
+}
+
+//	max(
+//		max_over_time(
+//			DCGM_FI_PROF_GR_ENGINE_ACTIVE{
+//				container!=""
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, cluster_id)
 
-	immc.byCollectorID[collector.id] = collector
-	immc.byMetricName[collector.metricName] = append(immc.byMetricName[collector.metricName], collector)
-	return nil
+func NewGPUsUsageMaxMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.GPUsUsageMaxID,
+		scrape.DCGMFIPROFGRENGINEACTIVE,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.MaxOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != ""
+		},
+	)
 }
 
-func (immc *InMemoryMetricsCollector) Unregister(collectorID MetricCollectorID) bool {
-	immc.lock.Lock()
-	defer immc.lock.Unlock()
+//	avg(
+//		avg_over_time(
+//			container_gpu_allocation{
+//				container!="",
+//				container!="POD",
+//				node!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, cluster_id)
 
-	if _, ok := immc.byCollectorID[collectorID]; !ok {
-		return false
-	}
+func NewGPUsAllocatedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.GPUsAllocatedID,
+		scrape.ContainerGPUAllocation,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "" && labels[source.ContainerLabel] != "POD" && labels[source.NodeLabel] != ""
+		},
+	)
+}
 
-	inst := immc.byCollectorID[collectorID]
-	immc.byMetricName[inst.metricName] = slices.DeleteFunc(immc.byMetricName[inst.metricName], func(mc *MetricCollector) bool {
-		return mc == nil || mc.id == collectorID
-	})
+//	avg(
+//		avg_over_time(
+//			kube_pod_container_resource_requests{
+//				container!="",
+//				node != "",
+//				pod != "",
+//				container!= "",
+//				unit = "integer",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, node, resource) // TODO is this missing cluster
 
-	delete(immc.byCollectorID, collectorID)
-	return true
+// NewIsGPUSharedMetricCollector builds the collector used to detect shared-GPU
+// requests, fed by kube_pod_container_resource_requests rows with
+// unit="integer" and non-empty container/node/pod labels. The resource name is
+// kept as an aggregation key so consumers can distinguish which resource the
+// integer request refers to.
+//
+// NOTE(review): the filter does not restrict ResourceLabel to a GPU resource —
+// presumably the downstream consumer filters by the resource key; confirm.
+func NewIsGPUSharedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.IsGPUSharedID,
+		scrape.KubePodContainerResourceRequests,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+			source.ResourceLabel,
+		},
+		aggregator.AverageOverTime,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != "" && labels[source.NodeLabel] != "" && labels[source.PodLabel] != "" && labels[source.UnitLabel] == "integer"
+		},
+	)
}
 
-func (immc *InMemoryMetricsCollector) Query(collectorID MetricCollectorID) ([]*MetricResult, error) {
-	immc.lock.Lock()
-	defer immc.lock.Unlock()
+//	avg(
+//		avg_over_time(
+//			DCGM_FI_DEV_DEC_UTIL{
+//				container!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (container, pod, namespace, device, modelName, UUID) // TODO is this missing cluster
+
+func NewGPUInfoMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.GPUInfoID,
+		scrape.DCGMFIDEVDECUTIL,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+			source.DeviceLabel,
+			source.ModelNameLabel,
+			source.UUIDLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.ContainerLabel] != ""
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			node_cpu_hourly_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (node, cluster_id, instance_type, provider_id)
+
+func NewNodeCPUPricePerHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeCPUPricePerHourID,
+		scrape.NodeCPUHourlyCost,
+		[]string{
+			source.NodeLabel,
+			source.InstanceTypeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			node_ram_hourly_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (node, cluster_id, instance_type, provider_id)
+
+func NewNodeRAMPricePerGiBHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeRAMPricePerGiBHourID,
+		scrape.NodeRAMHourlyCost,
+		[]string{
+			source.NodeLabel,
+			source.InstanceTypeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			node_gpu_hourly_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (node, cluster_id, instance_type, provider_id)
+
+func NewNodeGPUPricePerHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NodeGPUPricePerHourID,
+		scrape.NodeGPUHourlyCost,
+		[]string{
+			source.NodeLabel,
+			source.InstanceTypeLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		kubecost_node_is_spot{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+// NewNodeIsSpotMetricCollector builds the collector that tracks whether a node
+// is a spot/preemptible instance (avg_over_time of kubecost_node_is_spot),
+// keyed by node and provider ID. No label filter is applied.
+func NewNodeIsSpotMetricCollector() *metric.MetricCollector {
+	keyLabels := []string{source.NodeLabel, source.ProviderIDLabel}
+	return metric.NewMetricCollector(metric.NodeIsSpotID, scrape.KubecostNodeIsSpot, keyLabels, aggregator.AverageOverTime, nil)
+}
+
+//	avg(
+//		avg_over_time(
+//			pod_pvc_allocation{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)
+
+func NewPodPVCAllocationMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PodPVCAllocationID,
+		scrape.PodPVCAllocation,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.PVLabel,
+			source.PVCLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kube_persistentvolumeclaim_resource_requests_storage_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (persistentvolumeclaim, namespace, cluster_id)
+
+func NewPVCBytesRequestedMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVCBytesRequestedID,
+		scrape.KubePersistentVolumeClaimResourceRequestsStorageBytes,
+		[]string{
+			source.NamespaceLabel,
+			source.PVCLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kube_persistentvolume_capacity_bytes{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (persistentvolume, cluster_id)
+
+func NewPVBytesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVBytesID,
+		scrape.KubePersistentVolumeCapacityBytes,
+		[]string{
+			source.PVLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			pv_hourly_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (volumename, cluster_id)
+//
+// TODO what is going on here, does not appear to be a query
+func NewPVCostPerGiBHourMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVCostPerGiBHourID,
+		scrape.PVHourlyCost,
+		[]string{
+			source.VolumeNameLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_pv_info{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id, storageclass, persistentvolume, provider_id)
+
+func NewPVInfoMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PVInfoID,
+		scrape.KubecostPVInfo,
+		[]string{
+			source.PVLabel,
+			source.StorageClassLabel,
+			source.ProviderIDLabel,
+		},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_egress_bytes_total{
+//				internet="false",
+//				same_zone="false",
+//				same_region="true",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+//
+// TODO double check that changing "pod_name" to the source.PodLabel did not break something
+func NewNetZoneGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetZoneGiBID,
+		scrape.KubecostPodNetworkEgressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.InternetLabel] == "false" && labels[source.SameZoneLabel] == "false" && labels[source.SameRegionLabel] == "true"
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_network_zone_egress_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id)
+//
+// TODO check that this works with no labels
+func NewNetZonePricePerGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetZonePricePerGiBID,
+		scrape.KubecostNetworkZoneEgressCost,
+		[]string{},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_egress_bytes_total{
+//				internet="false",
+//				same_zone="false",
+//				same_region="false",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+
+func NewNetRegionGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetRegionGiBID,
+		scrape.KubecostPodNetworkEgressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.InternetLabel] == "false" && labels[source.SameZoneLabel] == "false" && labels[source.SameRegionLabel] == "false"
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_network_region_egress_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id)
+
+func NewNetRegionPricePerGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetRegionPricePerGiBID,
+		scrape.KubecostNetworkRegionEgressCost,
+		[]string{},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_egress_bytes_total{
+//				internet="true",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+
+func NewNetInternetGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetInternetGiBID,
+		scrape.KubecostPodNetworkEgressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.InternetLabel] == "true"
+		},
+	)
+}
+
+//	avg(
+//		avg_over_time(
+//			kubecost_network_internet_egress_cost{
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (cluster_id)
+
+func NewNetInternetPricePerGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetInternetPricePerGiBID,
+		scrape.KubecostNetworkInternetEgressCost,
+		[]string{},
+		aggregator.AverageOverTime,
+		nil,
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_egress_bytes_total{
+//				internet="true",
+//				<some_custom_filter>
+//			}[%s]
+//		)
+//	) by (pod_name, namespace, service, %s) / 1024 / 1024 / 1024
+
+// NewNetInternetServiceGiBMetricCollector builds the collector for per-service
+// internet egress volume, fed by the kubecost_pod_network_egress_bytes_total
+// counter (rows with internet="true"), keyed by namespace/pod/service.
+func NewNetInternetServiceGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetInternetServiceGiBID,
+		// Fix: this collector aggregates egress *bytes* (see the query comment
+		// above), not the per-GiB cost gauge. KubecostNetworkInternetEgressCost
+		// was a copy/paste slip from the price collector — using Increase on a
+		// cost gauge would yield meaningless values.
+		scrape.KubecostPodNetworkEgressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.InternetLabel] == "true"
+		},
+	)
+}
+
+//	sum(
+//		increase(
+//			container_network_receive_bytes_total{
+//				pod!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, pod, namespace, cluster_id)
+
+func NewNetReceiveBytesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetReceiveBytesID,
+		scrape.ContainerNetworkReceiveBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.PodLabel] != ""
+		},
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_ingress_bytes_total{
+//				internet="false",
+//				same_zone="false",
+//				same_region="true",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+
+// NewNetZoneIngressGiBMetricCollector builds the collector for cross-zone
+// (same-region) ingress volume, fed by kubecost_pod_network_ingress_bytes_total
+// rows with internet="false", same_zone="false", same_region="true", keyed by
+// namespace/pod.
+func NewNetZoneIngressGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetZoneIngressGiBID,
+		scrape.KubecostPodNetworkIngressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			// Fix: the comparisons were inverted (!=) relative to the PromQL
+			// selector in the query comment and to the egress twin
+			// NewNetZoneGiBMetricCollector, which selected the complement of
+			// the intended series.
+			return labels[source.InternetLabel] == "false" &&
+				labels[source.SameZoneLabel] == "false" &&
+				labels[source.SameRegionLabel] == "true"
+		},
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_ingress_bytes_total{
+//				internet="false",
+//				same_zone="false",
+//				same_region="false",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+
+// NewNetRegionIngressGiBMetricCollector builds the collector for cross-region
+// ingress volume, fed by kubecost_pod_network_ingress_bytes_total rows with
+// internet="false", same_zone="false", same_region="false", keyed by
+// namespace/pod.
+func NewNetRegionIngressGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetRegionIngressGiBID,
+		scrape.KubecostPodNetworkIngressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			// Fix: the comparisons were inverted (!=) relative to the PromQL
+			// selector and to the egress twin NewNetRegionGiBMetricCollector.
+			return labels[source.InternetLabel] == "false" &&
+				labels[source.SameZoneLabel] == "false" &&
+				labels[source.SameRegionLabel] == "false"
+		},
+	)
+}
+
+//	sum(
+//		increase(
+//			kubecost_pod_network_ingress_bytes_total{
+//				internet="true",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
+
+// NewNetInternetIngressGiBMetricCollector builds the collector for internet
+// ingress volume, fed by kubecost_pod_network_ingress_bytes_total rows with
+// internet="true", keyed by namespace/pod.
+func NewNetInternetIngressGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetInternetIngressGiBID,
+		scrape.KubecostPodNetworkIngressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			// Fix: was `!= "true"`, the complement of the PromQL selector in
+			// the query comment and of NewNetInternetGiBMetricCollector.
+			return labels[source.InternetLabel] == "true"
+		},
+	)
+}
+
+//	`sum(
+//		increase(
+//			kubecost_pod_network_ingress_bytes_total{
+//				internet="true",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, namespace, service, cluster_id) / 1024 / 1024 / 1024
+
+// NewNetInternetServiceIngressGiBMetricCollector builds the collector for
+// per-service internet ingress volume, fed by
+// kubecost_pod_network_ingress_bytes_total rows with internet="true", keyed by
+// namespace/pod/service.
+func NewNetInternetServiceIngressGiBMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetInternetServiceIngressGiBID,
+		scrape.KubecostPodNetworkIngressBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			// Fix: was `!= "true"`, the complement of the PromQL selector in
+			// the query comment above.
+			return labels[source.InternetLabel] == "true"
+		},
+	)
+}
+
+//	sum(
+//		increase(
+//			container_network_transmit_bytes_total{
+//				pod!="",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod_name, pod, namespace, cluster_id)
+
+func NewNetTransferBytesMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NetTransferBytesID,
+		scrape.ContainerNetworkTransmitBytesTotal,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.ContainerLabel,
+		},
+		aggregator.Increase,
+		func(labels map[string]string) bool {
+			return labels[source.PodLabel] != ""
+		},
+	)
+}
+
+//	avg_over_time(
+//		kube_namespace_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewNamespaceLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NamespaceLabelsID,
+		scrape.KubeNamespaceLabels,
+		[]string{
+			source.NamespaceLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		kube_namespace_annotations{
+//			<some_custom_filter>
+//		}[1h]
+//	) // TODO decoder missing cluster
+
+func NewNamespaceAnnotationsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.NamespaceAnnotationsID,
+		scrape.KubeNamespaceAnnotations,
+		[]string{
+			source.NamespaceLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		kube_pod_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewPodLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PodLabelsID,
+		scrape.KubePodLabels,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		kube_pod_annotations{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewPodAnnotationsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PodAnnotationsID,
+		scrape.KubePodAnnotations,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		service_selector_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewServiceLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.ServiceLabelsID,
+		scrape.ServiceSelectorLabels,
+		[]string{
+			source.NamespaceLabel,
+			source.ServiceLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		deployment_match_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewDeploymentLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.DeploymentLabelsID,
+		scrape.DeploymentMatchLabels,
+		[]string{
+			source.NamespaceLabel,
+			source.DeploymentLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	avg_over_time(
+//		statefulSet_match_labels{
+//			<some_custom_filter>
+//		}[1h]
+//	)
+
+func NewStatefulSetLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.StatefulSetLabelsID,
+		scrape.StatefulSetMatchLabels,
+		[]string{
+			source.NamespaceLabel,
+			source.StatefulSetLabel,
+		},
+		aggregator.Info,
+		nil,
+	)
+}
+
+//	sum(
+//		avg_over_time(
+//			kube_pod_owner{
+//				owner_kind="DaemonSet",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod, owner_name, namespace, cluster_id)
+
+// NewDaemonSetLabelsMetricCollector maps pods to their DaemonSet owner via
+// kube_pod_owner rows where owner_kind="DaemonSet", keyed by
+// namespace/pod/owner_name.
+//
+// NOTE(review): the query comment above uses sum(avg_over_time(...)) while
+// this collector uses the Info aggregator — confirm the two are equivalent for
+// this ownership-indicator metric.
+func NewDaemonSetLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.DaemonSetLabelsID,
+		scrape.KubePodOwner,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.OwnerNameLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.OwnerKindLabel] == "DaemonSet"
+		},
+	)
+}
+
+//	sum(
+//		avg_over_time(
+//			kube_pod_owner{
+//				owner_kind="Job",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod, owner_name, namespace, cluster_id)
+
+func NewJobLabelsMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.JobLabelsID,
+		scrape.KubePodOwner,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.OwnerNameLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.OwnerKindLabel] == "Job"
+		},
+	)
+}
+
+//	sum(
+//		avg_over_time(
+//			kube_pod_owner{
+//				owner_kind="ReplicaSet",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (pod, owner_name, namespace, cluster_id)
+
+func NewPodsWithReplicaSetOwnerMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.PodsWithReplicaSetOwnerID,
+		scrape.KubePodOwner,
+		[]string{
+			source.NamespaceLabel,
+			source.PodLabel,
+			source.OwnerNameLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.OwnerKindLabel] == "ReplicaSet"
+		},
+	)
+}
 
-	if _, ok := immc.byCollectorID[collectorID]; !ok {
-		return nil, fmt.Errorf("collector with ID: %s does not exist", collectorID)
-	}
+//	sum(
+//		avg_over_time(
+//			kube_replicaset_owner{
+//				owner_kind="<none>",
+//				owner_name="<none>",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (replicaset, namespace, cluster_id)
 
-	return immc.byCollectorID[collectorID].Get(), nil
+// NewReplicaSetsWithoutOwnersMetricCollector identifies ReplicaSets that have
+// no controller owner: kube_replicaset_owner rows where both owner_kind and
+// owner_name are the literal string "<none>", keyed by namespace/replicaset.
+func NewReplicaSetsWithoutOwnersMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.ReplicaSetsWithoutOwnersID,
+		scrape.KubeReplicasetOwner,
+		[]string{
+			source.NamespaceLabel,
+			source.ReplicaSetLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.OwnerKindLabel] == "<none>" && labels[source.OwnerNameLabel] == "<none>"
+		},
+	)
}
 
-func (immc *InMemoryMetricsCollector) Update(
-	metricName string,
-	labels map[string]string,
-	value float64,
-	timestamp *time.Time,
-	additionalInformation map[string]string,
-) {
-	immc.lock.Lock()
-	defer immc.lock.Unlock()
+//	sum(
+//		avg_over_time(
+//			kube_replicaset_owner{
+//				owner_kind="Rollout",
+//				<some_custom_filter>
+//			}[1h]
+//		)
+//	) by (replicaset, namespace, owner_kind, owner_name, cluster_id)
 
-	for _, collector := range immc.byMetricName[metricName] {
-		collector.Update(labels, value, timestamp, additionalInformation)
-	}
+func NewReplicaSetsWithRolloutMetricCollector() *metric.MetricCollector {
+	return metric.NewMetricCollector(
+		metric.ReplicaSetsWithRolloutID,
+		scrape.KubeReplicasetOwner,
+		[]string{
+			source.NamespaceLabel,
+			source.ReplicaSetLabel,
+			source.OwnerNameLabel,
+			source.OwnerKindLabel,
+		},
+		aggregator.Info,
+		func(labels map[string]string) bool {
+			return labels[source.OwnerKindLabel] == "Rollout"
+		},
+	)
 }

+ 10 - 5
modules/collector-source/pkg/collector/collector_test.go

@@ -1,6 +1,11 @@
 package collector
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape"
+)
 
 func TestBasicCollectorFunctionality(t *testing.T) {
 	// avg of 55 (sum of [1,10]) / data points (10) = 5.5
@@ -26,14 +31,14 @@ func TestBasicCollectorFunctionality(t *testing.T) {
 		"cluster":   "cluster-a",
 	}
 
-	collector := NewOpenCostMetricCollector()
+	collector := NewOpenCostMetricStore()
 
 	for i := 1; i <= 10; i++ {
-		collector.Update(ContainerMemoryWorkingSetBytes, labelsA, float64(i), nil, nil)
-		collector.Update(ContainerMemoryWorkingSetBytes, labelsB, float64(i), nil, nil)
+		collector.Update(scrape.ContainerMemoryWorkingSetBytes, labelsA, float64(i), nil, nil)
+		collector.Update(scrape.ContainerMemoryWorkingSetBytes, labelsB, float64(i), nil, nil)
 	}
 
-	results, err := collector.Query(RAMUsageAverageID)
+	results, err := collector.Query(metric.RAMUsageAverageID)
 	if err != nil {
 		t.Fatalf("error: %v", err)
 	}

+ 0 - 57
modules/collector-source/pkg/collector/collectordriver.go

@@ -1,57 +0,0 @@
-package collector
-
-import (
-	"time"
-
-	"github.com/opencost/opencost/core/pkg/log"
-	"github.com/opencost/opencost/core/pkg/util/atomic"
-)
-
-type Config struct {
-	ScrapeInterval time.Duration
-}
-type CollectorDriver struct {
-	config    Config
-	runState  atomic.AtomicRunState
-	stop      chan struct{}
-	collector MetricsCollector
-}
-
-func NewCollectorDriver(config Config) *CollectorDriver {
-	return &CollectorDriver{
-		collector: NewOpenCostMetricCollector(),
-	}
-}
-
-func (cd *CollectorDriver) Start() {
-	// Before we attempt to start, we must ensure we are not in a stopping state
-	cd.runState.WaitForReset()
-
-	// This will atomically check the current state to ensure we can run, then advances the state.
-	// If the state is already started, it will return false.
-	if !cd.runState.Start() {
-		log.Info("collector already running")
-		return
-	}
-	func() {
-		for {
-			select {
-			case <-cd.runState.OnStop():
-				cd.runState.Reset()
-				return // exit go routine
-			default:
-
-			}
-			time.Sleep(cd.config.ScrapeInterval)
-		}
-
-	}()
-}
-
-func (cd *CollectorDriver) Stop() {
-	cd.runState.Stop()
-}
-
-func (cd *CollectorDriver) scrape() {
-
-}

+ 71 - 0
modules/collector-source/pkg/collector/collectorprovider.go

@@ -0,0 +1,71 @@
+package collector
+
+import (
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+)
+
+// StoreProvider returns an appropriate metric store for the given window. It is meant to bridge the mismatch of a
+// system that was designed to make queries against a continuous datasource but now stores its data in discrete blocks.
+type StoreProvider interface {
+	// GetStore returns the MetricStore whose resolution and start time best match the given window.
+	GetStore(start, end time.Time) metric.MetricStore
+}
+
+// repoStoreProvider is a StoreProvider implementation which uses a Repository and the Intervals of the Resolutions
+// that it is configured with to return the most appropriate store for a requested window.
+type repoStoreProvider struct {
+	repo      *metric.MetricRepository    // backing repository holding one store per (resolution, start) pair
+	intervals map[string]util.Interval    // parsed intervals keyed by their configuration string (e.g. "10m", "1h")
+}
+
+// newRepoStoreProvider builds a repoStoreProvider from the given repository and resolution
+// configurations. Resolutions whose interval strings fail to parse are skipped with a warning
+// rather than silently dropped.
+func newRepoStoreProvider(repo *metric.MetricRepository, resolutionConfigs []util.ResolutionConfiguration) *repoStoreProvider {
+	intervals := make(map[string]util.Interval, len(resolutionConfigs))
+	for _, resConf := range resolutionConfigs {
+		interval, err := util.NewInterval(resConf.Interval)
+		if err != nil {
+			log.Warnf("skipping resolution with unparsable interval '%s': %s", resConf.Interval, err)
+			continue
+		}
+		intervals[resConf.Interval] = interval
+	}
+	return &repoStoreProvider{
+		repo:      repo,
+		intervals: intervals,
+	}
+}
+
+// GetStore returns the MetricStore that best matches the given window, chosen among the configured
+// resolutions. On lookup failure the error is logged and the store is returned as-is (best effort,
+// possibly nil), preserving the original behavior.
+func (r *repoStoreProvider) GetStore(start, end time.Time) metric.MetricStore {
+	resKey, resStart := r.getStoreKeys(start, end)
+	store, err := r.repo.GetCollector(resKey, resStart)
+	if err != nil {
+		// log the caller's requested window, not the truncated store start
+		log.Debugf("failed to get Store for window '%s - %s': %s", start, end, err)
+	}
+	return store
+}
+
+// getStoreKeys selects the resolution whose interval duration most closely matches the requested
+// window. For each resolution the start time is truncated to the interval boundary and one interval
+// is added to that truncated value; the resolution whose duration has the smallest absolute
+// difference from end-start wins. It returns the winning interval key and the truncated start time.
+func (r *repoStoreProvider) getStoreKeys(start, end time.Time) (string, time.Time) {
+	windowDuration := int64(end.Sub(start))
+	var minDiff *int64
+	var minKey string
+	var minStart time.Time
+	for key, interval := range r.intervals {
+		intStart := interval.Truncate(start)
+		// advance from the truncated boundary so intDuration is exactly one interval,
+		// regardless of where start falls inside the interval
+		intEnd := interval.Add(intStart, 1)
+		intDuration := int64(intEnd.Sub(intStart))
+		diffDuration := windowDuration - intDuration
+		if diffDuration < 0 {
+			diffDuration = -diffDuration
+		}
+		if minDiff == nil || diffDuration < *minDiff {
+			minDiff = &diffDuration
+			minKey = key
+			minStart = intStart
+		}
+	}
+	return minKey, minStart
+}

+ 80 - 0
modules/collector-source/pkg/collector/collectorprovider_test.go

@@ -0,0 +1,80 @@
+package collector
+
+import (
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+)
+
+// Test_repoStoreProvider_getStoreKeys verifies that the provider picks the resolution whose
+// interval duration is closest to the requested window and truncates the start accordingly.
+func Test_repoStoreProvider_getStoreKeys(t *testing.T) {
+
+	defaultResConfigs := []util.ResolutionConfiguration{
+		{
+			Interval: "10m",
+		},
+		{
+			Interval: "1h",
+		},
+		{
+			Interval: "1d",
+		},
+	}
+
+	tests := map[string]struct {
+		configs     []util.ResolutionConfiguration
+		start       time.Time
+		end         time.Time
+		intervalKey string
+		startKey    time.Time
+	}{
+		"10m": {
+			configs:     defaultResConfigs,
+			start:       time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+			end:         time.Date(2025, time.May, 3, 0, 10, 0, 0, time.UTC),
+			intervalKey: "10m",
+			startKey:    time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+		},
+		"1h": {
+			configs:     defaultResConfigs,
+			start:       time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+			end:         time.Date(2025, time.May, 3, 1, 0, 0, 0, time.UTC),
+			intervalKey: "1h",
+			startKey:    time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+		},
+		"1d": {
+			configs:     defaultResConfigs,
+			start:       time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+			end:         time.Date(2025, time.May, 4, 0, 10, 0, 0, time.UTC),
+			intervalKey: "1d",
+			startKey:    time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+		},
+		"2m": {
+			configs:     defaultResConfigs,
+			start:       time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+			end:         time.Date(2025, time.May, 3, 0, 2, 0, 0, time.UTC),
+			intervalKey: "10m",
+			startKey:    time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+		},
+		"2m offset": {
+			configs:     defaultResConfigs,
+			start:       time.Date(2025, time.May, 3, 0, 9, 0, 0, time.UTC),
+			end:         time.Date(2025, time.May, 3, 0, 11, 0, 0, time.UTC),
+			intervalKey: "10m",
+			startKey:    time.Date(2025, time.May, 3, 0, 0, 0, 0, time.UTC),
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			r := newRepoStoreProvider(nil, tt.configs)
+			intervalKey, startKey := r.getStoreKeys(tt.start, tt.end)
+			if intervalKey != tt.intervalKey {
+				t.Errorf("getStoreKeys() intervalKey = %v, want %v", intervalKey, tt.intervalKey)
+			}
+			if !reflect.DeepEqual(startKey, tt.startKey) {
+				t.Errorf("getStoreKeys() startKey = %v, want %v", startKey, tt.startKey)
+			}
+		})
+	}
+}

+ 39 - 0
modules/collector-source/pkg/collector/config.go

@@ -0,0 +1,39 @@
+package collector
+
+import (
+	"time"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/env"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+)
+
+// CollectorConfig holds the settings used to build the in-cluster collector data source.
+type CollectorConfig struct {
+	// Resolutions lists the interval/retention pairs the metric repository maintains.
+	Resolutions    []util.ResolutionConfiguration `json:"resolutions"`
+	// ScrapeInterval is how often metric targets are scraped.
+	ScrapeInterval time.Duration                  `json:"scrape_interval"`
+	ClusterID      string                         `json:"cluster_id"`
+	ReleaseName    string                         `json:"release_name"`
+	NetworkPort    int                            `json:"network_port"`
+}
+
+// NewOpenCostCollectorConfigFromEnv builds a CollectorConfig from environment variables.
+// NOTE(review): the resolution retentions are hard-coded (2, 1, 1) with "TODO UNDO" markers
+// instead of reading the env.GetCollector*ResolutionRetention() getters named in the comments —
+// confirm these debug values are intentional before shipping.
+func NewOpenCostCollectorConfigFromEnv() CollectorConfig {
+	return CollectorConfig{
+		Resolutions: []util.ResolutionConfiguration{
+			{
+				Interval:  "10m",
+				Retention: 2, // TODO UNDO env.GetCollector10mResolutionRetention(),
+			},
+			{
+				Interval:  "1h",
+				Retention: 1, // TODO UNDO env.GetCollector1hResolutionRetention(),
+			},
+			{
+				Interval:  "1d",
+				Retention: 1, // TODO UNDO env.GetCollection1dResolutionRetention(),
+			},
+		},
+		ScrapeInterval: time.Second * time.Duration(env.GetCollectorScrapeIntervalSeconds()),
+		ClusterID:      env.GetClusterID(),
+		ReleaseName:    env.GetReleaseName(),
+		NetworkPort:    env.GetNetworkPort(),
+	}
+}

+ 108 - 0
modules/collector-source/pkg/collector/datasource.go

@@ -0,0 +1,108 @@
+package collector
+
+import (
+	"time"
+
+	"github.com/julienschmidt/httprouter"
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/opencost/opencost/core/pkg/clusters"
+	"github.com/opencost/opencost/core/pkg/diagnostics"
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	"k8s.io/client-go/kubernetes"
+)
+
+// collectorDataSource implements source.OpenCostDataSource on top of the in-cluster metric collector.
+type collectorDataSource struct {
+	metricsQuerier *collectorMetricsQuerier     // answers metric queries against the collector repository
+	clusterMap     clusters.ClusterMap
+	clusterInfo    clusters.ClusterInfoProvider
+	config         CollectorConfig              // NOTE(review): verify the constructor populates this field
+}
+
+// NewDefaultCollectorDataSource constructs a collector-backed OpenCostDataSource using the
+// configuration read from the environment.
+func NewDefaultCollectorDataSource(
+	clusterInfoProvider clusters.ClusterInfoProvider,
+	clusterCache clustercache.ClusterCache,
+	k8s kubernetes.Interface,
+	statSummaryClient util.StatSummaryClient,
+) source.OpenCostDataSource {
+	return NewCollectorDataSource(NewOpenCostCollectorConfigFromEnv(), clusterInfoProvider, clusterCache, k8s, statSummaryClient)
+}
+
+// NewCollectorDataSource wires together the metric repository, scrape controller, metrics querier,
+// and cluster metadata providers into an OpenCostDataSource backed by in-cluster collection.
+func NewCollectorDataSource(
+	config CollectorConfig,
+	clusterInfoProvider clusters.ClusterInfoProvider,
+	clusterCache clustercache.ClusterCache,
+	k8s kubernetes.Interface,
+	statSummaryClient util.StatSummaryClient,
+) source.OpenCostDataSource {
+
+	var storeFactory metric.MetricStoreFactory = NewOpenCostMetricStore
+
+	repo := metric.NewMetricRepository(metric.RepositoryConfig{
+		Resolutions: config.Resolutions,
+	}, storeFactory)
+
+	// start scraping immediately so the repository accumulates data before the first query
+	scrapeController := scrape.NewScrapeController(
+		config.ScrapeInterval,
+		config.ReleaseName,
+		config.NetworkPort,
+		repo,
+		clusterCache,
+		k8s,
+		statSummaryClient,
+	)
+	scrapeController.Start()
+
+	metricQuerier := newCollectorMetricsQuerier(repo, config.Resolutions)
+
+	clusterMap := newCollectorClusterMap(clusterInfoProvider)
+
+	return &collectorDataSource{
+		metricsQuerier: metricQuerier,
+		clusterInfo:    clusterInfoProvider,
+		clusterMap:     clusterMap,
+		// config must be populated or Resolution() would report a zero scrape interval
+		config:         config,
+	}
+}
+
+// RegisterEndPoints is a no-op: the collector data source exposes no additional HTTP endpoints.
+func (c *collectorDataSource) RegisterEndPoints(router *httprouter.Router) {
+}
+
+// RegisterDiagnostics is a no-op: the collector data source registers no diagnostics.
+func (c *collectorDataSource) RegisterDiagnostics(diagService diagnostics.DiagnosticService) {
+}
+
+// Metrics returns the querier backed by the collector's metric repository.
+func (c *collectorDataSource) Metrics() source.MetricsQuerier {
+	return c.metricsQuerier
+}
+
+// ClusterMap returns the cluster map built from the cluster info provider.
+func (c *collectorDataSource) ClusterMap() clusters.ClusterMap {
+	return c.clusterMap
+}
+
+// ClusterInfo returns the provider of metadata about the local cluster.
+func (c *collectorDataSource) ClusterInfo() clusters.ClusterInfoProvider {
+	return c.clusterInfo
+}
+
+// BatchDuration reports the maximum representable duration: collector data source queries do not
+// need to be broken up into batches.
+func (c *collectorDataSource) BatchDuration() time.Duration {
+	const maxDuration = time.Duration(1<<63 - 1)
+	return maxDuration
+}
+
+// Resolution returns the configured scrape interval.
+// NOTE(review): as written, NewCollectorDataSource never assigns c.config, so this returns the
+// zero Duration — confirm the constructor populates the config field.
+func (c *collectorDataSource) Resolution() time.Duration {
+	return c.config.ScrapeInterval
+}

+ 0 - 122
modules/collector-source/pkg/collector/metric.go

@@ -1,122 +0,0 @@
-package collector
-
-import (
-	"maps"
-	"time"
-
-	"github.com/opencost/opencost/core/pkg/source"
-	"github.com/opencost/opencost/core/pkg/util"
-)
-
-// MetricValue is a resulting data point value with an optional timestamp.
-type MetricValue struct {
-	Value     float64
-	Timestamp *time.Time
-}
-
-// MetricResult contains a resulting metric name, the associated labels and label values, and a slice of
-// MetricValues.
-type MetricResult struct {
-	Name         string
-	MetricLabels map[string]string
-	Values       []MetricValue
-}
-
-func (mr *MetricResult) ToQueryResult() *source.QueryResult {
-	metrics := map[string]any{}
-	for key, value := range mr.MetricLabels {
-		metrics[key] = value
-	}
-
-	values := make([]*util.Vector, len(mr.Values))
-	for i, value := range mr.Values {
-		timestamp := 0.0
-		if value.Timestamp != nil {
-			timestamp = float64(value.Timestamp.Unix())
-		}
-		values[i] = &util.Vector{
-			Timestamp: timestamp,
-			Value:     value.Value,
-		}
-	}
-
-	return source.NewQueryResult(metrics, values, nil)
-}
-
-// MetricAggregator is an interface that defines the methods for a metric collector aggregation.
-// For example, we have a metric `foo_metric`, and we wish to query and collect the average over time.
-// In this case, the `AverageOverTime` component is the MetricAggregator. It is the component responsible
-// for routing updates to metric values into their proper condensed form.
-type MetricAggregator interface {
-	Name() string
-	AdditionInfo() map[string]string
-	Update(value float64, timestamp *time.Time, additionalInfo map[string]string)
-	Value() []MetricValue
-	LabelValues() []string
-}
-
-// MetricAggregatorFactory is a function that accepts a string name and returns a pointer to a MetricAggregator
-// implementation.
-type MetricAggregatorFactory func(name string, labelValues []string) MetricAggregator
-
-// MetricCollector is a data structure that represents a specific metric collector instance that contains it's own breakdown
-// of stored metrics by a specific label set.
-type MetricCollector struct {
-	id                MetricCollectorID // ie: RAMUsageAverage
-	metricName        string            // ie: container_memory_working_set_bytes
-	labels            []string
-	aggregatorFactory MetricAggregatorFactory
-	metrics           map[uint64]MetricAggregator // map[hash(labelValues)] = aggregator
-	filter            func(map[string]string) bool
-}
-
-// NewMetricCollector creates a new MetricCollector instance with a unique identifier. The metric name is the specific
-// name of the collected metric that will be used to query the
-func NewMetricCollector(id MetricCollectorID, metricName string, labels []string, aggregatorFactory MetricAggregatorFactory, fn func(map[string]string) bool) *MetricCollector {
-	return &MetricCollector{
-		id:                id,
-		metricName:        metricName,
-		labels:            labels,
-		aggregatorFactory: aggregatorFactory,
-		metrics:           make(map[uint64]MetricAggregator),
-		filter:            fn,
-	}
-}
-
-func (mi *MetricCollector) Update(labels map[string]string, value float64, timestamp *time.Time, additionalInfo map[string]string) {
-	if mi.filter != nil && !mi.filter(labels) {
-		return
-	}
-
-	labelValues := make([]string, len(mi.labels))
-	for i, key := range mi.labels {
-		labelValues[i] = labels[key]
-	}
-	key := hash(labelValues)
-	if mi.metrics[key] == nil {
-		mi.metrics[key] = mi.aggregatorFactory(metricNameFor(mi.metricName, mi.labels, labelValues), labelValues)
-	}
-
-	mi.metrics[key].Update(value, timestamp, additionalInfo)
-}
-
-func (mi *MetricCollector) Get() []*MetricResult {
-	results := make([]*MetricResult, 0, len(mi.metrics))
-	for _, metric := range mi.metrics {
-		labels := toMap(mi.labels, metric.LabelValues())
-		maps.Copy(labels, metric.AdditionInfo())
-		mr := &MetricResult{
-			Name:         metric.Name(),
-			MetricLabels: labels,
-			Values:       metric.Value(),
-		}
-
-		results = append(results, mr)
-	}
-
-	return results
-}
-
-func (mi *MetricCollector) Labels() []string {
-	return mi.labels
-}

+ 0 - 1348
modules/collector-source/pkg/collector/metrics.go

@@ -1,1348 +0,0 @@
-package collector
-
-//	avg(
-//		avg_over_time(
-//			pv_hourly_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, persistentvolume, volumename, provider_id)
-
-func NewPVPricePerGiBHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVPricePerGiBHourID,
-		PVHourlyCost,
-		[]string{"persistentvolume", "volumename", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubelet_volume_stats_used_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, persistentvolumeclaim, namespace)
-
-func NewPVUsedAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVUsedAverageID,
-		KubeletVolumeStatsUsedBytes,
-		[]string{"persistentvolumeclaim", "namespace"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	max(
-//		max_over_time(
-//			kubelet_volume_stats_used_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, persistentvolumeclaim, namespace)
-
-func NewPVUsedMaxMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVUsedMaxID,
-		KubeletVolumeStatsUsedBytes,
-		[]string{"persistentvolumeclaim", "namespace"},
-		MaxOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		kube_persistentvolumeclaim_info{
-//			volumename != "",
-//			<some_custom_filter>
-//		}
-//	) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[0:10m]
-
-func NewPVCInfoMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVCInfoID,
-		KubePersistenVolumeClaimInfo,
-		[]string{"persistentvolumeclaim", "storageclass", "volumename", "namespace"},
-		Info,
-		nil,
-	)
-}
-
-//	avg(
-//		kube_persistentvolume_capacity_bytes{
-//			<some_custom_filter>
-//		}
-//	) by (cluster_id, persistentvolume)[0:10m]
-
-func NewPVActiveMinutesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVActiveMinutesID,
-		KubePersistentVolumeCapacityBytes,
-		[]string{"persistentvolume"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-// todo revisit this
-//
-//	sum_over_time(
-//		sum(
-//			container_fs_limit_bytes{
-//				device=~"/dev/(nvme|sda).*",
-//				id="/",
-//				<some_custom_filter>
-//			}
-//		) by (instance, device, cluster_id)[%s:%dm]
-//	) / 1024 / 1024 / 1024 * %f * %f
-func NewLocalStorageCostMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageCostID,
-		ContainerFSLimitBytes,
-		[]string{"instance", "device"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			// todo this filter needs a regex
-			return true
-		},
-	)
-}
-
-// sum_over_time(
-//
-//	sum(
-//		container_fs_usage_bytes{
-//			device=~"/dev/(nvme|sda).*",
-//			id="/",
-//			<some_custom_filter>
-//		}
-//	) by (instance, device, cluster_id)[%s:%dm]
-//
-// ) / 1024 / 1024 / 1024 * %f * %f`
-func NewLocalStorageUsedCostMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageUsedCostID,
-		ContainerFSUsageBytes,
-		[]string{"instance", "device"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			// todo this filter needs a regex
-			return true
-		},
-	)
-}
-
-//	avg(
-//		sum(
-//			avg_over_time(
-//				container_fs_usage_bytes{
-//					device=~"/dev/(nvme|sda).*",
-//					id="/",
-//					<some_custom_filter>
-//				}[1h]
-//			)
-//		) by (instance, device, cluster_id, job)
-//	) by (instance, device, cluster_id)
-
-func NewLocalStorageUsedAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageUsedAverageID,
-		ContainerFSUsageBytes,
-		[]string{"instance", "device"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			// todo this filter needs a regex
-			return true
-		},
-	)
-}
-
-// max(
-//
-//	sum(
-//		max_over_time(
-//			container_fs_usage_bytes{
-//				device=~"/dev/(nvme|sda).*",
-//				id="/",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (instance, device, cluster_id, job)
-//
-// ) by (instance, device, cluster_id)
-func NewLocalStorageUsedMaxMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageUsedMaxID,
-		ContainerFSUsageBytes,
-		[]string{"instance", "device"},
-		MaxOverTime,
-		func(labels map[string]string) bool {
-			// todo this filter needs a regex
-			return true
-		},
-	)
-}
-
-// avg_over_time(
-//
-//	sum(
-//		container_fs_limit_bytes{
-//			device=~"/dev/(nvme|sda).*",
-//			id="/",
-//			<some_custom_filter>
-//		}
-//	) by (instance, device, cluster_id)[%s:%dm]
-//
-// )
-func NewLocalStorageBytesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageBytesID,
-		ContainerFSLimitBytes,
-		[]string{"instance", "device"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			// todo this filter needs a regex
-			return true
-		},
-	)
-}
-
-// count(
-//
-//	node_total_hourly_cost{
-//		<some_custom_filter>
-//	}
-//
-// ) by (cluster_id, node, instance, provider_id)[%s:%dm]
-func NewLocalStorageActiveMinutesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LocalStorageActiveMinutesID,
-		NodeTotalHourlyCost,
-		[]string{"node", "instance", "provider_id"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-// avg(
-//
-//	avg_over_time(
-//		kube_node_status_capacity_cpu_cores{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-//
-// ) by (cluster_id, node)
-func NewNodeCPUCoresCapacityMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeCPUCoresCapacityID,
-		KubeNodeStatusCapacityCPUCores,
-		[]string{"node"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_node_status_allocatable_cpu_cores{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, node)
-
-func NewNodeCPUCoresAllocatableMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeCPUCoresAllocatableID,
-		KubeNodeStatusAllocatableCPUCores,
-		[]string{"node"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_node_status_capacity_memory_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, node)
-
-func NewNodeRAMBytesCapacityMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeRAMBytesCapacityID,
-		KubeNodeStatusCapacityMemoryBytes,
-		[]string{"node"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_node_status_allocatable_memory_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, node)
-
-func NewNodeRAMBytesAllocatableMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeRAMBytesAllocatableID,
-		KubeNodeStatusAllocatableMemoryBytes,
-		[]string{"node"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			node_gpu_count{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, node, provider_id)
-
-func NewNodeGPUCountMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeGPUCountID,
-		NodeGPUCount,
-		[]string{"node", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		kube_node_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewNodeLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeLabelsID,
-		KubeNodeLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg(
-//		node_total_hourly_cost{
-//			<some_custom_filter>
-//		}
-//	) by (node, cluster_id, provider_id)[%s:%dm]
-
-func NewNodeActiveMinutesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeActiveMinutesID,
-		NodeTotalHourlyCost,
-		[]string{"node", "provider_id"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-//	sum(
-//		rate(
-//			node_cpu_seconds_total{
-//				<some_custom_filter>
-//			}[%s:%dm]
-//		)
-//	) by (kubernetes_node, cluster_id, mode)
-
-func NewNodeCPUModeTotalMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeCPUModeTotalID,
-		NodeCPUSecondsTotal,
-		[]string{"kubernetes_node", "mode"},
-		Increase,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			container_memory_working_set_bytes{
-//				container_name!="POD",
-//				container_name!="",
-//				namespace="kube-system",
-//				<some_custom_filter>
-//			}[%s:%dm]
-//		)
-//	) by (instance, cluster_id)
-
-func NewNodeRAMSystemUsageAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeRAMSystemUsageAverageID,
-		ContainerMemoryWorkingSetBytes,
-		[]string{"instance"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container_name"] != "POD" && labels["container_name"] != "" && labels["namespace"] == "kube-system"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			container_memory_working_set_bytes{
-//				container_name!="POD",
-//				container_name!="",
-//				namespace!="kube-system",
-//				<some_custom_filter>
-//			}[%s:%dm]
-//		)
-//	) by (instance, cluster_id)
-
-func NewNodeRAMUserUsageAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeRAMUserUsageAverageID,
-		ContainerMemoryWorkingSetBytes,
-		[]string{"instance"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container_name"] != "POD" && labels["container_name"] != "" && labels["namespace"] != "kube-system"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_load_balancer_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (namespace, service_name, ingress_ip, cluster_id)
-
-func NewLBPricePerHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LBPricePerHourID,
-		KubecostLoadBalancerCost,
-		[]string{"namespace", "service_name", "ingress_ip"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		kubecost_load_balancer_cost{
-//			<some_custom_filter>
-//		}
-//	) by (namespace, service_name, cluster_id, ingress_ip)[%s:%dm]
-
-func NewLBActiveMinutesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		LBActiveMinutesID,
-		KubecostLoadBalancerCost,
-		[]string{"namespace", "service_name", "ingress_ip"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-//	avg(
-//		kubecost_cluster_management_cost{
-//			<some_custom_filter>
-//		}
-//	) by (cluster_id, provisioner_name)[%s:%dm]
-
-func NewClusterManagementDurationMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		ClusterManagementDurationID,
-		KubecostClusterManagementCost,
-		[]string{"provisioner_name"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_cluster_management_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, provisioner_name)
-
-func NewClusterManagementPricePerHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		ClusterManagementPricePerHourID,
-		KubecostClusterManagementCost,
-		[]string{"provisioner_name"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		kube_pod_container_status_running{
-//			<some_custom_filter>
-//		} != 0
-//	) by (pod, namespace, uid, cluster_id)[%s:%s]
-
-func NewPodActiveMinutesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PodActiveMinutesID,
-		KubePodContainerStatusRunning,
-		[]string{"pod", "namespace", "uid"},
-		ActiveMinutes,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			container_memory_allocation_bytes{
-//				container!="",
-//				container!="POD",
-//				node!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, cluster_id, provider_id)
-
-func NewRAMBytesAllocatedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		RAMBytesAllocatedID,
-		ContainerMemoryAllocationBytes,
-		[]string{"container", "pod", "uid", "namespace", "node", "provider_id"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "POD" && labels["container"] != "" && labels["node"] != ""
-		},
-	)
-}
-
-// avg(
-//	avg_over_time(
-//		kube_pod_container_resource_requests{
-//			resource="memory",
-//			unit="byte",
-//			container!="",
-//			container!="POD",
-//			node!="",
-//			<some_custom_filter>
-//		}[1h]
-//	)
-//) by (container, pod, namespace, node, cluster_id)
-
-func NewRAMRequestsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		RAMRequestsID,
-		KubePodContainerResourceRequests,
-		[]string{"container", "pod", "uid", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["resource"] == "memory" && labels["unit"] == "byte" && labels["container"] != "POD" && labels["container"] != "" && labels["node"] != ""
-		},
-	)
-}
-
-// avg(
-// 		avg_over_time(
-// 			container_memory_working_set_bytes{
-// 				container!="",
-// 				container!="POD",
-// 				<some_custom_filter>
-// 			}[1h]
-// 		)
-// ) by (container, pod, namespace, instance, cluster_id)
-
-func NewRAMUsageAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		RAMUsageAverageID,
-		ContainerMemoryWorkingSetBytes,
-		[]string{"container", "uid", "pod", "namespace", "instance", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "POD" && labels["container"] != ""
-		},
-	)
-}
-
-//	max(
-//		max_over_time(
-//			container_memory_working_set_bytes{
-//				container!="",
-//				container_name!="POD",
-//				container!="POD",
-//				<some_custom_filter>
-//			}[%s]
-//		)
-//	) by (container_name, container, pod_name, pod, namespace, node, instance, %s)
-
-func NewRAMUsageMaxMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		RAMUsageMaxID,
-		ContainerMemoryWorkingSetBytes,
-		[]string{"container_name", "container", "uid", "pod", "namespace", "instance", "node"},
-		MaxOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "" && labels["container_name"] != "POD" && labels["container"] != "POD" && labels["node"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			container_cpu_allocation{
-//				container!="",
-//				container!="POD",
-//				node!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, cluster_id)
-
-func NewCPUCoresAllocatedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		CPUCoresAllocatedID,
-		ContainerCPUAllocation,
-		[]string{"container", "uid", "pod", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "POD" && labels["container"] != "" && labels["node"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_pod_container_resource_requests{
-//				resource="cpu",
-//				unit="core",
-//				container!="",
-//				container!="POD",
-//				node!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, cluster_id)
-
-func NewCPURequestsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		CPURequestsID,
-		KubePodContainerResourceRequests,
-		[]string{"container", "uid", "pod", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["resource"] == "cpu" && labels["unit"] == "core" && labels["container"] != "POD" && labels["container"] != "" && labels["node"] != ""
-		},
-	)
-}
-
-//	avg(
-//		rate(
-//			container_cpu_usage_seconds_total{
-//				container!="",
-//				container_name!="POD",
-//				container!="POD",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container_name, container, pod_name, pod, namespace, node, instance, cluster_id)
-
-func NewCPUUsageAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		CPUUsageAverageID,
-		ContainerCPUUsageSecondsTotal,
-		[]string{"container", "uid", "pod", "namespace", "node", "instance"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["container"] != "" && labels["container_name"] != "POD" && labels["container"] != "POD"
-		},
-	)
-}
-
-// TODO this is a special case
-func NewCPUUsageMaxMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		CPUUsageMaxID,
-		ContainerCPUUsageSecondsTotal,
-		[]string{"container", "uid", "pod", "namespace", "node", "instance"},
-		MaxOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_pod_container_resource_requests{
-//				resource="nvidia_com_gpu",
-//				container!="",
-//				container!="POD",
-//				node!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, cluster_id)
-
-func NewGPUsRequestedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		GPUsRequestedID,
-		KubePodContainerResourceRequests,
-		[]string{"container", "uid", "pod", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["resource"] == "nvidia_com_gpu" && labels["container"] != "POD" && labels["container"] != "" && labels["node"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			DCGM_FI_PROF_GR_ENGINE_ACTIVE{
-//				container!=""
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, cluster_id)
-
-func NewGPUsUsageAverageMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		GPUsUsageAverageID,
-		DCGMFIPROFGRENGINEACTIVE,
-		[]string{"container", "uid", "pod", "namespace"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != ""
-		},
-	)
-}
-
-//	max(
-//		max_over_time(
-//			DCGM_FI_PROF_GR_ENGINE_ACTIVE{
-//				container!=""
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, cluster_id)
-
-func NewGPUsUsageMaxMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		GPUsUsageMaxID,
-		DCGMFIPROFGRENGINEACTIVE,
-		[]string{"container", "uid", "pod", "namespace"},
-		MaxOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			container_gpu_allocation{
-//				container!="",
-//				container!="POD",
-//				node!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, cluster_id)
-
-func NewGPUsAllocatedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		GPUsAllocatedID,
-		ContainerGPUAllocation,
-		[]string{"container", "uid", "pod", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "" && labels["container"] != "POD" && labels["node"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_pod_container_resource_requests{
-//				container!="",
-//				node != "",
-//				pod != "",
-//				container!= "",
-//				unit = "integer",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, node, resource) // TODO is this missing cluster
-
-func NewIsGPUSharedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		IsGPUSharedID,
-		KubePodContainerResourceRequests,
-		[]string{"container", "uid", "pod", "namespace", "node"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != "" && labels["node"] != "" && labels["pod"] != "" && labels["unit"] == "integer"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			DCGM_FI_DEV_DEC_UTIL{
-//				container!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (container, pod, namespace, device, modelName, UUID) // TODO is this missing cluster
-
-func NewGPUInfoMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		GPUInfoID,
-		DCGMFIDEVDECUTIL,
-		[]string{"container", "uid", "pod", "namespace", "device", "modelName", "uuid"},
-		AverageOverTime,
-		func(labels map[string]string) bool {
-			return labels["container"] != ""
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			node_cpu_hourly_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (node, cluster_id, instance_type, provider_id)
-
-func NewNodeCPUPricePerHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeCPUPricePerHourID,
-		NodeCPUHourlyCost,
-		[]string{"node", "instance_type", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			node_ram_hourly_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (node, cluster_id, instance_type, provider_id)
-
-func NewNodeRAMPricePerGiBHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeRAMPricePerGiBHourID,
-		NodeRAMHourlyCost,
-		[]string{"node", "instance_type", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			node_gpu_hourly_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (node, cluster_id, instance_type, provider_id)
-
-func NewNodeGPUPricePerHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeGPUPricePerHourID,
-		NodeGPUHourlyCost,
-		[]string{"node", "instance_type", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		kubecost_node_is_spot{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewNodeIsSpotMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NodeIsSpotID,
-		KubecostNodeIsSpot,
-		[]string{"node"}, // Todo are these the correct labels
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			pod_pvc_allocation{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)
-
-func NewPodPVCAllocationMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PodPVCAllocationID,
-		PodPVCAllocation,
-		[]string{"persistentvolume", "persistentvolumeclaim", "pod", "namespace"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_persistentvolumeclaim_resource_requests_storage_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (persistentvolumeclaim, namespace, cluster_id)
-
-func NewPVCBytesRequestedMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVCBytesRequestedID,
-		KubePersistentVolumeClaimResourceRequestsStorageBytes,
-		[]string{"persistentvolumeclaim", "namespace"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kube_persistentvolume_capacity_bytes{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (persistentvolume, cluster_id)
-
-func NewPVBytesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVBytesID,
-		KubePersistentVolumeCapacityBytes,
-		[]string{"persistentvolume"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			pv_hourly_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (volumename, cluster_id)
-
-func NewPVCostPerGiBHourMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVCostPerGiBHourID,
-		PVHourlyCost,
-		[]string{"volumename"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_pv_info{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id, storageclass, persistentvolume, provider_id)
-
-func NewPVInfoMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PVInfoID,
-		KubecostPVInfo,
-		[]string{"storageclass", "persistentvolume", "provider_id"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	sum(
-//		increase(
-//			kubecost_pod_network_egress_bytes_total{
-//				internet="false",
-//				same_zone="false",
-//				same_region="true",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
-
-func NewNetZoneGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetZoneGiBID,
-		KubecostPodNetworkEgressBytesTotal,
-		[]string{"pod_name", "namespace"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["internet"] == "false" && labels["same_zone"] == "false" && labels["same_region"] == "true"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_network_zone_egress_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id)
-
-func NewNetZonePricePerGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetZonePricePerGiBID,
-		KubecostNetworkZoneEgressCost,
-		[]string{"cluster"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	sum(
-//		increase(
-//			kubecost_pod_network_egress_bytes_total{
-//				internet="false",
-//				same_zone="false",
-//				same_region="false",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
-
-func NewNetRegionGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetRegionGiBID,
-		KubecostPodNetworkEgressBytesTotal,
-		[]string{"pod_name", "namespace"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["internet"] == "false" && labels["same_zone"] == "false" && labels["same_region"] == "false"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_network_region_egress_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id)
-
-func NewNetRegionPricePerGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetRegionPricePerGiBID,
-		KubecostNetworkRegionEgressCost,
-		[]string{"cluster"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	sum(
-//		increase(
-//			kubecost_pod_network_egress_bytes_total{
-//				internet="true",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024
-
-func NewNetInternetGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetInternetGiBID,
-		KubecostPodNetworkEgressBytesTotal,
-		[]string{"pod_name", "namespace"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["internet"] == "true"
-		},
-	)
-}
-
-//	avg(
-//		avg_over_time(
-//			kubecost_network_internet_egress_cost{
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (cluster_id)
-
-func NewNetInternetPricePerGiBMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetInternetPricePerGiBID,
-		KubecostNetworkInternetEgressCost,
-		[]string{"cluster"},
-		AverageOverTime,
-		nil,
-	)
-}
-
-//	sum(
-//		increase(
-//			container_network_receive_bytes_total{
-//				pod!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod_name, pod, namespace, cluster_id)
-
-func NewNetReceiveBytesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetReceiveBytesID,
-		ContainerNetworkReceiveBytesTotal,
-		[]string{"pod_name", "pod", "namespace"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["pod"] != ""
-		},
-	)
-}
-
-//	sum(
-//		increase(
-//			container_network_transmit_bytes_total{
-//				pod!="",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod_name, pod, namespace, cluster_id)
-
-func NewNetTransferBytesMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NetTransferBytesID,
-		ContainerNetworkTransmitBytesTotal,
-		[]string{"pod_name", "pod", "namespace"},
-		Increase,
-		func(labels map[string]string) bool {
-			return labels["pod"] != ""
-		},
-	)
-}
-
-//	avg_over_time(
-//		kube_namespace_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewNamespaceLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NamespaceLabelsID,
-		KubeNamespaceLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		kube_namespace_annotations{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewNamespaceAnnotationsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		NamespaceAnnotationsID,
-		KubeNamespaceAnnotations,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		kube_pod_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewPodLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PodLabelsID,
-		KubePodLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		kube_pod_annotations{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewPodAnnotationsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PodAnnotationsID,
-		KubePodAnnotations,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		service_selector_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewServiceLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		ServiceLabelsID,
-		ServiceSelectorLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		deployment_match_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewDeploymentLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		DeploymentLabelsID,
-		DeploymentMatchLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	avg_over_time(
-//		statefulSet_match_labels{
-//			<some_custom_filter>
-//		}[1h]
-//	)
-
-func NewStatefulSetLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		StatefulSetLabelsID,
-		StatefulSetMatchLabels,
-		[]string{},
-		Info,
-		nil,
-	)
-}
-
-//	sum(
-//		avg_over_time(
-//			kube_pod_owner{
-//				owner_kind="DaemonSet",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod, owner_name, namespace, cluster_id)
-
-func NewDaemonSetLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		DaemonSetLabelsID,
-		KubePodOwner,
-		[]string{"pod", "owner_name", "namespace"},
-		Info,
-		func(labels map[string]string) bool {
-			return labels["owner_kind"] == "DaemonSet"
-		},
-	)
-}
-
-//	sum(
-//		avg_over_time(
-//			kube_pod_owner{
-//				owner_kind="Job",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod, owner_name, namespace, cluster_id)
-
-func NewJobLabelsMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		JobLabelsID,
-		KubePodOwner,
-		[]string{"pod", "owner_name", "namespace"},
-		Info,
-		func(labels map[string]string) bool {
-			return labels["owner_kind"] == "Job"
-		},
-	)
-}
-
-//	sum(
-//		avg_over_time(
-//			kube_pod_owner{
-//				owner_kind="ReplicaSet",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (pod, owner_name, namespace, cluster_id)
-
-func NewPodsWithReplicaSetOwnerMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		PodsWithReplicaSetOwnerID,
-		KubePodOwner,
-		[]string{"pod", "owner_name", "namespace"},
-		Info,
-		func(labels map[string]string) bool {
-			return labels["owner_kind"] == "ReplicaSet"
-		},
-	)
-}
-
-//	sum(
-//		avg_over_time(
-//			kube_replicaset_owner{
-//				owner_kind="<none>",
-//				owner_name="<none>",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (replicaset, namespace, cluster_id)
-
-func NewReplicaSetsWithoutOwnersMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		ReplicaSetsWithoutOwnersID,
-		KubeReplicasetOwner,
-		[]string{"replicaset", "namespace"},
-		Info,
-		func(labels map[string]string) bool {
-			return labels["owner_kind"] == "<none>" && labels["owner_name"] == "<none>"
-		},
-	)
-}
-
-//	sum(
-//		avg_over_time(
-//			kube_replicaset_owner{
-//				owner_kind="Rollout",
-//				<some_custom_filter>
-//			}[1h]
-//		)
-//	) by (replicaset, namespace, owner_kind, owner_name, cluster_id)
-
-func NewReplicaSetsWithRolloutMetricCollector() *MetricCollector {
-	return NewMetricCollector(
-		ReplicaSetsWithRolloutID,
-		KubeReplicasetOwner,
-		[]string{"replicaset", "namespace", "owner_kind", "owner_name"},
-		Info,
-		func(labels map[string]string) bool {
-			return labels["owner_kind"] == "Rollout"
-		},
-	)
-}

+ 178 - 238
modules/collector-source/pkg/collector/metricsquerier.go

@@ -4,397 +4,337 @@ import (
 	"time"
 
 	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
 )
 
-type CollectorProvider interface {
-	GetCollector(start, end time.Time) MetricsCollector
+type collectorMetricsQuerier struct {
+	collectorProvider StoreProvider
 }
-type CollectorMetricsQuerier struct {
-	collectorProvider CollectorProvider
+
+func newCollectorMetricsQuerier(repo *metric.MetricRepository, resoluationConfigs []util.ResolutionConfiguration) *collectorMetricsQuerier {
+	return &collectorMetricsQuerier{
+		collectorProvider: newRepoStoreProvider(repo, resoluationConfigs),
+	}
 }
 
-func (c CollectorMetricsQuerier) QueryPVActiveMinutes(start, end time.Time) *source.Future[source.PVActiveMinutesResult] {
-	//TODO implement me
-	panic("implement me")
+func queryCollector[T any](c *collectorMetricsQuerier, start, end time.Time, id metric.MetricCollectorID, decoder source.ResultDecoder[T]) *source.Future[T] {
+	queryResults := source.NewQueryResults(string(id))
+	collector := c.collectorProvider.GetStore(start, end)
+	if collector != nil {
+		results, err := collector.Query(id)
+		queryResults.Error = err
+		for _, result := range results {
+			queryResults.Results = append(queryResults.Results, result.ToQueryResult())
+		}
+	}
+	ch := make(source.QueryResultsChan, 1)
+	ch <- queryResults
+	f := source.NewFuture[T](decoder, ch)
+	return f
+
 }
 
-func (c CollectorMetricsQuerier) QueryPVUsedAverage(start, end time.Time) *source.Future[source.PVUsedAvgResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVActiveMinutes(start, end time.Time) *source.Future[source.PVActiveMinutesResult] {
+	return queryCollector(c, start, end, metric.PVActiveMinutesID, source.DecodePVActiveMinutesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVUsedMax(start, end time.Time) *source.Future[source.PVUsedMaxResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVUsedAverage(start, end time.Time) *source.Future[source.PVUsedAvgResult] {
+	return queryCollector(c, start, end, metric.PVUsedAverageID, source.DecodePVUsedAvgResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageActiveMinutes(start, end time.Time) *source.Future[source.LocalStorageActiveMinutesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVUsedMax(start, end time.Time) *source.Future[source.PVUsedMaxResult] {
+	return queryCollector(c, start, end, metric.PVUsedMaxID, source.DecodePVUsedMaxResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageCost(start, end time.Time) *source.Future[source.LocalStorageCostResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLocalStorageActiveMinutes(start, end time.Time) *source.Future[source.LocalStorageActiveMinutesResult] {
+	return queryCollector(c, start, end, metric.LocalStorageActiveMinutesID, source.DecodeLocalStorageActiveMinutesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageUsedCost(start, end time.Time) *source.Future[source.LocalStorageUsedCostResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLocalStorageCost(start, end time.Time) *source.Future[source.LocalStorageCostResult] {
+	return queryCollector(c, start, end, metric.LocalStorageCostID, source.DecodeLocalStorageCostResult)
+
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageUsedAvg(start, end time.Time) *source.Future[source.LocalStorageUsedAvgResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLocalStorageUsedCost(start, end time.Time) *source.Future[source.LocalStorageUsedCostResult] {
+	return queryCollector(c, start, end, metric.LocalStorageUsedCostID, source.DecodeLocalStorageUsedCostResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageUsedMax(start, end time.Time) *source.Future[source.LocalStorageUsedMaxResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLocalStorageUsedAvg(start, end time.Time) *source.Future[source.LocalStorageUsedAvgResult] {
+	return queryCollector(c, start, end, metric.LocalStorageUsedAverageID, source.DecodeLocalStorageUsedAvgResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLocalStorageBytes(start, end time.Time) *source.Future[source.LocalStorageBytesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLocalStorageUsedMax(start, end time.Time) *source.Future[source.LocalStorageUsedMaxResult] {
+	return queryCollector(c, start, end, metric.LocalStorageUsedMaxID, source.DecodeLocalStorageUsedMaxResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeActiveMinutes(start, end time.Time) *source.Future[source.NodeActiveMinutesResult] {
-	collector := c.collectorProvider.GetCollector(start, end)
-	results, err := collector.Query(NodeActiveMinutesID)
-	queryResults := source.NewQueryResults(string(NodeActiveMinutesID))
-	queryResults.Error = err
-	for _, result := range results {
-		queryResults.Results = append(queryResults.Results, result.ToQueryResult())
-	}
+func (c *collectorMetricsQuerier) QueryLocalStorageBytes(start, end time.Time) *source.Future[source.LocalStorageBytesResult] {
+	return queryCollector(c, start, end, metric.LocalStorageBytesID, source.DecodeLocalStorageBytesResult)
+}
 
-	ch := make(source.QueryResultsChan)
-	go func() {
-		ch <- queryResults
-	}()
-	return source.NewFuture[source.NodeActiveMinutesResult](source.DecodeNodeActiveMinutesResult, ch)
+func (c *collectorMetricsQuerier) QueryNodeActiveMinutes(start, end time.Time) *source.Future[source.NodeActiveMinutesResult] {
+	return queryCollector(c, start, end, metric.NodeActiveMinutesID, source.DecodeNodeActiveMinutesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeCPUCoresCapacity(start, end time.Time) *source.Future[source.NodeCPUCoresCapacityResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeCPUCoresCapacity(start, end time.Time) *source.Future[source.NodeCPUCoresCapacityResult] {
+	return queryCollector(c, start, end, metric.NodeCPUCoresCapacityID, source.DecodeNodeCPUCoresCapacityResult)
+
 }
 
-func (c CollectorMetricsQuerier) QueryNodeCPUCoresAllocatable(start, end time.Time) *source.Future[source.NodeCPUCoresAllocatableResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeCPUCoresAllocatable(start, end time.Time) *source.Future[source.NodeCPUCoresAllocatableResult] {
+	return queryCollector(c, start, end, metric.NodeCPUCoresAllocatableID, source.DecodeNodeCPUCoresAllocatableResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeRAMBytesCapacity(start, end time.Time) *source.Future[source.NodeRAMBytesCapacityResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeRAMBytesCapacity(start, end time.Time) *source.Future[source.NodeRAMBytesCapacityResult] {
+	return queryCollector(c, start, end, metric.NodeRAMBytesCapacityID, source.DecodeNodeRAMBytesCapacityResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeRAMBytesAllocatable(start, end time.Time) *source.Future[source.NodeRAMBytesAllocatableResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeRAMBytesAllocatable(start, end time.Time) *source.Future[source.NodeRAMBytesAllocatableResult] {
+	return queryCollector(c, start, end, metric.NodeRAMBytesAllocatableID, source.DecodeNodeRAMBytesAllocatableResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeGPUCount(start, end time.Time) *source.Future[source.NodeGPUCountResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeGPUCount(start, end time.Time) *source.Future[source.NodeGPUCountResult] {
+	return queryCollector(c, start, end, metric.NodeGPUCountID, source.DecodeNodeGPUCountResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeCPUModeTotal(start, end time.Time) *source.Future[source.NodeCPUModeTotalResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeCPUModeTotal(start, end time.Time) *source.Future[source.NodeCPUModeTotalResult] {
+	return queryCollector(c, start, end, metric.NodeCPUModeTotalID, source.DecodeNodeCPUModeTotalResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeIsSpot(start, end time.Time) *source.Future[source.NodeIsSpotResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeIsSpot(start, end time.Time) *source.Future[source.NodeIsSpotResult] {
+	return queryCollector(c, start, end, metric.NodeIsSpotID, source.DecodeNodeIsSpotResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeRAMSystemPercent(start, end time.Time) *source.Future[source.NodeRAMSystemPercentResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeRAMSystemPercent(start, end time.Time) *source.Future[source.NodeRAMSystemPercentResult] {
+	return queryCollector(c, start, end, metric.NodeRAMSystemUsageAverageID, source.DecodeNodeRAMSystemPercentResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeRAMUserPercent(start, end time.Time) *source.Future[source.NodeRAMUserPercentResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeRAMUserPercent(start, end time.Time) *source.Future[source.NodeRAMUserPercentResult] {
+	return queryCollector(c, start, end, metric.NodeRAMUserUsageAverageID, source.DecodeNodeRAMUserPercentResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLBActiveMinutes(start, end time.Time) *source.Future[source.LBActiveMinutesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLBActiveMinutes(start, end time.Time) *source.Future[source.LBActiveMinutesResult] {
+	return queryCollector(c, start, end, metric.LBActiveMinutesID, source.DecodeLBActiveMinutesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryLBPricePerHr(start, end time.Time) *source.Future[source.LBPricePerHrResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryLBPricePerHr(start, end time.Time) *source.Future[source.LBPricePerHrResult] {
+	return queryCollector(c, start, end, metric.LBPricePerHourID, source.DecodeLBPricePerHrResult)
 }
 
-func (c CollectorMetricsQuerier) QueryClusterManagementDuration(start, end time.Time) *source.Future[source.ClusterManagementDurationResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryClusterManagementDuration(start, end time.Time) *source.Future[source.ClusterManagementDurationResult] {
+	return queryCollector(c, start, end, metric.ClusterManagementDurationID, source.DecodeClusterManagementDurationResult)
 }
 
-func (c CollectorMetricsQuerier) QueryClusterManagementPricePerHr(start, end time.Time) *source.Future[source.ClusterManagementPricePerHrResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryClusterManagementPricePerHr(start, end time.Time) *source.Future[source.ClusterManagementPricePerHrResult] {
+	return queryCollector(c, start, end, metric.ClusterManagementPricePerHourID, source.DecodeClusterManagementPricePerHrResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPods(start, end time.Time) *source.Future[source.PodsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPods(start, end time.Time) *source.Future[source.PodsResult] {
+	return queryCollector(c, start, end, metric.PodActiveMinutesID, source.DecodePodsResult)
+
 }
 
-func (c CollectorMetricsQuerier) QueryPodsUID(start, end time.Time) *source.Future[source.PodsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPodsUID(start, end time.Time) *source.Future[source.PodsResult] {
+	return queryCollector(c, start, end, metric.PodActiveMinutesID, source.DecodePodsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryRAMBytesAllocated(start, end time.Time) *source.Future[source.RAMBytesAllocatedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryRAMBytesAllocated(start, end time.Time) *source.Future[source.RAMBytesAllocatedResult] {
+	return queryCollector(c, start, end, metric.RAMBytesAllocatedID, source.DecodeRAMBytesAllocatedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryRAMRequests(start, end time.Time) *source.Future[source.RAMRequestsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryRAMRequests(start, end time.Time) *source.Future[source.RAMRequestsResult] {
+	return queryCollector(c, start, end, metric.RAMRequestsID, source.DecodeRAMRequestsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryRAMUsageAvg(start, end time.Time) *source.Future[source.RAMUsageAvgResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryRAMUsageAvg(start, end time.Time) *source.Future[source.RAMUsageAvgResult] {
+	return queryCollector(c, start, end, metric.RAMUsageAverageID, source.DecodeRAMUsageAvgResult)
 }
 
-func (c CollectorMetricsQuerier) QueryRAMUsageMax(start, end time.Time) *source.Future[source.RAMUsageMaxResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryRAMUsageMax(start, end time.Time) *source.Future[source.RAMUsageMaxResult] {
+	return queryCollector(c, start, end, metric.RAMUsageMaxID, source.DecodeRAMUsageMaxResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeRAMPricePerGiBHr(start, end time.Time) *source.Future[source.NodeRAMPricePerGiBHrResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeRAMPricePerGiBHr(start, end time.Time) *source.Future[source.NodeRAMPricePerGiBHrResult] {
+	return queryCollector(c, start, end, metric.NodeRAMPricePerGiBHourID, source.DecodeNodeRAMPricePerGiBHrResult)
 }
 
-func (c CollectorMetricsQuerier) QueryCPUCoresAllocated(start, end time.Time) *source.Future[source.CPUCoresAllocatedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryCPUCoresAllocated(start, end time.Time) *source.Future[source.CPUCoresAllocatedResult] {
+	return queryCollector(c, start, end, metric.CPUCoresAllocatedID, source.DecodeCPUCoresAllocatedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryCPURequests(start, end time.Time) *source.Future[source.CPURequestsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryCPURequests(start, end time.Time) *source.Future[source.CPURequestsResult] {
+	return queryCollector(c, start, end, metric.CPURequestsID, source.DecodeCPURequestsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryCPUUsageAvg(start, end time.Time) *source.Future[source.CPUUsageAvgResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryCPUUsageAvg(start, end time.Time) *source.Future[source.CPUUsageAvgResult] {
+	return queryCollector(c, start, end, metric.CPUUsageAverageID, source.DecodeCPUUsageAvgResult)
 }
 
-func (c CollectorMetricsQuerier) QueryCPUUsageMax(start, end time.Time) *source.Future[source.CPUUsageMaxResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryCPUUsageMax(start, end time.Time) *source.Future[source.CPUUsageMaxResult] {
+	return queryCollector(c, start, end, metric.CPUUsageMaxID, source.DecodeCPUUsageMaxResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeCPUPricePerHr(start, end time.Time) *source.Future[source.NodeCPUPricePerHrResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeCPUPricePerHr(start, end time.Time) *source.Future[source.NodeCPUPricePerHrResult] {
+	return queryCollector(c, start, end, metric.NodeCPUPricePerHourID, source.DecodeNodeCPUPricePerHrResult)
 }
 
-func (c CollectorMetricsQuerier) QueryGPUsAllocated(start, end time.Time) *source.Future[source.GPUsAllocatedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryGPUsAllocated(start, end time.Time) *source.Future[source.GPUsAllocatedResult] {
+	return queryCollector(c, start, end, metric.GPUsAllocatedID, source.DecodeGPUsAllocatedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryGPUsRequested(start, end time.Time) *source.Future[source.GPUsRequestedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryGPUsRequested(start, end time.Time) *source.Future[source.GPUsRequestedResult] {
+	return queryCollector(c, start, end, metric.GPUsRequestedID, source.DecodeGPUsRequestedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryGPUsUsageAvg(start, end time.Time) *source.Future[source.GPUsUsageAvgResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryGPUsUsageAvg(start, end time.Time) *source.Future[source.GPUsUsageAvgResult] {
+	return queryCollector(c, start, end, metric.GPUsUsageAverageID, source.DecodeGPUsUsageAvgResult)
 }
 
-func (c CollectorMetricsQuerier) QueryGPUsUsageMax(start, end time.Time) *source.Future[source.GPUsUsageMaxResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryGPUsUsageMax(start, end time.Time) *source.Future[source.GPUsUsageMaxResult] {
+	return queryCollector(c, start, end, metric.GPUsUsageMaxID, source.DecodeGPUsUsageMaxResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeGPUPricePerHr(start, end time.Time) *source.Future[source.NodeGPUPricePerHrResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeGPUPricePerHr(start, end time.Time) *source.Future[source.NodeGPUPricePerHrResult] {
+	return queryCollector(c, start, end, metric.NodeGPUPricePerHourID, source.DecodeNodeGPUPricePerHrResult)
 }
 
-func (c CollectorMetricsQuerier) QueryGPUInfo(start, end time.Time) *source.Future[source.GPUInfoResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryGPUInfo(start, end time.Time) *source.Future[source.GPUInfoResult] {
+	return queryCollector(c, start, end, metric.GPUInfoID, source.DecodeGPUInfoResult)
 }
 
-func (c CollectorMetricsQuerier) QueryIsGPUShared(start, end time.Time) *source.Future[source.IsGPUSharedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryIsGPUShared(start, end time.Time) *source.Future[source.IsGPUSharedResult] {
+	return queryCollector(c, start, end, metric.IsGPUSharedID, source.DecodeIsGPUSharedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPodPVCAllocation(start, end time.Time) *source.Future[source.PodPVCAllocationResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPodPVCAllocation(start, end time.Time) *source.Future[source.PodPVCAllocationResult] {
+	return queryCollector(c, start, end, metric.PodPVCAllocationID, source.DecodePodPVCAllocationResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVCBytesRequested(start, end time.Time) *source.Future[source.PVCBytesRequestedResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVCBytesRequested(start, end time.Time) *source.Future[source.PVCBytesRequestedResult] {
+	return queryCollector(c, start, end, metric.PVCBytesRequestedID, source.DecodePVCBytesRequestedResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVCInfo(start, end time.Time) *source.Future[source.PVCInfoResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVCInfo(start, end time.Time) *source.Future[source.PVCInfoResult] {
+	return queryCollector(c, start, end, metric.PVCInfoID, source.DecodePVCInfoResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVBytes(start, end time.Time) *source.Future[source.PVBytesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVBytes(start, end time.Time) *source.Future[source.PVBytesResult] {
+	return queryCollector(c, start, end, metric.PVBytesID, source.DecodePVBytesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVPricePerGiBHour(start, end time.Time) *source.Future[source.PVPricePerGiBHourResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVPricePerGiBHour(start, end time.Time) *source.Future[source.PVPricePerGiBHourResult] {
+	return queryCollector(c, start, end, metric.PVPricePerGiBHourID, source.DecodePVPricePerGiBHourResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPVInfo(start, end time.Time) *source.Future[source.PVInfoResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPVInfo(start, end time.Time) *source.Future[source.PVInfoResult] {
+	return queryCollector(c, start, end, metric.PVInfoID, source.DecodePVInfoResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetZoneGiB(start, end time.Time) *source.Future[source.NetZoneGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetZoneGiB(start, end time.Time) *source.Future[source.NetZoneGiBResult] {
+	return queryCollector(c, start, end, metric.NetZoneGiBID, source.DecodeNetZoneGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetZonePricePerGiB(start, end time.Time) *source.Future[source.NetZonePricePerGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetZonePricePerGiB(start, end time.Time) *source.Future[source.NetZonePricePerGiBResult] {
+	return queryCollector(c, start, end, metric.NetZonePricePerGiBID, source.DecodeNetZonePricePerGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetRegionGiB(start, end time.Time) *source.Future[source.NetRegionGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetRegionGiB(start, end time.Time) *source.Future[source.NetRegionGiBResult] {
+	return queryCollector(c, start, end, metric.NetRegionGiBID, source.DecodeNetRegionGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetRegionPricePerGiB(start, end time.Time) *source.Future[source.NetRegionPricePerGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetRegionPricePerGiB(start, end time.Time) *source.Future[source.NetRegionPricePerGiBResult] {
+	return queryCollector(c, start, end, metric.NetRegionPricePerGiBID, source.DecodeNetRegionPricePerGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetInternetGiB(start, end time.Time) *source.Future[source.NetInternetGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetInternetGiB(start, end time.Time) *source.Future[source.NetInternetGiBResult] {
+	return queryCollector(c, start, end, metric.NetInternetGiBID, source.DecodeNetInternetGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetInternetPricePerGiB(start, end time.Time) *source.Future[source.NetInternetPricePerGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetInternetPricePerGiB(start, end time.Time) *source.Future[source.NetInternetPricePerGiBResult] {
+	return queryCollector(c, start, end, metric.NetInternetPricePerGiBID, source.DecodeNetInternetPricePerGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetInternetServiceGiB(start, end time.Time) *source.Future[source.NetInternetServiceGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetInternetServiceGiB(start, end time.Time) *source.Future[source.NetInternetServiceGiBResult] {
+	return queryCollector(c, start, end, metric.NetInternetServiceGiBID, source.DecodeNetInternetServiceGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetTransferBytes(start, end time.Time) *source.Future[source.NetTransferBytesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetTransferBytes(start, end time.Time) *source.Future[source.NetTransferBytesResult] {
+	return queryCollector(c, start, end, metric.NetTransferBytesID, source.DecodeNetTransferBytesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetZoneIngressGiB(start, end time.Time) *source.Future[source.NetZoneIngressGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetZoneIngressGiB(start, end time.Time) *source.Future[source.NetZoneIngressGiBResult] {
+	return queryCollector(c, start, end, metric.NetZoneIngressGiBID, source.DecodeNetZoneIngressGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetRegionIngressGiB(start, end time.Time) *source.Future[source.NetRegionIngressGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetRegionIngressGiB(start, end time.Time) *source.Future[source.NetRegionIngressGiBResult] {
+	return queryCollector(c, start, end, metric.NetRegionIngressGiBID, source.DecodeNetRegionIngressGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetInternetIngressGiB(start, end time.Time) *source.Future[source.NetInternetIngressGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetInternetIngressGiB(start, end time.Time) *source.Future[source.NetInternetIngressGiBResult] {
+	return queryCollector(c, start, end, metric.NetInternetIngressGiBID, source.DecodeNetInternetIngressGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetInternetServiceIngressGiB(start, end time.Time) *source.Future[source.NetInternetServiceIngressGiBResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetInternetServiceIngressGiB(start, end time.Time) *source.Future[source.NetInternetServiceIngressGiBResult] {
+	return queryCollector(c, start, end, metric.NetInternetServiceIngressGiBID, source.DecodeNetInternetServiceIngressGiBResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNetReceiveBytes(start, end time.Time) *source.Future[source.NetReceiveBytesResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNetReceiveBytes(start, end time.Time) *source.Future[source.NetReceiveBytesResult] {
+	return queryCollector(c, start, end, metric.NetReceiveBytesID, source.DecodeNetReceiveBytesResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNamespaceAnnotations(start, end time.Time) *source.Future[source.NamespaceAnnotationsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNamespaceAnnotations(start, end time.Time) *source.Future[source.NamespaceAnnotationsResult] {
+	return queryCollector(c, start, end, metric.NamespaceAnnotationsID, source.DecodeNamespaceAnnotationsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPodAnnotations(start, end time.Time) *source.Future[source.PodAnnotationsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPodAnnotations(start, end time.Time) *source.Future[source.PodAnnotationsResult] {
+	return queryCollector(c, start, end, metric.PodAnnotationsID, source.DecodePodAnnotationsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNodeLabels(start, end time.Time) *source.Future[source.NodeLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNodeLabels(start, end time.Time) *source.Future[source.NodeLabelsResult] {
+	return queryCollector(c, start, end, metric.NodeLabelsID, source.DecodeNodeLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryNamespaceLabels(start, end time.Time) *source.Future[source.NamespaceLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryNamespaceLabels(start, end time.Time) *source.Future[source.NamespaceLabelsResult] {
+	return queryCollector(c, start, end, metric.NamespaceLabelsID, source.DecodeNamespaceLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPodLabels(start, end time.Time) *source.Future[source.PodLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPodLabels(start, end time.Time) *source.Future[source.PodLabelsResult] {
+	return queryCollector(c, start, end, metric.PodLabelsID, source.DecodePodLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryServiceLabels(start, end time.Time) *source.Future[source.ServiceLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryServiceLabels(start, end time.Time) *source.Future[source.ServiceLabelsResult] {
+	return queryCollector(c, start, end, metric.ServiceLabelsID, source.DecodeServiceLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryDeploymentLabels(start, end time.Time) *source.Future[source.DeploymentLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryDeploymentLabels(start, end time.Time) *source.Future[source.DeploymentLabelsResult] {
+	return queryCollector(c, start, end, metric.DeploymentLabelsID, source.DecodeDeploymentLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryStatefulSetLabels(start, end time.Time) *source.Future[source.StatefulSetLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryStatefulSetLabels(start, end time.Time) *source.Future[source.StatefulSetLabelsResult] {
+	return queryCollector(c, start, end, metric.StatefulSetLabelsID, source.DecodeStatefulSetLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryDaemonSetLabels(start, end time.Time) *source.Future[source.DaemonSetLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryDaemonSetLabels(start, end time.Time) *source.Future[source.DaemonSetLabelsResult] {
+	return queryCollector(c, start, end, metric.DaemonSetLabelsID, source.DecodeDaemonSetLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryJobLabels(start, end time.Time) *source.Future[source.JobLabelsResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryJobLabels(start, end time.Time) *source.Future[source.JobLabelsResult] {
+	return queryCollector(c, start, end, metric.JobLabelsID, source.DecodeJobLabelsResult)
 }
 
-func (c CollectorMetricsQuerier) QueryPodsWithReplicaSetOwner(start, end time.Time) *source.Future[source.PodsWithReplicaSetOwnerResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryPodsWithReplicaSetOwner(start, end time.Time) *source.Future[source.PodsWithReplicaSetOwnerResult] {
+	return queryCollector(c, start, end, metric.PodsWithReplicaSetOwnerID, source.DecodePodsWithReplicaSetOwnerResult)
 }
 
-func (c CollectorMetricsQuerier) QueryReplicaSetsWithoutOwners(start, end time.Time) *source.Future[source.ReplicaSetsWithoutOwnersResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryReplicaSetsWithoutOwners(start, end time.Time) *source.Future[source.ReplicaSetsWithoutOwnersResult] {
+	return queryCollector(c, start, end, metric.ReplicaSetsWithoutOwnersID, source.DecodeReplicaSetsWithoutOwnersResult)
 }
 
-func (c CollectorMetricsQuerier) QueryReplicaSetsWithRollout(start, end time.Time) *source.Future[source.ReplicaSetsWithRolloutResult] {
-	//TODO implement me
-	panic("implement me")
+func (c *collectorMetricsQuerier) QueryReplicaSetsWithRollout(start, end time.Time) *source.Future[source.ReplicaSetsWithRolloutResult] {
+	return queryCollector(c, start, end, metric.ReplicaSetsWithRolloutID, source.DecodeReplicaSetsWithRolloutResult)
 }
 
-func (c CollectorMetricsQuerier) QueryDataCoverage(limitDays int) (time.Time, time.Time, error) {
-	//TODO implement me
+func (c *collectorMetricsQuerier) QueryDataCoverage(limitDays int) (time.Time, time.Time, error) {
+	// TODO implement me
 	panic("implement me")
 }

+ 169 - 17
modules/collector-source/pkg/collector/metricsquerier_test.go

@@ -7,32 +7,57 @@ import (
 
 	"github.com/opencost/opencost/core/pkg/source"
 	"github.com/opencost/opencost/core/pkg/util"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape"
 )
 
-var start1Str = "2025-01-01T00:00:00Z00:00"
-var end1Str = "2025-01-01T00:01:00Z00:00"
+var Start1Str = "2025-01-01T00:00:00Z"
+var End1Str = "2025-01-01T00:01:00Z"
 
 type MockCollectorProvider struct {
-	metricsCollector MetricsCollector
+	metricsCollector metric.MetricStore
 }
 
-func (m *MockCollectorProvider) GetCollector(start, end time.Time) MetricsCollector {
+func (m *MockCollectorProvider) GetStore(start, end time.Time) metric.MetricStore {
 	return m.metricsCollector
 }
 
-func GetMockCollectorProvider() CollectorProvider {
-	collector := NewOpenCostMetricCollector()
+func GetMockCollectorProvider() StoreProvider {
+	collector := NewOpenCostMetricStore()
 
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-	end1, _ := time.Parse(time.RFC3339, end1Str)
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
 
 	node1Info := map[string]string{
 		"node":        "node1",
 		"provider_id": "node1",
 	}
 
-	collector.Update(NodeTotalHourlyCost, node1Info, 0, &start1, nil)
-	collector.Update(NodeTotalHourlyCost, node1Info, 0, &end1, nil)
+	cluster1Info := map[string]string{
+		"provisioner_name": "GKE",
+	}
+
+	gpu1Info := map[string]string{
+		"namespace":  "namespace1",
+		"pod":        "pod1",
+		"container":  "container1",
+		"gpu":        "0",
+		"UUID":       "GPU-1",
+		"pci_bus_id": "00000000:00:0A.0",
+		"device":     "nvidia0",
+		"modelName":  "Tesla T4",
+		"Hostname":   "localhost",
+	}
+
+	collector.Update(scrape.NodeTotalHourlyCost, node1Info, 0, &start1, nil)
+	collector.Update(scrape.NodeTotalHourlyCost, node1Info, 0, &end1, nil)
+
+	collector.Update(scrape.KubecostClusterManagementCost, cluster1Info, 0.1, &start1, nil)
+	collector.Update(scrape.KubecostClusterManagementCost, cluster1Info, 0.1, &end1, nil)
+
+	collector.Update(scrape.DCGMFIDEVDECUTIL, gpu1Info, 0, &start1, nil)
+	collector.Update(scrape.DCGMFIPROFGRENGINEACTIVE, gpu1Info, 0, &start1, nil)
+	collector.Update(scrape.DCGMFIPROFGRENGINEACTIVE, gpu1Info, 1, &end1, nil)
 
 	return &MockCollectorProvider{
 		metricsCollector: collector,
@@ -40,14 +65,14 @@ func GetMockCollectorProvider() CollectorProvider {
 }
 
 func TestCollectorMetricsQuerier_QueryNodeActiveMinutes(t *testing.T) {
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-	end1, _ := time.Parse(time.RFC3339, end1Str)
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
 
-	c := CollectorMetricsQuerier{
+	c := collectorMetricsQuerier{
 		collectorProvider: GetMockCollectorProvider(),
 	}
-	resChActiveMins := c.QueryNodeActiveMinutes(time.Now(), time.Now())
-	resActiveMins, err := resChActiveMins.Await()
+	resCh := c.QueryNodeActiveMinutes(time.Now(), time.Now())
+	res, err := resCh.Await()
 	if err != nil {
 		t.Errorf("unexpected error: %v", err.Error())
 	}
@@ -68,7 +93,134 @@ func TestCollectorMetricsQuerier_QueryNodeActiveMinutes(t *testing.T) {
 			},
 		},
 	}
-	if !reflect.DeepEqual(resActiveMins, expected) {
-		t.Errorf("QueryNodeActiveMinutes() = %v, want %v", resActiveMins, expected)
+	if !reflect.DeepEqual(res, expected) {
+		t.Errorf("QueryNodeActiveMinutes() = %v, want %v", res, expected)
+	}
+}
+
+func TestCollectorMetricsQuerier_QueryClusterManagementDuration(t *testing.T) {
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
+
+	c := collectorMetricsQuerier{
+		collectorProvider: GetMockCollectorProvider(),
+	}
+	resCh := c.QueryClusterManagementDuration(start1, end1)
+	res, err := resCh.Await()
+	if err != nil {
+		t.Errorf("unexpected error: %v", err.Error())
+	}
+	expected := []*source.ClusterManagementDurationResult{
+		{
+			Cluster:     "",
+			Provisioner: "GKE",
+			Data: []*util.Vector{
+				{
+					Timestamp: float64(start1.Unix()),
+					Value:     1,
+				},
+				{
+					Timestamp: float64(end1.Unix()),
+					Value:     1,
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(res, expected) {
+		t.Errorf("QueryClusterManagementDuration() = %v, want %v", res, expected)
+	}
+
+}
+
+func TestCollectorMetricsQuerier_QueryGPUsUsageAvg(t *testing.T) {
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
+
+	c := collectorMetricsQuerier{
+		collectorProvider: GetMockCollectorProvider(),
+	}
+	resCh := c.QueryGPUsUsageAvg(start1, end1)
+	res, err := resCh.Await()
+	if err != nil {
+		t.Errorf("unexpected error: %v", err.Error())
+	}
+	expected := []*source.GPUsUsageAvgResult{
+		{
+			Cluster:   "",
+			Namespace: "namespace1",
+			Pod:       "pod1",
+			Container: "container1",
+			Data: []*util.Vector{
+				{
+					Value: 0.5,
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(res, expected) {
+		t.Errorf("QueryGPUsUsageAvg() = %v, want %v", res, expected)
+	}
+}
+
+func TestCollectorMetricsQuerier_QueryGPUsUsageMax(t *testing.T) {
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
+
+	c := collectorMetricsQuerier{
+		collectorProvider: GetMockCollectorProvider(),
+	}
+	resCh := c.QueryGPUsUsageMax(start1, end1)
+	res, err := resCh.Await()
+	if err != nil {
+		t.Errorf("unexpected error: %v", err.Error())
+	}
+	expected := []*source.GPUsUsageMaxResult{
+		{
+			Cluster:   "",
+			Namespace: "namespace1",
+			Pod:       "pod1",
+			Container: "container1",
+			Data: []*util.Vector{
+				{
+					Value: 1.0,
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(res, expected) {
+		t.Errorf("QueryGPUsUsageMax() = %v, want %v", res, expected)
+	}
+}
+
+func TestCollectorMetricsQuerier_QueryGPUInfo(t *testing.T) {
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	end1, _ := time.Parse(time.RFC3339, End1Str)
+
+	c := collectorMetricsQuerier{
+		collectorProvider: GetMockCollectorProvider(),
+	}
+	resCh := c.QueryGPUInfo(start1, end1)
+	res, err := resCh.Await()
+	if err != nil {
+		t.Errorf("unexpected error: %v", err.Error())
+	}
+	expected := []*source.GPUInfoResult{
+		{
+			Cluster:   "",
+			Namespace: "namespace1",
+			Pod:       "pod1",
+			Container: "container1",
+			Device:    "nvidia0",
+			ModelName: "Tesla T4",
+			UUID:      "GPU-1",
+			Data: []*util.Vector{
+				{
+					Value: 1,
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(res, expected) {
+		t.Errorf("QueryGPUInfo() = %v, want %v", res, expected)
 	}
 }

+ 0 - 87
modules/collector-source/pkg/collector/mock.go

@@ -1,87 +0,0 @@
-package collector
-
-import (
-	"fmt"
-	"time"
-
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/target"
-	"golang.org/x/exp/maps"
-)
-
-// UpdateRecorderCollector is a mock MetricsCollector which records the arguments passed to the update function in an array
-type UpdateRecorderCollector struct {
-	updateArgs []UpdateArgs
-}
-
-func (u *UpdateRecorderCollector) Register(collector *MetricCollector) error {
-	panic("this mock does not support this action")
-}
-
-func (u *UpdateRecorderCollector) Unregister(collectorID MetricCollectorID) bool {
-	panic("this mock does not support this action")
-}
-
-func (u *UpdateRecorderCollector) Query(collectorID MetricCollectorID) ([]*MetricResult, error) {
-	panic("this mock does not support this action")
-}
-
-func (u *UpdateRecorderCollector) Update(metricName string, labels map[string]string, value float64, timestamp *time.Time, additionalInformation map[string]string) {
-	u.updateArgs = append(u.updateArgs, UpdateArgs{
-		metricName:            metricName,
-		labels:                labels,
-		value:                 value,
-		timestamp:             timestamp,
-		additionalInformation: additionalInformation,
-	})
-}
-
-type UpdateArgs struct {
-	metricName            string
-	labels                map[string]string
-	value                 float64
-	timestamp             *time.Time
-	additionalInformation map[string]string
-}
-
-func (u UpdateArgs) equals(that UpdateArgs) error {
-	if u.metricName != that.metricName {
-		return fmt.Errorf("expected metric name %s, got %s", u.metricName, that.metricName)
-	}
-
-	if !maps.Equal(u.labels, that.labels) {
-		return fmt.Errorf("expected labels %s, got %s", u.labels, that.labels)
-	}
-
-	if u.value != that.value {
-		return fmt.Errorf("expected value %f, got %f", u.value, that.value)
-	}
-
-	if that.timestamp != nil {
-		if u.timestamp == nil {
-			return fmt.Errorf("expected timestamp nil, got %v", that.timestamp)
-		}
-		if !u.timestamp.Equal(*that.timestamp) {
-			return fmt.Errorf("expected timestamp %s, got %s", u.timestamp, that.timestamp)
-		}
-	} else if u.timestamp != nil {
-		return fmt.Errorf("expected timestamp %v, got nil", u.timestamp)
-	}
-
-	if !maps.Equal(u.additionalInformation, that.additionalInformation) {
-		return fmt.Errorf("expected additionalInformation %v, got %v", u.additionalInformation, that.additionalInformation)
-	}
-
-	return nil
-}
-
-type MockTargetProvider struct {
-	targets []target.ScrapeTarget
-}
-
-func NewMockTargetProvider(targets ...target.ScrapeTarget) *MockTargetProvider {
-	return &MockTargetProvider{targets: targets}
-}
-
-func (m *MockTargetProvider) GetTargets() []target.ScrapeTarget {
-	return m.targets
-}

+ 0 - 45
modules/collector-source/pkg/collector/networktargetprovider.go

@@ -1,45 +0,0 @@
-package collector
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/opencost/opencost/core/pkg/log"
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/target"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/kubernetes"
-)
-
-type NetworkTargetProvider struct {
-	releaseName   string
-	port          int
-	kubeClientSet kubernetes.Interface
-}
-
-func NewNetworkTargetProvider(releaseName string, port int, k8s kubernetes.Interface) *NetworkTargetProvider {
-	return &NetworkTargetProvider{
-		releaseName:   releaseName,
-		port:          port,
-		kubeClientSet: k8s,
-	}
-}
-
-func (n NetworkTargetProvider) GetTargets() []target.ScrapeTarget {
-	k8s := n.kubeClientSet
-
-	pods, err := k8s.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{
-		LabelSelector: fmt.Sprintf("app=%s-network-costs", n.releaseName),
-	})
-	if err != nil {
-		log.Errorf("NetworkTargetProvider: failed to retieve nodes from kubernetes client: %s", err.Error())
-		return nil
-	}
-
-	var targets []target.ScrapeTarget
-	for _, pod := range pods.Items {
-		t := target.NewUrlTarget(fmt.Sprintf("http://%s:%d/metrics", pod.Status.PodIP, n.port))
-		targets = append(targets, t)
-	}
-
-	return targets
-}

+ 0 - 83
modules/collector-source/pkg/collector/opencost.go

@@ -1,83 +0,0 @@
-package collector
-
-func NewOpenCostMetricCollector() MetricsCollector {
-	memCollector := NewInMemoryMetricsCollector()
-
-	// Register all the metrics
-	memCollector.Register(NewPVPricePerGiBHourMetricCollector())
-	memCollector.Register(NewPVUsedAverageMetricCollector())
-	memCollector.Register(NewPVUsedMaxMetricCollector())
-	memCollector.Register(NewPVCInfoMetricCollector())
-	memCollector.Register(NewPVActiveMinutesMetricCollector())
-	memCollector.Register(NewLocalStorageCostMetricCollector())
-	memCollector.Register(NewLocalStorageUsedCostMetricCollector())
-	memCollector.Register(NewLocalStorageUsedAverageMetricCollector())
-	memCollector.Register(NewLocalStorageUsedMaxMetricCollector())
-	memCollector.Register(NewLocalStorageBytesMetricCollector())
-	memCollector.Register(NewLocalStorageActiveMinutesMetricCollector())
-	memCollector.Register(NewNodeCPUCoresCapacityMetricCollector())
-	memCollector.Register(NewNodeCPUCoresAllocatableMetricCollector())
-	memCollector.Register(NewNodeRAMBytesCapacityMetricCollector())
-	memCollector.Register(NewNodeRAMBytesAllocatableMetricCollector())
-	memCollector.Register(NewNodeGPUCountMetricCollector())
-	memCollector.Register(NewNodeLabelsMetricCollector())
-	memCollector.Register(NewNodeActiveMinutesMetricCollector())
-	memCollector.Register(NewNodeCPUModeTotalMetricCollector())
-	memCollector.Register(NewNodeRAMSystemUsageAverageMetricCollector())
-	memCollector.Register(NewNodeRAMUserUsageAverageMetricCollector())
-	memCollector.Register(NewLBPricePerHourMetricCollector())
-	memCollector.Register(NewLBActiveMinutesMetricCollector())
-	memCollector.Register(NewClusterManagementDurationMetricCollector())
-	memCollector.Register(NewClusterManagementPricePerHourMetricCollector())
-	memCollector.Register(NewPodActiveMinutesMetricCollector())
-	memCollector.Register(NewRAMBytesAllocatedMetricCollector())
-	memCollector.Register(NewRAMRequestsMetricCollector())
-	memCollector.Register(NewRAMUsageAverageMetricCollector())
-	memCollector.Register(NewRAMUsageMaxMetricCollector())
-	memCollector.Register(NewCPUCoresAllocatedMetricCollector())
-	memCollector.Register(NewCPURequestsMetricCollector())
-	memCollector.Register(NewCPUUsageAverageMetricCollector())
-	memCollector.Register(NewCPUUsageMaxMetricCollector())
-	memCollector.Register(NewGPUsRequestedMetricCollector())
-	memCollector.Register(NewGPUsUsageAverageMetricCollector())
-	memCollector.Register(NewGPUsUsageMaxMetricCollector())
-	memCollector.Register(NewGPUsAllocatedMetricCollector())
-	memCollector.Register(NewIsGPUSharedMetricCollector())
-	memCollector.Register(NewGPUInfoMetricCollector())
-	memCollector.Register(NewNodeCPUPricePerHourMetricCollector())
-	memCollector.Register(NewNodeRAMPricePerGiBHourMetricCollector())
-	memCollector.Register(NewNodeGPUPricePerHourMetricCollector())
-	memCollector.Register(NewNodeIsSpotMetricCollector())
-	memCollector.Register(NewPodPVCAllocationMetricCollector())
-	memCollector.Register(NewPVCBytesRequestedMetricCollector())
-	memCollector.Register(NewPVBytesMetricCollector())
-	memCollector.Register(NewPVCostPerGiBHourMetricCollector())
-	memCollector.Register(NewPVInfoMetricCollector())
-	memCollector.Register(NewNetZoneGiBMetricCollector())
-	memCollector.Register(NewNetZonePricePerGiBMetricCollector())
-	memCollector.Register(NewNetRegionGiBMetricCollector())
-	memCollector.Register(NewNetRegionPricePerGiBMetricCollector())
-	memCollector.Register(NewNetInternetGiBMetricCollector())
-	memCollector.Register(NewNetInternetPricePerGiBMetricCollector())
-	memCollector.Register(NewNetReceiveBytesMetricCollector())
-	memCollector.Register(NewNetTransferBytesMetricCollector())
-	memCollector.Register(NewNamespaceLabelsMetricCollector())
-	memCollector.Register(NewNamespaceAnnotationsMetricCollector())
-	memCollector.Register(NewPodLabelsMetricCollector())
-	memCollector.Register(NewPodAnnotationsMetricCollector())
-	memCollector.Register(NewServiceLabelsMetricCollector())
-	memCollector.Register(NewDeploymentLabelsMetricCollector())
-	memCollector.Register(NewStatefulSetLabelsMetricCollector())
-	memCollector.Register(NewDaemonSetLabelsMetricCollector())
-	memCollector.Register(NewJobLabelsMetricCollector())
-	memCollector.Register(NewPodsWithReplicaSetOwnerMetricCollector())
-	memCollector.Register(NewReplicaSetsWithoutOwnersMetricCollector())
-	memCollector.Register(NewReplicaSetsWithRolloutMetricCollector())
-
-	return memCollector
-}
-
-// There are a couple ways we can make "Reporting" of the metrics a bit cleaner:
-// -- we can write thin API friendly wrappers that can be used to funnel value updates into
-//    collector.Update(...) calls [similar to prom]. This is purely convenience and there isn't
-//    really an architecture bearing weight on this decisions. Whatever is easier to use.

+ 0 - 342
modules/collector-source/pkg/collector/scraper.go

@@ -1,342 +0,0 @@
-package collector
-
-import (
-	"fmt"
-	"slices"
-	"strings"
-	"time"
-
-	"github.com/opencost/opencost/core/pkg/clustercache"
-	"github.com/opencost/opencost/core/pkg/log"
-	"github.com/opencost/opencost/core/pkg/util/promutil"
-	"golang.org/x/exp/maps"
-	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
-	"k8s.io/apimachinery/pkg/util/validation"
-)
-
-type kubernetesScraper struct {
-	clusterCache clustercache.ClusterCache
-	collector    MetricsCollector
-}
-
-func (ks *kubernetesScraper) Scrape() {
-	timestamp := time.Now().UTC()
-	nodes := ks.clusterCache.GetAllNodes()
-	deployments := ks.clusterCache.GetAllDeployments()
-	namespaces := ks.clusterCache.GetAllNamespaces()
-	pods := ks.clusterCache.GetAllPods()
-	pvcs := ks.clusterCache.GetAllPersistentVolumeClaims()
-	pvs := ks.clusterCache.GetAllPersistentVolumes()
-	services := ks.clusterCache.GetAllServices()
-	statefulSets := ks.clusterCache.GetAllStatefulSets()
-
-	ks.scrapeNodes(nodes, timestamp)
-	ks.scrapeDeployments(deployments, timestamp)
-	ks.scrapeNamespaces(namespaces, timestamp)
-	ks.scrapePods(pods, timestamp)
-	ks.scrapePVCs(pvcs, timestamp)
-	ks.scrapePVs(pvs, timestamp)
-	ks.scrapeServices(services, timestamp)
-	ks.scrapeStatefulSets(statefulSets, timestamp)
-}
-
-func (ks *kubernetesScraper) scrapeNodes(nodes []*clustercache.Node, timestamp time.Time) {
-	for _, node := range nodes {
-		nodeInfo := map[string]string{
-			"node":        node.Name,
-			"provider_id": node.SpecProviderID,
-		}
-
-		// Node Capacity
-		if node.Status.Capacity != nil {
-			if quantity, ok := node.Status.Capacity[v1.ResourceCPU]; ok {
-				_, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
-				ks.collector.Update(KubeNodeStatusCapacityCPUCores, nodeInfo, value, &timestamp, nil)
-			}
-
-			if quantity, ok := node.Status.Capacity[v1.ResourceMemory]; ok {
-				_, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
-				ks.collector.Update(KubeNodeStatusCapacityMemoryBytes, nodeInfo, value, &timestamp, nil)
-			}
-		}
-
-		// Node Allocatable Resources
-		if node.Status.Allocatable != nil {
-			if quantity, ok := node.Status.Allocatable[v1.ResourceCPU]; ok {
-				_, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
-				ks.collector.Update(KubeNodeStatusAllocatableCPUCores, nodeInfo, value, &timestamp, nil)
-			}
-
-			if quantity, ok := node.Status.Allocatable[v1.ResourceMemory]; ok {
-				_, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
-				ks.collector.Update(KubeNodeStatusAllocatableMemoryBytes, nodeInfo, value, &timestamp, nil)
-			}
-		}
-
-		// node labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(node.Labels)
-		nodeLabels := toMap(labelNames, labelValues)
-
-		ks.collector.Update(KubeNodeLabels, nodeInfo, 0, &timestamp, nodeLabels)
-
-	}
-}
-
-func (ks *kubernetesScraper) scrapeDeployments(deployments []*clustercache.Deployment, timestamp time.Time) {
-	for _, deployment := range deployments {
-		deploymentInfo := map[string]string{
-			"deployment": deployment.Name,
-			"namespace":  deployment.Namespace,
-		}
-
-		// deployment labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(deployment.MatchLabels)
-		deploymentLabels := toMap(labelNames, labelValues)
-
-		ks.collector.Update(DeploymentMatchLabels, deploymentInfo, 0, &timestamp, deploymentLabels)
-
-	}
-}
-
-func (ks *kubernetesScraper) scrapeNamespaces(namespaces []*clustercache.Namespace, timestamp time.Time) {
-	for _, namespace := range namespaces {
-		namespaceInfo := map[string]string{
-			"namespace": namespace.Name,
-		}
-
-		// namespace labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(namespace.Labels)
-		namespaceLabels := toMap(labelNames, labelValues)
-		ks.collector.Update(KubeNamespaceLabels, namespaceInfo, 0, &timestamp, namespaceLabels)
-
-		// namespace annotations
-		annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(namespace.Annotations)
-		namespaceAnnotations := toMap(annotationNames, annotationValues)
-		ks.collector.Update(KubeNamespaceAnnotations, namespaceInfo, 0, &timestamp, namespaceAnnotations)
-	}
-}
-
-func (ks *kubernetesScraper) scrapePods(pods []*clustercache.Pod, timestamp time.Time) {
-	for _, pod := range pods {
-		podInfo := map[string]string{
-			"name":      pod.Name,
-			"namespace": pod.Namespace,
-			"uid":       string(pod.UID),
-			"node":      pod.Spec.NodeName,
-		}
-
-		// pod labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(pod.Labels)
-		podLabels := toMap(labelNames, labelValues)
-		ks.collector.Update(KubePodLabels, podInfo, 0, &timestamp, podLabels)
-
-		// pod annotations
-		annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(pod.Annotations)
-		podAnnotations := toMap(annotationNames, annotationValues)
-		ks.collector.Update(KubePodAnnotations, podInfo, 0, &timestamp, podAnnotations)
-
-		// Pod owner metric
-		for _, owner := range pod.OwnerReferences {
-			ownerInfo := maps.Clone(podInfo)
-			ownerInfo["owner_kind"] = owner.Kind
-			ownerInfo["owner_name"] = owner.Name
-			ownerInfo["owner_is_controller"] = fmt.Sprintf("%t", owner.Controller != nil)
-			ks.collector.Update(KubePodOwner, ownerInfo, 0, &timestamp, nil)
-		}
-
-		// Container Status
-		for _, status := range pod.Status.ContainerStatuses {
-			if status.State.Running != nil {
-				containerInfo := maps.Clone(podInfo)
-				containerInfo["container"] = status.Name
-				ks.collector.Update(KubePodContainerStatusRunning, containerInfo, 0, &timestamp, nil)
-			}
-		}
-
-		for _, container := range pod.Spec.Containers {
-			containerInfo := maps.Clone(podInfo)
-			containerInfo["container"] = container.Name
-			// Requests
-			if container.Resources.Requests != nil {
-				// sorting keys here for testing purposes
-				keys := maps.Keys(container.Resources.Requests)
-				slices.Sort(keys)
-				for _, resourceName := range keys {
-					quantity := container.Resources.Requests[resourceName]
-					resource, unit, value := toResourceUnitValue(resourceName, quantity)
-
-					// failed to parse the resource type
-					if resource == "" {
-						log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
-						continue
-					}
-
-					resourceRequestInfo := maps.Clone(containerInfo)
-					resourceRequestInfo["resource"] = resource
-					resourceRequestInfo["unit"] = unit
-					ks.collector.Update(KubePodContainerResourceRequests, resourceRequestInfo, value, &timestamp, nil)
-				}
-			}
-		}
-	}
-}
-
-func (ks *kubernetesScraper) scrapePVCs(pvcs []*clustercache.PersistentVolumeClaim, timestamp time.Time) {
-	for _, pvc := range pvcs {
-		pvcInfo := map[string]string{
-			"name":         pvc.Name,
-			"namespace":    pvc.Namespace,
-			"volumename":   pvc.Spec.VolumeName,
-			"storageclass": getPersistentVolumeClaimClass(pvc),
-		}
-
-		ks.collector.Update(KubePersistenVolumeClaimInfo, pvcInfo, 0, &timestamp, nil)
-
-		if storage, ok := pvc.Spec.Resources.Requests[v1.ResourceStorage]; ok {
-			ks.collector.Update(KubePersistentVolumeClaimResourceRequestsStorageBytes, pvcInfo, float64(storage.Value()), &timestamp, nil)
-		}
-	}
-}
-
-func (ks *kubernetesScraper) scrapePVs(pvs []*clustercache.PersistentVolume, timestamp time.Time) {
-	for _, pv := range pvs {
-		providerID := pv.Name
-		// if a more accurate provider ID is available, use that
-		if pv.Spec.CSI != nil && pv.Spec.CSI.VolumeHandle != "" {
-			providerID = pv.Spec.CSI.VolumeHandle
-		}
-		pvInfo := map[string]string{
-			"name":         pv.Name,
-			"storageClass": pv.Spec.StorageClassName,
-			"providerID":   providerID,
-		}
-
-		ks.collector.Update(KubecostPVInfo, pvInfo, 0, &timestamp, nil)
-
-		if storage, ok := pv.Spec.Capacity[v1.ResourceStorage]; ok {
-			ks.collector.Update(KubePersistentVolumeCapacityBytes, pvInfo, float64(storage.Value()), &timestamp, nil)
-		}
-	}
-}
-
-func (ks *kubernetesScraper) scrapeServices(services []*clustercache.Service, timestamp time.Time) {
-	for _, service := range services {
-		serviceInfo := map[string]string{
-			"service":   service.Name,
-			"namespace": service.Namespace,
-		}
-
-		// service labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(service.SpecSelector)
-		serviceLabels := toMap(labelNames, labelValues)
-		ks.collector.Update(ServiceSelectorLabels, serviceInfo, 0, &timestamp, serviceLabels)
-
-	}
-}
-
-func (ks *kubernetesScraper) scrapeStatefulSets(statefulSets []*clustercache.StatefulSet, timestamp time.Time) {
-	for _, statefulSet := range statefulSets {
-		statefulSetInfo := map[string]string{
-			"name":      statefulSet.Name,
-			"namespace": statefulSet.Namespace,
-		}
-
-		// statefulSet labels
-		labelNames, labelValues := promutil.KubeLabelsToLabels(statefulSet.SpecSelector.MatchLabels)
-		statefulSetLabels := toMap(labelNames, labelValues)
-		ks.collector.Update(StatefulSetMatchLabels, statefulSetInfo, 0, &timestamp, statefulSetLabels)
-
-	}
-}
-
-// getPersistentVolumeClaimClass returns StorageClassName. If no storage class was
-// requested, it returns "".
-func getPersistentVolumeClaimClass(claim *clustercache.PersistentVolumeClaim) string {
-	// Use beta annotation first
-	if class, found := claim.Annotations[v1.BetaStorageClassAnnotation]; found {
-		return class
-	}
-
-	if claim.Spec.StorageClassName != nil {
-		return *claim.Spec.StorageClassName
-	}
-
-	// Special non-empty string to indicate absence of storage class.
-	return ""
-}
-
-// toResourceUnitValue accepts a resource name and quantity and returns the sanitized resource, the unit, and the value in the units.
-// Returns an empty string for resource and unit if there was a failure.
-func toResourceUnitValue(resourceName v1.ResourceName, quantity resource.Quantity) (resource string, unit string, value float64) {
-	resource = promutil.SanitizeLabelName(string(resourceName))
-
-	switch resourceName {
-	case v1.ResourceCPU:
-		unit = "core"
-		value = float64(quantity.MilliValue()) / 1000
-		return
-
-	case v1.ResourceStorage:
-		fallthrough
-	case v1.ResourceEphemeralStorage:
-		fallthrough
-	case v1.ResourceMemory:
-		unit = "byte"
-		value = float64(quantity.Value())
-		return
-	case v1.ResourcePods:
-		unit = "integer"
-		value = float64(quantity.Value())
-		return
-	default:
-		if isHugePageResourceName(resourceName) || isAttachableVolumeResourceName(resourceName) {
-			unit = "byte"
-			value = float64(quantity.Value())
-			return
-		}
-
-		if isExtendedResourceName(resourceName) {
-			unit = "integer"
-			value = float64(quantity.Value())
-			return
-		}
-	}
-
-	resource = ""
-	unit = ""
-	value = 0.0
-	return
-}
-
-// isHugePageResourceName checks for a huge page container resource name
-func isHugePageResourceName(name v1.ResourceName) bool {
-	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
-}
-
-// isAttachableVolumeResourceName checks for attached volume container resource name
-func isAttachableVolumeResourceName(name v1.ResourceName) bool {
-	return strings.HasPrefix(string(name), v1.ResourceAttachableVolumesPrefix)
-}
-
-// isExtendedResourceName checks for extended container resource name
-func isExtendedResourceName(name v1.ResourceName) bool {
-	if isNativeResource(name) || strings.HasPrefix(string(name), v1.DefaultResourceRequestsPrefix) {
-		return false
-	}
-	// Ensure it satisfies the rules in IsQualifiedName() after converted into quota resource name
-	nameForQuota := fmt.Sprintf("%s%s", v1.DefaultResourceRequestsPrefix, string(name))
-	if errs := validation.IsQualifiedName(nameForQuota); len(errs) != 0 {
-		return false
-	}
-	return true
-}
-
-// isNativeResource checks for a kubernetes.io/ prefixed resource name
-func isNativeResource(name v1.ResourceName) bool {
-	return !strings.Contains(string(name), "/") || isPrefixedNativeResource(name)
-}
-
-func isPrefixedNativeResource(name v1.ResourceName) bool {
-	return strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
-}

+ 0 - 796
modules/collector-source/pkg/collector/scraper_test.go

@@ -1,796 +0,0 @@
-package collector
-
-import (
-	"testing"
-	"time"
-
-	"github.com/opencost/opencost/core/pkg/clustercache"
-	v1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/resource"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-)
-
-func Test_kubernetesScraper_scrapeNodes(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		nodes     []*clustercache.Node
-		timestamp time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					nodes: []*clustercache.Node{
-						{
-							Name:           "node1",
-							SpecProviderID: "i-1",
-							Status: v1.NodeStatus{
-								Capacity: v1.ResourceList{
-									v1.ResourceCPU:    resource.MustParse("2"),
-									v1.ResourceMemory: resource.MustParse("2048"),
-								},
-								Allocatable: v1.ResourceList{
-									v1.ResourceCPU:    resource.MustParse("1"),
-									v1.ResourceMemory: resource.MustParse("1024"),
-								},
-							},
-							Labels: map[string]string{
-								"test1": "blah",
-								"test2": "blah2",
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: KubeNodeStatusCapacityCPUCores,
-					labels: map[string]string{
-						"node":        "node1",
-						"provider_id": "i-1",
-					},
-					value:                 2.0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubeNodeStatusCapacityMemoryBytes,
-					labels: map[string]string{
-						"node":        "node1",
-						"provider_id": "i-1",
-					},
-					value:                 2048.0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubeNodeStatusAllocatableCPUCores,
-					labels: map[string]string{
-						"node":        "node1",
-						"provider_id": "i-1",
-					},
-					value:                 1.0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubeNodeStatusAllocatableMemoryBytes,
-					labels: map[string]string{
-						"node":        "node1",
-						"provider_id": "i-1",
-					},
-					value:                 1024.0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubeNodeLabels,
-					labels: map[string]string{
-						"node":        "node1",
-						"provider_id": "i-1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapeNodes(s.nodes, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapeDeployments(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		deployments []*clustercache.Deployment
-		timestamp   time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					deployments: []*clustercache.Deployment{
-						{
-							Name:      "deployment1",
-							Namespace: "namespace1",
-							MatchLabels: map[string]string{
-								"test1": "blah",
-								"test2": "blah2",
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-
-				{
-					metricName: DeploymentMatchLabels,
-					labels: map[string]string{
-						"deployment": "deployment1",
-						"namespace":  "namespace1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapeDeployments(s.deployments, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapeNamespaces(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		namespaces []*clustercache.Namespace
-		timestamp  time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					namespaces: []*clustercache.Namespace{
-						{
-							Name: "namespace1",
-							Labels: map[string]string{
-								"test1": "blah",
-								"test2": "blah2",
-							},
-							Annotations: map[string]string{
-								"test3": "blah3",
-								"test4": "blah4",
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: KubeNamespaceLabels,
-					labels: map[string]string{
-						"namespace": "namespace1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-				{
-					metricName: KubeNamespaceAnnotations,
-					labels: map[string]string{
-						"namespace": "namespace1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"annotation_test3": "blah3",
-						"annotation_test4": "blah4",
-					},
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapeNamespaces(s.namespaces, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapePods(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		pods      []*clustercache.Pod
-		timestamp time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					pods: []*clustercache.Pod{
-						{
-							Name:      "pod1",
-							Namespace: "namespace1",
-							UID:       "uuid1",
-							Spec: clustercache.PodSpec{
-								NodeName: "node1",
-								Containers: []clustercache.Container{
-									{
-										Name: "container1",
-										Resources: v1.ResourceRequirements{
-											Requests: map[v1.ResourceName]resource.Quantity{
-												v1.ResourceCPU:    resource.MustParse("500m"),
-												v1.ResourceMemory: resource.MustParse("512"),
-											},
-										},
-									},
-								},
-							},
-							Labels: map[string]string{
-								"test1": "blah",
-								"test2": "blah2",
-							},
-							Annotations: map[string]string{
-								"test3": "blah3",
-								"test4": "blah4",
-							},
-							OwnerReferences: []metav1.OwnerReference{
-								{
-									Kind:       "deployment",
-									Name:       "deployment1",
-									Controller: nil,
-								},
-							},
-							Status: clustercache.PodStatus{
-								ContainerStatuses: []v1.ContainerStatus{
-									{
-										Name: "container1",
-										State: v1.ContainerState{
-											Running: &v1.ContainerStateRunning{},
-										},
-									},
-								},
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: KubePodLabels,
-					labels: map[string]string{
-						"name":      "pod1",
-						"namespace": "namespace1",
-						"uid":       "uuid1",
-						"node":      "node1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-				{
-					metricName: KubePodAnnotations,
-					labels: map[string]string{
-						"name":      "pod1",
-						"namespace": "namespace1",
-						"uid":       "uuid1",
-						"node":      "node1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"annotation_test3": "blah3",
-						"annotation_test4": "blah4",
-					},
-				},
-				{
-					metricName: KubePodOwner,
-					labels: map[string]string{
-						"name":                "pod1",
-						"namespace":           "namespace1",
-						"uid":                 "uuid1",
-						"node":                "node1",
-						"owner_kind":          "deployment",
-						"owner_name":          "deployment1",
-						"owner_is_controller": "false",
-					},
-					value:                 0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubePodContainerStatusRunning,
-					labels: map[string]string{
-						"name":      "pod1",
-						"namespace": "namespace1",
-						"uid":       "uuid1",
-						"node":      "node1",
-						"container": "container1",
-					},
-					value:                 0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubePodContainerResourceRequests,
-					labels: map[string]string{
-						"name":      "pod1",
-						"namespace": "namespace1",
-						"uid":       "uuid1",
-						"node":      "node1",
-						"container": "container1",
-						"resource":  "cpu",
-						"unit":      "core",
-					},
-					value:                 0.5,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubePodContainerResourceRequests,
-					labels: map[string]string{
-						"name":      "pod1",
-						"namespace": "namespace1",
-						"uid":       "uuid1",
-						"node":      "node1",
-						"container": "container1",
-						"resource":  "memory",
-						"unit":      "byte",
-					},
-					value:                 512,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapePods(s.pods, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapePVCs(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		pvcs      []*clustercache.PersistentVolumeClaim
-		timestamp time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					pvcs: []*clustercache.PersistentVolumeClaim{
-						{
-							Name:      "pvc1",
-							Namespace: "namespace1",
-							Spec: v1.PersistentVolumeClaimSpec{
-								VolumeName:       "vol1",
-								StorageClassName: ptr("storageClass1"),
-								Resources: v1.VolumeResourceRequirements{
-									Requests: v1.ResourceList{
-										v1.ResourceStorage: resource.MustParse("4096"),
-									},
-								},
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: KubePersistenVolumeClaimInfo,
-					labels: map[string]string{
-						"name":         "pvc1",
-						"namespace":    "namespace1",
-						"volumename":   "vol1",
-						"storageclass": "storageClass1",
-					},
-					value:                 0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubePersistentVolumeClaimResourceRequestsStorageBytes,
-					labels: map[string]string{
-						"name":         "pvc1",
-						"namespace":    "namespace1",
-						"volumename":   "vol1",
-						"storageclass": "storageClass1",
-					},
-					value:                 4096,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapePVCs(s.pvcs, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapePVs(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		pvs       []*clustercache.PersistentVolume
-		timestamp time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					pvs: []*clustercache.PersistentVolume{
-						{
-							Name: "pv1",
-							Spec: v1.PersistentVolumeSpec{
-								StorageClassName: "storageClass1",
-								PersistentVolumeSource: v1.PersistentVolumeSource{
-									CSI: &v1.CSIPersistentVolumeSource{
-										VolumeHandle: "vol-1",
-									},
-								},
-								Capacity: v1.ResourceList{
-									v1.ResourceStorage: resource.MustParse("4096"),
-								},
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: KubecostPVInfo,
-					labels: map[string]string{
-						"name":         "pv1",
-						"providerID":   "vol-1",
-						"storageClass": "storageClass1",
-					},
-					value:                 0,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubePersistentVolumeCapacityBytes,
-					labels: map[string]string{
-						"name":         "pv1",
-						"providerID":   "vol-1",
-						"storageClass": "storageClass1",
-					},
-					value:                 4096,
-					timestamp:             &start1,
-					additionalInformation: nil,
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapePVs(s.pvs, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapeServices(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		services  []*clustercache.Service
-		timestamp time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					services: []*clustercache.Service{
-						{
-							Name:      "service1",
-							Namespace: "namespace1",
-							SpecSelector: map[string]string{
-								"test1": "blah",
-								"test2": "blah2",
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: ServiceSelectorLabels,
-					labels: map[string]string{
-						"service":   "service1",
-						"namespace": "namespace1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapeServices(s.services, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}
-
-func Test_kubernetesScraper_scrapeStatefulSets(t *testing.T) {
-
-	start1, _ := time.Parse(time.RFC3339, start1Str)
-
-	type scrape struct {
-		statefulSets []*clustercache.StatefulSet
-		timestamp    time.Time
-	}
-	tests := []struct {
-		name     string
-		scrapes  []scrape
-		expected []UpdateArgs
-	}{
-		{
-			name: "simple",
-			scrapes: []scrape{
-				{
-					statefulSets: []*clustercache.StatefulSet{
-						{
-							Name:      "statefulSet1",
-							Namespace: "namespace1",
-							SpecSelector: &metav1.LabelSelector{
-								MatchLabels: map[string]string{
-									"test1": "blah",
-									"test2": "blah2",
-								},
-							},
-						},
-					},
-					timestamp: start1,
-				},
-			},
-			expected: []UpdateArgs{
-				{
-					metricName: StatefulSetMatchLabels,
-					labels: map[string]string{
-						"name":      "statefulSet1",
-						"namespace": "namespace1",
-					},
-					value:     0,
-					timestamp: &start1,
-					additionalInformation: map[string]string{
-						"label_test1": "blah",
-						"label_test2": "blah2",
-					},
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			ks := &kubernetesScraper{
-				collector: &updateRecorder,
-			}
-			for _, s := range tt.scrapes {
-				ks.scrapeStatefulSets(s.statefulSets, s.timestamp)
-			}
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}

+ 0 - 32
modules/collector-source/pkg/collector/targetscraper.go

@@ -1,32 +0,0 @@
-package collector
-
-import (
-	"github.com/opencost/opencost/core/pkg/log"
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/parser"
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/target"
-)
-
-type TargetScraper struct {
-	targetProvider target.TargetProvider
-	collector      MetricsCollector
-}
-
-func (s *TargetScraper) Scrape() {
-	targets := s.targetProvider.GetTargets()
-	for _, target := range targets {
-		f, err := target.Load()
-		if err != nil {
-			log.Errorf("failed to scrape target: %s", err.Error())
-			continue
-		}
-		results, err := parser.Parse(f)
-		if err != nil {
-			log.Errorf("failed to parse target: %s", err.Error())
-			continue
-		}
-
-		for _, result := range results {
-			s.collector.Update(result.Name, result.Labels, result.Value, result.Timestamp, nil)
-		}
-	}
-}

+ 0 - 116
modules/collector-source/pkg/collector/targetscraper_test.go

@@ -1,116 +0,0 @@
-package collector
-
-import (
-	"testing"
-
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/target"
-)
-
-const networkScape = `
-# HELP kubecost_pod_network_egress_bytes kubecost_pod_network_egress_bytes_total egressed byte counts by pod.
-# TYPE kubecost_pod_network_egress_bytes counter
-kubecost_pod_network_egress_bytes_total{pod_name="pod1",namespace="namespace1",internet="false",same_region="true",same_zone="true",service="service1"} 3127969647
-kubecost_pod_network_egress_bytes_total{pod_name="pod2",namespace="namespace1",internet="true",same_region="false",same_zone="false",service=""} 335188219
-# HELP kubecost_pod_network_ingress_bytes kubecost_pod_network_ingress_bytes_total ingressed byte counts by pod.
-# TYPE kubecost_pod_network_ingress_bytes counter
-kubecost_pod_network_ingress_bytes_total{pod_name="pod1",namespace="namespace1",internet="true",same_region="false",same_zone="false",service="service1"} 17941460
-kubecost_pod_network_ingress_bytes_total{pod_name="pod2",namespace="namespace1",internet="false",same_region="true",same_zone="false",service=""} 13948766
-# HELP kubecost_network_costs_parsed_entries kubecost_network_costs_parsed_entries total parsed conntrack entries.
-# TYPE kubecost_network_costs_parsed_entries gauge
-# HELP kubecost_network_costs_parse_time kubecost_network_costs_parse_time total time in milliseconds it took to parse conntrack entries.
-# TYPE kubecost_network_costs_parse_time gauge
-# EOF
-`
-
-func TestTargetScraper_Scrape(t *testing.T) {
-	tests := []struct {
-		name     string
-		target   target.ScrapeTarget
-		expected []UpdateArgs
-	}{
-		{
-			name:   "Network Scrape",
-			target: target.NewStringTarget(networkScape),
-			expected: []UpdateArgs{
-				{
-					metricName: KubecostPodNetworkEgressBytesTotal,
-					labels: map[string]string{
-						"pod_name":    "pod1",
-						"namespace":   "namespace1",
-						"internet":    "false",
-						"same_region": "true",
-						"same_zone":   "true",
-						"service":     "service1",
-					},
-					value:                 3127969647,
-					timestamp:             nil,
-					additionalInformation: nil,
-				},
-				{
-					metricName: KubecostPodNetworkEgressBytesTotal,
-					labels: map[string]string{
-						"pod_name":    "pod2",
-						"namespace":   "namespace1",
-						"internet":    "true",
-						"same_region": "false",
-						"same_zone":   "false",
-						"service":     "",
-					},
-					value:                 335188219,
-					timestamp:             nil,
-					additionalInformation: nil,
-				},
-				{
-					metricName: "kubecost_pod_network_ingress_bytes_total",
-					labels: map[string]string{
-						"pod_name":    "pod1",
-						"namespace":   "namespace1",
-						"internet":    "true",
-						"same_region": "false",
-						"same_zone":   "false",
-						"service":     "service1",
-					},
-					value:                 17941460,
-					timestamp:             nil,
-					additionalInformation: nil,
-				},
-				{
-					metricName: "kubecost_pod_network_ingress_bytes_total",
-					labels: map[string]string{
-						"pod_name":    "pod2",
-						"namespace":   "namespace1",
-						"internet":    "false",
-						"same_region": "true",
-						"same_zone":   "false",
-						"service":     "",
-					},
-					value:                 13948766,
-					timestamp:             nil,
-					additionalInformation: nil,
-				},
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			updateRecorder := UpdateRecorderCollector{}
-			scrapper := &TargetScraper{
-				targetProvider: NewMockTargetProvider(tt.target),
-				collector:      &updateRecorder,
-			}
-			scrapper.Scrape()
-
-			if len(updateRecorder.updateArgs) != len(tt.expected) {
-				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.updateArgs))
-			}
-
-			for i, expected := range tt.expected {
-				updateArg := updateRecorder.updateArgs[i]
-				err := expected.equals(updateArg)
-				if err != nil {
-					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
-				}
-			}
-		})
-	}
-}

+ 43 - 0
modules/collector-source/pkg/env/collectorenv.go

@@ -0,0 +1,43 @@
+package env
+
+import (
+	"github.com/opencost/opencost/core/pkg/env"
+)
+
+const (
+	ClusterIDEnvVar                 = "CLUSTER_ID"
+	ReleaseNameEnvVar               = "RELEASE_NAME"
+	NetworkPortEnvVar               = "NETWORK_PORT"
+	Collector10mResolutionRetention = "COLLECTOR_10M_RESOLUTION_RETENTION"
+	Collector1hResolutionRetention  = "COLLECTOR_1H_RESOLUTION_RETENTION"
+	Collection1dResolutionRetention = "COLLECTOR_1D_RESOLUTION_RETENTION"
+	CollectorScrapeIntervalSeconds  = "COLLECTOR_SCRAPE_INTERVAL_SECONDS"
+)
+
+func GetClusterID() string {
+	return env.Get(ClusterIDEnvVar, "")
+}
+
+func GetReleaseName() string {
+	return env.Get(ReleaseNameEnvVar, "kubecost")
+}
+
+func GetNetworkPort() int {
+	return env.GetInt(NetworkPortEnvVar, 3001)
+}
+
+func GetCollector10mResolutionRetention() int {
+	return env.GetInt(Collector10mResolutionRetention, 36)
+}
+
+func GetCollector1hResolutionRetention() int {
+	return env.GetInt(Collector1hResolutionRetention, 49)
+}
+
+func GetCollection1dResolutionRetention() int {
+	return env.GetInt(Collection1dResolutionRetention, 15)
+}
+
+func GetCollectorScrapeIntervalSeconds() int {
+	return env.GetInt(CollectorScrapeIntervalSeconds, 30)
+}

+ 1 - 1
modules/collector-source/pkg/collector/activeminutes.go → modules/collector-source/pkg/metric/aggregator/activeminutes.go

@@ -1,4 +1,4 @@
-package collector
+package aggregator
 
 import (
 	"time"

+ 59 - 0
modules/collector-source/pkg/metric/aggregator/aggregator.go

@@ -0,0 +1,59 @@
+package aggregator
+
+import (
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/core/pkg/util"
+)
+
+// MetricValue is a resulting data point value with an optional timestamp.
+type MetricValue struct {
+	Value     float64
+	Timestamp *time.Time
+}
+
+// MetricResult contains a resulting metric name, the associated labels and label values, and a slice of
+// MetricValues.
+type MetricResult struct {
+	Name         string
+	MetricLabels map[string]string
+	Values       []MetricValue
+}
+
+func (mr *MetricResult) ToQueryResult() *source.QueryResult {
+	metrics := map[string]any{}
+	for key, value := range mr.MetricLabels {
+		metrics[key] = value
+	}
+
+	values := make([]*util.Vector, len(mr.Values))
+	for i, value := range mr.Values {
+		timestamp := 0.0
+		if value.Timestamp != nil {
+			timestamp = float64(value.Timestamp.Unix())
+		}
+		values[i] = &util.Vector{
+			Timestamp: timestamp,
+			Value:     value.Value,
+		}
+	}
+
+	return source.NewQueryResult(metrics, values, nil)
+}
+
+// MetricAggregator is an interface that defines the methods for a metric aggregation.
+// For example, we have a metric `foo_metric`, and we wish to query and collect the average over time.
+// In this case, the `AverageOverTime` component is the MetricAggregator. It is the component responsible
+// for routing updates to metric values into their proper condensed form.
+type MetricAggregator interface {
+	Name() string
+	AdditionInfo() map[string]string
+	Update(value float64, timestamp *time.Time, additionalInfo map[string]string)
+	Value() []MetricValue
+	LabelValues() []string
+}
+
+// MetricAggregatorFactory is a function that accepts a string name and returns a pointer to a MetricAggregator
+// implementation.
+type MetricAggregatorFactory func(name string, labelValues []string) MetricAggregator

+ 1 - 1
modules/collector-source/pkg/collector/avgovertime.go → modules/collector-source/pkg/metric/aggregator/avgovertime.go

@@ -1,4 +1,4 @@
-package collector
+package aggregator
 
 import (
 	"time"

+ 1 - 1
modules/collector-source/pkg/collector/increase.go → modules/collector-source/pkg/metric/aggregator/increase.go

@@ -1,4 +1,4 @@
-package collector
+package aggregator
 
 import (
 	"time"

+ 1 - 1
modules/collector-source/pkg/collector/info.go → modules/collector-source/pkg/metric/aggregator/info.go

@@ -1,4 +1,4 @@
-package collector
+package aggregator
 
 import (
 	"maps"

+ 1 - 1
modules/collector-source/pkg/collector/maxovertime.go → modules/collector-source/pkg/metric/aggregator/maxovertime.go

@@ -1,4 +1,4 @@
-package collector
+package aggregator
 
 import (
 	"time"

+ 155 - 0
modules/collector-source/pkg/metric/collector.go

@@ -0,0 +1,155 @@
+package metric
+
+import (
+	"maps"
+	"time"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric/aggregator"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+)
+
+// MetricCollectorID is a unique identifier for a specific metric collector instance. We
+// use this identifier to register and unregister metric instances from the metric store
+// instead of the metric name and aggregation type to allow selectable cardinality (via Labels)
+// across multiple instances of the same aggregation type and metric name.
+type MetricCollectorID string
+
+const (
+	PVPricePerGiBHourID             MetricCollectorID = "PVPricePerGiBHour"
+	PVUsedAverageID                 MetricCollectorID = "PVUsedAverage"
+	PVUsedMaxID                     MetricCollectorID = "PVUsedMax"
+	PVCInfoID                       MetricCollectorID = "PVCInfo"
+	PVActiveMinutesID               MetricCollectorID = "PVActiveMinutes"
+	LocalStorageCostID              MetricCollectorID = "LocalStorageCost"
+	LocalStorageUsedCostID          MetricCollectorID = "LocalStorageUsedCost"
+	LocalStorageUsedAverageID       MetricCollectorID = "LocalStorageUsedAverage"
+	LocalStorageUsedMaxID           MetricCollectorID = "LocalStorageUsedMax"
+	LocalStorageBytesID             MetricCollectorID = "LocalStorageBytesID"
+	LocalStorageActiveMinutesID     MetricCollectorID = "LocalStorageActiveMinutes"
+	NodeCPUCoresCapacityID          MetricCollectorID = "NodeCPUCoresCapacity"
+	NodeCPUCoresAllocatableID       MetricCollectorID = "NodeCPUCoresAllocatable"
+	NodeRAMBytesCapacityID          MetricCollectorID = "NodeRAMBytesCapacity"
+	NodeRAMBytesAllocatableID       MetricCollectorID = "NodeRAMBytesAllocatable"
+	NodeGPUCountID                  MetricCollectorID = "NodeGPUCount"
+	NodeLabelsID                    MetricCollectorID = "NodeLabels"
+	NodeActiveMinutesID             MetricCollectorID = "NodeActiveMinutes"
+	NodeCPUModeTotalID              MetricCollectorID = "NodeCPUModeTotal"
+	NodeRAMSystemUsageAverageID     MetricCollectorID = "NodeRAMSystemUsageAverage"
+	NodeRAMUserUsageAverageID       MetricCollectorID = "NodeRAMUserUsageAverage"
+	LBPricePerHourID                MetricCollectorID = "LBPricePerHour"
+	LBActiveMinutesID               MetricCollectorID = "LBActiveMinutes"
+	ClusterManagementDurationID     MetricCollectorID = "ClusterManagementDuration"
+	ClusterManagementPricePerHourID MetricCollectorID = "ClusterManagementPricePerHour"
+	PodActiveMinutesID              MetricCollectorID = "PodActiveMinutes"
+	RAMBytesAllocatedID             MetricCollectorID = "RAMBytesAllocated"
+	RAMRequestsID                   MetricCollectorID = "RAMRequests"
+	RAMUsageAverageID               MetricCollectorID = "RAMUsageAverage"
+	RAMUsageMaxID                   MetricCollectorID = "RAMUsageMax"
+	CPUCoresAllocatedID             MetricCollectorID = "CPUCoresAllocated"
+	CPURequestsID                   MetricCollectorID = "CPURequestsID"
+	CPUUsageAverageID               MetricCollectorID = "CPUUsageAverage"
+	CPUUsageMaxID                   MetricCollectorID = "CPUUsageMax"
+	GPUsRequestedID                 MetricCollectorID = "GPUsRequested"
+	GPUsUsageAverageID              MetricCollectorID = "GPUsUsageAverage"
+	GPUsUsageMaxID                  MetricCollectorID = "GPUsUsageMax"
+	GPUsAllocatedID                 MetricCollectorID = "GPUsAllocated"
+	IsGPUSharedID                   MetricCollectorID = "IsGPUShared"
+	GPUInfoID                       MetricCollectorID = "GPUInfo"
+	NodeCPUPricePerHourID           MetricCollectorID = "NodeCPUPricePerHour"
+	NodeRAMPricePerGiBHourID        MetricCollectorID = "NodeRAMPricePerGiBHour"
+	NodeGPUPricePerHourID           MetricCollectorID = "NodeGPUPricePerHour"
+	NodeIsSpotID                    MetricCollectorID = "NodeIsSpot"
+	PodPVCAllocationID              MetricCollectorID = "PodPVCAllocation"
+	PVCBytesRequestedID             MetricCollectorID = "PVCBytesRequested"
+	PVBytesID                       MetricCollectorID = "PVBytesID"
+	PVCostPerGiBHourID              MetricCollectorID = "PVCostPerGiBHour"
+	PVInfoID                        MetricCollectorID = "PVInfo"
+	NetZoneGiBID                    MetricCollectorID = "NetZoneGiB"
+	NetZonePricePerGiBID            MetricCollectorID = "NetZonePricePerGiB"
+	NetRegionGiBID                  MetricCollectorID = "NetRegionGiB"
+	NetRegionPricePerGiBID          MetricCollectorID = "NetRegionPricePerGiB"
+	NetInternetGiBID                MetricCollectorID = "NetInternetGiB"
+	NetInternetPricePerGiBID        MetricCollectorID = "NetInternetPricePerGiB"
+	NetInternetServiceGiBID         MetricCollectorID = "NetInternetServiceGiB"
+	NetTransferBytesID              MetricCollectorID = "NetTransferBytes"
+	NetZoneIngressGiBID             MetricCollectorID = "NetZoneIngressGiB"
+	NetRegionIngressGiBID           MetricCollectorID = "NetRegionIngressGiB"
+	NetInternetIngressGiBID         MetricCollectorID = "NetInternetIngressGiB"
+	NetInternetServiceIngressGiBID  MetricCollectorID = "NetInternetServiceIngressGiB"
+	NetReceiveBytesID               MetricCollectorID = "NetReceiveBytes"
+	NamespaceLabelsID               MetricCollectorID = "NamespaceLabels"
+	NamespaceAnnotationsID          MetricCollectorID = "NamespaceAnnotations"
+	PodLabelsID                     MetricCollectorID = "PodLabels"
+	PodAnnotationsID                MetricCollectorID = "PodAnnotations"
+	ServiceLabelsID                 MetricCollectorID = "ServiceLabels"
+	DeploymentLabelsID              MetricCollectorID = "DeploymentLabels"
+	StatefulSetLabelsID             MetricCollectorID = "StatefulSetLabels"
+	DaemonSetLabelsID               MetricCollectorID = "DaemonSetLabels"
+	JobLabelsID                     MetricCollectorID = "JobLabels"
+	PodsWithReplicaSetOwnerID       MetricCollectorID = "PodsWithReplicaSetOwner"
+	ReplicaSetsWithoutOwnersID      MetricCollectorID = "ReplicaSetsWithoutOwners"
+	ReplicaSetsWithRolloutID        MetricCollectorID = "ReplicaSetsWithRollout"
+)
+
+// MetricCollector is a data structure that represents a specific metric collector instance that contains its own breakdown
+// of stored metrics by a specific label set.
+type MetricCollector struct {
+	id                MetricCollectorID // ie: RAMUsageAverage
+	metricName        string            // ie: container_memory_working_set_bytes
+	labels            []string
+	aggregatorFactory aggregator.MetricAggregatorFactory
+	metrics           map[uint64]aggregator.MetricAggregator // map[Hash(labelValues)] = aggregator
+	filter            func(map[string]string) bool
+}
+
+// NewMetricCollector creates a new MetricCollector instance with a unique identifier. The metric name is the specific
+// name of the collected metric that will be used to route updates to this collector.
+func NewMetricCollector(id MetricCollectorID, metricName string, labels []string, aggregatorFactory aggregator.MetricAggregatorFactory, fn func(map[string]string) bool) *MetricCollector {
+	return &MetricCollector{
+		id:                id,
+		metricName:        metricName,
+		labels:            labels,
+		aggregatorFactory: aggregatorFactory,
+		metrics:           make(map[uint64]aggregator.MetricAggregator),
+		filter:            fn,
+	}
+}
+
+func (mi *MetricCollector) Update(labels map[string]string, value float64, timestamp *time.Time, additionalInfo map[string]string) {
+	if mi.filter != nil && !mi.filter(labels) {
+		return
+	}
+
+	labelValues := make([]string, len(mi.labels))
+	for i, key := range mi.labels {
+		labelValues[i] = labels[key]
+	}
+	key := util.Hash(labelValues)
+	if mi.metrics[key] == nil {
+		mi.metrics[key] = mi.aggregatorFactory(
+			util.MetricNameFor(mi.metricName, mi.labels, labelValues), labelValues)
+	}
+
+	mi.metrics[key].Update(value, timestamp, additionalInfo)
+}
+
+func (mi *MetricCollector) Get() []*aggregator.MetricResult {
+	results := make([]*aggregator.MetricResult, 0, len(mi.metrics))
+	for _, metric := range mi.metrics {
+		labels := util.ToMap(mi.labels, metric.LabelValues())
+		maps.Copy(labels, metric.AdditionInfo())
+		mr := &aggregator.MetricResult{
+			Name:         metric.Name(),
+			MetricLabels: labels,
+			Values:       metric.Value(),
+		}
+
+		results = append(results, mr)
+	}
+
+	return results
+}
+
+func (mi *MetricCollector) Labels() []string {
+	return mi.labels
+}

+ 163 - 0
modules/collector-source/pkg/metric/repository.go

@@ -0,0 +1,163 @@
+package metric
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+)
+
+type RepositoryConfig struct {
+	Resolutions []util.ResolutionConfiguration
+}
+
+// MetricRepository is a MetricUpdater which applies calls to update to all resolutions being tracked. It holds the
+// MetricStore instances for each resolution.
+type MetricRepository struct {
+	lock             sync.Mutex
+	resolutionStores map[string]*resolutionStores
+}
+
+func NewMetricRepository(config RepositoryConfig, factory MetricStoreFactory) *MetricRepository {
+	resoluationCollectors := make(map[string]*resolutionStores)
+
+	for _, resConf := range config.Resolutions {
+		resCollector, err := newResolutionStores(resConf, factory)
+		if err != nil {
+			log.Errorf("NewMetricRepository: failed to init resolution metric: %s", err.Error())
+			continue
+		}
+		resoluationCollectors[resConf.Interval] = resCollector
+	}
+
+	repo := &MetricRepository{
+		resolutionStores: resoluationCollectors,
+	}
+
+	return repo
+}
+
+func (r *MetricRepository) GetCollector(interval string, t time.Time) (MetricStore, error) {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+
+	resCollector, ok := r.resolutionStores[interval]
+	if !ok {
+		return nil, fmt.Errorf("failed to find resolution for key %s", interval)
+	}
+
+	return resCollector.getCollector(t)
+}
+
+// Update calls Update on the collectors for each resolution
+func (r *MetricRepository) Update(
+	metricName string,
+	labels map[string]string,
+	value float64,
+	timestamp *time.Time,
+	additionalInformation map[string]string,
+) {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+	if timestamp == nil {
+		timestamp = util.Ptr(time.Now().UTC())
+	}
+	t := *timestamp
+	// Call update on the collectors for each resolution
+	for _, resCollector := range r.resolutionStores {
+		resCollector.update(metricName, labels, value, t, additionalInformation)
+	}
+}
+
+// resolutionStores is a grouping of a resolution and the instances of MetricStore that it is used to manage
+type resolutionStores struct {
+	lock       sync.Mutex
+	resolution *util.Resolution
+	collectors map[int64]MetricStore
+	factory    func() MetricStore
+}
+
+func newResolutionStores(resConf util.ResolutionConfiguration, factory MetricStoreFactory) (*resolutionStores, error) {
+	resolution, err := util.NewResolution(resConf)
+	if err != nil {
+		return nil, fmt.Errorf("NewResolutionCollectors: %w", err)
+	}
+
+	resCol := &resolutionStores{
+		resolution: resolution,
+		collectors: map[int64]MetricStore{},
+		factory:    factory,
+	}
+
+	// Start loop which will remove expired MetricStore
+	go func() {
+		for {
+			time.Sleep(resCol.resolution.Next().Sub(time.Now().UTC()))
+			resCol.clean()
+		}
+	}()
+
+	return resCol, nil
+}
+
+func (r *resolutionStores) clean() {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+	limitKey := r.resolution.Limit().UnixMilli()
+	for key := range r.collectors {
+		if key < limitKey {
+			delete(r.collectors, key)
+		}
+	}
+}
+
+func (r *resolutionStores) update(
+	metricName string,
+	labels map[string]string,
+	value float64,
+	timestamp time.Time,
+	additionalInformation map[string]string,
+) {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+	limit := r.resolution.Limit()
+	if timestamp.Before(limit) {
+		log.Debugf(
+			"failed to call update on resolution '%s' because Timestamp '%s' is before the limit '%s",
+			r.resolution.Interval(),
+			timestamp.Format(time.RFC3339),
+			limit.Format(time.RFC3339),
+		)
+		return
+	}
+	key := r.resolution.Get(timestamp).UnixMilli()
+	collector, ok := r.collectors[key]
+	if !ok {
+		collector = r.factory()
+		r.collectors[key] = collector
+	}
+	collector.Update(metricName, labels, value, &timestamp, additionalInformation)
+}
+
+func (r *resolutionStores) getCollector(t time.Time) (MetricStore, error) {
+	r.lock.Lock()
+	defer r.lock.Unlock()
+	if t.Before(r.resolution.Limit()) {
+		return nil, fmt.Errorf(
+			"request for metric at time '%s' for resolution '%s' is past limit of '%s'",
+			t.Format(time.RFC3339),
+			r.resolution.Interval(),
+			r.resolution.Limit().Format(time.RFC3339),
+		)
+	}
+	key := r.resolution.Get(t).UnixMilli()
+
+	collector, ok := r.collectors[key]
+	if !ok {
+		return nil, fmt.Errorf("failed to find MetricCollector for interval '%s' for time '%s'", r.resolution.Interval(), t.Format(time.RFC3339))
+	}
+
+	return collector, nil
+}

+ 100 - 0
modules/collector-source/pkg/metric/store.go

@@ -0,0 +1,100 @@
+package metric
+
+import (
+	"fmt"
+	"slices"
+	"sync"
+	"time"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric/aggregator"
+)
+
+// MetricStore is an interface that defines an implementation capable of managing a collection
+// of metric instances, and exposes helper methods for routing metric updates and queries to the
+// proper metric instances.
+type MetricStore interface {
+	// Register accepts a `MetricCollector` instance and registers it for routing updates and querying.
+	Register(collector *MetricCollector) error
+
+	// Unregister accepts a `MetricCollectorID` and unregisters the metric collector instance from receiving metric
+	// updates and query availability.
+	Unregister(collectorID MetricCollectorID) bool
+
+	// Query accepts a `MetricCollectorID` and returns a slice of `MetricResult` instances for that metric.
+	Query(collectorID MetricCollectorID) ([]*aggregator.MetricResult, error)
+
+	MetricUpdater
+}
+
+type MetricStoreFactory func() MetricStore
+
+// InMemoryMetricStore is a thread-safe implementation of the MetricStore interface that stores MetricCollector instances
+// in memory.
+type InMemoryMetricStore struct {
+	lock          sync.Mutex
+	byMetricName  map[string][]*MetricCollector
+	byCollectorID map[MetricCollectorID]*MetricCollector
+}
+
+func NewInMemoryMetricStore() MetricStore {
+	return &InMemoryMetricStore{
+		byMetricName:  make(map[string][]*MetricCollector),
+		byCollectorID: make(map[MetricCollectorID]*MetricCollector),
+	}
+}
+
+func (m *InMemoryMetricStore) Register(collector *MetricCollector) error {
+	m.lock.Lock()
+	defer m.lock.Unlock()
+
+	if _, ok := m.byCollectorID[collector.id]; ok {
+		return fmt.Errorf("metric with ID: %s already exists", collector.id)
+	}
+
+	m.byCollectorID[collector.id] = collector
+	m.byMetricName[collector.metricName] = append(m.byMetricName[collector.metricName], collector)
+	return nil
+}
+
+func (m *InMemoryMetricStore) Unregister(collectorID MetricCollectorID) bool {
+	m.lock.Lock()
+	defer m.lock.Unlock()
+
+	if _, ok := m.byCollectorID[collectorID]; !ok {
+		return false
+	}
+
+	inst := m.byCollectorID[collectorID]
+	m.byMetricName[inst.metricName] = slices.DeleteFunc(m.byMetricName[inst.metricName], func(mc *MetricCollector) bool {
+		return mc == nil || mc.id == collectorID
+	})
+
+	delete(m.byCollectorID, collectorID)
+	return true
+}
+
+func (m *InMemoryMetricStore) Query(collectorID MetricCollectorID) ([]*aggregator.MetricResult, error) {
+	m.lock.Lock()
+	defer m.lock.Unlock()
+
+	if _, ok := m.byCollectorID[collectorID]; !ok {
+		return nil, fmt.Errorf("metric with ID: %s does not exist", collectorID)
+	}
+
+	return m.byCollectorID[collectorID].Get(), nil
+}
+
+func (m *InMemoryMetricStore) Update(
+	metricName string,
+	labels map[string]string,
+	value float64,
+	timestamp *time.Time,
+	additionalInformation map[string]string,
+) {
+	m.lock.Lock()
+	defer m.lock.Unlock()
+
+	for _, collector := range m.byMetricName[metricName] {
+		collector.Update(labels, value, timestamp, additionalInformation)
+	}
+}

+ 69 - 0
modules/collector-source/pkg/metric/updater.go

@@ -0,0 +1,69 @@
+package metric
+
+import (
+	"fmt"
+	"time"
+
+	"golang.org/x/exp/maps"
+)
+
+type MetricUpdater interface {
+	// Update accepts the name of a metric, the label set and values to update the metric, the updated Value, and a Timestamp.
+	// This method does not accept a `MetricCollectorID` because it provides updates across many potential MetricCollector instances
+	// which utilize the same metric.
+	Update(metricName string, labels map[string]string, value float64, timestamp *time.Time, additionalInformation map[string]string)
+}
+
+// ArgRecordUpdater is a mock MetricStore which records the arguments passed to the update function in an array
+type ArgRecordUpdater struct {
+	UpdateArgs []UpdateArgs
+}
+
+func (u *ArgRecordUpdater) Update(metricName string, labels map[string]string, value float64, timestamp *time.Time, additionalInformation map[string]string) {
+	u.UpdateArgs = append(u.UpdateArgs, UpdateArgs{
+		MetricName:            metricName,
+		Labels:                labels,
+		Value:                 value,
+		Timestamp:             timestamp,
+		AdditionalInformation: additionalInformation,
+	})
+}
+
+type UpdateArgs struct {
+	MetricName            string
+	Labels                map[string]string
+	Value                 float64
+	Timestamp             *time.Time
+	AdditionalInformation map[string]string
+}
+
+func (u UpdateArgs) Equals(that UpdateArgs) error {
+	if u.MetricName != that.MetricName {
+		return fmt.Errorf("expected metric name %s, got %s", u.MetricName, that.MetricName)
+	}
+
+	if !maps.Equal(u.Labels, that.Labels) {
+		return fmt.Errorf("expected Labels %s, got %s", u.Labels, that.Labels)
+	}
+
+	if u.Value != that.Value {
+		return fmt.Errorf("expected Value %f, got %f", u.Value, that.Value)
+	}
+
+	if that.Timestamp != nil {
+		if u.Timestamp == nil {
+			return fmt.Errorf("expected Timestamp nil, got %v", that.Timestamp)
+		}
+		if !u.Timestamp.Equal(*that.Timestamp) {
+			return fmt.Errorf("expected Timestamp %s, got %s", u.Timestamp, that.Timestamp)
+		}
+	} else if u.Timestamp != nil {
+		return fmt.Errorf("expected Timestamp %v, got nil", u.Timestamp)
+	}
+
+	if !maps.Equal(u.AdditionalInformation, that.AdditionalInformation) {
+		return fmt.Errorf("expected AdditionalInformation %v, got %v", u.AdditionalInformation, that.AdditionalInformation)
+	}
+
+	return nil
+}

+ 0 - 31
modules/collector-source/pkg/metrics/scraper.go

@@ -1,31 +0,0 @@
-package metrics
-
-import (
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/parser"
-	"github.com/opencost/opencost/modules/collector-source/pkg/metrics/target"
-)
-
-// MetricScraper is a struct that is used to scrape and parse a raw metrics `ScrapeTarget.`
-type MetricScraper struct {
-	scrapeTarget target.ScrapeTarget
-}
-
-func NewMetricScraper(scrapeTarget target.ScrapeTarget) *MetricScraper {
-	return &MetricScraper{
-		scrapeTarget: scrapeTarget,
-	}
-}
-
-func (s *MetricScraper) Scrape() ([]*parser.MetricRecord, error) {
-	reader, err := s.scrapeTarget.Load()
-	if err != nil {
-		return nil, err
-	}
-
-	metrics, err := parser.Parse(reader)
-	if err != nil {
-		return nil, err
-	}
-
-	return metrics, nil
-}

+ 0 - 13
modules/collector-source/pkg/metrics/target/target.go

@@ -1,13 +0,0 @@
-package target
-
-import "io"
-
-// ScrapeTarget is an interface representing an object that is capable of loading/refreshing it's
-// target data.
-type ScrapeTarget interface {
-	Load() (io.Reader, error)
-}
-
-type TargetProvider interface {
-	GetTargets() []ScrapeTarget
-}

+ 394 - 0
modules/collector-source/pkg/scrape/clustercache.go

@@ -0,0 +1,394 @@
+package scrape
+
+import (
+	"fmt"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/core/pkg/util/promutil"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	"golang.org/x/exp/maps"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/util/validation"
+)
+
+// Cluster Cache Metrics
+//
+// Names of the metric series emitted by ClusterCacheScraper. They mirror the
+// kube-state-metrics naming scheme (plus Kubecost-specific series such as
+// kubecost_pv_info) so downstream queries can consume them unchanged.
+const (
+	KubeNodeStatusCapacityCPUCores                        = "kube_node_status_capacity_cpu_cores"
+	KubeNodeStatusCapacityMemoryBytes                     = "kube_node_status_capacity_memory_bytes"
+	KubeNodeStatusAllocatableCPUCores                     = "kube_node_status_allocatable_cpu_cores"
+	KubeNodeStatusAllocatableMemoryBytes                  = "kube_node_status_allocatable_memory_bytes"
+	KubeNodeLabels                                        = "kube_node_labels"
+	KubePodLabels                                         = "kube_pod_labels"
+	KubePodAnnotations                                    = "kube_pod_annotations"
+	KubePodOwner                                          = "kube_pod_owner"
+	KubePodContainerStatusRunning                         = "kube_pod_container_status_running"
+	KubePodContainerResourceRequests                      = "kube_pod_container_resource_requests"
+	KubePersistentVolumeClaimInfo                         = "kube_persistentvolumeclaim_info"
+	KubePersistentVolumeClaimResourceRequestsStorageBytes = "kube_persistentvolumeclaim_resource_requests_storage_bytes"
+	KubecostPVInfo                                        = "kubecost_pv_info"
+	KubePersistentVolumeCapacityBytes                     = "kube_persistentvolume_capacity_bytes"
+	DeploymentMatchLabels                                 = "deployment_match_labels"
+	KubeNamespaceLabels                                   = "kube_namespace_labels"
+	KubeNamespaceAnnotations                              = "kube_namespace_annotations"
+	ServiceSelectorLabels                                 = "service_selector_labels"
+	StatefulSetMatchLabels                                = "statefulSet_match_labels"
+	KubeReplicasetOwner                                   = "kube_replicaset_owner"
+)
+
+// ClusterCacheScraper generates kube-state-style metric samples from an
+// in-memory Kubernetes cluster cache and forwards each sample to a
+// metric.MetricUpdater sink.
+type ClusterCacheScraper struct {
+	clusterCache clustercache.ClusterCache // snapshot source for nodes, pods, PVs, services, etc.
+	updater      metric.MetricUpdater      // sink receiving every generated metric sample
+}
+
+// newClusterCacheScraper wires a cluster cache to a metric updater and returns
+// the result as the package-level Scraper interface.
+func newClusterCacheScraper(clusterCache clustercache.ClusterCache, updater metric.MetricUpdater) Scraper {
+	return &ClusterCacheScraper{
+		clusterCache: clusterCache,
+		updater:      updater,
+	}
+}
+
+// Scrape takes a point-in-time snapshot of every cached resource kind and
+// emits the corresponding metrics. A single UTC timestamp is captured up
+// front so all samples from one scrape share the same time.
+func (ccs *ClusterCacheScraper) Scrape() {
+	timestamp := time.Now().UTC()
+	nodes := ccs.clusterCache.GetAllNodes()
+	deployments := ccs.clusterCache.GetAllDeployments()
+	namespaces := ccs.clusterCache.GetAllNamespaces()
+	pods := ccs.clusterCache.GetAllPods()
+	pvcs := ccs.clusterCache.GetAllPersistentVolumeClaims()
+	pvs := ccs.clusterCache.GetAllPersistentVolumes()
+	services := ccs.clusterCache.GetAllServices()
+	statefulSets := ccs.clusterCache.GetAllStatefulSets()
+	replicaSets := ccs.clusterCache.GetAllReplicaSets()
+
+	ccs.scrapeNodes(nodes, timestamp)
+	ccs.scrapeDeployments(deployments, timestamp)
+	ccs.scrapeNamespaces(namespaces, timestamp)
+	ccs.scrapePods(pods, timestamp)
+	ccs.scrapePVCs(pvcs, timestamp)
+	ccs.scrapePVs(pvs, timestamp)
+	ccs.scrapeServices(services, timestamp)
+	ccs.scrapeStatefulSets(statefulSets, timestamp)
+	ccs.scrapeReplicaSets(replicaSets, timestamp)
+}
+
+// scrapeNodes emits capacity, allocatable, and label metrics for each node.
+// CPU quantities are converted to cores and memory to bytes via
+// toResourceUnitValue; label metrics carry value 0 with the sanitized labels
+// attached as additional information.
+func (ccs *ClusterCacheScraper) scrapeNodes(nodes []*clustercache.Node, timestamp time.Time) {
+	for _, node := range nodes {
+		nodeInfo := map[string]string{
+			source.NodeLabel:       node.Name,
+			source.ProviderIDLabel: node.SpecProviderID,
+		}
+
+		// Node Capacity
+		if node.Status.Capacity != nil {
+			if quantity, ok := node.Status.Capacity[v1.ResourceCPU]; ok {
+				_, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
+				ccs.updater.Update(KubeNodeStatusCapacityCPUCores, nodeInfo, value, &timestamp, nil)
+			}
+
+			if quantity, ok := node.Status.Capacity[v1.ResourceMemory]; ok {
+				_, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
+				ccs.updater.Update(KubeNodeStatusCapacityMemoryBytes, nodeInfo, value, &timestamp, nil)
+			}
+		}
+
+		// Node Allocatable Resources
+		if node.Status.Allocatable != nil {
+			if quantity, ok := node.Status.Allocatable[v1.ResourceCPU]; ok {
+				_, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
+				ccs.updater.Update(KubeNodeStatusAllocatableCPUCores, nodeInfo, value, &timestamp, nil)
+			}
+
+			if quantity, ok := node.Status.Allocatable[v1.ResourceMemory]; ok {
+				_, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
+				ccs.updater.Update(KubeNodeStatusAllocatableMemoryBytes, nodeInfo, value, &timestamp, nil)
+			}
+		}
+
+		// node labels (prometheus-sanitized, "label_"-prefixed by KubeLabelsToLabels)
+		labelNames, labelValues := promutil.KubeLabelsToLabels(node.Labels)
+		nodeLabels := util.ToMap(labelNames, labelValues)
+
+		ccs.updater.Update(KubeNodeLabels, nodeInfo, 0, &timestamp, nodeLabels)
+
+	}
+}
+
+// scrapeDeployments emits one deployment_match_labels sample per deployment,
+// carrying the deployment's selector match-labels as additional information.
+func (ccs *ClusterCacheScraper) scrapeDeployments(deployments []*clustercache.Deployment, timestamp time.Time) {
+	for _, deployment := range deployments {
+		deploymentInfo := map[string]string{
+			source.DeploymentLabel: deployment.Name,
+			source.NamespaceLabel:  deployment.Namespace,
+		}
+
+		// deployment selector labels, prometheus-sanitized
+		labelNames, labelValues := promutil.KubeLabelsToLabels(deployment.MatchLabels)
+		deploymentLabels := util.ToMap(labelNames, labelValues)
+
+		ccs.updater.Update(DeploymentMatchLabels, deploymentInfo, 0, &timestamp, deploymentLabels)
+
+	}
+}
+
+// scrapeNamespaces emits label and annotation metrics for each namespace.
+func (ccs *ClusterCacheScraper) scrapeNamespaces(namespaces []*clustercache.Namespace, timestamp time.Time) {
+	for _, namespace := range namespaces {
+		namespaceInfo := map[string]string{
+			source.NamespaceLabel: namespace.Name,
+		}
+
+		// namespace labels ("label_"-prefixed)
+		labelNames, labelValues := promutil.KubeLabelsToLabels(namespace.Labels)
+		namespaceLabels := util.ToMap(labelNames, labelValues)
+		ccs.updater.Update(KubeNamespaceLabels, namespaceInfo, 0, &timestamp, namespaceLabels)
+
+		// namespace annotations ("annotation_"-prefixed)
+		annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(namespace.Annotations)
+		namespaceAnnotations := util.ToMap(annotationNames, annotationValues)
+		ccs.updater.Update(KubeNamespaceAnnotations, namespaceInfo, 0, &timestamp, namespaceAnnotations)
+	}
+}
+
+// scrapePods emits, for every pod: label and annotation metrics, one
+// kube_pod_owner sample per owner reference, a running-status sample per
+// running container, and one resource-request sample per container resource.
+// The base pod label set is cloned before per-owner/per-container labels are
+// added so siblings do not share mutated maps.
+func (ccs *ClusterCacheScraper) scrapePods(pods []*clustercache.Pod, timestamp time.Time) {
+	for _, pod := range pods {
+		podInfo := map[string]string{
+			source.PodLabel:       pod.Name,
+			source.NamespaceLabel: pod.Namespace,
+			source.UIDLabel:       string(pod.UID),
+			source.NodeLabel:      pod.Spec.NodeName,
+			source.InstanceLabel:  pod.Spec.NodeName,
+		}
+
+		// pod labels
+		labelNames, labelValues := promutil.KubeLabelsToLabels(pod.Labels)
+		podLabels := util.ToMap(labelNames, labelValues)
+		ccs.updater.Update(KubePodLabels, podInfo, 0, &timestamp, podLabels)
+
+		// pod annotations
+		annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(pod.Annotations)
+		podAnnotations := util.ToMap(annotationNames, annotationValues)
+		ccs.updater.Update(KubePodAnnotations, podInfo, 0, &timestamp, podAnnotations)
+
+		// Pod owner metric
+		for _, owner := range pod.OwnerReferences {
+			ownerInfo := maps.Clone(podInfo)
+			ownerInfo[source.OwnerKindLabel] = owner.Kind
+			ownerInfo[source.OwnerNameLabel] = owner.Name
+			ccs.updater.Update(KubePodOwner, ownerInfo, 0, &timestamp, nil)
+		}
+
+		// Container Status; only running containers produce a sample
+		for _, status := range pod.Status.ContainerStatuses {
+			if status.State.Running != nil {
+				containerInfo := maps.Clone(podInfo)
+				containerInfo[source.ContainerLabel] = status.Name
+				ccs.updater.Update(KubePodContainerStatusRunning, containerInfo, 0, &timestamp, nil)
+			}
+		}
+
+		for _, container := range pod.Spec.Containers {
+			containerInfo := maps.Clone(podInfo)
+			containerInfo[source.ContainerLabel] = container.Name
+			// Requests
+			if container.Resources.Requests != nil {
+				// sorting keys here for testing purposes
+				keys := maps.Keys(container.Resources.Requests)
+				slices.Sort(keys)
+				for _, resourceName := range keys {
+					quantity := container.Resources.Requests[resourceName]
+					resource, unit, value := toResourceUnitValue(resourceName, quantity)
+
+					// failed to parse the resource type
+					if resource == "" {
+						log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
+						continue
+					}
+
+					resourceRequestInfo := maps.Clone(containerInfo)
+					resourceRequestInfo[source.ResourceLabel] = resource
+					resourceRequestInfo[source.UnitLabel] = unit
+					ccs.updater.Update(KubePodContainerResourceRequests, resourceRequestInfo, value, &timestamp, nil)
+				}
+			}
+		}
+	}
+}
+
+// scrapePVCs emits an info sample for each persistent volume claim and, when
+// a storage request is present, its requested size in bytes.
+func (ccs *ClusterCacheScraper) scrapePVCs(pvcs []*clustercache.PersistentVolumeClaim, timestamp time.Time) {
+	for _, pvc := range pvcs {
+		pvcInfo := map[string]string{
+			source.PVCLabel:          pvc.Name,
+			source.NamespaceLabel:    pvc.Namespace,
+			source.VolumeNameLabel:   pvc.Spec.VolumeName,
+			source.StorageClassLabel: getPersistentVolumeClaimClass(pvc),
+		}
+
+		ccs.updater.Update(KubePersistentVolumeClaimInfo, pvcInfo, 0, &timestamp, nil)
+
+		if storage, ok := pvc.Spec.Resources.Requests[v1.ResourceStorage]; ok {
+			ccs.updater.Update(KubePersistentVolumeClaimResourceRequestsStorageBytes, pvcInfo, float64(storage.Value()), &timestamp, nil)
+		}
+	}
+}
+
+// scrapePVs emits an info sample and a capacity-in-bytes sample for each
+// persistent volume. The CSI volume handle is preferred over the PV name as
+// the provider ID when available.
+func (ccs *ClusterCacheScraper) scrapePVs(pvs []*clustercache.PersistentVolume, timestamp time.Time) {
+	for _, pv := range pvs {
+		providerID := pv.Name
+		// if a more accurate provider ID is available, use that
+		if pv.Spec.CSI != nil && pv.Spec.CSI.VolumeHandle != "" {
+			providerID = pv.Spec.CSI.VolumeHandle
+		}
+		pvInfo := map[string]string{
+			source.PVLabel:           pv.Name,
+			source.StorageClassLabel: pv.Spec.StorageClassName,
+			source.ProviderIDLabel:   providerID,
+		}
+
+		ccs.updater.Update(KubecostPVInfo, pvInfo, 0, &timestamp, nil)
+
+		if storage, ok := pv.Spec.Capacity[v1.ResourceStorage]; ok {
+			ccs.updater.Update(KubePersistentVolumeCapacityBytes, pvInfo, float64(storage.Value()), &timestamp, nil)
+		}
+	}
+}
+
+// scrapeServices emits one service_selector_labels sample per service,
+// carrying the service's spec selector as additional information.
+func (ccs *ClusterCacheScraper) scrapeServices(services []*clustercache.Service, timestamp time.Time) {
+	for _, service := range services {
+		serviceInfo := map[string]string{
+			source.ServiceLabel:   service.Name,
+			source.NamespaceLabel: service.Namespace,
+		}
+
+		// service selector labels, prometheus-sanitized
+		labelNames, labelValues := promutil.KubeLabelsToLabels(service.SpecSelector)
+		serviceLabels := util.ToMap(labelNames, labelValues)
+		ccs.updater.Update(ServiceSelectorLabels, serviceInfo, 0, &timestamp, serviceLabels)
+
+	}
+}
+
+// scrapeStatefulSets emits one statefulSet_match_labels sample per stateful
+// set, carrying the selector match-labels as additional information.
+func (ccs *ClusterCacheScraper) scrapeStatefulSets(statefulSets []*clustercache.StatefulSet, timestamp time.Time) {
+	for _, statefulSet := range statefulSets {
+		statefulSetInfo := map[string]string{
+			source.StatefulSetLabel: statefulSet.Name,
+			source.NamespaceLabel:   statefulSet.Namespace,
+		}
+
+		// statefulSet selector match labels, prometheus-sanitized
+		labelNames, labelValues := promutil.KubeLabelsToLabels(statefulSet.SpecSelector.MatchLabels)
+		statefulSetLabels := util.ToMap(labelNames, labelValues)
+		ccs.updater.Update(StatefulSetMatchLabels, statefulSetInfo, 0, &timestamp, statefulSetLabels)
+
+	}
+}
+
+// scrapeReplicaSets emits one kube_replicaset_owner sample per owner
+// reference of each replica set. Replica sets without owner references
+// produce no samples.
+func (ccs *ClusterCacheScraper) scrapeReplicaSets(replicaSets []*clustercache.ReplicaSet, timestamp time.Time) {
+	for _, replicaSet := range replicaSets {
+		replicaSetInfo := map[string]string{
+			source.ReplicaSetLabel: replicaSet.Name,
+			source.NamespaceLabel:  replicaSet.Namespace,
+		}
+
+		for _, owner := range replicaSet.OwnerReferences {
+			ownerInfo := maps.Clone(replicaSetInfo)
+			ownerInfo[source.OwnerKindLabel] = owner.Kind
+			ownerInfo[source.OwnerNameLabel] = owner.Name
+			ccs.updater.Update(KubeReplicasetOwner, ownerInfo, 0, &timestamp, nil)
+		}
+	}
+}
+
+// getPersistentVolumeClaimClass returns the claim's storage class name,
+// preferring the legacy beta annotation over Spec.StorageClassName. If no
+// storage class was requested, it returns "".
+func getPersistentVolumeClaimClass(claim *clustercache.PersistentVolumeClaim) string {
+	// Use beta annotation first
+	if class, found := claim.Annotations[v1.BetaStorageClassAnnotation]; found {
+		return class
+	}
+
+	if claim.Spec.StorageClassName != nil {
+		return *claim.Spec.StorageClassName
+	}
+
+	// No storage class was specified on the claim.
+	return ""
+}
+
+// toResourceUnitValue accepts a resource name and quantity and returns the sanitized resource, the unit, and the value in the units.
+// CPU is reported in cores (milli-value / 1000 to preserve fractional cores);
+// storage, ephemeral-storage, memory, huge pages, and attachable volumes in
+// bytes; pods and extended resources as integers.
+// Returns an empty string for resource and unit if there was a failure.
+func toResourceUnitValue(resourceName v1.ResourceName, quantity resource.Quantity) (resource string, unit string, value float64) {
+	resource = promutil.SanitizeLabelName(string(resourceName))
+
+	switch resourceName {
+	case v1.ResourceCPU:
+		unit = "core"
+		value = float64(quantity.MilliValue()) / 1000
+		return
+
+	case v1.ResourceStorage:
+		fallthrough
+	case v1.ResourceEphemeralStorage:
+		fallthrough
+	case v1.ResourceMemory:
+		unit = "byte"
+		value = float64(quantity.Value())
+		return
+	case v1.ResourcePods:
+		unit = "integer"
+		value = float64(quantity.Value())
+		return
+	default:
+		if isHugePageResourceName(resourceName) || isAttachableVolumeResourceName(resourceName) {
+			unit = "byte"
+			value = float64(quantity.Value())
+			return
+		}
+
+		if isExtendedResourceName(resourceName) {
+			unit = "integer"
+			value = float64(quantity.Value())
+			return
+		}
+	}
+
+	// unrecognized resource: signal failure with empty resource/unit
+	resource = ""
+	unit = ""
+	value = 0.0
+	return
+}
+
+// isHugePageResourceName checks for a huge page container resource name
+// (prefix "hugepages-").
+func isHugePageResourceName(name v1.ResourceName) bool {
+	return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
+}
+
+// isAttachableVolumeResourceName checks for attached volume container resource name
+// (prefix "attachable-volumes-").
+func isAttachableVolumeResourceName(name v1.ResourceName) bool {
+	return strings.HasPrefix(string(name), v1.ResourceAttachableVolumesPrefix)
+}
+
+// isExtendedResourceName checks for extended container resource name: not a
+// native resource, not "requests."-prefixed, and still a qualified name once
+// converted to a quota resource name.
+func isExtendedResourceName(name v1.ResourceName) bool {
+	if isNativeResource(name) || strings.HasPrefix(string(name), v1.DefaultResourceRequestsPrefix) {
+		return false
+	}
+	// Ensure it satisfies the rules in IsQualifiedName() after converted into quota resource name
+	nameForQuota := fmt.Sprintf("%s%s", v1.DefaultResourceRequestsPrefix, string(name))
+	if errs := validation.IsQualifiedName(nameForQuota); len(errs) != 0 {
+		return false
+	}
+	return true
+}
+
+// isNativeResource checks for a kubernetes.io/ prefixed resource name;
+// unprefixed names (no "/") are also considered native.
+func isNativeResource(name v1.ResourceName) bool {
+	return !strings.Contains(string(name), "/") || isPrefixedNativeResource(name)
+}
+
+// isPrefixedNativeResource reports whether the name contains the default
+// kubernetes.io/ resource namespace prefix.
+func isPrefixedNativeResource(name v1.ResourceName) bool {
+	return strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
+}

+ 878 - 0
modules/collector-source/pkg/scrape/clustercache_test.go

@@ -0,0 +1,878 @@
+package scrape
+
+import (
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// Start1Str is the shared RFC 3339 fixture timestamp for the scrape tests.
+// Note: the previous value "2025-01-01T00:00:00Z00:00" was not valid RFC 3339
+// ("Z" may not be followed by an offset), so time.Parse failed and the tests
+// silently used the zero time.
+var Start1Str = "2025-01-01T00:00:00Z"
+
+// Test_kubernetesScraper_scrapeNodes verifies that scrapeNodes emits the
+// expected capacity, allocatable, and label samples for a single node.
+func Test_kubernetesScraper_scrapeNodes(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		Nodes     []*clustercache.Node
+		Timestamp time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					Nodes: []*clustercache.Node{
+						{
+							Name:           "node1",
+							SpecProviderID: "i-1",
+							Status: v1.NodeStatus{
+								Capacity: v1.ResourceList{
+									v1.ResourceCPU:    resource.MustParse("2"),
+									v1.ResourceMemory: resource.MustParse("2048"),
+								},
+								Allocatable: v1.ResourceList{
+									v1.ResourceCPU:    resource.MustParse("1"),
+									v1.ResourceMemory: resource.MustParse("1024"),
+								},
+							},
+							Labels: map[string]string{
+								"test1": "blah",
+								"test2": "blah2",
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubeNodeStatusCapacityCPUCores,
+					Labels: map[string]string{
+						source.NodeLabel:       "node1",
+						source.ProviderIDLabel: "i-1",
+					},
+					Value:                 2.0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubeNodeStatusCapacityMemoryBytes,
+					Labels: map[string]string{
+						source.NodeLabel:       "node1",
+						source.ProviderIDLabel: "i-1",
+					},
+					Value:                 2048.0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubeNodeStatusAllocatableCPUCores,
+					Labels: map[string]string{
+						source.NodeLabel:       "node1",
+						source.ProviderIDLabel: "i-1",
+					},
+					Value:                 1.0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubeNodeStatusAllocatableMemoryBytes,
+					Labels: map[string]string{
+						source.NodeLabel:       "node1",
+						source.ProviderIDLabel: "i-1",
+					},
+					Value:                 1024.0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubeNodeLabels,
+					Labels: map[string]string{
+						source.NodeLabel:       "node1",
+						source.ProviderIDLabel: "i-1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeNodes(s.Nodes, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+// Test_kubernetesScraper_scrapeDeployments verifies that scrapeDeployments
+// emits a deployment_match_labels sample with prefixed selector labels.
+func Test_kubernetesScraper_scrapeDeployments(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		Deployments []*clustercache.Deployment
+		Timestamp   time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					Deployments: []*clustercache.Deployment{
+						{
+							Name:      "deployment1",
+							Namespace: "namespace1",
+							MatchLabels: map[string]string{
+								"test1": "blah",
+								"test2": "blah2",
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+
+				{
+					MetricName: DeploymentMatchLabels,
+					Labels: map[string]string{
+						source.DeploymentLabel: "deployment1",
+						source.NamespaceLabel:  "namespace1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeDeployments(s.Deployments, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+// Test_kubernetesScraper_scrapeNamespaces verifies that scrapeNamespaces
+// emits label and annotation samples with the expected prefixes.
+func Test_kubernetesScraper_scrapeNamespaces(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		Namespaces []*clustercache.Namespace
+		Timestamp  time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					Namespaces: []*clustercache.Namespace{
+						{
+							Name: "namespace1",
+							Labels: map[string]string{
+								"test1": "blah",
+								"test2": "blah2",
+							},
+							Annotations: map[string]string{
+								"test3": "blah3",
+								"test4": "blah4",
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubeNamespaceLabels,
+					Labels: map[string]string{
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+				{
+					MetricName: KubeNamespaceAnnotations,
+					Labels: map[string]string{
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"annotation_test3": "blah3",
+						"annotation_test4": "blah4",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeNamespaces(s.Namespaces, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+// Test_kubernetesScraper_scrapePods verifies the full set of pod-derived
+// samples: labels, annotations, owner, running container status, and
+// per-resource container requests (CPU in cores, memory in bytes).
+func Test_kubernetesScraper_scrapePods(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		Pods      []*clustercache.Pod
+		Timestamp time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					Pods: []*clustercache.Pod{
+						{
+							Name:      "pod1",
+							Namespace: "namespace1",
+							UID:       "uuid1",
+							Spec: clustercache.PodSpec{
+								NodeName: "node1",
+								Containers: []clustercache.Container{
+									{
+										Name: "container1",
+										Resources: v1.ResourceRequirements{
+											Requests: map[v1.ResourceName]resource.Quantity{
+												v1.ResourceCPU:    resource.MustParse("500m"),
+												v1.ResourceMemory: resource.MustParse("512"),
+											},
+										},
+									},
+								},
+							},
+							Labels: map[string]string{
+								"test1": "blah",
+								"test2": "blah2",
+							},
+							Annotations: map[string]string{
+								"test3": "blah3",
+								"test4": "blah4",
+							},
+							OwnerReferences: []metav1.OwnerReference{
+								{
+									Kind:       source.DeploymentLabel,
+									Name:       "deployment1",
+									Controller: nil,
+								},
+							},
+							Status: clustercache.PodStatus{
+								ContainerStatuses: []v1.ContainerStatus{
+									{
+										Name: "container1",
+										State: v1.ContainerState{
+											Running: &v1.ContainerStateRunning{},
+										},
+									},
+								},
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubePodLabels,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+				{
+					MetricName: KubePodAnnotations,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"annotation_test3": "blah3",
+						"annotation_test4": "blah4",
+					},
+				},
+				{
+					MetricName: KubePodOwner,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+						source.OwnerKindLabel: "deployment",
+						source.OwnerNameLabel: "deployment1",
+					},
+					Value:                 0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubePodContainerStatusRunning,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+						source.ContainerLabel: "container1",
+					},
+					Value:                 0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubePodContainerResourceRequests,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+						source.ContainerLabel: "container1",
+						source.ResourceLabel:  "cpu",
+						source.UnitLabel:      "core",
+					},
+					Value:                 0.5,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubePodContainerResourceRequests,
+					Labels: map[string]string{
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.UIDLabel:       "uuid1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+						source.ContainerLabel: "container1",
+						source.ResourceLabel:  "memory",
+						source.UnitLabel:      "byte",
+					},
+					Value:                 512,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapePods(s.Pods, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+// Test_kubernetesScraper_scrapePVCs verifies the PVC info sample and the
+// storage-request-in-bytes sample for a single claim.
+func Test_kubernetesScraper_scrapePVCs(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		PVCs      []*clustercache.PersistentVolumeClaim
+		Timestamp time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					PVCs: []*clustercache.PersistentVolumeClaim{
+						{
+							Name:      "pvc1",
+							Namespace: "namespace1",
+							Spec: v1.PersistentVolumeClaimSpec{
+								VolumeName:       "vol1",
+								StorageClassName: util.Ptr("storageClass1"),
+								Resources: v1.VolumeResourceRequirements{
+									Requests: v1.ResourceList{
+										v1.ResourceStorage: resource.MustParse("4096"),
+									},
+								},
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubePersistentVolumeClaimInfo,
+					Labels: map[string]string{
+						source.PVCLabel:          "pvc1",
+						source.NamespaceLabel:    "namespace1",
+						source.VolumeNameLabel:   "vol1",
+						source.StorageClassLabel: "storageClass1",
+					},
+					Value:                 0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubePersistentVolumeClaimResourceRequestsStorageBytes,
+					Labels: map[string]string{
+						source.PVCLabel:          "pvc1",
+						source.NamespaceLabel:    "namespace1",
+						source.VolumeNameLabel:   "vol1",
+						source.StorageClassLabel: "storageClass1",
+					},
+					Value:                 4096,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapePVCs(s.PVCs, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+// Test_kubernetesScraper_scrapePVs verifies the PV info and capacity samples,
+// including that the CSI volume handle is used as the provider ID.
+func Test_kubernetesScraper_scrapePVs(t *testing.T) {
+
+	// parse error is deliberately discarded; start1 is the zero time if
+	// Start1Str is not valid RFC 3339
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		PVs       []*clustercache.PersistentVolume
+		Timestamp time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					PVs: []*clustercache.PersistentVolume{
+						{
+							Name: "pv1",
+							Spec: v1.PersistentVolumeSpec{
+								StorageClassName: "storageClass1",
+								PersistentVolumeSource: v1.PersistentVolumeSource{
+									CSI: &v1.CSIPersistentVolumeSource{
+										VolumeHandle: "vol-1",
+									},
+								},
+								Capacity: v1.ResourceList{
+									v1.ResourceStorage: resource.MustParse("4096"),
+								},
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubecostPVInfo,
+					Labels: map[string]string{
+						source.PVLabel:           "pv1",
+						source.ProviderIDLabel:   "vol-1",
+						source.StorageClassLabel: "storageClass1",
+					},
+					Value:                 0,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+				{
+					MetricName: KubePersistentVolumeCapacityBytes,
+					Labels: map[string]string{
+						source.PVLabel:           "pv1",
+						source.ProviderIDLabel:   "vol-1",
+						source.StorageClassLabel: "storageClass1",
+					},
+					Value:                 4096,
+					Timestamp:             &start1,
+					AdditionalInformation: nil,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// ArgRecordUpdater records every Update call for comparison
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapePVs(s.PVs, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+func Test_kubernetesScraper_scrapeServices(t *testing.T) {
+
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		Services  []*clustercache.Service
+		Timestamp time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					Services: []*clustercache.Service{
+						{
+							Name:      "service1",
+							Namespace: "namespace1",
+							SpecSelector: map[string]string{
+								"test1": "blah",
+								"test2": "blah2",
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: ServiceSelectorLabels,
+					Labels: map[string]string{
+						"service":             "service1",
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeServices(s.Services, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+func Test_kubernetesScraper_scrapeStatefulSets(t *testing.T) {
+
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		StatefulSets []*clustercache.StatefulSet
+		Timestamp    time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					StatefulSets: []*clustercache.StatefulSet{
+						{
+							Name:      "statefulSet1",
+							Namespace: "namespace1",
+							SpecSelector: &metav1.LabelSelector{
+								MatchLabels: map[string]string{
+									"test1": "blah",
+									"test2": "blah2",
+								},
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: StatefulSetMatchLabels,
+					Labels: map[string]string{
+						source.StatefulSetLabel: "statefulSet1",
+						source.NamespaceLabel:   "namespace1",
+					},
+					Value:     0,
+					Timestamp: &start1,
+					AdditionalInformation: map[string]string{
+						"label_test1": "blah",
+						"label_test2": "blah2",
+					},
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeStatefulSets(s.StatefulSets, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}
+
+func Test_kubernetesScraper_scrapeReplicaSets(t *testing.T) {
+
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+
+	type scrape struct {
+		ReplicaSets []*clustercache.ReplicaSet
+		Timestamp   time.Time
+	}
+	tests := []struct {
+		name     string
+		scrapes  []scrape
+		expected []metric.UpdateArgs
+	}{
+		{
+			name: "simple",
+			scrapes: []scrape{
+				{
+					ReplicaSets: []*clustercache.ReplicaSet{
+						{
+							Name:      "replicaSet1",
+							Namespace: "namespace1",
+							OwnerReferences: []metav1.OwnerReference{
+								{
+									Name: "rollout1",
+									Kind: "Rollout",
+								},
+							},
+						},
+					},
+					Timestamp: start1,
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubeReplicasetOwner,
+					Labels: map[string]string{
+						"replicaset":          "replicaSet1",
+						source.NamespaceLabel: "namespace1",
+						source.OwnerNameLabel: "rollout1",
+						source.OwnerKindLabel: "Rollout",
+					},
+					Value:     0,
+					Timestamp: &start1,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			updateRecorder := metric.ArgRecordUpdater{}
+			ks := &ClusterCacheScraper{
+				updater: &updateRecorder,
+			}
+			for _, s := range tt.scrapes {
+				ks.scrapeReplicaSets(s.ReplicaSets, s.Timestamp)
+			}
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}

+ 64 - 0
modules/collector-source/pkg/scrape/dcgm.go

@@ -0,0 +1,64 @@
+package scrape
+
+import (
+	"fmt"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
+)
+
// DCGM metrics
//
// Metric names exposed by the NVIDIA DCGM exporter that this scraper collects:
// graphics-engine activity and decoder utilization.
const (
	DCGMFIPROFGRENGINEACTIVE = "DCGM_FI_PROF_GR_ENGINE_ACTIVE"
	DCGMFIDEVDECUTIL         = "DCGM_FI_DEV_DEC_UTIL"
)
+
// newDCGMScrapper builds the Scraper that collects DCGM GPU metrics.
//
// NOTE(review): the cluster-cache-based target discovery is commented out and a
// single hard-coded localhost target is used instead — presumably a development
// shortcut; confirm before release. clusterCache is currently unused as a result.
func newDCGMScrapper(clusterCache clustercache.ClusterCache, updater metric.MetricUpdater) Scraper {
	//tp := newDCGMTargetProvider(clusterCache)
	tp := target.NewDefaultTargetProvider(
		target.NewUrlTarget("http://localhost:9400/metrics"))
	return newDCGMTargetScraper(tp, updater)
}
+
+func newDCGMTargetScraper(provider target.TargetProvider, updater metric.MetricUpdater) *TargetScraper {
+	return newTargetScrapper(
+		provider,
+		updater,
+		[]string{
+			DCGMFIPROFGRENGINEACTIVE,
+			DCGMFIDEVDECUTIL,
+		},
+		true)
+}
+
// DCGMTargetProvider discovers DCGM scrape targets from the services held in
// the cluster cache.
type DCGMTargetProvider struct {
	clusterCache clustercache.ClusterCache
}

// newDCGMTargetProvider returns a DCGMTargetProvider backed by the given cache.
func newDCGMTargetProvider(clusterCache clustercache.ClusterCache) *DCGMTargetProvider {
	return &DCGMTargetProvider{
		clusterCache: clusterCache,
	}
}
+
// GetTargets returns one /metrics target for each cluster service whose
// selector identifies it as the DCGM exporter endpoint.
func (p *DCGMTargetProvider) GetTargets() []target.ScrapeTarget {
	svcs := p.clusterCache.GetAllServices()

	var targets []target.ScrapeTarget
	for _, svc := range svcs {
		// Skip services without a cluster IP or without a selector.
		if svc.ClusterIP == "" || svc.SpecSelector == nil {
			continue
		}
		// TODO do something in relation to Thomas' comment https://github.com/opencost/opencost/pull/3110
		// NOTE(review): "dcm-collector" looks like a typo — the DCGM exporter is
		// conventionally named "dcgm-exporter"; confirm the expected selector value.
		if name := svc.SpecSelector["app.kubernetes.io/name"]; name != "dcm-collector" {
			continue
		}
		// Default DCGM exporter port; not read from the service spec.
		port := 9400

		t := target.NewUrlTarget(fmt.Sprintf("http://%s:%d/metrics", svc.ClusterIP, port))
		targets = append(targets, t)
	}

	return targets
}

+ 86 - 0
modules/collector-source/pkg/scrape/network.go

@@ -0,0 +1,86 @@
+package scrape
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+)
+
// Network Metrics
//
// Metric names exposed by the network-costs daemonset pods that this scraper
// collects (per-pod egress/ingress byte counters).
const (
	KubecostPodNetworkEgressBytesTotal  = "kubecost_pod_network_egress_bytes_total"
	KubecostPodNetworkIngressBytesTotal = "kubecost_pod_network_ingress_bytes_total"
)
+
+func newNetworkScraper(
+	releaseName string,
+	port int,
+	k8s kubernetes.Interface,
+	updater metric.MetricUpdater,
+) Scraper {
+	// TODO revert this
+	//tp := NewNetworkTargetProvider(releaseName, port, k8s)
+	tp := target.NewDefaultTargetProvider(
+		target.NewUrlTarget("http://localhost:9111/metrics"),
+		target.NewUrlTarget("http://localhost:9112/metrics"),
+		target.NewUrlTarget("http://localhost:9113/metrics"),
+		target.NewUrlTarget("http://localhost:9114/metrics"),
+		target.NewUrlTarget("http://localhost:9115/metrics"),
+		target.NewUrlTarget("http://localhost:9116/metrics"),
+		target.NewUrlTarget("http://localhost:9117/metrics"),
+		target.NewUrlTarget("http://localhost:9118/metrics"),
+		target.NewUrlTarget("http://localhost:9119/metrics"),
+		target.NewUrlTarget("http://localhost:9120/metrics"),
+	)
+	return newNetworkTargetScraper(tp, updater)
+}
+
+func newNetworkTargetScraper(provider target.TargetProvider, updater metric.MetricUpdater) *TargetScraper {
+	return newTargetScrapper(
+		provider,
+		updater,
+		[]string{
+			KubecostPodNetworkEgressBytesTotal,
+			KubecostPodNetworkIngressBytesTotal,
+		},
+		true)
+}
+
// NetworkTargetProvider discovers network-costs pods via the Kubernetes API and
// exposes each pod's metrics endpoint as a scrape target.
type NetworkTargetProvider struct {
	releaseName   string // helm release name; pods are matched by app=<releaseName>-network-costs
	port          int    // metrics port on each network-costs pod
	kubeClientSet kubernetes.Interface
}

// NewNetworkTargetProvider returns a provider that lists network-costs pods for
// the given release using the supplied Kubernetes client.
func NewNetworkTargetProvider(releaseName string, port int, k8s kubernetes.Interface) *NetworkTargetProvider {
	return &NetworkTargetProvider{
		releaseName:   releaseName,
		port:          port,
		kubeClientSet: k8s,
	}
}
+
+func (n *NetworkTargetProvider) GetTargets() []target.ScrapeTarget {
+	k8s := n.kubeClientSet
+
+	pods, err := k8s.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{
+		LabelSelector: fmt.Sprintf("app=%s-network-costs", n.releaseName),
+	})
+	if err != nil {
+		log.Errorf("NetworkTargetProvider: failed to retieve pods from kubernetes client: %s", err.Error())
+		return nil
+	}
+
+	var targets []target.ScrapeTarget
+	for _, pod := range pods.Items {
+		t := target.NewUrlTarget(fmt.Sprintf("http://%s:%d/metrics", pod.Status.PodIP, n.port))
+		targets = append(targets, t)
+	}
+
+	return targets
+}

+ 60 - 0
modules/collector-source/pkg/scrape/opencost.go

@@ -0,0 +1,60 @@
+package scrape
+
+import (
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
+)
+
// Opencost Metrics
//
// Metric names emitted by the local opencost /metrics endpoint that this
// collector re-ingests (cluster management, network egress, node/PV/LB pricing,
// and container resource allocations).
const (
	KubecostClusterManagementCost     = "kubecost_cluster_management_cost"
	KubecostNetworkZoneEgressCost     = "kubecost_network_zone_egress_cost"
	KubecostNetworkRegionEgressCost   = "kubecost_network_region_egress_cost"
	KubecostNetworkInternetEgressCost = "kubecost_network_internet_egress_cost"
	PVHourlyCost                      = "pv_hourly_cost"
	KubecostLoadBalancerCost          = "kubecost_load_balancer_cost"
	NodeTotalHourlyCost               = "node_total_hourly_cost"
	NodeCPUHourlyCost                 = "node_cpu_hourly_cost"
	NodeRAMHourlyCost                 = "node_ram_hourly_cost"
	NodeGPUHourlyCost                 = "node_gpu_hourly_cost"
	NodeGPUCount                      = "node_gpu_count"
	KubecostNodeIsSpot                = "kubecost_node_is_spot"
	ContainerCPUAllocation            = "container_cpu_allocation"
	ContainerMemoryAllocationBytes    = "container_memory_allocation_bytes"
	ContainerGPUAllocation            = "container_gpu_allocation"
	PodPVCAllocation                  = "pod_pvc_allocation"
)
+
// newOpenCostTargetProvider returns a static provider for the opencost /metrics
// endpoint of this same container.
func newOpenCostTargetProvider() target.TargetProvider {
	// localhost is used here because we are hitting an endpoint of this container
	return target.NewDefaultTargetProvider(target.NewUrlTarget("http://localhost:9003/metrics"))
}
+
// newOpenCostScraper builds the Scraper for the local opencost metrics endpoint.
func newOpenCostScraper(updater metric.MetricUpdater) Scraper {
	return newOpencostTargetScraper(newOpenCostTargetProvider(), updater)
}
+
+func newOpencostTargetScraper(provider target.TargetProvider, updater metric.MetricUpdater) *TargetScraper {
+	return newTargetScrapper(
+		provider,
+		updater,
+		[]string{
+			KubecostClusterManagementCost,
+			KubecostNetworkZoneEgressCost,
+			KubecostNetworkRegionEgressCost,
+			KubecostNetworkInternetEgressCost,
+			PVHourlyCost,
+			KubecostLoadBalancerCost,
+			NodeTotalHourlyCost,
+			NodeCPUHourlyCost,
+			NodeRAMHourlyCost,
+			NodeGPUHourlyCost,
+			NodeGPUCount,
+			KubecostNodeIsSpot,
+			ContainerCPUAllocation,
+			ContainerMemoryAllocationBytes,
+			ContainerGPUAllocation,
+			PodPVCAllocation,
+		},
+		true)
+}

+ 0 - 0
modules/collector-source/pkg/metrics/parser/lexer.go → modules/collector-source/pkg/scrape/parser/lexer.go


+ 0 - 0
modules/collector-source/pkg/metrics/parser/lexer_test.go → modules/collector-source/pkg/scrape/parser/lexer_test.go


+ 0 - 0
modules/collector-source/pkg/metrics/parser/parser.go → modules/collector-source/pkg/scrape/parser/parser.go


+ 0 - 0
modules/collector-source/pkg/metrics/parser/parser_test.go → modules/collector-source/pkg/scrape/parser/parser_test.go


+ 0 - 0
modules/collector-source/pkg/metrics/parser/scrape.txt → modules/collector-source/pkg/scrape/parser/scrape.txt


+ 0 - 0
modules/collector-source/pkg/metrics/parser/token.go → modules/collector-source/pkg/scrape/parser/token.go


+ 85 - 0
modules/collector-source/pkg/scrape/scrapecontroller.go

@@ -0,0 +1,85 @@
+package scrape
+
+import (
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/clustercache"
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/core/pkg/util/atomic"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	"k8s.io/client-go/kubernetes"
+)
+
// ScrapeController initializes and holds the scrapers in addition to running the loop that triggers scrapes.
type ScrapeController struct {
	scrapeInterval time.Duration         // delay between scrape rounds
	runState       atomic.AtomicRunState // guards start/stop of the background loop
	scrapers       []Scraper             // scrapers run in order on every round
}
+
+func NewScrapeController(
+	scrapeInterval time.Duration,
+	releaseName string,
+	networkPort int,
+	updater metric.MetricUpdater,
+	clusterCache clustercache.ClusterCache,
+	k8s kubernetes.Interface,
+	statSummaryClient util.StatSummaryClient,
+) *ScrapeController {
+	var scrapers []Scraper
+
+	clusterCacheScraper := newClusterCacheScraper(clusterCache, updater)
+	scrapers = append(scrapers, clusterCacheScraper)
+
+	opencostScraper := newOpenCostScraper(updater)
+	scrapers = append(scrapers, opencostScraper)
+
+	statSummaryScraper := newStatSummaryScraper(statSummaryClient, updater)
+	scrapers = append(scrapers, statSummaryScraper)
+
+	networkScraper := newNetworkScraper(releaseName, networkPort, k8s, updater)
+	scrapers = append(scrapers, networkScraper)
+
+	dcgmScraper := newDCGMScrapper(clusterCache, updater)
+	scrapers = append(scrapers, dcgmScraper)
+
+	sc := &ScrapeController{
+		scrapeInterval: scrapeInterval,
+		scrapers:       scrapers,
+	}
+	return sc
+}
+
+func (sc *ScrapeController) Start() {
+	// Before we attempt to start, we must ensure we are not in a stopping state
+	sc.runState.WaitForReset()
+
+	// This will atomically check the current state to ensure we can run, then advances the state.
+	// If the state is already started, it will return false.
+	if !sc.runState.Start() {
+		log.Info("metric already running")
+		return
+	}
+	go func() {
+		ticker := time.NewTicker(sc.scrapeInterval)
+		for {
+			for _, scraper := range sc.scrapers {
+				scraper.Scrape()
+			}
+			select {
+			case <-sc.runState.OnStop():
+				sc.runState.Reset()
+				ticker.Stop()
+				return // exit go routine
+			case <-ticker.C:
+			}
+
+		}
+
+	}()
+}
+
// Stop signals the scrape loop to exit; the goroutine started by Start observes
// the stop state and resets it before returning.
func (sc *ScrapeController) Stop() {
	sc.runState.Stop()
}

+ 5 - 0
modules/collector-source/pkg/scrape/scraper.go

@@ -0,0 +1,5 @@
+package scrape
+
// Scraper is implemented by anything that can perform one round of metric
// collection when Scrape is called.
type Scraper interface {
	Scrape()
}

+ 175 - 0
modules/collector-source/pkg/scrape/statsummary.go

@@ -0,0 +1,175 @@
+package scrape
+
+import (
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	stats "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+)
+
// Stat Summary Metrics
//
// Metric names produced from the kubelet stat-summary API: node CPU/filesystem
// stats plus per-pod network, volume, and per-container usage stats.
const (
	NodeCPUSecondsTotal                = "node_cpu_seconds_total"
	NodeFSCapacityBytes                = "node_fs_capacity_bytes" // replaces container_fs_limit_bytes
	ContainerNetworkReceiveBytesTotal  = "container_network_receive_bytes_total"
	ContainerNetworkTransmitBytesTotal = "container_network_transmit_bytes_total"
	ContainerCPUUsageSecondsTotal      = "container_cpu_usage_seconds_total"
	ContainerMemoryWorkingSetBytes     = "container_memory_working_set_bytes"
	ContainerFSUsageBytes              = "container_fs_usage_bytes"
	KubeletVolumeStatsUsedBytes        = "kubelet_volume_stats_used_bytes"
)
+
// StatSummaryScraper converts kubelet stat-summary responses into metric updates.
type StatSummaryScraper struct {
	client  util.StatSummaryClient // source of per-node stat summaries
	updater metric.MetricUpdater   // sink for the derived metrics
}
+
// newStatSummaryScraper returns a Scraper backed by the given stat-summary
// client and metric updater.
func newStatSummaryScraper(client util.StatSummaryClient, updater metric.MetricUpdater) Scraper {
	return &StatSummaryScraper{
		client:  client,
		updater: updater,
	}
}
+
// Scrape fetches the stat summary for every node and emits node-, pod-, and
// container-level metrics. Nil sub-structs and nil counters are skipped rather
// than emitted as zeros. Aborts the whole round (with a log) if the node data
// cannot be retrieved.
func (s *StatSummaryScraper) Scrape() {
	nodeStats, err := s.client.GetNodeData()
	if err != nil {
		log.Errorf("error retrieving node stat data: %s", err.Error())
		return
	}

	// track if a pvc has already been seen when updating KubeletVolumeStatsUsedBytes
	// (a PVC mounted by multiple pods is only reported once — first pod wins)
	seenPVC := map[stats.PVCReference]struct{}{}

	for _, stat := range nodeStats {
		nodeName := stat.Node.NodeName
		if stat.Node.CPU != nil && stat.Node.CPU.UsageCoreNanoSeconds != nil {
			s.updater.Update(
				NodeCPUSecondsTotal,
				map[string]string{
					source.KubernetesNodeLabel: nodeName,
					source.ModeLabel:           "", // TODO
				},
				// core-nanoseconds -> core-seconds
				float64(*stat.Node.CPU.UsageCoreNanoSeconds)*1e-9,
				&stat.Node.CPU.Time.Time,
				nil,
			)
		}

		if stat.Node.Fs != nil && stat.Node.Fs.CapacityBytes != nil {
			s.updater.Update(
				NodeFSCapacityBytes,
				map[string]string{
					source.InstanceLabel: nodeName,
					source.DeviceLabel:   "local", // This value has to be populated but isn't important here
				},
				float64(*stat.Node.Fs.CapacityBytes),
				&stat.Node.Fs.Time.Time,
				nil,
			)
		}

		for _, pod := range stat.Pods {
			podName := pod.PodRef.Name
			namespace := pod.PodRef.Namespace
			podUID := pod.PodRef.UID

			// Pod-level network counters (received / transmitted bytes).
			if pod.Network != nil {
				if pod.Network.RxBytes != nil {
					s.updater.Update(
						ContainerNetworkReceiveBytesTotal,
						map[string]string{
							source.UIDLabel:       podUID,
							source.PodLabel:       podName,
							source.NamespaceLabel: namespace,
						},
						float64(*pod.Network.RxBytes),
						&pod.Network.Time.Time,
						nil,
					)
				}

				if pod.Network.TxBytes != nil {
					s.updater.Update(
						ContainerNetworkTransmitBytesTotal,
						map[string]string{
							source.UIDLabel:       podUID,
							source.PodLabel:       podName,
							source.NamespaceLabel: namespace,
						},
						float64(*pod.Network.TxBytes),
						&pod.Network.Time.Time,
						nil,
					)
				}
			}

			// PVC usage: only volumes backed by a PVC, each PVC reported once.
			for _, volumeStats := range pod.VolumeStats {
				if volumeStats.PVCRef == nil || volumeStats.UsedBytes == nil {
					continue
				}
				if _, ok := seenPVC[*volumeStats.PVCRef]; ok {
					continue
				}
				s.updater.Update(
					KubeletVolumeStatsUsedBytes,
					map[string]string{
						source.PVCLabel:       volumeStats.PVCRef.Name,
						source.NamespaceLabel: volumeStats.PVCRef.Namespace,
					},
					float64(*volumeStats.UsedBytes),
					&volumeStats.Time.Time,
					nil,
				)
				seenPVC[*volumeStats.PVCRef] = struct{}{}
			}

			for _, container := range pod.Containers {
				if container.CPU != nil && container.CPU.UsageCoreNanoSeconds != nil {
					s.updater.Update(
						ContainerCPUUsageSecondsTotal,
						map[string]string{
							source.ContainerLabel: container.Name,
							source.PodLabel:       podName,
							source.NamespaceLabel: namespace,
							source.NodeLabel:      nodeName,
							source.InstanceLabel:  nodeName,
						},
						// core-nanoseconds -> core-seconds
						float64(*container.CPU.UsageCoreNanoSeconds)*1e-9,
						&container.CPU.Time.Time,
						nil,
					)
				}
				if container.Memory != nil && container.Memory.WorkingSetBytes != nil {
					s.updater.Update(
						ContainerMemoryWorkingSetBytes,
						map[string]string{
							source.ContainerLabel: container.Name,
							source.PodLabel:       podName,
							source.NamespaceLabel: namespace,
							source.NodeLabel:      nodeName,
							source.InstanceLabel:  nodeName,
						},
						float64(*container.Memory.WorkingSetBytes),
						&container.Memory.Time.Time,
						nil,
					)
				}

				// NOTE(review): unlike the CPU/memory metrics above, this label set
				// carries no container/pod/namespace identifiers, so all containers on
				// a node share one series — confirm this is intentional.
				if container.Rootfs != nil && container.Rootfs.UsedBytes != nil {
					s.updater.Update(
						ContainerFSUsageBytes,
						map[string]string{
							source.InstanceLabel: nodeName,
							source.DeviceLabel:   "local",
						},
						float64(*container.Rootfs.UsedBytes),
						&container.Rootfs.Time.Time,
						nil,
					)
				}
			}
		}
	}
}

+ 360 - 0
modules/collector-source/pkg/scrape/statsummary_test.go

@@ -0,0 +1,360 @@
+package scrape
+
+import (
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/source"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/util"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	stats "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+)
+
// mockStatSummaryClient is a StatSummaryClient stub that returns canned
// summaries for tests.
type mockStatSummaryClient struct {
	results []*stats.Summary
}

// GetNodeData returns the canned summaries and never errors.
func (m *mockStatSummaryClient) GetNodeData() ([]*stats.Summary, error) {
	return m.results, nil
}
+
+func TestStatScraper_Scrape(t *testing.T) {
+	start1, _ := time.Parse(time.RFC3339, Start1Str)
+	tests := map[string]struct {
+		summaries []*stats.Summary
+		expected  []metric.UpdateArgs
+	}{
+		"nil values": {
+			summaries: []*stats.Summary{
+				{
+					Node: stats.NodeStats{
+						NodeName: "node1",
+						CPU: &stats.CPUStats{
+							Time:                 metav1.Time{Time: start1},
+							UsageCoreNanoSeconds: nil,
+						},
+						Fs: &stats.FsStats{
+							Time:          metav1.Time{Time: start1},
+							CapacityBytes: nil,
+						},
+					},
+					Pods: []stats.PodStats{
+						{
+							PodRef: stats.PodReference{
+								Name:      "pod1",
+								Namespace: "namespace1",
+								UID:       "uid1",
+							},
+							Network: &stats.NetworkStats{
+								Time: metav1.Time{Time: start1},
+								InterfaceStats: stats.InterfaceStats{
+									RxBytes: nil,
+									TxBytes: nil,
+								},
+							},
+							VolumeStats: []stats.VolumeStats{
+								{
+									Name: "vol1",
+									PVCRef: &stats.PVCReference{
+										Namespace: "namespace1",
+										Name:      "pvc1",
+									},
+									FsStats: stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: nil,
+									},
+								},
+							},
+							Containers: []stats.ContainerStats{
+								{
+									Name: "container1",
+									CPU: &stats.CPUStats{
+										Time:                 metav1.Time{Time: start1},
+										UsageCoreNanoSeconds: nil,
+									},
+									Memory: &stats.MemoryStats{
+										Time:            metav1.Time{Time: start1},
+										WorkingSetBytes: nil,
+									},
+									Rootfs: &stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: nil,
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []metric.UpdateArgs{},
+		},
+		"nil structs": {
+			summaries: []*stats.Summary{
+				{
+					Node: stats.NodeStats{
+						NodeName: "node1",
+						CPU:      nil,
+						Fs:       nil,
+					},
+					Pods: []stats.PodStats{
+						{
+							PodRef: stats.PodReference{
+								Name:      "pod1",
+								Namespace: "namespace1",
+								UID:       "uid1",
+							},
+							Network:     nil,
+							VolumeStats: nil,
+							Containers: []stats.ContainerStats{
+								{
+									Name:   "container1",
+									CPU:    nil,
+									Memory: nil,
+									Rootfs: nil,
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []metric.UpdateArgs{},
+		},
+		"single node": {
+			summaries: []*stats.Summary{
+				{
+					Node: stats.NodeStats{
+						NodeName: "node1",
+						CPU: &stats.CPUStats{
+							Time:                 metav1.Time{Time: start1},
+							UsageCoreNanoSeconds: util.Ptr(uint64(2000000000)),
+						},
+						Fs: &stats.FsStats{
+							Time:          metav1.Time{Time: start1},
+							CapacityBytes: util.Ptr(uint64(2 * util.GB)),
+						},
+					},
+					Pods: []stats.PodStats{
+						{
+							PodRef: stats.PodReference{
+								Name:      "pod1",
+								Namespace: "namespace1",
+								UID:       "uid1",
+							},
+							Network: &stats.NetworkStats{
+								Time: metav1.Time{Time: start1},
+								InterfaceStats: stats.InterfaceStats{
+									RxBytes: util.Ptr(uint64(1 * util.MB)),
+									TxBytes: util.Ptr(uint64(2 * util.MB)),
+								},
+							},
+							VolumeStats: []stats.VolumeStats{
+								{
+									Name: "ignoreVol1",
+									FsStats: stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: util.Ptr(uint64(1 * util.GB)),
+									},
+								},
+								{
+									Name: "vol1",
+									PVCRef: &stats.PVCReference{
+										Namespace: "namespace1",
+										Name:      "pvc1",
+									},
+									FsStats: stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: util.Ptr(uint64(1 * util.GB)),
+									},
+								},
+							},
+							Containers: []stats.ContainerStats{
+								{
+									Name: "container1",
+									CPU: &stats.CPUStats{
+										Time:                 metav1.Time{Time: start1},
+										UsageCoreNanoSeconds: util.Ptr(uint64(1000000000)),
+									},
+									Memory: &stats.MemoryStats{
+										Time:            metav1.Time{Time: start1},
+										WorkingSetBytes: util.Ptr(uint64(5 * util.MB)),
+									},
+									Rootfs: &stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: util.Ptr(uint64(1 * util.GB)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: NodeCPUSecondsTotal,
+					Labels: map[string]string{
+						source.KubernetesNodeLabel: "node1",
+						source.ModeLabel:           "",
+					},
+					Value:     2,
+					Timestamp: &start1,
+				},
+				{
+					MetricName: NodeFSCapacityBytes,
+					Labels: map[string]string{
+						source.InstanceLabel: "node1",
+						source.DeviceLabel:   "local",
+					},
+					Value:     float64(2 * util.GB),
+					Timestamp: &start1,
+				},
+				{
+					MetricName: ContainerNetworkReceiveBytesTotal,
+					Labels: map[string]string{
+						source.UIDLabel:       "uid1",
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     float64(1 * util.MB),
+					Timestamp: &start1,
+				},
+				{
+					MetricName: ContainerNetworkTransmitBytesTotal,
+					Labels: map[string]string{
+						source.UIDLabel:       "uid1",
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     float64(2 * util.MB),
+					Timestamp: &start1,
+				},
+				{
+					MetricName: KubeletVolumeStatsUsedBytes,
+					Labels: map[string]string{
+						source.PVCLabel:       "pvc1",
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     float64(1 * util.GB),
+					Timestamp: &start1,
+				},
+				{
+					MetricName: ContainerCPUUsageSecondsTotal,
+					Labels: map[string]string{
+						source.ContainerLabel: "container1",
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+					},
+					Value:     1,
+					Timestamp: &start1,
+				},
+				{
+					MetricName: ContainerMemoryWorkingSetBytes,
+					Labels: map[string]string{
+						source.ContainerLabel: "container1",
+						source.PodLabel:       "pod1",
+						source.NamespaceLabel: "namespace1",
+						source.NodeLabel:      "node1",
+						source.InstanceLabel:  "node1",
+					},
+					Value:     float64(5 * util.MB),
+					Timestamp: &start1,
+				},
+				{
+					MetricName: ContainerFSUsageBytes,
+					Labels: map[string]string{
+						source.InstanceLabel: "node1",
+						source.DeviceLabel:   "local",
+					},
+					Value:     float64(1 * util.GB),
+					Timestamp: &start1,
+				},
+			},
+		},
+		"repeat pvc": {
+			summaries: []*stats.Summary{
+				{
+					Node: stats.NodeStats{
+						NodeName: "node1",
+					},
+					Pods: []stats.PodStats{
+						{
+							PodRef: stats.PodReference{
+								Name:      "pod1",
+								Namespace: "namespace1",
+								UID:       "uid1",
+							},
+							VolumeStats: []stats.VolumeStats{
+								{
+									Name: "vol1",
+									PVCRef: &stats.PVCReference{
+										Namespace: "namespace1",
+										Name:      "pvc1",
+									},
+									FsStats: stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: util.Ptr(uint64(1 * util.GB)),
+									},
+								},
+							},
+						},
+						{
+							PodRef: stats.PodReference{
+								Name:      "pod2",
+								Namespace: "namespace1",
+								UID:       "uid1",
+							},
+							VolumeStats: []stats.VolumeStats{
+								{
+									Name: "vol1",
+									PVCRef: &stats.PVCReference{
+										Namespace: "namespace1",
+										Name:      "pvc1",
+									},
+									FsStats: stats.FsStats{
+										Time:      metav1.Time{Time: start1},
+										UsedBytes: util.Ptr(uint64(1 * util.GB)),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubeletVolumeStatsUsedBytes,
+					Labels: map[string]string{
+						source.PVCLabel:       "pvc1",
+						source.NamespaceLabel: "namespace1",
+					},
+					Value:     float64(1 * util.GB),
+					Timestamp: &start1,
+				},
+			},
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			updateRecorder := metric.ArgRecordUpdater{}
+			s := &StatSummaryScraper{
+				client:  &mockStatSummaryClient{results: tt.summaries},
+				updater: &updateRecorder,
+			}
+			s.Scrape()
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}

+ 0 - 0
modules/collector-source/pkg/metrics/target/filetarget.go → modules/collector-source/pkg/scrape/target/filetarget.go


+ 0 - 0
modules/collector-source/pkg/metrics/target/stringtarget.go → modules/collector-source/pkg/scrape/target/stringtarget.go


+ 25 - 0
modules/collector-source/pkg/scrape/target/target.go

@@ -0,0 +1,25 @@
package target

import "io"

// ScrapeTarget is an interface representing an object that is capable of loading/refreshing its
// target data.
type ScrapeTarget interface {
	// Load returns a reader over the target's current data, or an error if the
	// data could not be retrieved.
	Load() (io.Reader, error)
}

// TargetProvider supplies the set of ScrapeTargets that a scraper should read.
type TargetProvider interface {
	GetTargets() []ScrapeTarget
}

// DefaultTargetProvider is a TargetProvider backed by a fixed, static list of targets.
type DefaultTargetProvider struct {
	targets []ScrapeTarget
}

// NewDefaultTargetProvider creates a DefaultTargetProvider from the given targets.
func NewDefaultTargetProvider(targets ...ScrapeTarget) *DefaultTargetProvider {
	return &DefaultTargetProvider{targets: targets}
}

// GetTargets returns the provider's fixed list of targets.
func (m *DefaultTargetProvider) GetTargets() []ScrapeTarget {
	return m.targets
}

+ 0 - 0
modules/collector-source/pkg/metrics/target/urltarget.go → modules/collector-source/pkg/scrape/target/urltarget.go


+ 52 - 0
modules/collector-source/pkg/scrape/targetscraper.go

@@ -0,0 +1,52 @@
+package scrape
+
+import (
+	"github.com/opencost/opencost/core/pkg/log"
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/parser"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
+)
+
// TargetScraper scrapes exposition-format metrics from a set of targets and
// forwards each parsed sample, filtered by metric name, to a MetricUpdater.
type TargetScraper struct {
	targetProvider target.TargetProvider
	metricUpdater  metric.MetricUpdater
	metricNames    map[string]struct{} // filter for which metrics will be processed
	includeMetrics bool                // toggle to make metrics an include or exclude list
}

// newTargetScrapper builds a TargetScraper over the given provider and updater.
// When includeMetrics is true, only the metrics named in metricNames are
// processed; when false, those metrics are excluded and all others processed.
// NOTE(review): "Scrapper" looks like a typo for "Scraper"; left as-is because
// renaming would break callers elsewhere in this package.
func newTargetScrapper(provider target.TargetProvider, updater metric.MetricUpdater, metricNames []string, includeMetrics bool) *TargetScraper {
	metricSet := make(map[string]struct{})
	for _, metricName := range metricNames {
		metricSet[metricName] = struct{}{}
	}
	return &TargetScraper{
		targetProvider: provider,
		metricUpdater:  updater,
		metricNames:    metricSet,
		includeMetrics: includeMetrics,
	}
}
+
+func (s *TargetScraper) Scrape() {
+	targets := s.targetProvider.GetTargets()
+	for _, target := range targets {
+		f, err := target.Load()
+		if err != nil {
+			log.Errorf("failed to scrape target: %s", err.Error())
+			continue
+		}
+		results, err := parser.Parse(f)
+		if err != nil {
+			log.Errorf("failed to parse target: %s", err.Error())
+			continue
+		}
+
+		for _, result := range results {
+			// filter metrics to be processed by name
+			if _, ok := s.metricNames[result.Name]; ok != s.includeMetrics {
+				continue
+			}
+			s.metricUpdater.Update(result.Name, result.Labels, result.Value, result.Timestamp, nil)
+		}
+	}
+}

+ 489 - 0
modules/collector-source/pkg/scrape/targetscraper_test.go

@@ -0,0 +1,489 @@
+package scrape
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
+	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
+)
+
+const networkScape = `
+# HELP kubecost_pod_network_egress_bytes kubecost_pod_network_egress_bytes_total egressed byte counts by pod.
+# TYPE kubecost_pod_network_egress_bytes counter
+kubecost_pod_network_egress_bytes_total{pod_name="pod1",namespace="namespace1",internet="false",same_region="true",same_zone="true",service="service1"} 3127969647
+kubecost_pod_network_egress_bytes_total{pod_name="pod2",namespace="namespace1",internet="true",same_region="false",same_zone="false",service=""} 335188219
+# HELP kubecost_pod_network_ingress_bytes kubecost_pod_network_ingress_bytes_total ingressed byte counts by pod.
+# TYPE kubecost_pod_network_ingress_bytes counter
+kubecost_pod_network_ingress_bytes_total{pod_name="pod1",namespace="namespace1",internet="true",same_region="false",same_zone="false",service="service1"} 17941460
+kubecost_pod_network_ingress_bytes_total{pod_name="pod2",namespace="namespace1",internet="false",same_region="true",same_zone="false",service=""} 13948766
+# HELP kubecost_network_costs_parsed_entries kubecost_network_costs_parsed_entries total parsed conntrack entries.
+# TYPE kubecost_network_costs_parsed_entries gauge
+# HELP kubecost_network_costs_parse_time kubecost_network_costs_parse_time total time in milliseconds it took to parse conntrack entries.
+# TYPE kubecost_network_costs_parse_time gauge
+# EOF
+`
+
+const opencostScrape = `
+# HELP kubecost_cluster_management_cost kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.
+# TYPE kubecost_cluster_management_cost gauge
+kubecost_cluster_management_cost{provisioner_name="GKE"} 0.1
+# HELP kubecost_network_zone_egress_cost kubecost_network_zone_egress_cost Total cost per GB egress across zones
+# TYPE kubecost_network_zone_egress_cost gauge
+kubecost_network_zone_egress_cost 0.01
+# HELP kubecost_network_region_egress_cost kubecost_network_region_egress_cost Total cost per GB egress across regions
+# TYPE kubecost_network_region_egress_cost gauge
+kubecost_network_region_egress_cost 0.01
+# HELP kubecost_network_internet_egress_cost kubecost_network_internet_egress_cost Total cost per GB of internet egress.
+# TYPE kubecost_network_internet_egress_cost gauge
+kubecost_network_internet_egress_cost 0.12
+# HELP pv_hourly_cost pv_hourly_cost Cost per GB per hour on a persistent disk
+# TYPE pv_hourly_cost gauge
+pv_hourly_cost{persistentvolume="pvc-1",provider_id="pvc-1",volumename="pvc-1"} 5.479452054794521e-05
+pv_hourly_cost{persistentvolume="pvc-2",provider_id="pvc-2",volumename="pvc-2"} 5.479452054794521e-05
+# HELP kubecost_load_balancer_cost kubecost_load_balancer_cost Hourly cost of load balancer
+# TYPE kubecost_load_balancer_cost gauge
+kubecost_load_balancer_cost{ingress_ip="127.0.0.1",namespace="namespace1",service_name="service1"} 0.025
+# HELP container_cpu_allocation container_cpu_allocation Percent of a single CPU used in a minute
+# TYPE container_cpu_allocation gauge
+# HELP node_total_hourly_cost node_total_hourly_cost Total node cost per hour
+# TYPE node_total_hourly_cost gauge
+node_total_hourly_cost{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0.06631302438846588
+node_total_hourly_cost{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0.06631302438846588
+# HELP node_cpu_hourly_cost node_cpu_hourly_cost hourly cost for each cpu on this node
+# TYPE node_cpu_hourly_cost gauge
+node_cpu_hourly_cost{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0.021811590000000002
+node_cpu_hourly_cost{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0.021811590000000002
+# HELP node_ram_hourly_cost node_ram_hourly_cost hourly cost for each gb of ram on this node
+# TYPE node_ram_hourly_cost gauge
+node_ram_hourly_cost{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0.00292353
+node_ram_hourly_cost{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0.00292353
+# HELP node_gpu_hourly_cost node_gpu_hourly_cost hourly cost for each gpu on this node
+# TYPE node_gpu_hourly_cost gauge
+node_gpu_hourly_cost{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0
+node_gpu_hourly_cost{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0
+# HELP node_gpu_count node_gpu_count count of gpu on this node
+# TYPE node_gpu_count gauge
+node_gpu_count{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0
+node_gpu_count{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0
+# HELP kubecost_node_is_spot kubecost_node_is_spot Cloud provider info about node preemptibility
+# TYPE kubecost_node_is_spot gauge
+kubecost_node_is_spot{arch="amd64",instance="node1",instance_type="e2-standard-2",node="node1",provider_id="node1",region="region1"} 0
+kubecost_node_is_spot{arch="amd64",instance="node2",instance_type="e2-standard-2",node="node2",provider_id="node2",region="region1"} 0
+# HELP ignore_fake_metric fake metric that the scrapper should ignore
+# TYPE ignore_fake_metric gauge
+ignore_fake_metric{container="container1",instance="node1",namespace="namespace1",node="node1",pod="pod1"} 0.02
+# HELP container_cpu_allocation container_cpu_allocation Percent of a single CPU used in a minute
+# TYPE container_cpu_allocation gauge
+container_cpu_allocation{container="container1",instance="node1",namespace="namespace1",node="node1",pod="pod1"} 0.02
+container_cpu_allocation{container="container2",instance="node2",namespace="namespace1",node="node2",pod="pod2"} 0.01
+# HELP container_memory_allocation_bytes container_memory_allocation_bytes Bytes of RAM used
+# TYPE container_memory_allocation_bytes gauge
+container_memory_allocation_bytes{container="container1",instance="node1",namespace="namespace1",node="node1",pod="pod1"} 1.1528192e+07
+container_memory_allocation_bytes{container="container2",instance="node2",namespace="namespace1",node="node2",pod="pod2"} 1e+07
+# HELP container_gpu_allocation container_gpu_allocation GPU used
+# TYPE container_gpu_allocation gauge
+container_gpu_allocation{container="container1",instance="node1",namespace="namespace1",node="node1",pod="pod1"} 0
+container_gpu_allocation{container="container2",instance="node2",namespace="namespace1",node="node2",pod="pod2"} 0
+# HELP pod_pvc_allocation pod_pvc_allocation Bytes used by a PVC attached to a pod
+# TYPE pod_pvc_allocation gauge
+pod_pvc_allocation{namespace="namespace1",persistentvolume="pvc-1",persistentvolumeclaim="pvc1",pod="pod1"} 3.4359738368e+10
+pod_pvc_allocation{namespace="namespace1",persistentvolume="pvc-2",persistentvolumeclaim="pvc2",pod="pod2"} 3.4359738368e+10
+`
+
+const dcgmScrape = `
+# HELP DCGM_FI_PROF_GR_ENGINE_ACTIVE Ratio of time the graphics engine is active.
+# TYPE DCGM_FI_PROF_GR_ENGINE_ACTIVE gauge
+DCGM_FI_PROF_GR_ENGINE_ACTIVE{gpu="0",UUID="GPU-1",pci_bus_id="00000000:00:0A.0",device="nvidia0",modelName="Tesla T4",Hostname="localhost"} 0.999999
+# HELP DCGM_FI_DEV_DEC_UTIL Decoder utilization (in %).
+# TYPE DCGM_FI_DEV_DEC_UTIL gauge
+DCGM_FI_DEV_DEC_UTIL{gpu="0",UUID="GPU-1",pci_bus_id="00000000:00:0A.0",device="nvidia0",modelName="Tesla T4",Hostname="localhost"} 0
+`
+
+func TestTargetScraper_Scrape(t *testing.T) {
+
+	tests := []struct {
+		name            string
+		scrapperFactory func(metric.MetricUpdater) *TargetScraper
+		expected        []metric.UpdateArgs
+	}{
+		{
+			name: "Network Scrape",
+			scrapperFactory: func(updater metric.MetricUpdater) *TargetScraper {
+				return newNetworkTargetScraper(
+					target.NewDefaultTargetProvider(target.NewStringTarget(networkScape)),
+					updater,
+				)
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubecostPodNetworkEgressBytesTotal,
+					Labels: map[string]string{
+						"pod_name":    "pod1",
+						"namespace":   "namespace1",
+						"internet":    "false",
+						"same_region": "true",
+						"same_zone":   "true",
+						"service":     "service1",
+					},
+					Value:     3127969647,
+					Timestamp: nil,
+				},
+				{
+					MetricName: KubecostPodNetworkEgressBytesTotal,
+					Labels: map[string]string{
+						"pod_name":    "pod2",
+						"namespace":   "namespace1",
+						"internet":    "true",
+						"same_region": "false",
+						"same_zone":   "false",
+						"service":     "",
+					},
+					Value:     335188219,
+					Timestamp: nil,
+				},
+			},
+		},
+		{
+			name: "Opencost Metric",
+			scrapperFactory: func(updater metric.MetricUpdater) *TargetScraper {
+				return newOpencostTargetScraper(target.NewDefaultTargetProvider(target.NewStringTarget(opencostScrape)),
+					updater,
+				)
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: KubecostClusterManagementCost,
+					Labels: map[string]string{
+						"provisioner_name": "GKE",
+					},
+					Value: 0.1,
+				},
+				{
+					MetricName: KubecostNetworkZoneEgressCost,
+					Labels:     map[string]string{},
+					Value:      0.01,
+				},
+				{
+					MetricName: KubecostNetworkRegionEgressCost,
+					Labels:     map[string]string{},
+					Value:      0.01,
+				},
+				{
+					MetricName: KubecostNetworkInternetEgressCost,
+					Labels:     map[string]string{},
+					Value:      0.12,
+				},
+				{
+					MetricName: PVHourlyCost,
+					Labels: map[string]string{
+						"persistentvolume": "pvc-1",
+						"provider_id":      "pvc-1",
+						"volumename":       "pvc-1",
+					},
+					Value: 5.479452054794521e-05,
+				},
+				{
+					MetricName: PVHourlyCost,
+					Labels: map[string]string{
+						"persistentvolume": "pvc-2",
+						"provider_id":      "pvc-2",
+						"volumename":       "pvc-2",
+					},
+					Value: 5.479452054794521e-05,
+				},
+				{
+					MetricName: KubecostLoadBalancerCost,
+					Labels: map[string]string{
+						"ingress_ip":   "127.0.0.1",
+						"namespace":    "namespace1",
+						"service_name": "service1",
+					},
+					Value: 0.025,
+				},
+				{
+					MetricName: NodeTotalHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0.06631302438846588,
+				},
+				{
+					MetricName: NodeTotalHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0.06631302438846588,
+				},
+				{
+					MetricName: NodeCPUHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0.021811590000000002,
+				},
+				{
+					MetricName: NodeCPUHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0.021811590000000002,
+				},
+				{
+					MetricName: NodeRAMHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0.00292353,
+				},
+				{
+					MetricName: NodeRAMHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0.00292353,
+				},
+				{
+					MetricName: NodeGPUHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: NodeGPUHourlyCost,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: NodeGPUCount,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: NodeGPUCount,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: KubecostNodeIsSpot,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node1",
+						"instance_type": "e2-standard-2",
+						"node":          "node1",
+						"provider_id":   "node1",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: KubecostNodeIsSpot,
+					Labels: map[string]string{
+						"arch":          "amd64",
+						"instance":      "node2",
+						"instance_type": "e2-standard-2",
+						"node":          "node2",
+						"provider_id":   "node2",
+						"region":        "region1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: ContainerCPUAllocation,
+					Labels: map[string]string{
+						"container": "container1",
+						"instance":  "node1",
+						"namespace": "namespace1",
+						"node":      "node1",
+						"pod":       "pod1",
+					},
+					Value: 0.02,
+				},
+				{
+					MetricName: ContainerCPUAllocation,
+					Labels: map[string]string{
+						"container": "container2",
+						"instance":  "node2",
+						"namespace": "namespace1",
+						"node":      "node2",
+						"pod":       "pod2",
+					},
+					Value: 0.01,
+				},
+				{
+					MetricName: ContainerMemoryAllocationBytes,
+					Labels: map[string]string{
+						"container": "container1",
+						"instance":  "node1",
+						"namespace": "namespace1",
+						"node":      "node1",
+						"pod":       "pod1",
+					},
+					Value: 1.1528192e+07,
+				},
+				{
+					MetricName: ContainerMemoryAllocationBytes,
+					Labels: map[string]string{
+						"container": "container2",
+						"instance":  "node2",
+						"namespace": "namespace1",
+						"node":      "node2",
+						"pod":       "pod2",
+					},
+					Value: 1e+07,
+				},
+				{
+					MetricName: ContainerGPUAllocation,
+					Labels: map[string]string{
+						"container": "container1",
+						"instance":  "node1",
+						"namespace": "namespace1",
+						"node":      "node1",
+						"pod":       "pod1",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: ContainerGPUAllocation,
+					Labels: map[string]string{
+						"container": "container2",
+						"instance":  "node2",
+						"namespace": "namespace1",
+						"node":      "node2",
+						"pod":       "pod2",
+					},
+					Value: 0,
+				},
+				{
+					MetricName: PodPVCAllocation,
+					Labels: map[string]string{
+						"namespace":             "namespace1",
+						"persistentvolume":      "pvc-1",
+						"persistentvolumeclaim": "pvc1",
+						"pod":                   "pod1",
+					},
+					Value: 3.4359738368e+10,
+				},
+				{
+					MetricName: PodPVCAllocation,
+					Labels: map[string]string{
+						"namespace":             "namespace1",
+						"persistentvolume":      "pvc-2",
+						"persistentvolumeclaim": "pvc2",
+						"pod":                   "pod2",
+					},
+					Value: 3.4359738368e+10,
+				},
+			},
+		},
+		{
+			name: "GPU Metric",
+			scrapperFactory: func(updater metric.MetricUpdater) *TargetScraper {
+				return newDCGMTargetScraper(target.NewDefaultTargetProvider(target.NewStringTarget(dcgmScrape)),
+					updater,
+				)
+			},
+			expected: []metric.UpdateArgs{
+				{
+					MetricName: DCGMFIPROFGRENGINEACTIVE,
+					Labels: map[string]string{
+						"gpu":        "0",
+						"UUID":       "GPU-1",
+						"pci_bus_id": "00000000:00:0A.0",
+						"device":     "nvidia0",
+						"modelName":  "Tesla T4",
+						"Hostname":   "localhost",
+					},
+					Value: 0.999999,
+				},
+				{
+					MetricName: DCGMFIDEVDECUTIL,
+					Labels: map[string]string{
+						"gpu":        "0",
+						"UUID":       "GPU-1",
+						"pci_bus_id": "00000000:00:0A.0",
+						"device":     "nvidia0",
+						"modelName":  "Tesla T4",
+						"Hostname":   "localhost",
+					},
+					Value: 0,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			updateRecorder := metric.ArgRecordUpdater{}
+			scrapper := tt.scrapperFactory(&updateRecorder)
+			scrapper.Scrape()
+
+			if len(updateRecorder.UpdateArgs) != len(tt.expected) {
+				t.Errorf("Expected result length of %d, got %d", len(tt.expected), len(updateRecorder.UpdateArgs))
+			}
+
+			for i, expected := range tt.expected {
+				updateArg := updateRecorder.UpdateArgs[i]
+				err := expected.Equals(updateArg)
+				if err != nil {
+					t.Errorf("Result did not match expected at index %d: %s", i, err.Error())
+				}
+			}
+		})
+	}
+}

+ 11 - 5
modules/collector-source/pkg/collector/helper.go → modules/collector-source/pkg/util/helper.go

@@ -1,11 +1,17 @@
-package collector
+package util
 
 import (
 	"hash/fnv"
 	"strings"
 )
 
-func hash(s []string) uint64 {
+var (
+	KB = 1024
+	MB = 1024 * KB
+	GB = 1024 * MB
+)
+
+func Hash(s []string) uint64 {
 	h := fnv.New64a()
 	for _, v := range s {
 		h.Write([]byte(v))
@@ -13,7 +19,7 @@ func hash(s []string) uint64 {
 	return h.Sum64()
 }
 
-func metricNameFor(metric string, labels []string, values []string) string {
+func MetricNameFor(metric string, labels []string, values []string) string {
 	var sb strings.Builder
 	sb.WriteString(metric)
 	sb.WriteRune('{')
@@ -31,7 +37,7 @@ func metricNameFor(metric string, labels []string, values []string) string {
 	return sb.String()
 }
 
-func toMap(labels []string, values []string) map[string]string {
+func ToMap(labels []string, values []string) map[string]string {
 	min := len(labels)
 	if len(values) < min {
 		min = len(values)
@@ -44,6 +50,6 @@ func toMap(labels []string, values []string) map[string]string {
 	return m
 }
 
-func ptr[T any](v T) *T {
+func Ptr[T any](v T) *T {
 	return &v
 }

+ 80 - 0
modules/collector-source/pkg/util/interval.go

@@ -0,0 +1,80 @@
+package util
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/util/timeutil"
+)
+
// intervalRegex matches definitions of the form "<int><unit>" where unit is
// m (minutes), h (hours), d (days), or w (weeks) — e.g. "10m" or "2w".
var intervalRegex = regexp.MustCompile(`^(\d+)(m|h|d|w)$`)

// Interval is a time period defined by a string with an integer followed by a letter (ex: 5d = 5 days)
type Interval interface {
	// Add adds the interval multiplied by the given int to the given time. (A 10m interval called with 3 would add 30
	// minutes to the given time)
	Add(time.Time, int) time.Time

	// Truncate returns the start of the interval that the given time is a part of
	Truncate(time time.Time) time.Time
}
+
+func NewInterval(def string) (Interval, error) {
+	match := intervalRegex.FindStringSubmatch(def)
+	if match == nil {
+		return nil, fmt.Errorf("failed to parse interval '%s'", def)
+	}
+
+	num, err := strconv.ParseInt(match[1], 10, 64)
+	// This should not happen
+	if err != nil {
+		panic(fmt.Sprintf("NewInterval: regex failure on int '%s'", def))
+	}
+
+	switch match[2] {
+	case "m":
+		return &durationInterval{time.Duration(num) * time.Minute}, nil
+	case "h":
+		return &durationInterval{time.Duration(num) * time.Hour}, nil
+	case "d":
+		return &durationInterval{time.Duration(num) * timeutil.Day}, nil
+	case "w":
+		return &weekInterval{int(num)}, nil
+	default:
+		panic(fmt.Sprintf("NewInterval: regex failure on unit '%s'", def))
+	}
+}
+
+type durationInterval struct {
+	duration time.Duration
+}
+
+func (d *durationInterval) Add(t time.Time, i int) time.Time {
+	return t.Add(d.duration * time.Duration(i))
+}
+
+func (d *durationInterval) Truncate(time time.Time) time.Time {
+	return time.UTC().Truncate(d.duration)
+}
+
// weekInterval is an interval that tracks multiples of weeks with the week starting on Sunday
type weekInterval struct {
	count int // number of weeks per interval
}

// Add advances t by num intervals, i.e. num*count weeks; num may be negative.
func (w *weekInterval) Add(t time.Time, num int) time.Time {
	return t.Add(timeutil.Week * time.Duration(num*w.count))
}

// Truncate to the nearest Sunday that is a multiple of the count starting from 0000-12-31
// (the Sunday immediately preceding the zero time 0001-01-01, which was a Monday).
func (w *weekInterval) Truncate(t time.Time) time.Time {
	// add a day to Sundays to prevent times that would truncate to themselves from going to the previous step
	if t.UTC().Weekday() == time.Sunday {
		t = t.UTC().AddDate(0, 0, 1)
	}

	// truncate to monday using a weekly duration multiple (0001-01-01 was a monday) then subtract a day
	return t.UTC().Truncate(timeutil.Week * time.Duration(w.count)).Add(-timeutil.Day)
}

+ 320 - 0
modules/collector-source/pkg/util/interval_test.go

@@ -0,0 +1,320 @@
+package util
+
+import (
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/util/timeutil"
+)
+
+func TestNewInterval(t *testing.T) {
+	tests := map[string]struct {
+		def     string
+		want    Interval
+		wantErr bool
+	}{
+		"invalid": {
+			def:     "invalid",
+			want:    nil,
+			wantErr: true,
+		},
+		"invalid2": {
+			def:     "1M",
+			want:    nil,
+			wantErr: true,
+		},
+		"invalid3": {
+			def:     "d20",
+			want:    nil,
+			wantErr: true,
+		},
+		"one minute": {
+			def: "1m",
+			want: &durationInterval{
+				duration: time.Minute,
+			},
+			wantErr: false,
+		},
+		"ten minute": {
+			def: "10m",
+			want: &durationInterval{
+				duration: time.Minute * 10,
+			},
+			wantErr: false,
+		},
+		"one hour": {
+			def: "1h",
+			want: &durationInterval{
+				duration: time.Hour,
+			},
+		},
+		"six hours": {
+			def: "6h",
+			want: &durationInterval{
+				duration: time.Hour * 6,
+			},
+		},
+		"one day": {
+			def: "1d",
+			want: &durationInterval{
+				duration: timeutil.Day,
+			},
+		},
+		"seven days": {
+			def: "7d",
+			want: &durationInterval{
+				duration: timeutil.Day * 7,
+			},
+		},
+		"one week": {
+			def: "1w",
+			want: &weekInterval{
+				count: 1,
+			},
+		},
+		"two weeks": {
+			def: "2w",
+			want: &weekInterval{
+				count: 2,
+			},
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			got, err := NewInterval(tt.def)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("NewInterval() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("NewInterval() got = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
// Test_durationInterval_Add verifies that durationInterval.Add shifts a time
// forward or backward by whole multiples of the interval duration.
func Test_durationInterval_Add(t *testing.T) {

	type args struct {
		t time.Time
		i int
	}
	tests := map[string]struct {
		duration time.Duration
		args     args
		want     time.Time
	}{
		"day interval add 1": {
			duration: timeutil.Day,
			args: args{
				t: time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
				i: 1,
			},
			want: time.Date(2025, time.April, 3, 0, 0, 0, 0, time.UTC),
		},
		// negative multiples must move the time backwards
		"day interval sub 1": {
			duration: timeutil.Day,
			args: args{
				t: time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
				i: -1,
			},
			want: time.Date(2025, time.April, 1, 0, 0, 0, 0, time.UTC),
		},
	}
	for name, tt := range tests {
		t.Run(name, func(t *testing.T) {
			d := &durationInterval{
				duration: tt.duration,
			}
			if got := d.Add(tt.args.t, tt.args.i); !reflect.DeepEqual(got, tt.want) {
				t.Errorf("Add() = %v, want %v", got, tt.want)
			}
		})
	}
}
+
+func Test_durationInterval_Truncate(t *testing.T) {
+	tests := map[string]struct {
+		duration time.Duration
+		input    time.Time
+		want     time.Time
+	}{
+		"one day truncate": {
+			duration: timeutil.Day,
+			input:    time.Date(2025, time.April, 7, 3, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.April, 7, 0, 0, 0, 0, time.UTC),
+		},
+		"two day truncate": {
+			duration: 2 * timeutil.Day,
+			input:    time.Date(2025, time.April, 7, 3, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+		"two day truncate 2": {
+			duration: 2 * timeutil.Day,
+			input:    time.Date(2025, time.April, 6, 3, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+		"seven day truncate": {
+			duration: 7 * timeutil.Day,
+			input:    time.Date(2025, time.April, 7, 3, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.April, 7, 0, 0, 0, 0, time.UTC),
+		},
+		"seven day truncate 2": {
+			duration: 7 * timeutil.Day,
+			input:    time.Date(2025, time.March, 7, 3, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.March, 3, 0, 0, 0, 0, time.UTC),
+		},
+		"seven day truncate 3": {
+			duration: 7 * timeutil.Day,
+			input:    time.Date(2025, time.March, 3, 0, 0, 0, 0, time.UTC),
+			want:     time.Date(2025, time.March, 3, 0, 0, 0, 0, time.UTC),
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			d := &durationInterval{
+				duration: tt.duration,
+			}
+			if got := d.Truncate(tt.input); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Truncate() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_weekInterval_Add(t *testing.T) {
+
+	tests := map[string]struct {
+		count int
+		t     time.Time
+		num   int
+		want  time.Time
+	}{
+		"one week add one": {
+			count: 1,
+			t:     time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
+			num:   1,
+			want:  time.Date(2025, time.April, 9, 0, 0, 0, 0, time.UTC),
+		},
+		"one week subtract one": {
+			count: 1,
+			t:     time.Date(2025, time.April, 9, 0, 0, 0, 0, time.UTC),
+			num:   -1,
+			want:  time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
+		},
+		"two week add one": {
+			count: 1,
+			t:     time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
+			num:   2,
+			want:  time.Date(2025, time.April, 16, 0, 0, 0, 0, time.UTC),
+		},
+		"one week add two": {
+			count: 2,
+			t:     time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
+			num:   1,
+			want:  time.Date(2025, time.April, 16, 0, 0, 0, 0, time.UTC),
+		},
+		"two week add two": {
+			count: 2,
+			t:     time.Date(2025, time.April, 2, 0, 0, 0, 0, time.UTC),
+			num:   2,
+			want:  time.Date(2025, time.April, 30, 0, 0, 0, 0, time.UTC),
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			w := &weekInterval{
+				count: tt.count,
+			}
+			if got := w.Add(tt.t, tt.num); !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Add() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_weekInterval_Truncate(t *testing.T) {
+
+	tests := map[string]struct {
+		count int
+		input time.Time
+		want  time.Time
+	}{
+		"one week no change": {
+			count: 1,
+			input: time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+
+		"one week": {
+			count: 1,
+			input: time.Date(2025, time.April, 7, 3, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+		"one week 2": {
+			count: 1,
+			input: time.Date(2025, time.March, 7, 3, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.March, 2, 0, 0, 0, 0, time.UTC),
+		},
+		"one week 3": {
+			count: 1,
+			input: time.Date(2025, time.March, 3, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.March, 2, 0, 0, 0, 0, time.UTC),
+		},
+		"two week no change": {
+			count: 2,
+			input: time.Date(2025, time.March, 30, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.March, 30, 0, 0, 0, 0, time.UTC),
+		},
+		"two week": {
+			count: 2,
+			input: time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.March, 30, 0, 0, 0, 0, time.UTC),
+		},
+		"two week 2": {
+			count: 2,
+			input: time.Date(2025, time.April, 13, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.April, 13, 0, 0, 0, 0, time.UTC),
+		},
+		"three week": {
+			count: 3,
+			input: time.Date(2025, time.April, 7, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+		"three week 2": {
+			count: 3,
+			input: time.Date(2025, time.April, 14, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(2025, time.April, 6, 0, 0, 0, 0, time.UTC),
+		},
+		"one week first week": {
+			count: 1,
+			input: time.Date(1, time.January, 6, 0, 0, 0, 0, time.UTC),
+			want:  time.Time{}.Add(-1 * timeutil.Day),
+		},
+		"one week second week": {
+			count: 1,
+			input: time.Date(1, time.January, 9, 0, 0, 0, 0, time.UTC),
+			want:  time.Date(1, time.January, 7, 0, 0, 0, 0, time.UTC),
+		},
+		"two week second week": {
+			count: 2,
+			input: time.Date(1, time.January, 9, 0, 0, 0, 0, time.UTC),
+			want:  time.Time{}.Add(-1 * timeutil.Day),
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			w := &weekInterval{
+				count: tt.count,
+			}
+			got := w.Truncate(tt.input)
+			if got.Weekday() != time.Sunday {
+				t.Errorf("result was not a sunday: %s", got.Weekday().String())
+			}
+			if !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Truncate() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}

+ 60 - 0
modules/collector-source/pkg/util/resolution.go

@@ -0,0 +1,60 @@
+package util
+
+import (
+	"fmt"
+	"time"
+)
+
+// ResolutionConfiguration holds the user-supplied settings used to construct
+// a Resolution via NewResolution.
+type ResolutionConfiguration struct {
+	// Interval is the interval definition string, parsed by NewInterval.
+	Interval  string
+	// Retention is the number of intervals to keep.
+	Retention int
+}
+
+// Resolution maintains a set of time windows, each spanning one interval as
+// defined by the interval definition, keeping at most `retention` windows.
+type Resolution struct {
+	interval    Interval // parsed interval used for truncation and interval arithmetic
+	intervalDef string   // the original interval definition string (exposed by Interval())
+	retention   int      // number of intervals retained
+}
+
+// NewResolution constructs a Resolution from the given configuration. The
+// configuration's Interval string is parsed into an Interval; an error is
+// returned if the definition is invalid.
+func NewResolution(configuration ResolutionConfiguration) (*Resolution, error) {
+	interval, err := NewInterval(configuration.Interval)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create resolution: %w", err)
+	}
+	return &Resolution{
+		interval: interval,
+		// Store the raw definition string so the Interval() getter can report
+		// it; previously this field was never set and Interval() returned "".
+		intervalDef: configuration.Interval,
+		retention:   configuration.Retention,
+	}, nil
+}
+
+// Retention reports the number of intervals this Resolution retains.
+func (r *Resolution) Retention() int {
+	return r.retention
+}
+
+// Interval reports the interval definition string stored on the Resolution.
+func (r *Resolution) Interval() string {
+	return r.intervalDef
+}
+
+// Current returns the start time of the interval containing the present
+// moment.
+func (r *Resolution) Current() time.Time {
+	now := time.Now()
+	return r.interval.Truncate(now)
+}
+
+// Next returns the start time of the interval immediately following the
+// current one.
+func (r *Resolution) Next() time.Time {
+	current := r.interval.Truncate(time.Now())
+	return r.interval.Add(current, 1)
+}
+
+// Limit returns the start time of the oldest interval still within retention.
+func (r *Resolution) Limit() time.Time {
+	current := r.interval.Truncate(time.Now())
+	// Step back retention-1 intervals: the current interval itself counts
+	// toward the retention total.
+	return r.interval.Add(current, 1-r.retention)
+}
+
+// Get returns the start time of the interval that contains t.
+func (r *Resolution) Get(t time.Time) time.Time {
+	return r.interval.Truncate(t)
+}

+ 9 - 0
modules/collector-source/pkg/util/statsummaryclient.go

@@ -0,0 +1,9 @@
+package util
+
+import (
+	stats "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
+)
+
+// StatSummaryClient retrieves kubelet stats summaries for cluster nodes.
+type StatSummaryClient interface {
+	// GetNodeData returns one stats.Summary per node.
+	// NOTE(review): whether an error implies no partial results are returned
+	// is not specified here — confirm against implementations.
+	GetNodeData() ([]*stats.Summary, error)
+}

+ 6 - 0
pkg/env/costmodelenv.go

@@ -39,6 +39,7 @@ const (
 	CSVPathEnvVar                  = "CSV_PATH"
 	ConfigPathEnvVar               = "CONFIG_PATH"
 	CloudProviderAPIKeyEnvVar      = "CLOUD_PROVIDER_API_KEY"
+	PromlessEnvVar                 = "PROMLESS"
 	DisableAggregateCostModelCache = "DISABLE_AGGREGATE_COST_MODEL_CACHE"
 
 	EmitPodAnnotationsMetricEnvVar       = "EMIT_POD_ANNOTATIONS_METRIC"
@@ -364,6 +365,11 @@ func GetCloudProviderAPIKey() string {
 	return env.Get(CloudProviderAPIKeyEnvVar, "")
 }
 
+// GetPromless returns the value of the PROMLESS environment variable
+// (default false), which enables the collector-backed source.OpencostDatasource.
+// NOTE(review): the original comment said this enables a datasource "which
+// uses Prometheus", contradicting the PROMLESS name — confirm that the flag
+// selects the Prometheus-free data path.
+func GetPromless() bool {
+	return env.GetBool(PromlessEnvVar, false)
+}
+
 // IsLogCollectionEnabled returns the environment variable value for LogCollectionEnabledEnvVar which represents
 // whether or not log collection has been enabled for kubecost deployments.
 func IsLogCollectionEnabled() bool {