6 лет назад · 6429a32e60
--- a/PROMETHEUS.md
+++ b/PROMETHEUS.md
@@ -58,5 +58,7 @@ sum(node_total_hourly_cost) * 730
 
				 | node_ram_hourly_cost   | Hourly cost per GB of memory on this node                       |
			
 
				 | node_total_hourly_cost   | Total node cost per hour                       |
			
 
				 | container_cpu_allocation   | Average number of CPUs requested/used over last 1m                      |
			
 
				+| container_gpu_allocation   | Average number of GPUs requested over last 1m                      |
			
 
				 | container_memory_allocation_bytes   | Average bytes of RAM requested/used over last 1m                 |
			
 
				+| pod_pvc_allocation   | Bytes provisioned for a PVC attached to a pod                      |
			
 
				 | pv_hourly_cost   | Hourly cost per GB on a persistent volume                 |
			
--- a/pkg/cloud/awsprovider.go
+++ b/pkg/cloud/awsprovider.go
@@ -1594,8 +1594,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 
				 			return nil, err
			
 
				 		}
			
 
				 		if len(op.ResultSet.Rows) > 1 {
			
 
				-			for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows) - 1)] {
			
 
				-
			
 
				+			for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows))] {
			
 
				 				cost, err := strconv.ParseFloat(*r.Data[lastIdx].VarCharValue, 64)
			
 
				 				if err != nil {
			
 
				 					return nil, err
			
@@ -1631,8 +1630,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 
				 		}
			
 
				 		oocAllocs = append(oocAllocs, gcpOOC...)
			
 
				 	}
			
 
				-
			
 
				-	return oocAllocs, nil // TODO: transform the QuerySQL lines into the new OutOfClusterAllocation Struct
			
 
				+	return oocAllocs, nil
			
 
				 }
			
 
				 
			
 
				 // QuerySQL can query a properly configured Athena database.
			
--- a/pkg/clustercache/watchcontroller.go
+++ b/pkg/clustercache/watchcontroller.go
@@ -88,7 +88,19 @@ func NewCachingWatcher(restClient rest.Interface, resource string, resourceType
 
				 }
			
 
				 
			
 
				 func (c *CachingWatchController) GetAll() []interface{} {
			
 
				-	return c.indexer.List()
			
 
				+	list := c.indexer.List()
			
 
				+
			
 
				+	// the indexer returns pointers to the cached resources themselves,
			
 
				+	// so we deep copy each resource to keep callers from corrupting the
			
 
				+	// index
			
 
				+	cloneList := make([]interface{}, 0, len(list))
			
 
				+	for _, v := range list {
			
 
				+		if deepCopyable, ok := v.(rt.Object); ok {
			
 
				+			cloneList = append(cloneList, deepCopyable.DeepCopyObject())
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	return cloneList
			
 
				 }
			
 
				 
			
 
				 func (c *CachingWatchController) SetUpdateHandler(handler WatchHandler) WatchController {
			
--- a/pkg/costmodel/cluster.go
+++ b/pkg/costmodel/cluster.go
@@ -7,6 +7,7 @@ import (
 
				 	"time"
			
 
				 
			
 
				 	"github.com/kubecost/cost-model/pkg/cloud"
			
 
				+	"github.com/kubecost/cost-model/pkg/prom"
			
 
				 	"github.com/kubecost/cost-model/pkg/util"
			
 
				 	prometheus "github.com/prometheus/client_golang/api"
			
 
				 	"k8s.io/klog"
			
@@ -125,7 +126,7 @@ func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offse
 
				 }
			
 
				 
			
 
				 // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
			
 
				-func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string) (map[string]*ClusterCosts, error) {
			
 
				+func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
			
 
				 	// Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
			
 
				 	start, end, err := util.ParseTimeRange(window, offset)
			
 
				 	if err != nil {
			
@@ -133,35 +134,61 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
				 	}
			
 
				 	mins := end.Sub(*start).Minutes()
			
 
				 
			
 
				-	const fmtQueryDataCount = `count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:1m]%s)`
			
 
				-
			
 
				-	const fmtQueryTotalGPU = `sum(
			
 
				-		sum_over_time(node_gpu_hourly_cost[%s:1m]%s) / 60
			
 
				-	) by (cluster_id)`
			
 
				-
			
 
				-	const fmtQueryTotalCPU = `sum(
			
 
				-		sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:1m]%s) *
			
 
				-		avg(avg_over_time(node_cpu_hourly_cost[%s:1m]%s)) by (node, cluster_id) / 60
			
 
				-	) by (cluster_id)`
			
 
				-
			
 
				-	const fmtQueryTotalRAM = `sum(
			
 
				-		sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:1m]%s) / 1024 / 1024 / 1024 *
			
 
				-		avg(avg_over_time(node_ram_hourly_cost[%s:1m]%s)) by (node, cluster_id) / 60
			
 
				-	) by (cluster_id)`
			
 
				-
			
 
				-	const fmtQueryTotalStorage = `sum(
			
 
				-		sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:1m]%s) / 1024 / 1024 / 1024 *
			
 
				-		avg(avg_over_time(pv_hourly_cost[%s:1m]%s)) by (persistentvolume, cluster_id) / 60
			
 
				-	) by (cluster_id) %s`
			
 
				-
			
 
				-	const fmtQueryCPUModePct = `sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
			
 
				-	group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)`
			
 
				-
			
 
				-	const fmtQueryRAMSystemPct = `sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:1m]%s)) by (cluster_id)
			
 
				-	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
			
 
				-
			
 
				-	const fmtQueryRAMUserPct = `sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:1m]%s)) by (cluster_id)
			
 
				-	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
			
 
				+	// minsPerResolution determines accuracy and resource use for the following
			
 
				+	// queries. Smaller values (higher resolution) result in better accuracy,
			
 
				+	// but more expensive queries, and vice versa.
			
 
				+	minsPerResolution := 5
			
 
				+
			
 
				+	// hourlyToCumulative is a scaling factor that, when multiplied by an hourly
			
 
				+	// value, converts it to a cumulative value; i.e.
			
 
				+	// [$/hr] * [min/res]*[hr/min] = [$/res]
			
 
				+	hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
			
 
				+
			
 
				+	const fmtQueryDataCount = `
			
 
				+		count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:%dm]%s) * %d
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryTotalGPU = `
			
 
				+		sum(
			
 
				+			sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
			
 
				+		) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryTotalCPU = `
			
 
				+		sum(
			
 
				+			sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:%dm]%s) *
			
 
				+			avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
			
 
				+		) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryTotalRAM = `
			
 
				+		sum(
			
 
				+			sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
			
 
				+			avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
			
 
				+		) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryTotalStorage = `
			
 
				+		sum(
			
 
				+			sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
			
 
				+			avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, cluster_id) * %f
			
 
				+		) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryCPUModePct = `
			
 
				+		sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
			
 
				+		group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryRAMSystemPct = `
			
 
				+		sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (cluster_id)
			
 
				+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
			
 
				+	`
			
 
				+
			
 
				+	const fmtQueryRAMUserPct = `
			
 
				+		sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (cluster_id)
			
 
				+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
			
 
				+	`
			
 
				 
			
 
				 	// TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
			
 
				 	// const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
			
@@ -179,94 +206,42 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
				 		fmtOffset = fmt.Sprintf("offset %s", offset)
			
 
				 	}
			
 
				 
			
 
				-	queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, fmtOffset)
			
 
				-	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, fmtOffset)
			
 
				-	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, fmtOffset, window, fmtOffset)
			
 
				-	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, fmtOffset, window, fmtOffset)
			
 
				-	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, fmtOffset, window, fmtOffset, queryTotalLocalStorage)
			
 
				-	queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
			
 
				-	queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, fmtOffset, window, fmtOffset)
			
 
				-	queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, fmtOffset, window, fmtOffset)
			
 
				-
			
 
				-	numQueries := 9
			
 
				-
			
 
				-	klog.V(4).Infof("[Debug] queryDataCount: %s", queryDataCount)
			
 
				-	klog.V(4).Infof("[Debug] queryTotalGPU: %s", queryTotalGPU)
			
 
				-	klog.V(4).Infof("[Debug] queryTotalCPU: %s", queryTotalCPU)
			
 
				-	klog.V(4).Infof("[Debug] queryTotalRAM: %s", queryTotalRAM)
			
 
				-	klog.V(4).Infof("[Debug] queryTotalStorage: %s", queryTotalStorage)
			
 
				-	klog.V(4).Infof("[Debug] queryCPUModePct: %s", queryCPUModePct)
			
 
				-	klog.V(4).Infof("[Debug] queryRAMSystemPct: %s", queryRAMSystemPct)
			
 
				-	klog.V(4).Infof("[Debug] queryRAMUserPct: %s", queryRAMUserPct)
			
 
				-	klog.V(4).Infof("[Debug] queryUsedLocalStorage: %s", queryUsedLocalStorage)
			
 
				-
			
 
				-	// Submit queries to Prometheus asynchronously
			
 
				-	var ec util.ErrorCollector
			
 
				-	var wg sync.WaitGroup
			
 
				-	ctx := PromQueryContext{client, &ec, &wg}
			
 
				-	ctx.WaitGroup.Add(numQueries)
			
 
				-
			
 
				-	chDataCount := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryDataCount, chDataCount, ctx)
			
 
				-
			
 
				-	chTotalGPU := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryTotalGPU, chTotalGPU, ctx)
			
 
				-
			
 
				-	chTotalCPU := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryTotalCPU, chTotalCPU, ctx)
			
 
				-
			
 
				-	chTotalRAM := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryTotalRAM, chTotalRAM, ctx)
			
 
				-
			
 
				-	chTotalStorage := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryTotalStorage, chTotalStorage, ctx)
			
 
				-
			
 
				-	chCPUModePct := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryCPUModePct, chCPUModePct, ctx)
			
 
				-
			
 
				-	chRAMSystemPct := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryRAMSystemPct, chRAMSystemPct, ctx)
			
 
				-
			
 
				-	chRAMUserPct := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryRAMUserPct, chRAMUserPct, ctx)
			
 
				+	queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, minsPerResolution, fmtOffset, minsPerResolution)
			
 
				+	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, minsPerResolution, fmtOffset, hourlyToCumulative)
			
 
				+	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
			
 
				+	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
			
 
				+	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
			
 
				 
			
 
				-	chUsedLocalStorage := make(chan []*PromQueryResult, 1)
			
 
				-	go AsyncPromQuery(queryUsedLocalStorage, chUsedLocalStorage, ctx)
			
 
				+	ctx := prom.NewContext(client)
			
 
				 
			
 
				-	// After queries complete, retrieve results
			
 
				-	wg.Wait()
			
 
				+	resChs := ctx.QueryAll(
			
 
				+		queryDataCount,
			
 
				+		queryTotalGPU,
			
 
				+		queryTotalCPU,
			
 
				+		queryTotalRAM,
			
 
				+		queryTotalStorage,
			
 
				+		queryTotalLocalStorage,
			
 
				+	)
			
 
				 
			
 
				-	resultsDataCount := <-chDataCount
			
 
				-	close(chDataCount)
			
 
				+	if withBreakdown {
			
 
				+		queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
			
 
				+		queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
			
 
				+		queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
			
 
				 
			
 
				-	resultsTotalGPU := <-chTotalGPU
			
 
				-	close(chTotalGPU)
			
 
				+		bdResChs := ctx.QueryAll(
			
 
				+			queryCPUModePct,
			
 
				+			queryRAMSystemPct,
			
 
				+			queryRAMUserPct,
			
 
				+			queryUsedLocalStorage,
			
 
				+		)
			
 
				 
			
 
				-	resultsTotalCPU := <-chTotalCPU
			
 
				-	close(chTotalCPU)
			
 
				-
			
 
				-	resultsTotalRAM := <-chTotalRAM
			
 
				-	close(chTotalRAM)
			
 
				-
			
 
				-	resultsTotalStorage := <-chTotalStorage
			
 
				-	close(chTotalStorage)
			
 
				-
			
 
				-	resultsCPUModePct := <-chCPUModePct
			
 
				-	close(chCPUModePct)
			
 
				-
			
 
				-	resultsRAMSystemPct := <-chRAMSystemPct
			
 
				-	close(chRAMSystemPct)
			
 
				-
			
 
				-	resultsRAMUserPct := <-chRAMUserPct
			
 
				-	close(chRAMUserPct)
			
 
				-
			
 
				-	resultsUsedLocalStorage := <-chUsedLocalStorage
			
 
				-	close(chUsedLocalStorage)
			
 
				+		resChs = append(resChs, bdResChs...)
			
 
				+	}
			
 
				 
			
 
				 	defaultClusterID := os.Getenv(clusterIDKey)
			
 
				 
			
 
				 	dataMinsByCluster := map[string]float64{}
			
 
				-	for _, result := range resultsDataCount {
			
 
				+	for _, result := range resChs[0].Await() {
			
 
				 		clusterID, _ := result.GetString("cluster_id")
			
 
				 		if clusterID == "" {
			
 
				 			clusterID = defaultClusterID
			
@@ -299,7 +274,7 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
				 
			
 
				 	// Helper function to iterate over Prom query results, parsing the raw values into
			
 
				 	// the intermediate costData structure.
			
 
				-	setCostsFromResults := func(costData map[string]map[string]float64, results []*PromQueryResult, name string, discount float64, customDiscount float64) {
			
 
				+	setCostsFromResults := func(costData map[string]map[string]float64, results []*prom.QueryResult, name string, discount float64, customDiscount float64) {
			
 
				 		for _, result := range results {
			
 
				 			clusterID, _ := result.GetString("cluster_id")
			
 
				 			if clusterID == "" {
			
@@ -315,79 +290,82 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
				 		}
			
 
				 	}
			
 
				 	// Apply both sustained use and custom discounts to RAM and CPU
			
 
				-	setCostsFromResults(costData, resultsTotalCPU, "cpu", discount, customDiscount)
			
 
				-	setCostsFromResults(costData, resultsTotalRAM, "ram", discount, customDiscount)
			
 
				+	setCostsFromResults(costData, resChs[2].Await(), "cpu", discount, customDiscount)
			
 
				+	setCostsFromResults(costData, resChs[3].Await(), "ram", discount, customDiscount)
			
 
				 	// Apply only custom discount to GPU and storage
			
 
				-	setCostsFromResults(costData, resultsTotalGPU, "gpu", 0.0, customDiscount)
			
 
				-	setCostsFromResults(costData, resultsTotalStorage, "storage", 0.0, customDiscount)
			
 
				+	setCostsFromResults(costData, resChs[1].Await(), "gpu", 0.0, customDiscount)
			
 
				+	setCostsFromResults(costData, resChs[4].Await(), "storage", 0.0, customDiscount)
			
 
				+	setCostsFromResults(costData, resChs[5].Await(), "localstorage", 0.0, customDiscount)
			
 
				 
			
 
				 	cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
			
 
				-	for _, result := range resultsCPUModePct {
			
 
				-		clusterID, _ := result.GetString("cluster_id")
			
 
				-		if clusterID == "" {
			
 
				-			clusterID = defaultClusterID
			
 
				-		}
			
 
				-		if _, ok := cpuBreakdownMap[clusterID]; !ok {
			
 
				-			cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				-		}
			
 
				-		cpuBD := cpuBreakdownMap[clusterID]
			
 
				+	ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
			
 
				+	pvUsedCostMap := map[string]float64{}
			
 
				+	if withBreakdown {
			
 
				+		for _, result := range resChs[6].Await() {
			
 
				+			clusterID, _ := result.GetString("cluster_id")
			
 
				+			if clusterID == "" {
			
 
				+				clusterID = defaultClusterID
			
 
				+			}
			
 
				+			if _, ok := cpuBreakdownMap[clusterID]; !ok {
			
 
				+				cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				+			}
			
 
				+			cpuBD := cpuBreakdownMap[clusterID]
			
 
				 
			
 
				-		mode, err := result.GetString("mode")
			
 
				-		if err != nil {
			
 
				-			klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
			
 
				-			mode = "other"
			
 
				-		}
			
 
				+			mode, err := result.GetString("mode")
			
 
				+			if err != nil {
			
 
				+				klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
			
 
				+				mode = "other"
			
 
				+			}
			
 
				 
			
 
				-		switch mode {
			
 
				-		case "idle":
			
 
				-			cpuBD.Idle += result.Values[0].Value
			
 
				-		case "system":
			
 
				-			cpuBD.System += result.Values[0].Value
			
 
				-		case "user":
			
 
				-			cpuBD.User += result.Values[0].Value
			
 
				-		default:
			
 
				-			cpuBD.Other += result.Values[0].Value
			
 
				+			switch mode {
			
 
				+			case "idle":
			
 
				+				cpuBD.Idle += result.Values[0].Value
			
 
				+			case "system":
			
 
				+				cpuBD.System += result.Values[0].Value
			
 
				+			case "user":
			
 
				+				cpuBD.User += result.Values[0].Value
			
 
				+			default:
			
 
				+				cpuBD.Other += result.Values[0].Value
			
 
				+			}
			
 
				 		}
			
 
				-	}
			
 
				 
			
 
				-	ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
			
 
				-	for _, result := range resultsRAMSystemPct {
			
 
				-		clusterID, _ := result.GetString("cluster_id")
			
 
				-		if clusterID == "" {
			
 
				-			clusterID = defaultClusterID
			
 
				-		}
			
 
				-		if _, ok := ramBreakdownMap[clusterID]; !ok {
			
 
				-			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				+		for _, result := range resChs[7].Await() {
			
 
				+			clusterID, _ := result.GetString("cluster_id")
			
 
				+			if clusterID == "" {
			
 
				+				clusterID = defaultClusterID
			
 
				+			}
			
 
				+			if _, ok := ramBreakdownMap[clusterID]; !ok {
			
 
				+				ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				+			}
			
 
				+			ramBD := ramBreakdownMap[clusterID]
			
 
				+			ramBD.System += result.Values[0].Value
			
 
				 		}
			
 
				-		ramBD := ramBreakdownMap[clusterID]
			
 
				-		ramBD.System += result.Values[0].Value
			
 
				-	}
			
 
				-	for _, result := range resultsRAMUserPct {
			
 
				-		clusterID, _ := result.GetString("cluster_id")
			
 
				-		if clusterID == "" {
			
 
				-			clusterID = defaultClusterID
			
 
				+		for _, result := range resChs[8].Await() {
			
 
				+			clusterID, _ := result.GetString("cluster_id")
			
 
				+			if clusterID == "" {
			
 
				+				clusterID = defaultClusterID
			
 
				+			}
			
 
				+			if _, ok := ramBreakdownMap[clusterID]; !ok {
			
 
				+				ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				+			}
			
 
				+			ramBD := ramBreakdownMap[clusterID]
			
 
				+			ramBD.User += result.Values[0].Value
			
 
				 		}
			
 
				-		if _, ok := ramBreakdownMap[clusterID]; !ok {
			
 
				-			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
			
 
				+		for _, ramBD := range ramBreakdownMap {
			
 
				+			remaining := 1.0
			
 
				+			remaining -= ramBD.Other
			
 
				+			remaining -= ramBD.System
			
 
				+			remaining -= ramBD.User
			
 
				+			ramBD.Idle = remaining
			
 
				 		}
			
 
				-		ramBD := ramBreakdownMap[clusterID]
			
 
				-		ramBD.User += result.Values[0].Value
			
 
				-	}
			
 
				-	for _, ramBD := range ramBreakdownMap {
			
 
				-		remaining := 1.0
			
 
				-		remaining -= ramBD.Other
			
 
				-		remaining -= ramBD.System
			
 
				-		remaining -= ramBD.User
			
 
				-		ramBD.Idle = remaining
			
 
				-	}
			
 
				 
			
 
				-	pvUsedCostMap := map[string]float64{}
			
 
				-	for _, result := range resultsUsedLocalStorage {
			
 
				-		clusterID, _ := result.GetString("cluster_id")
			
 
				-		if clusterID == "" {
			
 
				-			clusterID = defaultClusterID
			
 
				+		for _, result := range resChs[9].Await() {
			
 
				+			clusterID, _ := result.GetString("cluster_id")
			
 
				+			if clusterID == "" {
			
 
				+				clusterID = defaultClusterID
			
 
				+			}
			
 
				+			pvUsedCostMap[clusterID] += result.Values[0].Value
			
 
				 		}
			
 
				-		pvUsedCostMap[clusterID] += result.Values[0].Value
			
 
				 	}
			
 
				 
			
 
				 	// Convert intermediate structure to Costs instances
			
@@ -398,7 +376,7 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
				 			dataMins = mins
			
 
				 			klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
			
 
				 		}
			
 
				-		costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"], window, offset, dataMins/util.MinsPerHour)
			
 
				+		costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/util.MinsPerHour)
			
 
				 		if err != nil {
			
 
				 			klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
			
 
				 			return nil, err
			
--- a/pkg/costmodel/costmodel.go
+++ b/pkg/costmodel/costmodel.go
@@ -2685,10 +2685,15 @@ type ContainerMetric struct {
 
				 	ContainerName string
			
 
				 	NodeName      string
			
 
				 	ClusterID     string
			
 
				+	key           string
			
 
				 }
			
 
				 
			
 
				 func (c *ContainerMetric) Key() string {
			
 
				-	return c.Namespace + "," + c.PodName + "," + c.ContainerName + "," + c.NodeName + "," + c.ClusterID
			
 
				+	return c.key
			
 
				+}
			
 
				+
			
 
				+func containerMetricKey(ns, podName, containerName, nodeName, clusterID string) string {
			
 
				+	return ns + "," + podName + "," + containerName + "," + nodeName + "," + clusterID
			
 
				 }
			
 
				 
			
 
				 func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
			
@@ -2700,6 +2705,7 @@ func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
 
				 			ContainerName: s[2],
			
 
				 			NodeName:      s[3],
			
 
				 			ClusterID:     s[4],
			
 
				+			key:           key,
			
 
				 		}, nil
			
 
				 	}
			
 
				 	return nil, fmt.Errorf("Not a valid key")
			
@@ -2712,6 +2718,7 @@ func newContainerMetricFromValues(ns string, podName string, containerName strin
 
				 		ContainerName: containerName,
			
 
				 		NodeName:      nodeName,
			
 
				 		ClusterID:     clusterId,
			
 
				+		key:           containerMetricKey(ns, podName, containerName, nodeName, clusterId),
			
 
				 	}
			
 
				 }
			
 
				 
			
@@ -2728,6 +2735,7 @@ func newContainerMetricsFromPod(pod v1.Pod, clusterID string) ([]*ContainerMetri
 
				 			ContainerName: containerName,
			
 
				 			NodeName:      node,
			
 
				 			ClusterID:     clusterID,
			
 
				+			key:           containerMetricKey(ns, podName, containerName, node, clusterID),
			
 
				 		})
			
 
				 	}
			
 
				 	return cs, nil
			
@@ -2782,6 +2790,7 @@ func newContainerMetricFromPrometheus(metrics map[string]interface{}, defaultClu
 
				 		Namespace:     namespace,
			
 
				 		NodeName:      nodeName,
			
 
				 		ClusterID:     clusterID,
			
 
				+		key:           containerMetricKey(namespace, podName, containerName, nodeName, clusterID),
			
 
				 	}, nil
			
 
				 }
			
 
				 
			
--- a/pkg/costmodel/router.go
+++ b/pkg/costmodel/router.go
@@ -332,7 +332,7 @@ func (a *Accesses) ClusterCosts(w http.ResponseWriter, r *http.Request, ps httpr
 
				 	window := r.URL.Query().Get("window")
			
 
				 	offset := r.URL.Query().Get("offset")
			
 
				 
			
 
				-	data, err := ComputeClusterCosts(a.PrometheusClient, a.Cloud, window, offset)
			
 
				+	data, err := ComputeClusterCosts(a.PrometheusClient, a.Cloud, window, offset, true)
			
 
				 	w.Write(WrapData(data, err))
			
 
				 }