Просмотр исходного кода

Merge branch 'develop' of github.com:kubecost/cost-model into AjayTripathy-suppress-logs

AjayTripathy 6 лет назад
Родитель
Сommit
6429a32e60

+ 2 - 0
PROMETHEUS.md

@@ -58,5 +58,7 @@ sum(node_total_hourly_cost) * 730
 | node_ram_hourly_cost   | Hourly cost per GB of memory on this node                       |
 | node_total_hourly_cost   | Total node cost per hour                       |
 | container_cpu_allocation   | Average number of CPUs requested/used over last 1m                      |
+| container_gpu_allocation   | Average number of GPUs requested over last 1m                      |
 | container_memory_allocation_bytes   | Average bytes of RAM requested/used over last 1m                 |
+| pod_pvc_allocation   | Bytes provisioned for a PVC attached to a pod                      |
 | pv_hourly_cost   | Hourly cost per GB on a persistent volume                 |

+ 2 - 4
pkg/cloud/awsprovider.go

@@ -1594,8 +1594,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 			return nil, err
 		}
 		if len(op.ResultSet.Rows) > 1 {
-			for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows) - 1)] {
-
+			for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows))] {
 				cost, err := strconv.ParseFloat(*r.Data[lastIdx].VarCharValue, 64)
 				if err != nil {
 					return nil, err
@@ -1631,8 +1630,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 		}
 		oocAllocs = append(oocAllocs, gcpOOC...)
 	}
-
-	return oocAllocs, nil // TODO: transform the QuerySQL lines into the new OutOfClusterAllocation Struct
+	return oocAllocs, nil
 }
 
 // QuerySQL can query a properly configured Athena database.

+ 13 - 1
pkg/clustercache/watchcontroller.go

@@ -88,7 +88,19 @@ func NewCachingWatcher(restClient rest.Interface, resource string, resourceType
 }
 
 func (c *CachingWatchController) GetAll() []interface{} {
-	return c.indexer.List()
+	list := c.indexer.List()
+
+	// The indexer returns the stored pointer to each resource as-is, so
+	// we deep copy every resource to keep callers from corrupting the
+	// index.
+	cloneList := make([]interface{}, 0, len(list))
+	for _, v := range list {
+		if deepCopyable, ok := v.(rt.Object); ok {
+			cloneList = append(cloneList, deepCopyable.DeepCopyObject())
+		}
+	}
+
+	return cloneList
 }
 
 func (c *CachingWatchController) SetUpdateHandler(handler WatchHandler) WatchController {

+ 150 - 172
pkg/costmodel/cluster.go

@@ -7,6 +7,7 @@ import (
 	"time"
 
 	"github.com/kubecost/cost-model/pkg/cloud"
+	"github.com/kubecost/cost-model/pkg/prom"
 	"github.com/kubecost/cost-model/pkg/util"
 	prometheus "github.com/prometheus/client_golang/api"
 	"k8s.io/klog"
@@ -125,7 +126,7 @@ func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offse
 }
 
 // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
-func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string) (map[string]*ClusterCosts, error) {
+func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
 	// Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
 	start, end, err := util.ParseTimeRange(window, offset)
 	if err != nil {
@@ -133,35 +134,61 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 	}
 	mins := end.Sub(*start).Minutes()
 
-	const fmtQueryDataCount = `count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:1m]%s)`
-
-	const fmtQueryTotalGPU = `sum(
-		sum_over_time(node_gpu_hourly_cost[%s:1m]%s) / 60
-	) by (cluster_id)`
-
-	const fmtQueryTotalCPU = `sum(
-		sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:1m]%s) *
-		avg(avg_over_time(node_cpu_hourly_cost[%s:1m]%s)) by (node, cluster_id) / 60
-	) by (cluster_id)`
-
-	const fmtQueryTotalRAM = `sum(
-		sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:1m]%s) / 1024 / 1024 / 1024 *
-		avg(avg_over_time(node_ram_hourly_cost[%s:1m]%s)) by (node, cluster_id) / 60
-	) by (cluster_id)`
-
-	const fmtQueryTotalStorage = `sum(
-		sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:1m]%s) / 1024 / 1024 / 1024 *
-		avg(avg_over_time(pv_hourly_cost[%s:1m]%s)) by (persistentvolume, cluster_id) / 60
-	) by (cluster_id) %s`
-
-	const fmtQueryCPUModePct = `sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
-	group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)`
-
-	const fmtQueryRAMSystemPct = `sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:1m]%s)) by (cluster_id)
-	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
-
-	const fmtQueryRAMUserPct = `sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:1m]%s)) by (cluster_id)
-	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
+	// minsPerResolution determines accuracy and resource use for the following
+	// queries. Smaller values (higher resolution) result in better accuracy,
+	// but more expensive queries, and vice versa.
+	minsPerResolution := 5
+
+	// hourlyToCumulative is a scaling factor that, when multiplied by an hourly
+	// value, converts it to a cumulative value; i.e.
+	// [$/hr] * [min/res]*[hr/min] = [$/res]
+	hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
+
+	const fmtQueryDataCount = `
+		count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:%dm]%s) * %d
+	`
+
+	const fmtQueryTotalGPU = `
+		sum(
+			sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
+		) by (cluster_id)
+	`
+
+	const fmtQueryTotalCPU = `
+		sum(
+			sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:%dm]%s) *
+			avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
+		) by (cluster_id)
+	`
+
+	const fmtQueryTotalRAM = `
+		sum(
+			sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
+			avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
+		) by (cluster_id)
+	`
+
+	const fmtQueryTotalStorage = `
+		sum(
+			sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
+			avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, cluster_id) * %f
+		) by (cluster_id)
+	`
+
+	const fmtQueryCPUModePct = `
+		sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
+		group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)
+	`
+
+	const fmtQueryRAMSystemPct = `
+		sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (cluster_id)
+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
+	`
+
+	const fmtQueryRAMUserPct = `
+		sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (cluster_id)
+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
+	`
 
 	// TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
 	// const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
@@ -179,94 +206,42 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 		fmtOffset = fmt.Sprintf("offset %s", offset)
 	}
 
-	queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, fmtOffset)
-	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, fmtOffset)
-	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, fmtOffset, window, fmtOffset)
-	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, fmtOffset, window, fmtOffset)
-	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, fmtOffset, window, fmtOffset, queryTotalLocalStorage)
-	queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
-	queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, fmtOffset, window, fmtOffset)
-	queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, fmtOffset, window, fmtOffset)
-
-	numQueries := 9
-
-	klog.V(4).Infof("[Debug] queryDataCount: %s", queryDataCount)
-	klog.V(4).Infof("[Debug] queryTotalGPU: %s", queryTotalGPU)
-	klog.V(4).Infof("[Debug] queryTotalCPU: %s", queryTotalCPU)
-	klog.V(4).Infof("[Debug] queryTotalRAM: %s", queryTotalRAM)
-	klog.V(4).Infof("[Debug] queryTotalStorage: %s", queryTotalStorage)
-	klog.V(4).Infof("[Debug] queryCPUModePct: %s", queryCPUModePct)
-	klog.V(4).Infof("[Debug] queryRAMSystemPct: %s", queryRAMSystemPct)
-	klog.V(4).Infof("[Debug] queryRAMUserPct: %s", queryRAMUserPct)
-	klog.V(4).Infof("[Debug] queryUsedLocalStorage: %s", queryUsedLocalStorage)
-
-	// Submit queries to Prometheus asynchronously
-	var ec util.ErrorCollector
-	var wg sync.WaitGroup
-	ctx := PromQueryContext{client, &ec, &wg}
-	ctx.WaitGroup.Add(numQueries)
-
-	chDataCount := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryDataCount, chDataCount, ctx)
-
-	chTotalGPU := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryTotalGPU, chTotalGPU, ctx)
-
-	chTotalCPU := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryTotalCPU, chTotalCPU, ctx)
-
-	chTotalRAM := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryTotalRAM, chTotalRAM, ctx)
-
-	chTotalStorage := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryTotalStorage, chTotalStorage, ctx)
-
-	chCPUModePct := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryCPUModePct, chCPUModePct, ctx)
-
-	chRAMSystemPct := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryRAMSystemPct, chRAMSystemPct, ctx)
-
-	chRAMUserPct := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryRAMUserPct, chRAMUserPct, ctx)
+	queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, minsPerResolution, fmtOffset, minsPerResolution)
+	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, minsPerResolution, fmtOffset, hourlyToCumulative)
+	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
+	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
+	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
 
-	chUsedLocalStorage := make(chan []*PromQueryResult, 1)
-	go AsyncPromQuery(queryUsedLocalStorage, chUsedLocalStorage, ctx)
+	ctx := prom.NewContext(client)
 
-	// After queries complete, retrieve results
-	wg.Wait()
+	resChs := ctx.QueryAll(
+		queryDataCount,
+		queryTotalGPU,
+		queryTotalCPU,
+		queryTotalRAM,
+		queryTotalStorage,
+		queryTotalLocalStorage,
+	)
 
-	resultsDataCount := <-chDataCount
-	close(chDataCount)
+	if withBreakdown {
+		queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
+		queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
+		queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
 
-	resultsTotalGPU := <-chTotalGPU
-	close(chTotalGPU)
+		bdResChs := ctx.QueryAll(
+			queryCPUModePct,
+			queryRAMSystemPct,
+			queryRAMUserPct,
+			queryUsedLocalStorage,
+		)
 
-	resultsTotalCPU := <-chTotalCPU
-	close(chTotalCPU)
-
-	resultsTotalRAM := <-chTotalRAM
-	close(chTotalRAM)
-
-	resultsTotalStorage := <-chTotalStorage
-	close(chTotalStorage)
-
-	resultsCPUModePct := <-chCPUModePct
-	close(chCPUModePct)
-
-	resultsRAMSystemPct := <-chRAMSystemPct
-	close(chRAMSystemPct)
-
-	resultsRAMUserPct := <-chRAMUserPct
-	close(chRAMUserPct)
-
-	resultsUsedLocalStorage := <-chUsedLocalStorage
-	close(chUsedLocalStorage)
+		resChs = append(resChs, bdResChs...)
+	}
 
 	defaultClusterID := os.Getenv(clusterIDKey)
 
 	dataMinsByCluster := map[string]float64{}
-	for _, result := range resultsDataCount {
+	for _, result := range resChs[0].Await() {
 		clusterID, _ := result.GetString("cluster_id")
 		if clusterID == "" {
 			clusterID = defaultClusterID
@@ -299,7 +274,7 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
 	// Helper function to iterate over Prom query results, parsing the raw values into
 	// the intermediate costData structure.
-	setCostsFromResults := func(costData map[string]map[string]float64, results []*PromQueryResult, name string, discount float64, customDiscount float64) {
+	setCostsFromResults := func(costData map[string]map[string]float64, results []*prom.QueryResult, name string, discount float64, customDiscount float64) {
 		for _, result := range results {
 			clusterID, _ := result.GetString("cluster_id")
 			if clusterID == "" {
@@ -315,79 +290,82 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 		}
 	}
 	// Apply both sustained use and custom discounts to RAM and CPU
-	setCostsFromResults(costData, resultsTotalCPU, "cpu", discount, customDiscount)
-	setCostsFromResults(costData, resultsTotalRAM, "ram", discount, customDiscount)
+	setCostsFromResults(costData, resChs[2].Await(), "cpu", discount, customDiscount)
+	setCostsFromResults(costData, resChs[3].Await(), "ram", discount, customDiscount)
 	// Apply only custom discount to GPU and storage
-	setCostsFromResults(costData, resultsTotalGPU, "gpu", 0.0, customDiscount)
-	setCostsFromResults(costData, resultsTotalStorage, "storage", 0.0, customDiscount)
+	setCostsFromResults(costData, resChs[1].Await(), "gpu", 0.0, customDiscount)
+	setCostsFromResults(costData, resChs[4].Await(), "storage", 0.0, customDiscount)
+	setCostsFromResults(costData, resChs[5].Await(), "localstorage", 0.0, customDiscount)
 
 	cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
-	for _, result := range resultsCPUModePct {
-		clusterID, _ := result.GetString("cluster_id")
-		if clusterID == "" {
-			clusterID = defaultClusterID
-		}
-		if _, ok := cpuBreakdownMap[clusterID]; !ok {
-			cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
-		}
-		cpuBD := cpuBreakdownMap[clusterID]
+	ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
+	pvUsedCostMap := map[string]float64{}
+	if withBreakdown {
+		for _, result := range resChs[6].Await() {
+			clusterID, _ := result.GetString("cluster_id")
+			if clusterID == "" {
+				clusterID = defaultClusterID
+			}
+			if _, ok := cpuBreakdownMap[clusterID]; !ok {
+				cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+			}
+			cpuBD := cpuBreakdownMap[clusterID]
 
-		mode, err := result.GetString("mode")
-		if err != nil {
-			klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
-			mode = "other"
-		}
+			mode, err := result.GetString("mode")
+			if err != nil {
+				klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
+				mode = "other"
+			}
 
-		switch mode {
-		case "idle":
-			cpuBD.Idle += result.Values[0].Value
-		case "system":
-			cpuBD.System += result.Values[0].Value
-		case "user":
-			cpuBD.User += result.Values[0].Value
-		default:
-			cpuBD.Other += result.Values[0].Value
+			switch mode {
+			case "idle":
+				cpuBD.Idle += result.Values[0].Value
+			case "system":
+				cpuBD.System += result.Values[0].Value
+			case "user":
+				cpuBD.User += result.Values[0].Value
+			default:
+				cpuBD.Other += result.Values[0].Value
+			}
 		}
-	}
 
-	ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
-	for _, result := range resultsRAMSystemPct {
-		clusterID, _ := result.GetString("cluster_id")
-		if clusterID == "" {
-			clusterID = defaultClusterID
-		}
-		if _, ok := ramBreakdownMap[clusterID]; !ok {
-			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+		for _, result := range resChs[7].Await() {
+			clusterID, _ := result.GetString("cluster_id")
+			if clusterID == "" {
+				clusterID = defaultClusterID
+			}
+			if _, ok := ramBreakdownMap[clusterID]; !ok {
+				ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+			}
+			ramBD := ramBreakdownMap[clusterID]
+			ramBD.System += result.Values[0].Value
 		}
-		ramBD := ramBreakdownMap[clusterID]
-		ramBD.System += result.Values[0].Value
-	}
-	for _, result := range resultsRAMUserPct {
-		clusterID, _ := result.GetString("cluster_id")
-		if clusterID == "" {
-			clusterID = defaultClusterID
+		for _, result := range resChs[8].Await() {
+			clusterID, _ := result.GetString("cluster_id")
+			if clusterID == "" {
+				clusterID = defaultClusterID
+			}
+			if _, ok := ramBreakdownMap[clusterID]; !ok {
+				ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+			}
+			ramBD := ramBreakdownMap[clusterID]
+			ramBD.User += result.Values[0].Value
 		}
-		if _, ok := ramBreakdownMap[clusterID]; !ok {
-			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+		for _, ramBD := range ramBreakdownMap {
+			remaining := 1.0
+			remaining -= ramBD.Other
+			remaining -= ramBD.System
+			remaining -= ramBD.User
+			ramBD.Idle = remaining
 		}
-		ramBD := ramBreakdownMap[clusterID]
-		ramBD.User += result.Values[0].Value
-	}
-	for _, ramBD := range ramBreakdownMap {
-		remaining := 1.0
-		remaining -= ramBD.Other
-		remaining -= ramBD.System
-		remaining -= ramBD.User
-		ramBD.Idle = remaining
-	}
 
-	pvUsedCostMap := map[string]float64{}
-	for _, result := range resultsUsedLocalStorage {
-		clusterID, _ := result.GetString("cluster_id")
-		if clusterID == "" {
-			clusterID = defaultClusterID
+		for _, result := range resChs[9].Await() {
+			clusterID, _ := result.GetString("cluster_id")
+			if clusterID == "" {
+				clusterID = defaultClusterID
+			}
+			pvUsedCostMap[clusterID] += result.Values[0].Value
 		}
-		pvUsedCostMap[clusterID] += result.Values[0].Value
 	}
 
 	// Convert intermediate structure to Costs instances
@@ -398,7 +376,7 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 			dataMins = mins
 			klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
 		}
-		costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"], window, offset, dataMins/util.MinsPerHour)
+		costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/util.MinsPerHour)
 		if err != nil {
 			klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
 			return nil, err

+ 10 - 1
pkg/costmodel/costmodel.go

@@ -2685,10 +2685,15 @@ type ContainerMetric struct {
 	ContainerName string
 	NodeName      string
 	ClusterID     string
+	key           string
 }
 
 func (c *ContainerMetric) Key() string {
-	return c.Namespace + "," + c.PodName + "," + c.ContainerName + "," + c.NodeName + "," + c.ClusterID
+	return c.key
+}
+
+func containerMetricKey(ns, podName, containerName, nodeName, clusterID string) string {
+	return ns + "," + podName + "," + containerName + "," + nodeName + "," + clusterID
 }
 
 func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
@@ -2700,6 +2705,7 @@ func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
 			ContainerName: s[2],
 			NodeName:      s[3],
 			ClusterID:     s[4],
+			key:           key,
 		}, nil
 	}
 	return nil, fmt.Errorf("Not a valid key")
@@ -2712,6 +2718,7 @@ func newContainerMetricFromValues(ns string, podName string, containerName strin
 		ContainerName: containerName,
 		NodeName:      nodeName,
 		ClusterID:     clusterId,
+		key:           containerMetricKey(ns, podName, containerName, nodeName, clusterId),
 	}
 }
 
@@ -2728,6 +2735,7 @@ func newContainerMetricsFromPod(pod v1.Pod, clusterID string) ([]*ContainerMetri
 			ContainerName: containerName,
 			NodeName:      node,
 			ClusterID:     clusterID,
+			key:           containerMetricKey(ns, podName, containerName, node, clusterID),
 		})
 	}
 	return cs, nil
@@ -2782,6 +2790,7 @@ func newContainerMetricFromPrometheus(metrics map[string]interface{}, defaultClu
 		Namespace:     namespace,
 		NodeName:      nodeName,
 		ClusterID:     clusterID,
+		key:           containerMetricKey(namespace, podName, containerName, nodeName, clusterID),
 	}, nil
 }
 

+ 1 - 1
pkg/costmodel/router.go

@@ -332,7 +332,7 @@ func (a *Accesses) ClusterCosts(w http.ResponseWriter, r *http.Request, ps httpr
 	window := r.URL.Query().Get("window")
 	offset := r.URL.Query().Get("offset")
 
-	data, err := ComputeClusterCosts(a.PrometheusClient, a.Cloud, window, offset)
+	data, err := ComputeClusterCosts(a.PrometheusClient, a.Cloud, window, offset, true)
 	w.Write(WrapData(data, err))
 }