Просмотр исходного кода

Merge pull request #374 from kubecost/develop

merge develop into master
Ajay Tripathy 6 лет назад
Родитель
Сommit
ce5f40d717

+ 1 - 1
pkg/cloud/awsprovider.go

@@ -209,7 +209,7 @@ var regionToBillingRegionCode = map[string]string{
 	"us-gov-west-1":  "UGW1",
 }
 
-func (aws *AWS) GetLocalStorageQuery(window, offset string, rate bool) string {
+func (aws *AWS) GetLocalStorageQuery(window, offset string, rate bool, used bool) string {
 	return ""
 }
 

+ 1 - 1
pkg/cloud/azureprovider.go

@@ -688,6 +688,6 @@ func (az *Azure) PVPricing(pvk PVKey) (*PV, error) {
 	return pricing.PV, nil
 }
 
-func (az *Azure) GetLocalStorageQuery(window, offset string, rate bool) string {
+func (az *Azure) GetLocalStorageQuery(window, offset string, rate bool, used bool) string {
 	return ""
 }

+ 1 - 1
pkg/cloud/customprovider.go

@@ -37,7 +37,7 @@ type customProviderKey struct {
 	Labels         map[string]string
 }
 
-func (*CustomProvider) GetLocalStorageQuery(window, offset string, rate bool) string {
+func (*CustomProvider) GetLocalStorageQuery(window, offset string, rate bool, used bool) string {
 	return ""
 }
 

+ 11 - 4
pkg/cloud/gcpprovider.go

@@ -115,22 +115,29 @@ func gcpAllocationToOutOfClusterAllocation(gcpAlloc gcpAllocation) *OutOfCluster
 	}
 }
 
-func (gcp *GCP) GetLocalStorageQuery(window, offset string, rate bool) string {
+// GetLocalStorageQuery returns the cost of local storage for the given window. Setting rate=true
+// returns hourly spend. Setting used=true only tracks used storage, not total.
+func (gcp *GCP) GetLocalStorageQuery(window, offset string, rate bool, used bool) string {
 	// TODO Set to the price for the appropriate storage class. It's not trivial to determine the local storage disk type
 	// See https://cloud.google.com/compute/disks-image-pricing#persistentdisk
 	localStorageCost := 0.04
 
+	baseMetric := "container_fs_limit_bytes"
+	if used {
+		baseMetric = "container_fs_usage_bytes"
+	}
+
 	fmtOffset := ""
 	if offset != "" {
 		fmtOffset = fmt.Sprintf("offset %s", offset)
 	}
 
 	fmtCumulativeQuery := `sum(
-		sum_over_time(container_fs_limit_bytes{device!="tmpfs", id="/"}[%s:1m]%s)
+		sum_over_time(%s{device!="tmpfs", id="/"}[%s:1m]%s)
 	) by (cluster_id) / 60 / 730 / 1024 / 1024 / 1024 * %f`
 
 	fmtMonthlyQuery := `sum(
-		avg_over_time(container_fs_limit_bytes{device!="tmpfs", id="/"}[%s:1m]%s)
+		avg_over_time(%s{device!="tmpfs", id="/"}[%s:1m]%s)
 	) by (cluster_id) / 1024 / 1024 / 1024 * %f`
 
 	fmtQuery := fmtCumulativeQuery
@@ -138,7 +145,7 @@ func (gcp *GCP) GetLocalStorageQuery(window, offset string, rate bool) string {
 		fmtQuery = fmtMonthlyQuery
 	}
 
-	return fmt.Sprintf(fmtQuery, window, fmtOffset, localStorageCost)
+	return fmt.Sprintf(fmtQuery, baseMetric, window, fmtOffset, localStorageCost)
 }
 
 func (gcp *GCP) GetConfig() (*CustomPricing, error) {

+ 1 - 1
pkg/cloud/provider.go

@@ -175,7 +175,7 @@ type Provider interface {
 	UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
 	GetConfig() (*CustomPricing, error)
 	GetManagementPlatform() (string, error)
-	GetLocalStorageQuery(string, string, bool) string
+	GetLocalStorageQuery(string, string, bool, bool) string
 	ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
 	ApplyReservedInstancePricing(map[string]*Node)
 }

+ 210 - 249
pkg/costmodel/cluster.go

@@ -32,27 +32,29 @@ const (
 		avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730 
 		* avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
 	  ) by (cluster_id) %s`
+
+	queryNodes = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 %s`
 )
 
 // TODO move this to a package-accessible helper
 type PromQueryContext struct {
-	client prometheus.Client
-	ec     *util.ErrorCollector
-	wg     *sync.WaitGroup
+	Client         prometheus.Client
+	ErrorCollector *util.ErrorCollector
+	WaitGroup      *sync.WaitGroup
 }
 
 // TODO move this to a package-accessible helper function once dependencies are able to
 // be extricated from costmodel package (PromQueryResult -> util.Vector). Otherwise, circular deps.
 func AsyncPromQuery(query string, resultCh chan []*PromQueryResult, ctx PromQueryContext) {
-	if ctx.wg != nil {
-		defer ctx.wg.Done()
+	if ctx.WaitGroup != nil {
+		defer ctx.WaitGroup.Done()
 	}
 
-	raw, promErr := Query(ctx.client, query)
-	ctx.ec.Report(promErr)
+	raw, promErr := Query(ctx.Client, query)
+	ctx.ErrorCollector.Report(promErr)
 
 	results, parseErr := NewQueryResults(raw)
-	ctx.ec.Report(parseErr)
+	ctx.ErrorCollector.Report(parseErr)
 
 	resultCh <- results
 }
@@ -60,18 +62,30 @@ func AsyncPromQuery(query string, resultCh chan []*PromQueryResult, ctx PromQuer
 // Costs represents cumulative and monthly cluster costs over a given duration. Costs
 // are broken down by cores, memory, and storage.
 type ClusterCosts struct {
-	Start             *time.Time `json:"startTime"`
-	End               *time.Time `json:"endTime"`
-	CPUCumulative     float64    `json:"cpuCumulativeCost"`
-	CPUMonthly        float64    `json:"cpuMonthlyCost"`
-	GPUCumulative     float64    `json:"gpuCumulativeCost"`
-	GPUMonthly        float64    `json:"gpuMonthlyCost"`
-	RAMCumulative     float64    `json:"ramCumulativeCost"`
-	RAMMonthly        float64    `json:"ramMonthlyCost"`
-	StorageCumulative float64    `json:"storageCumulativeCost"`
-	StorageMonthly    float64    `json:"storageMonthlyCost"`
-	TotalCumulative   float64    `json:"totalCumulativeCost"`
-	TotalMonthly      float64    `json:"totalMonthlyCost"`
+	Start             *time.Time             `json:"startTime"`
+	End               *time.Time             `json:"endTime"`
+	CPUCumulative     float64                `json:"cpuCumulativeCost"`
+	CPUMonthly        float64                `json:"cpuMonthlyCost"`
+	CPUBreakdown      *ClusterCostsBreakdown `json:"cpuBreakdown"`
+	GPUCumulative     float64                `json:"gpuCumulativeCost"`
+	GPUMonthly        float64                `json:"gpuMonthlyCost"`
+	RAMCumulative     float64                `json:"ramCumulativeCost"`
+	RAMMonthly        float64                `json:"ramMonthlyCost"`
+	RAMBreakdown      *ClusterCostsBreakdown `json:"ramBreakdown"`
+	StorageCumulative float64                `json:"storageCumulativeCost"`
+	StorageMonthly    float64                `json:"storageMonthlyCost"`
+	StorageBreakdown  *ClusterCostsBreakdown `json:"storageBreakdown"`
+	TotalCumulative   float64                `json:"totalCumulativeCost"`
+	TotalMonthly      float64                `json:"totalMonthlyCost"`
+}
+
+// ClusterCostsBreakdown provides percentage-based breakdown of a resource by
+// categories: user for user-space (i.e. non-system) usage, system, and idle.
+type ClusterCostsBreakdown struct {
+	Idle   float64 `json:"idle"`
+	Other  float64 `json:"other"`
+	System float64 `json:"system"`
+	User   float64 `json:"user"`
 }
 
 // NewClusterCostsFromCumulative takes cumulative cost data over a given time range, computes
@@ -110,42 +124,6 @@ func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offse
 	return cc, nil
 }
 
-// NewClusterCostsFromMonthly takes monthly-rate cost data over a given time range, computes
-// the associated cumulative cost data, and returns the Costs.
-func NewClusterCostsFromMonthly(cpuMonthly, gpuMonthly, ramMonthly, storageMonthly float64, window, offset string, dataHours float64) (*ClusterCosts, error) {
-	start, end, err := util.ParseTimeRange(window, offset)
-	if err != nil {
-		return nil, err
-	}
-
-	// If the number of hours is not given (i.e. is zero) compute one from the window and offset
-	if dataHours == 0 {
-		dataHours = end.Sub(*start).Hours()
-	}
-
-	// Do not allow zero-length windows to prevent divide-by-zero issues
-	if dataHours == 0 {
-		return nil, fmt.Errorf("illegal time range: window %s, offset %s", window, offset)
-	}
-
-	cc := &ClusterCosts{
-		Start:             start,
-		End:               end,
-		CPUMonthly:        cpuMonthly,
-		GPUMonthly:        gpuMonthly,
-		RAMMonthly:        ramMonthly,
-		StorageMonthly:    storageMonthly,
-		TotalMonthly:      cpuMonthly + gpuMonthly + ramMonthly + storageMonthly,
-		CPUCumulative:     cpuMonthly / util.HoursPerMonth * dataHours,
-		GPUCumulative:     gpuMonthly / util.HoursPerMonth * dataHours,
-		RAMCumulative:     ramMonthly / util.HoursPerMonth * dataHours,
-		StorageCumulative: storageMonthly / util.HoursPerMonth * dataHours,
-	}
-	cc.TotalCumulative = cc.CPUCumulative + cc.GPUCumulative + cc.RAMCumulative + cc.StorageCumulative
-
-	return cc, nil
-}
-
 // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
 func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string) (map[string]*ClusterCosts, error) {
 	// Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
@@ -176,7 +154,22 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 		avg(avg_over_time(pv_hourly_cost[%s:1m]%s)) by (persistentvolume, cluster_id) / 60
 	) by (cluster_id) %s`
 
-	queryTotalLocalStorage := provider.GetLocalStorageQuery(window, offset, false)
+	const fmtQueryCPUModePct = `sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
+	group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)`
+
+	const fmtQueryRAMSystemPct = `sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:1m]%s)) by (cluster_id)
+	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
+
+	const fmtQueryRAMUserPct = `sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:1m]%s)) by (cluster_id)
+	/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:1m]%s)) by (cluster_id)`
+
+	// TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
+	// const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
+	// group_right(storageclass) sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim,namespace))`
+
+	queryUsedLocalStorage := provider.GetLocalStorageQuery(window, offset, false, true)
+
+	queryTotalLocalStorage := provider.GetLocalStorageQuery(window, offset, false, false)
 	if queryTotalLocalStorage != "" {
 		queryTotalLocalStorage = fmt.Sprintf(" + %s", queryTotalLocalStorage)
 	}
@@ -191,19 +184,27 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, fmtOffset, window, fmtOffset)
 	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, fmtOffset, window, fmtOffset)
 	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, fmtOffset, window, fmtOffset, queryTotalLocalStorage)
-	numQueries := 5
+	queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
+	queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, fmtOffset, window, fmtOffset)
+	queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, fmtOffset, window, fmtOffset)
+
+	numQueries := 9
 
 	klog.V(4).Infof("[Debug] queryDataCount: %s", queryDataCount)
 	klog.V(4).Infof("[Debug] queryTotalGPU: %s", queryTotalGPU)
 	klog.V(4).Infof("[Debug] queryTotalCPU: %s", queryTotalCPU)
 	klog.V(4).Infof("[Debug] queryTotalRAM: %s", queryTotalRAM)
 	klog.V(4).Infof("[Debug] queryTotalStorage: %s", queryTotalStorage)
+	klog.V(4).Infof("[Debug] queryCPUModePct: %s", queryCPUModePct)
+	klog.V(4).Infof("[Debug] queryRAMSystemPct: %s", queryRAMSystemPct)
+	klog.V(4).Infof("[Debug] queryRAMUserPct: %s", queryRAMUserPct)
+	klog.V(4).Infof("[Debug] queryUsedLocalStorage: %s", queryUsedLocalStorage)
 
 	// Submit queries to Prometheus asynchronously
 	var ec util.ErrorCollector
 	var wg sync.WaitGroup
 	ctx := PromQueryContext{client, &ec, &wg}
-	ctx.wg.Add(numQueries)
+	ctx.WaitGroup.Add(numQueries)
 
 	chDataCount := make(chan []*PromQueryResult, 1)
 	go AsyncPromQuery(queryDataCount, chDataCount, ctx)
@@ -220,6 +221,18 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 	chTotalStorage := make(chan []*PromQueryResult, 1)
 	go AsyncPromQuery(queryTotalStorage, chTotalStorage, ctx)
 
+	chCPUModePct := make(chan []*PromQueryResult, 1)
+	go AsyncPromQuery(queryCPUModePct, chCPUModePct, ctx)
+
+	chRAMSystemPct := make(chan []*PromQueryResult, 1)
+	go AsyncPromQuery(queryRAMSystemPct, chRAMSystemPct, ctx)
+
+	chRAMUserPct := make(chan []*PromQueryResult, 1)
+	go AsyncPromQuery(queryRAMUserPct, chRAMUserPct, ctx)
+
+	chUsedLocalStorage := make(chan []*PromQueryResult, 1)
+	go AsyncPromQuery(queryUsedLocalStorage, chUsedLocalStorage, ctx)
+
 	// After queries complete, retrieve results
 	wg.Wait()
 
@@ -238,6 +251,18 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 	resultsTotalStorage := <-chTotalStorage
 	close(chTotalStorage)
 
+	resultsCPUModePct := <-chCPUModePct
+	close(chCPUModePct)
+
+	resultsRAMSystemPct := <-chRAMSystemPct
+	close(chRAMSystemPct)
+
+	resultsRAMUserPct := <-chRAMUserPct
+	close(chRAMUserPct)
+
+	resultsUsedLocalStorage := <-chUsedLocalStorage
+	close(chUsedLocalStorage)
+
 	defaultClusterID := os.Getenv(clusterIDKey)
 
 	dataMinsByCluster := map[string]float64{}
@@ -255,12 +280,26 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 		dataMinsByCluster[clusterID] = dataMins
 	}
 
+	// Determine combined discount
+	discount, customDiscount := 0.0, 0.0
+	c, err := A.Cloud.GetConfig()
+	if err == nil {
+		discount, err = ParsePercentString(c.Discount)
+		if err != nil {
+			discount = 0.0
+		}
+		customDiscount, err = ParsePercentString(c.NegotiatedDiscount)
+		if err != nil {
+			customDiscount = 0.0
+		}
+	}
+
 	// Intermediate structure storing mapping of [clusterID][type ∈ {cpu, ram, storage, total}]=cost
 	costData := make(map[string]map[string]float64)
 
 	// Helper function to iterate over Prom query results, parsing the raw values into
 	// the intermediate costData structure.
-	setCostsFromResults := func(costData map[string]map[string]float64, results []*PromQueryResult, name string) {
+	setCostsFromResults := func(costData map[string]map[string]float64, results []*PromQueryResult, name string, discount float64, customDiscount float64) {
 		for _, result := range results {
 			clusterID, _ := result.GetString("cluster_id")
 			if clusterID == "" {
@@ -270,15 +309,86 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 				costData[clusterID] = map[string]float64{}
 			}
 			if len(result.Values) > 0 {
-				costData[clusterID][name] += result.Values[0].Value
-				costData[clusterID]["total"] += result.Values[0].Value
+				costData[clusterID][name] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
+				costData[clusterID]["total"] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
 			}
 		}
 	}
-	setCostsFromResults(costData, resultsTotalGPU, "gpu")
-	setCostsFromResults(costData, resultsTotalCPU, "cpu")
-	setCostsFromResults(costData, resultsTotalRAM, "ram")
-	setCostsFromResults(costData, resultsTotalStorage, "storage")
+	// Apply both sustained use and custom discounts to RAM and CPU
+	setCostsFromResults(costData, resultsTotalCPU, "cpu", discount, customDiscount)
+	setCostsFromResults(costData, resultsTotalRAM, "ram", discount, customDiscount)
+	// Apply only custom discount to GPU and storage
+	setCostsFromResults(costData, resultsTotalGPU, "gpu", 0.0, customDiscount)
+	setCostsFromResults(costData, resultsTotalStorage, "storage", 0.0, customDiscount)
+
+	cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
+	for _, result := range resultsCPUModePct {
+		clusterID, _ := result.GetString("cluster_id")
+		if clusterID == "" {
+			clusterID = defaultClusterID
+		}
+		if _, ok := cpuBreakdownMap[clusterID]; !ok {
+			cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+		}
+		cpuBD := cpuBreakdownMap[clusterID]
+
+		mode, err := result.GetString("mode")
+		if err != nil {
+			klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
+			mode = "other"
+		}
+
+		switch mode {
+		case "idle":
+			cpuBD.Idle += result.Values[0].Value
+		case "system":
+			cpuBD.System += result.Values[0].Value
+		case "user":
+			cpuBD.User += result.Values[0].Value
+		default:
+			cpuBD.Other += result.Values[0].Value
+		}
+	}
+
+	ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
+	for _, result := range resultsRAMSystemPct {
+		clusterID, _ := result.GetString("cluster_id")
+		if clusterID == "" {
+			clusterID = defaultClusterID
+		}
+		if _, ok := ramBreakdownMap[clusterID]; !ok {
+			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+		}
+		ramBD := ramBreakdownMap[clusterID]
+		ramBD.System += result.Values[0].Value
+	}
+	for _, result := range resultsRAMUserPct {
+		clusterID, _ := result.GetString("cluster_id")
+		if clusterID == "" {
+			clusterID = defaultClusterID
+		}
+		if _, ok := ramBreakdownMap[clusterID]; !ok {
+			ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
+		}
+		ramBD := ramBreakdownMap[clusterID]
+		ramBD.User += result.Values[0].Value
+	}
+	for _, ramBD := range ramBreakdownMap {
+		remaining := 1.0
+		remaining -= ramBD.Other
+		remaining -= ramBD.System
+		remaining -= ramBD.User
+		ramBD.Idle = remaining
+	}
+
+	pvUsedCostMap := map[string]float64{}
+	for _, result := range resultsUsedLocalStorage {
+		clusterID, _ := result.GetString("cluster_id")
+		if clusterID == "" {
+			clusterID = defaultClusterID
+		}
+		pvUsedCostMap[clusterID] += result.Values[0].Value
+	}
 
 	// Convert intermediate structure to Costs instances
 	costsByCluster := map[string]*ClusterCosts{}
@@ -293,6 +403,19 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 			klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
 			return nil, err
 		}
+
+		if cpuBD, ok := cpuBreakdownMap[id]; ok {
+			costs.CPUBreakdown = cpuBD
+		}
+		if ramBD, ok := ramBreakdownMap[id]; ok {
+			costs.RAMBreakdown = ramBD
+		}
+		costs.StorageBreakdown = &ClusterCostsBreakdown{}
+		if pvUC, ok := pvUsedCostMap[id]; ok {
+			costs.StorageBreakdown.Idle = (costs.StorageCumulative - pvUC) / costs.StorageCumulative
+			costs.StorageBreakdown.User = pvUC / costs.StorageCumulative
+		}
+
 		costsByCluster[id] = costs
 	}
 
@@ -313,7 +436,7 @@ func resultToTotals(qr interface{}) ([][]string, error) {
 	}
 
 	if len(results) == 0 {
-		return nil, fmt.Errorf("Not enough data available in the selected time range")
+		return [][]string{}, fmt.Errorf("Not enough data available in the selected time range")
 	}
 
 	result := results[0]
@@ -330,187 +453,9 @@ func resultToTotals(qr interface{}) ([][]string, error) {
 	return totals, nil
 }
 
-func resultToTotal(qr interface{}) (map[string][][]string, error) {
-	defaultClusterID := os.Getenv(clusterIDKey)
-
-	results, err := NewQueryResults(qr)
-	if err != nil {
-		return nil, err
-	}
-
-	toReturn := make(map[string][][]string)
-	for _, result := range results {
-		clusterID, _ := result.GetString("cluster_id")
-		if clusterID == "" {
-			clusterID = defaultClusterID
-		}
-
-		// Expect a single value only
-		if len(result.Values) == 0 {
-			klog.V(1).Infof("[Warning] Metric values did not contain any valid data.")
-			continue
-		}
-
-		value := result.Values[0]
-		d0 := fmt.Sprintf("%f", value.Timestamp)
-		d1 := fmt.Sprintf("%f", value.Value)
-		toAppend := []string{
-			d0,
-			d1,
-		}
-		if t, ok := toReturn[clusterID]; ok {
-			t = append(t, toAppend)
-		} else {
-			toReturn[clusterID] = [][]string{toAppend}
-		}
-	}
-
-	return toReturn, nil
-}
-
-// ClusterCostsForAllClusters gives the cluster costs averaged over a window of time for all clusters.
-func ClusterCostsForAllClusters(cli prometheus.Client, provider cloud.Provider, window, offset string) (map[string]*Totals, error) {
-	localStorageQuery := provider.GetLocalStorageQuery(window, offset, true)
-	if localStorageQuery != "" {
-		localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
-	}
-
-	fmtOffset := ""
-	if offset != "" {
-		fmtOffset = fmt.Sprintf("offset %s", offset)
-	}
-
-	qCores := fmt.Sprintf(queryClusterCores, window, fmtOffset, window, fmtOffset, window, fmtOffset)
-	qRAM := fmt.Sprintf(queryClusterRAM, window, fmtOffset, window, fmtOffset)
-	qStorage := fmt.Sprintf(queryStorage, window, fmtOffset, window, fmtOffset, localStorageQuery)
-
-	klog.V(4).Infof("Running query %s", qCores)
-	resultClusterCores, err := Query(cli, qCores)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qCores, err.Error())
-	}
-
-	klog.V(4).Infof("Running query %s", qRAM)
-	resultClusterRAM, err := Query(cli, qRAM)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qRAM, err.Error())
-	}
-
-	klog.V(4).Infof("Running query %s", qRAM)
-	resultStorage, err := Query(cli, qStorage)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qStorage, err.Error())
-	}
-
-	toReturn := make(map[string]*Totals)
-
-	coreTotal, err := resultToTotal(resultClusterCores)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qCores, err.Error())
-	}
-	for clusterID, total := range coreTotal {
-		if _, ok := toReturn[clusterID]; !ok {
-			toReturn[clusterID] = &Totals{}
-		}
-		toReturn[clusterID].CPUCost = total
-	}
-
-	ramTotal, err := resultToTotal(resultClusterRAM)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qRAM, err.Error())
-	}
-	for clusterID, total := range ramTotal {
-		if _, ok := toReturn[clusterID]; !ok {
-			toReturn[clusterID] = &Totals{}
-		}
-		toReturn[clusterID].MemCost = total
-	}
-
-	storageTotal, err := resultToTotal(resultStorage)
-	if err != nil {
-		return nil, fmt.Errorf("Error for query %s: %s", qStorage, err.Error())
-	}
-	for clusterID, total := range storageTotal {
-		if _, ok := toReturn[clusterID]; !ok {
-			toReturn[clusterID] = &Totals{}
-		}
-		toReturn[clusterID].StorageCost = total
-	}
-
-	return toReturn, nil
-}
-
-// AverageClusterTotals gives the current full cluster costs averaged over a window of time.
-// Used to be ClutserCosts, but has been deprecated for that use.
-func AverageClusterTotals(cli prometheus.Client, provider cloud.Provider, windowString, offset string) (*Totals, error) {
-	// turn offsets of the format "[0-9+]h" into the format "offset [0-9+]h" for use in query templatess
-	fmtOffset := ""
-	if offset != "" {
-		fmtOffset = fmt.Sprintf("offset %s", offset)
-	}
-
-	localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true)
-	if localStorageQuery != "" {
-		localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
-	}
-
-	qCores := fmt.Sprintf(queryClusterCores, windowString, fmtOffset, windowString, fmtOffset, windowString, fmtOffset)
-	qRAM := fmt.Sprintf(queryClusterRAM, windowString, fmtOffset, windowString, fmtOffset)
-	qStorage := fmt.Sprintf(queryStorage, windowString, fmtOffset, windowString, fmtOffset, localStorageQuery)
-	qTotal := fmt.Sprintf(queryTotal, localStorageQuery)
-
-	resultClusterCores, err := Query(cli, qCores)
-	if err != nil {
-		return nil, err
-	}
-	resultClusterRAM, err := Query(cli, qRAM)
-	if err != nil {
-		return nil, err
-	}
-
-	resultStorage, err := Query(cli, qStorage)
-	if err != nil {
-		return nil, err
-	}
-
-	resultTotal, err := Query(cli, qTotal)
-	if err != nil {
-		return nil, err
-	}
-
-	coreTotal, err := resultToTotal(resultClusterCores)
-	if err != nil {
-		return nil, err
-	}
-
-	ramTotal, err := resultToTotal(resultClusterRAM)
-	if err != nil {
-		return nil, err
-	}
-
-	storageTotal, err := resultToTotal(resultStorage)
-	if err != nil {
-		return nil, err
-	}
-
-	clusterTotal, err := resultToTotal(resultTotal)
-	if err != nil {
-		return nil, err
-	}
-
-	defaultClusterID := os.Getenv(clusterIDKey)
-
-	return &Totals{
-		TotalCost:   clusterTotal[defaultClusterID],
-		CPUCost:     coreTotal[defaultClusterID],
-		MemCost:     ramTotal[defaultClusterID],
-		StorageCost: storageTotal[defaultClusterID],
-	}, nil
-}
-
 // ClusterCostsOverTime gives the full cluster costs over time
 func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startString, endString, windowString, offset string) (*Totals, error) {
-	localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true)
+	localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true, false)
 	if localStorageQuery != "" {
 		localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
 	}
@@ -547,6 +492,7 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 	if err != nil {
 		return nil, err
 	}
+
 	resultClusterRAM, err := QueryRange(cli, qRAM, start, end, window)
 	if err != nil {
 		return nil, err
@@ -564,22 +510,38 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 
 	coreTotal, err := resultToTotals(resultClusterCores)
 	if err != nil {
+		klog.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
 		return nil, err
 	}
 
 	ramTotal, err := resultToTotals(resultClusterRAM)
 	if err != nil {
+		klog.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
 		return nil, err
 	}
 
 	storageTotal, err := resultToTotals(resultStorage)
 	if err != nil {
-		return nil, err
+		klog.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
 	}
 
 	clusterTotal, err := resultToTotals(resultTotal)
 	if err != nil {
-		return nil, err
+		// If clusterTotal query failed, it's likely because there are no PVs, which
+		// causes the qTotal query to return no data. Instead, query only node costs.
+		// If that fails, return an error because something is actually wrong.
+		qNodes := fmt.Sprintf(queryNodes, localStorageQuery)
+
+		resultNodes, err := QueryRange(cli, qNodes, start, end, window)
+		if err != nil {
+			return nil, err
+		}
+
+		clusterTotal, err = resultToTotals(resultNodes)
+		if err != nil {
+			klog.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
+			return nil, err
+		}
 	}
 
 	return &Totals{
@@ -588,5 +550,4 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 		MemCost:     ramTotal,
 		StorageCost: storageTotal,
 	}, nil
-
 }