فهرست منبع

Migrate AggAPI WIP: refactor global singleton into application instance and parametrize or make receiver; same with Router; continue to move and refactor things from closed-source that have to move

Niko Kovacevic 5 سال پیش
والد
کامیت
8c5dc99396
7فایلهای تغییر یافته به همراه2318 افزوده شده و 197 حذف شده
  1. 3 3
      cmd/costmodel/main.go
  2. 1782 0
      pkg/costmodel/aggregation.go
  3. 195 0
      pkg/costmodel/aggregation_test.go
  4. 4 4
      pkg/costmodel/cluster.go
  5. 7 7
      pkg/costmodel/metrics.go
  6. 317 180
      pkg/costmodel/router.go
  7. 10 3
      pkg/util/time.go

+ 3 - 3
cmd/costmodel/main.go

@@ -17,11 +17,11 @@ func Healthz(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
 }
 
 func main() {
-	costmodel.Initialize()
+	a := costmodel.Initialize()
 
 	rootMux := http.NewServeMux()
-	costmodel.Router.GET("/healthz", Healthz)
-	rootMux.Handle("/", costmodel.Router)
+	a.Router.GET("/healthz", Healthz)
+	rootMux.Handle("/", a.Router)
 	rootMux.Handle("/metrics", promhttp.Handler())
 	klog.Fatal(http.ListenAndServe(":9003", errors.PanicHandlerMiddleware(rootMux)))
 }

+ 1782 - 0
pkg/costmodel/aggregation.go

@@ -1,10 +1,41 @@
 package costmodel
 
 import (
+	"fmt"
+	"math"
+	"net/http"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/julienschmidt/httprouter"
+	"github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/kubecost"
+	"github.com/kubecost/cost-model/pkg/log"
+	"github.com/kubecost/cost-model/pkg/prom"
+	"github.com/kubecost/cost-model/pkg/thanos"
 	"github.com/kubecost/cost-model/pkg/util"
+	"github.com/patrickmn/go-cache"
+	prometheusClient "github.com/prometheus/client_golang/api"
+	"k8s.io/klog"
+)
+
+const (
+	// SplitTypeWeighted signals that shared costs should be shared
+	// proportionally, rather than evenly
+	SplitTypeWeighted = "weighted"
+
+	// UnallocatedSubfield indicates an allocation datum that does not have the
+	// chosen Aggregator; e.g. during aggregation by some label, there may be
+	// cost data that do not have the given label.
+	UnallocatedSubfield = "__unallocated__"
 )
 
+// Aggregation describes aggregated cost data, containing cumulative cost and
+// allocation data per resource, vectors of rate data per resource, efficiency
+// data, and metadata describing the type of aggregation operation.
 type Aggregation struct {
 	Aggregator                 string               `json:"aggregation"`
 	Subfields                  []string             `json:"subfields,omitempty"`
@@ -44,3 +75,1754 @@ type Aggregation struct {
 	TotalCost                  float64              `json:"totalCost"`
 	TotalCostVector            []*util.Vector       `json:"totalCostVector,omitempty"`
 }
+
+// TotalHours determines the amount of hours the Aggregation covers, as a
+// function of the cost vectors and the resolution of those vectors' data
+func (a *Aggregation) TotalHours(resolutionHours float64) float64 {
+	length := 1
+
+	if length < len(a.CPUCostVector) {
+		length = len(a.CPUCostVector)
+	}
+	if length < len(a.RAMCostVector) {
+		length = len(a.RAMCostVector)
+	}
+	if length < len(a.PVCostVector) {
+		length = len(a.PVCostVector)
+	}
+	if length < len(a.GPUCostVector) {
+		length = len(a.GPUCostVector)
+	}
+	if length < len(a.NetworkCostVector) {
+		length = len(a.NetworkCostVector)
+	}
+
+	return float64(length) * resolutionHours
+}
+
+// RateCoefficient computes the coefficient by which the total cost needs to be
+// multiplied in order to convert totals costs into per-rate costs.
+func (a *Aggregation) RateCoefficient(rateStr string, resolutionHours float64) float64 {
+	// monthly rate = (730.0)*(total cost)/(total hours)
+	// daily rate = (24.0)*(total cost)/(total hours)
+	// hourly rate = (1.0)*(total cost)/(total hours)
+
+	// default to hourly rate
+	coeff := 1.0
+	switch rateStr {
+	case "daily":
+		coeff = util.HoursPerDay
+	case "monthly":
+		coeff = util.HoursPerMonth
+	}
+
+	return coeff / a.TotalHours(resolutionHours)
+}
+
+type SharedResourceInfo struct {
+	ShareResources  bool
+	SharedNamespace map[string]bool
+	LabelSelectors  map[string]map[string]bool
+}
+
+type SharedCostInfo struct {
+	Name      string
+	Cost      float64
+	ShareType string
+}
+
+func (s *SharedResourceInfo) IsSharedResource(costDatum *CostData) bool {
+	// exists in a shared namespace
+	if _, ok := s.SharedNamespace[costDatum.Namespace]; ok {
+		return true
+	}
+	// has at least one shared label (OR, not AND in the case of multiple labels)
+	for labelName, labelValues := range s.LabelSelectors {
+		if val, ok := costDatum.Labels[labelName]; ok && labelValues[val] {
+			return true
+		}
+	}
+	return false
+}
+
+func NewSharedResourceInfo(shareResources bool, sharedNamespaces []string, labelNames []string, labelValues []string) *SharedResourceInfo {
+	sr := &SharedResourceInfo{
+		ShareResources:  shareResources,
+		SharedNamespace: make(map[string]bool),
+		LabelSelectors:  make(map[string]map[string]bool),
+	}
+
+	for _, ns := range sharedNamespaces {
+		sr.SharedNamespace[strings.Trim(ns, " ")] = true
+	}
+
+	// Creating a map of label name to label value, but only if
+	// the cardinality matches
+	if len(labelNames) == len(labelValues) {
+		for i := range labelNames {
+			cleanedLname := SanitizeLabelName(strings.Trim(labelNames[i], " "))
+			if values, ok := sr.LabelSelectors[cleanedLname]; ok {
+				values[strings.Trim(labelValues[i], " ")] = true
+			} else {
+				sr.LabelSelectors[cleanedLname] = map[string]bool{strings.Trim(labelValues[i], " "): true}
+			}
+		}
+	}
+
+	return sr
+}
+
+func GetTotalContainerCost(costData map[string]*CostData, rate string, cp cloud.Provider, discount float64, customDiscount float64, idleCoefficients map[string]float64) float64 {
+	totalContainerCost := 0.0
+	for _, costDatum := range costData {
+		clusterID := costDatum.ClusterID
+		cpuv, ramv, gpuv, pvvs, netv := getPriceVectors(cp, costDatum, rate, discount, customDiscount, idleCoefficients[clusterID])
+		totalContainerCost += totalVectors(cpuv)
+		totalContainerCost += totalVectors(ramv)
+		totalContainerCost += totalVectors(gpuv)
+		for _, pv := range pvvs {
+			totalContainerCost += totalVectors(pv)
+		}
+		totalContainerCost += totalVectors(netv)
+	}
+	return totalContainerCost
+}
+
+func (a *Accesses) ComputeIdleCoefficient(costData map[string]*CostData, cli prometheusClient.Client, cp cloud.Provider, discount float64, customDiscount float64, windowString, offset string) (map[string]float64, error) {
+	coefficients := make(map[string]float64)
+
+	profileName := "ComputeIdleCoefficient: ComputeClusterCosts"
+	profileStart := time.Now()
+
+	var clusterCosts map[string]*ClusterCosts
+	var err error
+
+	key := fmt.Sprintf("%s:%s", windowString, offset)
+	if data, valid := a.ClusterCostsCache.Get(key); valid {
+		clusterCosts = data.(map[string]*ClusterCosts)
+	} else {
+		clusterCosts, err = a.ComputeClusterCosts(cli, cp, windowString, offset, false)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	measureTime(profileStart, profileThreshold, profileName)
+
+	for cid, costs := range clusterCosts {
+		if costs.CPUCumulative == 0 && costs.RAMCumulative == 0 && costs.StorageCumulative == 0 {
+			klog.V(1).Infof("[Warning] No ClusterCosts data for cluster '%s'. Is it emitting data?", cid)
+			coefficients[cid] = 1.0
+			continue
+		}
+
+		if costs.TotalCumulative == 0 {
+			return nil, fmt.Errorf("TotalCumulative cluster cost for cluster '%s' returned 0 over window '%s' offset '%s'", cid, windowString, offset)
+		}
+
+		totalContainerCost := 0.0
+		for _, costDatum := range costData {
+			if costDatum.ClusterID == cid {
+				cpuv, ramv, gpuv, pvvs, _ := getPriceVectors(cp, costDatum, "", discount, customDiscount, 1)
+				totalContainerCost += totalVectors(cpuv)
+				totalContainerCost += totalVectors(ramv)
+				totalContainerCost += totalVectors(gpuv)
+				for _, pv := range pvvs {
+					totalContainerCost += totalVectors(pv)
+				}
+			}
+		}
+
+		coeff := totalContainerCost / costs.TotalCumulative
+		coefficients[cid] = coeff
+	}
+
+	return coefficients, nil
+}
+
+// AggregationOptions provides optional parameters to AggregateCostData, allowing callers to perform more complex operations
+type AggregationOptions struct {
+	Discount               float64            // percent by which to discount CPU, RAM, and GPU cost
+	CustomDiscount         float64            // additional custom discount applied to all prices
+	IdleCoefficients       map[string]float64 // scales costs by amount of idle resources on a per-cluster basis
+	IncludeEfficiency      bool               // set to true to receive efficiency/usage data
+	IncludeTimeSeries      bool               // set to true to receive time series data
+	Rate                   string             // set to "hourly", "daily", or "monthly" to receive cost rate, rather than cumulative cost
+	ResolutionHours        float64
+	SharedResourceInfo     *SharedResourceInfo
+	SharedCosts            map[string]*SharedCostInfo
+	FilteredContainerCount int
+	FilteredEnvironments   map[string]int
+	SharedSplit            string
+	TotalContainerCost     float64
+}
+
+// Helper method to test request/usgae values against allocation averages for efficiency scores. Generate a warning log if
+// clamp is required
+func clampAverage(requestsAvg float64, usedAverage float64, allocationAvg float64, resource string) (float64, float64) {
+	rAvg := requestsAvg
+	if rAvg > allocationAvg {
+		klog.V(4).Infof("[Warning] Average %s Requested (%f) > Average %s Allocated (%f). Clamping.", resource, rAvg, resource, allocationAvg)
+		rAvg = allocationAvg
+	}
+
+	uAvg := usedAverage
+	if uAvg > allocationAvg {
+		klog.V(4).Infof("[Warning]: Average %s Used (%f) > Average %s Allocated (%f). Clamping.", resource, uAvg, resource, allocationAvg)
+		uAvg = allocationAvg
+	}
+
+	return rAvg, uAvg
+}
+
+// AggregateCostData aggregates raw cost data by field; e.g. namespace, cluster, service, or label. In the case of label, callers
+// must pass a slice of subfields indicating the labels by which to group. Provider is used to define custom resource pricing.
+// See AggregationOptions for optional parameters.
+func AggregateCostData(costData map[string]*CostData, field string, subfields []string, cp cloud.Provider, opts *AggregationOptions) map[string]*Aggregation {
+	discount := opts.Discount
+	customDiscount := opts.CustomDiscount
+	idleCoefficients := opts.IdleCoefficients
+	includeTimeSeries := opts.IncludeTimeSeries
+	includeEfficiency := opts.IncludeEfficiency
+	rate := opts.Rate
+	sr := opts.SharedResourceInfo
+
+	resolutionHours := 1.0
+	if opts.ResolutionHours > 0.0 {
+		resolutionHours = opts.ResolutionHours
+	}
+
+	if idleCoefficients == nil {
+		idleCoefficients = make(map[string]float64)
+	}
+
+	// aggregations collects key-value pairs of resource group-to-aggregated data
+	// e.g. namespace-to-data or label-value-to-data
+	aggregations := make(map[string]*Aggregation)
+
+	// sharedResourceCost is the running total cost of resources that should be reported
+	// as shared across all other resources, rather than reported as a stand-alone category
+	sharedResourceCost := 0.0
+
+	for _, costDatum := range costData {
+		idleCoefficient, ok := idleCoefficients[costDatum.ClusterID]
+		if !ok {
+			idleCoefficient = 1.0
+		}
+		if sr != nil && sr.ShareResources && sr.IsSharedResource(costDatum) {
+			cpuv, ramv, gpuv, pvvs, netv := getPriceVectors(cp, costDatum, rate, discount, customDiscount, idleCoefficient)
+			sharedResourceCost += totalVectors(cpuv)
+			sharedResourceCost += totalVectors(ramv)
+			sharedResourceCost += totalVectors(gpuv)
+			sharedResourceCost += totalVectors(netv)
+			for _, pv := range pvvs {
+				sharedResourceCost += totalVectors(pv)
+			}
+		} else {
+			if field == "cluster" {
+				aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.ClusterID, discount, customDiscount, idleCoefficient, false)
+			} else if field == "node" {
+				aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.NodeName, discount, customDiscount, idleCoefficient, false)
+			} else if field == "namespace" {
+				aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace, discount, customDiscount, idleCoefficient, false)
+			} else if field == "service" {
+				if len(costDatum.Services) > 0 {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace+"/"+costDatum.Services[0], discount, customDiscount, idleCoefficient, false)
+				} else {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "deployment" {
+				if len(costDatum.Deployments) > 0 {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace+"/"+costDatum.Deployments[0], discount, customDiscount, idleCoefficient, false)
+				} else {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "statefulset" {
+				if len(costDatum.Statefulsets) > 0 {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace+"/"+costDatum.Statefulsets[0], discount, customDiscount, idleCoefficient, false)
+				} else {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "daemonset" {
+				if len(costDatum.Daemonsets) > 0 {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace+"/"+costDatum.Daemonsets[0], discount, customDiscount, idleCoefficient, false)
+				} else {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "controller" {
+				if controller, kind, hasController := costDatum.GetController(); hasController {
+					key := fmt.Sprintf("%s/%s:%s", costDatum.Namespace, kind, controller)
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, key, discount, customDiscount, idleCoefficient, false)
+				} else {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "label" {
+				found := false
+				if costDatum.Labels != nil {
+					for _, sf := range subfields {
+						if subfieldName, ok := costDatum.Labels[sf]; ok {
+							aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, subfieldName, discount, customDiscount, idleCoefficient, false)
+							found = true
+							break
+						}
+					}
+				}
+				if !found {
+					aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, UnallocatedSubfield, discount, customDiscount, idleCoefficient, false)
+				}
+			} else if field == "pod" {
+				aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, costDatum.Namespace+"/"+costDatum.PodName, discount, customDiscount, idleCoefficient, false)
+			} else if field == "container" {
+				key := fmt.Sprintf("%s/%s/%s/%s", costDatum.ClusterID, costDatum.Namespace, costDatum.PodName, costDatum.Name)
+				aggregateDatum(cp, aggregations, costDatum, field, subfields, rate, key, discount, customDiscount, idleCoefficient, true)
+			}
+		}
+	}
+
+	for key, agg := range aggregations {
+		sharedCoefficient := 1 / float64(len(opts.FilteredEnvironments)+len(aggregations))
+
+		agg.CPUCost = totalVectors(agg.CPUCostVector)
+		agg.RAMCost = totalVectors(agg.RAMCostVector)
+		agg.GPUCost = totalVectors(agg.GPUCostVector)
+		agg.PVCost = totalVectors(agg.PVCostVector)
+		agg.NetworkCost = totalVectors(agg.NetworkCostVector)
+		if opts.SharedSplit == SplitTypeWeighted {
+			d := opts.TotalContainerCost - sharedResourceCost
+			if d == 0 {
+				klog.V(1).Infof("[Warning] Total container cost '%f' and shared resource cost '%f are the same'. Setting sharedCoefficient to 1", opts.TotalContainerCost, sharedResourceCost)
+				sharedCoefficient = 1.0
+			} else {
+				sharedCoefficient = (agg.CPUCost + agg.RAMCost + agg.GPUCost + agg.PVCost + agg.NetworkCost) / d
+			}
+		}
+		agg.SharedCost = sharedResourceCost * sharedCoefficient
+
+		for _, v := range opts.SharedCosts {
+			agg.SharedCost += v.Cost * sharedCoefficient
+		}
+
+		if rate != "" {
+			rateCoeff := agg.RateCoefficient(rate, resolutionHours)
+			agg.CPUCost *= rateCoeff
+			agg.RAMCost *= rateCoeff
+			agg.GPUCost *= rateCoeff
+			agg.PVCost *= rateCoeff
+			agg.NetworkCost *= rateCoeff
+			agg.SharedCost *= rateCoeff
+		}
+
+		agg.TotalCost = agg.CPUCost + agg.RAMCost + agg.GPUCost + agg.PVCost + agg.NetworkCost + agg.SharedCost
+
+		// Evicted and Completed Pods can still show up here, but have 0 cost.
+		// Filter these by default. Any reason to keep them?
+		if agg.TotalCost == 0 {
+			delete(aggregations, key)
+			continue
+		}
+
+		// CPU, RAM, and PV allocation are cumulative per-datum, whereas GPU is rate per-datum
+		agg.CPUAllocationHourlyAverage = totalVectors(agg.CPUAllocationVectors) / agg.TotalHours(resolutionHours)
+		agg.RAMAllocationHourlyAverage = totalVectors(agg.RAMAllocationVectors) / agg.TotalHours(resolutionHours)
+		agg.GPUAllocationHourlyAverage = averageVectors(agg.GPUAllocationVectors)
+		agg.PVAllocationHourlyAverage = totalVectors(agg.PVAllocationVectors) / agg.TotalHours(resolutionHours)
+
+		// TODO niko/etl does this check out for GPU data? Do we need to rewrite GPU queries to be
+		// culumative?
+		agg.CPUAllocationTotal = totalVectors(agg.CPUAllocationVectors)
+		agg.GPUAllocationTotal = totalVectors(agg.GPUAllocationVectors)
+		agg.PVAllocationTotal = totalVectors(agg.PVAllocationVectors)
+		agg.RAMAllocationTotal = totalVectors(agg.RAMAllocationVectors)
+
+		if includeEfficiency {
+			// Default both RAM and CPU to 0% efficiency so that a 0-requested, 0-allocated, 0-used situation
+			// returns 0% efficiency, which should be a red-flag.
+			//
+			// If non-zero numbers are available, then efficiency is defined as:
+			//   idlePercentage =  (requested - used) / allocated
+			//   efficiency = (1.0 - idlePercentage)
+			//
+			// It is possible to score > 100% efficiency, which is meant to be interpreted as a red flag.
+			// It is not possible to score < 0% efficiency.
+
+			agg.CPUEfficiency = 0.0
+			CPUIdle := 0.0
+			if agg.CPUAllocationHourlyAverage > 0.0 {
+				avgCPURequested := averageVectors(agg.CPURequestedVectors)
+				avgCPUUsed := averageVectors(agg.CPUUsedVectors)
+
+				// Clamp averages, log range violations
+				avgCPURequested, avgCPUUsed = clampAverage(avgCPURequested, avgCPUUsed, agg.CPUAllocationHourlyAverage, "CPU")
+
+				CPUIdle = ((avgCPURequested - avgCPUUsed) / agg.CPUAllocationHourlyAverage)
+				agg.CPUEfficiency = 1.0 - CPUIdle
+			}
+
+			agg.RAMEfficiency = 0.0
+			RAMIdle := 0.0
+			if agg.RAMAllocationHourlyAverage > 0.0 {
+				avgRAMRequested := averageVectors(agg.RAMRequestedVectors)
+				avgRAMUsed := averageVectors(agg.RAMUsedVectors)
+
+				// Clamp averages, log range violations
+				avgRAMRequested, avgRAMUsed = clampAverage(avgRAMRequested, avgRAMUsed, agg.RAMAllocationHourlyAverage, "RAM")
+
+				RAMIdle = ((avgRAMRequested - avgRAMUsed) / agg.RAMAllocationHourlyAverage)
+				agg.RAMEfficiency = 1.0 - RAMIdle
+			}
+
+			// Score total efficiency by the sum of CPU and RAM efficiency, weighted by their
+			// respective total costs.
+			agg.Efficiency = 0.0
+			if (agg.CPUCost + agg.RAMCost) > 0 {
+				agg.Efficiency = ((agg.CPUCost * agg.CPUEfficiency) + (agg.RAMCost * agg.RAMEfficiency)) / (agg.CPUCost + agg.RAMCost)
+			}
+		}
+
+		// convert RAM from bytes to GiB
+		agg.RAMAllocationHourlyAverage = agg.RAMAllocationHourlyAverage / 1024 / 1024 / 1024
+		// convert storage from bytes to GiB
+		agg.PVAllocationHourlyAverage = agg.PVAllocationHourlyAverage / 1024 / 1024 / 1024
+
+		// remove time series data if it is not explicitly requested
+		if !includeTimeSeries {
+			agg.CPUCostVector = nil
+			agg.RAMCostVector = nil
+			agg.GPUCostVector = nil
+			agg.PVCostVector = nil
+			agg.NetworkCostVector = nil
+			agg.TotalCostVector = nil
+		} else { // otherwise compute a totalcostvector
+			v1 := addVectors(agg.CPUCostVector, agg.RAMCostVector)
+			v2 := addVectors(v1, agg.GPUCostVector)
+			v3 := addVectors(v2, agg.PVCostVector)
+			v4 := addVectors(v3, agg.NetworkCostVector)
+			agg.TotalCostVector = v4
+		}
+		// Typesafety checks
+		if math.IsNaN(agg.CPUAllocationHourlyAverage) || math.IsInf(agg.CPUAllocationHourlyAverage, 0) {
+			klog.V(1).Infof("[Warning] CPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.CPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.CPUAllocationHourlyAverage = 0
+		}
+		if math.IsNaN(agg.CPUCost) || math.IsInf(agg.CPUCost, 0) {
+			klog.V(1).Infof("[Warning] CPUCost is %f for '%s: %s/%s'", agg.CPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.CPUCost = 0
+		}
+		if math.IsNaN(agg.CPUEfficiency) || math.IsInf(agg.CPUEfficiency, 0) {
+			klog.V(1).Infof("[Warning] CPUEfficiency is %f for '%s: %s/%s'", agg.CPUEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.CPUEfficiency = 0
+		}
+		if math.IsNaN(agg.Efficiency) || math.IsInf(agg.Efficiency, 0) {
+			klog.V(1).Infof("[Warning] Efficiency is %f for '%s: %s/%s'", agg.Efficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.Efficiency = 0
+		}
+		if math.IsNaN(agg.GPUAllocationHourlyAverage) || math.IsInf(agg.GPUAllocationHourlyAverage, 0) {
+			klog.V(1).Infof("[Warning] GPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.GPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.GPUAllocationHourlyAverage = 0
+		}
+		if math.IsNaN(agg.GPUCost) || math.IsInf(agg.GPUCost, 0) {
+			klog.V(1).Infof("[Warning] GPUCost is %f for '%s: %s/%s'", agg.GPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.GPUCost = 0
+		}
+		if math.IsNaN(agg.RAMAllocationHourlyAverage) || math.IsInf(agg.RAMAllocationHourlyAverage, 0) {
+			klog.V(1).Infof("[Warning] RAMAllocationHourlyAverage is %f for '%s: %s/%s'", agg.RAMAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.RAMAllocationHourlyAverage = 0
+		}
+		if math.IsNaN(agg.RAMCost) || math.IsInf(agg.RAMCost, 0) {
+			klog.V(1).Infof("[Warning] RAMCost is %f for '%s: %s/%s'", agg.RAMCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.RAMCost = 0
+		}
+		if math.IsNaN(agg.RAMEfficiency) || math.IsInf(agg.RAMEfficiency, 0) {
+			klog.V(1).Infof("[Warning] RAMEfficiency is %f for '%s: %s/%s'", agg.RAMEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.RAMEfficiency = 0
+		}
+		if math.IsNaN(agg.PVAllocationHourlyAverage) || math.IsInf(agg.PVAllocationHourlyAverage, 0) {
+			klog.V(1).Infof("[Warning] PVAllocationHourlyAverage is %f for '%s: %s/%s'", agg.PVAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.PVAllocationHourlyAverage = 0
+		}
+		if math.IsNaN(agg.PVCost) || math.IsInf(agg.PVCost, 0) {
+			klog.V(1).Infof("[Warning] PVCost is %f for '%s: %s/%s'", agg.PVCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.PVCost = 0
+		}
+		if math.IsNaN(agg.NetworkCost) || math.IsInf(agg.NetworkCost, 0) {
+			klog.V(1).Infof("[Warning] NetworkCost is %f for '%s: %s/%s'", agg.NetworkCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.NetworkCost = 0
+		}
+		if math.IsNaN(agg.SharedCost) || math.IsInf(agg.SharedCost, 0) {
+			klog.V(1).Infof("[Warning] SharedCost is %f for '%s: %s/%s'", agg.SharedCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.SharedCost = 0
+		}
+		if math.IsNaN(agg.TotalCost) || math.IsInf(agg.TotalCost, 0) {
+			klog.V(1).Infof("[Warning] TotalCost is %f for '%s: %s/%s'", agg.TotalCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			agg.TotalCost = 0
+		}
+	}
+
+	return aggregations
+}
+
+func aggregateDatum(cp cloud.Provider, aggregations map[string]*Aggregation, costDatum *CostData, field string, subfields []string, rate string, key string, discount float64, customDiscount float64, idleCoefficient float64, includeProperties bool) {
+	// add new entry to aggregation results if a new key is encountered
+	if _, ok := aggregations[key]; !ok {
+		agg := &Aggregation{
+			Aggregator:  field,
+			Environment: key,
+		}
+		if len(subfields) > 0 {
+			agg.Subfields = subfields
+		}
+		if includeProperties {
+			props := &kubecost.Properties{}
+			props.SetCluster(costDatum.ClusterID)
+			props.SetNode(costDatum.NodeName)
+			if controller, kind, hasController := costDatum.GetController(); hasController {
+				props.SetController(controller)
+				props.SetControllerKind(kind)
+			}
+			props.SetLabels(costDatum.Labels)
+			props.SetNamespace(costDatum.Namespace)
+			props.SetPod(costDatum.PodName)
+			props.SetServices(costDatum.Services)
+			props.SetContainer(costDatum.Name)
+			agg.Properties = props
+		}
+
+		aggregations[key] = agg
+	}
+
+	mergeVectors(cp, costDatum, aggregations[key], rate, discount, customDiscount, idleCoefficient)
+}
+
+func mergeVectors(cp cloud.Provider, costDatum *CostData, aggregation *Aggregation, rate string, discount float64, customDiscount float64, idleCoefficient float64) {
+	aggregation.CPUAllocationVectors = addVectors(costDatum.CPUAllocation, aggregation.CPUAllocationVectors)
+	aggregation.CPURequestedVectors = addVectors(costDatum.CPUReq, aggregation.CPURequestedVectors)
+	aggregation.CPUUsedVectors = addVectors(costDatum.CPUUsed, aggregation.CPUUsedVectors)
+
+	aggregation.RAMAllocationVectors = addVectors(costDatum.RAMAllocation, aggregation.RAMAllocationVectors)
+	aggregation.RAMRequestedVectors = addVectors(costDatum.RAMReq, aggregation.RAMRequestedVectors)
+	aggregation.RAMUsedVectors = addVectors(costDatum.RAMUsed, aggregation.RAMUsedVectors)
+
+	aggregation.GPUAllocationVectors = addVectors(costDatum.GPUReq, aggregation.GPUAllocationVectors)
+
+	for _, pvcd := range costDatum.PVCData {
+		aggregation.PVAllocationVectors = addVectors(pvcd.Values, aggregation.PVAllocationVectors)
+	}
+
+	cpuv, ramv, gpuv, pvvs, netv := getPriceVectors(cp, costDatum, rate, discount, customDiscount, idleCoefficient)
+	aggregation.CPUCostVector = addVectors(cpuv, aggregation.CPUCostVector)
+	aggregation.RAMCostVector = addVectors(ramv, aggregation.RAMCostVector)
+	aggregation.GPUCostVector = addVectors(gpuv, aggregation.GPUCostVector)
+	aggregation.NetworkCostVector = addVectors(netv, aggregation.NetworkCostVector)
+	for _, vectorList := range pvvs {
+		aggregation.PVCostVector = addVectors(aggregation.PVCostVector, vectorList)
+	}
+}
+
+// Returns the blended discounts applied to the node as a result of global discounts and reserved instance
+// discounts
+func getDiscounts(costDatum *CostData, cpuCost float64, ramCost float64, discount float64) (float64, float64) {
+	if costDatum.NodeData == nil {
+		return discount, discount
+	}
+	if costDatum.NodeData.IsSpot() {
+		return 0, 0
+	}
+
+	reserved := costDatum.NodeData.Reserved
+
+	// blended discounts
+	blendedCPUDiscount := discount
+	blendedRAMDiscount := discount
+
+	if reserved != nil && reserved.CPUCost > 0 && reserved.RAMCost > 0 {
+		reservedCPUDiscount := 0.0
+		if cpuCost == 0 {
+			klog.V(1).Infof("[Warning] No cpu cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+		} else {
+			reservedCPUDiscount = 1.0 - (reserved.CPUCost / cpuCost)
+		}
+		reservedRAMDiscount := 0.0
+		if ramCost == 0 {
+			klog.V(1).Infof("[Warning] No ram cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+		} else {
+			reservedRAMDiscount = 1.0 - (reserved.RAMCost / ramCost)
+		}
+
+		// AWS passes the # of reserved CPU and RAM as -1 to represent "All"
+		if reserved.ReservedCPU < 0 && reserved.ReservedRAM < 0 {
+			blendedCPUDiscount = reservedCPUDiscount
+			blendedRAMDiscount = reservedRAMDiscount
+		} else {
+			nodeCPU, ierr := strconv.ParseInt(costDatum.NodeData.VCPU, 10, 64)
+			nodeRAM, ferr := strconv.ParseFloat(costDatum.NodeData.RAMBytes, 64)
+			if ierr == nil && ferr == nil {
+				nodeRAMGB := nodeRAM / 1024 / 1024 / 1024
+				reservedRAMGB := float64(reserved.ReservedRAM) / 1024 / 1024 / 1024
+				nonReservedCPU := nodeCPU - reserved.ReservedCPU
+				nonReservedRAM := nodeRAMGB - reservedRAMGB
+
+				if nonReservedCPU == 0 {
+					blendedCPUDiscount = reservedCPUDiscount
+				} else {
+					if nodeCPU == 0 {
+						klog.V(1).Infof("[Warning] No ram found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+					} else {
+						blendedCPUDiscount = (float64(reserved.ReservedCPU) * reservedCPUDiscount) + (float64(nonReservedCPU)*discount)/float64(nodeCPU)
+					}
+				}
+
+				if nonReservedRAM == 0 {
+					blendedRAMDiscount = reservedRAMDiscount
+				} else {
+					if nodeRAMGB == 0 {
+						klog.V(1).Infof("[Warning] No ram found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+					} else {
+						blendedRAMDiscount = (reservedRAMGB * reservedRAMDiscount) + (nonReservedRAM*discount)/nodeRAMGB
+					}
+				}
+			}
+		}
+	}
+
+	return blendedCPUDiscount, blendedRAMDiscount
+}
+
+func parseVectorPricing(cfg *cloud.CustomPricing, costDatum *CostData, cpuCostStr, ramCostStr, gpuCostStr, pvCostStr string) (float64, float64, float64, float64, bool) {
+	usesCustom := false
+	cpuCost, err := strconv.ParseFloat(cpuCostStr, 64)
+	if err != nil || math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) || cpuCost == 0 {
+		cpuCost, err = strconv.ParseFloat(cfg.CPU, 64)
+		usesCustom = true
+		if err != nil || math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
+			cpuCost = 0
+		}
+	}
+	ramCost, err := strconv.ParseFloat(ramCostStr, 64)
+	if err != nil || math.IsNaN(ramCost) || math.IsInf(ramCost, 0) || ramCost == 0 {
+		ramCost, err = strconv.ParseFloat(cfg.RAM, 64)
+		usesCustom = true
+		if err != nil || math.IsNaN(ramCost) || math.IsInf(ramCost, 0) {
+			ramCost = 0
+		}
+	}
+	gpuCost, err := strconv.ParseFloat(gpuCostStr, 64)
+	if err != nil || math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
+		gpuCost, err = strconv.ParseFloat(cfg.GPU, 64)
+		if err != nil || math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
+			gpuCost = 0
+		}
+	}
+	pvCost, err := strconv.ParseFloat(pvCostStr, 64)
+	if err != nil || math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
+		pvCost, err = strconv.ParseFloat(cfg.Storage, 64)
+		if err != nil || math.IsNaN(pvCost) || math.IsInf(pvCost, 0) {
+			pvCost = 0
+		}
+	}
+	return cpuCost, ramCost, gpuCost, pvCost, usesCustom
+}
+
+func getPriceVectors(cp cloud.Provider, costDatum *CostData, rate string, discount float64, customDiscount float64, idleCoefficient float64) ([]*util.Vector, []*util.Vector, []*util.Vector, [][]*util.Vector, []*util.Vector) {
+
+	var cpuCost float64
+	var ramCost float64
+	var gpuCost float64
+	var pvCost float64
+	var usesCustom bool
+
+	// If custom pricing is enabled and can be retrieved, replace
+	// default cost values with custom values
+	customPricing, err := cp.GetConfig()
+	if err != nil {
+		klog.Errorf("failed to load custom pricing: %s", err)
+	}
+	if cloud.CustomPricesEnabled(cp) && err == nil {
+		var cpuCostStr string
+		var ramCostStr string
+		var gpuCostStr string
+		var pvCostStr string
+		if costDatum.NodeData.IsSpot() {
+			cpuCostStr = customPricing.SpotCPU
+			ramCostStr = customPricing.SpotRAM
+			gpuCostStr = customPricing.SpotGPU
+		} else {
+			cpuCostStr = customPricing.CPU
+			ramCostStr = customPricing.RAM
+			gpuCostStr = customPricing.GPU
+		}
+		pvCostStr = customPricing.Storage
+		cpuCost, ramCost, gpuCost, pvCost, usesCustom = parseVectorPricing(customPricing, costDatum, cpuCostStr, ramCostStr, gpuCostStr, pvCostStr)
+	} else if costDatum.NodeData == nil && err == nil {
+		cpuCostStr := customPricing.CPU
+		ramCostStr := customPricing.RAM
+		gpuCostStr := customPricing.GPU
+		pvCostStr := customPricing.Storage
+		cpuCost, ramCost, gpuCost, pvCost, usesCustom = parseVectorPricing(customPricing, costDatum, cpuCostStr, ramCostStr, gpuCostStr, pvCostStr)
+	} else {
+		cpuCostStr := costDatum.NodeData.VCPUCost
+		ramCostStr := costDatum.NodeData.RAMCost
+		gpuCostStr := costDatum.NodeData.GPUCost
+		pvCostStr := costDatum.NodeData.StorageCost
+		cpuCost, ramCost, gpuCost, pvCost, usesCustom = parseVectorPricing(customPricing, costDatum, cpuCostStr, ramCostStr, gpuCostStr, pvCostStr)
+	}
+
+	if usesCustom {
+		log.DedupedWarningf(5, "No pricing data found for node `%s` , using custom pricing", costDatum.NodeName)
+	}
+
+	cpuDiscount, ramDiscount := getDiscounts(costDatum, cpuCost, ramCost, discount)
+
+	klog.V(4).Infof("Node Name: %s", costDatum.NodeName)
+	klog.V(4).Infof("Blended CPU Discount: %f", cpuDiscount)
+	klog.V(4).Infof("Blended RAM Discount: %f", ramDiscount)
+
+	// TODO should we try to apply the rate coefficient here or leave it as a totals-only metric?
+	rateCoeff := 1.0
+
+	if idleCoefficient == 0 {
+		idleCoefficient = 1.0
+	}
+
+	cpuv := make([]*util.Vector, 0, len(costDatum.CPUAllocation))
+	for _, val := range costDatum.CPUAllocation {
+		cpuv = append(cpuv, &util.Vector{
+			Timestamp: math.Round(val.Timestamp/10) * 10,
+			Value:     (val.Value * cpuCost * (1 - cpuDiscount) * (1 - customDiscount) / idleCoefficient) * rateCoeff,
+		})
+	}
+
+	ramv := make([]*util.Vector, 0, len(costDatum.RAMAllocation))
+	for _, val := range costDatum.RAMAllocation {
+		ramv = append(ramv, &util.Vector{
+			Timestamp: math.Round(val.Timestamp/10) * 10,
+			Value:     ((val.Value / 1024 / 1024 / 1024) * ramCost * (1 - ramDiscount) * (1 - customDiscount) / idleCoefficient) * rateCoeff,
+		})
+	}
+
+	gpuv := make([]*util.Vector, 0, len(costDatum.GPUReq))
+	for _, val := range costDatum.GPUReq {
+		gpuv = append(gpuv, &util.Vector{
+			Timestamp: math.Round(val.Timestamp/10) * 10,
+			Value:     (val.Value * gpuCost * (1 - discount) * (1 - customDiscount) / idleCoefficient) * rateCoeff,
+		})
+	}
+
+	pvvs := make([][]*util.Vector, 0, len(costDatum.PVCData))
+	for _, pvcData := range costDatum.PVCData {
+		pvv := make([]*util.Vector, 0, len(pvcData.Values))
+		if pvcData.Volume != nil {
+			cost, _ := strconv.ParseFloat(pvcData.Volume.Cost, 64)
+
+			// override with custom pricing if enabled
+			if cloud.CustomPricesEnabled(cp) {
+				cost = pvCost
+			}
+
+			for _, val := range pvcData.Values {
+				pvv = append(pvv, &util.Vector{
+					Timestamp: math.Round(val.Timestamp/10) * 10,
+					Value:     ((val.Value / 1024 / 1024 / 1024) * cost * (1 - customDiscount) / idleCoefficient) * rateCoeff,
+				})
+			}
+			pvvs = append(pvvs, pvv)
+		}
+	}
+
+	netv := make([]*util.Vector, 0, len(costDatum.NetworkData))
+	for _, val := range costDatum.NetworkData {
+		netv = append(netv, &util.Vector{
+			Timestamp: math.Round(val.Timestamp/10) * 10,
+			Value:     val.Value,
+		})
+	}
+
+	return cpuv, ramv, gpuv, pvvs, netv
+}
+
+func averageVectors(vectors []*util.Vector) float64 {
+	if len(vectors) == 0 {
+		return 0.0
+	}
+	return totalVectors(vectors) / float64(len(vectors))
+}
+
+func totalVectors(vectors []*util.Vector) float64 {
+	total := 0.0
+	for _, vector := range vectors {
+		total += vector.Value
+	}
+	return total
+}
+
+// addVectors adds two slices of Vectors. Vector timestamps are rounded to the
+// nearest ten seconds to allow matching of Vectors within a delta allowance.
+// Matching Vectors are summed, while unmatched Vectors are passed through.
+// e.g. [(t=1, 1), (t=2, 2)] + [(t=2, 2), (t=3, 3)] = [(t=1, 1), (t=2, 4), (t=3, 3)]
+func addVectors(xvs []*util.Vector, yvs []*util.Vector) []*util.Vector {
+	sumOp := func(result *util.Vector, x *float64, y *float64) bool {
+		if x != nil && y != nil {
+			result.Value = *x + *y
+		} else if y != nil {
+			result.Value = *y
+		} else if x != nil {
+			result.Value = *x
+		}
+
+		return true
+	}
+
+	return util.ApplyVectorOp(xvs, yvs, sumOp)
+}
+
+// minCostDataLength sets the minimum number of time series data required to
+// cache both raw and aggregated cost data
+const minCostDataLength = 2
+
+// EmptyDataError describes an error caused by empty cost data for some
+// defined interval
+type EmptyDataError struct {
+	err      error
+	duration string
+	offset   string
+}
+
+// Error implements the error interface
+func (ede *EmptyDataError) Error() string {
+	err := fmt.Sprintf("empty data for range: %s", ede.duration)
+	if ede.offset != "" {
+		err += fmt.Sprintf(" offset %s", ede.offset)
+	}
+	if ede.err != nil {
+		err += fmt.Sprintf(": %s", ede.err)
+	}
+	return err
+}
+
+func costDataTimeSeriesLength(costData map[string]*CostData) int {
+	l := 0
+	for _, cd := range costData {
+		if l < len(cd.RAMAllocation) {
+			l = len(cd.RAMAllocation)
+		}
+		if l < len(cd.CPUAllocation) {
+			l = len(cd.CPUAllocation)
+		}
+	}
+	return l
+}
+
+// ScaleHourlyCostData converts per-hour cost data to per-resolution data. If the target resolution is higher (i.e. < 1.0h)
+// then we can do simple multiplication by the fraction-of-an-hour and retain accuracy. If the target resolution is
+// lower (i.e. > 1.0h) then we sum groups of hourly data by resolution to maintain fidelity.
+// e.g. (100 hours of per-hour hourly data, resolutionHours=10) => 10 data points, grouped and summed by 10-hour window
+// e.g. (20 minutes of per-minute hourly data, resolutionHours=1/60) => 20 data points, scaled down by a factor of 60
+func ScaleHourlyCostData(data map[string]*CostData, resolutionHours float64) map[string]*CostData {
+	scaled := map[string]*CostData{}
+
+	for key, datum := range data {
+		datum.RAMReq = scaleVectorSeries(datum.RAMReq, resolutionHours)
+		datum.RAMUsed = scaleVectorSeries(datum.RAMUsed, resolutionHours)
+		datum.RAMAllocation = scaleVectorSeries(datum.RAMAllocation, resolutionHours)
+		datum.CPUReq = scaleVectorSeries(datum.CPUReq, resolutionHours)
+		datum.CPUUsed = scaleVectorSeries(datum.CPUUsed, resolutionHours)
+		datum.CPUAllocation = scaleVectorSeries(datum.CPUAllocation, resolutionHours)
+		datum.GPUReq = scaleVectorSeries(datum.GPUReq, resolutionHours)
+		datum.NetworkData = scaleVectorSeries(datum.NetworkData, resolutionHours)
+
+		for _, pvcDatum := range datum.PVCData {
+			pvcDatum.Values = scaleVectorSeries(pvcDatum.Values, resolutionHours)
+		}
+
+		scaled[key] = datum
+	}
+
+	return scaled
+}
+
+func scaleVectorSeries(vs []*util.Vector, resolutionHours float64) []*util.Vector {
+	// if scaling to a lower resolution, compress the hourly data for maximum accuracy
+	if resolutionHours > 1.0 {
+		return compressVectorSeries(vs, resolutionHours)
+	}
+
+	// if scaling to a higher resolution, simply scale each value down by the fraction of an hour
+	for _, v := range vs {
+		v.Value *= resolutionHours
+	}
+	return vs
+}
+
+func compressVectorSeries(vs []*util.Vector, resolutionHours float64) []*util.Vector {
+	if len(vs) == 0 {
+		return vs
+	}
+
+	compressed := []*util.Vector{}
+
+	threshold := float64(60 * 60 * resolutionHours)
+	var acc *util.Vector
+
+	for i, v := range vs {
+		if acc == nil {
+			// start a new accumulation from current datum
+			acc = &util.Vector{
+				Value:     vs[i].Value,
+				Timestamp: vs[i].Timestamp,
+			}
+			continue
+		}
+		if v.Timestamp-acc.Timestamp < threshold {
+			// v should be accumulated in current datum
+			acc.Value += v.Value
+		} else {
+			// v falls outside current datum's threshold; append and start a new one
+			compressed = append(compressed, acc)
+			acc = &util.Vector{
+				Value:     vs[i].Value,
+				Timestamp: vs[i].Timestamp,
+			}
+		}
+	}
+	// append any remaining, incomplete accumulation
+	if acc != nil {
+		compressed = append(compressed, acc)
+	}
+
+	return compressed
+}
+
+// ComputeAggregateCostModel computes cost data for the given window, then aggregates it by the given fields.
+// Data is cached on two levels: the aggregation is cached as well as the underlying cost data.
+func (a *Accesses) ComputeAggregateCostModel(promClient prometheusClient.Client, duration, offset, field string, subfields []string, rate string, filters map[string]string,
+	sri *SharedResourceInfo, shared string, allocateIdle, includeTimeSeries, includeEfficiency, disableCache, clearCache, noCache, noExpireCache, remoteEnabled, disableSharedOverhead bool) (map[string]*Aggregation, string, error) {
+
+	profileBaseName := fmt.Sprintf("ComputeAggregateCostModel(duration=%s, offet=%s, field=%s)", duration, offset, field)
+	defer measureTime(time.Now(), profileThreshold, profileBaseName)
+
+	// parse cost data filters into FilterFuncs
+	filterFuncs := []FilterFunc{}
+	retainFuncs := []FilterFunc{}
+	retainFuncs = append(retainFuncs, func(cd *CostData) (bool, string) {
+		if sri != nil {
+			return sri.IsSharedResource(cd), ""
+		}
+		return false, ""
+	})
+	aggregateEnvironment := func(costDatum *CostData) string {
+		if field == "cluster" {
+			return costDatum.ClusterID
+		} else if field == "node" {
+			return costDatum.NodeName
+		} else if field == "namespace" {
+			return costDatum.Namespace
+		} else if field == "service" {
+			if len(costDatum.Services) > 0 {
+				return costDatum.Namespace + "/" + costDatum.Services[0]
+			}
+		} else if field == "deployment" {
+			if len(costDatum.Deployments) > 0 {
+				return costDatum.Namespace + "/" + costDatum.Deployments[0]
+			}
+		} else if field == "daemonset" {
+			if len(costDatum.Daemonsets) > 0 {
+				return costDatum.Namespace + "/" + costDatum.Daemonsets[0]
+			}
+		} else if field == "statefulset" {
+			if len(costDatum.Statefulsets) > 0 {
+				return costDatum.Namespace + "/" + costDatum.Statefulsets[0]
+			}
+		} else if field == "label" {
+			if costDatum.Labels != nil {
+				for _, sf := range subfields {
+					if subfieldName, ok := costDatum.Labels[sf]; ok {
+						return fmt.Sprintf("%s=%s", sf, subfieldName)
+					}
+				}
+			}
+		} else if field == "pod" {
+			return costDatum.Namespace + "/" + costDatum.PodName
+		} else if field == "container" {
+			return costDatum.Namespace + "/" + costDatum.PodName + "/" + costDatum.Name
+		}
+		return ""
+	}
+
+	if filters["podprefix"] != "" {
+		pps := []string{}
+		for _, fp := range strings.Split(filters["podprefix"], ",") {
+			if fp != "" {
+				cleanedFilter := strings.TrimSpace(fp)
+				pps = append(pps, cleanedFilter)
+			}
+		}
+		filterFuncs = append(filterFuncs, func(cd *CostData) (bool, string) {
+			aggEnv := aggregateEnvironment(cd)
+			for _, pp := range pps {
+				cleanedFilter := strings.TrimSpace(pp)
+				if strings.HasPrefix(cd.PodName, cleanedFilter) {
+					return true, aggEnv
+				}
+			}
+			return false, aggEnv
+		})
+	}
+
+	if filters["namespace"] != "" {
+		// namespaces may be comma-separated, e.g. kubecost,default
+		// multiple namespaces are evaluated as an OR relationship
+		nss := strings.Split(filters["namespace"], ",")
+		filterFuncs = append(filterFuncs, func(cd *CostData) (bool, string) {
+			aggEnv := aggregateEnvironment(cd)
+			for _, ns := range nss {
+				nsTrim := strings.TrimSpace(ns)
+				if cd.Namespace == nsTrim {
+					return true, aggEnv
+				} else if strings.HasSuffix(nsTrim, "*") { // trigger wildcard prefix filtering
+					nsTrimAsterisk := strings.TrimSuffix(nsTrim, "*")
+					if strings.HasPrefix(cd.Namespace, nsTrimAsterisk) {
+						return true, aggEnv
+					}
+				}
+			}
+			return false, aggEnv
+		})
+	}
+	if filters["node"] != "" {
+		// nodes may be comma-separated, e.g. aws-node-1,aws-node-2
+		// multiple nodes are evaluated as an OR relationship
+		nodes := strings.Split(filters["node"], ",")
+		filterFuncs = append(filterFuncs, func(cd *CostData) (bool, string) {
+			aggEnv := aggregateEnvironment(cd)
+			for _, node := range nodes {
+				nodeTrim := strings.TrimSpace(node)
+				if cd.NodeName == nodeTrim {
+					return true, aggEnv
+				} else if strings.HasSuffix(nodeTrim, "*") { // trigger wildcard prefix filtering
+					nodeTrimAsterisk := strings.TrimSuffix(nodeTrim, "*")
+					if strings.HasPrefix(cd.NodeName, nodeTrimAsterisk) {
+						return true, aggEnv
+					}
+				}
+			}
+			return false, aggEnv
+		})
+	}
+	if filters["cluster"] != "" {
+		// clusters may be comma-separated, e.g. cluster-one,cluster-two
+		// multiple clusters are evaluated as an OR relationship
+		cs := strings.Split(filters["cluster"], ",")
+		filterFuncs = append(filterFuncs, func(cd *CostData) (bool, string) {
+			aggEnv := aggregateEnvironment(cd)
+			for _, c := range cs {
+				cTrim := strings.TrimSpace(c)
+				id, name := cd.ClusterID, cd.ClusterName
+				if id == cTrim || name == cTrim {
+					return true, aggEnv
+				} else if strings.HasSuffix(cTrim, "*") { // trigger wildcard prefix filtering
+					cTrimAsterisk := strings.TrimSuffix(cTrim, "*")
+					if strings.HasPrefix(id, cTrimAsterisk) || strings.HasPrefix(name, cTrimAsterisk) {
+						return true, aggEnv
+					}
+				}
+			}
+			return false, aggEnv
+		})
+	}
+	if filters["labels"] != "" {
+		// labels are expected to be comma-separated and to take the form key=value
+		// e.g. app=cost-analyzer,app.kubernetes.io/instance=kubecost
+		// each different label will be applied as an AND
+		// multiple values for a single label will be evaluated as an OR
+		labelValues := map[string][]string{}
+		ls := strings.Split(filters["labels"], ",")
+		for _, l := range ls {
+			lTrim := strings.TrimSpace(l)
+			label := strings.Split(lTrim, "=")
+			if len(label) == 2 {
+				ln := SanitizeLabelName(strings.TrimSpace(label[0]))
+				lv := strings.TrimSpace(label[1])
+				labelValues[ln] = append(labelValues[ln], lv)
+			} else {
+				// label is not of the form name=value, so log it and move on
+				klog.V(2).Infof("[Warning] aggregate cost model: skipping illegal label filter: %s", l)
+			}
+		}
+
+		// Generate FilterFunc for each set of label filters by invoking a function instead of accessing
+		// values by closure to prevent reference-type looping bug.
+		// (see https://github.com/golang/go/wiki/CommonMistakes#using-reference-to-loop-iterator-variable)
+		for label, values := range labelValues {
+			ff := (func(l string, vs []string) FilterFunc {
+				return func(cd *CostData) (bool, string) {
+					ae := aggregateEnvironment(cd)
+					for _, v := range vs {
+						if v == "__unallocated__" { // Special case. __unallocated__ means return all pods without the attached label
+							if _, ok := cd.Labels[label]; !ok {
+								return true, ae
+							}
+						}
+						if cd.Labels[label] == v {
+							return true, ae
+						} else if strings.HasSuffix(v, "*") { // trigger wildcard prefix filtering
+							vTrim := strings.TrimSuffix(v, "*")
+							if strings.HasPrefix(cd.Labels[label], vTrim) {
+								return true, ae
+							}
+						}
+					}
+					return false, ae
+				}
+			})(label, values)
+			filterFuncs = append(filterFuncs, ff)
+		}
+	}
+
+	// clear cache prior to checking the cache so that a clearCache=true
+	// request always returns a freshly computed value
+	if clearCache {
+		a.AggregateCache.Flush()
+		a.CostDataCache.Flush()
+	}
+
+	cacheExpiry := a.GetCacheExpiration(duration)
+	if noExpireCache {
+		cacheExpiry = cache.NoExpiration
+	}
+
+	// parametrize cache key by all request parameters
+	aggKey := GenerateAggKey(aggKeyParams{
+		duration:   duration,
+		offset:     offset,
+		filters:    filters,
+		field:      field,
+		subfields:  subfields,
+		rate:       rate,
+		sri:        sri,
+		shareType:  shared,
+		idle:       allocateIdle,
+		timeSeries: includeTimeSeries,
+		efficiency: includeEfficiency,
+	})
+
+	// convert duration and offset to start and end times
+	startTime, endTime, err := ParseTimeRange(duration, offset)
+	if err != nil {
+		return nil, "", fmt.Errorf("Error parsing duration (%s) and offset (%s): %s", duration, offset, err)
+	}
+	durationHours := endTime.Sub(*startTime).Hours()
+
+	thanosOffset := time.Now().Add(-thanos.OffsetDuration())
+	if a.ThanosClient != nil && endTime.After(thanosOffset) {
+		klog.V(4).Infof("Setting end time backwards to first present data")
+
+		// Apply offsets to both end and start times to maintain correct time range
+		deltaDuration := endTime.Sub(thanosOffset)
+		*startTime = startTime.Add(-1 * deltaDuration)
+		*endTime = time.Now().Add(-thanos.OffsetDuration())
+	}
+
+	// determine resolution by size of duration
+	resolutionHours := durationHours
+	if durationHours >= 2160 {
+		// 90 days
+		resolutionHours = 72.0
+	} else if durationHours >= 720 {
+		// 30 days
+		resolutionHours = 24.0
+	} else if durationHours >= 168 {
+		// 7 days
+		resolutionHours = 24.0
+	} else if durationHours >= 48 {
+		// 2 days
+		resolutionHours = 2.0
+	} else if durationHours >= 1 {
+		resolutionHours = 1.0
+	}
+
+	key := fmt.Sprintf(`%s:%s:%fh:%t`, duration, offset, resolutionHours, remoteEnabled)
+
+	// report message about which of the two caches hit. by default report a miss
+	cacheMessage := fmt.Sprintf("L1 cache miss: %s L2 cache miss: %s", aggKey, key)
+
+	// check the cache for aggregated response; if cache is hit and not disabled, return response
+	if value, found := a.AggregateCache.Get(aggKey); found && !disableCache && !noCache {
+		result, ok := value.(map[string]*Aggregation)
+		if !ok {
+			// disable cache and recompute if type cast fails
+			klog.Errorf("caching error: failed to cast aggregate data to struct: %s", aggKey)
+			return a.ComputeAggregateCostModel(promClient, duration, offset, field, subfields, rate, filters,
+				sri, shared, allocateIdle, includeTimeSeries, includeEfficiency, true, false, noExpireCache, noCache, remoteEnabled, disableSharedOverhead)
+		}
+		return result, fmt.Sprintf("aggregate cache hit: %s", aggKey), nil
+	}
+
+	profileStart := time.Now()
+	profileName := profileBaseName + ": "
+
+	window := duration
+	if durationHours >= 1 {
+		window = fmt.Sprintf("%dh", int(resolutionHours))
+		// exclude the last window of the time frame to match Prometheus definitions of range, offset, and resolution
+		*startTime = startTime.Add(time.Duration(resolutionHours) * time.Hour)
+	} else {
+		// don't cache requests for durations of less than one hour
+		klog.Infof("key %s has durationhours %f", key, durationHours)
+		disableCache = true
+	}
+
+	profileBaseName = fmt.Sprintf("ComputeAggregateCostModel(duration=%s, offset=%s, field=%s, window=%s)", duration, offset, field, window)
+
+	// attempt to retrieve cost data from cache
+	var costData map[string]*CostData
+	cacheData, found := a.CostDataCache.Get(key)
+	if found && !disableCache && !noCache {
+		profileName += "get cost data from cache"
+
+		ok := false
+		costData, ok = cacheData.(map[string]*CostData)
+		cacheMessage = fmt.Sprintf("L1 cache miss: %s, L2 cost data cache hit: %s", aggKey, key)
+		if !ok {
+			klog.Errorf("caching error: failed to cast cost data to struct: %s", key)
+		}
+	} else {
+		klog.Infof("key %s missed cache. found %t, disableCache %t, noCache %t ", key, found, disableCache, noCache)
+
+		cv := a.CostDataCache.Items()
+		klog.V(3).Infof("Logging cache items...")
+		for k := range cv {
+			klog.V(3).Infof("Cache item: %s", k)
+		}
+
+		profileName += "compute cost data"
+
+		start := startTime.Format(RFC3339Milli)
+		end := endTime.Format(RFC3339Milli)
+
+		costData, err = a.Model.ComputeCostDataRange(promClient, a.KubeClientSet, a.CloudProvider, start, end, window, resolutionHours, "", "", remoteEnabled, offset)
+		if err != nil {
+			if pce, ok := err.(prom.CommError); ok {
+				return nil, "", pce
+			}
+			if strings.Contains(err.Error(), "data is empty") {
+				return nil, "", &EmptyDataError{err: err, duration: duration, offset: offset}
+			}
+			return nil, "", err
+		}
+
+		// compute length of the time series in the cost data and only compute
+		// aggregates and cache if the length is sufficiently high
+		costDataLen := costDataTimeSeriesLength(costData)
+
+		if durationHours < 1.0 {
+			// scale hourly cost data down to fractional hour
+			costData = ScaleHourlyCostData(costData, resolutionHours)
+		}
+
+		if costDataLen == 0 {
+			return nil, "", &EmptyDataError{duration: duration, offset: offset}
+		}
+		if costDataLen >= minCostDataLength && !noCache {
+			klog.Infof("Setting L2 cache: %s", key)
+			a.CostDataCache.Set(key, costData, cacheExpiry)
+		}
+	}
+
+	measureTime(profileStart, profileThreshold, profileName)
+
+	c, err := a.CloudProvider.GetConfig()
+	if err != nil {
+		return nil, "", err
+	}
+	discount, err := ParsePercentString(c.Discount)
+	if err != nil {
+		return nil, "", err
+	}
+	customDiscount, err := ParsePercentString(c.NegotiatedDiscount)
+	if err != nil {
+		return nil, "", err
+	}
+
+	sc := make(map[string]*SharedCostInfo)
+	if !disableSharedOverhead {
+		for key, val := range c.SharedCosts {
+			cost, err := strconv.ParseFloat(val, 64)
+			durationCoefficient := durationHours / util.HoursPerMonth
+			if err != nil {
+				return nil, "", fmt.Errorf("Unable to parse shared cost %s: %s", val, err.Error())
+			}
+			sc[key] = &SharedCostInfo{
+				Name: key,
+				Cost: cost * durationCoefficient,
+			}
+		}
+	}
+
+	profileStart = time.Now()
+	profileName = profileBaseName + ": compute idle coefficient"
+
+	idleCoefficients := make(map[string]float64)
+	if allocateIdle {
+		idleDurationCalcHours := durationHours
+		if durationHours < 1 {
+			idleDurationCalcHours = 1
+		}
+		windowStr := fmt.Sprintf("%dh", int(idleDurationCalcHours))
+		if a.ThanosClient != nil {
+			offset = thanos.Offset()
+			klog.Infof("Setting offset to %s", offset)
+		}
+		idleCoefficients, err = a.ComputeIdleCoefficient(costData, promClient, a.CloudProvider, discount, customDiscount, windowStr, offset)
+		if err != nil {
+			klog.Errorf("error computing idle coefficient: windowString=%s, offset=%s, err=%s", windowStr, offset, err)
+			return nil, "", err
+		}
+	}
+	for cid, idleCoefficient := range idleCoefficients {
+		klog.Infof("Idle Coeff: %s: %f", cid, idleCoefficient)
+	}
+
+	totalContainerCost := 0.0
+	if shared == SplitTypeWeighted {
+		totalContainerCost = GetTotalContainerCost(costData, rate, a.CloudProvider, discount, customDiscount, idleCoefficients)
+	}
+
+	measureTime(profileStart, profileThreshold, profileName)
+
+	profileStart = time.Now()
+	profileName = profileBaseName + ": filter cost data"
+
+	// filter cost data by namespace and cluster after caching for maximal cache hits
+	costData, filteredContainerCount, filteredEnvironments := FilterCostData(costData, retainFuncs, filterFuncs)
+
+	measureTime(profileStart, profileThreshold, profileName)
+
+	profileStart = time.Now()
+	profileName = profileBaseName + ": aggregate cost data"
+
+	// aggregate cost model data by given fields and cache the result for the default expiration
+	opts := &AggregationOptions{
+		Discount:               discount,
+		CustomDiscount:         customDiscount,
+		IdleCoefficients:       idleCoefficients,
+		IncludeEfficiency:      includeEfficiency,
+		IncludeTimeSeries:      includeTimeSeries,
+		Rate:                   rate,
+		ResolutionHours:        resolutionHours,
+		SharedResourceInfo:     sri,
+		SharedCosts:            sc,
+		FilteredContainerCount: filteredContainerCount,
+		FilteredEnvironments:   filteredEnvironments,
+		TotalContainerCost:     totalContainerCost,
+		SharedSplit:            shared,
+	}
+	result := AggregateCostData(costData, field, subfields, a.CloudProvider, opts)
+
+	// If sending time series data back, switch scale back to hourly data. At this point,
+	// resolutionHours may have converted our hourly data to more- or less-than hourly data.
+	if includeTimeSeries {
+		for _, aggs := range result {
+			ScaleAggregationTimeSeries(aggs, resolutionHours)
+		}
+	}
+
+	// compute length of the time series in the cost data and only cache
+	// aggregation results if the length is sufficiently high
+	costDataLen := costDataTimeSeriesLength(costData)
+	if costDataLen >= minCostDataLength && durationHours > 1 && !noCache {
+		// Set the result map (rather than a pointer to it) because map is a reference type
+		klog.Infof("Caching key in aggregate cache: %s", key)
+		a.AggregateCache.Set(aggKey, result, cacheExpiry)
+	} else {
+		klog.Infof("Not caching for key %s. Not enough data: %t, Duration less than 1h: %t, noCache: %t", key, costDataLen < minCostDataLength, durationHours < 1, noCache)
+	}
+
+	measureTime(profileStart, profileThreshold, profileName)
+
+	return result, cacheMessage, nil
+}
+
+// ScaleAggregationTimeSeries reverses the scaling done by ScaleHourlyCostData, returning
+// the aggregation's time series to hourly data.
+func ScaleAggregationTimeSeries(aggregation *Aggregation, resolutionHours float64) {
+	for _, v := range aggregation.CPUCostVector {
+		v.Value /= resolutionHours
+	}
+
+	for _, v := range aggregation.GPUCostVector {
+		v.Value /= resolutionHours
+	}
+
+	for _, v := range aggregation.RAMCostVector {
+		v.Value /= resolutionHours
+	}
+
+	for _, v := range aggregation.PVCostVector {
+		v.Value /= resolutionHours
+	}
+
+	for _, v := range aggregation.NetworkCostVector {
+		v.Value /= resolutionHours
+	}
+
+	for _, v := range aggregation.TotalCostVector {
+		v.Value /= resolutionHours
+	}
+
+	return
+}
+
+// String returns a string representation of the encapsulated shared resources, which
+// can be used to uniquely identify a set of shared resources. Sorting sets of shared
+// resources ensures that strings representing permutations of the same combination match.
+func (s *SharedResourceInfo) String() string {
+	if s == nil {
+		return ""
+	}
+
+	nss := []string{}
+	for ns := range s.SharedNamespace {
+		nss = append(nss, ns)
+	}
+	sort.Strings(nss)
+	nsStr := strings.Join(nss, ",")
+
+	labels := []string{}
+	for lbl, vals := range s.LabelSelectors {
+		for val := range vals {
+			if lbl != "" && val != "" {
+				labels = append(labels, fmt.Sprintf("%s=%s", lbl, val))
+			}
+		}
+	}
+	sort.Strings(labels)
+	labelStr := strings.Join(labels, ",")
+
+	return fmt.Sprintf("%s:%s", nsStr, labelStr)
+}
+
+type aggKeyParams struct {
+	duration   string
+	offset     string
+	filters    map[string]string
+	field      string
+	subfields  []string
+	rate       string
+	sri        *SharedResourceInfo
+	shareType  string
+	idle       bool
+	timeSeries bool
+	efficiency bool
+}
+
+// GenerateAggKey generates a parameter-unique key for caching the aggregate cost model
+func GenerateAggKey(ps aggKeyParams) string {
+	// parse, trim, and sort podprefix filters
+	podPrefixFilters := []string{}
+	if ppfs, ok := ps.filters["podprefix"]; ok && ppfs != "" {
+		for _, psf := range strings.Split(ppfs, ",") {
+			podPrefixFilters = append(podPrefixFilters, strings.TrimSpace(psf))
+		}
+	}
+	sort.Strings(podPrefixFilters)
+	podPrefixFiltersStr := strings.Join(podPrefixFilters, ",")
+
+	// parse, trim, and sort namespace filters
+	nsFilters := []string{}
+	if nsfs, ok := ps.filters["namespace"]; ok && nsfs != "" {
+		for _, nsf := range strings.Split(nsfs, ",") {
+			nsFilters = append(nsFilters, strings.TrimSpace(nsf))
+		}
+	}
+	sort.Strings(nsFilters)
+	nsFilterStr := strings.Join(nsFilters, ",")
+
+	// parse, trim, and sort node filters
+	nodeFilters := []string{}
+	if nodefs, ok := ps.filters["node"]; ok && nodefs != "" {
+		for _, nodef := range strings.Split(nodefs, ",") {
+			nodeFilters = append(nodeFilters, strings.TrimSpace(nodef))
+		}
+	}
+	sort.Strings(nodeFilters)
+	nodeFilterStr := strings.Join(nodeFilters, ",")
+
+	// parse, trim, and sort cluster filters
+	cFilters := []string{}
+	if cfs, ok := ps.filters["cluster"]; ok && cfs != "" {
+		for _, cf := range strings.Split(cfs, ",") {
+			cFilters = append(cFilters, strings.TrimSpace(cf))
+		}
+	}
+	sort.Strings(cFilters)
+	cFilterStr := strings.Join(cFilters, ",")
+
+	// parse, trim, and sort label filters
+	lFilters := []string{}
+	if lfs, ok := ps.filters["labels"]; ok && lfs != "" {
+		for _, lf := range strings.Split(lfs, ",") {
+			// trim whitespace from the label name and the label value
+			// of each label name/value pair, then reconstruct
+			// e.g. "tier = frontend, app = kubecost" == "app=kubecost,tier=frontend"
+			lfa := strings.Split(lf, "=")
+			if len(lfa) == 2 {
+				lfn := strings.TrimSpace(lfa[0])
+				lfv := strings.TrimSpace(lfa[1])
+				lFilters = append(lFilters, fmt.Sprintf("%s=%s", lfn, lfv))
+			} else {
+				// label is not of the form name=value, so log it and move on
+				klog.V(2).Infof("[Warning] GenerateAggKey: skipping illegal label filter: %s", lf)
+			}
+		}
+	}
+	sort.Strings(lFilters)
+	lFilterStr := strings.Join(lFilters, ",")
+
+	filterStr := fmt.Sprintf("%s:%s:%s:%s:%s", nsFilterStr, nodeFilterStr, cFilterStr, lFilterStr, podPrefixFiltersStr)
+
+	sort.Strings(ps.subfields)
+	fieldStr := fmt.Sprintf("%s:%s", ps.field, strings.Join(ps.subfields, ","))
+
+	return fmt.Sprintf("%s:%s:%s:%s:%s:%s:%s:%t:%t:%t", ps.duration, ps.offset, filterStr, fieldStr, ps.rate,
+		ps.sri, ps.shareType, ps.idle, ps.timeSeries, ps.efficiency)
+}
+
+// AggregateCostModelHandler handles requests to the aggregated cost model API. See
+// ComputeAggregateCostModel for details.
+func (a *Accesses) AggregateCostModelHandler(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+	w.Header().Set("Content-Type", "application/json")
+
+	windowStr := r.URL.Query().Get("window")
+
+	// Convert UTC-RFC3339 pairs to configured UTC offset
+	// e.g. with UTC offset of -0600, 2020-07-01T00:00:00Z becomes
+	// 2020-07-01T06:00:00Z == 2020-07-01T00:00:00-0600
+	// TODO niko/etl fix the frontend because this is confusing if you're
+	// actually asking for UTC time (...Z) and we swap that "Z" out for the
+	// configured UTC offset without asking
+	rfc3339 := `\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ`
+	regex := regexp.MustCompile(fmt.Sprintf(`(%s),(%s)`, rfc3339, rfc3339))
+	match := regex.FindStringSubmatch(windowStr)
+	if match != nil {
+		start, _ := time.Parse(time.RFC3339, match[1])
+		start = start.Add(-env.GetParsedUTCOffset()).In(time.UTC)
+		end, _ := time.Parse(time.RFC3339, match[2])
+		end = end.Add(-env.GetParsedUTCOffset()).In(time.UTC)
+		windowStr = fmt.Sprintf("%sZ,%sZ", start.Format("2006-01-02T15:04:05"), end.Format("2006-01-02T15:04:05Z"))
+	}
+
+	// determine duration and offset from query parameters
+	window, err := kubecost.ParseWindowWithOffset(windowStr, env.GetParsedUTCOffset())
+	if err != nil || window.Start() == nil {
+		WriteError(w, BadRequest(fmt.Sprintf("invalid window: %s", err)))
+		return
+	}
+	duration, offset := window.ToDurationOffset()
+
+	durRegex := regexp.MustCompile(`^(\d+)(m|h|d|s)$`)
+	isDurationStr := durRegex.MatchString(windowStr)
+
+	// legacy offset option should override window offset
+	if r.URL.Query().Get("offset") != "" {
+		offset = r.URL.Query().Get("offset")
+		// Shift window by offset, but only when manually set with separate
+		// parameter and window was provided as a duration string. Otherwise,
+		// do not alter the (duration, offset) from ParseWindowWithOffset.
+		if offset != "1m" && isDurationStr {
+			match := durRegex.FindStringSubmatch(offset)
+			if match != nil && len(match) == 3 {
+				dur := time.Minute
+				if match[2] == "h" {
+					dur = time.Hour
+				}
+				if match[2] == "d" {
+					dur = 24 * time.Hour
+				}
+				if match[2] == "s" {
+					dur = time.Second
+				}
+
+				num, _ := strconv.ParseInt(match[1], 10, 64)
+				window = window.Shift(-time.Duration(num) * dur)
+			}
+		}
+	}
+
+	// redirect requests with no offset to a 1m offset to improve cache hits
+	if offset == "" {
+		offset = "1m"
+	}
+
+	// parse remaining query parameters
+	namespace := r.URL.Query().Get("namespace")
+	cluster := r.URL.Query().Get("cluster")
+	labels := r.URL.Query().Get("labels")
+	podprefix := r.URL.Query().Get("podprefix")
+	labelArray := strings.Split(labels, "=")
+	labelArray[0] = strings.ReplaceAll(labelArray[0], "-", "_")
+	labels = strings.Join(labelArray, "=")
+	field := r.URL.Query().Get("aggregation")
+	subfieldStr := r.URL.Query().Get("aggregationSubfield")
+	rate := r.URL.Query().Get("rate")
+	idleFlag := r.URL.Query().Get("allocateIdle")
+	sharedNamespaces := r.URL.Query().Get("sharedNamespaces")
+	sharedLabelNames := r.URL.Query().Get("sharedLabelNames")
+	sharedLabelValues := r.URL.Query().Get("sharedLabelValues")
+	remote := r.URL.Query().Get("remote") != "false"
+	shared := r.URL.Query().Get("sharedSplit")
+	subfields := []string{}
+	if len(subfieldStr) > 0 {
+		s := strings.Split(r.URL.Query().Get("aggregationSubfield"), ",")
+		for _, rawLabel := range s {
+			subfields = append(subfields, SanitizeLabelName(rawLabel))
+		}
+	}
+
+	var allocateIdle bool
+	if idleFlag == "default" {
+		c, _ := a.CloudProvider.GetConfig()
+		allocateIdle = (c.DefaultIdle == "true")
+	} else {
+		allocateIdle = (idleFlag == "true")
+	}
+
+	// timeSeries == true maintains the time series dimension of the data,
+	// which by default gets summed over the entire interval
+	includeTimeSeries := r.URL.Query().Get("timeSeries") == "true"
+
+	// efficiency == true aggregates and returns usage and efficiency data
+	// includeEfficiency := r.URL.Query().Get("efficiency") == "true"
+
+	// efficiency has been deprecated in favor of a default to always send efficiency
+	includeEfficiency := true
+
+	// TODO niko/caching rename "recomputeCache"
+	// disableCache, if set to "true", tells this function to recompute and
+	// cache the requested data
+	disableCache := r.URL.Query().Get("disableCache") == "true"
+
+	// clearCache, if set to "true", tells this function to flush the cache,
+	// then recompute and cache the requested data
+	clearCache := r.URL.Query().Get("clearCache") == "true"
+
+	// noCache avoids the cache altogether, both reading from and writing to
+	noCache := r.URL.Query().Get("noCache") == "true"
+
+	// noExpireCache should only be used by cache warming to set non-expiring caches
+	noExpireCache := false
+
+	// aggregation field is required
+	if field == "" {
+		WriteError(w, BadRequest("Missing aggregation field parameter"))
+		return
+	}
+
+	// aggregation subfield is required when aggregation field is "label"
+	if field == "label" && len(subfields) == 0 {
+		WriteError(w, BadRequest("Missing aggregation subfield parameter for aggregation by label"))
+		return
+	}
+
+	// enforce one of four available rate options
+	if rate != "" && rate != "hourly" && rate != "daily" && rate != "monthly" {
+		WriteError(w, BadRequest("If set, rate parameter must be one of: 'hourly', 'daily', 'monthly'"))
+		return
+	}
+
+	// parse cost data filters
+	// namespace and cluster are exact-string-matches
+	// labels are expected to be comma-separated and to take the form key=value
+	// e.g. app=cost-analyzer,app.kubernetes.io/instance=kubecost
+	filters := map[string]string{
+		"namespace": namespace,
+		"cluster":   cluster,
+		"labels":    labels,
+		"podprefix": podprefix,
+	}
+
+	// parse shared resources
+	sn := []string{}
+	sln := []string{}
+	slv := []string{}
+	if sharedNamespaces != "" {
+		sn = strings.Split(sharedNamespaces, ",")
+	}
+	if sharedLabelNames != "" {
+		sln = strings.Split(sharedLabelNames, ",")
+		slv = strings.Split(sharedLabelValues, ",")
+		if len(sln) != len(slv) || slv[0] == "" {
+			WriteError(w, BadRequest("Supply exacly one shared label value per shared label name"))
+			return
+		}
+	}
+	var sr *SharedResourceInfo
+	if len(sn) > 0 || len(sln) > 0 {
+		sr = NewSharedResourceInfo(true, sn, sln, slv)
+	}
+
+	// enable remote if it is available and not disabled
+	remoteEnabled := remote && env.IsRemoteEnabled()
+
+	// if custom pricing has changed, then clear the cache and recompute data
+	if a.CustomPricingHasChanged() {
+		clearCache = true
+	}
+
+	promClient := a.GetPrometheusClient(remote)
+
+	var data map[string]*Aggregation
+	var message string
+
+	etlEnabled := env.IsETLEnabled()
+	useETLAdapter := r.URL.Query().Get("etl") == "true"
+	if etlEnabled && useETLAdapter {
+		data, message, err = a.AdaptETLAggregateCostModel(window, field, subfields, rate, filters, sr, shared, allocateIdle, includeTimeSeries)
+	} else {
+		data, message, err = a.ComputeAggregateCostModel(promClient, duration, offset, field, subfields, rate, filters,
+			sr, shared, allocateIdle, includeTimeSeries, includeEfficiency, disableCache, clearCache, noCache, noExpireCache, remoteEnabled, false)
+	}
+
+	// Find any warnings in http request context
+	warning, _ := product.GetWarning(r)
+
+	if err != nil {
+		if emptyErr, ok := err.(*EmptyDataError); ok {
+			if warning == "" {
+				WriteDataWithMessage(w, map[string]interface{}{}, emptyErr.Error())
+			} else {
+				WriteDataWithMessageAndWarning(w, map[string]interface{}{}, emptyErr.Error(), warning)
+			}
+			return
+		}
+		if boundaryErr, ok := err.(*kubecost.BoundaryError); ok {
+			if window.Start() != nil && window.Start().After(time.Now().Add(-90*24*time.Hour)) {
+				// Asking for data within a 90 day period: it will be available
+				// after the pipeline builds
+				msg := "Data will be available after ETL is built"
+
+				rex := regexp.MustCompile(`(\d+\.*\d*)%`)
+				match := rex.FindStringSubmatch(boundaryErr.Message)
+				if len(match) > 1 {
+					completionPct, err := strconv.ParseFloat(match[1], 64)
+					if err == nil {
+						msg = fmt.Sprintf("%s (%.1f%% complete)", msg, completionPct)
+					}
+				}
+
+				WriteError(w, InternalServerError(msg))
+			} else {
+				// Boundary error outside of 90 day period; may not be available
+				WriteError(w, InternalServerError(boundaryErr.Error()))
+			}
+			return
+		}
+		errStr := fmt.Sprintf("error computing aggregate cost model: %s", err)
+		WriteError(w, InternalServerError(errStr))
+		return
+	}
+
+	if warning == "" {
+		WriteDataWithMessage(w, data, message)
+	} else {
+		WriteDataWithMessageAndWarning(w, data, message, warning)
+	}
+}

+ 195 - 0
pkg/costmodel/aggregation_test.go

@@ -0,0 +1,195 @@
+package costmodel
+
+import (
+	"testing"
+
+	"github.com/kubecost/cost-model/pkg/util"
+)
+
+func TestScaleHourlyCostData(t *testing.T) {
+	costData := map[string]*CostData{}
+
+	start := 1570000000
+	oneHour := 60 * 60
+
+	generateVectorSeries := func(start, count, interval int, value float64) []*util.Vector {
+		vs := []*util.Vector{}
+		for i := 0; i < count; i++ {
+			v := &util.Vector{
+				Timestamp: float64(start + (i * oneHour)),
+				Value:     value,
+			}
+			vs = append(vs, v)
+		}
+		return vs
+	}
+
+	costData["default"] = &CostData{
+		RAMReq:        generateVectorSeries(start, 100, oneHour, 1.0),
+		RAMUsed:       generateVectorSeries(start, 0, oneHour, 1.0),
+		RAMAllocation: generateVectorSeries(start, 100, oneHour, 107.226),
+		CPUReq:        generateVectorSeries(start, 100, oneHour, 0.00317),
+		CPUUsed:       generateVectorSeries(start, 95, oneHour, 1.0),
+		CPUAllocation: generateVectorSeries(start, 2, oneHour, 123.456),
+		PVCData: []*PersistentVolumeClaimData{
+			{Values: generateVectorSeries(start, 100, oneHour, 1.34)},
+		},
+	}
+
+	compressedData := ScaleHourlyCostData(costData, 10)
+
+	act, ok := compressedData["default"]
+	if !ok {
+		t.Errorf("compressed data should have key \"default\"")
+	}
+
+	// RAMReq
+	if len(act.RAMReq) != 10 {
+		t.Errorf("expected RAMReq to have length %d, was actually %d", 10, len(act.RAMReq))
+	}
+	for _, val := range act.RAMReq {
+		if val.Value != 10.0 {
+			t.Errorf("expected each RAMReq Vector to have Value %f, was actually %f", 10.0, val.Value)
+		}
+	}
+
+	// RAMUsed
+	if len(act.RAMUsed) != 0 {
+		t.Errorf("expected RAMUsed to have length %d, was actually %d", 0, len(act.RAMUsed))
+	}
+
+	// RAMAllocation
+	if len(act.RAMAllocation) != 10 {
+		t.Errorf("expected RAMAllocation to have length %d, was actually %d", 10, len(act.RAMAllocation))
+	}
+	for _, val := range act.RAMAllocation {
+		if val.Value != 1072.26 {
+			t.Errorf("expected each RAMAllocation Vector to have Value %f, was actually %f", 1072.26, val.Value)
+		}
+	}
+
+	// CPUReq
+	if len(act.CPUReq) != 10 {
+		t.Errorf("expected CPUReq to have length %d, was actually %d", 10, len(act.CPUReq))
+	}
+	for _, val := range act.CPUReq {
+		if val.Value != 0.0317 {
+			t.Errorf("expected each CPUReq Vector to have Value %f, was actually %f", 0.0317, val.Value)
+		}
+	}
+
+	// CPUUsed
+	if len(act.CPUUsed) != 10 {
+		t.Errorf("expected CPUUsed to have length %d, was actually %d", 10, len(act.CPUUsed))
+	}
+	for _, val := range act.CPUUsed[:len(act.CPUUsed)-1] {
+		if val.Value != 10.0 {
+			t.Errorf("expected each CPUUsed Vector to have Value %f, was actually %f", 10.0, val.Value)
+		}
+	}
+	if act.CPUUsed[len(act.CPUUsed)-1].Value != 5.0 {
+		t.Errorf("expected each CPUUsed Vector to have Value %f, was actually %f", 5.0, act.CPUUsed[len(act.CPUUsed)-1].Value)
+	}
+
+	// CPUAllocation
+	if len(act.CPUAllocation) != 1 {
+		t.Errorf("expected CPUAllocation to have length %d, was actually %d", 1, len(act.CPUAllocation))
+	}
+	if act.CPUAllocation[0].Value != 246.912 {
+		t.Errorf("expected each CPUAllocation Vector to have Value %f, was actually %f", 246.912, act.CPUAllocation[len(act.CPUAllocation)-1].Value)
+	}
+
+	// PVCData
+	if len(act.PVCData[0].Values) != 10 {
+		t.Errorf("expected PVCData[0] to have length %d, was actually %d", 10, len(act.PVCData[0].Values))
+	}
+	for _, val := range act.PVCData[0].Values {
+		if val.Value != 13.4 {
+			t.Errorf("expected each PVCData[0] Vector to have Value %f, was actually %f", 13.4, val.Value)
+		}
+	}
+
+	costData["default"] = &CostData{
+		RAMReq:        generateVectorSeries(start, 100, oneHour, 1.0),
+		RAMUsed:       generateVectorSeries(start, 0, oneHour, 1.0),
+		RAMAllocation: generateVectorSeries(start, 100, oneHour, 107.226),
+		CPUReq:        generateVectorSeries(start, 100, oneHour, 0.00317),
+		CPUUsed:       generateVectorSeries(start, 95, oneHour, 1.0),
+		CPUAllocation: generateVectorSeries(start, 2, oneHour, 124.6),
+		PVCData: []*PersistentVolumeClaimData{
+			{Values: generateVectorSeries(start, 100, oneHour, 1.34)},
+		},
+	}
+
+	scaledData := ScaleHourlyCostData(costData, 0.1)
+
+	act, ok = scaledData["default"]
+	if !ok {
+		t.Errorf("scaled data should have key \"default\"")
+	}
+
+	// RAMReq
+	if len(act.RAMReq) != 100 {
+		t.Errorf("expected RAMReq to have length %d, was actually %d", 100, len(act.RAMReq))
+	}
+	for _, val := range act.RAMReq {
+		if val.Value != 0.1 {
+			t.Errorf("expected each RAMReq Vector to have Value %f, was actually %f", 0.1, val.Value)
+		}
+	}
+
+	// RAMUsed
+	if len(act.RAMUsed) != 0 {
+		t.Errorf("expected RAMUsed to have length %d, was actually %d", 0, len(act.RAMUsed))
+	}
+
+	// RAMAllocation
+	if len(act.RAMAllocation) != 100 {
+		t.Errorf("expected RAMAllocation to have length %d, was actually %d", 100, len(act.RAMAllocation))
+	}
+	for _, val := range act.RAMAllocation {
+		if val.Value != 10.7226 {
+			t.Errorf("expected each RAMAllocation Vector to have Value %f, was actually %f", 10.7226, val.Value)
+		}
+	}
+
+	// CPUReq
+	if len(act.CPUReq) != 100 {
+		t.Errorf("expected CPUReq to have length %d, was actually %d", 100, len(act.CPUReq))
+	}
+	for _, val := range act.CPUReq {
+		if val.Value != 0.000317 {
+			t.Errorf("expected each CPUReq Vector to have Value %f, was actually %f", 0.000317, val.Value)
+		}
+	}
+
+	// CPUUsed
+	if len(act.CPUUsed) != 95 {
+		t.Errorf("expected CPUUsed to have length %d, was actually %d", 95, len(act.CPUUsed))
+	}
+	for _, val := range act.CPUUsed {
+		if val.Value != 0.1 {
+			t.Errorf("expected each CPUUsed Vector to have Value %f, was actually %f", 0.1, val.Value)
+		}
+	}
+
+	// CPUAllocation
+	if len(act.CPUAllocation) != 2 {
+		t.Errorf("expected CPUAllocation to have length %d, was actually %d", 2, len(act.CPUAllocation))
+	}
+	for _, val := range act.CPUAllocation {
+		if val.Value != 12.46 {
+			t.Errorf("expected each CPUAllocation Vector to have Value %f, was actually %f", 12.46, val.Value)
+		}
+	}
+
+	// PVCData
+	if len(act.PVCData[0].Values) != 100 {
+		t.Errorf("expected PVCData[0] to have length %d, was actually %d", 100, len(act.PVCData[0].Values))
+	}
+	for _, val := range act.PVCData[0].Values {
+		if val.Value != .134 {
+			t.Errorf("expected each PVCData[0] Vector to have Value %f, was actually %f", .134, val.Value)
+		}
+	}
+}

+ 4 - 4
pkg/costmodel/cluster.go

@@ -25,13 +25,13 @@ const (
 	  ) by (cluster_id)`
 
 	queryStorage = `sum(
-		avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730 
+		avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730
 		* avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
 	  ) by (cluster_id) %s`
 
 	queryTotal = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 +
 	  sum(
-		avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730 
+		avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730
 		* avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
 	  ) by (cluster_id) %s`
 
@@ -938,7 +938,7 @@ func ClusterLoadBalancers(cp cloud.Provider, client prometheus.Client, duration,
 }
 
 // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
-func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
+func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
 	// Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
 	start, end, err := util.ParseTimeRange(window, offset)
 	if err != nil {
@@ -1094,7 +1094,7 @@ func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, wind
 
 	// Determine combined discount
 	discount, customDiscount := 0.0, 0.0
-	c, err := A.Cloud.GetConfig()
+	c, err := a.CloudProvider.GetConfig()
 	if err == nil {
 		discount, err = ParsePercentString(c.Discount)
 		if err != nil {

+ 7 - 7
pkg/costmodel/metrics.go

@@ -433,16 +433,16 @@ func StartCostModelMetricRecording(a *Accesses) bool {
 				podStatus[pod.Name] = pod.Status.Phase
 			}
 
-			cfg, _ := a.Cloud.GetConfig()
+			cfg, _ := a.CloudProvider.GetConfig()
 
-			provisioner, clusterManagementCost, err := a.Cloud.ClusterManagementPricing()
+			provisioner, clusterManagementCost, err := a.CloudProvider.ClusterManagementPricing()
 			if err != nil {
 				klog.V(1).Infof("Error getting cluster management cost %s", err.Error())
 			}
 			a.ClusterManagementCostRecorder.WithLabelValues(provisioner).Set(clusterManagementCost)
 
 			// Record network pricing at global scope
-			networkCosts, err := a.Cloud.NetworkPricing()
+			networkCosts, err := a.CloudProvider.NetworkPricing()
 			if err != nil {
 				klog.V(4).Infof("Failed to retrieve network costs: %s", err.Error())
 			} else {
@@ -451,14 +451,14 @@ func StartCostModelMetricRecording(a *Accesses) bool {
 				a.NetworkInternetEgressRecorder.Set(networkCosts.InternetNetworkEgressCost)
 			}
 
-			data, err := a.Model.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.Cloud, "2m", "", "")
+			data, err := a.Model.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.CloudProvider, "2m", "", "")
 			if err != nil {
 				klog.V(1).Info("Error in price recording: " + err.Error())
 				// zero the for loop so the time.Sleep will still work
 				data = map[string]*CostData{}
 			}
 
-			nodes, err := a.Model.GetNodeCost(a.Cloud)
+			nodes, err := a.Model.GetNodeCost(a.CloudProvider)
 			for nodeName, node := range nodes {
 				// Emit costs, guarding against NaN inputs for custom pricing.
 				cpuCost, _ := strconv.ParseFloat(node.VCPUCost, 64)
@@ -512,7 +512,7 @@ func StartCostModelMetricRecording(a *Accesses) bool {
 				nodeSeen[labelKey] = true
 			}
 
-			loadBalancers, err := a.Model.GetLBCost(a.Cloud)
+			loadBalancers, err := a.Model.GetLBCost(a.CloudProvider)
 			for lbKey, lb := range loadBalancers {
 				// TODO: parse (if necessary) and calculate cost associated with loadBalancer based on dynamic cloud prices fetched into each lb struct on GetLBCost() call
 				keyParts := getLabelStringsFromKey(lbKey)
@@ -594,7 +594,7 @@ func StartCostModelMetricRecording(a *Accesses) bool {
 						Region:     region,
 						Parameters: parameters,
 					}
-					GetPVCost(cacPv, pv, a.Cloud, region)
+					GetPVCost(cacPv, pv, a.CloudProvider, region)
 					c, _ := strconv.ParseFloat(cacPv.Cost, 64)
 					a.PersistentVolumePriceRecorder.WithLabelValues(pv.Name, pv.Name, cacPv.ProviderID).Set(c)
 					labelKey := getKeyFromLabelStrings(pv.Name, pv.Name)

+ 317 - 180
pkg/costmodel/router.go

@@ -17,12 +17,13 @@ import (
 
 	sentry "github.com/getsentry/sentry-go"
 
-	costAnalyzerCloud "github.com/kubecost/cost-model/pkg/cloud"
+	"github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/clustercache"
 	cm "github.com/kubecost/cost-model/pkg/clustermanager"
 	"github.com/kubecost/cost-model/pkg/costmodel/clusters"
 	"github.com/kubecost/cost-model/pkg/env"
 	"github.com/kubecost/cost-model/pkg/errors"
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/prom"
 	"github.com/kubecost/cost-model/pkg/thanos"
 	prometheusClient "github.com/prometheus/client_golang/api"
@@ -41,6 +42,10 @@ import (
 const (
 	prometheusTroubleshootingEp = "http://docs.kubecost.com/custom-prom#troubleshoot"
 	RFC3339Milli                = "2006-01-02T15:04:05.000Z"
+	maxCacheMinutes1d           = 11
+	maxCacheMinutes2d           = 17
+	maxCacheMinutes7d           = 37
+	maxCacheMinutes30d          = 137
 )
 
 var (
@@ -48,16 +53,16 @@ var (
 	gitCommit string
 )
 
-var Router = httprouter.New()
-var A Accesses
-
+// Accesses defines a singleton application instance, providing access to
+// Prometheus, Kubernetes, the cloud provider, and caches.
 type Accesses struct {
+	Router                        *httprouter.Router
 	PrometheusClient              prometheusClient.Client
 	ThanosClient                  prometheusClient.Client
 	KubeClientSet                 kubernetes.Interface
 	ClusterManager                *cm.ClusterManager
 	ClusterMap                    clusters.ClusterMap
-	Cloud                         costAnalyzerCloud.Provider
+	CloudProvider                 cloud.Provider
 	CPUPriceRecorder              *prometheus.GaugeVec
 	RAMPriceRecorder              *prometheus.GaugeVec
 	PersistentVolumePriceRecorder *prometheus.GaugeVec
@@ -77,6 +82,64 @@ type Accesses struct {
 	DeploymentSelectorRecorder    *prometheus.GaugeVec
 	Model                         *CostModel
 	OutOfClusterCache             *cache.Cache
+	AggregateCache                *cache.Cache
+	CostDataCache                 *cache.Cache
+	ClusterCostsCache             *cache.Cache
+	CacheExpiration               map[string]time.Duration
+}
+
+// GetPrometheusClient decides whether the default Prometheus client or the Thanos client
+// should be used.
+func (a *Accesses) GetPrometheusClient(remote bool) prometheusClient.Client {
+	// Use Thanos Client if it exists (enabled) and remote flag set
+	var pc prometheusClient.Client
+
+	if remote && a.ThanosClient != nil {
+		pc = a.ThanosClient
+	} else {
+		pc = a.PrometheusClient
+	}
+
+	return pc
+}
+
+// GetCacheExpiration looks up and returns custom cache expiration for the given duration.
+// If one does not exists, it returns the default cache expiration, which is defined by
+// the particular cache.
+func (a *Accesses) GetCacheExpiration(dur string) time.Duration {
+	if expiration, ok := a.CacheExpiration[dur]; ok {
+		return expiration
+	}
+	return cache.DefaultExpiration
+}
+
+// GetCacheRefresh determines how long to wait before refreshing the cache for the given duration,
+// which is done 1 minute before we expect the cache to expire, or 1 minute if expiration is
+// not found or is less than 2 minutes.
+func (a *Accesses) GetCacheRefresh(dur string) time.Duration {
+	expiry := a.GetCacheExpiration(dur).Minutes()
+	if expiry <= 2.0 {
+		return time.Minute
+	}
+	mins := time.Duration(expiry/2.0) * time.Minute
+	return mins
+}
+
+func (a *Accesses) ClusterCostsFromCacheHandler(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+	w.Header().Set("Content-Type", "application/json")
+
+	durationHrs := "24h"
+	offset := "1m"
+	pClient := a.GetPrometheusClient(true)
+
+	key := fmt.Sprintf("%s:%s", durationHrs, offset)
+	if data, valid := a.ClusterCostsCache.Get(key); valid {
+		clusterCosts := data.(map[string]*ClusterCosts)
+		w.Write(WrapDataWithMessage(clusterCosts, nil, "clusterCosts cache hit"))
+	} else {
+		data, err := a.ComputeClusterCosts(pClient, a.CloudProvider, durationHrs, offset, true)
+		w.Write(WrapDataWithMessage(data, err, fmt.Sprintf("clusterCosts cache miss: %s", key)))
+	}
 }
 
 type DataEnvelope struct {
@@ -295,7 +358,7 @@ func (a *Accesses) RefreshPricingData(w http.ResponseWriter, r *http.Request, ps
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	err := a.Cloud.DownloadPricingData()
+	err := a.CloudProvider.DownloadPricingData()
 
 	w.Write(WrapData(nil, err))
 }
@@ -313,7 +376,7 @@ func (a *Accesses) CostDataModel(w http.ResponseWriter, r *http.Request, ps http
 		offset = "offset " + offset
 	}
 
-	data, err := a.Model.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.Cloud, window, offset, namespace)
+	data, err := a.Model.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.CloudProvider, window, offset, namespace)
 
 	if fields != "" {
 		filteredData := filterFields(fields, data)
@@ -346,7 +409,7 @@ func (a *Accesses) ClusterCosts(w http.ResponseWriter, r *http.Request, ps httpr
 		client = a.PrometheusClient
 	}
 
-	data, err := ComputeClusterCosts(client, a.Cloud, window, offset, true)
+	data, err := a.ComputeClusterCosts(client, a.CloudProvider, window, offset, true)
 	w.Write(WrapData(data, err))
 }
 
@@ -359,7 +422,7 @@ func (a *Accesses) ClusterCostsOverTime(w http.ResponseWriter, r *http.Request,
 	window := r.URL.Query().Get("window")
 	offset := r.URL.Query().Get("offset")
 
-	data, err := ClusterCostsOverTime(a.PrometheusClient, a.Cloud, start, end, window, offset)
+	data, err := ClusterCostsOverTime(a.PrometheusClient, a.CloudProvider, start, end, window, offset)
 	w.Write(WrapData(data, err))
 }
 
@@ -386,7 +449,7 @@ func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps
 	}
 
 	resolutionHours := 1.0
-	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.Cloud, start, end, window, resolutionHours, namespace, cluster, remoteEnabled, "")
+	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.CloudProvider, start, end, window, resolutionHours, namespace, cluster, remoteEnabled, "")
 	if err != nil {
 		w.Write(WrapData(nil, err))
 	}
@@ -398,55 +461,6 @@ func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps
 	}
 }
 
-// CostDataModelRangeLarge is experimental multi-cluster and long-term data storage in SQL support.
-func (a *Accesses) CostDataModelRangeLarge(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
-	w.Header().Set("Content-Type", "application/json")
-	w.Header().Set("Access-Control-Allow-Origin", "*")
-
-	startString := r.URL.Query().Get("start")
-	endString := r.URL.Query().Get("end")
-	windowString := r.URL.Query().Get("window")
-
-	var start time.Time
-	var end time.Time
-	var err error
-
-	if windowString == "" {
-		windowString = "1h"
-	}
-	if startString != "" {
-		start, err = time.Parse(RFC3339Milli, startString)
-		if err != nil {
-			klog.V(1).Infof("Error parsing time " + startString + ". Error: " + err.Error())
-			w.Write(WrapData(nil, err))
-		}
-	} else {
-		window, err := time.ParseDuration(windowString)
-		if err != nil {
-			w.Write(WrapData(nil, fmt.Errorf("Invalid duration '%s'", windowString)))
-
-		}
-		start = time.Now().Add(-2 * window)
-	}
-	if endString != "" {
-		end, err = time.Parse(RFC3339Milli, endString)
-		if err != nil {
-			klog.V(1).Infof("Error parsing time " + endString + ". Error: " + err.Error())
-			w.Write(WrapData(nil, err))
-		}
-	} else {
-		end = time.Now()
-	}
-
-	remoteLayout := "2006-01-02T15:04:05Z"
-	remoteStartStr := start.Format(remoteLayout)
-	remoteEndStr := end.Format(remoteLayout)
-	klog.V(1).Infof("Using remote database for query from %s to %s with window %s", startString, endString, windowString)
-
-	data, err := CostDataRangeFromSQL("", "", windowString, remoteStartStr, remoteEndStr)
-	w.Write(WrapData(data, err))
-}
-
 func parseAggregations(customAggregation, aggregator, filterType string) (string, []string, string) {
 	var key string
 	var filter string
@@ -467,106 +481,41 @@ func parseAggregations(customAggregation, aggregator, filterType string) (string
 	return key, val, filter
 }
 
-func (a *Accesses) OutofClusterCosts(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) GetAllNodePricing(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	start := r.URL.Query().Get("start")
-	end := r.URL.Query().Get("end")
-	aggregator := r.URL.Query().Get("aggregator")
-	customAggregation := r.URL.Query().Get("customAggregation")
-	filterType := r.URL.Query().Get("filterType")
-	filterValue := r.URL.Query().Get("filterValue")
-	var data []*costAnalyzerCloud.OutOfClusterAllocation
-	var err error
-	_, aggregations, filter := parseAggregations(customAggregation, aggregator, filterType)
-	data, err = a.Cloud.ExternalAllocations(start, end, aggregations, filter, filterValue, false)
+	data, err := a.CloudProvider.AllNodePricing()
 	w.Write(WrapData(data, err))
 }
 
-func (a *Accesses) OutOfClusterCostsWithCache(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) GetConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
-
-	// start date for which to query costs, inclusive; format YYYY-MM-DD
-	start := r.URL.Query().Get("start")
-	// end date for which to query costs, inclusive; format YYYY-MM-DD
-	end := r.URL.Query().Get("end")
-	// aggregator sets the field by which to aggregate; default, prepended by "kubernetes_"
-	kubernetesAggregation := r.URL.Query().Get("aggregator")
-	// customAggregation allows full customization of aggregator w/o prepending
-	customAggregation := r.URL.Query().Get("customAggregation")
-	// disableCache, if set to "true", tells this function to recompute and
-	// cache the requested data
-	disableCache := r.URL.Query().Get("disableCache") == "true"
-	// clearCache, if set to "true", tells this function to flush the cache,
-	// then recompute and cache the requested data
-	clearCache := r.URL.Query().Get("clearCache") == "true"
-
-	filterType := r.URL.Query().Get("filterType")
-	filterValue := r.URL.Query().Get("filterValue")
-
-	aggregationkey, aggregation, filter := parseAggregations(customAggregation, kubernetesAggregation, filterType)
-
-	// clear cache prior to checking the cache so that a clearCache=true
-	// request always returns a freshly computed value
-	if clearCache {
-		a.OutOfClusterCache.Flush()
-	}
-
-	// attempt to retrieve cost data from cache
-	key := fmt.Sprintf(`%s:%s:%s:%s:%s`, start, end, aggregationkey, filter, filterValue)
-	if value, found := a.OutOfClusterCache.Get(key); found && !disableCache {
-		if data, ok := value.([]*costAnalyzerCloud.OutOfClusterAllocation); ok {
-			w.Write(WrapDataWithMessage(data, nil, fmt.Sprintf("out of cluster cache hit: %s", key)))
-			return
-		}
-		klog.Errorf("caching error: failed to type cast data: %s", key)
-	}
-
-	data, err := a.Cloud.ExternalAllocations(start, end, aggregation, filter, filterValue, false)
-	if err == nil {
-		a.OutOfClusterCache.Set(key, data, cache.DefaultExpiration)
-	}
-
-	w.Write(WrapDataWithMessage(data, err, fmt.Sprintf("out of cluser cache miss: %s", key)))
-}
-
-func (p *Accesses) GetAllNodePricing(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
-	w.Header().Set("Content-Type", "application/json")
-	w.Header().Set("Access-Control-Allow-Origin", "*")
-
-	data, err := p.Cloud.AllNodePricing()
-	w.Write(WrapData(data, err))
-}
-
-func (p *Accesses) GetConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
-	w.Header().Set("Content-Type", "application/json")
-	w.Header().Set("Access-Control-Allow-Origin", "*")
-	data, err := p.Cloud.GetConfig()
+	data, err := a.CloudProvider.GetConfig()
 	w.Write(WrapData(data, err))
 }
 
-func (p *Accesses) UpdateSpotInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) UpdateSpotInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
-	data, err := p.Cloud.UpdateConfig(r.Body, costAnalyzerCloud.SpotInfoUpdateType)
+	data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.SpotInfoUpdateType)
 	if err != nil {
 		w.Write(WrapData(data, err))
 		return
 	}
 	w.Write(WrapData(data, err))
-	err = p.Cloud.DownloadPricingData()
+	err = a.CloudProvider.DownloadPricingData()
 	if err != nil {
 		klog.V(1).Infof("Error redownloading data on config update: %s", err.Error())
 	}
 	return
 }
 
-func (p *Accesses) UpdateAthenaInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) UpdateAthenaInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
-	data, err := p.Cloud.UpdateConfig(r.Body, costAnalyzerCloud.AthenaInfoUpdateType)
+	data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.AthenaInfoUpdateType)
 	if err != nil {
 		w.Write(WrapData(data, err))
 		return
@@ -575,10 +524,10 @@ func (p *Accesses) UpdateAthenaInfoConfigs(w http.ResponseWriter, r *http.Reques
 	return
 }
 
-func (p *Accesses) UpdateBigQueryInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) UpdateBigQueryInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
-	data, err := p.Cloud.UpdateConfig(r.Body, costAnalyzerCloud.BigqueryUpdateType)
+	data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.BigqueryUpdateType)
 	if err != nil {
 		w.Write(WrapData(data, err))
 		return
@@ -587,10 +536,10 @@ func (p *Accesses) UpdateBigQueryInfoConfigs(w http.ResponseWriter, r *http.Requ
 	return
 }
 
-func (p *Accesses) UpdateConfigByKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) UpdateConfigByKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
-	data, err := p.Cloud.UpdateConfig(r.Body, "")
+	data, err := a.CloudProvider.UpdateConfig(r.Body, "")
 	if err != nil {
 		w.Write(WrapData(data, err))
 		return
@@ -599,11 +548,11 @@ func (p *Accesses) UpdateConfigByKey(w http.ResponseWriter, r *http.Request, ps
 	return
 }
 
-func (p *Accesses) ManagementPlatform(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) ManagementPlatform(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	data, err := p.Cloud.GetManagementPlatform()
+	data, err := a.CloudProvider.GetManagementPlatform()
 	if err != nil {
 		w.Write(WrapData(data, err))
 		return
@@ -612,45 +561,206 @@ func (p *Accesses) ManagementPlatform(w http.ResponseWriter, r *http.Request, ps
 	return
 }
 
-func (p *Accesses) ClusterInfo(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) ClusterInfo(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	data := GetClusterInfo(p.KubeClientSet, p.Cloud)
+	data := GetClusterInfo(a.KubeClientSet, a.CloudProvider)
 
 	w.Write(WrapData(data, nil))
 }
 
-func (p *Accesses) GetClusterInfoMap(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+func (a *Accesses) GetClusterInfoMap(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	data := p.ClusterMap.AsMap()
+	data := a.ClusterMap.AsMap()
 
 	w.Write(WrapData(data, nil))
 }
 
-func (p *Accesses) GetServiceAccountStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
+func (a *Accesses) GetServiceAccountStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	w.Write(WrapData(A.Cloud.ServiceAccountStatus(), nil))
+	w.Write(WrapData(a.CloudProvider.ServiceAccountStatus(), nil))
 }
 
-func (p *Accesses) GetPricingSourceStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
+func (a *Accesses) GetPricingSourceStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	w.Write(WrapData(A.Cloud.PricingSourceStatus(), nil))
+	w.Write(WrapData(a.CloudProvider.PricingSourceStatus(), nil))
 }
 
-func (p *Accesses) GetPrometheusMetadata(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
+func (a *Accesses) GetPrometheusMetadata(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
 
-	w.Write(WrapData(prom.Validate(p.PrometheusClient)))
+	w.Write(WrapData(prom.Validate(a.PrometheusClient)))
 }
 
+// func warmAggregateCostModelCache() {
+// 	// Only allow one concurrent cache-warming operation
+// 	sem := util.NewSemaphore(1)
+
+// 	// Set default values, pulling them from application settings where applicable, and warm the cache
+// 	// for the given duration. Cache is intentionally set to expire (i.e. noExpireCache=false) so that
+// 	// if the default parameters change, the old cached defaults with eventually expire. Thus, the
+// 	// timing of the cache expiry/refresh is the only mechanism ensuring 100% cache warmth.
+// 	warmFunc := func(duration, durationHrs, offset string, cacheEfficiencyData bool) (error, error) {
+// 		field := "namespace"
+// 		subfields := []string{}
+// 		rate := ""
+// 		filters := map[string]string{}
+// 		includeTimeSeries := false
+// 		includeEfficiency := true
+// 		disableCache := true
+// 		clearCache := false
+// 		noCache := false
+// 		noExpireCache := false
+// 		remote := true
+// 		shareSplit := "weighted"
+// 		remoteAvailable := env.IsRemoteEnabled()
+// 		remoteEnabled := remote && remoteAvailable
+// 		promClient := a.OpenSource.GetPrometheusClient(remote)
+// 		allocateIdle := provider.AllocateIdleByDefault(a.CloudProvider)
+
+// 		sharedNamespaces := provider.SharedNamespaces(a.CloudProvider)
+// 		sharedLabelNames, sharedLabelValues := provider.SharedLabels(a.CloudProvider)
+
+// 		var sri *costmodel.SharedResourceInfo
+// 		if len(sharedNamespaces) > 0 || len(sharedLabelNames) > 0 {
+// 			sri = costmodel.NewSharedResourceInfo(true, sharedNamespaces, sharedLabelNames, sharedLabelValues)
+// 		}
+
+// 		aggKey := costmodel.GenerateAggKey(aggKeyParams{
+// 			duration:   duration,
+// 			offset:     offset,
+// 			filters:    filters,
+// 			field:      field,
+// 			subfields:  subfields,
+// 			rate:       rate,
+// 			sri:        sri,
+// 			shareType:  shareSplit,
+// 			idle:       allocateIdle,
+// 			timeSeries: includeTimeSeries,
+// 			efficiency: includeEfficiency,
+// 		})
+// 		klog.V(3).Infof("[Info] aggregation: cache warming defaults: %s", aggKey)
+// 		key := fmt.Sprintf("%s:%s", durationHrs, offset)
+
+// 		_, _, aggErr := costmodel.ComputeAggregateCostModel(promClient, duration, offset, field, subfields, rate, filters,
+// 			sri, shareSplit, allocateIdle, includeTimeSeries, includeEfficiency, disableCache,
+// 			clearCache, noCache, noExpireCache, remoteEnabled, false)
+// 		if aggErr != nil {
+// 			klog.Infof("Error building cache %s: %s", key, aggErr)
+// 		}
+// 		if costmodel.A.ThanosClient != nil {
+// 			offset = thanos.Offset()
+// 			klog.Infof("Setting offset to %s", offset)
+// 		}
+// 		totals, err := costmodel.ComputeClusterCosts(promClient, A.OpenSource.CloudProvider, durationHrs, offset, cacheEfficiencyData)
+// 		if err != nil {
+// 			klog.Infof("Error building cluster costs cache %s", key)
+// 		}
+// 		maxMinutesWithData := 0.0
+// 		for _, cluster := range totals {
+// 			if cluster.DataMinutes > maxMinutesWithData {
+// 				maxMinutesWithData = cluster.DataMinutes
+// 			}
+// 		}
+// 		if len(totals) > 0 && maxMinutesWithData > clusterCostsCacheTime {
+
+// 			A.ClusterCostsCache.Set(key, totals, A.GetCacheExpiration(duration))
+// 			klog.V(3).Infof("[Info] caching %s cluster costs for %s", duration, A.GetCacheExpiration(duration))
+// 		} else {
+// 			klog.V(2).Infof("[Warning] not caching %s cluster costs: no data or less than %f minutes data ", duration, clusterCostsCacheTime)
+// 		}
+// 		return aggErr, err
+// 	}
+
+// 	// 1 day
+// 	go func(sem *util.Semaphore) {
+// 		defer errors.HandlePanic()
+
+// 		for {
+// 			duration := "1d"
+// 			offset := "1m"
+// 			durHrs := "24h"
+
+// 			sem.Acquire()
+// 			warmFunc(duration, durHrs, offset, true)
+// 			sem.Return()
+
+// 			klog.V(3).Infof("aggregation: warm cache: %s", duration)
+// 			time.Sleep(A.GetCacheRefresh(duration))
+// 		}
+// 	}(sem)
+
+// 	// 2 day
+// 	go func(sem *util.Semaphore) {
+// 		defer errors.HandlePanic()
+
+// 		for {
+// 			duration := "2d"
+// 			offset := "1m"
+// 			durHrs := "48h"
+
+// 			sem.Acquire()
+// 			warmFunc(duration, durHrs, offset, false)
+// 			sem.Return()
+
+// 			klog.V(3).Infof("aggregation: warm cache: %s", duration)
+// 			time.Sleep(A.GetCacheRefresh(duration))
+// 		}
+// 	}(sem)
+
+// 	if !env.IsETLEnabled() {
+// 		// 7 day
+// 		go func(sem *util.Semaphore) {
+// 			defer errors.HandlePanic()
+
+// 			for {
+// 				duration := "7d"
+// 				offset := "1m"
+// 				durHrs := "168h"
+
+// 				sem.Acquire()
+// 				aggErr, err := warmFunc(duration, durHrs, offset, false)
+// 				sem.Return()
+
+// 				klog.V(3).Infof("aggregation: warm cache: %s", duration)
+// 				if aggErr == nil && err == nil {
+// 					time.Sleep(A.GetCacheRefresh(duration))
+// 				} else {
+// 					time.Sleep(5 * time.Minute)
+// 				}
+// 			}
+// 		}(sem)
+
+// 		// 30 day
+// 		go func(sem *util.Semaphore) {
+// 			defer errors.HandlePanic()
+
+// 			for {
+// 				duration := "30d"
+// 				offset := "1m"
+// 				durHrs := "720h"
+
+// 				sem.Acquire()
+// 				aggErr, err := warmFunc(duration, durHrs, offset, false)
+// 				sem.Return()
+// 				if aggErr == nil && err == nil {
+// 					time.Sleep(A.GetCacheRefresh(duration))
+// 				} else {
+// 					time.Sleep(5 * time.Minute)
+// 				}
+// 			}
+// 		}(sem)
+// 	}
+// }
+
 // Creates a new ClusterManager instance using a boltdb storage. If that fails,
 // then we fall back to a memory-only storage.
 func newClusterManager() *cm.ClusterManager {
@@ -715,7 +825,7 @@ func handlePanic(p errors.Panic) bool {
 	return p.Type == errors.PanicTypeHTTP
 }
 
-func Initialize(additionalConfigWatchers ...ConfigWatchers) {
+func Initialize(additionalConfigWatchers ...ConfigWatchers) *Accesses {
 	klog.InitFlags(nil)
 	flag.Set("v", "3")
 	flag.Parse()
@@ -815,7 +925,7 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 	k8sCache.Run()
 
 	cloudProviderKey := env.GetCloudProviderAPIKey()
-	cloudProvider, err := costAnalyzerCloud.NewProvider(k8sCache, cloudProviderKey)
+	cloudProvider, err := cloud.NewProvider(k8sCache, cloudProviderKey)
 	if err != nil {
 		panic(err.Error())
 	}
@@ -961,9 +1071,6 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 		Cloud:         cloudProvider,
 	})
 
-	// cache responses from model for a default of 5 minutes; clear expired responses every 10 minutes
-	outOfClusterCache := cache.New(time.Minute*5, time.Minute*10)
-
 	remoteEnabled := env.IsRemoteEnabled()
 	if remoteEnabled {
 		info, err := cloudProvider.ClusterInfo()
@@ -971,7 +1078,7 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 		if err != nil {
 			klog.Infof("Error saving cluster id %s", err.Error())
 		}
-		_, _, err = costAnalyzerCloud.GetOrCreateClusterMeta(info["id"], info["name"])
+		_, _, err = cloud.GetOrCreateClusterMeta(info["id"], info["name"])
 		if err != nil {
 			klog.Infof("Unable to set cluster id '%s' for cluster '%s', %s", info["id"], info["name"], err.Error())
 		}
@@ -1008,13 +1115,38 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 		clusterMap = clusters.NewClusterMap(promCli, 5*time.Minute)
 	}
 
-	A = Accesses{
+	// cache responses from model and aggregation for a default of 10 minutes;
+	// clear expired responses every 20 minutes
+	aggregateCache := cache.New(time.Minute*10, time.Minute*20)
+	costDataCache := cache.New(time.Minute*10, time.Minute*20)
+	clusterCostsCache := cache.New(cache.NoExpiration, cache.NoExpiration)
+
+	// query durations that should be cached longer should be registered here
+	// use relatively prime numbers to minimize likelihood of synchronized
+	// attempts at cache warming
+	cacheExpiration := map[string]time.Duration{
+		"1d":  maxCacheMinutes1d * time.Minute,
+		"2d":  maxCacheMinutes2d * time.Minute,
+		"7d":  maxCacheMinutes7d * time.Minute,
+		"30d": maxCacheMinutes30d * time.Minute,
+	}
+
+	// warm the cache unless explicitly set to false
+	if env.IsCacheWarmingEnabled() {
+		log.Infof("Init: AggregateCostModel cache warming enabled")
+		warmAggregateCostModelCache()
+	} else {
+		log.Infof("Init: AggregateCostModel cache warming disabled")
+	}
+
+	a := Accesses{
+		Router:                        httprouter.New(),
 		PrometheusClient:              promCli,
 		ThanosClient:                  thanosClient,
 		KubeClientSet:                 kubeClientset,
 		ClusterManager:                clusterManager,
 		ClusterMap:                    clusterMap,
-		Cloud:                         cloudProvider,
+		CloudProvider:                 cloudProvider,
 		CPUPriceRecorder:              cpuGv,
 		RAMPriceRecorder:              ramGv,
 		GPUPriceRecorder:              gpuGv,
@@ -1031,35 +1163,40 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 		ClusterManagementCostRecorder: ClusterManagementCostRecorder,
 		LBCostRecorder:                LBCostRecorder,
 		Model:                         NewCostModel(k8sCache, clusterMap, scrapeInterval),
-		OutOfClusterCache:             outOfClusterCache,
+		AggregateCache:                aggregateCache,
+		CostDataCache:                 costDataCache,
+		ClusterCostsCache:             clusterCostsCache,
+		CacheExpiration:               cacheExpiration,
 	}
 
-	err = A.Cloud.DownloadPricingData()
+	err = a.CloudProvider.DownloadPricingData()
 	if err != nil {
 		klog.V(1).Info("Failed to download pricing data: " + err.Error())
 	}
 
-	StartCostModelMetricRecording(&A)
-
-	managerEndpoints := cm.NewClusterManagerEndpoints(A.ClusterManager)
-
-	Router.GET("/costDataModel", A.CostDataModel)
-	Router.GET("/costDataModelRange", A.CostDataModelRange)
-	Router.GET("/costDataModelRangeLarge", A.CostDataModelRangeLarge)
-	Router.GET("/outOfClusterCosts", A.OutOfClusterCostsWithCache)
-	Router.GET("/allNodePricing", A.GetAllNodePricing)
-	Router.POST("/refreshPricing", A.RefreshPricingData)
-	Router.GET("/clusterCostsOverTime", A.ClusterCostsOverTime)
-	Router.GET("/clusterCosts", A.ClusterCosts)
-	Router.GET("/validatePrometheus", A.GetPrometheusMetadata)
-	Router.GET("/managementPlatform", A.ManagementPlatform)
-	Router.GET("/clusterInfo", A.ClusterInfo)
-	Router.GET("/clusterInfoMap", A.GetClusterInfoMap)
-	Router.GET("/serviceAccountStatus", A.GetServiceAccountStatus)
-	Router.GET("/pricingSourceStatus", A.GetPricingSourceStatus)
+	StartCostModelMetricRecording(&a)
+
+	managerEndpoints := cm.NewClusterManagerEndpoints(a.ClusterManager)
+
+	a.Router.GET("/costDataModel", a.CostDataModel)
+	a.Router.GET("/costDataModelRange", a.CostDataModelRange)
+	a.Router.GET("/aggregatedCostModel", a.AggregateCostModelHandler)
+	a.Router.GET("/allNodePricing", a.GetAllNodePricing)
+	a.Router.POST("/refreshPricing", a.RefreshPricingData)
+	a.Router.GET("/clusterCostsOverTime", a.ClusterCostsOverTime)
+	a.Router.GET("/clusterCosts", a.ClusterCosts)
+	a.Router.GET("/clusterCostsFromCache", a.ClusterCostsFromCacheHandler)
+	a.Router.GET("/validatePrometheus", a.GetPrometheusMetadata)
+	a.Router.GET("/managementPlatform", a.ManagementPlatform)
+	a.Router.GET("/clusterInfo", a.ClusterInfo)
+	a.Router.GET("/clusterInfoMap", a.GetClusterInfoMap)
+	a.Router.GET("/serviceAccountStatus", a.GetServiceAccountStatus)
+	a.Router.GET("/pricingSourceStatus", a.GetPricingSourceStatus)
 
 	// cluster manager endpoints
-	Router.GET("/clusters", managerEndpoints.GetAllClusters)
-	Router.PUT("/clusters", managerEndpoints.PutCluster)
-	Router.DELETE("/clusters/:id", managerEndpoints.DeleteCluster)
+	a.Router.GET("/clusters", managerEndpoints.GetAllClusters)
+	a.Router.PUT("/clusters", managerEndpoints.PutCluster)
+	a.Router.DELETE("/clusters/:id", managerEndpoints.DeleteCluster)
+
+	return &a
 }

+ 10 - 3
pkg/util/time.go

@@ -7,10 +7,17 @@ import (
 )
 
 const (
-	MinsPerHour   = 60.0
-	HoursPerDay   = 24.0
+	// MinsPerHour expresses the amount of minutes in an hour
+	MinsPerHour = 60.0
+
+	// HoursPerDay expresses the amount of hours in a day
+	HoursPerDay = 24.0
+
+	// HoursPerMonth expresses the amount of hours in a month
 	HoursPerMonth = 730.0
-	DaysPerMonth  = 30.42
+
+	// DaysPerMonth expresses the amount of days in a month
+	DaysPerMonth = 30.42
 )
 
 // ParseDuration converts a Prometheus-style duration string into a Duration