5 سال پیش · 52eb797dec
--- a/kubernetes/exporter/README.md
+++ b/kubernetes/exporter/README.md
@@ -0,0 +1,11 @@
 
				+# Exporter Deployment
			
 
				+
			
 
				+This directory contains a single YAML file, `exporter.yaml`, that aggregates the regular deployment files. It exists for easy distribution, allowing users to `kubectl apply` an exporter-only deployment without cloning the whole repository. Applying the parent directory won't apply anything in this directory unless `kubectl apply --recursive=True` is used.
			
 
				+
			
 
				+## Usage
			
 
				+
			
 
				+Please be aware that you will have to change both the `Namespace` and `ClusterRoleBinding` resources if you want to deploy to a namespace other than `cost-model`.
			
 
				+
			
 
				+``` sh
			
 
				+kubectl apply -f exporter.yaml --namespace cost-model
			
 
				+```
			
--- a/kubernetes/exporter/exporter.yaml
+++ b/kubernetes/exporter/exporter.yaml
@@ -0,0 +1,180 @@
 
				+# Based on the split YAML files, this is aggregated for convenience of deployment.
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# The namespace cost-model will run in
			
 
				+apiVersion: v1
			
 
				+kind: Namespace
			
 
				+metadata:
			
 
				+    name: cost-model
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# Service account for permissions
			
 
				+apiVersion: v1
			
 
				+kind: ServiceAccount
			
 
				+metadata:
			
 
				+  name: cost-model
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# Cluster role so cost model can gather data about the cluster
			
 
				+# No write permissions are allowed
			
 
				+apiVersion: rbac.authorization.k8s.io/v1
			
 
				+kind: ClusterRole
			
 
				+metadata:
			
 
				+  name: cost-model 
			
 
				+rules:
			
 
				+  - apiGroups:
			
 
				+      - ''
			
 
				+    resources:
			
 
				+      - configmaps
			
 
				+      - deployments
			
 
				+      - nodes
			
 
				+      - pods
			
 
				+      - services
			
 
				+      - resourcequotas
			
 
				+      - replicationcontrollers
			
 
				+      - limitranges
			
 
				+      - persistentvolumeclaims
			
 
				+      - persistentvolumes
			
 
				+      - namespaces
			
 
				+      - endpoints
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups:
			
 
				+      - extensions
			
 
				+    resources:
			
 
				+      - daemonsets
			
 
				+      - deployments
			
 
				+      - replicasets
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups:
			
 
				+      - apps
			
 
				+    resources:
			
 
				+      - statefulsets
			
 
				+      - deployments
			
 
				+      - daemonsets
			
 
				+      - replicasets
			
 
				+    verbs:
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups:
			
 
				+      - batch
			
 
				+    resources:
			
 
				+      - cronjobs
			
 
				+      - jobs
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups:
			
 
				+      - autoscaling
			
 
				+    resources:
			
 
				+      - horizontalpodautoscalers
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups:
			
 
				+      - policy
			
 
				+    resources:
			
 
				+      - poddisruptionbudgets
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+  - apiGroups: 
			
 
				+      - storage.k8s.io
			
 
				+    resources: 
			
 
				+      - storageclasses
			
 
				+    verbs:
			
 
				+      - get
			
 
				+      - list
			
 
				+      - watch
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# Bind the role to the service account
			
 
				+apiVersion: rbac.authorization.k8s.io/v1
			
 
				+kind: ClusterRoleBinding
			
 
				+metadata:
			
 
				+  name: cost-model
			
 
				+roleRef:
			
 
				+  apiGroup: rbac.authorization.k8s.io
			
 
				+  kind: ClusterRole
			
 
				+  name: cost-model
			
 
				+subjects:
			
 
				+  - kind: ServiceAccount
			
 
				+    name: cost-model
			
 
				+    namespace: cost-model
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# Create a deployment for a single cost model pod
			
 
				+# 
			
 
				+# See environment variables if you would like to add a Prometheus for
			
 
				+# cost model to read from for full functionality.
			
 
				+apiVersion: apps/v1
			
 
				+kind: Deployment
			
 
				+metadata:
			
 
				+  name: cost-model
			
 
				+  labels:
			
 
				+    app: cost-model
			
 
				+spec:
			
 
				+  replicas: 1
			
 
				+  selector:
			
 
				+    matchLabels:
			
 
				+      app: cost-model
			
 
				+  strategy:
			
 
				+    rollingUpdate:
			
 
				+      maxSurge: 1
			
 
				+      maxUnavailable: 1
			
 
				+    type: RollingUpdate
			
 
				+  template:
			
 
				+    metadata:
			
 
				+      labels:
			
 
				+        app: cost-model
			
 
				+    spec:
			
 
				+      restartPolicy: Always
			
 
				+      serviceAccountName: cost-model
			
 
				+      containers:
			
 
				+        - image: quay.io/kubecost1/kubecost-cost-model:latest
			
 
				+          name: cost-model
			
 
				+          resources:
			
 
				+            requests:
			
 
				+              cpu: "10m"
			
 
				+              memory: "55M"
			
 
				+          env:
			
 
				+            - name: PROMETHEUS_SERVER_ENDPOINT
			
 
				+              value: "{{prometheusEndpoint}}"  #The endpoint should have the form http://<service-name>.<namespace-name>.svc
			
 
				+            - name: CLOUD_PROVIDER_API_KEY
			
 
				+              value: "AIzaSyD29bGxmHAVEOBYtgd8sYM2gM2ekfxQX4U" # The GCP Pricing API requires a key. This is supplied just for evaluation.
			
 
				+          imagePullPolicy: Always
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# Expose the cost model with a service
			
 
				+# 
			
 
				+# Without a Prometheus endpoint configured in the deployment,
			
 
				+# only cost-model/metrics will have useful data as it is intended
			
 
				+# to be used as just an exporter.
			
 
				+kind: Service
			
 
				+apiVersion: v1
			
 
				+metadata:
			
 
				+  name: cost-model
			
 
				+spec:
			
 
				+  selector:
			
 
				+    app: cost-model
			
 
				+  type: ClusterIP
			
 
				+  ports:
			
 
				+    - name: cost-model
			
 
				+      port: 9003
			
 
				+      targetPort: 9003
			
 
				+
			
 
				+---
			
--- a/pkg/cloud/awsprovider.go
+++ b/pkg/cloud/awsprovider.go
@@ -174,6 +174,7 @@ type AWSProductAttributes struct {
 
				 	OperatingSystem string `json:"operatingSystem"`
			
 
				 	PreInstalledSw  string `json:"preInstalledSw"`
			
 
				 	InstanceFamily  string `json:"instanceFamily"`
			
 
				+	CapacityStatus  string `json:"capacitystatus"`
			
 
				 	GPU             string `json:"gpu"` // GPU represents the number of GPU on the instance
			
 
				 }
			
 
				 
			
@@ -743,7 +744,8 @@ func (aws *AWS) DownloadPricingData() error {
 
				 				}
			
 
				 
			
 
				 				if product.Attributes.PreInstalledSw == "NA" &&
			
 
				-					(strings.HasPrefix(product.Attributes.UsageType, "BoxUsage") || strings.Contains(product.Attributes.UsageType, "-BoxUsage")) {
			
 
				+					(strings.HasPrefix(product.Attributes.UsageType, "BoxUsage") || strings.Contains(product.Attributes.UsageType, "-BoxUsage")) &&
			
 
				+					product.Attributes.CapacityStatus == "Used" {
			
 
				 					key := aws.KubeAttrConversion(product.Attributes.Location, product.Attributes.InstanceType, product.Attributes.OperatingSystem)
			
 
				 					spotKey := key + ",preemptible"
			
 
				 					if inputkeys[key] || inputkeys[spotKey] { // Just grab the sku even if spot, and change the price later.
			
--- a/pkg/cloud/azureprovider.go
+++ b/pkg/cloud/azureprovider.go
@@ -67,11 +67,13 @@ var (
 
				 )
			
 
				 
			
 
				 const AzureLayout = "2006-01-02"
			
 
				+var HeaderStrings = []string{"MeterCategory", "UsageDateTime", "InstanceId", "AdditionalInfo", "Tags", "PreTaxCost", "SubscriptionGuid", "ConsumedService", "ResourceGroup", "ResourceType"}
			
 
				+
			
 
				 
			
 
				 var loadedAzureSecret bool = false
			
 
				 var azureSecret *AzureServiceKey = nil
			
 
				 var loadedAzureStorageConfigSecret bool = false
			
 
				-var azureStorageConfig *AzureStorageConfig= nil
			
 
				+var azureStorageConfig *AzureStorageConfig = nil
			
 
				 
			
 
				 type regionParts []string
			
 
				 
			
@@ -191,7 +193,7 @@ type Azure struct {
 
				 	DownloadPricingDataLock sync.RWMutex
			
 
				 	Clientset               clustercache.ClusterCache
			
 
				 	Config                  *ProviderConfig
			
 
				-	ServiceAccountChecks        map[string]*ServiceAccountCheck
			
 
				+	ServiceAccountChecks    map[string]*ServiceAccountCheck
			
 
				 }
			
 
				 
			
 
				 type azureKey struct {
			
@@ -221,8 +223,8 @@ func (k *azureKey) ID() string {
 
				 
			
 
				 // Represents an azure storage config
			
 
				 type AzureStorageConfig struct {
			
 
				-	AccountName string `json:"azureStorageAccount"`
			
 
				-	AccessKey string `json:"azureStorageAccessKey"`
			
 
				+	AccountName   string `json:"azureStorageAccount"`
			
 
				+	AccessKey     string `json:"azureStorageAccessKey"`
			
 
				 	ContainerName string `json:"azureStorageContainer"`
			
 
				 }
			
 
				 
			
@@ -300,7 +302,7 @@ func (az *Azure) getAzureStorageConfig(forceReload bool) (accessKey, accountName
 
				 	}
			
 
				 	// 1. Check for secret
			
 
				 	s, _ := az.loadAzureStorageConfig(forceReload)
			
 
				-	if s != nil && s.AccessKey != "" && s.AccountName != ""  && s.ContainerName != ""{
			
 
				+	if s != nil && s.AccessKey != "" && s.AccountName != "" && s.ContainerName != "" {
			
 
				 
			
 
				 		az.ServiceAccountChecks["hasStorage"] = &ServiceAccountCheck{
			
 
				 			Message: "Azure Storage Config exists",
			
@@ -773,19 +775,25 @@ type azurePvKey struct {
 
				 	StorageClass           string
			
 
				 	StorageClassParameters map[string]string
			
 
				 	DefaultRegion          string
			
 
				+	ProviderId             string
			
 
				 }
			
 
				 
			
 
				 func (az *Azure) GetPVKey(pv *v1.PersistentVolume, parameters map[string]string, defaultRegion string) PVKey {
			
 
				+	providerID := ""
			
 
				+	if pv.Spec.AzureDisk != nil {
			
 
				+		providerID = pv.Spec.AzureDisk.DiskName
			
 
				+	}
			
 
				 	return &azurePvKey{
			
 
				 		Labels:                 pv.Labels,
			
 
				 		StorageClass:           pv.Spec.StorageClassName,
			
 
				 		StorageClassParameters: parameters,
			
 
				 		DefaultRegion:          defaultRegion,
			
 
				+		ProviderId:             providerID,
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				 func (key *azurePvKey) ID() string {
			
 
				-	return ""
			
 
				+	return key.ProviderId
			
 
				 }
			
 
				 
			
 
				 func (key *azurePvKey) GetStorageClass() string {
			
@@ -914,10 +922,10 @@ func (az *Azure) ExternalAllocations(start string, end string, aggregators []str
 
				 	if err != nil {
			
 
				 		return nil, err
			
 
				 	}
			
 
				-	return GetExternalAllocations(start, end, aggregators, filterType, filterValue, crossCluster, csvRetriever)
			
 
				+	return getExternalAllocations(start, end, aggregators, filterType, filterValue, crossCluster, csvRetriever)
			
 
				 }
			
 
				 
			
 
				-func GetExternalAllocations(start string, end string, aggregators []string, filterType string, filterValue string, crossCluster bool, csvRetriever CSVRetriever) ([]*OutOfClusterAllocation, error) {
			
 
				+func getExternalAllocations(start string, end string, aggregators []string, filterType string, filterValue string, crossCluster bool, csvRetriever CSVRetriever) ([]*OutOfClusterAllocation, error) {
			
 
				 	dateFormat := "2006-1-2"
			
 
				 	startTime, err := time.Parse(dateFormat, start)
			
 
				 	if err != nil {
			
@@ -933,7 +941,7 @@ func GetExternalAllocations(start string, end string, aggregators []string, filt
 
				 	}
			
 
				 	oocAllocs := make(map[string]*OutOfClusterAllocation)
			
 
				 	for _, reader := range readers {
			
 
				-		err = ParseCSV(reader, startTime, endTime, oocAllocs, aggregators, filterType, filterValue, crossCluster)
			
 
				+		err = parseCSV(reader, startTime, endTime, oocAllocs, aggregators, filterType, filterValue, crossCluster)
			
 
				 		if err != nil {
			
 
				 			return nil, err
			
 
				 		}
			
@@ -945,13 +953,9 @@ func GetExternalAllocations(start string, end string, aggregators []string, filt
 
				 	return oocAllocsArr, nil
			
 
				 }
			
 
				 
			
 
				-func ParseCSV (reader *csv.Reader, start, end time.Time, oocAllocs map[string]*OutOfClusterAllocation, aggregators []string, filterType string, filterValue string, crossCluster bool) error {
			
 
				+func parseCSV(reader *csv.Reader, start, end time.Time, oocAllocs map[string]*OutOfClusterAllocation, aggregators []string, filterType string, filterValue string, crossCluster bool) error {
			
 
				 	headers, _ := reader.Read()
			
 
				-
			
 
				-	headerMap := map[string]int{}
			
 
				-	for i, header := range headers {
			
 
				-		headerMap[header] = i
			
 
				-	}
			
 
				+	headerMap := createHeaderMap(headers)
			
 
				 
			
 
				 	for {
			
 
				 		var record, err = reader.Read()
			
@@ -981,7 +985,7 @@ func ParseCSV (reader *csv.Reader, start, end time.Time, oocAllocs map[string]*O
 
				 		}
			
 
				 
			
 
				 		itemTags := make(map[string]string)
			
 
				-		itemTagJson := record[headerMap["Tags"]]
			
 
				+		itemTagJson := makeValidJSON(record[headerMap["Tags"]])
			
 
				 		if itemTagJson != "" {
			
 
				 			err = json.Unmarshal([]byte(itemTagJson), &itemTags)
			
 
				 			if err != nil {
			
@@ -990,7 +994,7 @@ func ParseCSV (reader *csv.Reader, start, end time.Time, oocAllocs map[string]*O
 
				 		}
			
 
				 
			
 
				 		if filterType != "kubernetes_" {
			
 
				-			if value, ok := itemTags[filterType];!ok || value != filterValue {
			
 
				+			if value, ok := itemTags[filterType]; !ok || value != filterValue {
			
 
				 				continue
			
 
				 			}
			
 
				 		}
			
@@ -1014,11 +1018,28 @@ func ParseCSV (reader *csv.Reader, start, end time.Time, oocAllocs map[string]*O
 
				 			oocAllocs[key] = ooc
			
 
				 		}
			
 
				 
			
 
				-
			
 
				 	}
			
 
				 	return nil
			
 
				 }
			
 
				 
			
 
				+func createHeaderMap(headers []string) map[string]int {
			
 
				+	headerMap := make(map[string]int)
			
 
				+	for i, header := range headers {
			
 
				+		for _, headerString := range HeaderStrings {
			
 
				+			if strings.Contains(header, headerString) {
			
 
				+				headerMap[headerString] = i
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	return headerMap
			
 
				+}
			
 
				+
			
 
				// makeValidJSON normalizes a tags value so it can be unmarshalled as a JSON
				// object. Surrounding whitespace is removed first; an empty value, or one
				// already enclosed in braces, is returned as is (trimmed), and anything else
				// is wrapped in "{" and "}".
				//
				// Trimming matters because a padded object such as ` {"a":"b"} ` would
				// otherwise fail the first/last byte check and be wrapped a second time,
				// producing invalid JSON.
				func makeValidJSON(jsonString string) string {
					trimmed := strings.TrimSpace(jsonString)
					if trimmed == "" {
						return trimmed
					}
					if strings.HasPrefix(trimmed, "{") && strings.HasSuffix(trimmed, "}") {
						return trimmed
					}
					return fmt.Sprintf("{%v}", trimmed)
				}
			
 
				 
			
 
				 
			
 
				 // UsageDateTime only contains date information and not time; because of this, filtering on UsageDateTime is inclusive on start and exclusive on end.
			
--- a/pkg/cloud/gcpprovider.go
+++ b/pkg/cloud/gcpprovider.go
@@ -309,6 +309,10 @@ func (gcp *GCP) UpdateConfig(r io.Reader, updateType string) (*CustomPricing, er
 
				 // "start" and "end" are dates of the format YYYY-MM-DD
			
 
				 // "aggregator" is the tag used to determine how to allocate those assets, ie namespace, pod, etc.
			
 
				 func (gcp *GCP) ExternalAllocations(start string, end string, aggregators []string, filterType string, filterValue string, crossCluster bool) ([]*OutOfClusterAllocation, error) {
			
 
				+	if env.LegacyExternalCostsAPIDisabled() {
			
 
				+		return nil, fmt.Errorf("Legacy External Allocations API disabled.")
			
 
				+	}
			
 
				+
			
 
				 	c, err := gcp.Config.GetCustomPricingData()
			
 
				 	if err != nil {
			
 
				 		return nil, err
			
--- a/pkg/cloud/provider.go
+++ b/pkg/cloud/provider.go
@@ -18,7 +18,8 @@ import (
 
				 )
			
 
				 
			
 
				 const authSecretPath = "/var/secrets/service-key.json"
			
 
				-const storageConfigSecretPath = "/var/secrets/azure-storage-config.json"
			
 
				+const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
			
 
				+
			
 
				 
			
 
				 var createTableStatements = []string{
			
 
				 	`CREATE TABLE IF NOT EXISTS names (
			
--- a/pkg/costmodel/cluster.go
+++ b/pkg/costmodel/cluster.go
@@ -369,6 +369,13 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
				 	for _, disk := range diskMap {
			
 
				 		// Apply all remaining RAM to Idle
			
 
				 		disk.Breakdown.Idle = 1.0 - (disk.Breakdown.System + disk.Breakdown.Other + disk.Breakdown.User)
			
 
				+
			
 
				+		// Set provider Id to the name for reconciliation on Azure
			
 
				+		if fmt.Sprintf("%T", provider) == "*provider.Azure"{
			
 
				+			if disk.ProviderID == "" {
			
 
				+				disk.ProviderID = disk.Name
			
 
				+			}
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	return diskMap, nil
			
--- a/pkg/costmodel/clusters/clustermap.go
+++ b/pkg/costmodel/clusters/clustermap.go
@@ -1,8 +1,8 @@
 
				 package clusters
			
 
				 
			
 
				 import (
			
 
				+	"context"
			
 
				 	"fmt"
			
 
				-	"math/rand"
			
 
				 	"strings"
			
 
				 	"sync"
			
 
				 	"time"
			
@@ -10,6 +10,7 @@ import (
 
				 	"github.com/kubecost/cost-model/pkg/log"
			
 
				 	"github.com/kubecost/cost-model/pkg/prom"
			
 
				 	"github.com/kubecost/cost-model/pkg/thanos"
			
 
				+	"github.com/kubecost/cost-model/pkg/util/retry"
			
 
				 
			
 
				 	prometheus "github.com/prometheus/client_golang/api"
			
 
				 )
			
@@ -120,33 +121,17 @@ func (pcm *PrometheusClusterMap) loadClusters() (map[string]*ClusterInfo, error)
 
				 	}
			
 
				 
			
 
				 	// Execute Query
			
 
				-	tryQuery := func() ([]*prom.QueryResult, prometheus.Warnings, error) {
			
 
				+	tryQuery := func() (interface{}, error) {
			
 
				 		ctx := prom.NewContext(pcm.client)
			
 
				-		return ctx.QuerySync(clusterInfoQuery(offset))
			
 
				+		r, _, e := ctx.QuerySync(clusterInfoQuery(offset))
			
 
				+		return r, e
			
 
				 	}
			
 
				 
			
 
				-	var qr []*prom.QueryResult
			
 
				-	var err error
			
 
				-
			
 
				 	// Retry on failure
			
 
				-	delay := LoadRetryDelay
			
 
				-	for r := LoadRetries; r > 0; r-- {
			
 
				-		qr, _, err = tryQuery()
			
 
				-
			
 
				-		// non-error breaks out of loop
			
 
				-		if err == nil {
			
 
				-			break
			
 
				-		}
			
 
				-
			
 
				-		// wait the delay
			
 
				-		time.Sleep(delay)
			
 
				+	result, err := retry.Retry(context.Background(), tryQuery, uint(LoadRetries), LoadRetryDelay)
			
 
				 
			
 
				-		// add some random backoff
			
 
				-		jitter := time.Duration(rand.Int63n(int64(delay)))
			
 
				-		delay = delay + jitter/2
			
 
				-	}
			
 
				-
			
 
				-	if err != nil {
			
 
				+	qr, ok := result.([]*prom.QueryResult)
			
 
				+	if !ok || err != nil {
			
 
				 		return nil, err
			
 
				 	}
			
 
				 
			
--- a/pkg/costmodel/costmodel.go
+++ b/pkg/costmodel/costmodel.go
@@ -297,14 +297,16 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
				 		}
			
 
				 
			
 
				 		// ErrorCollection is an collection of errors wrapped in a single error implementation
			
 
				-		return nil, ctx.ErrorCollection()
			
 
				+		// We opt to not return an error for the sake of running as a pure exporter.
			
 
				+		log.Warningf("ComputeCostData: continuing despite prometheus errors: %s", ctx.ErrorCollection().Error())
			
 
				 	}
			
 
				 
			
 
				 	defer measureTime(time.Now(), profileThreshold, "ComputeCostData: Processing Query Data")
			
 
				 
			
 
				 	normalizationValue, err := getNormalization(resNormalization)
			
 
				 	if err != nil {
			
 
				-		return nil, fmt.Errorf("Error parsing normalization values from %s: %s", queryNormalization, err.Error())
			
 
				+		// We opt to not return an error for the sake of running as a pure exporter.
			
 
				+		log.Warningf("ComputeCostData: continuing despite error parsing normalization values from %s: %s", queryNormalization, err.Error())
			
 
				 	}
			
 
				 
			
 
				 	nodes, err := cm.GetNodeCost(cp)
			
--- a/pkg/costmodel/router.go
+++ b/pkg/costmodel/router.go
@@ -1042,6 +1042,11 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) *Accesses {
 
				 	// Initialize mechanism for subscribing to settings changes
			
 
				 	a.InitializeSettingsPubSub()
			
 
				 
			
 
				+	err = a.CloudProvider.DownloadPricingData()
			
 
				+	if err != nil {
			
 
				+		klog.V(1).Info("Failed to download pricing data: " + err.Error())
			
 
				+	}
			
 
				+
			
 
				 	// Warm the aggregate cache unless explicitly set to false
			
 
				 	if env.IsCacheWarmingEnabled() {
			
 
				 		log.Infof("Init: AggregateCostModel cache warming enabled")
			
@@ -1050,11 +1055,6 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) *Accesses {
 
				 		log.Infof("Init: AggregateCostModel cache warming disabled")
			
 
				 	}
			
 
				 
			
 
				-	err = a.CloudProvider.DownloadPricingData()
			
 
				-	if err != nil {
			
 
				-		klog.V(1).Info("Failed to download pricing data: " + err.Error())
			
 
				-	}
			
 
				-
			
 
				 	a.MetricsEmitter.Start()
			
 
				 
			
 
				 	managerEndpoints := cm.NewClusterManagerEndpoints(a.ClusterManager)
			
--- a/pkg/env/costmodelenv.go
+++ b/pkg/env/costmodelenv.go
@@ -15,8 +15,8 @@ const (
 
				 	AWSAccessKeySecretEnvVar = "AWS_SECRET_ACCESS_KEY"
			
 
				 	AWSClusterIDEnvVar       = "AWS_CLUSTER_ID"
			
 
				 
			
 
				-	AzureStorageAccessKeyEnvVar = "AZURE_STORAGE_ACCESS_KEY"
			
 
				-	AzureStorageAccountNameEnvVar = "AZURE_STORAGE_ACCOUNT"
			
 
				+	AzureStorageAccessKeyEnvVar     = "AZURE_STORAGE_ACCESS_KEY"
			
 
				+	AzureStorageAccountNameEnvVar   = "AZURE_STORAGE_ACCOUNT"
			
 
				 	AzureStorageContainerNameEnvVar = "AZURE_STORAGE_CONTAINER"
			
 
				 
			
 
				 	KubecostNamespaceEnvVar        = "KUBECOST_NAMESPACE"
			
@@ -61,14 +61,15 @@ const (
 
				 
			
 
				 	UTCOffsetEnvVar = "UTC_OFFSET"
			
 
				 
			
 
				-	CacheWarmingEnabledEnvVar = "CACHE_WARMING_ENABLED"
			
 
				-	ETLEnabledEnvVar          = "ETL_ENABLED"
			
 
				+	CacheWarmingEnabledEnvVar    = "CACHE_WARMING_ENABLED"
			
 
				+	ETLEnabledEnvVar             = "ETL_ENABLED"
			
 
				+	LegacyExternalAPIDisabledVar = "LEGACY_EXTERNAL_API_DISABLED"
			
 
				 )
			
 
				 
			
 
				 // GetAppVersion returns the environment variable value for AppVersionEnvVar, which represents
			
 
				 // the application version; a hard-coded release version is passed to Get as the default.
			
 
				 func GetAppVersion() string {
			
 
				-	return Get(AppVersionEnvVar, "1.74.0")
			
 
				+	return Get(AppVersionEnvVar, "1.75.0")
			
 
				 }
			
 
				 
			
 
				 // IsEmitNamespaceAnnotationsMetric returns true if cost-model is configured to emit the kube_namespace_annotations metric
			
@@ -336,3 +337,7 @@ func IsCacheWarmingEnabled() bool {
 
				 func IsETLEnabled() bool {
			
 
				 	return GetBool(ETLEnabledEnvVar, true)
			
 
				 }
			
 
				+
			
 
				// LegacyExternalCostsAPIDisabled returns the result of GetBool for
				// LegacyExternalAPIDisabledVar, passing false as the default argument.
				// NOTE(review): presumably GetBool yields that default when the variable is
				// unset, i.e. the legacy external costs API stays enabled — confirm.
				+func LegacyExternalCostsAPIDisabled() bool {
			
 
				+	return GetBool(LegacyExternalAPIDisabledVar, false)
			
 
				+}
			
--- a/pkg/kubecost/asset.go
+++ b/pkg/kubecost/asset.go
@@ -64,6 +64,35 @@ type Asset interface {
 
				 // the properties to use to aggregate, and the mapping from Allocation property
			
 
				 // to Asset label. For example, consider this asset:
			
 
				 //
			
 
				+// CURRENT: Asset ETL stores its data ALREADY MAPPED from label to k8s concept. This isn't ideal-- see the TODO.
			
 
				+//   Cloud {
			
 
				+// 	   TotalCost: 10.00,
			
 
				+// 	   Labels{
			
 
				+//       "namespace":"monitoring",
			
 
				+// 	     "env":"prod"
			
 
				+// 	   }
			
 
				+//   }
			
 
				+//
			
 
				+// Given the following parameters, we expect to return:
			
 
				+//
			
 
				+//   1) single-prop full match
			
 
				+//   aggregateBy = ["namespace"]
			
 
				+//   => Allocation{Name: "monitoring", ExternalCost: 10.00, TotalCost: 10.00}, nil
			
 
				+//
			
 
				+//   2) multi-prop full match
			
 
				+//   aggregateBy = ["namespace", "label:env"]
			
 
				+//   allocationPropertyLabels = {"namespace":"kubernetes_namespace"}
			
 
				+//   => Allocation{Name: "monitoring/env=prod", ExternalCost: 10.00, TotalCost: 10.00}, nil
			
 
				+//
			
 
				+//   3) multi-prop partial match
			
 
				+//   aggregateBy = ["namespace", "label:foo"]
			
 
				+//   => Allocation{Name: "monitoring/__unallocated__", ExternalCost: 10.00, TotalCost: 10.00}, nil
			
 
				+//
			
 
				+//   4) no match
			
 
				+//   aggregateBy = ["cluster"]
			
 
				+//   => nil, err
			
 
				+//
			
 
				+// TODO:
			
 
				 //   Cloud {
			
 
				 // 	   TotalCost: 10.00,
			
 
				 // 	   Labels{
			
@@ -95,7 +124,7 @@ type Asset interface {
 
				 //   => nil, err
			
 
				 //
			
 
				 // (See asset_test.go for assertions of these examples and more.)
			
 
				-func AssetToExternalAllocation(asset Asset, aggregateBy []string, allocationPropertyLabels map[string]string) (*Allocation, error) {
			
 
				+func AssetToExternalAllocation(asset Asset, aggregateBy []string) (*Allocation, error) {
			
 
				 	if asset == nil {
			
 
				 		return nil, fmt.Errorf("asset is nil")
			
 
				 	}
			
@@ -115,7 +144,7 @@ func AssetToExternalAllocation(asset Asset, aggregateBy []string, allocationProp
 
				 		// labelName should be derived from the mapping of properties to
			
 
				 		// label names, unless the aggBy is explicitly a label, in which
			
 
				 		// case we should pull the label name from the aggBy string.
			
 
				-		labelName := allocationPropertyLabels[aggBy]
			
 
				+		labelName := aggBy
			
 
				 		if strings.HasPrefix(aggBy, "label:") {
			
 
				 			labelName = strings.TrimPrefix(aggBy, "label:")
			
 
				 		}
			
--- a/pkg/kubecost/asset_test.go
+++ b/pkg/kubecost/asset_test.go
@@ -1027,11 +1027,7 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	var alloc *Allocation
			
 
				 	var err error
			
 
				 
			
 
				-	// default allocationPropertyLabels, which should be compatible with result
			
 
				-	// of LabelConfig.AllocationPropertyLabels()
			
 
				-	apls := map[string]string{"namespace": "kubernetes_namespace"}
			
 
				-
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"}, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"})
			
 
				 	if err == nil {
			
 
				 		t.Fatalf("expected error due to nil asset")
			
 
				 	}
			
@@ -1046,19 +1042,17 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	//   }
			
 
				 	cloud := NewCloud(ComputeCategory, "abc123", start1, start2, windows[0])
			
 
				 	cloud.SetLabels(map[string]string{
			
 
				-		"kubernetes_namespace": "monitoring",
			
 
				-		"env":                  "prod",
			
 
				+		"namespace": "monitoring",
			
 
				+		"env":       "prod",
			
 
				 	})
			
 
				 	cloud.Cost = 10.00
			
 
				 	asset = cloud
			
 
				 
			
 
				-	// Providing nil params with a non-nil Asset should not panic, but it
			
 
				-	// should return an error in both cases (no matching is possible).
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"}, nil)
			
 
				-	if err == nil {
			
 
				-		t.Fatalf("expected error due to nil allocationPropertyLabels")
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"})
			
 
				+	if err != nil {
			
 
				+		t.Fatalf("expected to not error")
			
 
				 	}
			
 
				-	alloc, err = AssetToExternalAllocation(asset, nil, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, nil)
			
 
				 	if err == nil {
			
 
				 		t.Fatalf("expected error due to nil aggregateBy")
			
 
				 	}
			
@@ -1086,7 +1080,7 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	//   => nil, err
			
 
				 
			
 
				 	// 1) single-prop full match
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"}, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"namespace"})
			
 
				 	if err != nil {
			
 
				 		t.Fatalf("unexpected error: %s", err)
			
 
				 	}
			
@@ -1104,7 +1098,7 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	// 2) multi-prop full match
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"namespace", "label:env"}, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"namespace", "label:env"})
			
 
				 	if err != nil {
			
 
				 		t.Fatalf("unexpected error: %s", err)
			
 
				 	}
			
@@ -1125,7 +1119,7 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	// 3) multi-prop partial match
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"namespace", "label:foo"}, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"namespace", "label:foo"})
			
 
				 	if err != nil {
			
 
				 		t.Fatalf("unexpected error: %s", err)
			
 
				 	}
			
@@ -1143,7 +1137,7 @@ func TestAssetToExternalAllocation(t *testing.T) {
 
				 	}
			
 
				 
			
 
				 	// 4) no match
			
 
				-	alloc, err = AssetToExternalAllocation(asset, []string{"cluster"}, apls)
			
 
				+	alloc, err = AssetToExternalAllocation(asset, []string{"cluster"})
			
 
				 	if err == nil {
			
 
				 		t.Fatalf("expected 'no match' error")
			
 
				 	}
			
--- a/pkg/util/retry/retry.go
+++ b/pkg/util/retry/retry.go
@@ -0,0 +1,44 @@
 
				+package retry
			
 
				+
			
 
				+import (
			
 
				+	"context"
			
 
				+	"fmt"
			
 
				+	"math/rand"
			
 
				+	"time"
			
 
				+)
			
 
				+
			
 
				// RetryCancellationErr is the error returned by Retry when its context is
				// cancelled before an attempt succeeds.
				var RetryCancellationErr error = fmt.Errorf("RetryCancellationErr")

				// IsRetryCancelledError returns true if the error was a cancellation, that
				// is, a non-nil error whose message equals that of RetryCancellationErr.
				func IsRetryCancelledError(err error) bool {
					return err != nil && err.Error() == "RetryCancellationErr"
				}

				// Retry runs f until it returns a nil error, the number of attempts is
				// exhausted, or ctx is cancelled.
				//
				// Between failed attempts Retry waits for the current delay, then grows the
				// delay by half of a random jitter in [0, delay). The wait is skipped when
				// the delay is not positive and after the final attempt, since nothing
				// follows it.
				//
				// It returns the result of the first successful call to f with a nil error.
				// If every attempt fails, it returns the result and error of the last call.
				// If ctx is cancelled before an attempt or during a wait, it returns
				// nil and RetryCancellationErr. With attempts == 0, f is never called and
				// Retry returns nil, nil.
				func Retry(ctx context.Context, f func() (interface{}, error), attempts uint, delay time.Duration) (interface{}, error) {
					var result interface{}
					var err error

					d := delay
					for remaining := attempts; remaining > 0; remaining-- {
						// Never start an attempt on an already-cancelled context.
						select {
						case <-ctx.Done():
							return nil, RetryCancellationErr
						default:
						}

						result, err = f()
						if err == nil {
							return result, nil
						}

						// Last attempt failed: report it right away instead of waiting.
						if remaining == 1 {
							break
						}

						// rand.Int63n panics for n <= 0, so only wait and add jitter for a
						// positive delay.
						if d <= 0 {
							continue
						}

						// Wait on a timer rather than time.Sleep so cancellation is
						// observed during the back-off.
						timer := time.NewTimer(d)
						select {
						case <-ctx.Done():
							timer.Stop()
							return nil, RetryCancellationErr
						case <-timer.C:
						}

						jitter := time.Duration(rand.Int63n(int64(d)))
						d = d + jitter/2
					}

					return result, err
				}
			
--- a/pkg/util/retry/retry_test.go
+++ b/pkg/util/retry/retry_test.go
@@ -0,0 +1,121 @@
 
				+package retry
			
 
				+
			
 
				+import (
			
 
				+	"context"
			
 
				+	"fmt"
			
 
				+	"sync/atomic"
			
 
				+	"testing"
			
 
				+	"time"
			
 
				+)
			
 
				+
			
 
				+type Obj struct {
			
 
				+	Name string
			
 
				+}
			
 
				+
			
 
				+func TestPtrSliceRetry(t *testing.T) {
			
 
				+	const Expected uint64 = 3
			
 
				+
			
 
				+	var count uint64 = 0
			
 
				+
			
 
				+	f := func() (interface{}, error) {
			
 
				+		c := atomic.AddUint64(&count, 1)
			
 
				+		fmt.Println("Try:", c)
			
 
				+
			
 
				+		if c == Expected {
			
 
				+			return []*Obj{
			
 
				+				{"A"},
			
 
				+				{"B"},
			
 
				+				{"C"},
			
 
				+			}, nil
			
 
				+		}
			
 
				+
			
 
				+		return nil, fmt.Errorf("Failed: %d", c)
			
 
				+	}
			
 
				+
			
 
				+	result, err := Retry(context.Background(), f, 5, time.Second)
			
 
				+	objs, ok := result.([]*Obj)
			
 
				+	if err != nil || !ok {
			
 
				+		t.Fatalf("Failed to correctly cast back to slice type")
			
 
				+	}
			
 
				+
			
 
				+	t.Logf("Length: %d\n", len(objs))
			
 
				+}
			
 
				+
			
 
				+func TestSuccessRetry(t *testing.T) {
			
 
				+	const Expected uint64 = 3
			
 
				+
			
 
				+	var count uint64 = 0
			
 
				+
			
 
				+	f := func() (interface{}, error) {
			
 
				+		c := atomic.AddUint64(&count, 1)
			
 
				+		fmt.Println("Try:", c)
			
 
				+
			
 
				+		if c == Expected {
			
 
				+			return struct{}{}, nil
			
 
				+		}
			
 
				+
			
 
				+		return nil, fmt.Errorf("Failed: %d", c)
			
 
				+	}
			
 
				+
			
 
				+	_, err := Retry(context.Background(), f, 5, time.Second)
			
 
				+	if err != nil {
			
 
				+		t.Fatalf("Unexpected error: %s", err)
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestFailRetry(t *testing.T) {
			
 
				+	const Expected uint64 = 5
			
 
				+
			
 
				+	expectedError := fmt.Sprintf("Failed: %d", Expected)
			
 
				+	var count uint64 = 0
			
 
				+
			
 
				+	f := func() (interface{}, error) {
			
 
				+		c := atomic.AddUint64(&count, 1)
			
 
				+		fmt.Println("Try:", c)
			
 
				+		return nil, fmt.Errorf("Failed: %d", c)
			
 
				+	}
			
 
				+
			
 
				+	_, err := Retry(context.Background(), f, 5, time.Second)
			
 
				+	if count != 5 {
			
 
				+		t.Fatalf("Expected Count: %d, Actual: %d", Expected, count)
			
 
				+	}
			
 
				+
			
 
				+	if err.Error() != expectedError {
			
 
				+		t.Fatalf("Expected error: %s, Actual error: %s", expectedError, err.Error())
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+func TestCancelRetry(t *testing.T) {
			
 
				+	const Expected uint64 = 5
			
 
				+
			
 
				+	var count uint64 = 0
			
 
				+
			
 
				+	f := func() (interface{}, error) {
			
 
				+		c := atomic.AddUint64(&count, 1)
			
 
				+		fmt.Println("Try:", c)
			
 
				+		return nil, fmt.Errorf("Failed: %d", c)
			
 
				+	}
			
 
				+
			
 
				+	wait := make(chan error)
			
 
				+	ctx, cancel := context.WithCancel(context.Background())
			
 
				+
			
 
				+	// execute retry in go routine
			
 
				+	go func() {
			
 
				+		_, err := Retry(ctx, f, 5, time.Second)
			
 
				+
			
 
				+		wait <- err
			
 
				+	}()
			
 
				+
			
 
				+	// cancel after 2 seconds
			
 
				+	go func() {
			
 
				+		time.Sleep(time.Second * 2)
			
 
				+		cancel()
			
 
				+	}()
			
 
				+
			
 
				+	// wait for error result
			
 
				+	e := <-wait
			
 
				+
			
 
				+	if !IsRetryCancelledError(e) {
			
 
				+		t.Fatalf("Expected CancellationError, got: %s", e)
			
 
				+	}
			
 
				+}