Просмотр исходного кода

Merge pull request #615 from kubecost/develop

Merge develop into master 1.70.0
Ajay Tripathy 5 лет назад
Родитель
Коммит
834debc158

+ 1 - 0
go.mod

@@ -27,6 +27,7 @@ require (
 	golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
 	golang.org/x/sync v0.0.0-20190423024810-112230192c58
 	google.golang.org/api v0.4.0
+	gopkg.in/yaml.v2 v2.2.4
 	k8s.io/api v0.0.0-20190913080256-21721929cffa
 	k8s.io/apimachinery v0.0.0-20190913075812-e119e5e154b6
 	k8s.io/client-go v0.0.0-20190620085101-78d2af792bab

+ 31 - 15
pkg/cloud/awsprovider.go

@@ -38,7 +38,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 )
 
-const awsReservedInstancePricePerHour = 0.0287
 const supportedSpotFeedVersion = "1"
 const SpotInfoUpdateType = "spotinfo"
 const AthenaInfoUpdateType = "athenainfo"
@@ -71,10 +70,8 @@ func (aws *AWS) PricingSourceStatus() map[string]*PricingSource {
 	rps.Error = aws.RIPricingStatus
 	if rps.Error != "" {
 		rps.Available = false
-	} else if len(aws.RIPricingByInstanceID) > 0 {
-		rps.Available = true
 	} else {
-		rps.Error = "No reserved instances detected"
+		rps.Available = true
 	}
 	sources[ReservedInstancePricingSource] = rps
 	return sources
@@ -134,6 +131,7 @@ type AWS struct {
 	BaseGPUPrice                string
 	BaseSpotCPUPrice            string
 	BaseSpotRAMPrice            string
+	BaseSpotGPUPrice            string
 	SpotLabelName               string
 	SpotLabelValue              string
 	SpotDataRegion              string
@@ -616,6 +614,7 @@ func (aws *AWS) DownloadPricingData() error {
 	aws.BaseGPUPrice = c.GPU
 	aws.BaseSpotCPUPrice = c.SpotCPU
 	aws.BaseSpotRAMPrice = c.SpotRAM
+	aws.BaseSpotGPUPrice = c.SpotGPU
 	aws.SpotLabelName = c.SpotLabel
 	aws.SpotLabelValue = c.SpotLabelValue
 	aws.SpotDataBucket = c.SpotDataBucket
@@ -1007,6 +1006,7 @@ func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k Key) (*No
 			VCPUCost:     aws.BaseSpotCPUPrice,
 			RAM:          terms.Memory,
 			GPU:          terms.GPU,
+			GPUCost:      aws.BaseSpotGPUPrice,
 			RAMCost:      aws.BaseSpotRAMPrice,
 			Storage:      terms.Storage,
 			BaseCPUPrice: aws.BaseCPUPrice,
@@ -1710,25 +1710,29 @@ func (a *AWS) GetSavingsPlanDataFromAthena() error {
 	tOneDayAgo := tNow.Add(time.Duration(-25) * time.Hour) // Also get files from one day ago to avoid boundary conditions
 	start := tOneDayAgo.Format("2006-01-02")
 	end := tNow.Format("2006-01-02")
+	// Use the Savings Plan rate (savings_plan_savings_plan_rate) as an estimate of cost, assuming the
+	// most recent 1h period got a fully loaded savings plan.
 	q := `SELECT   
 		line_item_usage_start_date,
 		savings_plan_savings_plan_a_r_n,
 		line_item_resource_id,
-		savings_plan_savings_plan_effective_cost
+		savings_plan_savings_plan_rate 
 	FROM %s as cost_data
 	WHERE line_item_usage_start_date BETWEEN date '%s' AND date '%s'
 	AND line_item_line_item_type = 'SavingsPlanCoveredUsage' ORDER BY 
 	line_item_usage_start_date DESC`
-	query := fmt.Sprintf(q, cfg.AthenaTable, start, end)
-	op, err := a.QueryAthenaBillingData(query)
-	if err != nil {
-		return fmt.Errorf("Error fetching Savings Plan Data: %s", err)
-	}
-	klog.Infof("Fetching SavingsPlan data...")
-	if len(op.ResultSet.Rows) > 1 {
+
+	page := 0
+	processResults := func(op *athena.GetQueryResultsOutput, lastpage bool) bool {
 		a.SavingsPlanDataLock.Lock()
+		a.SavingsPlanDataByInstanceID = make(map[string]*SavingsPlanData) // Clean out the old data and only report a savingsplan price if its in the most recent run.
 		mostRecentDate := ""
-		for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows) - 1)] {
+		iter := op.ResultSet.Rows
+		if page == 0 && len(iter) > 0 {
+			iter = op.ResultSet.Rows[1:len(op.ResultSet.Rows)]
+		}
+		page++
+		for _, r := range iter {
 			d := *r.Data[0].VarCharValue
 			if mostRecentDate == "" {
 				mostRecentDate = d
@@ -1752,8 +1756,20 @@ func (a *AWS) GetSavingsPlanDataFromAthena() error {
 			log.DedupedInfof(5, "Savings Plan Instance Data found for node %s : %f at time %s", k, r.EffectiveCost, r.MostRecentDate)
 		}
 		a.SavingsPlanDataLock.Unlock()
-	} else {
-		klog.Infof("No savings plan applied instance data found")
+		return true
+	}
+
+	query := fmt.Sprintf(q, cfg.AthenaTable, start, end)
+
+	klog.V(3).Infof("Running Query: %s", query)
+
+	ip, svc, err := a.QueryAthenaPaginated(query)
+	if err != nil {
+		return fmt.Errorf("Error fetching Savings Plan Data: %s", err)
+	}
+	athenaErr := svc.GetQueryResultsPages(ip, processResults)
+	if athenaErr != nil {
+		return athenaErr
 	}
 	return nil
 }

+ 6 - 2
pkg/cloud/gcpprovider.go

@@ -687,7 +687,11 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 				}
 
 				if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "N2") {
-					instanceType = "n2standard"
+					if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "N2D AMD") {
+						instanceType = "n2dstandard"
+					} else {
+						instanceType = "n2standard"
+					}
 				}
 
 				if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "COMPUTE OPTIMIZED") {
@@ -1449,7 +1453,7 @@ func sustainedUseDiscount(class string, defaultDiscount float64, isPreemptible b
 	switch class {
 	case "e2", "f1", "g1":
 		discount = 0.0
-	case "n2":
+	case "n2", "n2d":
 		discount = 0.2
 	}
 	return discount

+ 4 - 1
pkg/cloud/providerconfig.go

@@ -89,7 +89,9 @@ func (pc *ProviderConfig) loadConfig(writeIfNotExists bool) (*CustomPricing, err
 	}
 
 	pc.customPricing = &customPricing
-
+	if pc.customPricing.SpotGPU == "" {
+		pc.customPricing.SpotGPU = DefaultPricing().SpotGPU // Migration for users without this value set by default.
+	}
 	return pc.customPricing, nil
 }
 
@@ -169,6 +171,7 @@ func DefaultPricing() *CustomPricing {
 		RAM:                   "0.004237",
 		SpotRAM:               "0.000892",
 		GPU:                   "0.95",
+		SpotGPU:               "0.308",
 		Storage:               "0.00005479452",
 		ZoneNetworkEgress:     "0.01",
 		RegionNetworkEgress:   "0.01",

+ 5 - 2
pkg/costmodel/cluster.go

@@ -429,7 +429,7 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm]%s)) by (kubernetes_node, cluster_id, mode)`, durationStr, minsPerResolution, offsetStr)
 	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (instance, cluster_id) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, cluster_id), "instance", "$1", "node", "(.*)")) by (instance, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
 	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system"}[%s:%dm]%s)) by (instance, cluster_id) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, cluster_id), "instance", "$1", "node", "(.*)")) by (instance, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
-	queryActiveMins := fmt.Sprintf(`node_total_hourly_cost[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
+	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node,cluster_id)[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
 
 	// Return errors if these fail
 	resChNodeCPUCost := requiredCtx.Query(queryNodeCPUCost)
@@ -679,7 +679,10 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 		if modeTotals, ok := clusterNodeModeCPUTotal[key]; ok {
 			for mode, subtotal := range modeTotals {
 				// Compute percentage for the current cluster, node, mode
-				pct := subtotal / total
+				pct := 0.0
+				if total > 0 {
+					pct = subtotal / total
+				}
 
 				if _, ok := nodeMap[key]; !ok {
 					log.Warningf("ClusterNodes: CPU mode data for unidentified node")

+ 31 - 24
pkg/costmodel/costmodel.go

@@ -47,19 +47,21 @@ const (
 var isCron = regexp.MustCompile(`^(.+)-\d{10}$`)
 
 type CostModel struct {
-	Cache        clustercache.ClusterCache
-	ClusterMap   clusters.ClusterMap
-	RequestGroup *singleflight.Group
+	Cache          clustercache.ClusterCache
+	ClusterMap     clusters.ClusterMap
+	ScrapeInterval time.Duration
+	RequestGroup   *singleflight.Group
 }
 
-func NewCostModel(cache clustercache.ClusterCache, clusterMap clusters.ClusterMap) *CostModel {
+func NewCostModel(cache clustercache.ClusterCache, clusterMap clusters.ClusterMap, scrapeInterval time.Duration) *CostModel {
 	// request grouping to prevent over-requesting the same data prior to caching
 	requestGroup := new(singleflight.Group)
 
 	return &CostModel{
-		Cache:        cache,
-		ClusterMap:   clusterMap,
-		RequestGroup: requestGroup,
+		Cache:          cache,
+		ClusterMap:     clusterMap,
+		RequestGroup:   requestGroup,
+		ScrapeInterval: scrapeInterval,
 	}
 }
 
@@ -189,7 +191,7 @@ const (
 		label_replace(label_replace(
 			sum(
 				sum_over_time(container_memory_allocation_bytes{container!="",container!="POD", node!=""}[%s])
-			) by (namespace,container,pod,node,cluster_id) * (scalar(avg(prometheus_target_interval_length_seconds)) / 60 / 60)
+			) by (namespace,container,pod,node,cluster_id) * %f / 60 / 60
 		, "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")`
 	// queryCPUAllocationVCPUHours yields the total VCPU-hour CPU allocation over the given
 	// window, aggregated by container.
@@ -201,11 +203,11 @@ const (
 		label_replace(label_replace(
 			sum(
 				sum_over_time(container_cpu_allocation{container!="",container!="POD", node!=""}[%s])
-			) by (namespace,container,pod,node,cluster_id) * (scalar(avg(prometheus_target_interval_length_seconds)) / 60 / 60)
+			) by (namespace,container,pod,node,cluster_id) * %f / 60 / 60
 		, "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")`
 	// queryPVCAllocationFmt yields the total byte-hour PVC allocation over the given window.
 	// sum_over_time(each byte) = [byte*scrape] by metric * scrape interval in seconds (the %f argument) = [seconds/scrape] / 60 / 60 = [hours/scrape] by pod
-	queryPVCAllocationFmt     = `sum(sum_over_time(pod_pvc_allocation[%s])) by (cluster_id, namespace, pod, persistentvolume, persistentvolumeclaim) * scalar(avg(prometheus_target_interval_length_seconds)/60/60)`
+	queryPVCAllocationFmt     = `sum(sum_over_time(pod_pvc_allocation[%s])) by (cluster_id, namespace, pod, persistentvolume, persistentvolumeclaim) * %f/60/60`
 	queryPVHourlyCostFmt      = `avg_over_time(pv_hourly_cost[%s])`
 	queryNSLabels             = `avg_over_time(kube_namespace_labels[%s])`
 	queryPodLabels            = `avg_over_time(kube_pod_labels[%s])`
@@ -607,7 +609,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 		}
 	}
 
-	err = findDeletedNodeInfo(cli, missingNodes, window)
+	err = findDeletedNodeInfo(cli, missingNodes, window, "")
 	if err != nil {
 		klog.V(1).Infof("Error fetching historical node data: %s", err.Error())
 	}
@@ -696,13 +698,13 @@ func findDeletedPodInfo(cli prometheusClient.Client, missingContainers map[strin
 	return nil
 }
 
-func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*costAnalyzerCloud.Node, window string) error {
+func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*costAnalyzerCloud.Node, window, offset string) error {
 	if len(missingNodes) > 0 {
 		defer measureTime(time.Now(), profileThreshold, "Finding Deleted Node Info")
 
-		queryHistoricalCPUCost := fmt.Sprintf(`avg_over_time(node_cpu_hourly_cost[%s])`, window)
-		queryHistoricalRAMCost := fmt.Sprintf(`avg_over_time(node_ram_hourly_cost[%s])`, window)
-		queryHistoricalGPUCost := fmt.Sprintf(`avg_over_time(node_gpu_hourly_cost[%s])`, window)
+		queryHistoricalCPUCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s] offset %s)) by (node, instance, cluster_id)`, window, offset)
+		queryHistoricalRAMCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s] offset %s)) by (node, instance, cluster_id)`, window, offset)
+		queryHistoricalGPUCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s] offset %s)) by (node, instance, cluster_id)`, window, offset)
 
 		ctx := prom.NewContext(cli)
 		cpuCostResCh := ctx.Query(queryHistoricalCPUCost)
@@ -736,6 +738,8 @@ func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*c
 		for node, costv := range cpuCosts {
 			if _, ok := missingNodes[node]; ok {
 				missingNodes[node].VCPUCost = fmt.Sprintf("%f", costv[0].Value)
+			} else {
+				log.DedupedWarningf(5, "Node `%s` in prometheus but not k8s api", node)
 			}
 		}
 		for node, costv := range ramCosts {
@@ -1441,9 +1445,10 @@ func requestKeyFor(startString string, endString string, windowString string, fi
 	return fmt.Sprintf("%s,%s,%s,%s,%s,%t", startKey, endKey, windowString, filterNamespace, filterCluster, remoteEnabled)
 }
 
-// Executes a range query for cost data
+// ComputeCostDataRange executes a range query for cost data.
+// Note that "offset" represents the time between the function call and "endString", and is also passed for convenience
 func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
-	startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
+	startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool, offset string) (map[string]*CostData, error) {
 	// Create a request key for request grouping. This key will be used to represent the cost-model result
 	// for the specific inputs to prevent multiple queries for identical data.
 	key := requestKeyFor(startString, endString, windowString, filterNamespace, filterCluster, remoteEnabled)
@@ -1453,7 +1458,7 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset
 	// If there is already a request out that uses the same data, wait for it to return to share the results.
 	// Otherwise, start executing.
 	result, err, _ := cm.RequestGroup.Do(key, func() (interface{}, error) {
-		return cm.costDataRange(cli, clientset, cp, startString, endString, windowString, resolutionHours, filterNamespace, filterCluster, remoteEnabled)
+		return cm.costDataRange(cli, clientset, cp, startString, endString, windowString, resolutionHours, filterNamespace, filterCluster, remoteEnabled, offset)
 	})
 
 	data, ok := result.(map[string]*CostData)
@@ -1464,7 +1469,7 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset
 	return data, err
 }
 
-func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider, startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
+func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider, startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool, offset string) (map[string]*CostData, error) {
 	layout := "2006-01-02T15:04:05.000Z"
 
 	start, err := time.Parse(layout, startString)
@@ -1497,17 +1502,19 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		return CostDataRangeFromSQL("", "", windowString, remoteStartStr, remoteEndStr)
 	}
 
+	scrapeIntervalSeconds := cm.ScrapeInterval.Seconds()
+
 	ctx := prom.NewContext(cli)
 
-	queryRAMAlloc := fmt.Sprintf(queryRAMAllocationByteHours, windowString)
-	queryCPUAlloc := fmt.Sprintf(queryCPUAllocationVCPUHours, windowString)
+	queryRAMAlloc := fmt.Sprintf(queryRAMAllocationByteHours, windowString, scrapeIntervalSeconds)
+	queryCPUAlloc := fmt.Sprintf(queryCPUAllocationVCPUHours, windowString, scrapeIntervalSeconds)
 	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, windowString, "", windowString, "")
 	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, windowString, "", windowString, "")
 	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, windowString, "", windowString, "")
 	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, windowString, "")
 	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, windowString, "", windowString, "", resolutionHours, windowString, "")
 	queryPVRequests := fmt.Sprintf(queryPVRequestsStr)
-	queryPVCAllocation := fmt.Sprintf(queryPVCAllocationFmt, windowString)
+	queryPVCAllocation := fmt.Sprintf(queryPVCAllocationFmt, windowString, scrapeIntervalSeconds)
 	queryPVHourlyCost := fmt.Sprintf(queryPVHourlyCostFmt, windowString)
 	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, windowString, "")
 	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, windowString, "")
@@ -1844,7 +1851,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 
 		namespaceLabels, ok := namespaceLabelsMapping[nsKey]
 		if !ok {
-			klog.V(3).Infof("Missing data for namespace %s", c.Namespace)
+			klog.V(4).Infof("Missing data for namespace %s", c.Namespace)
 		}
 
 		pLabels := podLabels[podKey]
@@ -1978,7 +1985,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	w += window
 	if w.Minutes() > 0 {
 		wStr := fmt.Sprintf("%dm", int(w.Minutes()))
-		err = findDeletedNodeInfo(cli, missingNodes, wStr)
+		err = findDeletedNodeInfo(cli, missingNodes, wStr, offset)
 		if err != nil {
 			klog.V(1).Infof("Error fetching historical node data: %s", err.Error())
 		}

+ 23 - 1
pkg/costmodel/promparsers.go

@@ -8,8 +8,30 @@ import (
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/prom"
 	"github.com/kubecost/cost-model/pkg/util"
+	"gopkg.in/yaml.v2"
 )
 
+const DEFAULT_KUBECOST_JOB_NAME = "kubecost"
+
+type ScrapeConfig struct {
+	JobName        string `yaml:"job_name,omitempty"`
+	ScrapeInterval string `yaml:"scrape_interval,omitempty"`
+}
+
+type PromCfg struct {
+	ScrapeConfigs []ScrapeConfig `yaml:"scrape_configs,omitempty"`
+}
+
+func GetPrometheusConfig(pcfg string) (PromCfg, error) {
+	var promCfg PromCfg
+	err := yaml.Unmarshal([]byte(pcfg), &promCfg)
+	return promCfg, err
+}
+
+func GetKubecostJobName() string {
+	return DEFAULT_KUBECOST_JOB_NAME // TODO: look this up from a prometheus variable?
+}
+
 // TODO niko/prom move parsing functions from costmodel.go
 
 func GetPVInfo(qrs []*prom.QueryResult, defaultClusterID string) (map[string]*PersistentVolumeClaimData, error) {
@@ -377,7 +399,7 @@ func getCost(qrs []*prom.QueryResult) (map[string][]*util.Vector, error) {
 	toReturn := make(map[string][]*util.Vector)
 
 	for _, val := range qrs {
-		instance, err := val.GetString("instance")
+		instance, err := val.GetString("node")
 		if err != nil {
 			return toReturn, err
 		}

+ 31 - 2
pkg/costmodel/router.go

@@ -386,7 +386,7 @@ func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps
 	}
 
 	resolutionHours := 1.0
-	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.Cloud, start, end, window, resolutionHours, namespace, cluster, remoteEnabled)
+	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.Cloud, start, end, window, resolutionHours, namespace, cluster, remoteEnabled, "")
 	if err != nil {
 		w.Write(WrapData(nil, err))
 	}
@@ -744,8 +744,37 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 
 	timeout := 120 * time.Second
 	keepAlive := 120 * time.Second
+	scrapeInterval, _ := time.ParseDuration("1m")
 
 	promCli, _ := prom.NewPrometheusClient(address, timeout, keepAlive, queryConcurrency, "")
+
+	api := prometheusAPI.NewAPI(promCli)
+	pcfg, err := api.Config(context.Background())
+	if err != nil {
+		klog.Infof("No valid prometheus config file at %s. Error: %s . Troubleshooting help available at: %s. Ignore if using cortex/thanos here.", address, err.Error(), prometheusTroubleshootingEp)
+	} else {
+		klog.V(1).Info("Retrieved a prometheus config file from: " + address)
+		sc, err := GetPrometheusConfig(pcfg.YAML)
+		if err != nil {
+			klog.Infof("Fix YAML error %s", err)
+		}
+		for _, scrapeconfig := range sc.ScrapeConfigs {
+			if scrapeconfig.JobName == GetKubecostJobName() {
+				if scrapeconfig.ScrapeInterval != "" {
+					si := scrapeconfig.ScrapeInterval
+					sid, err := time.ParseDuration(si)
+					if err != nil {
+						klog.Infof("error parseing scrapeConfig for %s", scrapeconfig.JobName)
+					} else {
+						klog.Infof("Found Kubecost job scrape interval of: %s", si)
+						scrapeInterval = sid
+					}
+				}
+			}
+		}
+	}
+	klog.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
+
 	m, err := prom.Validate(promCli)
 	if err != nil || m.Running == false {
 		if err != nil {
@@ -1001,7 +1030,7 @@ func Initialize(additionalConfigWatchers ...ConfigWatchers) {
 		PersistentVolumePriceRecorder: pvGv,
 		ClusterManagementCostRecorder: ClusterManagementCostRecorder,
 		LBCostRecorder:                LBCostRecorder,
-		Model:                         NewCostModel(k8sCache, clusterMap),
+		Model:                         NewCostModel(k8sCache, clusterMap, scrapeInterval),
 		OutOfClusterCache:             outOfClusterCache,
 	}
 

+ 1 - 1
pkg/env/costmodelenv.go

@@ -48,7 +48,7 @@ const (
 // GetAppVersion returns the environment variable value for AppVersionEnvVar which represents
 // the application version
 func GetAppVersion() string {
-	return Get(AppVersionEnvVar, "Pre-1.68.0")
+	return Get(AppVersionEnvVar, "1.70.0")
 }
 
 // GetAWSAccessKeyID returns the environment variable value for AWSAccessKeyIDEnvVar which represents