Просмотр исходного кода

rewrite group-right query for simplicity and performanc

Ajay Tripathy 4 лет назад
Родитель
Сommit
ea36ebcddf
1 измененных файлов с 46 добавлено и 43 удалено
  1. 46 43
      pkg/costmodel/cluster.go

+ 46 - 43
pkg/costmodel/cluster.go

@@ -2,9 +2,10 @@ package costmodel
 
 import (
 	"fmt"
-	"github.com/kubecost/cost-model/pkg/util/timeutil"
 	"time"
 
+	"github.com/kubecost/cost-model/pkg/util/timeutil"
+
 	"github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/env"
 	"github.com/kubecost/cost-model/pkg/log"
@@ -136,8 +137,8 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 	costPerGBHr := 0.04 / 730.0
 
 	ctx := prom.NewContext(client)
-	queryPVCost := fmt.Sprintf(`sum_over_time((avg(kube_persistentvolume_capacity_bytes) by (%s, persistentvolume)  * on(%s, persistentvolume) group_right avg(pv_hourly_cost) by (%s, persistentvolume,provider_id))[%s:%dm]%s)/1024/1024/1024 * %f`, env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
-	queryPVSize := fmt.Sprintf(`avg_over_time(kube_persistentvolume_capacity_bytes[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
+	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s]%s)) by (%s, persistentvolume,provider_id)`, durationStr, offsetStr, env.GetPromClusterLabel())
+	queryPVSize := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (%s, persistentvolume)`, durationStr, offsetStr, env.GetPromClusterLabel())
 	queryActiveMins := fmt.Sprintf(`count(pv_hourly_cost) by (%s, persistentvolume)[%s:%dm]%s`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
 
 	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, hourlyToCumulative, costPerGBHr)
@@ -166,7 +167,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 	diskMap := map[string]*Disk{}
 
-	for _, result := range resPVCost {
+	for _, result := range resActiveMins {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
@@ -174,13 +175,14 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("ClusterDisks: PV cost data missing persistentvolume")
+			log.Warningf("ClusterDisks: active mins missing pv name")
 			continue
 		}
 
-		// TODO niko/assets storage class
+		if len(result.Values) == 0 {
+			continue
+		}
 
-		cost := result.Values[0].Value
 		key := fmt.Sprintf("%s/%s", cluster, name)
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
@@ -189,11 +191,15 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 				Breakdown: &ClusterCostsBreakdown{},
 			}
 		}
-		diskMap[key].Cost += cost
-		providerID, _ := result.GetString("provider_id") // just put the providerID set up here, it's the simplest query.
-		if providerID != "" {
-			diskMap[key].ProviderID = cloud.ParsePVID(providerID)
-		}
+		s := time.Unix(int64(result.Values[0].Timestamp), 0)
+		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
+		mins := e.Sub(s).Minutes()
+
+		// TODO niko/assets if mins >= threshold, interpolate for missing data?
+
+		diskMap[key].End = e
+		diskMap[key].Start = s
+		diskMap[key].Minutes = mins
 	}
 
 	for _, result := range resPVSize {
@@ -222,18 +228,20 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 		diskMap[key].Bytes = bytes
 	}
 
-	for _, result := range resLocalStorageCost {
+	for _, result := range resPVCost {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
 		}
 
-		name, err := result.GetString("instance")
+		name, err := result.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage data missing instance")
+			log.Warningf("ClusterDisks: PV cost data missing persistentvolume")
 			continue
 		}
 
+		// TODO niko/assets storage class
+
 		cost := result.Values[0].Value
 		key := fmt.Sprintf("%s/%s", cluster, name)
 		if _, ok := diskMap[key]; !ok {
@@ -241,13 +249,16 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 				Cluster:   cluster,
 				Name:      name,
 				Breakdown: &ClusterCostsBreakdown{},
-				Local:     true,
 			}
 		}
-		diskMap[key].Cost += cost
+		diskMap[key].Cost = cost * (diskMap[key].Bytes / 1024 / 1024 / 1024) * (diskMap[key].Minutes / 60)
+		providerID, _ := result.GetString("provider_id") // just put the providerID set up here, it's the simplest query.
+		if providerID != "" {
+			diskMap[key].ProviderID = cloud.ParsePVID(providerID)
+		}
 	}
 
-	for _, result := range resLocalStorageUsedCost {
+	for _, result := range resLocalStorageCost {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
@@ -255,7 +266,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage usage data missing instance")
+			log.Warningf("ClusterDisks: local storage data missing instance")
 			continue
 		}
 
@@ -269,10 +280,10 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 				Local:     true,
 			}
 		}
-		diskMap[key].Breakdown.System = cost / diskMap[key].Cost
+		diskMap[key].Cost += cost
 	}
 
-	for _, result := range resLocalStorageBytes {
+	for _, result := range resLocalStorageUsedCost {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
@@ -280,11 +291,11 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage data missing instance")
+			log.Warningf("ClusterDisks: local storage usage data missing instance")
 			continue
 		}
 
-		bytes := result.Values[0].Value
+		cost := result.Values[0].Value
 		key := fmt.Sprintf("%s/%s", cluster, name)
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
@@ -294,40 +305,32 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 				Local:     true,
 			}
 		}
-		diskMap[key].Bytes = bytes
+		diskMap[key].Breakdown.System = cost / diskMap[key].Cost
 	}
 
-	for _, result := range resActiveMins {
+	for _, result := range resLocalStorageBytes {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
 		}
 
-		name, err := result.GetString("persistentvolume")
+		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: active mins missing instance")
+			log.Warningf("ClusterDisks: local storage data missing instance")
 			continue
 		}
 
+		bytes := result.Values[0].Value
 		key := fmt.Sprintf("%s/%s", cluster, name)
 		if _, ok := diskMap[key]; !ok {
-			log.DedupedWarningf(5, "ClusterDisks: active mins for unidentified disk")
-			continue
-		}
-
-		if len(result.Values) == 0 {
-			continue
+			diskMap[key] = &Disk{
+				Cluster:   cluster,
+				Name:      name,
+				Breakdown: &ClusterCostsBreakdown{},
+				Local:     true,
+			}
 		}
-
-		s := time.Unix(int64(result.Values[0].Timestamp), 0)
-		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
-		mins := e.Sub(s).Minutes()
-
-		// TODO niko/assets if mins >= threshold, interpolate for missing data?
-
-		diskMap[key].End = e
-		diskMap[key].Start = s
-		diskMap[key].Minutes = mins
+		diskMap[key].Bytes = bytes
 	}
 
 	for _, result := range resLocalActiveMins {