|
|
@@ -139,7 +139,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
|
|
|
costPerGBHr := 0.04 / 730.0
|
|
|
|
|
|
ctx := prom.NewContext(client)
|
|
|
- queryPVCost := fmt.Sprintf(`sum_over_time((avg(kube_persistentvolume_capacity_bytes) by (cluster_id, persistentvolume) * avg(pv_hourly_cost) by (cluster_id, persistentvolume))[%s:%dm]%s)/1024/1024/1024 * %f`, durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
|
|
|
+ queryPVCost := fmt.Sprintf(`sum_over_time((avg(kube_persistentvolume_capacity_bytes) by (cluster_id, persistentvolume) * on(cluster_id, persistentvolume) group_right avg(pv_hourly_cost) by (cluster_id, persistentvolume,provider_id))[%s:%dm]%s)/1024/1024/1024 * %f`, durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
|
|
|
queryPVSize := fmt.Sprintf(`avg_over_time(kube_persistentvolume_capacity_bytes[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
|
|
|
queryActiveMins := fmt.Sprintf(`count(pv_hourly_cost) by (cluster_id, persistentvolume)[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
|
|
|
|
|
|
@@ -193,6 +193,10 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
|
|
|
}
|
|
|
}
|
|
|
diskMap[key].Cost += cost
|
|
|
+ providerID, _ := result.GetString("provider_id") // just put the providerID set up here, it's the simplest query.
|
|
|
+ if providerID != "" {
|
|
|
+ diskMap[key].ProviderID = provider.ParsePVID(providerID)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
for _, result := range resPVSize {
|
|
|
@@ -287,9 +291,10 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
|
|
|
key := fmt.Sprintf("%s/%s", cluster, name)
|
|
|
if _, ok := diskMap[key]; !ok {
|
|
|
diskMap[key] = &Disk{
|
|
|
- Cluster: cluster,
|
|
|
- Name: name,
|
|
|
- Local: true,
|
|
|
+ Cluster: cluster,
|
|
|
+ Name: name,
|
|
|
+ Breakdown: &ClusterCostsBreakdown{},
|
|
|
+ Local: true,
|
|
|
}
|
|
|
}
|
|
|
diskMap[key].Bytes = bytes
|
|
|
@@ -417,7 +422,7 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
|
|
|
queryNodeCPUCores := fmt.Sprintf(`avg_over_time(avg(kube_node_status_capacity_cpu_cores) by (cluster_id, node)[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
|
|
|
queryNodeRAMCost := fmt.Sprintf(`sum_over_time((avg(kube_node_status_capacity_memory_bytes) by (cluster_id, node) * on(cluster_id, node) group_right avg(node_ram_hourly_cost) by (cluster_id, node, instance_type, provider_id))[%s:%dm]%s) / 1024 / 1024 / 1024 * %f`, durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
|
|
|
queryNodeRAMBytes := fmt.Sprintf(`avg_over_time(avg(kube_node_status_capacity_memory_bytes) by (cluster_id, node)[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
|
|
|
- queryNodeGPUCost := fmt.Sprintf(`sum_over_time((avg(node_gpu_hourly_cost) by (cluster_id, node, provider_id))[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
|
|
|
+ queryNodeGPUCost := fmt.Sprintf(`sum_over_time((avg(node_gpu_hourly_cost * %d.0 / 60.0) by (cluster_id, node, provider_id))[%s:%dm]%s)`, minsPerResolution, durationStr, minsPerResolution, offsetStr)
|
|
|
queryNodeLabels := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
|
|
|
queryNodeCPUModePct := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm]%s)) by (kubernetes_node, cluster_id, mode) / ignoring(mode) group_left sum(rate(node_cpu_seconds_total[%s:%dm]%s)) by (kubernetes_node, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
|
|
|
queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (instance, cluster_id) / sum(sum_over_time(label_replace(kube_node_status_capacity_memory_bytes, "instance", "$1", "node", "(.*)")[%s:%dm]%s)) by (instance, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
|
|
|
@@ -783,6 +788,110 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
|
|
|
return nodeMap, nil
|
|
|
}
|
|
|
|
|
|
// LoadBalancer is the cost record produced by ClusterLoadBalancers for a
// single load-balanced service.
type LoadBalancer struct {
	// Cluster is the cluster ID the service was attributed to. Name is the
	// service identity in "<namespace>/<service_name>" form. ProviderID is
	// reserved for a cloud-provider identifier; ClusterLoadBalancers currently
	// leaves it empty.
	Cluster, Name, ProviderID string

	// Cost is the cost summed from the load balancer cost query results for
	// this service over the queried window.
	Cost float64

	// Start is the timestamp of the first sample observed for the service.
	Start time.Time

	// Minutes is the time elapsed, in minutes, between the first and the last
	// sample observed for the service.
	Minutes float64
}
|
|
|
+
|
|
|
+func ClusterLoadBalancers(cp cloud.Provider, client prometheus.Client, duration, offset time.Duration) (map[string]*LoadBalancer, []error) {
|
|
|
+ durationStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
|
|
|
+ offsetStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
|
|
|
+ if offset < time.Minute {
|
|
|
+ offsetStr = ""
|
|
|
+ }
|
|
|
+
|
|
|
+ // minsPerResolution determines accuracy and resource use for the following
|
|
|
+ // queries. Smaller values (higher resolution) result in better accuracy,
|
|
|
+ // but more expensive queries, and vice-a-versa.
|
|
|
+ minsPerResolution := 5
|
|
|
+
|
|
|
+ // hourlyToCumulative is a scaling factor that, when multiplied by an hourly
|
|
|
+ // value, converts it to a cumulative value; i.e.
|
|
|
+ // [$/hr] * [min/res]*[hr/min] = [$/res]
|
|
|
+ hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
|
|
|
+
|
|
|
+ ctx := prom.NewContext(client)
|
|
|
+ queryLBCost := fmt.Sprintf(`sum_over_time((avg(kubecost_load_balancer_cost) by (namespace, service_name))[%s:%dm]%s) * %f`, durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
|
|
|
+ queryActiveMins := fmt.Sprintf(`count(kubecost_load_balancer_cost) by (namespace, service_name)[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
|
|
|
+
|
|
|
+ resChLBCost := ctx.Query(queryLBCost)
|
|
|
+ resChActiveMins := ctx.Query(queryActiveMins)
|
|
|
+
|
|
|
+ resLBCost, _ := resChLBCost.Await()
|
|
|
+ resActiveMins, _ := resChActiveMins.Await()
|
|
|
+
|
|
|
+ if ctx.ErrorCollector.IsError() {
|
|
|
+ return nil, ctx.Errors()
|
|
|
+ }
|
|
|
+
|
|
|
+ loadBalancerMap := map[string]*LoadBalancer{}
|
|
|
+
|
|
|
+ for _, result := range resLBCost {
|
|
|
+ cluster, err := result.GetString("cluster_id")
|
|
|
+ if err != nil {
|
|
|
+ cluster = env.GetClusterID()
|
|
|
+ }
|
|
|
+ namespace, err := result.GetString("namespace")
|
|
|
+ if err != nil {
|
|
|
+ log.Warningf("ClusterLoadBalancers: LB cost data missing namespace")
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ serviceName, err := result.GetString("service_name")
|
|
|
+ if err != nil {
|
|
|
+ log.Warningf("ClusterLoadBalancers: LB cost data missing service_name")
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ providerID := ""
|
|
|
+ lbCost := result.Values[0].Value
|
|
|
+
|
|
|
+ key := fmt.Sprintf("%s/%s/%s", cluster, namespace, serviceName)
|
|
|
+ if _, ok := loadBalancerMap[key]; !ok {
|
|
|
+ loadBalancerMap[key] = &LoadBalancer{
|
|
|
+ Cluster: cluster,
|
|
|
+ Name: namespace + "/" + serviceName,
|
|
|
+ ProviderID: providerID, // cp.ParseID(providerID) if providerID does get recorded later
|
|
|
+ }
|
|
|
+ }
|
|
|
+ loadBalancerMap[key].Cost += lbCost
|
|
|
+ }
|
|
|
+
|
|
|
+ for _, result := range resActiveMins {
|
|
|
+ cluster, err := result.GetString("cluster_id")
|
|
|
+ if err != nil {
|
|
|
+ cluster = env.GetClusterID()
|
|
|
+ }
|
|
|
+ namespace, err := result.GetString("namespace")
|
|
|
+ if err != nil {
|
|
|
+ log.Warningf("ClusterLoadBalancers: LB cost data missing namespace")
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ serviceName, err := result.GetString("service_name")
|
|
|
+ if err != nil {
|
|
|
+ log.Warningf("ClusterLoadBalancers: LB cost data missing service_name")
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ key := fmt.Sprintf("%s/%s/%s", cluster, namespace, serviceName)
|
|
|
+
|
|
|
+ if len(result.Values) == 0 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+
|
|
|
+ s := time.Unix(int64(result.Values[0].Timestamp), 0)
|
|
|
+ e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
|
|
|
+ mins := e.Sub(s).Minutes()
|
|
|
+
|
|
|
+ // TODO niko/assets if mins >= threshold, interpolate for missing data?
|
|
|
+
|
|
|
+ loadBalancerMap[key].Start = s
|
|
|
+ loadBalancerMap[key].Minutes = mins
|
|
|
+ }
|
|
|
+ return loadBalancerMap, nil
|
|
|
+}
|
|
|
+
|
|
|
// ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
|
|
|
func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
|
|
|
// Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
|