|
|
@@ -2,6 +2,7 @@ package costmodel
|
|
|
|
|
|
import (
|
|
|
"fmt"
|
|
|
+ "os"
|
|
|
"time"
|
|
|
|
|
|
costAnalyzerCloud "github.com/kubecost/cost-model/cloud"
|
|
|
@@ -12,24 +13,24 @@ import (
|
|
|
|
|
|
const (
|
|
|
queryClusterCores = `sum(
|
|
|
- avg(kube_node_status_capacity_cpu_cores %s) by (node) * avg(node_cpu_hourly_cost %s) by (node) * 730 +
|
|
|
- avg(node_gpu_hourly_cost %s) by (node) * 730
|
|
|
- )`
|
|
|
+ avg(kube_node_status_capacity_cpu_cores %s) by (node, cluster_id) * avg(node_cpu_hourly_cost %s) by (node, cluster_id) * 730 +
|
|
|
+ avg(node_gpu_hourly_cost %s) by (node, cluster_id) * 730
|
|
|
+ ) by (cluster_id)`
|
|
|
|
|
|
queryClusterRAM = `sum(
|
|
|
- avg(kube_node_status_capacity_memory_bytes %s) by (node) / 1024 / 1024 / 1024 * avg(node_ram_hourly_cost %s) by (node) * 730
|
|
|
- )`
|
|
|
+ avg(kube_node_status_capacity_memory_bytes %s) by (node, cluster_id) / 1024 / 1024 / 1024 * avg(node_ram_hourly_cost %s) by (node, cluster_id) * 730
|
|
|
+ ) by (cluster_id)`
|
|
|
|
|
|
queryStorage = `sum(
|
|
|
- avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume) * 730
|
|
|
- * avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume) / 1024 / 1024 / 1024
|
|
|
- ) %s`
|
|
|
+ avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730
|
|
|
+ * avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
|
|
|
+ ) by (cluster_id) %s`
|
|
|
|
|
|
- queryTotal = `sum(avg(node_total_hourly_cost) by (node)) * 730 +
|
|
|
+ queryTotal = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) by (cluster_id) * 730 +
|
|
|
sum(
|
|
|
- avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume) * 730
|
|
|
- * avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume) / 1024 / 1024 / 1024
|
|
|
- ) %s`
|
|
|
+ avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730
|
|
|
+ * avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
|
|
|
+ ) by (cluster_id) %s`
|
|
|
)
|
|
|
|
|
|
type Totals struct {
|
|
|
@@ -79,7 +80,9 @@ func resultToTotals(qr interface{}) ([][]string, error) {
|
|
|
return totals, nil
|
|
|
}
|
|
|
|
|
|
-func resultToTotal(qr interface{}) ([][]string, error) {
|
|
|
+func resultToTotal(qr interface{}) (map[string][][]string, error) {
|
|
|
+ defaultClusterID := os.Getenv(clusterIDKey)
|
|
|
+
|
|
|
data, ok := qr.(map[string]interface{})["data"]
|
|
|
if !ok {
|
|
|
e, err := wrapPrometheusError(qr)
|
|
|
@@ -99,22 +102,131 @@ func resultToTotal(qr interface{}) ([][]string, error) {
|
|
|
if len(results) == 0 {
|
|
|
return nil, fmt.Errorf("Not enough data available in the selected time range")
|
|
|
}
|
|
|
- val, ok := results[0].(map[string]interface{})["value"]
|
|
|
- totals := [][]string{}
|
|
|
- if !ok {
|
|
|
- return nil, fmt.Errorf("Improperly formatted results from prometheus, value is not a field in the vector")
|
|
|
+ toReturn := make(map[string][][]string)
|
|
|
+ for i := range results {
|
|
|
+ metrics, ok := results[i].(map[string]interface{})["metric"]
|
|
|
+ if !ok {
|
|
|
+ return nil, fmt.Errorf("Improperly formatted results from prometheus, metric is not a field in the vector")
|
|
|
+ }
|
|
|
+ metricMap, _ := metrics.(map[string]interface{})
|
|
|
+ cid, ok := metricMap["cluster_id"]
|
|
|
+ if !ok {
|
|
|
+ klog.V(4).Info("Prometheus vector does not have cluster id")
|
|
|
+ cid = defaultClusterID
|
|
|
+ }
|
|
|
+ clusterID, ok := cid.(string)
|
|
|
+ if !ok {
|
|
|
+ return nil, fmt.Errorf("Prometheus vector does not have string cluster_id")
|
|
|
+ }
|
|
|
+
|
|
|
+ val, ok := results[i].(map[string]interface{})["value"]
|
|
|
+ if !ok {
|
|
|
+ return nil, fmt.Errorf("Improperly formatted results from prometheus, value is not a field in the vector")
|
|
|
+ }
|
|
|
+ dataPoint, ok := val.([]interface{})
|
|
|
+ if !ok || len(dataPoint) != 2 {
|
|
|
+ return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
|
|
|
+ }
|
|
|
+ d0 := fmt.Sprintf("%f", dataPoint[0].(float64))
|
|
|
+ toAppend := []string{
|
|
|
+ d0,
|
|
|
+ dataPoint[1].(string),
|
|
|
+ }
|
|
|
+ if t, ok := toReturn[clusterID]; ok {
|
|
|
+ toReturn[clusterID] = append(t, toAppend)
|
|
|
+ } else {
|
|
|
+ toReturn[clusterID] = [][]string{toAppend}
|
|
|
+ }
|
|
|
}
|
|
|
- dataPoint, ok := val.([]interface{})
|
|
|
- if !ok || len(dataPoint) != 2 {
|
|
|
- return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
|
|
|
+ return toReturn, nil
|
|
|
+}
|
|
|
+
|
|
|
+// ClusterCostsForAllClusters gives the cluster costs averaged over a window of time for all clusters.
|
|
|
+func ClusterCostsForAllClusters(cli prometheusClient.Client, cloud costAnalyzerCloud.Provider, windowString, offset string) (map[string]*Totals, error) {
|
|
|
+ localStorageQuery, err := cloud.GetLocalStorageQuery()
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
}
|
|
|
- d0 := fmt.Sprintf("%f", dataPoint[0].(float64))
|
|
|
- toAppend := []string{
|
|
|
- d0,
|
|
|
- dataPoint[1].(string),
|
|
|
+ if localStorageQuery != "" {
|
|
|
+ localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
|
|
|
}
|
|
|
- totals = append(totals, toAppend)
|
|
|
- return totals, nil
|
|
|
+
|
|
|
+ // turn offsets of the format "[0-9+]h" into the format "offset [0-9+]h" for use in query templates
|
|
|
+ if offset != "" {
|
|
|
+ offset = fmt.Sprintf("offset %s", offset)
|
|
|
+ }
|
|
|
+
|
|
|
+ qCores := fmt.Sprintf(queryClusterCores, offset, offset, offset)
|
|
|
+ qRAM := fmt.Sprintf(queryClusterRAM, offset, offset)
|
|
|
+ qStorage := fmt.Sprintf(queryStorage, windowString, offset, windowString, offset, localStorageQuery)
|
|
|
+ qTotal := fmt.Sprintf(queryTotal, localStorageQuery)
|
|
|
+
|
|
|
+ resultClusterCores, err := Query(cli, qCores)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ resultClusterRAM, err := Query(cli, qRAM)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ resultStorage, err := Query(cli, qStorage)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ resultTotal, err := Query(cli, qTotal)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ toReturn := make(map[string]*Totals)
|
|
|
+
|
|
|
+ coreTotal, err := resultToTotal(resultClusterCores)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for clusterID, total := range coreTotal {
|
|
|
+ if _, ok := toReturn[clusterID]; !ok {
|
|
|
+ toReturn[clusterID] = &Totals{}
|
|
|
+ }
|
|
|
+ toReturn[clusterID].CPUCost = total
|
|
|
+ }
|
|
|
+
|
|
|
+ ramTotal, err := resultToTotal(resultClusterRAM)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for clusterID, total := range ramTotal {
|
|
|
+ if _, ok := toReturn[clusterID]; !ok {
|
|
|
+ toReturn[clusterID] = &Totals{}
|
|
|
+ }
|
|
|
+ toReturn[clusterID].MemCost = total
|
|
|
+ }
|
|
|
+
|
|
|
+ storageTotal, err := resultToTotal(resultStorage)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for clusterID, total := range storageTotal {
|
|
|
+ if _, ok := toReturn[clusterID]; !ok {
|
|
|
+ toReturn[clusterID] = &Totals{}
|
|
|
+ }
|
|
|
+ toReturn[clusterID].StorageCost = total
|
|
|
+ }
|
|
|
+
|
|
|
+ clusterTotal, err := resultToTotal(resultTotal)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ for clusterID, total := range clusterTotal {
|
|
|
+ if _, ok := toReturn[clusterID]; !ok {
|
|
|
+ toReturn[clusterID] = &Totals{}
|
|
|
+ }
|
|
|
+ toReturn[clusterID].TotalCost = total
|
|
|
+ }
|
|
|
+
|
|
|
+ return toReturn, nil
|
|
|
}
|
|
|
|
|
|
// ClusterCosts gives the current full cluster costs averaged over a window of time.
|