|
@@ -15,27 +15,19 @@ import (
|
|
|
"github.com/kubecost/cost-model/pkg/log"
|
|
"github.com/kubecost/cost-model/pkg/log"
|
|
|
"github.com/kubecost/cost-model/pkg/prom"
|
|
"github.com/kubecost/cost-model/pkg/prom"
|
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
|
|
|
+ "k8s.io/klog"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
const (
|
|
|
- queryFmtPods = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]%s`
|
|
|
|
|
- queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s, provider_id)`
|
|
|
|
|
- queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
- queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, %s)`
|
|
|
|
|
- queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, %s)`
|
|
|
|
|
- queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
- queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
- queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, %s)`
|
|
|
|
|
-
|
|
|
|
|
- // This query could be written without the recording rule
|
|
|
|
|
- // "kubecost_savings_container_cpu_usage_seconds", but we should
|
|
|
|
|
- // only do that when we're ready to incur the performance tradeoffs
|
|
|
|
|
- // with subqueries which would probably be in the world of hourly
|
|
|
|
|
- // ETL.
|
|
|
|
|
- //
|
|
|
|
|
- // See PromQL subquery documentation for a rate example:
|
|
|
|
|
- // https://prometheus.io/blog/2019/01/28/subquery-support/#examples
|
|
|
|
|
- queryFmtCPUUsageMax = `max(max_over_time(kubecost_savings_container_cpu_usage_seconds[%s]%s)) by (container_name, pod_name, namespace, instance, %s)`
|
|
|
|
|
|
|
+ queryFmtPods = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]%s`
|
|
|
|
|
+ queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s, provider_id)`
|
|
|
|
|
+ queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
+ queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
|
|
|
|
|
+ queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
|
|
|
|
|
+ queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
+ queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
|
|
+ queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
|
|
|
|
|
+ queryFmtCPUUsageMax = `max(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
|
|
|
queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
|
|
|
queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
|
|
queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
|
|
@@ -68,6 +60,10 @@ const (
|
|
|
queryFmtLBActiveMins = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
|
|
queryFmtLBActiveMins = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+// This is a bit of a hack to work around garbage data from cadvisor
|
|
|
|
|
+// Ideally you cap each pod to the max CPU on its node, but that involves a bit more complexity, as it would need to be done when allocations join with asset data.
|
|
|
|
|
+const MAX_CPU_CAP = 512
|
|
|
|
|
+
|
|
|
// CanCompute should return true if CostModel can act as a valid source for the
|
|
// CanCompute should return true if CostModel can act as a valid source for the
|
|
|
// given time range. In the case of CostModel we want to attempt to compute as
|
|
// given time range. In the case of CostModel we want to attempt to compute as
|
|
|
// long as the range starts in the past. If the CostModel ends up not having
|
|
// long as the range starts in the past. If the CostModel ends up not having
|
|
@@ -625,7 +621,7 @@ func applyPodResults(window kubecost.Window, resolution time.Duration, podMap ma
|
|
|
|
|
|
|
|
func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom.QueryResult) {
|
|
func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom.QueryResult) {
|
|
|
for _, res := range resCPUCoresAllocated {
|
|
for _, res := range resCPUCoresAllocated {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -647,6 +643,10 @@ func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
cpuCores := res.Values[0].Value
|
|
cpuCores := res.Values[0].Value
|
|
|
|
|
+ if cpuCores > MAX_CPU_CAP {
|
|
|
|
|
+ klog.Infof("[WARNING] Very large cpu allocation, clamping to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
|
|
|
|
|
+ cpuCores = 0.0
|
|
|
|
|
+ }
|
|
|
hours := pod.Allocations[container].Minutes() / 60.0
|
|
hours := pod.Allocations[container].Minutes() / 60.0
|
|
|
pod.Allocations[container].CPUCoreHours = cpuCores * hours
|
|
pod.Allocations[container].CPUCoreHours = cpuCores * hours
|
|
|
|
|
|
|
@@ -661,7 +661,7 @@ func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom
|
|
|
|
|
|
|
|
func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom.QueryResult) {
|
|
func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom.QueryResult) {
|
|
|
for _, res := range resCPUCoresRequested {
|
|
for _, res := range resCPUCoresRequested {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -689,6 +689,10 @@ func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom
|
|
|
if pod.Allocations[container].CPUCores() < res.Values[0].Value {
|
|
if pod.Allocations[container].CPUCores() < res.Values[0].Value {
|
|
|
pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
|
|
pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
|
|
|
}
|
|
}
|
|
|
|
|
+ if pod.Allocations[container].CPUCores() > MAX_CPU_CAP {
|
|
|
|
|
+ klog.Infof("[WARNING] Very large cpu allocation, clamping! to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
|
|
|
|
|
+ pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
node, err := res.GetString("node")
|
|
node, err := res.GetString("node")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
@@ -701,7 +705,7 @@ func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom
|
|
|
|
|
|
|
|
func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.QueryResult) {
|
|
func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.QueryResult) {
|
|
|
for _, res := range resCPUCoresUsedAvg {
|
|
for _, res := range resCPUCoresUsedAvg {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -711,11 +715,13 @@ func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.Que
|
|
|
if !ok {
|
|
if !ok {
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- container, err := res.GetString("container_name")
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg query result missing 'container': %s", key)
|
|
|
|
|
- continue
|
|
|
|
|
|
|
+ container, err := res.GetString("container")
|
|
|
|
|
+ if container == "" || err != nil {
|
|
|
|
|
+ container, err = res.GetString("container_name")
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg query result missing 'container': %s", key)
|
|
|
|
|
+ continue
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
@@ -723,12 +729,16 @@ func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.Que
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
pod.Allocations[container].CPUCoreUsageAverage = res.Values[0].Value
|
|
pod.Allocations[container].CPUCoreUsageAverage = res.Values[0].Value
|
|
|
|
|
+ if res.Values[0].Value > MAX_CPU_CAP {
|
|
|
|
|
+ klog.Infof("[WARNING] Very large cpu USAGE, dropping outlier")
|
|
|
|
|
+ pod.Allocations[container].CPUCoreUsageAverage = 0.0
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.QueryResult) {
|
|
func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.QueryResult) {
|
|
|
for _, res := range resCPUCoresUsedMax {
|
|
for _, res := range resCPUCoresUsedMax {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -739,11 +749,15 @@ func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.Que
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- container, err := res.GetString("container_name")
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max query result missing 'container': %s", key)
|
|
|
|
|
- continue
|
|
|
|
|
|
|
+ container, err := res.GetString("container")
|
|
|
|
|
+ if container == "" || err != nil {
|
|
|
|
|
+ container, err = res.GetString("container_name")
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max query result missing 'container': %s", key)
|
|
|
|
|
+ continue
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
+ klog.Infof("FOUND CONTAINER %s", container)
|
|
|
|
|
|
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
|
pod.AppendContainer(container)
|
|
pod.AppendContainer(container)
|
|
@@ -761,7 +775,7 @@ func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.Que
|
|
|
|
|
|
|
|
func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom.QueryResult) {
|
|
func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom.QueryResult) {
|
|
|
for _, res := range resRAMBytesAllocated {
|
|
for _, res := range resRAMBytesAllocated {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -797,7 +811,7 @@ func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom
|
|
|
|
|
|
|
|
func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom.QueryResult) {
|
|
func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom.QueryResult) {
|
|
|
for _, res := range resRAMBytesRequested {
|
|
for _, res := range resRAMBytesRequested {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -837,7 +851,7 @@ func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom
|
|
|
|
|
|
|
|
func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.QueryResult) {
|
|
func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.QueryResult) {
|
|
|
for _, res := range resRAMBytesUsedAvg {
|
|
for _, res := range resRAMBytesUsedAvg {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM avg usage result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM avg usage result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -848,10 +862,13 @@ func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.Que
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- container, err := res.GetString("container_name")
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage avg query result missing 'container': %s", key)
|
|
|
|
|
- continue
|
|
|
|
|
|
|
+ container, err := res.GetString("container")
|
|
|
|
|
+ if container == "" || err != nil {
|
|
|
|
|
+ container, err = res.GetString("container_name")
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage avg query result missing 'container': %s", key)
|
|
|
|
|
+ continue
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
@@ -864,7 +881,7 @@ func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.Que
|
|
|
|
|
|
|
|
func applyRAMBytesUsedMax(podMap map[podKey]*Pod, resRAMBytesUsedMax []*prom.QueryResult) {
|
|
func applyRAMBytesUsedMax(podMap map[podKey]*Pod, resRAMBytesUsedMax []*prom.QueryResult) {
|
|
|
for _, res := range resRAMBytesUsedMax {
|
|
for _, res := range resRAMBytesUsedMax {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -875,10 +892,13 @@ func applyRAMBytesUsedMax(podMap map[podKey]*Pod, resRAMBytesUsedMax []*prom.Que
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- container, err := res.GetString("container_name")
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max query result missing 'container': %s", key)
|
|
|
|
|
- continue
|
|
|
|
|
|
|
+ container, err := res.GetString("container")
|
|
|
|
|
+ if container == "" || err != nil {
|
|
|
|
|
+ container, err = res.GetString("container_name")
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max query result missing 'container': %s", key)
|
|
|
|
|
+ continue
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
if _, ok := pod.Allocations[container]; !ok {
|
|
@@ -900,7 +920,7 @@ func applyGPUsAllocated(podMap map[podKey]*Pod, resGPUsRequested []*prom.QueryRe
|
|
|
resGPUsRequested = resGPUsAllocated
|
|
resGPUsRequested = resGPUsAllocated
|
|
|
}
|
|
}
|
|
|
for _, res := range resGPUsRequested {
|
|
for _, res := range resGPUsRequested {
|
|
|
- key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -928,7 +948,7 @@ func applyGPUsAllocated(podMap map[podKey]*Pod, resGPUsRequested []*prom.QueryRe
|
|
|
|
|
|
|
|
func applyNetworkTotals(podMap map[podKey]*Pod, resNetworkTransferBytes []*prom.QueryResult, resNetworkReceiveBytes []*prom.QueryResult) {
|
|
func applyNetworkTotals(podMap map[podKey]*Pod, resNetworkTransferBytes []*prom.QueryResult, resNetworkReceiveBytes []*prom.QueryResult) {
|
|
|
for _, res := range resNetworkTransferBytes {
|
|
for _, res := range resNetworkTransferBytes {
|
|
|
- podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Transfer Bytes query result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Transfer Bytes query result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -944,7 +964,7 @@ func applyNetworkTotals(podMap map[podKey]*Pod, resNetworkTransferBytes []*prom.
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
for _, res := range resNetworkReceiveBytes {
|
|
for _, res := range resNetworkReceiveBytes {
|
|
|
- podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Receive Bytes query result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Receive Bytes query result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -974,7 +994,7 @@ func applyNetworkAllocation(podMap map[podKey]*Pod, resNetworkGiB []*prom.QueryR
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
for _, res := range resNetworkGiB {
|
|
for _, res := range resNetworkGiB {
|
|
|
- podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod_name")
|
|
|
|
|
|
|
+ podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
|
|
log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
|
|
|
continue
|
|
continue
|
|
@@ -1018,7 +1038,7 @@ func resToPodLabels(resPodLabels []*prom.QueryResult) map[podKey]map[string]stri
|
|
|
podLabels := map[podKey]map[string]string{}
|
|
podLabels := map[podKey]map[string]string{}
|
|
|
|
|
|
|
|
for _, res := range resPodLabels {
|
|
for _, res := range resPodLabels {
|
|
|
- podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|
|
@@ -1060,7 +1080,7 @@ func resToPodAnnotations(resPodAnnotations []*prom.QueryResult) map[podKey]map[s
|
|
|
podAnnotations := map[podKey]map[string]string{}
|
|
podAnnotations := map[podKey]map[string]string{}
|
|
|
|
|
|
|
|
for _, res := range resPodAnnotations {
|
|
for _, res := range resPodAnnotations {
|
|
|
- podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace", "pod")
|
|
|
|
|
|
|
+ podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
continue
|
|
continue
|
|
|
}
|
|
}
|