Przeglądaj źródła

add guardrail based on max CPU count

Ajay Tripathy 4 lat temu
rodzic
commit
052b4825ae
1 zmienionych plików z 18 dodań i 0 usunięć
  1. 18 0
      pkg/costmodel/allocation.go

+ 18 - 0
pkg/costmodel/allocation.go

@@ -15,6 +15,7 @@ import (
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/prom"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/klog"
 )
 
 const (
@@ -68,6 +69,11 @@ const (
 	queryFmtLBActiveMins          = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
 )
 
+// This is a bit of a hack to work around garbage data from cadvisor
+// The maximum CPU count on AWS is 96. Standard_M192idms_v2 on Azure allows 192. GCP offers 224 in n2d. Many improvements are possible here to get tighter bounds, but 96 seems reasonable for now.
+// Ideally you would cap each pod to the max CPU on its node, but that involves a bit more complexity, as it would need to be done when allocations are joined with asset data.
+const MAX_CPU_CAP = 96
+
 // CanCompute should return true if CostModel can act as a valid source for the
 // given time range. In the case of CostModel we want to attempt to compute as
 // long as the range starts in the past. If the CostModel ends up not having
@@ -647,6 +653,10 @@ func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom
 		}
 
 		cpuCores := res.Values[0].Value
+		if cpuCores > MAX_CPU_CAP {
+			klog.Infof("[WARNING] Very large cpu allocation, clamping to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
+			cpuCores = 0.0
+		}
 		hours := pod.Allocations[container].Minutes() / 60.0
 		pod.Allocations[container].CPUCoreHours = cpuCores * hours
 
@@ -689,6 +699,10 @@ func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom
 		if pod.Allocations[container].CPUCores() < res.Values[0].Value {
 			pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
 		}
+		if pod.Allocations[container].CPUCores() > MAX_CPU_CAP {
+			klog.Infof("[WARNING] Very large cpu allocation, clamping! to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
+			pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
+		}
 
 		node, err := res.GetString("node")
 		if err != nil {
@@ -725,6 +739,10 @@ func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.Que
 		}
 
 		pod.Allocations[container].CPUCoreUsageAverage = res.Values[0].Value
+		if res.Values[0].Value > MAX_CPU_CAP {
+			klog.Infof("[WARNING] Very large cpu USAGE, dropping outlier")
+			pod.Allocations[container].CPUCoreUsageAverage = 0.0
+		}
 	}
 }