Преглед изворног кода

fix: remove stale node metrics when label key changes to prevent cost inflation

When a node's Kubernetes labels are temporarily unavailable (e.g., during a
K8s API outage), InstanceType can become empty, causing a new Prometheus time
series to be emitted with instance_type="". The previous series (with the
correct instance_type value) persists in Prometheus's historical query window.

Since cost aggregation queries group by instance_type, both series are returned
and summed, resulting in double-counted (and sometimes wildly inflated) costs.
This was reported in issue #3620 where users saw 100x cost inflation after
upgrading, with duplicate time series — one with instance_type populated (normal
~$0.04/hr) and one without (showing $126-254+).

The fix introduces nodeCurrentLabelKey, a map that tracks the most recently
emitted label key per node. When a node's label key changes between metric
emission ticks, the old Prometheus time series are explicitly deleted before
the new ones are emitted, ensuring that each node metric exposes only one
series per node at any time.

Fixes #3620
Signed-off-by: Claude <noreply@anthropic.com>
Claude пре 1 месец
родитељ
комит
54152b1451
1 измењених фајлова са 26 додато и 0 уклоњено
  1. 26 0
      pkg/costmodel/metrics.go

+ 26 - 0
pkg/costmodel/metrics.go

@@ -430,6 +430,13 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 		pvSeen := make(map[string]bool)
 		pvcSeen := make(map[string]bool)
 		nodeCostAverages := make(map[string]NodeCostAverages)
+		// nodeCurrentLabelKey tracks the most recently emitted label key for each
+		// node. When a node's labels change (e.g., instance_type becomes empty due
+		// to a temporary K8s API outage and then recovers), the old Prometheus time
+		// series must be explicitly deleted before emitting the new one. Without
+		// this, both series coexist within Prometheus's query window and cause
+		// cost inflation through double-counting in aggregation queries.
+		nodeCurrentLabelKey := make(map[string]string)
 
 		getKeyFromLabelStrings := func(labels ...string) string {
 			return strings.Join(labels, ",")
@@ -569,6 +576,25 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 
 				labelKey := getKeyFromLabelStrings(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType, nodeUID)
 
+				// If the label key changed for this node (e.g., instance_type
+				// temporarily became empty due to a K8s API outage), delete the old
+				// metric series immediately. This prevents two time series for the
+				// same node from coexisting within Prometheus's query window, which
+				// would cause cost double-counting and inflated allocation results.
+				if prevLabelKey, exists := nodeCurrentLabelKey[nodeName]; exists && prevLabelKey != labelKey {
+					log.Infof("Node %s label set changed (previous: %q, current: %q); removing stale metrics to prevent duplicate time series", nodeName, prevLabelKey, labelKey)
+					prevLabels := getLabelStringsFromKey(prevLabelKey)
+					cmme.NodeTotalPriceRecorder.DeleteLabelValues(prevLabels...)
+					cmme.NodeSpotRecorder.DeleteLabelValues(prevLabels...)
+					cmme.CPUPriceRecorder.DeleteLabelValues(prevLabels...)
+					cmme.RAMPriceRecorder.DeleteLabelValues(prevLabels...)
+					cmme.GPUPriceRecorder.DeleteLabelValues(prevLabels...)
+					cmme.GPUCountRecorder.DeleteLabelValues(prevLabels...)
+					delete(nodeSeen, prevLabelKey)
+					delete(nodeCostAverages, prevLabelKey)
+				}
+				nodeCurrentLabelKey[nodeName] = labelKey
+
 				avgCosts, ok := nodeCostAverages[labelKey]
 
 				// initialize average cost tracking for this node if there is none