
fix(costmodel): take max across duplicate rows in applyCPUCoresUsedMax/applyRAMBytesUsedMax/applyGPUUsageMax

QueryCPUUsageMax, QueryRAMUsageMax, and QueryGPUsUsageMax use PromQL
queries of the form

  max(max_over_time(<metric>{...}[window])) by (
    container_name, container, pod_name, pod, namespace,
    node, instance, uid, <cluster_label>)

which groups by identifiers that are not part of the podKey used by
applyCPUCoresUsedMax, applyRAMBytesUsedMax, and applyGPUUsageMax. When
more than one row maps to the same (cluster, namespace, pod, container)
- e.g. because a pod restarted mid-window and kube-state-metrics emitted
rows under both the old and new uid, or because kubelet/cAdvisor was
scraped from multiple instances - these applier functions were
unconditionally overwriting the previously-stored value with
res.Data[0].Value of whichever row happened to be iterated last. Because
Go map iteration order is unspecified, the resulting max was arbitrary,
and in practice frequently came out orders of magnitude below the
correct value.
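
A minimal, runnable sketch of the failure mode (the row struct and key
are illustrative stand-ins for the real source result types; the two
values are the ones from the repro below):

  package main

  import (
  	"fmt"
  	"math"
  )

  func main() {
  	// Two rows for the same (cluster, namespace, pod, container)
  	// key, e.g. one per uid after a mid-window restart; their
  	// relative order is effectively arbitrary.
  	rows := []struct {
  		Key   string
  		Value float64
  	}{
  		{"ns/pod/c1", 65101824}, // row under the old uid
  		{"ns/pod/c1", 442368},   // row under the new uid
  	}
  	buggy := map[string]float64{}
  	fixed := map[string]float64{}
  	for _, r := range rows {
  		buggy[r.Key] = r.Value                         // old: last row wins
  		fixed[r.Key] = math.Max(fixed[r.Key], r.Value) // new: order-independent
  	}
  	fmt.Println(buggy["ns/pod/c1"], fixed["ns/pod/c1"]) // prints: 442368 6.5101824e+07
  }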

This reproduced on the opencost test stack in scheduled integration
runs (e.g. runs 24806697901 and 24784087571), where the
TestRAMMax/Last_Two_Days and TestRAMAvgUsage tests reported a
DifferencePercent of around 99% for a single pod
(test-install-speedtest-tracker-cnpg-main-1, where /allocation
reported 442368 bytes vs Prometheus's 65101824 bytes), matching the
expected pattern of a
StatefulSet pod that was restarted during the 48h window. The other
(non-restarted) replicas in the same StatefulSet passed.

Fix: aggregate the values for the same (cluster, namespace, pod,
container) key using math.Max (and, for the pointer-valued GPUUsageMax
field, an explicit > comparison that also covers the nil case). This
matches the semantics of the underlying query ('max' aggregator) and is
independent of result iteration order.

The RAM-avg and CPU-avg variants exhibit the same duplicate-row
overwrite but do not admit an equally simple fix, because combining
averages correctly requires weighting by container run-time; a
wrong-but-deterministic value is typically the safer fallback there,
so this commit leaves those paths untouched and fixes only the 'max'
aggregators, where math.Max is trivially correct.
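
For reference, a sketch (under the assumption of hypothetical per-row
run-times, which the avg query results do not carry) of the weighting
a correct avg combination would need:

  // weightedAvg combines per-row averages by run-time. An unweighted
  // max or mean over avg rows is wrong whenever run-times differ.
  func weightedAvg(avgs, runtimes []float64) float64 {
  	var num, den float64
  	for i := range avgs {
  		num += avgs[i] * runtimes[i]
  		den += runtimes[i]
  	}
  	if den == 0 {
  		return 0
  	}
  	return num / den
  }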

Testing:
  * go vet ./... - clean
  * gofmt -l . - empty
  * go build ./... - clean
  * go test ./pkg/costmodel/... - all pass, including three new
    regression tests (one for each fixed function) asserting that the
    largest value wins regardless of iteration order.

Signed-off-by: Cursor Agent <cursor@opencost.io>

Co-authored-by: Alex Meijer <ameijer@users.noreply.github.com>
commit a7a9dd6b7f

+ 37 - 4
pkg/costmodel/allocation_helpers.go

@@ -416,12 +416,23 @@ func applyCPUCoresUsedMax(podMap map[podKey]*pod, resCPUCoresUsedMax []*source.C
 				thisPod.appendContainer(container)
 			}
 
+			// Take the max across all result rows for the same
+			// (cluster, namespace, pod, container). The upstream query
+			// groups by additional labels (uid, instance, ...) and can
+			// return multiple rows for the same pod+container when, for
+			// example, a pod restarted mid-window (new uid) or was
+			// scraped from more than one instance. Prior to this, the
+			// last row iterated overwrote any earlier (possibly larger)
+			// value, which produced arbitrarily low maxima.
 			if thisPod.Allocations[container].RawAllocationOnly == nil {
 				thisPod.Allocations[container].RawAllocationOnly = &opencost.RawAllocationOnlyData{
 					CPUCoreUsageMax: res.Data[0].Value,
 				}
 			} else {
-				thisPod.Allocations[container].RawAllocationOnly.CPUCoreUsageMax = res.Data[0].Value
+				thisPod.Allocations[container].RawAllocationOnly.CPUCoreUsageMax = math.Max(
+					thisPod.Allocations[container].RawAllocationOnly.CPUCoreUsageMax,
+					res.Data[0].Value,
+				)
 			}
 		}
 	}
@@ -665,12 +676,23 @@ func applyRAMBytesUsedMax(podMap map[podKey]*pod, resRAMBytesUsedMax []*source.R
 				thisPod.appendContainer(container)
 			}
 
+			// Take the max across all result rows for the same
+			// (cluster, namespace, pod, container). The upstream query
+			// groups by additional labels (uid, instance, ...) and can
+			// return multiple rows for the same pod+container when, for
+			// example, a pod restarted mid-window (new uid) or was
+			// scraped from more than one instance. Prior to this, the
+			// last row iterated overwrote any earlier (possibly larger)
+			// value, which produced arbitrarily low maxima.
 			if thisPod.Allocations[container].RawAllocationOnly == nil {
 				thisPod.Allocations[container].RawAllocationOnly = &opencost.RawAllocationOnlyData{
 					RAMBytesUsageMax: res.Data[0].Value,
 				}
 			} else {
-				thisPod.Allocations[container].RawAllocationOnly.RAMBytesUsageMax = res.Data[0].Value
+				thisPod.Allocations[container].RawAllocationOnly.RAMBytesUsageMax = math.Max(
+					thisPod.Allocations[container].RawAllocationOnly.RAMBytesUsageMax,
+					res.Data[0].Value,
+				)
 			}
 		}
 	}
@@ -757,12 +779,23 @@ func applyGPUUsageMax(podMap map[podKey]*pod, resGPUUsageMax []*source.GPUsUsage
 				thisPod.appendContainer(container)
 			}
 
+			// Take the max across all result rows for the same
+			// (cluster, namespace, pod, container). The upstream query
+			// groups by additional labels (uid, device, ...) and can
+			// return multiple rows for the same pod+container (for
+			// example a pod that restarted mid-window gets a new uid).
+			// Prior to this, the last row iterated overwrote any
+			// earlier (possibly larger) value.
+			v := res.Data[0].Value
 			if thisPod.Allocations[container].RawAllocationOnly == nil {
 				thisPod.Allocations[container].RawAllocationOnly = &opencost.RawAllocationOnlyData{
-					GPUUsageMax: &res.Data[0].Value,
+					GPUUsageMax: &v,
 				}
 			} else {
-				thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax = &res.Data[0].Value
+				existing := thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax
+				if existing == nil || v > *existing {
+					thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax = &v
+				}
 			}
 		}
 	}

+ 201 - 0
pkg/costmodel/allocation_helpers_test.go

@@ -622,3 +622,204 @@ func TestCalculateStartAndEnd(t *testing.T) {
 		})
 	}
 }
+
+// makePodMapWithEmptyAllocations builds a pod map with the given podKey present
+// and an empty Allocations map, which mirrors what buildPodMap produces before
+// container-scoped apply functions run.
+func makePodMapWithEmptyAllocations(pk podKey) map[podKey]*pod {
+	return map[podKey]*pod{
+		pk: {
+			Window:      window.Clone(),
+			Start:       windowStart,
+			End:         windowEnd,
+			Key:         pk,
+			Allocations: map[string]*opencost.Allocation{},
+		},
+	}
+}
+
+// ramUsageMaxResult builds a RAMUsageMaxResult for a single container with the
+// given pod-level identity and sample value.
+func ramUsageMaxResult(pk podKey, container string, value float64) *source.RAMUsageMaxResult {
+	return &source.RAMUsageMaxResult{
+		Cluster:   pk.Cluster,
+		Namespace: pk.Namespace,
+		Pod:       pk.Pod,
+		Container: container,
+		Data: []*util.Vector{
+			{Value: value},
+		},
+	}
+}
+
+// cpuUsageMaxResult builds a CPUUsageMaxResult for a single container with the
+// given pod-level identity and sample value.
+func cpuUsageMaxResult(pk podKey, container string, value float64) *source.CPUUsageMaxResult {
+	return &source.CPUUsageMaxResult{
+		Cluster:   pk.Cluster,
+		Namespace: pk.Namespace,
+		Pod:       pk.Pod,
+		Container: container,
+		Data: []*util.Vector{
+			{Value: value},
+		},
+	}
+}
+
+// gpuUsageMaxResult builds a GPUsUsageMaxResult for a single container with
+// the given pod-level identity and sample value.
+func gpuUsageMaxResult(pk podKey, container string, value float64) *source.GPUsUsageMaxResult {
+	return &source.GPUsUsageMaxResult{
+		Cluster:   pk.Cluster,
+		Namespace: pk.Namespace,
+		Pod:       pk.Pod,
+		Container: container,
+		Data: []*util.Vector{
+			{Value: value},
+		},
+	}
+}
+
+// TestApplyRAMBytesUsedMax_KeepsLargestAcrossDuplicateRows regression-tests
+// the case where the Prometheus RAM max query returns multiple rows for the
+// same (cluster, namespace, pod, container) combination (for example when a
+// pod restarted mid-window and kube-state-metrics emitted a new uid row, or
+// when the pod was scraped from more than one instance). The previous
+// implementation overwrote the stored max with whichever row happened to be
+// iterated last, producing an arbitrary (and typically tiny) maximum. This
+// test asserts that applyRAMBytesUsedMax now takes the max across rows.
+func TestApplyRAMBytesUsedMax_KeepsLargestAcrossDuplicateRows(t *testing.T) {
+	const container = "c1"
+	const small = 442368.0
+	const large = 65101824.0
+
+	// The order of duplicate-row results must not matter; verify both orderings.
+	testCases := map[string]struct {
+		results []*source.RAMUsageMaxResult
+	}{
+		"large first, small second": {
+			results: []*source.RAMUsageMaxResult{
+				ramUsageMaxResult(podKey1, container, large),
+				ramUsageMaxResult(podKey1, container, small),
+			},
+		},
+		"small first, large second": {
+			results: []*source.RAMUsageMaxResult{
+				ramUsageMaxResult(podKey1, container, small),
+				ramUsageMaxResult(podKey1, container, large),
+			},
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			podMap := makePodMapWithEmptyAllocations(podKey1)
+
+			applyRAMBytesUsedMax(podMap, tc.results, map[podKey][]podKey{})
+
+			alloc, ok := podMap[podKey1].Allocations[container]
+			if !ok {
+				t.Fatalf("container allocation %q was not created", container)
+			}
+			if alloc.RawAllocationOnly == nil {
+				t.Fatalf("RawAllocationOnly was not populated for container %q", container)
+			}
+			if got := alloc.RawAllocationOnly.RAMBytesUsageMax; got != large {
+				t.Errorf("RAMBytesUsageMax = %v; want %v (max across duplicate rows)", got, large)
+			}
+		})
+	}
+}
+
+// TestApplyCPUCoresUsedMax_KeepsLargestAcrossDuplicateRows is the CPU analogue
+// of TestApplyRAMBytesUsedMax_KeepsLargestAcrossDuplicateRows.
+func TestApplyCPUCoresUsedMax_KeepsLargestAcrossDuplicateRows(t *testing.T) {
+	const container = "c1"
+	const small = 0.01
+	const large = 1.75
+
+	testCases := map[string]struct {
+		results []*source.CPUUsageMaxResult
+	}{
+		"large first, small second": {
+			results: []*source.CPUUsageMaxResult{
+				cpuUsageMaxResult(podKey1, container, large),
+				cpuUsageMaxResult(podKey1, container, small),
+			},
+		},
+		"small first, large second": {
+			results: []*source.CPUUsageMaxResult{
+				cpuUsageMaxResult(podKey1, container, small),
+				cpuUsageMaxResult(podKey1, container, large),
+			},
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			podMap := makePodMapWithEmptyAllocations(podKey1)
+
+			applyCPUCoresUsedMax(podMap, tc.results, map[podKey][]podKey{})
+
+			alloc, ok := podMap[podKey1].Allocations[container]
+			if !ok {
+				t.Fatalf("container allocation %q was not created", container)
+			}
+			if alloc.RawAllocationOnly == nil {
+				t.Fatalf("RawAllocationOnly was not populated for container %q", container)
+			}
+			if got := alloc.RawAllocationOnly.CPUCoreUsageMax; got != large {
+				t.Errorf("CPUCoreUsageMax = %v; want %v (max across duplicate rows)", got, large)
+			}
+		})
+	}
+}
+
+// TestApplyGPUUsageMax_KeepsLargestAcrossDuplicateRows is the GPU analogue.
+// It additionally asserts the behavior for the pointer-valued GPUUsageMax
+// field, covering both the fresh-create and update paths.
+func TestApplyGPUUsageMax_KeepsLargestAcrossDuplicateRows(t *testing.T) {
+	const container = "c1"
+	const small = 0.02
+	const large = 0.97
+
+	testCases := map[string]struct {
+		results []*source.GPUsUsageMaxResult
+	}{
+		"large first, small second": {
+			results: []*source.GPUsUsageMaxResult{
+				gpuUsageMaxResult(podKey1, container, large),
+				gpuUsageMaxResult(podKey1, container, small),
+			},
+		},
+		"small first, large second": {
+			results: []*source.GPUsUsageMaxResult{
+				gpuUsageMaxResult(podKey1, container, small),
+				gpuUsageMaxResult(podKey1, container, large),
+			},
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			podMap := makePodMapWithEmptyAllocations(podKey1)
+
+			applyGPUUsageMax(podMap, tc.results, map[podKey][]podKey{})
+
+			alloc, ok := podMap[podKey1].Allocations[container]
+			if !ok {
+				t.Fatalf("container allocation %q was not created", container)
+			}
+			if alloc.RawAllocationOnly == nil {
+				t.Fatalf("RawAllocationOnly was not populated for container %q", container)
+			}
+			ptr := alloc.RawAllocationOnly.GPUUsageMax
+			if ptr == nil {
+				t.Fatalf("GPUUsageMax was nil; expected %v", large)
+			}
+			if *ptr != large {
+				t.Errorf("GPUUsageMax = %v; want %v (max across duplicate rows)", *ptr, large)
+			}
+		})
+	}
+}