Переглянути джерело

BURNDOWN-209 fix Inf and negative values in pod-to-PVC attribution

Signed-off-by: Niko Kovacevic <nikovacevic@gmail.com>
Niko Kovacevic 2 роки тому
батько
коміт
ff8ed918ea

+ 1 - 1
pkg/costmodel/allocation.go

@@ -665,7 +665,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	// split appropriately among each pod's container allocation.
 	podPVCMap := map[podKey][]*pvc{}
 	buildPodPVCMap(podPVCMap, pvMap, pvcMap, podMap, resPodPVCAllocation, podUIDKeyMap, ingestPodUID)
-	applyPVCsToPods(window, podMap, podPVCMap, pvcMap, resolution)
+	applyPVCsToPods(window, podMap, podPVCMap, pvcMap)
 
 	// Identify PVCs without pods and add pv costs to the unmounted Allocation for the pvc's cluster
 	applyUnmountedPVCs(window, podMap, pvcMap)

+ 13 - 4
pkg/costmodel/allocation_helpers.go

@@ -1961,7 +1961,7 @@ func buildPodPVCMap(podPVCMap map[podKey][]*pvc, pvMap map[pvKey]*pv, pvcMap map
 	}
 }
 
-func applyPVCsToPods(window kubecost.Window, podMap map[podKey]*pod, podPVCMap map[podKey][]*pvc, pvcMap map[pvcKey]*pvc, resolution time.Duration) {
+func applyPVCsToPods(window kubecost.Window, podMap map[podKey]*pod, podPVCMap map[podKey][]*pvc, pvcMap map[pvcKey]*pvc) {
 	// Because PVCs can be shared among pods, the respective pv cost
 	// needs to be evenly distributed to those pods based on time
 	// running, as well as the amount of time the pvc was shared.
@@ -2006,12 +2006,20 @@ func applyPVCsToPods(window kubecost.Window, podMap map[podKey]*pod, podPVCMap m
 
 		pvc, ok := pvcMap[thisPVCKey]
 		if !ok {
-			log.DedupedWarningf(5, "Missing pvc with key %s", thisPVCKey)
+			log.Warnf("Allocation: Compute: applyPVCsToPods: missing pvc with key %s", thisPVCKey)
+			continue
+		}
+		if pvc == nil {
+			log.Warnf("Allocation: Compute: applyPVCsToPods: nil pvc with key %s", thisPVCKey)
 			continue
 		}
 
 		// Determine coefficients for each pvc-pod relation.
-		sharedPVCCostCoefficients := getPVCCostCoefficients(intervals, pvc, resolution)
+		sharedPVCCostCoefficients, err := getPVCCostCoefficients(intervals, pvc)
+		if err != nil {
+			log.Warnf("Allocation: Compute: applyPVCsToPods: getPVCCostCoefficients: %s", err)
+			continue
+		}
 
 		// Distribute pvc costs to Allocations
 		for thisPodKey, coeffComponents := range sharedPVCCostCoefficients {
@@ -2043,8 +2051,9 @@ func applyPVCsToPods(window kubecost.Window, podMap map[podKey]*pod, podPVCMap m
 					Cluster: pvc.Volume.Cluster,
 					Name:    pvc.Volume.Name,
 				}
+
 				// Both Cost and byteHours should be multiplied by the coef and divided by count
-				// so that you if all allocations with a given pv key are summed the result of those
+				// so that if all allocations with a given pv key are summed the result of those
 				// would be equal to the values of the original pv
 				count := float64(len(pod.Allocations))
 				alloc.PVs[pvKey] = &kubecost.PVAllocation{

+ 18 - 8
pkg/costmodel/intervals.go

@@ -1,6 +1,7 @@
 package costmodel
 
 import (
+	"fmt"
 	"sort"
 	"time"
 
@@ -79,40 +80,49 @@ func getIntervalPointsFromWindows(windows map[podKey]kubecost.Window) IntervalPo
 // getPVCCostCoefficients gets a coefficient which represents the scale
 // factor that each PVC in a pvcIntervalMap and corresponding slice of
 // IntervalPoints intervals uses to calculate a cost for that PVC's PV.
-func getPVCCostCoefficients(intervals IntervalPoints, thisPVC *pvc, resolution time.Duration) map[podKey][]CoefficientComponent {
+func getPVCCostCoefficients(intervals IntervalPoints, thisPVC *pvc) (map[podKey][]CoefficientComponent, error) {
 	// pvcCostCoefficientMap has a format such that the individual coefficient
 	// components are preserved for testing purposes.
 	pvcCostCoefficientMap := make(map[podKey][]CoefficientComponent)
 
-	// Reset the start time by the offset as well. so that offset is not used in coefficient calculation!
-	startTime := thisPVC.Start.Add(resolution)
-	pvcWindow := kubecost.NewWindow(&startTime, &thisPVC.End)
+	pvcWindow := kubecost.NewWindow(&thisPVC.Start, &thisPVC.End)
+	pvcWindowDurationMinutes := pvcWindow.Duration().Minutes()
+	if pvcWindowDurationMinutes <= 0.0 {
+		// Protect against Inf and NaN issues that would be caused by dividing
+		// by zero later on.
+		return nil, fmt.Errorf("detected PVC with window of zero duration: %s/%s/%s", thisPVC.Cluster, thisPVC.Namespace, thisPVC.Name)
+	}
+
 	unmountedKey := getUnmountedPodKey(thisPVC.Cluster)
 
 	var void struct{}
 	activeKeys := map[podKey]struct{}{}
 
-	currentTime := startTime
+	currentTime := thisPVC.Start
+
 	// For each interval i.e. for any time a pod-PVC relation ends or starts...
 	for _, point := range intervals {
 		// If the current point happens at a later time than the previous point
 		if !point.Time.Equal(currentTime) {
+			// If there are active keys, attribute one unit of proportion to
+			// each active key.
 			for key := range activeKeys {
 				pvcCostCoefficientMap[key] = append(
 					pvcCostCoefficientMap[key],
 					CoefficientComponent{
-						Time:       point.Time.Sub(currentTime).Minutes() / pvcWindow.Duration().Minutes(),
+						Time:       point.Time.Sub(currentTime).Minutes() / pvcWindowDurationMinutes,
 						Proportion: 1.0 / float64(len(activeKeys)),
 					},
 				)
 
 			}
+
 			// If there are no active keys attribute all cost to the unmounted pv
 			if len(activeKeys) == 0 {
 				pvcCostCoefficientMap[unmountedKey] = append(
 					pvcCostCoefficientMap[unmountedKey],
 					CoefficientComponent{
-						Time:       point.Time.Sub(currentTime).Minutes() / pvcWindow.Duration().Minutes(),
+						Time:       point.Time.Sub(currentTime).Minutes() / pvcWindowDurationMinutes,
 						Proportion: 1.0,
 					},
 				)
@@ -142,7 +152,7 @@ func getPVCCostCoefficients(intervals IntervalPoints, thisPVC *pvc, resolution t
 			},
 		)
 	}
-	return pvcCostCoefficientMap
+	return pvcCostCoefficientMap, nil
 }
 
 // getCoefficientFromComponents takes the components of a PVC-pod PV cost coefficient

+ 53 - 25
pkg/costmodel/intervals_test.go

@@ -1,6 +1,7 @@
 package costmodel
 
 import (
+	"fmt"
 	"reflect"
 	"testing"
 	"time"
@@ -150,32 +151,39 @@ func TestGetIntervalPointsFromWindows(t *testing.T) {
 }
 
 func TestGetPVCCostCoefficients(t *testing.T) {
+	pod1Key := newPodKey("cluster1", "namespace1", "pod1")
+	pod2Key := newPodKey("cluster1", "namespace1", "pod2")
+	pod3Key := newPodKey("cluster1", "namespace1", "pod3")
+	pod4Key := newPodKey("cluster1", "namespace1", "pod4")
+	ummountedPodKey := newPodKey("cluster1", kubecost.UnmountedSuffix, kubecost.UnmountedSuffix)
+
 	pvc1 := &pvc{
-		Bytes:     0,
+		Bytes:     100 * 1024 * 1024 * 1024,
 		Name:      "pvc1",
 		Cluster:   "cluster1",
 		Namespace: "namespace1",
 		Start:     time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC),
 		End:       time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC),
 	}
-	pod1Key := newPodKey("cluster1", "namespace1", "pod1")
-	pod2Key := newPodKey("cluster1", "namespace1", "pod2")
-	pod3Key := newPodKey("cluster1", "namespace1", "pod3")
-	pod4Key := newPodKey("cluster1", "namespace1", "pod4")
-	ummountedPodKey := newPodKey("cluster1", kubecost.UnmountedSuffix, kubecost.UnmountedSuffix)
 
-	zeroDuration, _ := time.ParseDuration("0m0s")
-	fiveMinOffset, _ := time.ParseDuration("5m")
-	// Reflects the pvc that is offset by duration, the actual case that happens in allocation workflow.
-	// before core-370, the offset was causing a unmounted shared pvc coefficient map for the duration of the offset.
-	pvcWithDurationOffset := &pvc{
-		Bytes:     0,
-		Name:      "pvc1",
+	pvc2 := &pvc{
+		Bytes:     100 * 1024 * 1024 * 1024,
+		Name:      "pvc2",
 		Cluster:   "cluster1",
 		Namespace: "namespace1",
-		Start:     time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC).Add(-fiveMinOffset),
+		Start:     time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC),
 		End:       time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC),
 	}
+
+	pvc3 := &pvc{
+		Bytes:     100 * 1024 * 1024 * 1024,
+		Name:      "pvc3",
+		Cluster:   "cluster1",
+		Namespace: "namespace1",
+		Start:     time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC),
+		End:       time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC),
+	}
+
 	cases := []struct {
 		name           string
 		pvc            *pvc
@@ -183,6 +191,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 		intervals      []IntervalPoint
 		resolution     time.Duration
 		expected       map[podKey][]CoefficientComponent
+		expError       error
 	}{
 		{
 			name: "four pods w/ various overlaps",
@@ -197,7 +206,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod3Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod1Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{0.5, 0.25},
@@ -226,7 +235,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 30, 0, 0, time.UTC), "end", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod2Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{1.0, 0.5},
@@ -245,7 +254,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod2Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{0.5, 0.5},
@@ -265,7 +274,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC), "start", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod1Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{1.0, 1.0},
@@ -281,7 +290,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 45, 0, 0, time.UTC), "start", pod2Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod2Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{1.0, 0.25},
@@ -301,7 +310,7 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 15, 0, 0, time.UTC), "start", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 45, 0, 0, time.UTC), "end", pod1Key),
 			},
-			resolution: zeroDuration,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{1.0, 0.5},
@@ -312,17 +321,16 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				},
 			},
 		},
-		// Test case to ensure the offset doesnt cause any unmounted
 		{
-			name: "pvcMap with duration offset not causing any unmounted entry in sharedPV Coefficient",
-			pvc:  pvcWithDurationOffset,
+			name: "back to back pods, full coverage",
+			pvc:  pvc2,
 			intervals: []IntervalPoint{
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC), "start", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 30, 0, 0, time.UTC), "start", pod2Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 8, 30, 0, 0, time.UTC), "end", pod1Key),
 				NewIntervalPoint(time.Date(2021, 2, 19, 9, 0, 0, 0, time.UTC), "end", pod2Key),
 			},
-			resolution: fiveMinOffset,
+			expError: nil,
 			expected: map[podKey][]CoefficientComponent{
 				pod1Key: {
 					{1.0, 0.5},
@@ -332,11 +340,31 @@ func TestGetPVCCostCoefficients(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "zero duration",
+			pvc:  pvc3,
+			intervals: []IntervalPoint{
+				NewIntervalPoint(time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC), "start", pod1Key),
+				NewIntervalPoint(time.Date(2021, 2, 19, 8, 0, 0, 0, time.UTC), "end", pod1Key),
+			},
+			expError: fmt.Errorf("detected PVC with window of zero duration: %s/%s/%s", "cluster1", "namespace1", "pvc3"),
+			expected: nil,
+		},
 	}
 
 	for _, testCase := range cases {
 		t.Run(testCase.name, func(t *testing.T) {
-			result := getPVCCostCoefficients(testCase.intervals, testCase.pvc, testCase.resolution)
+			result, err := getPVCCostCoefficients(testCase.intervals, testCase.pvc)
+			if err != nil {
+				if testCase.expError == nil {
+					t.Errorf("getPVCCostCoefficients failed: got unexpected error: %v", err)
+				}
+				return
+			}
+
+			if err == nil && testCase.expError != nil {
+				t.Errorf("getPVCCostCoefficients failed: did not get expected error: %v", testCase.expError)
+			}
 
 			if !reflect.DeepEqual(result, testCase.expected) {
 				t.Errorf("getPVCCostCoefficients test failed: %s: Got %+v but expected %+v", testCase.name, result, testCase.expected)