Просмотр исходного кода

Merge pull request #237 from kubecost/Bolt-add-requests-usage-back

Efficiency Fixes for Cost Model
Matt Bolt 6 лет назад
Родитель
Commit
af70aeca63
2 измененных файлов с 223 добавлено и 14 удалено
  1. 77 12
      costmodel/aggregations.go
  2. 146 2
      costmodel/costmodel.go

+ 77 - 12
costmodel/aggregations.go

@@ -50,6 +50,24 @@ type Aggregation struct {
 	TotalCost            float64   `json:"totalCost"`
 }
 
// VectorJoinOp is an operation func that accepts a result vector pointer
// for a specific timestamp and two float64 pointers representing the
// input vectors for that timestamp. x or y inputs can be nil, but not
// both. The op should use the x and y values to set Value on the result
// ptr. If a result could not be generated, the op should return false,
// which will omit the vector for the specific timestamp. Otherwise,
// return true denoting a successful op.
type VectorJoinOp func(result *Vector, x *float64, y *float64) bool
+
// VectorValue converts a (value, ok) pair into an optional float64:
// it yields a pointer to v when ok is true, and nil otherwise.
func VectorValue(v float64, ok bool) *float64 {
	if ok {
		return &v
	}

	return nil
}
+
 func (a *Aggregation) GetDataCount() int {
 	length := 0
 
@@ -174,6 +192,24 @@ type AggregationOptions struct {
 	SharedResourceInfo *SharedResourceInfo
 }
 
+// Helper method to test request/usgae values against allocation averages for efficiency scores. Generate a warning log if
+// clamp is required
+func clampAverage(requestsAvg float64, usedAverage float64, allocationAvg float64, resource string) (float64, float64) {
+	rAvg := requestsAvg
+	if rAvg > allocationAvg {
+		klog.V(3).Infof("Warning: Average %s Requested (%f) > Average %s Allocated (%f). Clamping.", resource, rAvg, resource, allocationAvg)
+		rAvg = allocationAvg
+	}
+
+	uAvg := usedAverage
+	if uAvg > allocationAvg {
+		klog.V(3).Infof("Warning: Average %s Used (%f) > Average %s Allocated (%f). Clamping.", resource, uAvg, resource, allocationAvg)
+		uAvg = allocationAvg
+	}
+
+	return rAvg, uAvg
+}
+
 // AggregateCostData aggregates raw cost data by field; e.g. namespace, cluster, service, or label. In the case of label, callers
 // must pass a slice of subfields indicating the labels by which to group. Provider is used to define custom resource pricing.
 // See AggregationOptions for optional parameters.
@@ -244,7 +280,7 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 		}
 	}
 
-	for _, agg := range aggregations {
+	for key, agg := range aggregations {
 		agg.CPUCost = totalVectors(agg.CPUCostVector)
 		agg.RAMCost = totalVectors(agg.RAMCostVector)
 		agg.GPUCost = totalVectors(agg.GPUCostVector)
@@ -267,6 +303,13 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 
 		agg.TotalCost = agg.CPUCost + agg.RAMCost + agg.GPUCost + agg.PVCost + agg.NetworkCost + agg.SharedCost
 
+		// Evicted and Completed Pods can still show up here, but have 0 cost.
+		// Filter these by default. Any reason to keep them?
+		if agg.TotalCost == 0 {
+			delete(aggregations, key)
+			continue
+		}
+
 		agg.CPUAllocationAverage = averageVectors(agg.CPUAllocationVectors)
 		agg.GPUAllocationAverage = averageVectors(agg.GPUAllocationVectors)
 		agg.RAMAllocationAverage = averageVectors(agg.RAMAllocationVectors)
@@ -288,6 +331,10 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 			if agg.CPUAllocationAverage > 0.0 {
 				avgCPURequested := averageVectors(agg.CPURequestedVectors)
 				avgCPUUsed := averageVectors(agg.CPUUsedVectors)
+
+				// Clamp averages, log range violations
+				avgCPURequested, avgCPUUsed = clampAverage(avgCPURequested, avgCPUUsed, agg.CPUAllocationAverage, "CPU")
+
 				CPUIdle = ((avgCPURequested - avgCPUUsed) / agg.CPUAllocationAverage)
 				agg.CPUEfficiency = 1.0 - CPUIdle
 			}
@@ -297,6 +344,10 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 			if agg.RAMAllocationAverage > 0.0 {
 				avgRAMRequested := averageVectors(agg.RAMRequestedVectors)
 				avgRAMUsed := averageVectors(agg.RAMUsedVectors)
+
+				// Clamp averages, log range violations
+				avgRAMRequested, avgRAMUsed = clampAverage(avgRAMRequested, avgRAMUsed, agg.RAMAllocationAverage, "RAM")
+
 				RAMIdle = ((avgRAMRequested - avgRAMUsed) / agg.RAMAllocationAverage)
 				agg.RAMEfficiency = 1.0 - RAMIdle
 			}
@@ -567,6 +618,24 @@ func NormalizeVectorByVector(xvs []*Vector, yvs []*Vector) []*Vector {
 // Matching Vectors are summed, while unmatched Vectors are passed through.
 // e.g. [(t=1, 1), (t=2, 2)] + [(t=2, 2), (t=3, 3)] = [(t=1, 1), (t=2, 4), (t=3, 3)]
 func addVectors(xvs []*Vector, yvs []*Vector) []*Vector {
+	sumOp := func(result *Vector, x *float64, y *float64) bool {
+		if x != nil && y != nil {
+			result.Value = *x + *y
+		} else if y != nil {
+			result.Value = *y
+		} else if x != nil {
+			result.Value = *x
+		}
+
+		return true
+	}
+
+	return ApplyVectorOp(xvs, yvs, sumOp)
+}
+
+// ApplyVectorOp accepts two vectors, synchronizes timestamps, and executes an operation
+// on each vector. See VectorJoinOp for details.
+func ApplyVectorOp(xvs []*Vector, yvs []*Vector, op VectorJoinOp) []*Vector {
 	// round all non-zero timestamps to the nearest 10 second mark
 	for _, yv := range yvs {
 		if yv.Timestamp != 0 {
@@ -589,8 +658,8 @@ func addVectors(xvs []*Vector, yvs []*Vector) []*Vector {
 		return xvs
 	}
 
-	// sum stores the sum of the vector slices xvs and yvs
-	var sum []*Vector
+	// result contains the final vector slice after joining xvs and yvs
+	var result []*Vector
 
 	// timestamps stores all timestamps present in both vector slices
 	// without duplicates
@@ -618,21 +687,17 @@ func addVectors(xvs []*Vector, yvs []*Vector) []*Vector {
 		}
 	}
 
-	// iterate over each timestamp to produce a final summed vector slice
+	// iterate over each timestamp to produce a final op vector slice
 	sort.Float64s(timestamps)
 	for _, t := range timestamps {
 		x, okX := xMap[t]
 		y, okY := yMap[t]
 		sv := &Vector{Timestamp: t}
-		if okX && okY {
-			sv.Value = x + y
-		} else if okX {
-			sv.Value = x
-		} else if okY {
-			sv.Value = y
+
+		if op(sv, VectorValue(x, okX), VectorValue(y, okY)) {
+			result = append(result, sv)
 		}
-		sum = append(sum, sv)
 	}
 
-	return sum
+	return result
 }

+ 146 - 2
costmodel/costmodel.go

@@ -1365,6 +1365,10 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset
 
 func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
 	startString, endString, windowString string, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
+	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, windowString, "", windowString, "")
+	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, windowString, "", windowString, "")
+	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, windowString, "", windowString, "")
+	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, windowString, "")
 	queryRAMAlloc := fmt.Sprintf(queryRAMAllocation, windowString, "", windowString, "")
 	queryCPUAlloc := fmt.Sprintf(queryCPUAllocation, windowString, "", windowString, "")
 	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, windowString, "", windowString, "")
@@ -1402,9 +1406,33 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	}
 
 	var wg sync.WaitGroup
-	wg.Add(15)
+	wg.Add(19)
 
 	var promErr error
+	var resultRAMRequests interface{}
+	go func() {
+		defer wg.Done()
+
+		resultRAMRequests, promErr = QueryRange(cli, queryRAMRequests, start, end, window)
+	}()
+	var resultRAMUsage interface{}
+	go func() {
+		defer wg.Done()
+
+		resultRAMUsage, promErr = QueryRange(cli, queryRAMUsage, start, end, window)
+	}()
+	var resultCPURequests interface{}
+	go func() {
+		defer wg.Done()
+
+		resultCPURequests, promErr = QueryRange(cli, queryCPURequests, start, end, window)
+	}()
+	var resultCPUUsage interface{}
+	go func() {
+		defer wg.Done()
+
+		resultCPUUsage, promErr = QueryRange(cli, queryCPUUsage, start, end, window)
+	}()
 	var resultRAMAllocations interface{}
 	go func() {
 		defer wg.Done()
@@ -1606,6 +1634,36 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	containers := make(map[string]bool)
 	otherClusterPVRecorded := make(map[string]bool)
 
+	RAMReqMap, err := GetContainerMetricVectors(resultRAMRequests, true, normalizationValue, clusterID)
+	if err != nil {
+		return nil, err
+	}
+	for key := range RAMReqMap {
+		containers[key] = true
+	}
+	RAMUsedMap, err := GetContainerMetricVectors(resultRAMUsage, true, normalizationValue, clusterID)
+	if err != nil {
+		return nil, err
+	}
+	for key := range RAMUsedMap {
+		containers[key] = true
+	}
+
+	CPUReqMap, err := GetContainerMetricVectors(resultCPURequests, true, normalizationValue, clusterID)
+	if err != nil {
+		return nil, err
+	}
+	for key := range CPUReqMap {
+		containers[key] = true
+	}
+	CPUUsedMap, err := GetContainerMetricVectors(resultCPUUsage, false, normalizationValue, clusterID) // No need to normalize here, as this comes from a counter
+	if err != nil {
+		return nil, err
+	}
+	for key := range CPUUsedMap {
+		containers[key] = true
+	}
+
 	RAMAllocMap, err := GetContainerMetricVectors(resultRAMAllocations, true, normalizationValue, clusterID)
 	if err != nil {
 		return nil, err
@@ -1628,6 +1686,12 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		containers[key] = true
 	}
 
+	// Request metrics can show up after pod eviction and completion.
+	// This method synchronizes requests to allocations such that when
+	// allocation is 0, so are requests
+	applyAllocationToRequests(RAMAllocMap, RAMReqMap)
+	applyAllocationToRequests(CPUAllocMap, CPUReqMap)
+
 	currentContainers := make(map[string]v1.Pod)
 	for _, pod := range podlist {
 		if pod.Status.Phase != v1.PodRunning {
@@ -1713,6 +1777,26 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 				containerName := container.Name
 
 				newKey := newContainerMetricFromValues(ns, podName, containerName, pod.Spec.NodeName, clusterID).Key()
+				RAMReqV, ok := RAMReqMap[newKey]
+				if !ok {
+					klog.V(4).Info("no RAM requests for " + newKey)
+					RAMReqV = []*Vector{}
+				}
+				RAMUsedV, ok := RAMUsedMap[newKey]
+				if !ok {
+					klog.V(4).Info("no RAM usage for " + newKey)
+					RAMUsedV = []*Vector{}
+				}
+				CPUReqV, ok := CPUReqMap[newKey]
+				if !ok {
+					klog.V(4).Info("no CPU requests for " + newKey)
+					CPUReqV = []*Vector{}
+				}
+				CPUUsedV, ok := CPUUsedMap[newKey]
+				if !ok {
+					klog.V(4).Info("no CPU usage for " + newKey)
+					CPUUsedV = []*Vector{}
+				}
 				RAMAllocsV, ok := RAMAllocMap[newKey]
 				if !ok {
 					klog.V(4).Info("no RAM allocation for " + newKey)
@@ -1747,6 +1831,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 					Jobs:            getJobsOfPod(pod),
 					Statefulsets:    getStatefulSetsOfPod(pod),
 					NodeData:        nodeData,
+					RAMReq:          RAMReqV,
+					RAMUsed:         RAMUsedV,
+					CPUReq:          CPUReqV,
+					CPUUsed:         CPUUsedV,
 					RAMAllocation:   RAMAllocsV,
 					CPUAllocation:   CPUAllocsV,
 					GPUReq:          GPUReqV,
@@ -1767,7 +1855,26 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 			// Not all information is sent to prometheus via ksm, so fill out what we can without k8s api
 			klog.V(4).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
 			c, _ := NewContainerMetricFromKey(key)
-
+			RAMReqV, ok := RAMReqMap[key]
+			if !ok {
+				klog.V(4).Info("no RAM requests for " + key)
+				RAMReqV = []*Vector{}
+			}
+			RAMUsedV, ok := RAMUsedMap[key]
+			if !ok {
+				klog.V(4).Info("no RAM usage for " + key)
+				RAMUsedV = []*Vector{}
+			}
+			CPUReqV, ok := CPUReqMap[key]
+			if !ok {
+				klog.V(4).Info("no CPU requests for " + key)
+				CPUReqV = []*Vector{}
+			}
+			CPUUsedV, ok := CPUUsedMap[key]
+			if !ok {
+				klog.V(4).Info("no CPU usage for " + key)
+				CPUUsedV = []*Vector{}
+			}
 			RAMAllocsV, ok := RAMAllocMap[key]
 			if !ok {
 				klog.V(4).Info("no RAM allocation for " + key)
@@ -1870,6 +1977,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 				Namespace:       c.Namespace,
 				Services:        podServices,
 				Deployments:     podDeployments,
+				RAMReq:          RAMReqV,
+				RAMUsed:         RAMUsedV,
+				CPUReq:          CPUReqV,
+				CPUUsed:         CPUUsedV,
 				RAMAllocation:   RAMAllocsV,
 				CPUAllocation:   CPUAllocsV,
 				GPUReq:          GPUReqV,
@@ -1900,6 +2011,39 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	return containerNameCost, err
 }
 
+func applyAllocationToRequests(allocationMap map[string][]*Vector, requestMap map[string][]*Vector) {
+	// The result of the normalize operation will be a new []*Vector to replace the requests
+	normalizeOp := func(r *Vector, x *float64, y *float64) bool {
+		// Omit data (return false) if both x and y inputs don't exist
+		if x == nil || y == nil {
+			return false
+		}
+
+		// If the allocation value is 0, 0 out request value
+		if *x == 0 {
+			r.Value = 0
+		} else {
+			r.Value = *y
+		}
+
+		return true
+	}
+
+	// Run normalization on all request vectors in the mapping
+	for k, requests := range requestMap {
+
+		// Only run normalization where there are valid allocations
+		allocations, ok := allocationMap[k]
+		if !ok {
+			delete(requestMap, k)
+			continue
+		}
+
+		// Replace request map with normalized
+		requestMap[k] = ApplyVectorOp(allocations, requests, normalizeOp)
+	}
+}
+
 func addMetricPVData(pvAllocationMap map[string][]*PersistentVolumeClaimData, pvCostMap map[string]*costAnalyzerCloud.PV, cp costAnalyzerCloud.Provider) {
 	cfg, err := cp.GetConfig()
 	if err != nil {