Преглед изворни кода

Merge pull request #681 from kubecost/mmd-ccd-reduce-prom

ComputeCostData gets CPU and RAM requests from the Kubernetes API
Michael Dresser пре 5 година
родитељ
комит
222b1bcebe
1 измењених фајлова са 42 додато и 40 уклоњено
  1. 42 40
      pkg/costmodel/costmodel.go

+ 42 - 40
pkg/costmodel/costmodel.go

@@ -226,9 +226,7 @@ const (
 )
 )
 
 
 func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyzerCloud.Provider, window string, offset string, filterNamespace string) (map[string]*CostData, error) {
 func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyzerCloud.Provider, window string, offset string, filterNamespace string) (map[string]*CostData, error) {
-	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, window, offset, window, offset)
 	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, window, offset, window, offset)
 	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, window, offset, window, offset)
-	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, window, offset, window, offset)
 	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, window, offset)
 	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, window, offset)
 	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, window, offset, window, offset, 1.0, window, offset)
 	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, window, offset, window, offset, 1.0, window, offset)
 	queryPVRequests := fmt.Sprintf(queryPVRequestsStr)
 	queryPVRequests := fmt.Sprintf(queryPVRequestsStr)
@@ -242,9 +240,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 
 	// Submit all Prometheus queries asynchronously
 	// Submit all Prometheus queries asynchronously
 	ctx := prom.NewContext(cli)
 	ctx := prom.NewContext(cli)
-	resChRAMRequests := ctx.Query(queryRAMRequests)
 	resChRAMUsage := ctx.Query(queryRAMUsage)
 	resChRAMUsage := ctx.Query(queryRAMUsage)
-	resChCPURequests := ctx.Query(queryCPURequests)
 	resChCPUUsage := ctx.Query(queryCPUUsage)
 	resChCPUUsage := ctx.Query(queryCPUUsage)
 	resChGPURequests := ctx.Query(queryGPURequests)
 	resChGPURequests := ctx.Query(queryGPURequests)
 	resChPVRequests := ctx.Query(queryPVRequests)
 	resChPVRequests := ctx.Query(queryPVRequests)
@@ -277,9 +273,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	}
 	}
 
 
 	// Process Prometheus query results. Handle errors using ctx.Errors.
 	// Process Prometheus query results. Handle errors using ctx.Errors.
-	resRAMRequests, _ := resChRAMRequests.Await()
 	resRAMUsage, _ := resChRAMUsage.Await()
 	resRAMUsage, _ := resChRAMUsage.Await()
-	resCPURequests, _ := resChCPURequests.Await()
 	resCPUUsage, _ := resChCPUUsage.Await()
 	resCPUUsage, _ := resChCPUUsage.Await()
 	resGPURequests, _ := resChGPURequests.Await()
 	resGPURequests, _ := resChGPURequests.Await()
 	resPVRequests, _ := resChPVRequests.Await()
 	resPVRequests, _ := resChPVRequests.Await()
@@ -345,14 +339,6 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	containerNameCost := make(map[string]*CostData)
 	containerNameCost := make(map[string]*CostData)
 	containers := make(map[string]bool)
 	containers := make(map[string]bool)
 
 
-	RAMReqMap, err := GetContainerMetricVector(resRAMRequests, true, normalizationValue, clusterID)
-	if err != nil {
-		return nil, err
-	}
-	for key := range RAMReqMap {
-		containers[key] = true
-	}
-
 	RAMUsedMap, err := GetContainerMetricVector(resRAMUsage, true, normalizationValue, clusterID)
 	RAMUsedMap, err := GetContainerMetricVector(resRAMUsage, true, normalizationValue, clusterID)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -360,13 +346,6 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	for key := range RAMUsedMap {
 	for key := range RAMUsedMap {
 		containers[key] = true
 		containers[key] = true
 	}
 	}
-	CPUReqMap, err := GetContainerMetricVector(resCPURequests, true, normalizationValue, clusterID)
-	if err != nil {
-		return nil, err
-	}
-	for key := range CPUReqMap {
-		containers[key] = true
-	}
 	GPUReqMap, err := GetContainerMetricVector(resGPURequests, true, normalizationValue, clusterID)
 	GPUReqMap, err := GetContainerMetricVector(resGPURequests, true, normalizationValue, clusterID)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err
@@ -401,6 +380,9 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 		if _, ok := containerNameCost[key]; ok {
 		if _, ok := containerNameCost[key]; ok {
 			continue // because ordering is important for the allocation model (all PV's applied to the first), just dedupe if it's already been added.
 			continue // because ordering is important for the allocation model (all PV's applied to the first), just dedupe if it's already been added.
 		}
 		}
+		// The _else_ case for this statement is the case in which the container has been
+		// deleted so we have usage information but not request information. In that case,
+		// we return partial data for CPU and RAM: only usage and not requests.
 		if pod, ok := currentContainers[key]; ok {
 		if pod, ok := currentContainers[key]; ok {
 			podName := pod.GetObjectMeta().GetName()
 			podName := pod.GetObjectMeta().GetName()
 			ns := pod.GetObjectMeta().GetNamespace()
 			ns := pod.GetObjectMeta().GetNamespace()
@@ -487,21 +469,40 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 				// recreate the key and look up data for this container
 				// recreate the key and look up data for this container
 				newKey := NewContainerMetricFromValues(ns, podName, containerName, pod.Spec.NodeName, clusterID).Key()
 				newKey := NewContainerMetricFromValues(ns, podName, containerName, pod.Spec.NodeName, clusterID).Key()
 
 
-				RAMReqV, ok := RAMReqMap[newKey]
-				if !ok {
-					klog.V(4).Info("no RAM requests for " + newKey)
-					RAMReqV = []*util.Vector{{}}
+				// k8s.io/apimachinery/pkg/api/resource/amount.go and
+				// k8s.io/apimachinery/pkg/api/resource/quantity.go for
+				// details on the "amount" API. See
+				// https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-types
+				// for the units of memory and CPU.
+				ramRequestBytes := container.Resources.Requests.Memory().Value()
+
+				// Because RAM (and CPU) information isn't coming from Prometheus, it won't
+				// have a timestamp associated with it. We need to provide a timestamp,
+				// otherwise the vector op that gets applied to take the max of usage
+				// and request won't work properly and will only take into account
+				// usage.
+				RAMReqV := []*util.Vector{
+					{
+						Value:     float64(ramRequestBytes),
+						Timestamp: float64(time.Now().UTC().Unix()),
+					},
+				}
+
+				// use millicores so we can convert to cores in a float64 format
+				cpuRequestMilliCores := container.Resources.Requests.Cpu().MilliValue()
+				CPUReqV := []*util.Vector{
+					{
+						Value:     float64(cpuRequestMilliCores) / 1000,
+						Timestamp: float64(time.Now().UTC().Unix()),
+					},
 				}
 				}
+
 				RAMUsedV, ok := RAMUsedMap[newKey]
 				RAMUsedV, ok := RAMUsedMap[newKey]
 				if !ok {
 				if !ok {
 					klog.V(4).Info("no RAM usage for " + newKey)
 					klog.V(4).Info("no RAM usage for " + newKey)
 					RAMUsedV = []*util.Vector{{}}
 					RAMUsedV = []*util.Vector{{}}
 				}
 				}
-				CPUReqV, ok := CPUReqMap[newKey]
-				if !ok {
-					klog.V(4).Info("no CPU requests for " + newKey)
-					CPUReqV = []*util.Vector{{}}
-				}
+
 				GPUReqV, ok := GPUReqMap[newKey]
 				GPUReqV, ok := GPUReqMap[newKey]
 				if !ok {
 				if !ok {
 					klog.V(4).Info("no GPU requests for " + newKey)
 					klog.V(4).Info("no GPU requests for " + newKey)
@@ -559,21 +560,22 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 			if err != nil {
 			if err != nil {
 				return nil, err
 				return nil, err
 			}
 			}
-			RAMReqV, ok := RAMReqMap[key]
-			if !ok {
-				klog.V(4).Info("no RAM requests for " + key)
-				RAMReqV = []*util.Vector{{}}
-			}
+
+			// CPU and RAM requests are obtained from the Kubernetes API.
+			// If this case has been reached, the Kubernetes API will not
+			// have information about the pod because it no longer exists.
+			//
+			// The case where this matters is minimal, mainly in environments
+			// with very short-lived pods that over-request resources.
+			RAMReqV := []*util.Vector{{}}
+			CPUReqV := []*util.Vector{{}}
+
 			RAMUsedV, ok := RAMUsedMap[key]
 			RAMUsedV, ok := RAMUsedMap[key]
 			if !ok {
 			if !ok {
 				klog.V(4).Info("no RAM usage for " + key)
 				klog.V(4).Info("no RAM usage for " + key)
 				RAMUsedV = []*util.Vector{{}}
 				RAMUsedV = []*util.Vector{{}}
 			}
 			}
-			CPUReqV, ok := CPUReqMap[key]
-			if !ok {
-				klog.V(4).Info("no CPU requests for " + key)
-				CPUReqV = []*util.Vector{{}}
-			}
+
 			GPUReqV, ok := GPUReqMap[key]
 			GPUReqV, ok := GPUReqMap[key]
 			if !ok {
 			if !ok {
 				klog.V(4).Info("no GPU requests for " + key)
 				klog.V(4).Info("no GPU requests for " + key)