Przeglądaj źródła

CostModel.ComputeAllocation: fix issues with determining node pricing, including custom pricing and default values

Niko Kovacevic 5 lat temu
rodzic
commit
c0c4192c4c
2 zmienionych plików z 151 dodań i 25 usunięć
  1. 1 1
      pkg/cloud/provider.go
  2. 150 24
      pkg/costmodel/allocation.go

+ 1 - 1
pkg/cloud/provider.go

@@ -20,7 +20,6 @@ import (
 const authSecretPath = "/var/secrets/service-key.json"
 const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
 
-
 var createTableStatements = []string{
 	`CREATE TABLE IF NOT EXISTS names (
 		cluster_id VARCHAR(255) NOT NULL,
@@ -266,6 +265,7 @@ func CustomPricesEnabled(p Provider) bool {
 	if err != nil {
 		return false
 	}
+	// [TODO:CLEANUP] what is going on with this?
 	if config.NegotiatedDiscount == "" {
 		config.NegotiatedDiscount = "0%"
 	}

+ 150 - 24
pkg/costmodel/allocation.go

@@ -2,8 +2,11 @@ package costmodel
 
 import (
 	"fmt"
+	"math"
+	"strconv"
 	"time"
 
+	"github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/env"
 	"github.com/kubecost/cost-model/pkg/kubecost"
 	"github.com/kubecost/cost-model/pkg/log"
@@ -102,8 +105,6 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 	ctx := prom.NewContext(cm.PrometheusClient)
 	startQuerying := time.Now()
 
-	// TODO niko/computeallocation split into required and optional queries?
-
 	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr)
 	resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
 
@@ -282,7 +283,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 
 	// Build out a map of Nodes with resource costs, discounts, and node types
 	// for converting resource allocation data to cumulative costs.
-	nodeMap := map[nodeKey]*Node{}
+	nodeMap := map[nodeKey]*NodePricing{}
 
 	applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
 	applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
@@ -321,23 +322,18 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 	for _, pod := range podMap {
 		for _, alloc := range pod.Allocations {
 			cluster, _ := alloc.Properties.GetCluster()
-			node, _ := alloc.Properties.GetNode()
+			nodeName, _ := alloc.Properties.GetNode()
 			namespace, _ := alloc.Properties.GetNamespace()
 			pod, _ := alloc.Properties.GetPod()
 			container, _ := alloc.Properties.GetContainer()
 
 			podKey := newPodKey(cluster, namespace, pod)
-			nodeKey := newNodeKey(cluster, node)
+			nodeKey := newNodeKey(cluster, nodeName)
 
-			if n, ok := nodeMap[nodeKey]; !ok {
-				if pod != kubecost.UnmountedSuffix {
-					log.Warningf("CostModel.ComputeAllocation: failed to find node %s for %s", nodeKey, alloc.Name)
-				}
-			} else {
-				alloc.CPUCost = alloc.CPUCoreHours * n.CostPerCPUHr
-				alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * n.CostPerRAMGiBHr
-				alloc.GPUCost = alloc.GPUHours * n.CostPerGPUHr
-			}
+			node := cm.getNodePricing(nodeMap, nodeKey)
+			alloc.CPUCost = alloc.CPUCoreHours * node.CostPerCPUHr
+			alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * node.CostPerRAMGiBHr
+			alloc.GPUCost = alloc.GPUHours * node.CostPerGPUHr
 
 			if pvcs, ok := podPVCMap[podKey]; ok {
 				for _, pvc := range pvcs {
@@ -380,7 +376,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 
 			// Make sure that the name is correct (node may not be present at this
 			// point due to it missing from queryMinutes) then insert.
-			alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container)
+			alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, nodeName, namespace, pod, container)
 			allocSet.Set(alloc)
 		}
 	}
@@ -428,7 +424,6 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 			durStr, offStr, err := batchWindow.DurationOffsetForPrometheus()
 			if err != nil || durStr == "" {
 				// Negative duration, so set empty results and don't query
-				// TODO niko/computeallocation test this!!!
 				resPods = []*prom.QueryResult{}
 				err = nil
 				break
@@ -1103,6 +1098,13 @@ func resToPodJobMap(resJobLabels []*prom.QueryResult) map[podKey]controllerKey {
 			continue
 		}
 
+		// Convert the name of Jobs generated by CronJobs to the name of the
+		// CronJob by stripping the timestamp off the end.
+		match := isCron.FindStringSubmatch(controllerKey.Controller)
+		if match != nil {
+			controllerKey.Controller = match[1]
+		}
+
 		pod, err := res.GetString("pod")
 		if err != nil {
 			log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
@@ -1168,7 +1170,7 @@ func applyControllersToPods(podMap map[podKey]*Pod, podControllerMap map[podKey]
 	}
 }
 
-func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*prom.QueryResult) {
+func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerCPUHr []*prom.QueryResult) {
 	for _, res := range resNodeCostPerCPUHr {
 		cluster, err := res.GetString("cluster_id")
 		if err != nil {
@@ -1189,7 +1191,7 @@ func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*pro
 
 		key := newNodeKey(cluster, node)
 		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
+			nodeMap[key] = &NodePricing{
 				Name:     node,
 				NodeType: instanceType,
 			}
@@ -1199,7 +1201,7 @@ func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*pro
 	}
 }
 
-func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
+func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
 	for _, res := range resNodeCostPerRAMGiBHr {
 		cluster, err := res.GetString("cluster_id")
 		if err != nil {
@@ -1220,7 +1222,7 @@ func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr
 
 		key := newNodeKey(cluster, node)
 		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
+			nodeMap[key] = &NodePricing{
 				Name:     node,
 				NodeType: instanceType,
 			}
@@ -1230,7 +1232,7 @@ func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr
 	}
 }
 
-func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*prom.QueryResult) {
+func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerGPUHr []*prom.QueryResult) {
 	for _, res := range resNodeCostPerGPUHr {
 		cluster, err := res.GetString("cluster_id")
 		if err != nil {
@@ -1251,7 +1253,7 @@ func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*pro
 
 		key := newNodeKey(cluster, node)
 		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
+			nodeMap[key] = &NodePricing{
 				Name:     node,
 				NodeType: instanceType,
 			}
@@ -1261,7 +1263,7 @@ func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*pro
 	}
 }
 
-func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult) {
+func applyNodeSpot(nodeMap map[nodeKey]*NodePricing, resNodeIsSpot []*prom.QueryResult) {
 	for _, res := range resNodeIsSpot {
 		cluster, err := res.GetString("cluster_id")
 		if err != nil {
@@ -1284,7 +1286,7 @@ func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult)
 	}
 }
 
-func applyNodeDiscount(nodeMap map[nodeKey]*Node, cm *CostModel) {
+func applyNodeDiscount(nodeMap map[nodeKey]*NodePricing, cm *CostModel) {
 	if cm == nil {
 		return
 	}
@@ -1579,6 +1581,130 @@ func applyUnmountedPVCs(window kubecost.Window, podMap map[podKey]*Pod, pvcMap m
 	}
 }
 
+// getNodePricing determines node pricing, given a key and a mapping from keys
+// to their NodePricing instances, as well as the custom pricing configuration
+// inherent to the CostModel instance. If custom pricing is set, use that. If
+// not, use the pricing defined by the given key. If that doesn't exist, fall
+// back on custom pricing as a default.
+func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*NodePricing, nodeKey nodeKey) *NodePricing {
+	// Find the relevant NodePricing, if it exists. If not, substitute the
+	// custom NodePricing as a default.
+	node, ok := nodeMap[nodeKey]
+	if !ok || node == nil {
+		if nodeKey.Node != "" {
+			log.Warningf("CostModel: failed to find node for %s", nodeKey)
+		}
+		return cm.getCustomNodePricing(false)
+	}
+
+	// If custom pricing is enabled and can be retrieved, override detected
+	// node pricing with the custom values.
+	customPricingConfig, err := cm.Provider.GetConfig()
+	if err != nil {
+		log.Warningf("CostModel: failed to load custom pricing: %s", err)
+	}
+	if cloud.CustomPricesEnabled(cm.Provider) && customPricingConfig != nil {
+		return cm.getCustomNodePricing(node.Preemptible)
+	}
+
+	// If any of the values are NaN or zero, replace them with the custom
+	// values as default.
+	// [TODO:CLEANUP] can't we parse these custom prices once? why do we store
+	// them as strings like this?
+
+	if node.CostPerCPUHr == 0 || math.IsNaN(node.CostPerCPUHr) {
+		log.Warningf("CostModel: node pricing has illegal CostPerCPUHr; replacing with custom pricing: %s", nodeKey)
+		cpuCostStr := customPricingConfig.CPU
+		if node.Preemptible {
+			cpuCostStr = customPricingConfig.SpotCPU
+		}
+		costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
+		if err != nil {
+			log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
+		}
+		node.CostPerCPUHr = costPerCPUHr
+	}
+
+	if node.CostPerGPUHr == 0 || math.IsNaN(node.CostPerGPUHr) {
+		log.Warningf("CostModel: node pricing has illegal CostPerGPUHr; replacing with custom pricing: %s", nodeKey)
+		gpuCostStr := customPricingConfig.GPU
+		if node.Preemptible {
+			gpuCostStr = customPricingConfig.SpotGPU
+		}
+		costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
+		if err != nil {
+			log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
+		}
+		node.CostPerGPUHr = costPerGPUHr
+	}
+
+	if node.CostPerRAMGiBHr == 0 || math.IsNaN(node.CostPerRAMGiBHr) {
+		log.Warningf("CostModel: node pricing has illegal CostPerRAMHr; replacing with custom pricing: %s", nodeKey)
+		ramCostStr := customPricingConfig.RAM
+		if node.Preemptible {
+			ramCostStr = customPricingConfig.SpotRAM
+		}
+		costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
+		if err != nil {
+			log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
+		}
+		node.CostPerRAMGiBHr = costPerRAMHr
+	}
+
+	return node
+}
+
+// getCustomNodePricing converts the CostModel's configured custom pricing
+// values into a NodePricing instance.
+func (cm *CostModel) getCustomNodePricing(spot bool) *NodePricing {
+	customPricingConfig, err := cm.Provider.GetConfig()
+	if err != nil {
+		return nil
+	}
+
+	cpuCostStr := customPricingConfig.CPU
+	gpuCostStr := customPricingConfig.GPU
+	ramCostStr := customPricingConfig.RAM
+	if spot {
+		cpuCostStr = customPricingConfig.SpotCPU
+		gpuCostStr = customPricingConfig.SpotGPU
+		ramCostStr = customPricingConfig.SpotRAM
+	}
+
+	node := &NodePricing{}
+
+	costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
+	if err != nil {
+		log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
+	}
+	node.CostPerCPUHr = costPerCPUHr
+
+	costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
+	if err != nil {
+		log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
+	}
+	node.CostPerGPUHr = costPerGPUHr
+
+	costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
+	if err != nil {
+		log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
+	}
+	node.CostPerRAMGiBHr = costPerRAMHr
+
+	return node
+}
+
+type NodePricing struct {
+	Name            string
+	NodeType        string
+	Preemptible     bool
+	CostPerCPUHr    float64
+	CostPerRAMGiBHr float64
+	CostPerGPUHr    float64
+	Discount        float64
+	Source          string
+}
+
 // TODO niko/computealloction comment
 type Pod struct {
 	Window      kubecost.Window