Selaa lähdekoodia

Partially handle Azure Virtual Nodes (serverless) (#1858)

* First draft showing how to handle Azure Virtual Nodes (serverless)

Signed-off-by: thomasvn <thomasnguyen96@gmail.com>

* Reduce WRN logging when unable to download Azure pricing data

Signed-off-by: thomasvn <thomasnguyen96@gmail.com>

* Update logs to print the node name. This will be helpful when diagnosing Azure Virtual Node issues.

Signed-off-by: thomasvn <thomasnguyen96@gmail.com>

---------

Signed-off-by: thomasvn <thomasnguyen96@gmail.com>
Co-authored-by: Ajay Tripathy <ajay@kubecost.com>
Thomas Nguyen 3 vuotta sitten
vanhempi
sitoutus
16d731f6a8
2 muutettua tiedostoa jossa 41 lisäystä ja 19 poistoa
  1. 31 9
      pkg/cloud/azureprovider.go
  2. 10 10
      pkg/costmodel/allocation_helpers.go

+ 31 - 9
pkg/cloud/azureprovider.go

@@ -1078,12 +1078,19 @@ func (az *Azure) AllNodePricing() (interface{}, error) {
 func (az *Azure) NodePricing(key Key) (*Node, error) {
 	az.DownloadPricingDataLock.RLock()
 	defer az.DownloadPricingDataLock.RUnlock()
+	pricingDataExists := true
+	if az.Pricing == nil {
+		pricingDataExists = false
+		log.DedupedWarningf(1, "Unable to download Azure pricing data")
+	}
 
 	azKey, ok := key.(*azureKey)
 	if !ok {
 		return nil, fmt.Errorf("azure: NodePricing: key is of type %T", key)
 	}
 	config, _ := az.GetConfig()
+
+	// Spot Node
 	if slv, ok := azKey.Labels[config.SpotLabel]; ok && slv == config.SpotLabelValue && config.SpotLabel != "" && config.SpotLabelValue != "" {
 		features := strings.Split(azKey.Features(), ",")
 		region := features[0]
@@ -1097,7 +1104,6 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 			return n.Node, nil
 		}
 		log.Infof("[Info] found spot instance, trying to get retail price for %s: %s, ", spotFeatures, azKey)
-
 		spotCost, err := getRetailPrice(region, instance, config.CurrencyCode, true)
 		if err != nil {
 			log.DedupedWarningf(5, "failed to retrieve spot retail pricing")
@@ -1111,27 +1117,31 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 				UsageType: "spot",
 				GPU:       gpu,
 			}
-
 			az.addPricing(spotFeatures, &AzurePricing{
 				Node: spotNode,
 			})
-
 			return spotNode, nil
 		}
 	}
 
-	if n, ok := az.Pricing[azKey.Features()]; ok {
-		log.Debugf("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
-		if azKey.isValidGPUNode() {
-			n.Node.GPU = azKey.GetGPUCount()
+	// Use the downloaded pricing data if possible. Otherwise, use default
+	// configured pricing data.
+	if pricingDataExists {
+		if n, ok := az.Pricing[azKey.Features()]; ok {
+			log.Debugf("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
+			if azKey.isValidGPUNode() {
+				n.Node.GPU = azKey.GetGPUCount()
+			}
+			return n.Node, nil
 		}
-		return n.Node, nil
+		log.DedupedWarningf(5, "No pricing data found for node %s from key %s", azKey, azKey.Features())
 	}
-	log.Warnf("no pricing data found for %s: %s", azKey.Features(), azKey)
 	c, err := az.GetConfig()
 	if err != nil {
 		return nil, fmt.Errorf("No default pricing data available")
 	}
+
+	// GPU Node
 	if azKey.isValidGPUNode() {
 		return &Node{
 			VCPUCost:         c.CPU,
@@ -1141,6 +1151,18 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 			GPU:              azKey.GetGPUCount(),
 		}, nil
 	}
+
+	// Serverless Node. This is an Azure Container Instance, and no pods can be
+	// scheduled to this node. Azure does not charge for this node. Set costs to
+	// zero.
+	if azKey.Labels["kubernetes.io/hostname"] == "virtual-node-aci-linux" {
+		return &Node{
+			VCPUCost: "0",
+			RAMCost:  "0",
+		}, nil
+	}
+
+	// Regular Node
 	return &Node{
 		VCPUCost:         c.CPU,
 		RAMCost:          c.RAM,

+ 10 - 10
pkg/costmodel/allocation_helpers.go

@@ -1411,19 +1411,19 @@ func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*nodePricing, resNodeCostPerCPUHr
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
@@ -1449,19 +1449,19 @@ func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*nodePricing, resNodeCostPerRA
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
@@ -1487,19 +1487,19 @@ func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*nodePricing, resNodeCostPerGPUHr
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: \"%s\" for node \"%s\"", err, node)
 			continue
 		}
 
@@ -1531,7 +1531,7 @@ func applyNodeSpot(nodeMap map[nodeKey]*nodePricing, resNodeIsSpot []*prom.Query
 
 		key := newNodeKey(cluster, node)
 		if _, ok := nodeMap[key]; !ok {
-			log.Warnf("CostModel.ComputeAllocation: Node spot  query result for missing node: %s", key)
+			log.Warnf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
 			continue
 		}