1 год назад · ba2cad5065
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -9,6 +9,7 @@ Official list of [OpenCost Maintainers](https://github.com/orgs/opencost/teams/o
 
				 | Ajay Tripathy | @AjayTripathy | Kubecost | <Ajay@kubecost.com> |
			
 
				 | Alex Meijer | @ameijer | Kubecost | <ameijer@kubecost.com> |
			
 
				 | Artur Khantimirov | @r2k1 | Microsoft | <akhantimirov@microsoft.com> |
			
 
				+| Cliff Colvin | @cliffcolvin | Kubecost | <ccolvin@kubecost.com> |
			
 
				 | Matt Bolt | @mbolt35 | Kubecost | <matt@kubecost.com> |
			
 
				 | Niko Kovacevic | @nikovacevic | Kubecost | <niko@kubecost.com> |
			
 
				 | Sean Holcomb | @Sean-Holcomb | Kubecost | <Sean@kubecost.com> |
			
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 
				 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
			
 
				 [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/6219/badge)](https://www.bestpractices.dev/projects/6219)
			
 
				+[![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20OpenCost%20Guru-006BFF)](https://gurubase.io/g/opencost)
			
 
				 
			
 
				 ![](./opencost-header.png)
			
 
				 
			
--- a/core/pkg/opencost/allocation.go
+++ b/core/pkg/opencost/allocation.go
@@ -279,7 +279,7 @@ func (r *RawAllocationOnlyData) SanitizeNaN() {
 
				 		log.DedupedWarningf(5, "RawAllocationOnlyData: Unexpected NaN found for RAMBytesUsageMax")
			
 
				 		r.RAMBytesUsageMax = 0
			
 
				 	}
			
 
				-	if r.GPUUsageMax == nil || math.IsNaN(*r.GPUUsageMax) {
			
 
				+	if r.GPUUsageMax != nil && math.IsNaN(*r.GPUUsageMax) {
			
 
				 		log.DedupedWarningf(5, "RawAllocationOnlyData: Unexpected NaN found for GPUUsageMax")
			
 
				 		r.GPUUsageMax = nil
			
 
				 	}
			
@@ -1414,21 +1414,6 @@ func (a *Allocation) add(that *Allocation) {
 
				 	// Sum LoadBalancer Allocations
			
 
				 	a.LoadBalancers = a.LoadBalancers.Add(that.LoadBalancers)
			
 
				 
			
 
				-	// Sum GPU Allocations
			
 
				-	if that.GPUAllocation != nil {
			
 
				-		if a.GPUAllocation == nil {
			
 
				-			a.GPUAllocation = that.GPUAllocation.Clone()
			
 
				-		} else {
			
 
				-			if a.GPUAllocation.GPUUsageAverage != nil && that.GPUAllocation.GPUUsageAverage != nil {
			
 
				-				*a.GPUAllocation.GPUUsageAverage += *that.GPUAllocation.GPUUsageAverage
			
 
				-			}
			
 
				-
			
 
				-			if a.GPUAllocation.GPURequestAverage != nil && that.GPUAllocation.GPURequestAverage != nil {
			
 
				-				*a.GPUAllocation.GPURequestAverage += *that.GPUAllocation.GPURequestAverage
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				 	// Any data that is in a "raw allocation only" is not valid in any
			
 
				 	// sort of cumulative Allocation (like one that is added).
			
 
				 	a.RawAllocationOnly = nil
			
--- a/core/pkg/opencost/allocation_test.go
+++ b/core/pkg/opencost/allocation_test.go
@@ -3784,12 +3784,38 @@ func TestRawAllocationOnlyData_SanitizeNaN(t *testing.T) {
 
				 	raw.SanitizeNaN()
			
 
				 	v := reflect.ValueOf(*raw)
			
 
				 	checkAllFloat64sForNaN(t, v, "TestRawAllocationOnlyData_SanitizeNaN")
			
 
				+
			
 
				+	nan := math.NaN()
			
 
				+	nilRawAllocation := &RawAllocationOnlyData{
			
 
				+		CPUCoreUsageMax:  nan,
			
 
				+		RAMBytesUsageMax: nan,
			
 
				+		GPUUsageMax:      &nan,
			
 
				+	}
			
 
				+
			
 
				+	nilRawAllocation.SanitizeNaN()
			
 
				+
			
 
				+	// SanitizeNaN is expected to reset GPUUsageMax to nil when it points to NaN
			
 
				+	if nilRawAllocation.GPUUsageMax != nil {
			
 
				+		t.Fatalf("want: nil, got: %v", nilRawAllocation.GPUUsageMax)
			
 
				+	}
			
 
				+
			
 
				+	// SanitizeNaN is expected to reset CPUCoreUsageMax to 0.0 when it is NaN
			
 
				+	if nilRawAllocation.CPUCoreUsageMax != 0.0 {
			
 
				+		t.Fatalf("want: 0.0, got: %v", nilRawAllocation.CPUCoreUsageMax)
			
 
				+	}
			
 
				+
			
 
				+	// SanitizeNaN is expected to reset RAMBytesUsageMax to 0.0 when it is NaN
			
 
				+	if nilRawAllocation.RAMBytesUsageMax != 0.0 {
			
 
				+		t.Fatalf("want: 0.0, got: %v", nilRawAllocation.RAMBytesUsageMax)
			
 
				+	}
			
 
				+
			
 
				 }
			
 
				 
			
 
				 func getMockRawAllocationOnlyData(f float64) *RawAllocationOnlyData {
			
 
				 	return &RawAllocationOnlyData{
			
 
				 		CPUCoreUsageMax:  f,
			
 
				 		RAMBytesUsageMax: f,
			
 
				+		GPUUsageMax:      &f,
			
 
				 	}
			
 
				 }
			
 
				 
			
--- a/go.mod
+++ b/go.mod
@@ -115,7 +115,7 @@ require (
 
				 	github.com/goccy/go-json v0.10.2 // indirect
			
 
				 	github.com/gofrs/uuid v4.2.0+incompatible // indirect
			
 
				 	github.com/gogo/protobuf v1.3.2 // indirect
			
 
				-	github.com/golang-jwt/jwt/v4 v4.5.0 // indirect
			
 
				+	github.com/golang-jwt/jwt/v4 v4.5.1 // indirect
			
 
				 	github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
			
 
				 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
			
 
				 	github.com/golang/protobuf v1.5.4 // indirect
			
--- a/go.sum
+++ b/go.sum
@@ -230,8 +230,8 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69
 
				 github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A=
			
 
				 github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
			
 
				 github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
			
 
				-github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
			
 
				-github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
			
 
				+github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo=
			
 
				+github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
			
 
				 github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
			
 
				 github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
			
 
				 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
			
--- a/pkg/cloud/aws/provider.go
+++ b/pkg/cloud/aws/provider.go
@@ -1463,13 +1463,12 @@ func (awsProvider *AWS) ClusterInfo() (map[string]string, error) {
 
				 			clusterName = awsClusterID
			
 
				 			log.Warnf("Warning - %s will be deprecated in a future release. Use %s instead", ocenv.AWSClusterIDEnvVar, ocenv.ClusterIDEnvVar)
			
 
				 		} else if clusterName = ocenv.GetClusterID(); clusterName != "" {
			
 
				-			log.Infof("Setting cluster name to %s from %s ", clusterName, ocenv.ClusterIDEnvVar)
			
 
				+			log.DedupedInfof(5, "Setting cluster name to %s from %s ", clusterName, ocenv.ClusterIDEnvVar)
			
 
				 		} else {
			
 
				 			clusterName = defaultClusterName
			
 
				-			log.Warnf("Unable to detect cluster name - using default of %s", defaultClusterName)
			
 
				-			log.Warnf("Please set cluster name through configmap or via %s env var", ocenv.ClusterIDEnvVar)
			
 
				+			log.DedupedWarningf(5, "Unable to detect cluster name - using default of %s", defaultClusterName)
			
 
				+			log.DedupedWarningf(5, "Please set cluster name through configmap or via %s env var", ocenv.ClusterIDEnvVar)
			
 
				 		}
			
 
				-
			
 
				 	}
			
 
				 
			
 
				 	// this value requires configuration but is unavailable elsewhere
			
--- a/pkg/cloud/azure/provider.go
+++ b/pkg/cloud/azure/provider.go
@@ -288,10 +288,12 @@ func getRetailPrice(region string, skuName string, currencyCode string, spot boo
 
				 	}
			
 
				 
			
 
				 	retailPrice := ""
			
 
				+	spotPrice := ""
			
 
				 	for _, item := range pricingPayload.Items {
			
 
				 		if item.Type == "Consumption" && !strings.Contains(item.ProductName, "Windows") {
			
 
				-			// if spot is true SkuName should contain "spot, if it is false it should not
			
 
				-			if spot == strings.Contains(strings.ToLower(item.SkuName), " spot") {
			
 
				+			if !strings.Contains(strings.ToLower(item.SkuName), " spot") {
			
 
				+				spotPrice = fmt.Sprintf("%f", item.RetailPrice)
			
 
				+			} else {
			
 
				 				retailPrice = fmt.Sprintf("%f", item.RetailPrice)
			
 
				 			}
			
 
				 		}
			
@@ -299,6 +301,10 @@ func getRetailPrice(region string, skuName string, currencyCode string, spot boo
 
				 
			
 
				 	log.DedupedInfof(5, "done parsing retail price payload from \"%s\"\n", pricingURL)
			
 
				 
			
 
				+	if spot && spotPrice != "" {
			
 
				+		return spotPrice, nil
			
 
				+	}
			
 
				+
			
 
				 	if retailPrice == "" {
			
 
				 		return retailPrice, fmt.Errorf("Couldn't find price for product \"%s\" in \"%s\" region", skuName, region)
			
 
				 	}
			
@@ -849,7 +855,7 @@ func (az *Azure) DownloadPricingData() error {
 
				 	// rate-card client is old, it can hang indefinitely in some cases
			
 
				 	// this happens on the main thread, so it may block the whole app
			
 
				 	// there may be a better way to set a timeout for the client
			
 
				-	ctx, cancel := context.WithTimeout(context.TODO(), 60*time.Second)
			
 
				+	ctx, cancel := context.WithTimeout(context.TODO(), 300*time.Second)
			
 
				 	defer cancel()
			
 
				 	result, err := rcClient.Get(ctx, rateCardFilter)
			
 
				 	if err != nil {
			
@@ -1092,68 +1098,71 @@ func (az *Azure) AllNodePricing() (interface{}, error) {
 
				 func (az *Azure) NodePricing(key models.Key) (*models.Node, models.PricingMetadata, error) {
			
 
				 	az.DownloadPricingDataLock.RLock()
			
 
				 	defer az.DownloadPricingDataLock.RUnlock()
			
 
				-	pricingDataExists := true
			
 
				-	if az.Pricing == nil {
			
 
				-		pricingDataExists = false
			
 
				-		log.DedupedWarningf(1, "Unable to download Azure pricing data")
			
 
				-	}
			
 
				 
			
 
				 	meta := models.PricingMetadata{}
			
 
				 
			
 
				+	if az.Pricing == nil {
			
 
				+		return nil, meta, fmt.Errorf("Unable to download Azure pricing data")
			
 
				+	}
			
 
				+
			
 
				 	azKey, ok := key.(*azureKey)
			
 
				 	if !ok {
			
 
				 		return nil, meta, fmt.Errorf("azure: NodePricing: key is of type %T", key)
			
 
				 	}
			
 
				 	config, _ := az.GetConfig()
			
 
				 
			
 
				-	// Spot Node
			
 
				 	slv, ok := azKey.Labels[config.SpotLabel]
			
 
				 	isSpot := ok && slv == config.SpotLabelValue && config.SpotLabel != "" && config.SpotLabelValue != ""
			
 
				+
			
 
				+	features := strings.Split(azKey.Features(), ",")
			
 
				+	region := features[0]
			
 
				+	instance := features[1]
			
 
				+	var featureString string
			
 
				 	if isSpot {
			
 
				-		features := strings.Split(azKey.Features(), ",")
			
 
				-		region := features[0]
			
 
				-		instance := features[1]
			
 
				-		spotFeatures := fmt.Sprintf("%s,%s,%s", region, instance, "spot")
			
 
				-		if n, ok := az.Pricing[spotFeatures]; ok {
			
 
				-			log.DedupedInfof(5, "Returning pricing for node %s: %+v from key %s", azKey, n, spotFeatures)
			
 
				-			if azKey.isValidGPUNode() {
			
 
				-				n.Node.GPU = "1" // TODO: support multiple GPUs
			
 
				-			}
			
 
				-			return n.Node, meta, nil
			
 
				+		featureString = fmt.Sprintf("%s,%s,spot", region, instance)
			
 
				+	} else {
			
 
				+		featureString = azKey.Features()
			
 
				+	}
			
 
				+
			
 
				+	if n, ok := az.Pricing[featureString]; ok {
			
 
				+		log.Debugf("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
			
 
				+		if azKey.isValidGPUNode() {
			
 
				+			n.Node.GPU = azKey.GetGPUCount()
			
 
				 		}
			
 
				-		log.Infof("[Info] found spot instance, trying to get retail price for %s: %s, ", spotFeatures, azKey)
			
 
				-		spotCost, err := getRetailPrice(region, instance, config.CurrencyCode, true)
			
 
				-		if err != nil {
			
 
				-			log.DedupedWarningf(5, "failed to retrieve spot retail pricing")
			
 
				-		} else {
			
 
				-			gpu := ""
			
 
				-			if azKey.isValidGPUNode() {
			
 
				-				gpu = "1"
			
 
				-			}
			
 
				-			spotNode := &models.Node{
			
 
				-				Cost:      spotCost,
			
 
				+		return n.Node, meta, nil
			
 
				+	}
			
 
				+
			
 
				+	cost, err := getRetailPrice(region, instance, config.CurrencyCode, isSpot)
			
 
				+
			
 
				+	if err != nil {
			
 
				+		log.DedupedWarningf(5, "failed to retrieve retail pricing: %s", err)
			
 
				+	} else {
			
 
				+		gpu := ""
			
 
				+		if azKey.isValidGPUNode() {
			
 
				+			gpu = azKey.GetGPUCount()
			
 
				+		}
			
 
				+		var node *models.Node
			
 
				+		if isSpot {
			
 
				+			node = &models.Node{
			
 
				+				Cost:      cost,
			
 
				 				UsageType: "spot",
			
 
				 				GPU:       gpu,
			
 
				 			}
			
 
				-			az.addPricing(spotFeatures, &AzurePricing{
			
 
				-				Node: spotNode,
			
 
				-			})
			
 
				-			return spotNode, meta, nil
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	// Use the downloaded pricing data if possible. Otherwise, use default
			
 
				-	// configured pricing data.
			
 
				-	if pricingDataExists {
			
 
				-		if n, ok := az.Pricing[azKey.Features()]; ok {
			
 
				-			log.Debugf("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
			
 
				-			if azKey.isValidGPUNode() {
			
 
				-				n.Node.GPU = azKey.GetGPUCount()
			
 
				+		} else {
			
 
				+			node = &models.Node{
			
 
				+				Cost: cost,
			
 
				+				GPU:  gpu,
			
 
				 			}
			
 
				-			return n.Node, meta, nil
			
 
				 		}
			
 
				-		log.DedupedWarningf(5, "No pricing data found for node %s from key %s", azKey, azKey.Features())
			
 
				+
			
 
				+		az.addPricing(featureString, &AzurePricing{
			
 
				+			Node: node,
			
 
				+		})
			
 
				+		return node, meta, nil
			
 
				 	}
			
 
				+
			
 
				+	log.DedupedWarningf(5, "No pricing data found for node %s from key %s", azKey, azKey.Features())
			
 
				+
			
 
				 	c, err := az.GetConfig()
			
 
				 	if err != nil {
			
 
				 		return nil, meta, fmt.Errorf("No default pricing data available")
			
--- a/pkg/costmodel/allocation.go
+++ b/pkg/costmodel/allocation.go
@@ -280,9 +280,12 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 
				 				resultAlloc.RawAllocationOnly.RAMBytesUsageMax = alloc.RawAllocationOnly.RAMBytesUsageMax
			
 
				 			}
			
 
				 
			
 
				-			if alloc.RawAllocationOnly.CPUCoreUsageMax > resultAlloc.RawAllocationOnly.CPUCoreUsageMax {
			
 
				-				resultAlloc.RawAllocationOnly.GPUUsageMax = alloc.RawAllocationOnly.GPUUsageMax
			
 
				+			if alloc.RawAllocationOnly.GPUUsageMax != nil {
			
 
				+				if *alloc.RawAllocationOnly.GPUUsageMax > *resultAlloc.RawAllocationOnly.GPUUsageMax {
			
 
				+					resultAlloc.RawAllocationOnly.GPUUsageMax = alloc.RawAllocationOnly.GPUUsageMax
			
 
				+				}
			
 
				 			}
			
 
				+
			
 
				 		}
			
 
				 	}
			
 
				 
			
--- a/pkg/costmodel/allocation_helpers.go
+++ b/pkg/costmodel/allocation_helpers.go
@@ -667,7 +667,13 @@ func applyGPUUsage(podMap map[podKey]*pod, resGPUUsageAvgOrMax []*prom.QueryResu
 
				 					thisPod.Allocations[container].GPUAllocation.GPUUsageAverage = &res.Values[0].Value
			
 
				 				}
			
 
				 			case GpuUsageMaxMode:
			
 
				-				thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax = &res.Values[0].Value
			
 
				+				if thisPod.Allocations[container].RawAllocationOnly == nil {
			
 
				+					thisPod.Allocations[container].RawAllocationOnly = &opencost.RawAllocationOnlyData{
			
 
				+						GPUUsageMax: &res.Values[0].Value,
			
 
				+					}
			
 
				+				} else {
			
 
				+					thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax = &res.Values[0].Value
			
 
				+				}
			
 
				 			case GpuIsSharedMode:
			
 
				 				// if a container is using a GPU and it is shared, isGPUShared will be true
			
 
				 				// if a container is using GPU and it is NOT shared, isGPUShared will be false
			
@@ -768,13 +774,6 @@ func applyGPUsAllocated(podMap map[podKey]*pod, resGPUsRequested []*prom.QueryRe
 
				 				}
			
 
				 			} else {
			
 
				 				thisPod.Allocations[container].GPUAllocation.GPURequestAverage = &res.Values[0].Value
			
 
				-				if thisPod.Allocations[container].GPUAllocation == nil {
			
 
				-					thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{
			
 
				-						GPURequestAverage: &res.Values[0].Value,
			
 
				-					}
			
 
				-				} else {
			
 
				-					thisPod.Allocations[container].GPUAllocation.GPURequestAverage = &res.Values[0].Value
			
 
				-				}
			
 
				 			}
			
 
				 		}
			
 
				 	}