瀏覽代碼

fix(carbon): correct lookup fallbacks, provider detection, and network support (#3736)

Signed-off-by: Warwick Peatey <warwick@automatic.systems>
Warwick 4 周之前
父節點
當前提交
889073d037
共有 2 個文件被更改,包括 394 次插入91 次删除
  1. 140 91
      pkg/carbon/carbonassets.go
  2. 254 0
      pkg/carbon/carbonassets_test.go

+ 140 - 91
pkg/carbon/carbonassets.go

@@ -3,7 +3,6 @@ package carbon
 import (
 	"embed"
 	"encoding/csv"
-	"fmt"
 	"strconv"
 	"strings"
 
@@ -15,7 +14,11 @@ import (
 //go:embed carbonlookupdata.csv
 var f embed.FS
 
-type carbonLookupKeyDisk struct {
+// averageRegionKey is the fallback region label used in the lookup CSV when a
+// specific (provider, region, instanceType) tuple cannot be matched.
+const averageRegionKey = "average-region"
+
+type carbonLookupKeyRegion struct {
 	provider string
 	region   string
 }
@@ -26,15 +29,13 @@ type carbonLookupKeyNode struct {
 	instanceType string
 }
 
-var carbonLookupNode map[carbonLookupKeyNode]float64
-var carbonLookupDisk map[carbonLookupKeyDisk]float64
-
-// Opencost does not build network types
-var carbonValidInstanceTypes map[string]string
-var carbonValidRegions map[string]string
+var (
+	carbonLookupNode    map[carbonLookupKeyNode]float64
+	carbonLookupDisk    map[carbonLookupKeyRegion]float64
+	carbonLookupNetwork map[carbonLookupKeyRegion]float64
+)
 
 func init() {
-
 	carbonData, err := f.ReadFile("carbonlookupdata.csv")
 	if err != nil {
 		log.Errorf("Error getting content of carbon lookup file: %s", err)
@@ -43,129 +44,177 @@ func init() {
 
 	reader := csv.NewReader(strings.NewReader(string(carbonData)))
 
-	// skip header
-	_, err = reader.Read()
-	if err != nil {
-		log.Errorf("Error reading carbon lookup data: %s", err)
+	if _, err := reader.Read(); err != nil {
+		log.Errorf("Error reading carbon lookup header: %s", err)
 		return
 	}
 
-	dat, err := reader.ReadAll()
+	rows, err := reader.ReadAll()
 	if err != nil {
 		log.Errorf("Error reading carbon lookup data: %s", err)
 		return
 	}
 
 	carbonLookupNode = make(map[carbonLookupKeyNode]float64)
-	carbonLookupDisk = make(map[carbonLookupKeyDisk]float64)
-
-	carbonValidInstanceTypes = make(map[string]string)
-	carbonValidRegions = make(map[string]string)
-
-	for _, carbonItem := range dat {
+	carbonLookupDisk = make(map[carbonLookupKeyRegion]float64)
+	carbonLookupNetwork = make(map[carbonLookupKeyRegion]float64)
 
-		if coeff, err := strconv.ParseFloat(carbonItem[5], 64); err != nil {
-
-			panic(fmt.Errorf("error setting up carbon lookup table: malformed carbon cost '%s'", carbonItem[5]))
-
-		} else {
-
-			provider := carbonItem[0]
-			region := carbonItem[1]
-			instanceType := carbonItem[2]
-			assetType := carbonItem[3]
+	for _, row := range rows {
+		// Skip blank records (e.g. a trailing newline in the CSV).
+		if len(row) == 0 || (len(row) == 1 && strings.TrimSpace(row[0]) == "") {
+			continue
+		}
+		if len(row) < 6 {
+			log.Warnf("carbon: skipping malformed lookup row %v", row)
+			continue
+		}
 
-			switch assetType {
-			case "Node":
-				carbonLookupNode[carbonLookupKeyNode{
-					provider:     provider,
-					region:       region,
-					instanceType: instanceType,
-				}] = coeff
-			case "Disk":
-				carbonLookupDisk[carbonLookupKeyDisk{
-					provider: provider,
-					region:   region,
-				}] = coeff
-			}
+		coeff, err := strconv.ParseFloat(row[5], 64)
+		if err != nil {
+			log.Warnf("carbon: skipping row with malformed carbon coefficient %q", row[5])
+			continue
+		}
 
-			carbonValidInstanceTypes[instanceType] = provider
-			carbonValidRegions[region] = provider
+		provider := row[0]
+		region := row[1]
+		instanceType := row[2]
+		assetType := row[3]
 
+		switch assetType {
+		case "Node":
+			carbonLookupNode[carbonLookupKeyNode{
+				provider:     provider,
+				region:       region,
+				instanceType: instanceType,
+			}] = coeff
+		case "Disk":
+			carbonLookupDisk[carbonLookupKeyRegion{
+				provider: provider,
+				region:   region,
+			}] = coeff
+		case "Network":
+			carbonLookupNetwork[carbonLookupKeyRegion{
+				provider: provider,
+				region:   region,
+			}] = coeff
 		}
-
 	}
-
 }
 
 type CarbonRow struct {
 	Co2e float64 `json:"co2e"`
 }
 
+// RelateCarbonAssets returns an estimated CO2e value for each asset in the set.
+// The returned value is in metric tonnes of CO2e, consistent with the units of
+// the embedded lookup table (tonnes CO2e per hour of asset runtime).
 func RelateCarbonAssets(as *opencost.AssetSet) (map[string]CarbonRow, error) {
-
-	res := make(map[string]CarbonRow)
+	res := make(map[string]CarbonRow, len(as.Assets))
 
 	for key, asset := range as.Assets {
-
-		// If no valid region, default to per-provider calculated average
-		region, _ := util.GetRegion(asset.GetLabels())
-		if _, ok := carbonValidRegions[region]; !ok {
-			region = "average-region"
-		}
-
-		// If no valid instance type, also default to per-provider calculated average
-		instanceType, _ := util.GetInstanceType(asset.GetLabels())
-		if _, ok := carbonValidInstanceTypes[instanceType]; !ok {
-			region = "average-region"
+		coeff := lookupCarbonCoeff(asset)
+		res[key] = CarbonRow{
+			Co2e: coeff * asset.Minutes() / 60,
 		}
+	}
 
-		provider := getProviderFromProviderID(asset.GetProperties().ProviderID)
+	return res, nil
+}
 
-		// If we're not able to parse the provider id, try to fetch the provider from the carbon data
-		if provider == "" && region != "average-region" {
-			provider = carbonValidRegions[region]
-		} else {
-			if asset.Type() == opencost.NodeAssetType || asset.Type() == opencost.DiskAssetType {
-				log.DedupedErrorf(10, "Cannot infer region information for asset '%s'", asset.GetProperties().ProviderID)
+// lookupCarbonCoeff resolves the carbon coefficient (tonnes CO2e per hour) for
+// the given asset, falling back to the provider-wide average-region value when
+// a specific region or instance type is not present in the lookup table.
+func lookupCarbonCoeff(asset opencost.Asset) float64 {
+	props := asset.GetProperties()
+	provider := resolveProvider(asset)
+	if provider == "" {
+		if isCarbonTrackedAsset(asset.Type()) {
+			providerID := ""
+			if props != nil {
+				providerID = props.ProviderID
 			}
+			log.DedupedWarningf(10, "carbon: cannot infer provider for asset %q", providerID)
 		}
+		return 0
+	}
 
-		var carbonCoeff float64
-		switch asset.Type() {
-		case opencost.NodeAssetType:
-			carbonCoeff = carbonLookupNode[carbonLookupKeyNode{
-				provider:     provider,
-				region:       region,
-				instanceType: instanceType,
-			}]
-		case opencost.DiskAssetType:
-			carbonCoeff = carbonLookupDisk[carbonLookupKeyDisk{
-				provider: provider,
-				region:   region,
-			}]
-		}
+	region, _ := util.GetRegion(asset.GetLabels())
+	instanceType, _ := util.GetInstanceType(asset.GetLabels())
 
-		res[key] = CarbonRow{
-			Co2e: carbonCoeff * asset.Minutes() / 60,
+	switch asset.Type() {
+	case opencost.NodeAssetType:
+		if coeff, ok := carbonLookupNode[carbonLookupKeyNode{provider, region, instanceType}]; ok {
+			return coeff
+		}
+		if coeff, ok := carbonLookupNode[carbonLookupKeyNode{provider, averageRegionKey, ""}]; ok {
+			log.DedupedWarningf(10, "carbon: falling back to average-region for node (provider=%s region=%q instanceType=%q)", provider, region, instanceType)
+			return coeff
+		}
+	case opencost.DiskAssetType:
+		if coeff, ok := carbonLookupDisk[carbonLookupKeyRegion{provider, region}]; ok {
+			return coeff
+		}
+		if coeff, ok := carbonLookupDisk[carbonLookupKeyRegion{provider, averageRegionKey}]; ok {
+			log.DedupedWarningf(10, "carbon: falling back to average-region for disk (provider=%s region=%q)", provider, region)
+			return coeff
 		}
+	case opencost.NetworkAssetType:
+		if coeff, ok := carbonLookupNetwork[carbonLookupKeyRegion{provider, region}]; ok {
+			return coeff
+		}
+		if coeff, ok := carbonLookupNetwork[carbonLookupKeyRegion{provider, averageRegionKey}]; ok {
+			return coeff
+		}
+	}
+	return 0
+}
 
+func isCarbonTrackedAsset(t opencost.AssetType) bool {
+	switch t {
+	case opencost.NodeAssetType, opencost.DiskAssetType, opencost.NetworkAssetType:
+		return true
 	}
+	return false
+}
 
-	return res, nil
+// resolveProvider returns the canonical provider name for an asset. It prefers
+// the canonical Provider property populated by the cost model, falling back to
+// parsing the cloud provider ID when the property is missing.
+func resolveProvider(asset opencost.Asset) string {
+	props := asset.GetProperties()
+	if props == nil {
+		return ""
+	}
 
+	switch props.Provider {
+	case opencost.AWSProvider, opencost.GCPProvider, opencost.AzureProvider:
+		return props.Provider
+	}
+
+	return inferProviderFromProviderID(props.ProviderID)
 }
 
-func getProviderFromProviderID(providerid string) string {
+// inferProviderFromProviderID is a best-effort fallback that matches the
+// conventional shapes of Kubernetes Node `spec.providerID` values for the
+// cloud providers present in the embedded lookup data (AWS, GCP, Azure).
+//
+// Real-world formats:
+//   - AWS:   aws:///<availability-zone>/<instance-id>  (or raw "i-…")
+//   - GCP:   gce://<project>/<zone>/<instance-name>
+//   - Azure: azure:///subscriptions/<sub>/resourceGroups/<rg>/…
+func inferProviderFromProviderID(providerID string) string {
+	id := strings.ToLower(strings.TrimSpace(providerID))
+	if id == "" {
+		return ""
+	}
 
-	if strings.HasPrefix(providerid, "gke") {
-		return opencost.GCPProvider
-	} else if strings.HasPrefix(providerid, "i-") {
+	switch {
+	case strings.HasPrefix(id, "aws:"), strings.HasPrefix(id, "i-"):
 		return opencost.AWSProvider
-	} else if strings.HasPrefix(providerid, "azure") {
+	case strings.HasPrefix(id, "gce:"), strings.HasPrefix(id, "gke"):
+		return opencost.GCPProvider
+	case strings.HasPrefix(id, "azure:"):
 		return opencost.AzureProvider
 	}
-
 	return ""
-
 }

+ 254 - 0
pkg/carbon/carbonassets_test.go

@@ -0,0 +1,254 @@
+package carbon
+
+import (
+	"math"
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/core/pkg/opencost"
+	v1 "k8s.io/api/core/v1"
+)
+
+const (
+	// Known-good row from carbonlookupdata.csv:
+	//   AWS,us-east-1,t4g.nano,Node,0.012788433076234564,4.84769853777516e-06
+	awsT4gNanoUSEast1Coeff = 4.84769853777516e-06
+
+	// AWS,average-region,,Node,0.186739186034359,7.278989705005508e-05
+	awsAvgRegionNodeCoeff = 7.278989705005508e-05
+
+	// AWS,us-east-1,,Network,0.001135,4.30243315e-7
+	awsUSEast1NetworkCoeff = 4.30243315e-7
+)
+
+// floatEqual compares floats at a tolerance appropriate for the lookup table
+// values, which are stored with full float64 precision in the CSV.
+func floatEqual(a, b float64) bool {
+	if a == b {
+		return true
+	}
+	return math.Abs(a-b) <= 1e-18+1e-12*math.Max(math.Abs(a), math.Abs(b))
+}
+
+func nodeWithLabels(provider, providerID, region, instanceType string, minutes float64) *opencost.Node {
+	start := time.Date(2026, time.April, 1, 0, 0, 0, 0, time.UTC)
+	end := start.Add(time.Duration(minutes) * time.Minute)
+	window := opencost.NewWindow(&start, &end)
+
+	n := opencost.NewNode("node", "cluster", providerID, start, end, window)
+	n.Properties.Provider = provider
+	labels := opencost.AssetLabels{}
+	if region != "" {
+		labels[v1.LabelTopologyRegion] = region
+	}
+	if instanceType != "" {
+		labels[v1.LabelInstanceTypeStable] = instanceType
+	}
+	n.Labels = labels
+	return n
+}
+
+func diskWithLabels(provider, providerID, region string, minutes float64) *opencost.Disk {
+	start := time.Date(2026, time.April, 1, 0, 0, 0, 0, time.UTC)
+	end := start.Add(time.Duration(minutes) * time.Minute)
+	window := opencost.NewWindow(&start, &end)
+
+	d := opencost.NewDisk("disk", "cluster", providerID, start, end, window)
+	d.Properties.Provider = provider
+	if region != "" {
+		d.Labels = opencost.AssetLabels{v1.LabelTopologyRegion: region}
+	}
+	return d
+}
+
+func networkWithLabels(provider, providerID, region string, minutes float64) *opencost.Network {
+	start := time.Date(2026, time.April, 1, 0, 0, 0, 0, time.UTC)
+	end := start.Add(time.Duration(minutes) * time.Minute)
+	window := opencost.NewWindow(&start, &end)
+
+	nw := opencost.NewNetwork("network", "cluster", providerID, start, end, window)
+	nw.Properties.Provider = provider
+	if region != "" {
+		nw.Labels = opencost.AssetLabels{v1.LabelTopologyRegion: region}
+	}
+	return nw
+}
+
+func TestInferProviderFromProviderID(t *testing.T) {
+	cases := []struct {
+		name string
+		id   string
+		want string
+	}{
+		{"empty", "", ""},
+		{"aws standard", "aws:///us-east-1a/i-0abc123", opencost.AWSProvider},
+		{"aws raw instance", "i-0abc123", opencost.AWSProvider},
+		{"gce standard", "gce://my-project/us-central1-a/gke-node-1", opencost.GCPProvider},
+		{"legacy gke prefix", "gke-node-1", opencost.GCPProvider},
+		{"azure standard", "azure:///subscriptions/x/resourceGroups/y/providers/Microsoft.Compute/virtualMachines/z", opencost.AzureProvider},
+		{"unknown prefix", "something-else", ""},
+		{"whitespace and case", "  AWS:///eu-west-1a/i-xyz  ", opencost.AWSProvider},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			if got := inferProviderFromProviderID(tc.id); got != tc.want {
+				t.Fatalf("inferProviderFromProviderID(%q) = %q, want %q", tc.id, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestResolveProvider_PrefersCanonicalProperty(t *testing.T) {
+	// ProviderID is a GCP-shaped string but the canonical property says AWS.
+	// Canonical property wins.
+	n := nodeWithLabels(opencost.AWSProvider, "gce://foo/bar/baz", "us-east-1", "t4g.nano", 60)
+	if got := resolveProvider(n); got != opencost.AWSProvider {
+		t.Fatalf("resolveProvider = %q, want %q", got, opencost.AWSProvider)
+	}
+}
+
+func TestResolveProvider_FallsBackToProviderID(t *testing.T) {
+	// No canonical Provider property — must fall back to parsing ProviderID.
+	n := nodeWithLabels("", "gce://my-project/us-central1-a/gke-node-1", "us-central1", "e2-standard-2", 60)
+	if got := resolveProvider(n); got != opencost.GCPProvider {
+		t.Fatalf("resolveProvider = %q, want %q", got, opencost.GCPProvider)
+	}
+}
+
+func TestLookupCarbonCoeff_Node_ExactMatch(t *testing.T) {
+	n := nodeWithLabels(opencost.AWSProvider, "aws:///us-east-1a/i-1", "us-east-1", "t4g.nano", 60)
+	if got := lookupCarbonCoeff(n); !floatEqual(got, awsT4gNanoUSEast1Coeff) {
+		t.Fatalf("lookupCarbonCoeff = %g, want %g", got, awsT4gNanoUSEast1Coeff)
+	}
+}
+
+func TestLookupCarbonCoeff_Node_FallsBackWhenRegionUnknown(t *testing.T) {
+	// Region is garbage; instance type is fine. Should fall back to
+	// (AWS, average-region, "") instead of returning zero.
+	n := nodeWithLabels(opencost.AWSProvider, "aws:///xx/i-1", "not-a-real-region", "t4g.nano", 60)
+	if got := lookupCarbonCoeff(n); !floatEqual(got, awsAvgRegionNodeCoeff) {
+		t.Fatalf("lookupCarbonCoeff = %g, want %g (average-region fallback)", got, awsAvgRegionNodeCoeff)
+	}
+}
+
+func TestLookupCarbonCoeff_Node_FallsBackWhenInstanceTypeUnknown(t *testing.T) {
+	// Region is real; instance type is unknown. Previously returned 0 because
+	// only the region was reset. Must now fall back to average-region.
+	n := nodeWithLabels(opencost.AWSProvider, "aws:///us-east-1a/i-1", "us-east-1", "future-xxlarge", 60)
+	if got := lookupCarbonCoeff(n); !floatEqual(got, awsAvgRegionNodeCoeff) {
+		t.Fatalf("lookupCarbonCoeff = %g, want %g (average-region fallback)", got, awsAvgRegionNodeCoeff)
+	}
+}
+
+func TestLookupCarbonCoeff_Node_FallsBackWhenBothUnknown(t *testing.T) {
+	n := nodeWithLabels(opencost.AWSProvider, "aws:///xx/i-1", "not-a-real-region", "future-xxlarge", 60)
+	if got := lookupCarbonCoeff(n); !floatEqual(got, awsAvgRegionNodeCoeff) {
+		t.Fatalf("lookupCarbonCoeff = %g, want %g (average-region fallback)", got, awsAvgRegionNodeCoeff)
+	}
+}
+
+func TestLookupCarbonCoeff_Node_ZeroForUnknownProvider(t *testing.T) {
+	n := nodeWithLabels("", "some-unknown-id", "us-east-1", "t4g.nano", 60)
+	if got := lookupCarbonCoeff(n); got != 0 {
+		t.Fatalf("lookupCarbonCoeff = %g, want 0", got)
+	}
+}
+
+func TestLookupCarbonCoeff_Disk_ExactMatch(t *testing.T) {
+	// The CSV contains several disk rows per (provider, region), one per
+	// disk type. They collide under a key of (provider, region), so we
+	// check the lookup against whatever value the table actually holds.
+	want, ok := carbonLookupDisk[carbonLookupKeyRegion{opencost.AWSProvider, "us-east-1"}]
+	if !ok || want == 0 {
+		t.Fatalf("expected AWS/us-east-1 disk coefficient to be loaded")
+	}
+	d := diskWithLabels(opencost.AWSProvider, "aws:///us-east-1a/vol-1", "us-east-1", 60)
+	if got := lookupCarbonCoeff(d); !floatEqual(got, want) {
+		t.Fatalf("lookupCarbonCoeff disk = %g, want %g", got, want)
+	}
+}
+
+func TestLookupCarbonCoeff_Disk_FallsBackWhenRegionUnknown(t *testing.T) {
+	d := diskWithLabels(opencost.AWSProvider, "aws:///xx/vol-1", "not-a-real-region", 60)
+	want, ok := carbonLookupDisk[carbonLookupKeyRegion{opencost.AWSProvider, averageRegionKey}]
+	if !ok {
+		t.Fatalf("expected AWS average-region disk coefficient to be loaded")
+	}
+	if got := lookupCarbonCoeff(d); !floatEqual(got, want) {
+		t.Fatalf("lookupCarbonCoeff disk fallback = %g, want %g", got, want)
+	}
+}
+
+func TestLookupCarbonCoeff_Network_Populated(t *testing.T) {
+	// Regression: Network rows were loaded but never consulted, so every
+	// Network asset produced 0 emissions.
+	nw := networkWithLabels(opencost.AWSProvider, "aws:///us-east-1a/net-1", "us-east-1", 60)
+	if got := lookupCarbonCoeff(nw); !floatEqual(got, awsUSEast1NetworkCoeff) {
+		t.Fatalf("lookupCarbonCoeff network = %g, want %g", got, awsUSEast1NetworkCoeff)
+	}
+}
+
+func TestRelateCarbonAssets_MinutesToHours(t *testing.T) {
+	// Coefficient is tonnes CO2e per hour, so 120 minutes should yield exactly
+	// twice the coefficient.
+	n := nodeWithLabels(opencost.AWSProvider, "aws:///us-east-1a/i-1", "us-east-1", "t4g.nano", 120)
+	as := opencost.NewAssetSet(*n.Window.Start(), *n.Window.End(), n)
+
+	rows, err := RelateCarbonAssets(as)
+	if err != nil {
+		t.Fatalf("RelateCarbonAssets: %v", err)
+	}
+	if len(rows) != 1 {
+		t.Fatalf("got %d rows, want 1", len(rows))
+	}
+	var row CarbonRow
+	for _, r := range rows {
+		row = r
+	}
+	want := awsT4gNanoUSEast1Coeff * 2
+	if !floatEqual(row.Co2e, want) {
+		t.Fatalf("Co2e = %g, want %g", row.Co2e, want)
+	}
+}
+
+func TestRelateCarbonAssets_ZeroForUnknownProvider(t *testing.T) {
+	n := nodeWithLabels("", "totally-unknown", "us-east-1", "t4g.nano", 60)
+	as := opencost.NewAssetSet(*n.Window.Start(), *n.Window.End(), n)
+
+	rows, err := RelateCarbonAssets(as)
+	if err != nil {
+		t.Fatalf("RelateCarbonAssets: %v", err)
+	}
+	for _, r := range rows {
+		if r.Co2e != 0 {
+			t.Fatalf("Co2e = %g, want 0 for unknown provider", r.Co2e)
+		}
+	}
+}
+
+func TestLookupCarbonCoeff_NoPanicOnNilProperties(t *testing.T) {
+	// A bare Node with nil Properties must not panic — older code would
+	// dereference props.ProviderID in the log line after resolveProvider
+	// returned "" for nil properties.
+	start := time.Date(2026, time.April, 1, 0, 0, 0, 0, time.UTC)
+	end := start.Add(60 * time.Minute)
+	window := opencost.NewWindow(&start, &end)
+	n := opencost.NewNode("node", "cluster", "", start, end, window)
+	n.Properties = nil
+
+	if got := lookupCarbonCoeff(n); got != 0 {
+		t.Fatalf("lookupCarbonCoeff with nil properties = %g, want 0", got)
+	}
+}
+
+func TestLookupTables_LoadedAtInit(t *testing.T) {
+	if len(carbonLookupNode) == 0 {
+		t.Error("carbonLookupNode is empty — init did not populate node lookups")
+	}
+	if len(carbonLookupDisk) == 0 {
+		t.Error("carbonLookupDisk is empty — init did not populate disk lookups")
+	}
+	if len(carbonLookupNetwork) == 0 {
+		t.Error("carbonLookupNetwork is empty — init did not populate network lookups")
+	}
+}