Просмотр исходного кода

Fix GCP GPU mapping for A100 40GB/80GB (A2-HighGPU & A2-Ultra) instances (#3446)

Signed-off-by: Mohamed Osama <mohamedosama58113@gmail.com>
mohamedosama113 5 месяцев назад
Родитель
Коммит
1798f562af
4 изменённых файла: 117 добавлений и 18 удалений
  1. 1 1
      Dockerfile
  2. 72 0
      pkg/cloud/gcp/gpu.go
  3. 41 0
      pkg/cloud/gcp/gpu_test.go
  4. 3 17
      pkg/cloud/gcp/provider.go

+ 1 - 1
Dockerfile

@@ -1,4 +1,4 @@
-FROM --platform=$BUILDPLATFORM golang:1.24-alpine3.20 AS build-env
+FROM --platform=$BUILDPLATFORM golang:1.25.3-alpine3.21 AS build-env
 
 WORKDIR /app
 

+ 72 - 0
pkg/cloud/gcp/gpu.go

@@ -0,0 +1,72 @@
+package gcp
+
+import (
+	"regexp"
+	"strings"
+)
+
+// Fallback patterns, tried only when no explicit rule below applies. Both are
+// case-insensitive and capture the alphanumeric model token that follows the
+// vendor prefix (optionally separated by whitespace or hyphens).
+var (
+	nvidiaTeslaGPURegex = regexp.MustCompile(`(?i)nvidia[\s-]*tesla[\s-]*([a-z0-9]+)`)
+	nvidiaGPURegex      = regexp.MustCompile(`(?i)nvidia[\s-]*([a-z0-9]+)`)
+)
+
+// gpuSKUToGpuLabel maps lower-case description phrases to GPU labels.
+//
+// It is a slice rather than a map so that lookups happen in a fixed order and
+// the result is deterministic when a description mentions several models.
+// A100 variants are not listed: NormalizeGPULabel resolves every description
+// containing "a100" before it consults this table.
+var gpuSKUToGpuLabel = []struct {
+	phrase string
+	label  string
+}{
+	// L4
+	{"nvidia l4", "nvidia-l4"},
+
+	// T4
+	{"tesla t4", "nvidia-tesla-t4"},
+	{"nvidia t4", "nvidia-tesla-t4"},
+
+	// V100
+	{"tesla v100", "nvidia-tesla-v100"},
+	{"nvidia v100", "nvidia-tesla-v100"},
+
+	// P100
+	{"tesla p100", "nvidia-tesla-p100"},
+	{"nvidia p100", "nvidia-tesla-p100"},
+}
+
+// isGPUNameByte reports whether b is an ASCII letter or digit, i.e. a byte
+// that can be part of a GPU model token such as "l40s".
+func isGPUNameByte(b byte) bool {
+	return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') || ('0' <= b && b <= '9')
+}
+
+// gpuPhraseMatches reports whether phrase occurs in s as a whole phrase: the
+// bytes immediately before and after the occurrence must not be ASCII letters
+// or digits. This keeps "nvidia l4" from matching inside "nvidia l40s".
+func gpuPhraseMatches(s, phrase string) bool {
+	if phrase == "" {
+		return false
+	}
+	for from := 0; from < len(s); {
+		i := strings.Index(s[from:], phrase)
+		if i < 0 {
+			return false
+		}
+		start := from + i
+		end := start + len(phrase)
+		if (start == 0 || !isGPUNameByte(s[start-1])) && (end == len(s) || !isGPUNameByte(s[end])) {
+			return true
+		}
+		// Embedded in a longer token; keep scanning after this occurrence.
+		from = start + 1
+	}
+	return false
+}
+
+// NormalizeGPULabel derives a GPU label from a free-form product description.
+//
+// Rules are applied in order and the first hit wins:
+//
+//  1. Any description containing "a100" yields "nvidia-a100-80gb" when it also
+//     mentions "80gb" or "80 gb", and the legacy "nvidia-tesla-a100" label
+//     otherwise (40GB and unspecified sizes alike).
+//  2. The whole-phrase table gpuSKUToGpuLabel.
+//  3. Regex fallback: "nvidia-tesla-<model>" or "nvidia-<model>", with the
+//     captured model token lower-cased.
+//
+// Matching is case-insensitive. The empty string is returned when no rule
+// matches.
+func NormalizeGPULabel(desc string) string {
+	d := strings.ToLower(desc)
+
+	// Step 1: A100 first, so the memory size decides the label before any
+	// generic rule can claim the description.
+	if strings.Contains(d, "a100") {
+		if strings.Contains(d, "80gb") || strings.Contains(d, "80 gb") {
+			return "nvidia-a100-80gb"
+		}
+		return "nvidia-tesla-a100"
+	}
+
+	// Step 2: explicit phrases, matched on token boundaries and in table order.
+	for _, m := range gpuSKUToGpuLabel {
+		if gpuPhraseMatches(d, m.phrase) {
+			return m.label
+		}
+	}
+
+	// Step 3: regex fallback for models without an explicit entry.
+	if match := nvidiaTeslaGPURegex.FindStringSubmatch(desc); len(match) == 2 {
+		return "nvidia-tesla-" + strings.ToLower(match[1])
+	}
+	if match := nvidiaGPURegex.FindStringSubmatch(desc); len(match) == 2 {
+		return "nvidia-" + strings.ToLower(match[1])
+	}
+
+	return ""
+}

+ 41 - 0
pkg/cloud/gcp/gpu_test.go

@@ -0,0 +1,41 @@
+package gcp
+
+import "testing"
+
+// TestNormalizeGPULabel checks NormalizeGPULabel against a table of product
+// descriptions and the label expected for each.
+func TestNormalizeGPULabel(t *testing.T) {
+	cases := []struct {
+		desc string
+		want string
+	}{
+		// A100 80GB (A2-Ultra)
+		{"Nvidia A100 80GB GPU attached to instance", "nvidia-a100-80gb"},
+		{"Nvidia Tesla A100 80GB GPU (SXM4) in region us-central1", "nvidia-a100-80gb"},
+
+		// A100 40GB / generic A100 (A2-HighGPU legacy label)
+		{"Nvidia Tesla A100 GPU attached", "nvidia-tesla-a100"},
+		{"Nvidia Tesla A100 40GB GPU", "nvidia-tesla-a100"},
+
+		// L4 (G2)
+		{"NVIDIA L4 GPU attached", "nvidia-l4"},
+
+		// T4
+		{"Tesla T4 GPU", "nvidia-tesla-t4"},
+		{"NVIDIA T4 accelerator", "nvidia-tesla-t4"},
+
+		// V100
+		{"NVIDIA V100 in use", "nvidia-tesla-v100"},
+
+		// P100 – reviewer example; matched by the explicit "tesla p100" phrase.
+		{"Nvidia Tesla P100 GPU running in Melbourne", "nvidia-tesla-p100"},
+
+		// No GPU
+		{"E2 standard instance, no accelerator", ""},
+	}
+
+	for i, tc := range cases {
+		// Errorf rather than Fatalf: report every failing case in one run
+		// instead of stopping at the first mismatch.
+		if got := NormalizeGPULabel(tc.desc); got != tc.want {
+			t.Errorf("case %d: desc=%q: got %q, want %q", i, tc.desc, got, tc.want)
+		}
+	}
+}

+ 3 - 17
pkg/cloud/gcp/provider.go

@@ -96,8 +96,6 @@ var gcpRegions = []string{
 }
 
 var (
-	nvidiaTeslaGPURegex = regexp.MustCompile("(Nvidia Tesla [^ ]+) ")
-	nvidiaGPURegex      = regexp.MustCompile("(Nvidia [^ ]+) ")
 	// gce://guestbook-12345/...
 	//  => guestbook-12345
 	gceRegex = regexp.MustCompile("gce://([^/]*)/*")
@@ -762,23 +760,11 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]models.Key, pvKeys m
 					instanceType = "t2astandard"
 				}
 
-				var gpuType string
-				for matchnum, group := range nvidiaTeslaGPURegex.FindStringSubmatch(product.Description) {
-					if matchnum == 1 {
-						gpuType = strings.ToLower(strings.Join(strings.Split(group, " "), "-"))
-						log.Debugf("GCP Billing API: GPU type found: '%s'", gpuType)
-					}
+				gpuType := NormalizeGPULabel(product.Description)
+				if gpuType != "" {
+				    log.Debugf("GCP Billing API: normalized GPU type: %q", gpuType)
 				}
 
-				// If a 'Nvidia Tesla' is not found, try 'Nvidia'
-				if gpuType == "" {
-					for matchnum, group := range nvidiaGPURegex.FindStringSubmatch(product.Description) {
-						if matchnum == 1 {
-							gpuType = strings.ToLower(strings.Join(strings.Split(group, " "), "-"))
-							log.Debugf("GCP Billing API: GPU type found: '%s'", gpuType)
-						}
-					}
-				}
 
 				candidateKeys := []string{}
 				if gcp.ValidPricingKeys == nil {