Просмотр исходного кода

Merge branch 'develop' into thomasn/gpu-efficiency

thomasvn 1 год назад
Родитель
Сommit
df3bda9364

+ 18 - 1
.github/workflows/build-and-publish-release.yml

@@ -65,6 +65,13 @@ jobs:
           repository: 'opencost/opencost'
           ref: '${{ steps.branch.outputs.BRANCH_NAME }}'
           path: ./opencost
+      
+      - name: Checkout UI Repo
+        uses: actions/checkout@v4
+        with:
+          repository: 'opencost/opencost-ui'
+          ref: '${{ steps.branch.outputs.BRANCH_NAME }}'
+          path: ./opencost-ui
 
       - name: Set SHA
         id: sha
@@ -103,7 +110,7 @@ jobs:
           go-version: 'stable'
 
       - name: Set up just
-        uses: extractions/setup-just@v1
+        uses: extractions/setup-just@v2
 
       - name: Install crane
         uses: imjasonh/setup-crane@v0.3
@@ -135,3 +142,13 @@ jobs:
         #  crane copy '${{ steps.tags.outputs.IMAGE_TAG }}' '${steps.tags.outputs.IMAGE_TAG_QUAY}'
         #  crane copy '${{ steps.tags.outputs.IMAGE_TAG }}' '${steps.tags.outputs.IMAGE_TAG_LATEST_QUAY}'
         #  crane copy '${{ steps.tags.outputs.IMAGE_TAG }}' '${steps.tags.outputs.IMAGE_TAG_VERSION_QUAY}'
+
+      - name: Build and push (multiarch) OpenCost UI
+        working-directory: ./opencost-ui
+        run: |
+          just build '${{ steps.tags.outputs.IMAGE_TAG_UI }}' '${{ steps.version_number.outputs.RELEASE_VERSION }}'
+          crane copy '${{ steps.tags.outputs.IMAGE_TAG_UI }}' '${{ steps.tags.outputs.IMAGE_TAG_UI_LATEST }}'
+          crane copy '${{ steps.tags.outputs.IMAGE_TAG_UI }}' '${{ steps.tags.outputs.IMAGE_TAG_UI_VERSION }}'
+        #  crane copy '${steps.tags.outputs.IMAGE_TAG_UI}' '${steps.tags.outputs.IMAGE_TAG_UI_QUAY}'
+        #  crane copy '${steps.tags.outputs.IMAGE_TAG_UI}' '${steps.tags.outputs.IMAGE_TAG_UI_LATEST_QUAY}'
+        #  crane copy '${steps.tags.outputs.IMAGE_TAG_UI}' '${steps.tags.outputs.IMAGE_TAG_UI_VERSION_QUAY}'

+ 3 - 3
.github/workflows/build-test.yaml

@@ -29,12 +29,12 @@ jobs:
           version: '25.3'
       -
         name: Install just
-        uses: extractions/setup-just@v1
+        uses: extractions/setup-just@v2
 
       - name: install protobuf-go
         run: |
           go install github.com/golang/protobuf/protoc-gen-go@latest
-          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest
+          go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.3.0
           which protoc-gen-go-grpc
       -
         name: Validate
@@ -49,7 +49,7 @@ jobs:
 
       -
         name: Install just
-        uses: extractions/setup-just@v1
+        uses: extractions/setup-just@v2
 
       -
         name: Install Go

+ 7 - 49
core/pkg/opencost/window.go

@@ -298,13 +298,18 @@ func parseWindow(window string, now time.Time) (Window, error) {
 		end := now
 		start := end.Add(-time.Duration(num) * dur)
 
-		// when using windows such as "7d" and "1w", we have to have a definition for what "the past X days" means.
+		// when using windows such as "7d", "1w", and "2h" we have to have a definition for what "the past X days/hours" means.
 		// let "the past X days" be defined as the entirety of today plus the entirety of the past X-1 days, where
 		// "entirety" is defined as midnight to midnight, UTC. given this definition, we round forward the calculated
 		// start and end times to the nearest day to align with midnight boundaries
-		if match[2] == "d" || match[2] == "w" {
+		// an analogous definition applies to "the past X weeks" and "the past X hours"
+		if match[2] == "w" {
+			// special case - with a week, we say the week ends today
 			end = end.Truncate(timeutil.Day).Add(timeutil.Day)
 			start = start.Truncate(timeutil.Day).Add(timeutil.Day)
+		} else {
+			end = now.Truncate(dur).Add(dur)
+			start = end.Add(-time.Duration(num) * dur)
 		}
 
 		return NewWindow(&start, &end), nil
@@ -743,53 +748,6 @@ func (w Window) DurationOffset() (time.Duration, time.Duration, error) {
 	return duration, offset, nil
 }
 
-// DurationOffsetForPrometheus returns strings representing durations for the
-// duration and offset of the given window, factoring in the Thanos offset if
-// necessary. Whereas duration is a simple duration string (e.g. "1d"), the
-// offset includes the word "offset" (e.g. " offset 2d") so that the values
-// returned can be used directly in the formatting string "some_metric[%s]%s"
-// to generate the query "some_metric[1d] offset 2d".
-func (w Window) DurationOffsetForPrometheus() (string, string, error) {
-	duration, offset, err := w.DurationOffset()
-	if err != nil {
-		return "", "", err
-	}
-
-	// If using Thanos, increase offset to 3 hours, reducing the duration by
-	// equal measure to maintain the same starting point.
-	// TODO: This logic should technically be decoupled from this type, but
-	// TODO: current use cases are unclear. To ensure we do not break existing
-	// TODO: (or legacy) use-cases, temporarily support this one-off logic.
-	thanosDur := thanosOffset()
-	if offset < thanosDur && isThanosEnabled() {
-		diff := thanosDur - offset
-		offset += diff
-		duration -= diff
-	}
-
-	// If duration < 0, return an error
-	if duration < 0 {
-		return "", "", fmt.Errorf("negative duration: %s", duration)
-	}
-
-	// Negative offset means that the end time is in the future. Prometheus
-	// fails for non-positive offset values, so shrink the duration and
-	// remove the offset altogether.
-	if offset < 0 {
-		duration = duration + offset
-		offset = 0
-	}
-
-	durStr, offStr := timeutil.DurationOffsetStrings(duration, offset)
-	if offset < time.Minute {
-		offStr = ""
-	} else {
-		offStr = " offset " + offStr
-	}
-
-	return durStr, offStr, nil
-}
-
 // DurationOffsetStrings returns formatted, Prometheus-compatible strings representing
 // the duration and offset of the window in terms of days, hours, minutes, or seconds;
 // e.g. ("7d", "1441m", "30m", "1s", "")

+ 45 - 111
core/pkg/opencost/window_test.go

@@ -4,7 +4,6 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
-	"strings"
 	"testing"
 	"time"
 
@@ -653,7 +652,39 @@ func TestWindow_DurationOffsetStrings(t *testing.T) {
 	}
 }
 
-func TestWindow_DurationOffsetForPrometheus(t *testing.T) {
+func TestParse_Window(t *testing.T) {
+	now := time.Date(2024, time.May, 3, 8, 1, 4, 6, time.UTC)
+	win, err := parseWindow("2h", now)
+	if err != nil {
+		t.Fatalf(`unexpected error parsing "2h": %s`, err)
+	}
+
+	expectedStart := time.Date(2024, time.May, 3, 7, 0, 0, 0, time.UTC)
+	expectedEnd := time.Date(2024, time.May, 3, 9, 0, 0, 0, time.UTC)
+
+	if !win.start.Equal(expectedStart) {
+		t.Fatalf(`expect: window start to be %s; actual: %s`, expectedStart, win.start)
+	}
+	if !win.end.Equal(expectedEnd) {
+		t.Fatalf(`expect: window end to be %s; actual: %s`, expectedEnd, win.end)
+	}
+
+	win, err = parseWindow("3d", now)
+	if err != nil {
+		t.Fatalf(`unexpected error parsing "3d": %s`, err)
+	}
+
+	expectedStart = time.Date(2024, time.May, 1, 0, 0, 0, 0, time.UTC)
+	expectedEnd = time.Date(2024, time.May, 4, 0, 0, 0, 0, time.UTC)
+
+	if !win.start.Equal(expectedStart) {
+		t.Fatalf(`expect: window start to be %s; actual: %s`, expectedStart, win.start)
+	}
+	if !win.end.Equal(expectedEnd) {
+		t.Fatalf(`expect: window end to be %s; actual: %s`, expectedEnd, win.end)
+	}
+}
+func TestWindow_Duration(t *testing.T) {
 	// Set-up and tear-down
 	thanosEnabled := env.GetBool(ThanosEnabledEnvVarName, false)
 	defer env.SetBool(ThanosEnabledEnvVarName, thanosEnabled)
@@ -665,144 +696,47 @@ func TestWindow_DurationOffsetForPrometheus(t *testing.T) {
 	}
 
 	now := time.Now().UTC()
-	startOfToday := now.Truncate(timeutil.Day)
 	w, err := parseWindow("1d", now)
 	if err != nil {
 		t.Fatalf(`unexpected error parsing "1d": %s`, err)
 	}
-
-	dur, off, err := w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	// We can get a response in seconds OR minutes. Check seconds first as it
-	// is higher resolution.
-	expDurSec := int(now.Sub(startOfToday).Seconds())
-	expDurSecStr := fmt.Sprintf("%ds", expDurSec)
-	expDurMin := int(now.Sub(startOfToday).Minutes())
-	expDurMinStr := fmt.Sprintf("%dm", expDurMin)
-	if dur != expDurSecStr && dur != expDurMinStr {
-		t.Fatalf(`expect: window to be "%s" (or "%s"); actual: "%s"`, expDurSecStr, expDurMinStr, dur)
-	}
-	if off != "" {
-		t.Fatalf(`expect: offset to be ""; actual: "%s"`, off)
+	if w.Duration() != 24*time.Hour {
+		t.Fatalf(`expect: window to be 24 hours; actual: %s`, w.Duration())
 	}
 
 	w, err = ParseWindowUTC("2h")
 	if err != nil {
 		t.Fatalf(`unexpected error parsing "2h": %s`, err)
 	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "2h" {
-		t.Fatalf(`expect: window to be "2h"; actual: "%s"`, dur)
-	}
-	if off != "" {
-		t.Fatalf(`expect: offset to be ""; actual: "%s"`, off)
-	}
 
+	if w.Duration().String() != "2h0m0s" {
+		t.Fatalf(`expect: window to be "2h"; actual: "%s"`, w.Duration().String())
+	}
 	w, err = ParseWindowUTC("10m")
 	if err != nil {
 		t.Fatalf(`unexpected error parsing "10m": %s`, err)
 	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "10m" {
-		t.Fatalf(`expect: window to be "10m"; actual: "%s"`, dur)
-	}
-	if off != "" {
-		t.Fatalf(`expect: offset to be ""; actual: "%s"`, off)
+
+	if w.Duration().String() != "10m0s" {
+		t.Fatalf(`expect: window to be "10m"; actual: "%s"`, w.Duration().String())
 	}
 
 	w, err = ParseWindowUTC("1589448338,1589534798")
 	if err != nil {
 		t.Fatalf(`unexpected error parsing "1589448338,1589534798": %s`, err)
 	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "1441m" {
-		t.Fatalf(`expect: window to be "1441m"; actual: "%s"`, dur)
-	}
-	if !strings.HasPrefix(off, " offset ") {
-		t.Fatalf(`expect: offset to start with " offset "; actual: "%s"`, off)
+	if w.Duration().String() != "24h1m0s" {
+		t.Fatalf(`expect: window to be "24h1m0s"; actual: "%s"`, w.Duration().String())
 	}
 
 	w, err = ParseWindowUTC("yesterday")
 	if err != nil {
 		t.Fatalf(`unexpected error parsing "yesterday": %s`, err)
 	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "1d" {
-		t.Fatalf(`expect: window to be "1d"; actual: "%s"`, dur)
-	}
-	if !strings.HasPrefix(off, " offset ") {
-		t.Fatalf(`expect: offset to start with " offset "; actual: "%s"`, off)
-	}
-
-	// Test for Thanos (env.IsThanosEnabled() == true)
-	env.SetBool(ThanosEnabledEnvVarName, true)
-	if !env.GetBool(ThanosEnabledEnvVarName, false) {
-		t.Fatalf("expected env.IsThanosEnabled() == true")
-	}
-
-	// Note - with the updated logic of 1d, 1w, etc. rounding the start and end times forward to the nearest midnight,
-	// DurationOffsetForPrometheus may fail if not using a window using "Xh" as the string to parse
-	w, err = ParseWindowUTC("24h")
-	if err != nil {
-		t.Fatalf(`unexpected error parsing "24h": %s`, err)
-	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "21h" {
-		t.Fatalf(`expect: window to be "21d"; actual: "%s"`, dur)
-	}
-	if off != " offset 3h" {
-		t.Fatalf(`expect: offset to be " offset 3h"; actual: "%s"`, off)
+	if w.Duration().String() != "24h0m0s" {
+		t.Fatalf(`expect: window to be "24h0m0s"; actual: "%s"`, w.Duration().String())
 	}
 
-	w, err = ParseWindowUTC("2h")
-	if err != nil {
-		t.Fatalf(`unexpected error parsing "2h": %s`, err)
-	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err == nil {
-		t.Fatalf(`expected error (negative duration); got ("%s", "%s")`, dur, off)
-	}
-
-	w, err = ParseWindowUTC("10m")
-	if err != nil {
-		t.Fatalf(`unexpected error parsing "1d": %s`, err)
-	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err == nil {
-		t.Fatalf(`expected error (negative duration); got ("%s", "%s")`, dur, off)
-	}
-
-	w, err = ParseWindowUTC("1589448338,1589534798")
-	if err != nil {
-		t.Fatalf(`unexpected error parsing "1589448338,1589534798": %s`, err)
-	}
-	dur, off, err = w.DurationOffsetForPrometheus()
-	if err != nil {
-		t.Fatalf("unexpected error: %s", err)
-	}
-	if dur != "1441m" {
-		t.Fatalf(`expect: window to be "1441m"; actual: "%s"`, dur)
-	}
-	if !strings.HasPrefix(off, " offset ") {
-		t.Fatalf(`expect: offset to start with " offset "; actual: "%s"`, off)
-	}
 }
 
 // TODO

+ 8 - 9
core/pkg/util/timeutil/timeutil_test.go

@@ -1,7 +1,6 @@
 package timeutil
 
 import (
-	"fmt"
 	"testing"
 	"time"
 )
@@ -393,27 +392,27 @@ func Test_FormatDurationStringDaysToHours(t *testing.T) {
 func TestRoundToStartOfWeek(t *testing.T) {
 	sunday := time.Date(2023, 03, 26, 12, 12, 12, 12, time.UTC)
 	roundedFromSunday := RoundToStartOfWeek(sunday)
-	if roundedFromSunday.Day() != 26 || roundedFromSunday.Weekday() == time.Sunday {
-		fmt.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromSunday.Day(), roundedFromSunday.Weekday().String())
+	if roundedFromSunday.Day() != 26 || roundedFromSunday.Weekday() != time.Sunday {
+		t.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromSunday.Day(), roundedFromSunday.Weekday().String())
 	}
 
 	tuesday := time.Date(2023, 03, 28, 12, 12, 12, 12, time.UTC)
 	roundedFromTuesday := RoundToStartOfWeek(tuesday)
-	if roundedFromTuesday.Day() != 26 || roundedFromTuesday.Weekday() == time.Sunday {
-		fmt.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromTuesday.Day(), roundedFromTuesday.Weekday().String())
+	if roundedFromTuesday.Day() != 26 || roundedFromTuesday.Weekday() != time.Sunday {
+		t.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromTuesday.Day(), roundedFromTuesday.Weekday().String())
 	}
 }
 
 func TestRoundToStartOfFollowingWeek(t *testing.T) {
 	sunday := time.Date(2023, 03, 26, 12, 12, 12, 12, time.UTC)
 	roundedFromSunday := RoundToStartOfFollowingWeek(sunday)
-	if roundedFromSunday.Month() != 4 || roundedFromSunday.Day() != 2 || roundedFromSunday.Weekday() == time.Sunday {
-		fmt.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromSunday.Day(), roundedFromSunday.Weekday().String())
+	if roundedFromSunday.Month() != 4 || roundedFromSunday.Day() != 2 || roundedFromSunday.Weekday() != time.Sunday {
+		t.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromSunday.Day(), roundedFromSunday.Weekday().String())
 	}
 
 	tuesday := time.Date(2023, 03, 28, 12, 12, 12, 12, time.UTC)
 	roundedFromTuesday := RoundToStartOfFollowingWeek(tuesday)
-	if roundedFromTuesday.Month() != 4 || roundedFromTuesday.Day() != 2 || roundedFromTuesday.Weekday() == time.Sunday {
-		fmt.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromTuesday.Day(), roundedFromTuesday.Weekday().String())
+	if roundedFromTuesday.Month() != 4 || roundedFromTuesday.Day() != 2 || roundedFromTuesday.Weekday() != time.Sunday {
+		t.Errorf("expected date to be rounded to the same sunday, got: %d, %s", roundedFromTuesday.Day(), roundedFromTuesday.Weekday().String())
 	}
 }

+ 5 - 5
go.mod

@@ -56,7 +56,7 @@ require (
 	golang.org/x/sync v0.6.0
 	golang.org/x/text v0.14.0
 	google.golang.org/api v0.162.0
-	google.golang.org/protobuf v1.32.0
+	google.golang.org/protobuf v1.33.0
 	gopkg.in/yaml.v2 v2.4.0
 	k8s.io/api v0.25.3
 	k8s.io/apimachinery v0.25.3
@@ -169,11 +169,11 @@ require (
 	go.opentelemetry.io/otel/metric v1.22.0 // indirect
 	go.opentelemetry.io/otel/trace v1.22.0 // indirect
 	go.uber.org/atomic v1.10.0 // indirect
-	golang.org/x/crypto v0.19.0 // indirect
+	golang.org/x/crypto v0.21.0 // indirect
 	golang.org/x/mod v0.13.0 // indirect
-	golang.org/x/net v0.21.0 // indirect
-	golang.org/x/sys v0.17.0 // indirect
-	golang.org/x/term v0.17.0 // indirect
+	golang.org/x/net v0.23.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
+	golang.org/x/term v0.18.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
 	golang.org/x/tools v0.14.0 // indirect
 	golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect

+ 10 - 10
go.sum

@@ -612,8 +612,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
 golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
 golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
 golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
-golang.org/x/crypto v0.19.0 h1:ENy+Az/9Y1vSrlrvBSyna3PITt4tiZLf7sgCjZBX7Wo=
-golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -694,8 +694,8 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1
 golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
-golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
+golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -781,12 +781,12 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
-golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.17.0 h1:mkTF7LCd6WGJNL3K1Ad7kwxNfYAW6a8a8QqtMblp/4U=
-golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
+golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
+golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -982,8 +982,8 @@ google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGj
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
-google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
+google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

+ 7 - 1
justfile

@@ -6,8 +6,14 @@ commit := `git rev-parse --short HEAD`
 default:
     just --list
 
+# run core unit tests
+test-core: 
+    {{commonenv}} cd ./core && go test ./... -coverprofile=coverage.out
+    {{commonenv}} cd ./core && go vet ./...
+
+
 # Run unit tests
-test:
+test: test-core
     {{commonenv}} go test ./... -coverprofile=coverage.out
     {{commonenv}} go vet ./...
 

+ 30 - 41
pkg/cloud/alibaba/provider.go

@@ -69,7 +69,6 @@ var (
 // Variable to keep track of instance families that fail in DescribePrice API due improper defaulting of systemDisk if the information is not available
 var alibabaDefaultToCloudEssd = []string{"g6e", "r6e", "r7", "g7", "g7a", "r7a"}
 
-// Why predefined and dependency on code? Can be converted to API call - https://www.alibabacloud.com/help/en/elastic-compute-service/latest/regions-describeregions
 var alibabaRegions = []string{
 	"cn-qingdao",
 	"cn-beijing",
@@ -79,48 +78,28 @@ var alibabaRegions = []string{
 	"cn-hangzhou",
 	"cn-shanghai",
 	"cn-nanjing",
-	"cn-fuzhou",
 	"cn-shenzhen",
+	"cn-heyuan",
 	"cn-guangzhou",
+	"cn-fuzhou",
+	"cn-wuhan-lr",
 	"cn-chengdu",
 	"cn-hongkong",
+	"ap-northeast-1",
+	"ap-northeast-2",
 	"ap-southeast-1",
 	"ap-southeast-2",
 	"ap-southeast-3",
-	"ap-southeast-5",
 	"ap-southeast-6",
-	"ap-southeast-7",
+	"ap-southeast-5",
 	"ap-south-1",
-	"ap-northeast-1",
-	"ap-northeast-2",
-	"us-west-1",
+	"ap-southeast-7",
 	"us-east-1",
-	"eu-central-1",
+	"us-west-1",
+	"eu-west-1",
 	"me-east-1",
-}
-
-// To-Do: Convert to API call - https://www.alibabacloud.com/help/en/elastic-compute-service/latest/describeinstancetypefamilies
-// Also first pass only completely tested pricing API for General pupose instances families & memory optimized instance families
-var alibabaInstanceFamilies = []string{
-	"g7",
-	"g7a",
-	"g6e",
-	"g6",
-	"g5",
-	"sn2",
-	"sn2ne",
-	"r7",
-	"r7a",
-	"r6e",
-	"r6a",
-	"r6",
-	"r5",
-	"se1",
-	"se1ne",
-	"re6",
-	"re6p",
-	"re4",
-	"se1",
+	"me-central-1",
+	"eu-central-1",
 }
 
 // AlibabaInfo contains configuration for Alibaba's CUR integration
@@ -430,7 +409,6 @@ func (alibaba *Alibaba) DownloadPricingData() error {
 	var lookupKey string
 	alibaba.clients = make(map[string]*sdk.Client)
 	alibaba.Pricing = make(map[string]*AlibabaPricing)
-
 	for _, node := range nodeList {
 		pricingObj := &AlibabaPricing{}
 		slimK8sNode := generateSlimK8sNodeFromV1Node(node)
@@ -534,8 +512,17 @@ func (alibaba *Alibaba) NodePricing(key models.Key) (*models.Node, models.Pricin
 
 	pricing, ok := alibaba.Pricing[keyFeature]
 	if !ok {
-		log.Errorf("Node pricing information not found for node with feature: %s", keyFeature)
-		return nil, meta, fmt.Errorf("Node pricing information not found for node with feature: %s letting it use default values", keyFeature)
+		keys := make([]string, 0, len(alibaba.Pricing))
+		for k := range alibaba.Pricing {
+			keys = append(keys, k)
+		}
+		kf := key.(*AlibabaNodeKey)
+		// Try to look up pricing with no disk attached
+		pricing, ok = alibaba.Pricing[kf.FeaturesWithOtherDisk("")]
+		if !ok {
+			log.Errorf("Node pricing information not found for node with feature: %s . Existing keys are: %+v", keyFeature, keys)
+			return nil, meta, fmt.Errorf("Node pricing information not found for node with feature: %s letting it use default values", keyFeature)
+		}
 	}
 
 	log.Debugf("returning the node price for the node with feature: %s", keyFeature)
@@ -554,7 +541,7 @@ func (alibaba *Alibaba) PVPricing(pvk models.PVKey) (*models.PV, error) {
 	pricing, ok := alibaba.Pricing[keyFeature]
 
 	if !ok {
-		log.Errorf("Persistent Volume pricing not found for PV with feature: %s", keyFeature)
+		log.Debugf("Persistent Volume pricing not found for PV with feature: %s", keyFeature)
 		return nil, fmt.Errorf("Persistent Volume pricing not found for PV with feature: %s letting it use default values", keyFeature)
 	}
 
@@ -845,6 +832,12 @@ func (alibabaNodeKey *AlibabaNodeKey) Features() string {
 	return strings.Join(keyLookup, "::")
 }
 
+func (alibabaNodeKey *AlibabaNodeKey) FeaturesWithOtherDisk(overrideDiskCategory string) string {
+	keyLookup := stringutil.DeleteEmptyStringsFromArray([]string{alibabaNodeKey.RegionID, alibabaNodeKey.InstanceType, alibabaNodeKey.OSType,
+		alibabaNodeKey.OptimizedKeyword, overrideDiskCategory, alibabaNodeKey.SystemDiskSizeInGiB, alibabaNodeKey.SystemDiskPerformanceLevel})
+	return strings.Join(keyLookup, "::")
+}
+
 func (alibabaNodeKey *AlibabaNodeKey) GPUType() string {
 	return ""
 }
@@ -1096,7 +1089,7 @@ func processDescribePriceAndCreateAlibabaPricing(client *sdk.Client, i interface
 		resp, err := client.ProcessCommonRequestWithSigner(req, signer)
 		pricing.NodeAttributes = NewAlibabaNodeAttributes(node)
 		if err != nil || resp.GetHttpStatus() != 200 {
-			// Can be defaulted to some value here?
+			// Try again but default the disk to something else
 			return nil, fmt.Errorf("unable to fetch information for node with InstanceType: %v", node.InstanceType)
 		} else {
 			// This is where population of Pricing happens
@@ -1152,10 +1145,6 @@ func getInstanceFamilyFromType(instanceType string) string {
 		log.Warnf("unable to find the family of the instance type %s, returning its family type unknown", instanceType)
 		return ALIBABA_UNKNOWN_INSTANCE_FAMILY_TYPE
 	}
-	if !slices.Contains(alibabaInstanceFamilies, splitinstanceType[1]) {
-		log.Warnf("currently the instance family type %s is not valid or not tested completely for pricing API", instanceType)
-		return ALIBABA_NOT_SUPPORTED_INSTANCE_FAMILY_TYPE
-	}
 	return splitinstanceType[1]
 }
 

+ 24 - 5
pkg/cloud/alibaba/provider_test.go

@@ -408,6 +408,30 @@ func TestProcessDescribePriceAndCreateAlibabaPricing(t *testing.T) {
 			},
 			expectedError: nil,
 		},
+		{
+			name: "test incorrect disk type",
+			teststruct: &SlimK8sNode{
+				InstanceType:       "ecs.g6.xlarge",
+				RegionID:           "ap-northeast-1",
+				PriceUnit:          "Hour",
+				MemorySizeInKiB:    "33554432KiB",
+				IsIoOptimized:      true,
+				OSType:             "Linux",
+				ProviderID:         "cn-hangzhou.i-test-15",
+				InstanceTypeFamily: "se1",
+				SystemDisk: &SlimK8sDisk{
+					DiskType:         "data",
+					RegionID:         "ap-northeast-1",
+					PriceUnit:        "Hour",
+					SizeInGiB:        "40",
+					DiskCategory:     "cloud_essd",
+					PerformanceLevel: "PL1",
+					ProviderID:       "d-Ali-cloud-XXX-04",
+					StorageClass:     "temp",
+				},
+			},
+			expectedError: nil,
+		},
 	}
 	custom := &models.CustomPricing{}
 	for _, c := range cases {
@@ -442,11 +466,6 @@ func TestGetInstanceFamilyFromType(t *testing.T) {
 			instanceType:           "random.value",
 			expectedInstanceFamily: ALIBABA_UNKNOWN_INSTANCE_FAMILY_TYPE,
 		},
-		{
-			name:                   "test if random instance family gives you ALIBABA_NOT_SUPPORTED_INSTANCE_FAMILY_TYPE value ",
-			instanceType:           "ecs.g7e.2xlarge",
-			expectedInstanceFamily: ALIBABA_NOT_SUPPORTED_INSTANCE_FAMILY_TYPE,
-		},
 	}
 
 	for _, c := range cases {

+ 2 - 2
pkg/cloud/aws/provider.go

@@ -1382,8 +1382,8 @@ func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k models.Ke
 	}
 	// Throw error if public price is not found
 	if !publicPricingFound {
-		log.Errorf("Could not fetch data for \"%s\"", k.ID())
-		return nil, meta, fmt.Errorf("Could not fetch data for \"%s\"", k.ID())
+		log.Errorf("For node \"%s\", cannot find the following key in OnDemand pricing data \"%s\"", k.ID(), k.Features())
+		return nil, meta, fmt.Errorf("for node \"%s\", cannot find the following key in OnDemand pricing data \"%s\"", k.ID(), k.Features())
 	}
 
 	return &models.Node{

+ 3 - 0
pkg/cloud/azure/storageconnection.go

@@ -42,6 +42,9 @@ func (sc *StorageConnection) getBlobURLTemplate() string {
 	// Use gov cloud blob url if gov is detected in AzureCloud
 	if strings.Contains(strings.ToLower(sc.Cloud), "gov") {
 		return "https://%s.blob.core.usgovcloudapi.net/%s"
+	} else if strings.Contains(strings.ToLower(sc.Cloud), "china") {
+		// Use China cloud blob url if china is detected in AzureCloud
+		return "https://%s.blob.core.chinacloudapi.cn/%s"
 	}
 	// default to Public Cloud template
 	return "https://%s.blob.core.windows.net/%s"

+ 3 - 0
pkg/cloud/gcp/bigqueryintegration.go

@@ -24,6 +24,7 @@ const (
 	SKUDescriptionColumnName     = "description"
 	LabelsColumnName             = "labels"
 	ResourceNameColumnName       = "resource"
+	ResourceGlobalNameColumnName = "global_resource"
 	CostColumnName               = "cost"
 	ListCostColumnName           = "list_cost"
 	CreditsColumnName            = "credits"
@@ -46,6 +47,7 @@ func (bqi *BigQueryIntegration) GetCloudCost(start time.Time, end time.Time) (*o
 		fmt.Sprintf("service.description as %s", ServiceDescriptionColumnName),
 		fmt.Sprintf("sku.description as %s", SKUDescriptionColumnName),
 		fmt.Sprintf("resource.name as %s", ResourceNameColumnName),
+		fmt.Sprintf("resource.global_name as %s", ResourceGlobalNameColumnName),
 		fmt.Sprintf("TO_JSON_STRING(labels) as %s", LabelsColumnName),
 		fmt.Sprintf("SUM(cost) as %s", CostColumnName),
 		fmt.Sprintf("SUM(cost_at_list) as %s", ListCostColumnName),
@@ -60,6 +62,7 @@ func (bqi *BigQueryIntegration) GetCloudCost(start time.Time, end time.Time) (*o
 		SKUDescriptionColumnName,
 		LabelsColumnName,
 		ResourceNameColumnName,
+		ResourceGlobalNameColumnName,
 	}
 
 	whereConjuncts := GetWhereConjuncts(start, end)

+ 20 - 0
pkg/cloud/gcp/bigqueryintegration_types.go

@@ -113,6 +113,26 @@ func (ccl *CloudCostLoader) Load(values []bigquery.Value, schema bigquery.Schema
 			}
 
 			properties.ProviderID = ParseProviderID(resource)
+		case ResourceGlobalNameColumnName:
+			// skip if we already got ProviderID from resource.name, as resource.global_name is a fallback for when
+			// resource.name is null
+			if len(properties.ProviderID) > 0 {
+				continue
+			}
+
+			resourceGlobalNameValue := values[i]
+			if resourceGlobalNameValue == nil {
+				properties.ProviderID = ""
+				continue
+			}
+			resourceGlobalName, ok := resourceGlobalNameValue.(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", ResourceGlobalNameColumnName, values[i])
+				properties.ProviderID = ""
+				continue
+			}
+
+			properties.ProviderID = ParseProviderID(resourceGlobalName)
 		case CostColumnName:
 			costValue, ok := values[i].(float64)
 			if !ok {

+ 75 - 0
pkg/cloud/gcp/bigqueryintegration_types_test.go

@@ -0,0 +1,75 @@
+package gcp
+
+import (
+	"testing"
+	"time"
+
+	"cloud.google.com/go/bigquery"
+	"github.com/opencost/opencost/core/pkg/opencost"
+)
+
+func Test_Load_ResourceFallback(t *testing.T) {
+	schema := bigquery.Schema{
+		&bigquery.FieldSchema{
+			Name: UsageDateColumnName,
+		},
+		&bigquery.FieldSchema{
+			Name: ResourceNameColumnName,
+		},
+		&bigquery.FieldSchema{
+			Name: ResourceGlobalNameColumnName,
+		},
+	}
+
+	testCases := map[string]struct {
+		values             []bigquery.Value
+		expectedProviderID string
+	}{
+		"no data": {
+			values: []bigquery.Value{
+				bigquery.Value(time.Now()),
+				bigquery.Value(nil),
+				bigquery.Value(nil),
+			},
+			expectedProviderID: "",
+		},
+		"resource name only": {
+			values: []bigquery.Value{
+				bigquery.Value(time.Now()),
+				bigquery.Value("resource_name"),
+				bigquery.Value(nil),
+			},
+			expectedProviderID: "resource_name",
+		},
+		"resource global name only": {
+			values: []bigquery.Value{
+				bigquery.Value(time.Now()),
+				bigquery.Value(nil),
+				bigquery.Value("resource_global_name"),
+			},
+			expectedProviderID: "resource_global_name",
+		},
+		"resource name and global name": {
+			values: []bigquery.Value{
+				bigquery.Value(time.Now()),
+				bigquery.Value("resource_name"),
+				bigquery.Value("resource_global_name"),
+			},
+			expectedProviderID: "resource_name",
+		},
+	}
+	for name, testCase := range testCases {
+		t.Run(name, func(t *testing.T) {
+			ccl := CloudCostLoader{
+				CloudCost: &opencost.CloudCost{},
+			}
+
+			err := ccl.Load(testCase.values, schema)
+			if err != nil {
+				t.Errorf("Other error during testing %s", err)
+			} else if ccl.CloudCost.Properties.ProviderID != testCase.expectedProviderID {
+				t.Errorf("Incorrect result, actual ProviderID: %s, expected: %s", ccl.CloudCost.Properties.ProviderID, testCase.expectedProviderID)
+			}
+		})
+	}
+}

+ 1 - 1
pkg/cloud/gcp/provider.go

@@ -1142,7 +1142,7 @@ func (gcp *GCP) PVPricing(pvk models.PVKey) (*models.PV, error) {
 	defer gcp.DownloadPricingDataLock.RUnlock()
 	pricing, ok := gcp.Pricing[pvk.Features()]
 	if !ok {
-		log.Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Debugf("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &models.PV{}, nil
 	}
 	return pricing.PV, nil

+ 1 - 1
pkg/cloud/provider/csvprovider.go

@@ -418,7 +418,7 @@ func (c *CSVProvider) PVPricing(pvk models.PVKey) (*models.PV, error) {
 	defer c.DownloadPricingDataLock.RUnlock()
 	pricing, ok := c.PricingPV[pvk.Features()]
 	if !ok {
-		log.Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Debugf("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &models.PV{}, nil
 	}
 	return &models.PV{

+ 1 - 1
pkg/cloud/scaleway/provider.go

@@ -232,7 +232,7 @@ func (c *Scaleway) PVPricing(pvk models.PVKey) (*models.PV, error) {
 
 	pricing, ok := c.Pricing[pvk.Features()]
 	if !ok {
-		log.Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Debugf("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &models.PV{}, nil
 	}
 	return &models.PV{

+ 44 - 25
pkg/costmodel/cluster.go

@@ -161,13 +161,6 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
 	if durStr == "" {
 		return nil, fmt.Errorf("illegal duration value for %s", opencost.NewClosedWindow(start, end))
 	}
-	// hourlyToCumulative is a scaling factor that, when multiplied by an hourly
-	// value, converts it to a cumulative value; i.e.
-	// [$/hr] * [min/res]*[hr/min] = [$/res]
-	hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
-
-	// TODO niko/assets how do we not hard-code this price?
-	costPerGBHr := 0.04 / 730.0
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
 	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (%s, persistentvolume,provider_id)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
@@ -177,12 +170,6 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
 	queryPVUsedAvg := fmt.Sprintf(`avg(avg_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	queryPVUsedMax := fmt.Sprintf(`max(max_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	queryPVCInfo := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolumeclaim_info{%s}[%s])) by (%s, volumename, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
-	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageUsedAvg := fmt.Sprintf(`avg(sum(avg_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s, job)) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryLocalStorageUsedMax := fmt.Sprintf(`max(sum(max_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s, job)) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm])`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
-	queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost{%s}) by (%s, node)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
 
 	resChPVCost := ctx.QueryAtTime(queryPVCost, t)
 	resChPVSize := ctx.QueryAtTime(queryPVSize, t)
@@ -191,12 +178,6 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
 	resChPVUsedAvg := ctx.QueryAtTime(queryPVUsedAvg, t)
 	resChPVUsedMax := ctx.QueryAtTime(queryPVUsedMax, t)
 	resChPVCInfo := ctx.QueryAtTime(queryPVCInfo, t)
-	resChLocalStorageCost := ctx.QueryAtTime(queryLocalStorageCost, t)
-	resChLocalStorageUsedCost := ctx.QueryAtTime(queryLocalStorageUsedCost, t)
-	resChLocalStoreageUsedAvg := ctx.QueryAtTime(queryLocalStorageUsedAvg, t)
-	resChLocalStoreageUsedMax := ctx.QueryAtTime(queryLocalStorageUsedMax, t)
-	resChLocalStorageBytes := ctx.QueryAtTime(queryLocalStorageBytes, t)
-	resChLocalActiveMins := ctx.QueryAtTime(queryLocalActiveMins, t)
 
 	resPVCost, _ := resChPVCost.Await()
 	resPVSize, _ := resChPVSize.Await()
@@ -205,12 +186,50 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
 	resPVUsedAvg, _ := resChPVUsedAvg.Await()
 	resPVUsedMax, _ := resChPVUsedMax.Await()
 	resPVCInfo, _ := resChPVCInfo.Await()
-	resLocalStorageCost, _ := resChLocalStorageCost.Await()
-	resLocalStorageUsedCost, _ := resChLocalStorageUsedCost.Await()
-	resLocalStorageUsedAvg, _ := resChLocalStoreageUsedAvg.Await()
-	resLocalStorageUsedMax, _ := resChLocalStoreageUsedMax.Await()
-	resLocalStorageBytes, _ := resChLocalStorageBytes.Await()
-	resLocalActiveMins, _ := resChLocalActiveMins.Await()
+
+	// Cloud providers do not always charge for a node's local disk costs (i.e.
+	// ephemeral storage). Provide an option to opt out of calculating &
+	// allocating local disk costs. Note, that this does not affect
+	// PersistentVolume costs.
+	//
+	// Ref:
+	// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/RootDeviceStorage.html
+	// https://learn.microsoft.com/en-us/azure/virtual-machines/managed-disks-overview#temporary-disk
+	// https://cloud.google.com/compute/docs/disks/local-ssd
+	resLocalStorageCost := []*prom.QueryResult{}
+	resLocalStorageUsedCost := []*prom.QueryResult{}
+	resLocalStorageUsedAvg := []*prom.QueryResult{}
+	resLocalStorageUsedMax := []*prom.QueryResult{}
+	resLocalStorageBytes := []*prom.QueryResult{}
+	resLocalActiveMins := []*prom.QueryResult{}
+	if env.GetAssetIncludeLocalDiskCost() {
+		// hourlyToCumulative is a scaling factor that, when multiplied by an
+		// hourly value, converts it to a cumulative value; i.e. [$/hr] *
+		// [min/res]*[hr/min] = [$/res]
+		hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
+		costPerGBHr := 0.04 / 730.0
+
+		queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+		queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+		queryLocalStorageUsedAvg := fmt.Sprintf(`avg(sum(avg_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s, job)) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+		queryLocalStorageUsedMax := fmt.Sprintf(`max(sum(max_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s, job)) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+		queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm])`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
+		queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost{%s}) by (%s, node)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
+
+		resChLocalStorageCost := ctx.QueryAtTime(queryLocalStorageCost, t)
+		resChLocalStorageUsedCost := ctx.QueryAtTime(queryLocalStorageUsedCost, t)
+		resChLocalStoreageUsedAvg := ctx.QueryAtTime(queryLocalStorageUsedAvg, t)
+		resChLocalStoreageUsedMax := ctx.QueryAtTime(queryLocalStorageUsedMax, t)
+		resChLocalStorageBytes := ctx.QueryAtTime(queryLocalStorageBytes, t)
+		resChLocalActiveMins := ctx.QueryAtTime(queryLocalActiveMins, t)
+
+		resLocalStorageCost, _ = resChLocalStorageCost.Await()
+		resLocalStorageUsedCost, _ = resChLocalStorageUsedCost.Await()
+		resLocalStorageUsedAvg, _ = resChLocalStoreageUsedAvg.Await()
+		resLocalStorageUsedMax, _ = resChLocalStoreageUsedMax.Await()
+		resLocalStorageBytes, _ = resChLocalStorageBytes.Await()
+		resLocalActiveMins, _ = resChLocalActiveMins.Await()
+	}
 
 	if ctx.HasErrors() {
 		return nil, ctx.ErrorCollection()

+ 10 - 5
pkg/costmodel/costmodel.go

@@ -1168,8 +1168,11 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			// was the big thing to investigate. All the funky ratio math
 			// we were doing was messing with their default pricing. for SUSE Rancher.
 
-			// We couldn't find a gpu cost, so fix cpu and ram, then accordingly
-			log.Infof("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
+			// We reach this when a GPU is detected on a node, but no cost for
+			// the GPU is defined in the OnDemand pricing. Calculate ratios of
+			// CPU to RAM and GPU to RAM costs, then distribute the total node
+			// cost among the CPU, RAM, and GPU.
+			log.Tracef("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
 
 			defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
 			if err != nil {
@@ -1261,8 +1264,10 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			newCnode.RAMBytes = fmt.Sprintf("%f", ram)
 			newCnode.GPUCost = fmt.Sprintf("%f", gpuPrice)
 		} else if newCnode.RAMCost == "" {
-			// We couldn't find a ramcost, so fix cpu and allocate ram accordingly
-			log.Debugf("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
+			// We reach this when no RAM cost is defined in the OnDemand
+			// pricing. It calculates a cpuToRAMRatio and ramMultiple to
+			// distrubte the total node cost among CPU and RAM costs.
+			log.Tracef("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
 
 			defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
 			if err != nil {
@@ -1352,7 +1357,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			}
 			newCnode.RAMBytes = fmt.Sprintf("%f", ram)
 
-			log.Debugf("Computed \"%s\" RAM Cost := %v", name, newCnode.RAMCost)
+			log.Tracef("Computed \"%s\" RAM Cost := %v", name, newCnode.RAMCost)
 		}
 
 		nodes[name] = &newCnode

+ 6 - 0
pkg/env/costmodelenv.go

@@ -108,6 +108,8 @@ const (
 	AllocationNodeLabelsEnabled     = "ALLOCATION_NODE_LABELS_ENABLED"
 	AllocationNodeLabelsIncludeList = "ALLOCATION_NODE_LABELS_INCLUDE_LIST"
 
+	AssetIncludeLocalDiskCostEnvVar = "ASSET_INCLUDE_LOCAL_DISK_COST"
+
 	regionOverrideList = "REGION_OVERRIDE_LIST"
 
 	ExportCSVFile       = "EXPORT_CSV_FILE"
@@ -640,6 +642,10 @@ func GetAllocationNodeLabelsIncludeList() []string {
 	return list
 }
 
+func GetAssetIncludeLocalDiskCost() bool {
+	return env.GetBool(AssetIncludeLocalDiskCostEnvVar, true)
+}
+
 func GetRegionOverrideList() []string {
 	regionList := env.GetList(regionOverrideList, ",")
 

+ 3 - 1
pkg/metrics/metricsconfig.go

@@ -17,7 +17,9 @@ var (
 )
 
 type MetricsConfig struct {
-	DisabledMetrics []string `json:"disabledMetrics"`
+	DisabledMetrics    []string        `json:"disabledMetrics"`
+	UseLabelsWhitelist bool            `json:"useLabelsWhitelist,omitempty"`
+	LabelsWhitelist    map[string]bool `json:"labelsWhiteList,omitempty"`
 }
 
 // Gets map of disabled metrics to empty structs

+ 53 - 1
pkg/metrics/podlabelmetrics.go

@@ -14,6 +14,14 @@ import (
 type KubePodLabelsCollector struct {
 	KubeClusterCache clustercache.ClusterCache
 	metricsConfig    MetricsConfig
+	labelsWhitelist  map[string]bool
+}
+
+func (kpmc *KubePodLabelsCollector) SetLabelsWhiteList() {
+	kpmc.labelsWhitelist = make(map[string]bool)
+	for k, v := range kpmc.metricsConfig.LabelsWhitelist {
+		kpmc.labelsWhitelist[k] = v
+	}
 }
 
 // Describe sends the super-set of all possible descriptors of pod labels only
@@ -29,6 +37,40 @@ func (kpmc KubePodLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
 	}
 }
 
+func (kpmc *KubePodLabelsCollector) UpdateControllerSelectorsCache() {
+	for _, r := range kpmc.KubeClusterCache.GetAllReplicaSets() {
+		for k := range r.Spec.Selector.MatchLabels {
+			kpmc.labelsWhitelist[k] = true
+		}
+		for _, v := range r.Spec.Selector.MatchExpressions {
+			kpmc.labelsWhitelist[v.Key] = true
+		}
+	}
+	for _, ss := range kpmc.KubeClusterCache.GetAllStatefulSets() {
+		for k := range ss.Spec.Selector.MatchLabels {
+			kpmc.labelsWhitelist[k] = true
+		}
+		for _, v := range ss.Spec.Selector.MatchExpressions {
+			kpmc.labelsWhitelist[v.Key] = true
+		}
+	}
+}
+
+func (kpmc *KubePodLabelsCollector) UpdateServiceLabels() {
+	for _, service := range kpmc.KubeClusterCache.GetAllServices() {
+		// Just unroll the selector and keep all labels whose keys could match a service selector
+		for k := range service.Spec.Selector {
+			kpmc.labelsWhitelist[k] = true
+		}
+	}
+}
+
+func (kpmc *KubePodLabelsCollector) UpdateWhitelist() {
+	kpmc.SetLabelsWhiteList()
+	kpmc.UpdateControllerSelectorsCache()
+	kpmc.UpdateServiceLabels()
+}
+
 // Collect is called by the Prometheus registry when collecting metrics.
 func (kpmc KubePodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
 	pods := kpmc.KubeClusterCache.GetAllPods()
@@ -41,7 +83,17 @@ func (kpmc KubePodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
 
 		// Pod Labels
 		if _, disabled := disabledMetrics["kube_pod_labels"]; !disabled {
-			labelNames, labelValues := promutil.KubePrependQualifierToLabels(promutil.SanitizeLabels(pod.GetLabels()), "label_")
+			podLabels := pod.GetLabels()
+			if kpmc.metricsConfig.UseLabelsWhitelist {
+				kpmc.UpdateWhitelist()
+				for lname := range podLabels {
+					if _, ok := kpmc.labelsWhitelist[lname]; !ok {
+						delete(podLabels, lname)
+					}
+				}
+			}
+
+			labelNames, labelValues := promutil.KubePrependQualifierToLabels(promutil.SanitizeLabels(podLabels), "label_")
 			ch <- newKubePodLabelsMetric("kube_pod_labels", podNS, podName, podUID, labelNames, labelValues)
 		}
 

+ 77 - 0
pkg/metrics/podlabelmetrics_test.go

@@ -0,0 +1,77 @@
+package metrics
+
+import (
+	"testing"
+
+	"github.com/opencost/opencost/pkg/clustercache"
+	appsv1 "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+func TestWhitelist(t *testing.T) {
+	sampleServices := []*v1.Service{&v1.Service{
+		Spec: v1.ServiceSpec{
+			Selector: map[string]string{"servicewhitelistlabel": "foo"},
+		},
+	}}
+	replicaSetLabelSelector := metav1.LabelSelector{
+		MatchLabels: map[string]string{"replicasetwhitelistlabel1": "bar"},
+	}
+	sampleReplicaSets := []*appsv1.ReplicaSet{{
+		Spec: appsv1.ReplicaSetSpec{
+			Selector: &replicaSetLabelSelector,
+		},
+	}}
+
+	sampleStatefulSets := []*appsv1.StatefulSet{}
+
+	kc := NewFakeCache(sampleReplicaSets, sampleStatefulSets, sampleServices)
+	wl := map[string]bool{
+		"whitelistedlabel": true,
+	}
+	mc := MetricsConfig{
+		DisabledMetrics:    []string{},
+		UseLabelsWhitelist: true,
+		LabelsWhitelist:    wl,
+	}
+	kplc := KubePodLabelsCollector{
+		KubeClusterCache: kc,
+		metricsConfig:    mc,
+	}
+	kplc.UpdateWhitelist()
+	if !kplc.labelsWhitelist["servicewhitelistlabel"] {
+		t.Errorf("Missing expected label %s", "servicewhitelistlabel")
+	}
+	if !kplc.labelsWhitelist["replicasetwhitelistlabel1"] {
+		t.Errorf("Missing expected label %s", "servicewhitelistlabel1")
+	}
+
+}
+
+type FakeCache struct {
+	clustercache.ClusterCache
+	replicasets  []*appsv1.ReplicaSet
+	statefulsets []*appsv1.StatefulSet
+	services     []*v1.Service
+}
+
+func (f FakeCache) GetAllReplicaSets() []*appsv1.ReplicaSet {
+	return f.replicasets
+}
+
+func (f FakeCache) GetAllStatefulSets() []*appsv1.StatefulSet {
+	return f.statefulsets
+}
+
+func (f FakeCache) GetAllServices() []*v1.Service {
+	return f.services
+}
+
+func NewFakeCache(replicasets []*appsv1.ReplicaSet, statefulsets []*appsv1.StatefulSet, services []*v1.Service) FakeCache {
+	return FakeCache{
+		replicasets:  replicasets,
+		statefulsets: statefulsets,
+		services:     services,
+	}
+}