Просмотр исходного кода

Add GPU saturation model to the Allocation API

Introduce the vendor-neutral GPU saturation primitives (USE method) that
the prometheus-source, collector-source, and kubemodel paths populate in
later changes. This change is the data model only; nothing produces these
values yet.

- GPUSaturation: throttle violation/reason ratios (decoded from the DCGM
  clock-throttle-reasons bitmask), framebuffer occupancy and memory
  pressure, XID error counts, memory-bandwidth and compute activity, and
  host/peer link rates. Absence semantics are explicit: nil pointers /
  empty maps mean "telemetry unavailable", never zero. SanitizeNaN
  normalizes NaN to absent so Equal and codec roundtrips stay well-defined.
- GPUAllocation gains an optional Saturation field (bingen field version
  26), so older payloads decode with Saturation nil.
- GPU_MEMORY_SATURATION_THRESHOLD env (core, shared by both data sources)
  configures the framebuffer-occupancy ratio above which a GPU counts as
  memory-pressured; values outside (0, 1] fall back to the 0.9 default.

Binary roundtrip and Equal are covered by tests; the env test is hermetic
(sets the var for every case, including the unset/default path).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Cliff Colvin <clifford.colvin@gmail.com>
Cliff Colvin 1 неделя назад
Родитель
Сommit
fe6e1de6d2

+ 28 - 0
core/pkg/env/gpu.go

@@ -0,0 +1,28 @@
+package env
+
+// GPU memory-saturation configuration shared by every data source.
+const (
+	// GPUMemorySaturationThresholdEnvVar names the environment variable
+	// that overrides the framebuffer occupancy ratio above which a GPU
+	// counts as memory-pressured.
+	GPUMemorySaturationThresholdEnvVar = "GPU_MEMORY_SATURATION_THRESHOLD"
+
+	// DefaultGPUMemorySaturationThreshold is the framebuffer occupancy
+	// ratio applied when no valid override is configured.
+	DefaultGPUMemorySaturationThreshold = 0.9
+)
+
+// GetGPUMemorySaturationThreshold returns the configured framebuffer
+// occupancy threshold for GPU memory pressure. The value is read from
+// GPU_MEMORY_SATURATION_THRESHOLD; anything that is not a number in (0, 1]
+// is rejected in favor of DefaultGPUMemorySaturationThreshold. That
+// includes NaN, which is neither "<= 0" nor "> 1" and would therefore pass
+// a pair of out-of-range comparisons.
+//
+// This lives in core (rather than per data-source module) because both the
+// prometheus-source query builder and the collector-source aggregator must
+// apply the identical threshold; two copies of the validation logic drifted
+// during development and were consolidated here. (Code review finding.)
+func GetGPUMemorySaturationThreshold() float64 {
+	threshold := GetFloat64(GPUMemorySaturationThresholdEnvVar, DefaultGPUMemorySaturationThreshold)
+	// Accept by a positive range test instead of rejecting by out-of-range
+	// tests: every ordered comparison involving NaN is false, so only this
+	// form falls through to the default for a NaN override.
+	if threshold > 0.0 && threshold <= 1.0 {
+		return threshold
+	}
+	return DefaultGPUMemorySaturationThreshold
+}

+ 30 - 0
core/pkg/env/gpu_test.go

@@ -0,0 +1,30 @@
+package env
+
+import "testing"
+
+// TestGetGPUMemorySaturationThreshold checks that valid overrides are
+// honored and that unset, out-of-range, and non-numeric values fall back to
+// the 0.9 default.
+func TestGetGPUMemorySaturationThreshold(t *testing.T) {
+	type thresholdCase struct {
+		name  string
+		value string
+		want  float64
+	}
+
+	cases := []thresholdCase{
+		{name: "unset uses default", value: "", want: 0.9},
+		{name: "valid value", value: "0.8", want: 0.8},
+		{name: "upper bound inclusive", value: "1.0", want: 1.0},
+		{name: "zero rejected", value: "0", want: 0.9},
+		{name: "negative rejected", value: "-0.5", want: 0.9},
+		{name: "above one rejected", value: "1.5", want: 0.9},
+		{name: "non-numeric uses default", value: "high", want: 0.9},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Always set the variable, using the empty string for the
+			// "unset" case, so a value configured outside the test
+			// cannot leak into the default-path subtests.
+			t.Setenv(GPUMemorySaturationThresholdEnvVar, tc.value)
+
+			got := GetGPUMemorySaturationThreshold()
+			if got == tc.want {
+				return
+			}
+			t.Errorf("GetGPUMemorySaturationThreshold() = %v, want %v", got, tc.want)
+		})
+	}
+}

+ 6 - 1
core/pkg/opencost/allocation.go

@@ -119,6 +119,8 @@ type GPUAllocation struct {
 	IsGPUShared       *bool    `json:"isGPUShared"`
 	GPUUsageAverage   *float64 `json:"gpuUsageAverage"`
 	GPURequestAverage *float64 `json:"gpuRequestAverage"`
+
+	Saturation *GPUSaturation `json:"saturation,omitempty"` //@bingen:field[version=26]
 }
 
 func (orig *GPUAllocation) SanitizeNaN() {
@@ -131,6 +133,7 @@ func (orig *GPUAllocation) SanitizeNaN() {
 	if orig.GPUUsageAverage == nil || math.IsNaN(*orig.GPUUsageAverage) {
 		orig.GPUUsageAverage = nil
 	}
+	orig.Saturation.SanitizeNaN()
 }
 
 func (orig *GPUAllocation) Clone() *GPUAllocation {
@@ -145,6 +148,7 @@ func (orig *GPUAllocation) Clone() *GPUAllocation {
 		IsGPUShared:       orig.IsGPUShared,
 		GPUUsageAverage:   orig.GPUUsageAverage,
 		GPURequestAverage: orig.GPURequestAverage,
+		Saturation:        orig.Saturation.Clone(),
 	}
 }
 
@@ -161,7 +165,8 @@ func (orig *GPUAllocation) Equal(that *GPUAllocation) bool {
 		orig.GPUUUID == that.GPUUUID &&
 		orig.IsGPUShared == that.IsGPUShared &&
 		orig.GPUUsageAverage == that.GPUUsageAverage &&
-		orig.GPURequestAverage == that.GPURequestAverage
+		orig.GPURequestAverage == that.GPURequestAverage &&
+		orig.Saturation.Equal(that.Saturation)
 
 }
 

+ 2 - 1
core/pkg/opencost/bingen.go

@@ -44,7 +44,7 @@ package opencost
 // @bingen:end
 
 // Allocation Version Set: Includes Allocation pipeline specific resources
-// @bingen:set[name=Allocation,version=25]
+// @bingen:set[name=Allocation,version=26]
 // @bingen:generate[migrate]:Allocation
 // @bingen:generate[streamable,stringtable]:AllocationSet
 // @bingen:generate:AllocationSetRange
@@ -59,6 +59,7 @@ package opencost
 // @bingen:generate:LbAllocations
 // @bingen:generate:LbAllocation
 // @bingen:generate:GPUAllocation
+// @bingen:generate:GPUSaturation
 // @bingen:end
 
 // @bingen:set[name=CloudCost,version=3]

+ 252 - 0
core/pkg/opencost/gpu_saturation.go

@@ -0,0 +1,252 @@
+package opencost
+
+import (
+	"maps"
+	"math"
+)
+
+// GPU throttle reason bits as reported by the DCGM field
+// DCGM_FI_DEV_CLOCK_THROTTLE_REASONS (renamed DCGM_FI_DEV_CLOCKS_EVENT_REASONS
+// in DCGM 3.3+). The bit positions come from NVML's
+// nvmlClocksThrottleReasons constants:
+// https://docs.nvidia.com/deploy/nvml-api/group__nvmlClocksThrottleReasons.html
+//
+// Only saturation-relevant bits are listed. The remaining bits
+// (gpu_idle 0x1, applications_clocks_setting 0x2, display_clock_setting 0x100)
+// describe configured operating states rather than a GPU that cannot service
+// demand, so they are deliberately left out.
+const (
+	GPUThrottleBitSwPowerCap   uint64 = 1 << 2 // 0x4: clocks reduced by software power cap
+	GPUThrottleBitHwSlowdown   uint64 = 1 << 3 // 0x8: hardware slowdown (thermal or power brake) engaged
+	GPUThrottleBitSyncBoost    uint64 = 1 << 4 // 0x10: clocks lowered to match a sync-boost group
+	GPUThrottleBitSwThermal    uint64 = 1 << 5 // 0x20: software thermal slowdown
+	GPUThrottleBitHwThermal    uint64 = 1 << 6 // 0x40: hardware thermal slowdown
+	GPUThrottleBitHwPowerBrake uint64 = 1 << 7 // 0x80: hardware power brake slowdown
+)
+
+// Canonical names for saturation-relevant GPU throttle reasons. These are the
+// keys used in GPUSaturation.ThrottleReasonRatios, one per GPUThrottleBit*
+// constant.
+const (
+	GPUThrottleReasonSwPowerCap   = "sw_power_cap"
+	GPUThrottleReasonHwSlowdown   = "hw_slowdown"
+	GPUThrottleReasonSyncBoost    = "sync_boost"
+	GPUThrottleReasonSwThermal    = "sw_thermal"
+	GPUThrottleReasonHwThermal    = "hw_thermal"
+	GPUThrottleReasonHwPowerBrake = "hw_power_brake"
+)
+
+// Canonical names for GPU throttle violation counters reported by DCGM. These
+// are the keys used in GPUSaturation.ThrottleViolationRatios. Each maps to a
+// cumulative microsecond counter in the default dcgm-exporter configuration:
+// DCGM_FI_DEV_POWER_VIOLATION, DCGM_FI_DEV_THERMAL_VIOLATION,
+// DCGM_FI_DEV_SYNC_BOOST_VIOLATION, and DCGM_FI_DEV_BOARD_LIMIT_VIOLATION.
+//
+// Note that GPUThrottleViolationSyncBoost and GPUThrottleReasonSyncBoost share
+// the string "sync_boost"; the two sets of names key different maps and must
+// not be mixed.
+const (
+	GPUThrottleViolationPower      = "power"
+	GPUThrottleViolationThermal    = "thermal"
+	GPUThrottleViolationSyncBoost  = "sync_boost"
+	GPUThrottleViolationBoardLimit = "board_limit"
+)
+
+// GPUThrottleReason pairs a canonical throttle reason name with its bit in the
+// DCGM clock throttle reasons bitmask.
+type GPUThrottleReason struct {
+	// Name is one of the GPUThrottleReason* string constants.
+	Name string
+	// Bit is the matching single-bit GPUThrottleBit* mask.
+	Bit uint64
+}
+
+// GPUThrottleReasons enumerates every saturation-relevant throttle reason, in
+// ascending bit order. It is the single source of truth for bitmask decoding
+// and for generating per-reason Prometheus queries.
+//
+// GPUThrottleReasonsFromMask iterates this slice, so its output follows the
+// same order.
+var GPUThrottleReasons = []GPUThrottleReason{
+	{Name: GPUThrottleReasonSwPowerCap, Bit: GPUThrottleBitSwPowerCap},
+	{Name: GPUThrottleReasonHwSlowdown, Bit: GPUThrottleBitHwSlowdown},
+	{Name: GPUThrottleReasonSyncBoost, Bit: GPUThrottleBitSyncBoost},
+	{Name: GPUThrottleReasonSwThermal, Bit: GPUThrottleBitSwThermal},
+	{Name: GPUThrottleReasonHwThermal, Bit: GPUThrottleBitHwThermal},
+	{Name: GPUThrottleReasonHwPowerBrake, Bit: GPUThrottleBitHwPowerBrake},
+}
+
+// GPUThrottleReasonsFromMask translates a DCGM clock throttle reasons bitmask
+// into the canonical names of the saturation-relevant reasons whose bits are
+// set. Bits that are not listed in GPUThrottleReasons are ignored, and a mask
+// with none of the listed bits set yields an empty, non-nil slice.
+func GPUThrottleReasonsFromMask(mask uint64) []string {
+	active := make([]string, 0, len(GPUThrottleReasons))
+	for i := range GPUThrottleReasons {
+		entry := GPUThrottleReasons[i]
+		if mask&entry.Bit == 0 {
+			continue
+		}
+		active = append(active, entry.Name)
+	}
+	return active
+}
+
+// GPUSaturation carries per-GPU saturation signals derived from DCGM exporter
+// metrics, following the USE method: where utilization reports how busy the
+// GPU was, saturation reports work that was queued, rejected, or slowed
+// because the GPU could not service demand.
+//
+// Every field is an independent primitive; no composite score is computed.
+// A nil field means the underlying DCGM metric was not available in the
+// window (no dcgm-exporter, field disabled in its config, or no DCP
+// profiling support), never that the value was zero. The same holds for the
+// two maps: a nil or empty map, or a missing key, means "unavailable".
+//
+// Ratios are fractions of the queried window in [0, 1] unless noted.
+//
+// When adding a scalar field, also add it to scalarFields so SanitizeNaN,
+// Clone, Equal, and IsEmpty pick it up.
+type GPUSaturation struct {
+	// ThrottleViolationRatios maps a GPUThrottleViolation* name to the
+	// fraction of the window the GPU spent throttled for that reason,
+	// derived from the cumulative DCGM_FI_DEV_*_VIOLATION microsecond
+	// counters. These counters are part of the default dcgm-exporter
+	// configuration.
+	ThrottleViolationRatios map[string]float64 `json:"throttleViolationRatios,omitempty"`
+	// ThrottleReasonRatios maps a GPUThrottleReason* name to the fraction
+	// of the window the corresponding DCGM_FI_DEV_CLOCK_THROTTLE_REASONS
+	// bit was set. That field is not in the default dcgm-exporter
+	// configuration and must be enabled explicitly. Reported for the whole
+	// physical GPU, even when MIG or time-slicing is in use.
+	ThrottleReasonRatios map[string]float64 `json:"throttleReasonRatios,omitempty"`
+	// MemoryUsedRatioAvg/Max are framebuffer occupancy over the window:
+	// DCGM_FI_DEV_FB_USED / (DCGM_FI_DEV_FB_USED + DCGM_FI_DEV_FB_FREE).
+	// Default dcgm-exporter configuration.
+	MemoryUsedRatioAvg *float64 `json:"memoryUsedRatioAvg,omitempty"`
+	MemoryUsedRatioMax *float64 `json:"memoryUsedRatioMax,omitempty"`
+	// MemoryPressureRatio is the fraction of the window the framebuffer
+	// occupancy was above the configured threshold (default 0.9).
+	MemoryPressureRatio *float64 `json:"memoryPressureRatio,omitempty"`
+	// XIDErrorCount counts distinct XID error events observed in the window
+	// via changes in DCGM_FI_DEV_XID_ERRORS. That field reports the last
+	// XID code seen, so repeats of the same error code are undercounted.
+	// This is a count, not a ratio.
+	XIDErrorCount *float64 `json:"xidErrorCount,omitempty"`
+	// DRAMActiveAvg/Max are the ratio of cycles the device memory interface
+	// was active (DCGM_FI_PROF_DRAM_ACTIVE). Requires DCP profiling
+	// (Volta+). Sustained values near 1.0 with low SMOccupancyAvg indicate
+	// a memory-bandwidth-bound workload.
+	DRAMActiveAvg *float64 `json:"dramActiveAvg,omitempty"`
+	DRAMActiveMax *float64 `json:"dramActiveMax,omitempty"`
+	// SMActiveAvg (DCGM_FI_PROF_SM_ACTIVE) and SMOccupancyAvg
+	// (DCGM_FI_PROF_SM_OCCUPANCY) are provided so consumers can
+	// distinguish compute-bound from bandwidth- or latency-bound
+	// saturation. Requires DCP profiling and is commented out of the
+	// default dcgm-exporter configuration.
+	SMActiveAvg    *float64 `json:"smActiveAvg,omitempty"`
+	SMOccupancyAvg *float64 `json:"smOccupancyAvg,omitempty"`
+	// PCIe/NVLink average throughput in bytes/sec over the window, from
+	// rate() of DCGM_FI_PROF_PCIE_TX/RX_BYTES and
+	// DCGM_FI_PROF_NVLINK_TX/RX_BYTES counters. Requires DCP profiling;
+	// the NVLink fields are commented out of the default dcgm-exporter
+	// configuration. Link capacity is not derivable from DCGM, so these
+	// are raw rates rather than ratios.
+	PCIeTxBytesAvg   *float64 `json:"pcieTxBytesAvg,omitempty"`
+	PCIeRxBytesAvg   *float64 `json:"pcieRxBytesAvg,omitempty"`
+	NVLinkTxBytesAvg *float64 `json:"nvlinkTxBytesAvg,omitempty"`
+	NVLinkRxBytesAvg *float64 `json:"nvlinkRxBytesAvg,omitempty"`
+}
+
+// scalarFields lists the address of every *float64 field in declaration
+// order. SanitizeNaN, Clone, Equal, and IsEmpty all walk this list, so a
+// newly added scalar only has to be registered here to be handled by each of
+// them.
+func (orig *GPUSaturation) scalarFields() []**float64 {
+	fields := []**float64{
+		// framebuffer occupancy
+		&orig.MemoryUsedRatioAvg,
+		&orig.MemoryUsedRatioMax,
+		&orig.MemoryPressureRatio,
+		// errors
+		&orig.XIDErrorCount,
+		// memory-bandwidth and compute activity
+		&orig.DRAMActiveAvg,
+		&orig.DRAMActiveMax,
+		&orig.SMActiveAvg,
+		&orig.SMOccupancyAvg,
+		// host/peer link rates
+		&orig.PCIeTxBytesAvg,
+		&orig.PCIeRxBytesAvg,
+		&orig.NVLinkTxBytesAvg,
+		&orig.NVLinkRxBytesAvg,
+	}
+	return fields
+}
+
+// SanitizeNaN normalizes NaN to "absent": a scalar holding NaN is reset to
+// nil and a map entry holding NaN is deleted, so missing telemetry has a
+// single representation. Calling it on a nil receiver is a no-op.
+func (orig *GPUSaturation) SanitizeNaN() {
+	if orig == nil {
+		return
+	}
+
+	for _, slot := range orig.scalarFields() {
+		if value := *slot; value != nil && math.IsNaN(*value) {
+			*slot = nil
+		}
+	}
+
+	// Deleting from a map while ranging over it is permitted in Go.
+	dropNaN := func(ratios map[string]float64) {
+		for name, ratio := range ratios {
+			if math.IsNaN(ratio) {
+				delete(ratios, name)
+			}
+		}
+	}
+	dropNaN(orig.ThrottleViolationRatios)
+	dropNaN(orig.ThrottleReasonRatios)
+}
+
+// Clone returns a deep copy of the GPUSaturation: both maps are copied and
+// every non-nil scalar gets its own freshly allocated value. A nil receiver
+// clones to nil.
+func (orig *GPUSaturation) Clone() *GPUSaturation {
+	if orig == nil {
+		return nil
+	}
+
+	clone := new(GPUSaturation)
+	clone.ThrottleViolationRatios = maps.Clone(orig.ThrottleViolationRatios)
+	clone.ThrottleReasonRatios = maps.Clone(orig.ThrottleReasonRatios)
+
+	// scalarFields returns the fields in the same order for both values,
+	// so index i addresses the same field on each side.
+	dst := clone.scalarFields()
+	for i, src := range orig.scalarFields() {
+		if *src == nil {
+			continue
+		}
+		copied := **src
+		*dst[i] = &copied
+	}
+	return clone
+}
+
+// Equal reports whether two GPUSaturation values carry the same data. Two
+// nil receivers are equal; nil never equals non-nil. Maps are compared by
+// key and value, and a scalar field matches when both sides are nil or both
+// point to the same value.
+func (orig *GPUSaturation) Equal(that *GPUSaturation) bool {
+	if orig == nil || that == nil {
+		return orig == nil && that == nil
+	}
+
+	sameMaps := maps.Equal(orig.ThrottleViolationRatios, that.ThrottleViolationRatios) &&
+		maps.Equal(orig.ThrottleReasonRatios, that.ThrottleReasonRatios)
+	if !sameMaps {
+		return false
+	}
+
+	theirs := that.scalarFields()
+	for i, mine := range orig.scalarFields() {
+		// inline nil/pointee comparison; converge on the shared
+		// ptrValueEqual helper once the #3846 fix (separate PR) lands
+		a, b := *mine, *theirs[i]
+		switch {
+		case a == nil && b == nil:
+			continue
+		case a == nil || b == nil:
+			return false
+		case *a != *b:
+			return false
+		}
+	}
+	return true
+}
+
+// IsEmpty reports whether the GPUSaturation carries no signal at all: it is
+// nil, or both maps have no entries and every scalar is nil. An empty value
+// should be omitted rather than serialized.
+func (orig *GPUSaturation) IsEmpty() bool {
+	if orig == nil {
+		return true
+	}
+
+	if len(orig.ThrottleViolationRatios)+len(orig.ThrottleReasonRatios) != 0 {
+		return false
+	}
+
+	for _, slot := range orig.scalarFields() {
+		if *slot != nil {
+			return false
+		}
+	}
+	return true
+}

+ 318 - 0
core/pkg/opencost/gpu_saturation_test.go

@@ -0,0 +1,318 @@
+package opencost
+
+import (
+	"math"
+	"reflect"
+	"sort"
+	"testing"
+)
+
+// TestGPUThrottleReasonsFromMask checks that each saturation bit decodes to
+// its canonical name and that non-saturation bits are dropped.
+func TestGPUThrottleReasonsFromMask(t *testing.T) {
+	type maskCase struct {
+		name string
+		mask uint64
+		want []string
+	}
+
+	allSaturationBits := GPUThrottleBitSwPowerCap | GPUThrottleBitHwSlowdown | GPUThrottleBitSyncBoost |
+		GPUThrottleBitSwThermal | GPUThrottleBitHwThermal | GPUThrottleBitHwPowerBrake
+
+	cases := []maskCase{
+		{name: "zero mask", mask: 0x0, want: []string{}},
+		{name: "sw power cap", mask: GPUThrottleBitSwPowerCap, want: []string{GPUThrottleReasonSwPowerCap}},
+		{name: "hw slowdown", mask: GPUThrottleBitHwSlowdown, want: []string{GPUThrottleReasonHwSlowdown}},
+		{name: "sync boost", mask: GPUThrottleBitSyncBoost, want: []string{GPUThrottleReasonSyncBoost}},
+		{name: "sw thermal", mask: GPUThrottleBitSwThermal, want: []string{GPUThrottleReasonSwThermal}},
+		{name: "hw thermal", mask: GPUThrottleBitHwThermal, want: []string{GPUThrottleReasonHwThermal}},
+		{name: "hw power brake", mask: GPUThrottleBitHwPowerBrake, want: []string{GPUThrottleReasonHwPowerBrake}},
+		{
+			// gpu_idle (0x1), applications_clocks_setting (0x2), and
+			// display_clock_setting (0x100) are operating states, not
+			// saturation, and must not decode as throttle reasons.
+			name: "non-saturation bits ignored",
+			mask: 0x1 | 0x2 | 0x100,
+			want: []string{},
+		},
+		{
+			name: "combined saturation and non-saturation bits",
+			mask: 0x1 | GPUThrottleBitSwPowerCap | GPUThrottleBitHwThermal,
+			want: []string{GPUThrottleReasonSwPowerCap, GPUThrottleReasonHwThermal},
+		},
+		{
+			name: "all saturation bits",
+			mask: allSaturationBits,
+			want: []string{
+				GPUThrottleReasonSwPowerCap, GPUThrottleReasonHwSlowdown, GPUThrottleReasonSyncBoost,
+				GPUThrottleReasonSwThermal, GPUThrottleReasonHwThermal, GPUThrottleReasonHwPowerBrake,
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Both sides are sorted before comparing, so the check is
+			// order-insensitive; want is copied first so the table
+			// entry itself is never reordered.
+			got := GPUThrottleReasonsFromMask(tc.mask)
+			want := append([]string{}, tc.want...)
+			sort.Strings(got)
+			sort.Strings(want)
+
+			if reflect.DeepEqual(got, want) {
+				return
+			}
+			t.Errorf("GPUThrottleReasonsFromMask(%#x) = %v, want %v", tc.mask, got, want)
+		})
+	}
+}
+
+// TestGPUThrottleReasonBitsMatchNVML pins every entry of GPUThrottleReasons
+// to its literal NVML bit value and rejects unknown or duplicated names.
+func TestGPUThrottleReasonBitsMatchNVML(t *testing.T) {
+	// Bit positions are defined by NVML's nvmlClocksThrottleReasons and must
+	// never drift: DCGM_FI_DEV_CLOCK_THROTTLE_REASONS reports them verbatim.
+	expected := map[string]uint64{
+		GPUThrottleReasonSwPowerCap:   0x4,
+		GPUThrottleReasonHwSlowdown:   0x8,
+		GPUThrottleReasonSyncBoost:    0x10,
+		GPUThrottleReasonSwThermal:    0x20,
+		GPUThrottleReasonHwThermal:    0x40,
+		GPUThrottleReasonHwPowerBrake: 0x80,
+	}
+
+	if got := len(GPUThrottleReasons); got != len(expected) {
+		t.Fatalf("GPUThrottleReasons has %d entries, want %d", got, len(expected))
+	}
+
+	visited := make(map[string]bool, len(expected))
+	for _, entry := range GPUThrottleReasons {
+		wantBit, known := expected[entry.Name]
+		if !known {
+			t.Errorf("unexpected throttle reason %q", entry.Name)
+			continue
+		}
+		if entry.Bit != wantBit {
+			t.Errorf("throttle reason %q has bit %#x, want %#x", entry.Name, entry.Bit, wantBit)
+		}
+		if visited[entry.Name] {
+			t.Errorf("duplicate throttle reason %q", entry.Name)
+		}
+		visited[entry.Name] = true
+	}
+}
+
+// f64 returns a pointer to a freshly allocated copy of v, for filling
+// optional *float64 fields in test fixtures.
+func f64(v float64) *float64 {
+	p := new(float64)
+	*p = v
+	return p
+}
+
+// mockGPUSaturation builds a fully populated GPUSaturation fixture. Each call
+// returns fresh maps and pointers, so tests may mutate the result freely.
+func mockGPUSaturation() *GPUSaturation {
+	sat := new(GPUSaturation)
+
+	sat.ThrottleViolationRatios = map[string]float64{
+		GPUThrottleViolationPower:   0.25,
+		GPUThrottleViolationThermal: 0.1,
+	}
+	sat.ThrottleReasonRatios = map[string]float64{
+		GPUThrottleReasonSwPowerCap: 0.2,
+		GPUThrottleReasonHwThermal:  0.05,
+	}
+
+	sat.MemoryUsedRatioAvg = f64(0.85)
+	sat.MemoryUsedRatioMax = f64(0.99)
+	sat.MemoryPressureRatio = f64(0.4)
+	sat.XIDErrorCount = f64(2)
+	sat.DRAMActiveAvg = f64(0.7)
+	sat.DRAMActiveMax = f64(0.95)
+	sat.SMActiveAvg = f64(0.6)
+	sat.SMOccupancyAvg = f64(0.5)
+	sat.PCIeTxBytesAvg = f64(1.5e9)
+	sat.PCIeRxBytesAvg = f64(2.5e9)
+	sat.NVLinkTxBytesAvg = f64(3.5e9)
+	sat.NVLinkRxBytesAvg = f64(4.5e9)
+
+	return sat
+}
+
+// TestGPUSaturation_SanitizeNaN checks that NaN scalars become nil, NaN map
+// entries are deleted, finite values are untouched, and a nil receiver is
+// tolerated.
+func TestGPUSaturation_SanitizeNaN(t *testing.T) {
+	sat := mockGPUSaturation()
+	sat.MemoryUsedRatioAvg = f64(math.NaN())
+	sat.SMActiveAvg = f64(math.NaN())
+	sat.ThrottleViolationRatios[GPUThrottleViolationSyncBoost] = math.NaN()
+	sat.ThrottleReasonRatios[GPUThrottleReasonHwSlowdown] = math.NaN()
+
+	sat.SanitizeNaN()
+
+	// NaN scalars are cleared
+	if sat.MemoryUsedRatioAvg != nil {
+		t.Errorf("expected NaN MemoryUsedRatioAvg to be nil")
+	}
+	if sat.SMActiveAvg != nil {
+		t.Errorf("expected NaN SMActiveAvg to be nil")
+	}
+
+	// NaN map entries are removed
+	_, violationKept := sat.ThrottleViolationRatios[GPUThrottleViolationSyncBoost]
+	if violationKept {
+		t.Errorf("expected NaN throttle violation entry to be removed")
+	}
+	_, reasonKept := sat.ThrottleReasonRatios[GPUThrottleReasonHwSlowdown]
+	if reasonKept {
+		t.Errorf("expected NaN throttle reason entry to be removed")
+	}
+
+	// non-NaN values survive
+	if maxRatio := sat.MemoryUsedRatioMax; maxRatio == nil || *maxRatio != 0.99 {
+		t.Errorf("expected MemoryUsedRatioMax to survive sanitization")
+	}
+	power, powerKept := sat.ThrottleViolationRatios[GPUThrottleViolationPower]
+	if !powerKept || power != 0.25 {
+		t.Errorf("expected power violation ratio to survive sanitization")
+	}
+
+	// nil receiver must not panic
+	var nilSat *GPUSaturation
+	nilSat.SanitizeNaN()
+}
+
+// TestGPUSaturation_Clone checks that Clone of nil is nil, that a clone
+// equals its source, and that the clone shares neither maps nor scalar
+// pointers with the source.
+func TestGPUSaturation_Clone(t *testing.T) {
+	var nilSat *GPUSaturation
+	if got := nilSat.Clone(); got != nil {
+		t.Fatalf("expected nil Clone of nil GPUSaturation")
+	}
+
+	source := mockGPUSaturation()
+	copied := source.Clone()
+	if !source.Equal(copied) {
+		t.Fatalf("expected clone to equal original")
+	}
+
+	// deep copy: mutating the clone must not affect the original
+	copied.ThrottleViolationRatios[GPUThrottleViolationPower] = 0.99
+	*copied.MemoryUsedRatioAvg = 0.1
+
+	if source.ThrottleViolationRatios[GPUThrottleViolationPower] == 0.99 {
+		t.Errorf("clone shares ThrottleViolationRatios map with original")
+	}
+	if *source.MemoryUsedRatioAvg == 0.1 {
+		t.Errorf("clone shares MemoryUsedRatioAvg pointer with original")
+	}
+}
+
+// TestGPUSaturation_Equal exercises Equal in both directions for nil,
+// identical, and single-difference inputs.
+func TestGPUSaturation_Equal(t *testing.T) {
+	// mutated returns a fresh fixture with one change applied.
+	mutated := func(change func(*GPUSaturation)) *GPUSaturation {
+		sat := mockGPUSaturation()
+		change(sat)
+		return sat
+	}
+
+	type equalCase struct {
+		name string
+		a, b *GPUSaturation
+		want bool
+	}
+
+	cases := []equalCase{
+		{name: "both nil", a: nil, b: nil, want: true},
+		{name: "one nil", a: mockGPUSaturation(), b: nil, want: false},
+		{name: "identical", a: mockGPUSaturation(), b: mockGPUSaturation(), want: true},
+		{
+			name: "different scalar",
+			a:    mockGPUSaturation(),
+			b:    mutated(func(s *GPUSaturation) { s.SMActiveAvg = f64(0.99) }),
+			want: false,
+		},
+		{
+			name: "nil vs set scalar",
+			a:    mockGPUSaturation(),
+			b:    mutated(func(s *GPUSaturation) { s.SMActiveAvg = nil }),
+			want: false,
+		},
+		{
+			name: "different map value",
+			a:    mockGPUSaturation(),
+			b: mutated(func(s *GPUSaturation) {
+				s.ThrottleReasonRatios[GPUThrottleReasonSwPowerCap] = 0.99
+			}),
+			want: false,
+		},
+		{
+			name: "missing map key",
+			a:    mockGPUSaturation(),
+			b: mutated(func(s *GPUSaturation) {
+				delete(s.ThrottleReasonRatios, GPUThrottleReasonSwPowerCap)
+			}),
+			want: false,
+		},
+		{name: "empty", a: &GPUSaturation{}, b: &GPUSaturation{}, want: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Equal must be symmetric, so check both argument orders.
+			forward := tc.a.Equal(tc.b)
+			if forward != tc.want {
+				t.Errorf("Equal() = %v, want %v", forward, tc.want)
+			}
+			backward := tc.b.Equal(tc.a)
+			if backward != tc.want {
+				t.Errorf("Equal() reversed = %v, want %v", backward, tc.want)
+			}
+		})
+	}
+}
+
+// TestGPUSaturation_IsEmpty checks that nil, zero, and empty-map values are
+// empty and that any populated field makes the value non-empty.
+func TestGPUSaturation_IsEmpty(t *testing.T) {
+	var nilSat *GPUSaturation
+
+	checks := []struct {
+		sat       *GPUSaturation
+		wantEmpty bool
+		failure   string
+	}{
+		{nilSat, true, "expected nil GPUSaturation to be empty"},
+		{&GPUSaturation{}, true, "expected zero GPUSaturation to be empty"},
+		{
+			&GPUSaturation{ThrottleReasonRatios: map[string]float64{}},
+			true,
+			"expected GPUSaturation with empty map to be empty",
+		},
+		{mockGPUSaturation(), false, "expected populated GPUSaturation to be non-empty"},
+		{
+			&GPUSaturation{XIDErrorCount: f64(1)},
+			false,
+			"expected GPUSaturation with one field to be non-empty",
+		},
+	}
+
+	for _, check := range checks {
+		if check.sat.IsEmpty() != check.wantEmpty {
+			t.Error(check.failure)
+		}
+	}
+}
+
+// TestGPUAllocation_BinaryRoundtripWithSaturation marshals a GPUAllocation to
+// its binary form and back, checking that the device identity and the
+// saturation payload survive and that a nil Saturation stays nil.
+func TestGPUAllocation_BinaryRoundtripWithSaturation(t *testing.T) {
+	notShared := false
+
+	type roundtripCase struct {
+		name  string
+		alloc *GPUAllocation
+	}
+
+	cases := []roundtripCase{
+		{
+			name: "nil saturation",
+			alloc: &GPUAllocation{
+				GPUDevice:       "nvidia0",
+				GPUModel:        "Tesla T4",
+				GPUUUID:         "GPU-1",
+				IsGPUShared:     &notShared,
+				GPUUsageAverage: f64(0.5),
+			},
+		},
+		{
+			name: "populated saturation",
+			alloc: &GPUAllocation{
+				GPUDevice:  "nvidia0",
+				GPUModel:   "Tesla T4",
+				GPUUUID:    "GPU-1",
+				Saturation: mockGPUSaturation(),
+			},
+		},
+		{
+			name: "empty saturation struct",
+			alloc: &GPUAllocation{
+				GPUDevice:  "nvidia0",
+				Saturation: &GPUSaturation{},
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		orig := tc.alloc
+		t.Run(tc.name, func(t *testing.T) {
+			encoded, err := orig.MarshalBinary()
+			if err != nil {
+				t.Fatalf("MarshalBinary: %s", err)
+			}
+
+			decoded := &GPUAllocation{}
+			if err := decoded.UnmarshalBinary(encoded); err != nil {
+				t.Fatalf("UnmarshalBinary: %s", err)
+			}
+
+			sameIdentity := decoded.GPUDevice == orig.GPUDevice &&
+				decoded.GPUModel == orig.GPUModel &&
+				decoded.GPUUUID == orig.GPUUUID
+			if !sameIdentity {
+				t.Errorf("device identity did not survive roundtrip: got %+v, want %+v", decoded, orig)
+			}
+
+			// Presence is checked separately from content so that a nil
+			// Saturation decoding to an empty struct is caught.
+			gotNil, wantNil := decoded.Saturation == nil, orig.Saturation == nil
+			if gotNil != wantNil {
+				t.Fatalf("saturation presence did not survive roundtrip: got %v, want %v", decoded.Saturation, orig.Saturation)
+			}
+			if !decoded.Saturation.Equal(orig.Saturation) {
+				t.Errorf("saturation did not survive roundtrip: got %+v, want %+v", decoded.Saturation, orig.Saturation)
+			}
+		})
+	}
+}

+ 398 - 1
core/pkg/opencost/opencost_codecs.go

@@ -51,7 +51,7 @@ const (
 	AssetsCodecVersion uint8 = 21
 
 	// AllocationCodecVersion is used for any resources listed in the Allocation version set
-	AllocationCodecVersion uint8 = 25
+	AllocationCodecVersion uint8 = 26
 )
 
 //--------------------------------------------------------------------------
@@ -136,6 +136,7 @@ var typeMap map[string]reflect.Type = map[string]reflect.Type{
 	"CostMetric":            reflect.TypeFor[CostMetric](),
 	"Disk":                  reflect.TypeFor[Disk](),
 	"GPUAllocation":         reflect.TypeFor[GPUAllocation](),
+	"GPUSaturation":         reflect.TypeFor[GPUSaturation](),
 	"LbAllocation":          reflect.TypeFor[LbAllocation](),
 	"LoadBalancer":          reflect.TypeFor[LoadBalancer](),
 	"Network":               reflect.TypeFor[Network](),
@@ -6223,6 +6224,20 @@ func (target *GPUAllocation) MarshalBinaryWithContext(ctx *EncodingContext) (err
 
 		buff.WriteFloat64(*target.GPURequestAverage) // write float64
 	}
+	if target.Saturation == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		// --- [begin][write][struct](GPUSaturation) ---
+		buff.WriteInt(0) // [compatibility, unused]
+		errP := target.Saturation.MarshalBinaryWithContext(ctx)
+		if errP != nil {
+			return errP
+		}
+		// --- [end][write][struct](GPUSaturation) ---
+
+	}
 	return nil
 }
 
@@ -6325,6 +6340,388 @@ func (target *GPUAllocation) UnmarshalBinaryWithContext(ctx *DecodingContext) (e
 		o := buff.ReadFloat64() // read float64
 		target.GPURequestAverage = &o
 
+	}
+
+	// field version check
+	if uint8(26) <= version {
+		if buff.ReadUInt8() == uint8(0) {
+			target.Saturation = nil
+		} else {
+			// --- [begin][read][struct](GPUSaturation) ---
+			p := &GPUSaturation{}
+			buff.ReadInt() // [compatibility, unused]
+			errP := p.UnmarshalBinaryWithContext(ctx)
+			if errP != nil {
+				return errP
+			}
+			target.Saturation = p
+			// --- [end][read][struct](GPUSaturation) ---
+
+		}
+	} else {
+		target.Saturation = nil
+
+	}
+	return nil
+}
+
+//--------------------------------------------------------------------------
+//  GPUSaturation
+//--------------------------------------------------------------------------
+//
+// NOTE: This block (and the Saturation field handling in the GPUAllocation
+// codec above) was written by hand. The public bingen (opencost/bingen
+// v0.1.1) generates code against a newer core/pkg/util buffer API
+// (util.NewBufferFromWriter) than this repository vendors, so regenerating
+// this file with current bingen does not compile here. The wire format
+// below was verified against bingen v0.1.1 output for these types: same
+// field order, nil bytes, map/string-table encoding, and version gating.
+// When the util package is upgraded and this file is regenerated, this
+// block can be replaced wholesale by generator output.
+
+// MarshalBinary encodes this GPUSaturation into a new util buffer and returns the buffer's
+// bytes; an error from MarshalBinaryWithContext is returned as-is together with nil data.
+func (target *GPUSaturation) MarshalBinary() (data []byte, err error) {
+	ctx := &EncodingContext{
+		Buffer: util.NewBuffer(),
+		Table:  nil, // standalone encoding: no string table is supplied
+	}
+
+	e := target.MarshalBinaryWithContext(ctx)
+	if e != nil {
+		return nil, e
+	}
+
+	encBytes := ctx.Buffer.Bytes()
+	return encBytes, nil
+}
+
+// MarshalBinaryWithContext writes this GPUSaturation to ctx.Buffer: one version byte, then every
+// field in the fixed order below, each preceded by a presence byte (0 = nil, 1 = value follows).
+func (target *GPUSaturation) MarshalBinaryWithContext(ctx *EncodingContext) (err error) {
+	// any panic raised while encoding is recovered here and returned through the named result err
+	defer func() {
+		if r := recover(); r != nil {
+			if e, ok := r.(error); ok {
+				err = e
+			} else if s, ok := r.(string); ok {
+				err = fmt.Errorf("Unexpected panic: %s", s)
+			} else {
+				err = fmt.Errorf("Unexpected panic: %+v", r)
+			}
+		}
+	}()
+
+	buff := ctx.Buffer
+	buff.WriteUInt8(AllocationCodecVersion) // version byte, always the first byte written
+
+	if target.ThrottleViolationRatios == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		// --- [begin][write][map](map[string]float64) ---
+		buff.WriteInt(len(target.ThrottleViolationRatios)) // map length
+		for vv, zz := range target.ThrottleViolationRatios { // Go map iteration order is unspecified, so entry order on the wire may differ between calls
+			if ctx.IsStringTable() { // with a string table, the key is written as a table index rather than inline
+				a := ctx.Table.AddOrGet(vv)
+				buff.WriteInt(a) // write table index
+			} else {
+				buff.WriteString(vv) // write string
+			}
+			buff.WriteFloat64(zz) // write float64
+		}
+		// --- [end][write][map](map[string]float64) ---
+
+	}
+	if target.ThrottleReasonRatios == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		// --- [begin][write][map](map[string]float64) ---
+		buff.WriteInt(len(target.ThrottleReasonRatios)) // map length
+		for vv, zz := range target.ThrottleReasonRatios { // same encoding as ThrottleViolationRatios: key, then float64 value
+			if ctx.IsStringTable() {
+				b := ctx.Table.AddOrGet(vv)
+				buff.WriteInt(b) // write table index
+			} else {
+				buff.WriteString(vv) // write string
+			}
+			buff.WriteFloat64(zz) // write float64
+		}
+		// --- [end][write][map](map[string]float64) ---
+
+	}
+	if target.MemoryUsedRatioAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.MemoryUsedRatioAvg) // write float64
+	}
+	if target.MemoryUsedRatioMax == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.MemoryUsedRatioMax) // write float64
+	}
+	if target.MemoryPressureRatio == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.MemoryPressureRatio) // write float64
+	}
+	if target.XIDErrorCount == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.XIDErrorCount) // write float64
+	}
+	if target.DRAMActiveAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.DRAMActiveAvg) // write float64
+	}
+	if target.DRAMActiveMax == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.DRAMActiveMax) // write float64
+	}
+	if target.SMActiveAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.SMActiveAvg) // write float64
+	}
+	if target.SMOccupancyAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.SMOccupancyAvg) // write float64
+	}
+	if target.PCIeTxBytesAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.PCIeTxBytesAvg) // write float64
+	}
+	if target.PCIeRxBytesAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.PCIeRxBytesAvg) // write float64
+	}
+	if target.NVLinkTxBytesAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.NVLinkTxBytesAvg) // write float64
+	}
+	if target.NVLinkRxBytesAvg == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.NVLinkRxBytesAvg) // write float64
+	}
+	return nil
+}
+
+// UnmarshalBinary decodes data into target by building a DecodingContext over the bytes and
+// delegating to UnmarshalBinaryWithContext; the context is closed when the call returns.
+func (target *GPUSaturation) UnmarshalBinary(data []byte) error {
+	ctx := NewDecodingContextFromBytes(data)
+	defer ctx.Close()
+	err := target.UnmarshalBinaryWithContext(ctx)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// UnmarshalBinaryFromReader decodes a GPUSaturation from reader by building a DecodingContext over it
+// and delegating to UnmarshalBinaryWithContext; the context is closed when the call returns.
+func (target *GPUSaturation) UnmarshalBinaryFromReader(reader io.Reader) error {
+	ctx := NewDecodingContextFromReader(reader)
+	defer ctx.Close()
+	err := target.UnmarshalBinaryWithContext(ctx)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// UnmarshalBinaryWithContext reads a GPUSaturation from ctx.Buffer: a version byte, then each field in the same order
+// MarshalBinaryWithContext writes it; a presence byte of 0 sets the field to nil. Versions above AllocationCodecVersion are rejected.
+func (target *GPUSaturation) UnmarshalBinaryWithContext(ctx *DecodingContext) (err error) {
+	// any panic raised while decoding is recovered here and returned through the named result err
+	defer func() {
+		if r := recover(); r != nil {
+			if e, ok := r.(error); ok {
+				err = e
+			} else if s, ok := r.(string); ok {
+				err = fmt.Errorf("Unexpected panic: %s", s)
+			} else {
+				err = fmt.Errorf("Unexpected panic: %+v", r)
+			}
+		}
+	}()
+
+	buff := ctx.Buffer
+	version := buff.ReadUInt8()
+
+	if version > AllocationCodecVersion { // payload was written by a newer codec version than this build supports
+		return fmt.Errorf("Invalid Version Unmarshaling GPUSaturation. Expected %d or less, got %d", AllocationCodecVersion, version)
+	}
+
+	if buff.ReadUInt8() == uint8(0) {
+		target.ThrottleViolationRatios = nil
+	} else {
+		// --- [begin][read][map](map[string]float64) ---
+		a := buff.ReadInt() // map len
+		b := make(map[string]float64, a)
+		for j := 0; j < a; j++ {
+			var vv string
+			var d string
+			if ctx.IsStringTable() { // with a string table, the key is stored as a table index rather than inline
+				e := buff.ReadInt() // read string index
+				d = ctx.Table.At(e)
+			} else {
+				d = buff.ReadString() // read string
+			}
+			c := d // c and vv are plain copies of d; the map key is the string read above
+			vv = c
+
+			zz := buff.ReadFloat64() // read float64
+			b[vv] = zz
+		}
+		target.ThrottleViolationRatios = b
+		// --- [end][read][map](map[string]float64) ---
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.ThrottleReasonRatios = nil
+	} else {
+		// --- [begin][read][map](map[string]float64) ---
+		f := buff.ReadInt() // map len
+		g := make(map[string]float64, f)
+		for j := 0; j < f; j++ { // same decoding as ThrottleViolationRatios: key, then float64 value
+			var vv string
+			var l string
+			if ctx.IsStringTable() {
+				m := buff.ReadInt() // read string index
+				l = ctx.Table.At(m)
+			} else {
+				l = buff.ReadString() // read string
+			}
+			h := l
+			vv = h
+
+			zz := buff.ReadFloat64() // read float64
+			g[vv] = zz
+		}
+		target.ThrottleReasonRatios = g
+		// --- [end][read][map](map[string]float64) ---
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.MemoryUsedRatioAvg = nil
+	} else {
+		n := buff.ReadFloat64() // read float64
+		target.MemoryUsedRatioAvg = &n
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.MemoryUsedRatioMax = nil
+	} else {
+		o := buff.ReadFloat64() // read float64
+		target.MemoryUsedRatioMax = &o
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.MemoryPressureRatio = nil
+	} else {
+		p := buff.ReadFloat64() // read float64
+		target.MemoryPressureRatio = &p
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.XIDErrorCount = nil
+	} else {
+		q := buff.ReadFloat64() // read float64
+		target.XIDErrorCount = &q
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.DRAMActiveAvg = nil
+	} else {
+		r := buff.ReadFloat64() // read float64
+		target.DRAMActiveAvg = &r
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.DRAMActiveMax = nil
+	} else {
+		s := buff.ReadFloat64() // read float64
+		target.DRAMActiveMax = &s
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.SMActiveAvg = nil
+	} else {
+		t := buff.ReadFloat64() // read float64
+		target.SMActiveAvg = &t
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.SMOccupancyAvg = nil
+	} else {
+		u := buff.ReadFloat64() // read float64
+		target.SMOccupancyAvg = &u
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.PCIeTxBytesAvg = nil
+	} else {
+		v := buff.ReadFloat64() // read float64
+		target.PCIeTxBytesAvg = &v
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.PCIeRxBytesAvg = nil
+	} else {
+		w := buff.ReadFloat64() // read float64
+		target.PCIeRxBytesAvg = &w
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.NVLinkTxBytesAvg = nil
+	} else {
+		x := buff.ReadFloat64() // read float64
+		target.NVLinkTxBytesAvg = &x
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.NVLinkRxBytesAvg = nil
+	} else {
+		y := buff.ReadFloat64() // read float64
+		target.NVLinkRxBytesAvg = &y
+
 	}
 	return nil
 }