1 nedēļu atpakaļ · 8b4a3933aa
--- a/core/pkg/model/kubemodel/bingen.go
+++ b/core/pkg/model/kubemodel/bingen.go
@@ -22,4 +22,4 @@ package kubemodel
 
															 // @bingen:define[string]:github.com/opencost/opencost/core/pkg/model/shared.Provider
														
 
															-//go:generate bingen -package=kubemodel -version=2 -buffer=github.com/opencost/opencost/core/pkg/util
														
 
															+//go:generate bingen -package=kubemodel -version=4 -buffer=github.com/opencost/opencost/core/pkg/util
														
--- a/core/pkg/model/kubemodel/dcgm.go
+++ b/core/pkg/model/kubemodel/dcgm.go
@@ -3,6 +3,8 @@ package kubemodel
 
															 import (
														
 
															 	"fmt"
														
 
															 	"time"
														
 
															+
														
 
															+	"maps"
														
 
															 )
														
 
															 // DCGMDevice holds recording from the DCGM exporter which provides identification and usage metrics for
														
@@ -19,6 +21,131 @@ type DCGMDevice struct {
 
															 	Device    string             `json:"device"`
														
 
															 	ModelName string             `json:"modelName"`
														
 
															 	PodUsages map[string]DCGMPod `json:"podUsages"`
														
 
															+
														
 
															+	// USE-method saturation signals for this device, nil when no
														
 
															+	// saturation metrics were available (see DCGMDeviceSaturation)
														
 
															+	Saturation *DCGMDeviceSaturation `json:"saturation,omitempty"` // @bingen:field[version=3]
														
 
															+
														
 
															+	// Device-level metrics from the default dcgm-exporter configuration.
														
 
															+	// Compute utilization is a percentage (0-100); memory used is absolute
														
 
															+	// bytes. Zero means not collected for the window.
														
 
															+	PowerWatts            float64 `json:"powerWatts,omitempty"`            // @bingen:field[version=4]
														
 
															+	TemperatureCelsius    float64 `json:"temperatureCelsius,omitempty"`    // @bingen:field[version=4]
														
 
															+	ComputeUtilizationAvg float64 `json:"computeUtilizationAvg,omitempty"` // @bingen:field[version=4]
														
 
															+	ComputeUtilizationMax float64 `json:"computeUtilizationMax,omitempty"` // @bingen:field[version=4]
														
 
															+	MemoryUsedBytesAvg    float64 `json:"memoryUsedBytesAvg,omitempty"`    // @bingen:field[version=4]
														
 
															+	MemoryUsedBytesMax    float64 `json:"memoryUsedBytesMax,omitempty"`    // @bingen:field[version=4]
														
 
															+}
														
 
															+
														
 
															+var (
														
 
															+	_ DeviceInfo        = (*DCGMDevice)(nil)
														
 
															+	_ DevicePerformance = (*DCGMDevice)(nil)
														
 
															+	_ DeviceSaturation  = (*DCGMDevice)(nil)
														
 
															+)
														
 
															+
														
 
															+// DeviceInfo implementation. MIG parentage is not derivable from
														
 
															+// dcgm-exporter labels (a MIG instance's series do not carry the physical
														
 
															+// GPU UUID), so GetParent reports empty until a source for the mapping
														
 
															+// exists (e.g. DRA slice attributes).
														
 
															+func (d *DCGMDevice) GetIdentifier() string { return d.UUID }
														
 
															+func (d *DCGMDevice) GetType() string       { return "GPU" }
														
 
															+func (d *DCGMDevice) GetName() string       { return d.ModelName }
														
 
															+func (d *DCGMDevice) GetPower() float64     { return d.PowerWatts }
														
 
															+func (d *DCGMDevice) GetStart() time.Time   { return d.Start }
														
 
															+func (d *DCGMDevice) GetEnd() time.Time     { return d.End }
														
 
															+func (d *DCGMDevice) GetParent() string     { return "" }
														
 
															+
														
 
															+// DevicePerformance implementation, backed by device-level DCGM series:
														
 
															+// GR_ENGINE_ACTIVE (scaled to percent), FB_USED (scaled to bytes), and
														
 
															+// GPU_TEMP.
														
 
															+func (d *DCGMDevice) GetComputeUtilizationAverage() float64 { return d.ComputeUtilizationAvg }
														
 
															+func (d *DCGMDevice) GetComputeUtilizationMax() float64     { return d.ComputeUtilizationMax }
														
 
															+func (d *DCGMDevice) GetMemoryUtilizationAverage() float64  { return d.MemoryUsedBytesAvg }
														
 
															+func (d *DCGMDevice) GetMemoryUtilizationMax() float64      { return d.MemoryUsedBytesMax }
														
 
															+func (d *DCGMDevice) GetTemp() float64                      { return d.TemperatureCelsius }
														
 
															+
														
 
															+// DeviceSaturation implementation. The vendor-neutral getters map onto
														
 
															+// DCGM concepts: throttle violation counters DCGM_FI_DEV_*_VIOLATION,
														
 
															+// throttle reason bitmask DCGM_FI_DEV_CLOCK_THROTTLE_REASONS, framebuffer
														
 
															+// occupancy FB_USED/(FB_USED+FB_FREE), error events = XID errors, memory
														
 
															+// bandwidth = DRAM_ACTIVE, compute active/occupancy = SM_ACTIVE /
														
 
															+// SM_OCCUPANCY, host link = PCIe, peer link = NVLink. ok=false / nil means
														
 
															+// the DCGM field was unavailable, never zero.
														
 
															+
														
 
															+// saturationValue adapts the saturation struct's pointer fields to the
														
 
															+// interface's (value, ok) presence contract, including when Saturation is
														
 
															+// entirely nil.
														
 
															+func (d *DCGMDevice) saturationValue(get func(*DCGMDeviceSaturation) *float64) (float64, bool) {
														
 
															+	if d.Saturation == nil {
														
 
															+		return 0, false
														
 
															+	}
														
 
															+	v := get(d.Saturation)
														
 
															+	if v == nil {
														
 
															+		return 0, false
														
 
															+	}
														
 
															+	return *v, true
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetThrottleViolationRatios() map[string]float64 {
														
 
															+	if d.Saturation == nil {
														
 
															+		return nil
														
 
															+	}
														
 
															+	return maps.Clone(d.Saturation.ThrottleViolationRatios)
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetThrottleReasonRatios() map[string]float64 {
														
 
															+	if d.Saturation == nil {
														
 
															+		return nil
														
 
															+	}
														
 
															+	return maps.Clone(d.Saturation.ThrottleReasonRatios)
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetMemoryUsedRatioAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.MemoryUsedRatioAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetMemoryUsedRatioMax() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.MemoryUsedRatioMax })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetMemoryPressureRatio() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.MemoryPressureRatio })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetErrorEventCount() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.XIDErrorCount })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetMemoryBandwidthActiveAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.DRAMActiveAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetMemoryBandwidthActiveMax() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.DRAMActiveMax })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetComputeActiveAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.SMActiveAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetComputeOccupancyAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.SMOccupancyAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetHostLinkTxBytesAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.PCIeTxBytesAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetHostLinkRxBytesAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.PCIeRxBytesAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetPeerLinkTxBytesAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.NVLinkTxBytesAvg })
														
 
															+}
														
 
															+
														
 
															+func (d *DCGMDevice) GetPeerLinkRxBytesAvg() (float64, bool) {
														
 
															+	return d.saturationValue(func(s *DCGMDeviceSaturation) *float64 { return s.NVLinkRxBytesAvg })
														
 
															 }
														
 
															 // @bingen:generate:DCGMPod
														
@@ -41,6 +168,10 @@ func (d *DCGMDevice) ValidateDCGMDevice(window Window) error {
 
															 		return err
														
 
															 	}
														
 
															+	if err := d.Saturation.Validate(); err != nil {
														
 
															+		return fmt.Errorf("invalid Saturation for DCGMDevice '%s': %w", d.UUID, err)
														
 
															+	}
														
 
															+
														
 
															 	return nil
														
 
															 }
														
--- a/core/pkg/model/kubemodel/dcgm_saturation.go
+++ b/core/pkg/model/kubemodel/dcgm_saturation.go
@@ -0,0 +1,134 @@
 
															+package kubemodel
														
 
															+
														
 
															+import (
														
 
															+	"fmt"
														
 
															+	"math"
														
 
															+
														
 
															+	"maps"
														
 
															+)
														
 
															+
														
 
															+// DCGMDeviceSaturation carries USE-method saturation signals for an NVIDIA
														
 
															+// GPU recorded from the DCGM exporter: where utilization reports how busy
														
 
															+// the device was, saturation reports work queued, rejected, or slowed
														
 
															+// because the device could not service demand. It lives on DCGMDevice
														
 
															+// because these signals are NVIDIA/DCGM-specific; the vendor-neutral
														
 
															+// surface is the DeviceSaturation interface.
														
 
															+//
														
 
															+// Every field is an independent primitive; no composite score is computed.
														
 
															+// A nil field means the underlying source metric was not available in the
														
 
															+// window (no dcgm-exporter, field disabled in its config, or no DCP
														
 
															+// profiling support), never that the value was zero. Ratios are fractions
														
 
															+// of the observed window in [0, 1] unless noted.
														
 
															+// @bingen:generate:DCGMDeviceSaturation
														
 
															+type DCGMDeviceSaturation struct {
														
 
															+	// ThrottleViolationRatios maps a throttle reason (power, thermal,
														
 
															+	// sync_boost, board_limit) to the fraction of the window the device
														
 
															+	// spent throttled for that reason, from the cumulative
														
 
															+	// DCGM_FI_DEV_*_VIOLATION microsecond counters.
														
 
															+	ThrottleViolationRatios map[string]float64 `json:"throttleViolationRatios,omitempty"`
														
 
															+	// ThrottleReasonRatios maps a throttle reason (sw_power_cap,
														
 
															+	// hw_slowdown, sync_boost, sw_thermal, hw_thermal, hw_power_brake) to
														
 
															+	// the fraction of the window the corresponding bit of
														
 
															+	// DCGM_FI_DEV_CLOCK_THROTTLE_REASONS was set.
														
 
															+	ThrottleReasonRatios map[string]float64 `json:"throttleReasonRatios,omitempty"`
														
 
															+	// MemoryUsedRatioAvg/Max are framebuffer occupancy over the window:
														
 
															+	// FB_USED / (FB_USED + FB_FREE).
														
 
															+	MemoryUsedRatioAvg *float64 `json:"memoryUsedRatioAvg,omitempty"`
														
 
															+	MemoryUsedRatioMax *float64 `json:"memoryUsedRatioMax,omitempty"`
														
 
															+	// MemoryPressureRatio is the fraction of the window framebuffer
														
 
															+	// occupancy exceeded the configured threshold.
														
 
															+	MemoryPressureRatio *float64 `json:"memoryPressureRatio,omitempty"`
														
 
															+	// XIDErrorCount counts XID error events observed in the window.
														
 
															+	XIDErrorCount *float64 `json:"xidErrorCount,omitempty"`
														
 
															+	// DRAMActiveAvg/Max are the ratio of cycles the device memory interface
														
 
															+	// was active (DCP profiling).
														
 
															+	DRAMActiveAvg *float64 `json:"dramActiveAvg,omitempty"`
														
 
															+	DRAMActiveMax *float64 `json:"dramActiveMax,omitempty"`
														
 
															+	// SMActiveAvg and SMOccupancyAvg distinguish compute-bound from
														
 
															+	// bandwidth- or latency-bound saturation (DCP profiling).
														
 
															+	SMActiveAvg    *float64 `json:"smActiveAvg,omitempty"`
														
 
															+	SMOccupancyAvg *float64 `json:"smOccupancyAvg,omitempty"`
														
 
															+	// Interconnect throughput in bytes/sec (DCP profiling). Link capacity
														
 
															+	// is not derivable from DCGM, so these are raw rates, not ratios.
														
 
															+	PCIeTxBytesAvg   *float64 `json:"pcieTxBytesAvg,omitempty"`
														
 
															+	PCIeRxBytesAvg   *float64 `json:"pcieRxBytesAvg,omitempty"`
														
 
															+	NVLinkTxBytesAvg *float64 `json:"nvlinkTxBytesAvg,omitempty"`
														
 
															+	NVLinkRxBytesAvg *float64 `json:"nvlinkRxBytesAvg,omitempty"`
														
 
															+}
														
 
															+
														
 
															+// ratioFields returns name/value pairs for every field constrained to
														
 
															+// [0, 1], for validation.
														
 
															+func (s *DCGMDeviceSaturation) ratioFields() map[string]*float64 {
														
 
															+	return map[string]*float64{
														
 
															+		"MemoryUsedRatioAvg":  s.MemoryUsedRatioAvg,
														
 
															+		"MemoryUsedRatioMax":  s.MemoryUsedRatioMax,
														
 
															+		"MemoryPressureRatio": s.MemoryPressureRatio,
														
 
															+		"DRAMActiveAvg":       s.DRAMActiveAvg,
														
 
															+		"DRAMActiveMax":       s.DRAMActiveMax,
														
 
															+		"SMActiveAvg":         s.SMActiveAvg,
														
 
															+		"SMOccupancyAvg":      s.SMOccupancyAvg,
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// Validate validates the DCGMDeviceSaturation fields.
														
 
															+func (s *DCGMDeviceSaturation) Validate() error {
														
 
															+	if s == nil {
														
 
															+		return nil
														
 
															+	}
														
 
															+	for _, m := range []map[string]float64{s.ThrottleViolationRatios, s.ThrottleReasonRatios} {
														
 
															+		for reason, ratio := range m {
														
 
															+			if math.IsNaN(ratio) || ratio < 0 || ratio > 1 {
														
 
															+				return fmt.Errorf("throttle ratio for %q must be 0-1, got %v", reason, ratio)
														
 
															+			}
														
 
															+		}
														
 
															+	}
														
 
															+	for name, value := range s.ratioFields() {
														
 
															+		if value != nil && (math.IsNaN(*value) || *value < 0 || *value > 1) {
														
 
															+			return fmt.Errorf("%s must be 0-1, got %v", name, *value)
														
 
															+		}
														
 
															+	}
														
 
															+	for name, value := range map[string]*float64{
														
 
															+		"XIDErrorCount":    s.XIDErrorCount,
														
 
															+		"PCIeTxBytesAvg":   s.PCIeTxBytesAvg,
														
 
															+		"PCIeRxBytesAvg":   s.PCIeRxBytesAvg,
														
 
															+		"NVLinkTxBytesAvg": s.NVLinkTxBytesAvg,
														
 
															+		"NVLinkRxBytesAvg": s.NVLinkRxBytesAvg,
														
 
															+	} {
														
 
															+		if value != nil && (math.IsNaN(*value) || *value < 0) {
														
 
															+			return fmt.Errorf("%s cannot be negative, got %v", name, *value)
														
 
															+		}
														
 
															+	}
														
 
															+	return nil
														
 
															+}
														
 
															+
														
 
															+// Clone creates a deep copy of the DCGMDeviceSaturation.
														
 
															+func (s *DCGMDeviceSaturation) Clone() *DCGMDeviceSaturation {
														
 
															+	if s == nil {
														
 
															+		return nil
														
 
															+	}
														
 
															+
														
 
															+	cloneFloat := func(v *float64) *float64 {
														
 
															+		if v == nil {
														
 
															+			return nil
														
 
															+		}
														
 
															+		c := *v
														
 
															+		return &c
														
 
															+	}
														
 
															+
														
 
															+	return &DCGMDeviceSaturation{
														
 
															+		ThrottleViolationRatios: maps.Clone(s.ThrottleViolationRatios),
														
 
															+		ThrottleReasonRatios:    maps.Clone(s.ThrottleReasonRatios),
														
 
															+		MemoryUsedRatioAvg:      cloneFloat(s.MemoryUsedRatioAvg),
														
 
															+		MemoryUsedRatioMax:      cloneFloat(s.MemoryUsedRatioMax),
														
 
															+		MemoryPressureRatio:     cloneFloat(s.MemoryPressureRatio),
														
 
															+		XIDErrorCount:           cloneFloat(s.XIDErrorCount),
														
 
															+		DRAMActiveAvg:           cloneFloat(s.DRAMActiveAvg),
														
 
															+		DRAMActiveMax:           cloneFloat(s.DRAMActiveMax),
														
 
															+		SMActiveAvg:             cloneFloat(s.SMActiveAvg),
														
 
															+		SMOccupancyAvg:          cloneFloat(s.SMOccupancyAvg),
														
 
															+		PCIeTxBytesAvg:          cloneFloat(s.PCIeTxBytesAvg),
														
 
															+		PCIeRxBytesAvg:          cloneFloat(s.PCIeRxBytesAvg),
														
 
															+		NVLinkTxBytesAvg:        cloneFloat(s.NVLinkTxBytesAvg),
														
 
															+		NVLinkRxBytesAvg:        cloneFloat(s.NVLinkRxBytesAvg),
														
 
															+	}
														
 
															+}
														
--- a/core/pkg/model/kubemodel/dcgm_saturation_test.go
+++ b/core/pkg/model/kubemodel/dcgm_saturation_test.go
@@ -0,0 +1,281 @@
 
															+package kubemodel
														
 
															+
														
 
															+import (
														
 
															+	"math"
														
 
															+	"reflect"
														
 
															+	"testing"
														
 
															+	"time"
														
 
															+)
														
 
															+
														
 
															+func saturationFloat(v float64) *float64 {
														
 
															+	return &v
														
 
															+}
														
 
															+
														
 
															+func mockDCGMDeviceSaturation() *DCGMDeviceSaturation {
														
 
															+	return &DCGMDeviceSaturation{
														
 
															+		ThrottleViolationRatios: map[string]float64{"power": 0.25},
														
 
															+		ThrottleReasonRatios:    map[string]float64{"sw_power_cap": 0.2},
														
 
															+		MemoryUsedRatioAvg:      saturationFloat(0.85),
														
 
															+		MemoryUsedRatioMax:      saturationFloat(0.99),
														
 
															+		MemoryPressureRatio:     saturationFloat(0.4),
														
 
															+		XIDErrorCount:           saturationFloat(2),
														
 
															+		DRAMActiveAvg:           saturationFloat(0.7),
														
 
															+		DRAMActiveMax:           saturationFloat(0.95),
														
 
															+		SMActiveAvg:             saturationFloat(0.6),
														
 
															+		SMOccupancyAvg:          saturationFloat(0.5),
														
 
															+		PCIeTxBytesAvg:          saturationFloat(1.5e9),
														
 
															+		PCIeRxBytesAvg:          saturationFloat(2.5e9),
														
 
															+		NVLinkTxBytesAvg:        saturationFloat(3.5e9),
														
 
															+		NVLinkRxBytesAvg:        saturationFloat(4.5e9),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func mockSaturatedDCGMDevice() *DCGMDevice {
														
 
															+	return &DCGMDevice{
														
 
															+		UUID:                  "GPU-1",
														
 
															+		Start:                 time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC),
														
 
															+		End:                   time.Date(2026, 6, 1, 1, 0, 0, 0, time.UTC),
														
 
															+		Device:                "nvidia0",
														
 
															+		ModelName:             "NVIDIA A100 80GB",
														
 
															+		Saturation:            mockDCGMDeviceSaturation(),
														
 
															+		PowerWatts:            140,
														
 
															+		TemperatureCelsius:    55,
														
 
															+		ComputeUtilizationAvg: 42.5,
														
 
															+		ComputeUtilizationMax: 97,
														
 
															+		MemoryUsedBytesAvg:    32e9,
														
 
															+		MemoryUsedBytesMax:    71e9,
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// TestDCGMDevice_DeviceInfoInterface verifies the vendor-neutral identity
														
 
															+// surface on the DCGM device.
														
 
															+func TestDCGMDevice_DeviceInfoInterface(t *testing.T) {
														
 
															+	var info DeviceInfo = mockSaturatedDCGMDevice()
														
 
															+
														
 
															+	if info.GetIdentifier() != "GPU-1" {
														
 
															+		t.Errorf("GetIdentifier() = %q, want GPU-1", info.GetIdentifier())
														
 
															+	}
														
 
															+	if info.GetType() != "GPU" {
														
 
															+		t.Errorf("GetType() = %q, want GPU", info.GetType())
														
 
															+	}
														
 
															+	if info.GetName() != "NVIDIA A100 80GB" {
														
 
															+		t.Errorf("GetName() = %q", info.GetName())
														
 
															+	}
														
 
															+	if !info.GetStart().Before(info.GetEnd()) {
														
 
															+		t.Errorf("GetStart() not before GetEnd()")
														
 
															+	}
														
 
															+	if info.GetPower() != 140 {
														
 
															+		t.Errorf("GetPower() = %v, want 140", info.GetPower())
														
 
															+	}
														
 
															+	// MIG parentage is not derivable from dcgm-exporter labels yet
														
 
															+	if info.GetParent() != "" {
														
 
															+		t.Errorf("expected empty parent until a mapping source exists, got %q", info.GetParent())
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// TestDCGMDevice_DevicePerformanceInterface verifies the performance surface
														
 
															+// is backed by the device-level metric fields with documented units.
														
 
															+func TestDCGMDevice_DevicePerformanceInterface(t *testing.T) {
														
 
															+	var perf DevicePerformance = mockSaturatedDCGMDevice()
														
 
															+
														
 
															+	if perf.GetComputeUtilizationAverage() != 42.5 || perf.GetComputeUtilizationMax() != 97 {
														
 
															+		t.Errorf("compute utilization = (%v, %v), want (42.5, 97)", perf.GetComputeUtilizationAverage(), perf.GetComputeUtilizationMax())
														
 
															+	}
														
 
															+	if perf.GetMemoryUtilizationAverage() != 32e9 || perf.GetMemoryUtilizationMax() != 71e9 {
														
 
															+		t.Errorf("memory utilization = (%v, %v), want (3.2e10, 7.1e10)", perf.GetMemoryUtilizationAverage(), perf.GetMemoryUtilizationMax())
														
 
															+	}
														
 
															+	if perf.GetTemp() != 55 {
														
 
															+		t.Errorf("GetTemp() = %v, want 55", perf.GetTemp())
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// TestDCGMDevice_DeviceSaturationInterface verifies the saturation getters
														
 
															+// expose every signal with correct values, that absence (nil Saturation or
														
 
															+// nil field) reports ok=false / nil rather than zero, and that returned
														
 
															+// maps are copies.
														
 
															+func TestDCGMDevice_DeviceSaturationInterface(t *testing.T) {
														
 
															+	var sat DeviceSaturation = mockSaturatedDCGMDevice()
														
 
															+
														
 
															+	scalarChecks := map[string]struct {
														
 
															+		get  func() (float64, bool)
														
 
															+		want float64
														
 
															+	}{
														
 
															+		"MemoryUsedRatioAvg":       {sat.GetMemoryUsedRatioAvg, 0.85},
														
 
															+		"MemoryUsedRatioMax":       {sat.GetMemoryUsedRatioMax, 0.99},
														
 
															+		"MemoryPressureRatio":      {sat.GetMemoryPressureRatio, 0.4},
														
 
															+		"ErrorEventCount":          {sat.GetErrorEventCount, 2},
														
 
															+		"MemoryBandwidthActiveAvg": {sat.GetMemoryBandwidthActiveAvg, 0.7},
														
 
															+		"MemoryBandwidthActiveMax": {sat.GetMemoryBandwidthActiveMax, 0.95},
														
 
															+		"ComputeActiveAvg":         {sat.GetComputeActiveAvg, 0.6},
														
 
															+		"ComputeOccupancyAvg":      {sat.GetComputeOccupancyAvg, 0.5},
														
 
															+		"HostLinkTxBytesAvg":       {sat.GetHostLinkTxBytesAvg, 1.5e9},
														
 
															+		"HostLinkRxBytesAvg":       {sat.GetHostLinkRxBytesAvg, 2.5e9},
														
 
															+		"PeerLinkTxBytesAvg":       {sat.GetPeerLinkTxBytesAvg, 3.5e9},
														
 
															+		"PeerLinkRxBytesAvg":       {sat.GetPeerLinkRxBytesAvg, 4.5e9},
														
 
															+	}
														
 
															+	for name, check := range scalarChecks {
														
 
															+		got, ok := check.get()
														
 
															+		if !ok || got != check.want {
														
 
															+			t.Errorf("%s = (%v, %v), want (%v, true)", name, got, ok, check.want)
														
 
															+		}
														
 
															+	}
														
 
															+
														
 
															+	if got := sat.GetThrottleViolationRatios(); got["power"] != 0.25 {
														
 
															+		t.Errorf("GetThrottleViolationRatios() = %v, want power: 0.25", got)
														
 
															+	}
														
 
															+	if got := sat.GetThrottleReasonRatios(); got["sw_power_cap"] != 0.2 {
														
 
															+		t.Errorf("GetThrottleReasonRatios() = %v, want sw_power_cap: 0.2", got)
														
 
															+	}
														
 
															+
														
 
															+	// returned maps are copies: consumers must not mutate device state
														
 
															+	sat.GetThrottleViolationRatios()["power"] = 0.99
														
 
															+	if got := sat.GetThrottleViolationRatios(); got["power"] != 0.25 {
														
 
															+		t.Errorf("interface exposed internal throttle map: %v", got)
														
 
															+	}
														
 
															+
														
 
															+	// absence semantics: nil Saturation reports ok=false / nil, never zero
														
 
															+	var absent DeviceSaturation = &DCGMDevice{UUID: "GPU-2"}
														
 
															+	if _, ok := absent.GetMemoryUsedRatioAvg(); ok {
														
 
															+		t.Errorf("expected ok=false for device without saturation")
														
 
															+	}
														
 
															+	if got := absent.GetThrottleViolationRatios(); got != nil {
														
 
															+		t.Errorf("expected nil throttle map for device without saturation, got %v", got)
														
 
															+	}
														
 
															+
														
 
															+	// per-field absence: present Saturation with one nil field
														
 
															+	partial := mockSaturatedDCGMDevice()
														
 
															+	partial.Saturation.SMActiveAvg = nil
														
 
															+	if _, ok := partial.GetComputeActiveAvg(); ok {
														
 
															+		t.Errorf("expected ok=false for absent SM active signal")
														
 
															+	}
														
 
															+	if got, ok := partial.GetComputeOccupancyAvg(); !ok || got != 0.5 {
														
 
															+		t.Errorf("sibling signal lost: (%v, %v), want (0.5, true)", got, ok)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func TestDCGMDeviceSaturation_Validate(t *testing.T) {
														
 
															+	cases := map[string]struct {
														
 
															+		mutate  func(*DCGMDeviceSaturation)
														
 
															+		wantErr bool
														
 
															+	}{
														
 
															+		"valid":  {mutate: func(s *DCGMDeviceSaturation) {}, wantErr: false},
														
 
															+		"nil ok": {mutate: nil, wantErr: false},
														
 
															+		"empty":  {mutate: func(s *DCGMDeviceSaturation) { *s = DCGMDeviceSaturation{} }, wantErr: false},
														
 
															+		"ratio above one": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.MemoryUsedRatioAvg = saturationFloat(1.1) },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+		"negative ratio": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.SMActiveAvg = saturationFloat(-0.1) },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+		"NaN ratio": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.DRAMActiveAvg = saturationFloat(math.NaN()) },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+		"throttle map ratio above one": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.ThrottleReasonRatios["sw_power_cap"] = 1.5 },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+		"negative byte rate": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.PCIeTxBytesAvg = saturationFloat(-1) },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+		"negative xid count": {
														
 
															+			mutate:  func(s *DCGMDeviceSaturation) { s.XIDErrorCount = saturationFloat(-1) },
														
 
															+			wantErr: true,
														
 
															+		},
														
 
															+	}
														
 
															+
														
 
															+	for name, tc := range cases {
														
 
															+		t.Run(name, func(t *testing.T) {
														
 
															+			var sat *DCGMDeviceSaturation
														
 
															+			if tc.mutate != nil {
														
 
															+				sat = mockDCGMDeviceSaturation()
														
 
															+				tc.mutate(sat)
														
 
															+			}
														
 
															+			err := sat.Validate()
														
 
															+			if (err != nil) != tc.wantErr {
														
 
															+				t.Errorf("Validate() error = %v, wantErr %v", err, tc.wantErr)
														
 
															+			}
														
 
															+		})
														
 
															+	}
														
 
															+
														
 
															+	// invalid saturation must fail device validation too
														
 
															+	device := mockSaturatedDCGMDevice()
														
 
															+	device.Saturation.MemoryUsedRatioAvg = saturationFloat(2.0)
														
 
															+	window := Window{Start: device.Start, End: device.End}
														
 
															+	if err := device.ValidateDCGMDevice(window); err == nil {
														
 
															+		t.Errorf("expected ValidateDCGMDevice to reject invalid saturation")
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func TestDCGMDeviceSaturation_Clone(t *testing.T) {
														
 
															+	var nilSat *DCGMDeviceSaturation
														
 
															+	if nilSat.Clone() != nil {
														
 
															+		t.Fatalf("expected nil Clone of nil DCGMDeviceSaturation")
														
 
															+	}
														
 
															+
														
 
															+	orig := mockDCGMDeviceSaturation()
														
 
															+	clone := orig.Clone()
														
 
															+
														
 
															+	if !reflect.DeepEqual(orig, clone) {
														
 
															+		t.Fatalf("clone differs from original:\n got %+v\nwant %+v", clone, orig)
														
 
															+	}
														
 
															+
														
 
															+	clone.ThrottleViolationRatios["power"] = 0.99
														
 
															+	*clone.MemoryUsedRatioAvg = 0.1
														
 
															+	if orig.ThrottleViolationRatios["power"] == 0.99 {
														
 
															+		t.Errorf("clone shares ThrottleViolationRatios map with original")
														
 
															+	}
														
 
															+	if *orig.MemoryUsedRatioAvg == 0.1 {
														
 
															+		t.Errorf("clone shares MemoryUsedRatioAvg pointer with original")
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// TestDCGMDevice_BinaryRoundtripWithSaturation verifies a DCGMDevice
														
 
															+// carrying saturation survives the bingen binary codec, and that absent
														
 
															+// saturation stays absent.
														
 
															+func TestDCGMDevice_BinaryRoundtripWithSaturation(t *testing.T) {
														
 
															+	cases := map[string]*DCGMDevice{
														
 
															+		"nil saturation": {
														
 
															+			UUID:      "GPU-1",
														
 
															+			Device:    "nvidia0",
														
 
															+			ModelName: "Tesla T4",
														
 
															+		},
														
 
															+		"populated saturation": mockSaturatedDCGMDevice(),
														
 
															+		"empty saturation struct": {
														
 
															+			UUID:       "GPU-1",
														
 
															+			Saturation: &DCGMDeviceSaturation{},
														
 
															+		},
														
 
															+	}
														
 
															+
														
 
															+	for name, orig := range cases {
														
 
															+		t.Run(name, func(t *testing.T) {
														
 
															+			bs, err := orig.MarshalBinary()
														
 
															+			if err != nil {
														
 
															+				t.Fatalf("MarshalBinary: %s", err)
														
 
															+			}
														
 
															+
														
 
															+			decoded := new(DCGMDevice)
														
 
															+			if err := decoded.UnmarshalBinary(bs); err != nil {
														
 
															+				t.Fatalf("UnmarshalBinary: %s", err)
														
 
															+			}
														
 
															+
														
 
															+			if decoded.UUID != orig.UUID || decoded.Device != orig.Device || decoded.ModelName != orig.ModelName {
														
 
															+				t.Errorf("device identity did not survive roundtrip: got %+v, want %+v", decoded, orig)
														
 
															+			}
														
 
															+			if decoded.PowerWatts != orig.PowerWatts || decoded.TemperatureCelsius != orig.TemperatureCelsius ||
														
 
															+				decoded.ComputeUtilizationAvg != orig.ComputeUtilizationAvg || decoded.ComputeUtilizationMax != orig.ComputeUtilizationMax ||
														
 
															+				decoded.MemoryUsedBytesAvg != orig.MemoryUsedBytesAvg || decoded.MemoryUsedBytesMax != orig.MemoryUsedBytesMax {
														
 
															+				t.Errorf("device metrics did not survive roundtrip: got %+v, want %+v", decoded, orig)
														
 
															+			}
														
 
															+			if (decoded.Saturation == nil) != (orig.Saturation == nil) {
														
 
															+				t.Fatalf("saturation presence did not survive roundtrip: got %v, want %v", decoded.Saturation, orig.Saturation)
														
 
															+			}
														
 
															+			if !reflect.DeepEqual(decoded.Saturation, orig.Saturation) {
														
 
															+				t.Errorf("saturation did not survive roundtrip: got %+v, want %+v", decoded.Saturation, orig.Saturation)
														
 
															+			}
														
 
															+		})
														
 
															+	}
														
 
															+}
														
--- a/core/pkg/model/kubemodel/device_interface.go
+++ b/core/pkg/model/kubemodel/device_interface.go
@@ -0,0 +1,136 @@
 
															+package kubemodel
														
 
															+
														
 
															+import "time"
														
 
															+
														
 
															+// Accelerator devices are modeled as per-vendor concrete types (e.g.
														
 
															+// DCGMDevice for NVIDIA) behind small interfaces rather than one generic
														
 
															+// concrete struct. A generic type forces every vendor's changes into shared
														
 
															+// code, implies relational fields not all vendors can provide, and blurs
														
 
															+// semantics that differ between vendors (e.g. what "power" means for AMD vs
														
 
															+// NVIDIA). Overly specific shared models have the opposite failure:
														
 
															+// collecting more than is used and looking half-supported for vendors
														
 
															+// missing a feature. Vendor structs keep agent-side transformation minimal
														
 
															+// (data ships close to its source shape, e.g. DCGM for NVIDIA), while these
														
 
															+// interfaces provide the common surface for identification, utilization,
														
 
															+// performance, and saturation.
														
 
															+//
														
 
															+// Capacity is intentionally absent: it is not readily available from vendor
														
 
															+// telemetry (e.g. DCGM) and is inferred via device plugins / DRA, which are
														
 
															+// assumed prerequisites and provide the request half of device allocation.
														
 
															+// Fan speed, PCIe topology detail, and topology are out of scope.
														
 
															+
														
 
															+// DeviceInfo identifies a physical or virtualized accelerator device and
														
 
															+// its lifecycle within the observed window.
														
 
															+type DeviceInfo interface {
														
 
															+	// GetIdentifier returns the device UUID. For virtualized devices
														
 
															+	// (e.g. a MIG instance) this is the UUID of the virtual device.
														
 
															+	GetIdentifier() string
														
 
															+
														
 
															+	// GetType returns the human-readable device type, e.g. "GPU".
														
 
															+	GetType() string
														
 
															+
														
 
															+	// GetName returns the marketing/model name, e.g. "NVIDIA A100 80GB".
														
 
															+	GetName() string
														
 
															+
														
 
															+	// GetPower returns the device power draw in watts.
														
 
															+	GetPower() float64
														
 
															+
														
 
															+	// GetStart returns the time the device came online in the observed
														
 
															+	// window.
														
 
															+	GetStart() time.Time
														
 
															+
														
 
															+	// GetEnd returns the time the device's observation ended.
														
 
															+	GetEnd() time.Time
														
 
															+
														
 
															+	// GetParent returns the UUID of the parent device, or empty if none.
														
 
															+	// For a MIG instance this is the UUID of the physical GPU.
														
 
															+	GetParent() string
														
 
															+}
														
 
															+
														
 
															+// DevicePerformance exposes utilization and performance measures for an
														
 
															+// accelerator device.
														
 
															+type DevicePerformance interface {
														
 
															+	// GetComputeUtilizationAverage returns the average percentage of
														
 
															+	// compute used over the observed window.
														
 
															+	GetComputeUtilizationAverage() float64
														
 
															+
														
 
															+	// GetComputeUtilizationMax returns the max percentage of compute used.
														
 
															+	GetComputeUtilizationMax() float64
														
 
															+
														
 
															+	// GetMemoryUtilizationAverage returns the average absolute memory used
														
 
															+	// in bytes.
														
 
															+	GetMemoryUtilizationAverage() float64
														
 
															+
														
 
															+	// GetMemoryUtilizationMax returns the max absolute memory used in
														
 
															+	// bytes.
														
 
															+	GetMemoryUtilizationMax() float64
														
 
															+
														
 
															+	// GetTemp returns the current device temperature in degrees Celsius.
														
 
															+	GetTemp() float64
														
 
															+}
														
 
															+
														
 
															+// DeviceSaturation exposes USE-method saturation measures for an
														
 
															+// accelerator device: where DevicePerformance reports how busy the device
														
 
															+// was, saturation reports work queued, rejected, or slowed because the
														
 
															+// device could not service demand.
														
 
															+//
														
 
															+// Absence is meaningful and distinct from zero: scalar getters return
														
 
															+// ok=false and map getters return nil when the vendor telemetry for that
														
 
															+// signal was unavailable, so consumers never mistake "not measured" for
														
 
															+// "not saturated". Names are vendor-neutral; the mapping to vendor
														
 
															+// telemetry (e.g. DCGM fields for NVIDIA) lives in each implementation.
														
 
															+// Ratios are fractions of the observed window in [0, 1].
														
 
															+type DeviceSaturation interface {
														
 
															+	// GetThrottleViolationRatios returns the fraction of the window the
														
 
															+	// device spent throttled, keyed by vendor-defined reason name, from
														
 
															+	// cumulative time-in-violation counters. Nil when unavailable.
														
 
															+	GetThrottleViolationRatios() map[string]float64
														
 
															+
														
 
															+	// GetThrottleReasonRatios returns the fraction of the window each
														
 
															+	// vendor-defined throttle reason state was active, from sampled state
														
 
															+	// flags. Nil when unavailable.
														
 
															+	GetThrottleReasonRatios() map[string]float64
														
 
															+
														
 
															+	// GetMemoryUsedRatioAvg returns average device-memory occupancy.
														
 
															+	GetMemoryUsedRatioAvg() (float64, bool)
														
 
															+
														
 
															+	// GetMemoryUsedRatioMax returns peak device-memory occupancy.
														
 
															+	GetMemoryUsedRatioMax() (float64, bool)
														
 
															+
														
 
															+	// GetMemoryPressureRatio returns the fraction of the window
														
 
															+	// device-memory occupancy exceeded the configured threshold.
														
 
															+	GetMemoryPressureRatio() (float64, bool)
														
 
															+
														
 
															+	// GetErrorEventCount returns the number of device error events
														
 
															+	// (rejected work) observed in the window.
														
 
															+	GetErrorEventCount() (float64, bool)
														
 
															+
														
 
															+	// GetMemoryBandwidthActiveAvg returns the average ratio of cycles the
														
 
															+	// device memory interface was active.
														
 
															+	GetMemoryBandwidthActiveAvg() (float64, bool)
														
 
															+
														
 
															+	// GetMemoryBandwidthActiveMax returns the peak ratio of cycles the
														
 
															+	// device memory interface was active.
														
 
															+	GetMemoryBandwidthActiveMax() (float64, bool)
														
 
															+
														
 
															+	// GetComputeActiveAvg returns the average ratio of cycles compute
														
 
															+	// units had work resident. Together with GetComputeOccupancyAvg and
														
 
															+	// GetMemoryBandwidthActiveAvg, consumers can distinguish
														
 
															+	// compute-bound from bandwidth- or latency-bound saturation.
														
 
															+	GetComputeActiveAvg() (float64, bool)
														
 
															+
														
 
															+	// GetComputeOccupancyAvg returns the average ratio of resident work
														
 
															+	// to the compute units' maximum.
														
 
															+	GetComputeOccupancyAvg() (float64, bool)
														
 
															+
														
 
															+	// GetHostLinkTxBytesAvg/RxBytesAvg return average host-interconnect
														
 
															+	// throughput (e.g. PCIe) in bytes/sec. Link capacity is not derivable
														
 
															+	// from vendor telemetry, so these are rates, not ratios.
														
 
															+	GetHostLinkTxBytesAvg() (float64, bool)
														
 
															+	GetHostLinkRxBytesAvg() (float64, bool)
														
 
															+
														
 
															+	// GetPeerLinkTxBytesAvg/RxBytesAvg return average device-to-device
														
 
															+	// fabric throughput (e.g. NVLink) in bytes/sec.
														
 
															+	GetPeerLinkTxBytesAvg() (float64, bool)
														
 
															+	GetPeerLinkRxBytesAvg() (float64, bool)
														
 
															+}
														
--- a/core/pkg/model/kubemodel/kubemodel_codecs.go
+++ b/core/pkg/model/kubemodel/kubemodel_codecs.go
@@ -35,7 +35,7 @@ const (
 
															 	BinaryTagStringTable string = "BGST"
														
 
															 	// DefaultCodecVersion is used for any resources listed in the Default version set
														
 
															-	DefaultCodecVersion uint8 = 2
														
 
															+	DefaultCodecVersion uint8 = 4
														
 
															 )
														
 
															 //--------------------------------------------------------------------------
														
@@ -106,6 +106,7 @@ var typeMap map[string]reflect.Type = map[string]reflect.Type{
 
															 	"CronJob":                 reflect.TypeFor[CronJob](),
														
 
															 	"DCGMContainer":           reflect.TypeFor[DCGMContainer](),
														
 
															 	"DCGMDevice":              reflect.TypeFor[DCGMDevice](),
														
 
															+	"DCGMDeviceSaturation":    reflect.TypeFor[DCGMDeviceSaturation](),
														
 
															 	"DCGMPod":                 reflect.TypeFor[DCGMPod](),
														
 
															 	"DaemonSet":               reflect.TypeFor[DaemonSet](),
														
 
															 	"Deployment":              reflect.TypeFor[Deployment](),
														
@@ -1798,6 +1799,32 @@ func (target *DCGMDevice) MarshalBinaryWithContext(ctx *EncodingContext) (err er
 
															 		// --- [end][write][map](map[string]DCGMPod) ---
														
 
															 	}
														
 
															+	if target.Saturation == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		// --- [begin][write][struct](DCGMDeviceSaturation) ---
														
 
															+		buff.WriteInt(0) // [compatibility, unused]
														
 
															+		errD := target.Saturation.MarshalBinaryWithContext(ctx)
														
 
															+		if errD != nil {
														
 
															+			return errD
														
 
															+		}
														
 
															+		// --- [end][write][struct](DCGMDeviceSaturation) ---
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	buff.WriteFloat64(target.PowerWatts) // write float64
														
 
															+
														
 
															+	buff.WriteFloat64(target.TemperatureCelsius) // write float64
														
 
															+
														
 
															+	buff.WriteFloat64(target.ComputeUtilizationAvg) // write float64
														
 
															+
														
 
															+	buff.WriteFloat64(target.ComputeUtilizationMax) // write float64
														
 
															+
														
 
															+	buff.WriteFloat64(target.MemoryUsedBytesAvg) // write float64
														
 
															+
														
 
															+	buff.WriteFloat64(target.MemoryUsedBytesMax) // write float64
														
 
															 	return nil
														
 
															 }
														
@@ -1940,6 +1967,484 @@ func (target *DCGMDevice) UnmarshalBinaryWithContext(ctx *DecodingContext) (err
 
															 	}
														
 
															+	// field version check
														
 
															+	if uint8(3) <= version {
														
 
															+		if buff.ReadUInt8() == uint8(0) {
														
 
															+			target.Saturation = nil
														
 
															+		} else {
														
 
															+
														
 
															+			// --- [begin][read][struct](DCGMDeviceSaturation) ---
														
 
															+			aa := new(DCGMDeviceSaturation)
														
 
															+			buff.ReadInt() // [compatibility, unused]
														
 
															+			errD := aa.UnmarshalBinaryWithContext(ctx)
														
 
															+			if errD != nil {
														
 
															+				return errD
														
 
															+			}
														
 
															+			target.Saturation = aa
														
 
															+			// --- [end][read][struct](DCGMDeviceSaturation) ---
														
 
															+
														
 
															+		}
														
 
															+
														
 
															+	} else {
														
 
															+		target.Saturation = nil
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		bb := buff.ReadFloat64() // read float64
														
 
															+		target.PowerWatts = bb
														
 
															+
														
 
															+	} else {
														
 
															+		target.PowerWatts = float64(0) // default
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		cc := buff.ReadFloat64() // read float64
														
 
															+		target.TemperatureCelsius = cc
														
 
															+
														
 
															+	} else {
														
 
															+		target.TemperatureCelsius = float64(0) // default
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		dd := buff.ReadFloat64() // read float64
														
 
															+		target.ComputeUtilizationAvg = dd
														
 
															+
														
 
															+	} else {
														
 
															+		target.ComputeUtilizationAvg = float64(0) // default
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		ee := buff.ReadFloat64() // read float64
														
 
															+		target.ComputeUtilizationMax = ee
														
 
															+
														
 
															+	} else {
														
 
															+		target.ComputeUtilizationMax = float64(0) // default
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		ff := buff.ReadFloat64() // read float64
														
 
															+		target.MemoryUsedBytesAvg = ff
														
 
															+
														
 
															+	} else {
														
 
															+		target.MemoryUsedBytesAvg = float64(0) // default
														
 
															+	}
														
 
															+	// field version check
														
 
															+	if uint8(4) <= version {
														
 
															+
														
 
															+		gg := buff.ReadFloat64() // read float64
														
 
															+		target.MemoryUsedBytesMax = gg
														
 
															+
														
 
															+	} else {
														
 
															+		target.MemoryUsedBytesMax = float64(0) // default
														
 
															+	}
														
 
															+
														
 
															+	return nil
														
 
															+}
														
 
															+
														
 
															+//--------------------------------------------------------------------------
														
 
															+//  DCGMDeviceSaturation
														
 
															+//--------------------------------------------------------------------------
														
 
															+
														
 
															+// MarshalBinary serializes the internal properties of this DCGMDeviceSaturation instance
														
 
															+// into a byte array
														
 
															+func (target *DCGMDeviceSaturation) MarshalBinary() (data []byte, err error) {
														
 
															+	ctx := &EncodingContext{
														
 
															+		Buffer: util.NewBuffer(),
														
 
															+		Table:  nil,
														
 
															+	}
														
 
															+
														
 
															+	e := target.MarshalBinaryWithContext(ctx)
														
 
															+	if e != nil {
														
 
															+		return nil, e
														
 
															+	}
														
 
															+
														
 
															+	encBytes := ctx.Buffer.Bytes()
														
 
															+	return encBytes, nil
														
 
															+}
														
 
															+
														
 
															+// MarshalBinaryWithContext serializes the internal properties of this DCGMDeviceSaturation instance
														
 
															+// into a byte array leveraging a predefined context.
														
 
															+func (target *DCGMDeviceSaturation) MarshalBinaryWithContext(ctx *EncodingContext) (err error) {
														
 
															+	// panics are recovered and propagated as errors
														
 
															+	defer func() {
														
 
															+		if r := recover(); r != nil {
														
 
															+			if e, ok := r.(error); ok {
														
 
															+				err = e
														
 
															+			} else if s, ok := r.(string); ok {
														
 
															+				err = fmt.Errorf("unexpected panic: %s", s)
														
 
															+			} else {
														
 
															+				err = fmt.Errorf("unexpected panic: %+v", r)
														
 
															+			}
														
 
															+		}
														
 
															+	}()
														
 
															+
														
 
															+	buff := ctx.Buffer
														
 
															+	buff.WriteUInt8(DefaultCodecVersion) // version
														
 
															+
														
 
															+	if target.ThrottleViolationRatios == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		// --- [begin][write][map](map[string]float64) ---
														
 
															+		buff.WriteInt(len(target.ThrottleViolationRatios)) // map length
														
 
															+		for v, z := range target.ThrottleViolationRatios {
														
 
															+			if ctx.IsStringTable() {
														
 
															+				a := ctx.Table.AddOrGet(v)
														
 
															+				buff.WriteInt(a) // write table index
														
 
															+			} else {
														
 
															+				buff.WriteString(v) // write string
														
 
															+			}
														
 
															+
														
 
															+			buff.WriteFloat64(z) // write float64
														
 
															+
														
 
															+		}
														
 
															+		// --- [end][write][map](map[string]float64) ---
														
 
															+
														
 
															+	}
														
 
															+	if target.ThrottleReasonRatios == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		// --- [begin][write][map](map[string]float64) ---
														
 
															+		buff.WriteInt(len(target.ThrottleReasonRatios)) // map length
														
 
															+		for vv, zz := range target.ThrottleReasonRatios {
														
 
															+			if ctx.IsStringTable() {
														
 
															+				b := ctx.Table.AddOrGet(vv)
														
 
															+				buff.WriteInt(b) // write table index
														
 
															+			} else {
														
 
															+				buff.WriteString(vv) // write string
														
 
															+			}
														
 
															+
														
 
															+			buff.WriteFloat64(zz) // write float64
														
 
															+
														
 
															+		}
														
 
															+		// --- [end][write][map](map[string]float64) ---
														
 
															+
														
 
															+	}
														
 
															+	if target.MemoryUsedRatioAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.MemoryUsedRatioAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.MemoryUsedRatioMax == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.MemoryUsedRatioMax) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.MemoryPressureRatio == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.MemoryPressureRatio) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.XIDErrorCount == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.XIDErrorCount) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.DRAMActiveAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.DRAMActiveAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.DRAMActiveMax == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.DRAMActiveMax) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.SMActiveAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.SMActiveAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.SMOccupancyAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.SMOccupancyAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.PCIeTxBytesAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.PCIeTxBytesAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.PCIeRxBytesAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.PCIeRxBytesAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.NVLinkTxBytesAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.NVLinkTxBytesAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+	if target.NVLinkRxBytesAvg == nil {
														
 
															+		buff.WriteUInt8(uint8(0)) // write nil byte
														
 
															+	} else {
														
 
															+		buff.WriteUInt8(uint8(1)) // write non-nil byte
														
 
															+
														
 
															+		buff.WriteFloat64(*target.NVLinkRxBytesAvg) // write float64
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	return nil
														
 
															+}
														
 
															+
														
 
															+// UnmarshalBinary uses the data passed byte array to set all the internal properties of
														
 
															+// the DCGMDeviceSaturation type
														
 
															+func (target *DCGMDeviceSaturation) UnmarshalBinary(data []byte) error {
														
 
															+	ctx := NewDecodingContextFromBytes(data)
														
 
															+	defer ctx.Close()
														
 
															+
														
 
															+	err := target.UnmarshalBinaryWithContext(ctx)
														
 
															+	if err != nil {
														
 
															+		return err
														
 
															+	}
														
 
															+
														
 
															+	return nil
														
 
															+}
														
 
															+
														
 
															+// UnmarshalBinaryFromReader uses the io.Reader data to set all the internal properties of
														
 
															+// the DCGMDeviceSaturation type
														
 
															+func (target *DCGMDeviceSaturation) UnmarshalBinaryFromReader(reader io.Reader) error {
														
 
															+	ctx := NewDecodingContextFromReader(reader)
														
 
															+	defer ctx.Close()
														
 
															+
														
 
															+	err := target.UnmarshalBinaryWithContext(ctx)
														
 
															+	if err != nil {
														
 
															+		return err
														
 
															+	}
														
 
															+
														
 
															+	return nil
														
 
															+}
														
 
															+
														
 
															+// UnmarshalBinaryWithContext uses the context containing a string table and binary buffer to set all the internal properties of
														
 
															+// the DCGMDeviceSaturation type
														
 
															+func (target *DCGMDeviceSaturation) UnmarshalBinaryWithContext(ctx *DecodingContext) (err error) {
														
 
															+	// panics are recovered and propagated as errors
														
 
															+	defer func() {
														
 
															+		if r := recover(); r != nil {
														
 
															+			if e, ok := r.(error); ok {
														
 
															+				err = e
														
 
															+			} else if s, ok := r.(string); ok {
														
 
															+				err = fmt.Errorf("unexpected panic: %s", s)
														
 
															+			} else {
														
 
															+				err = fmt.Errorf("unexpected panic: %+v", r)
														
 
															+			}
														
 
															+		}
														
 
															+	}()
														
 
															+
														
 
															+	buff := ctx.Buffer
														
 
															+	version := buff.ReadUInt8()
														
 
															+
														
 
															+	if version > DefaultCodecVersion {
														
 
															+		return fmt.Errorf("Invalid Version Unmarshalling DCGMDeviceSaturation. Expected %d or less, got %d", DefaultCodecVersion, version)
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.ThrottleViolationRatios = nil
														
 
															+	} else {
														
 
															+		// --- [begin][read][map](map[string]float64) ---
														
 
															+		b := buff.ReadInt() // map len
														
 
															+		a := make(map[string]float64, b)
														
 
															+		for range b {
														
 
															+			var v string
														
 
															+			var d string
														
 
															+			if ctx.IsStringTable() {
														
 
															+				e := buff.ReadInt() // read string index
														
 
															+				d = ctx.Table.At(e)
														
 
															+			} else {
														
 
															+				d = buff.ReadString() // read string
														
 
															+			}
														
 
															+			c := d
														
 
															+			v = c
														
 
															+
														
 
															+			var z float64
														
 
															+			f := buff.ReadFloat64() // read float64
														
 
															+			z = f
														
 
															+
														
 
															+			a[v] = z
														
 
															+		}
														
 
															+		target.ThrottleViolationRatios = a
														
 
															+		// --- [end][read][map](map[string]float64) ---
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.ThrottleReasonRatios = nil
														
 
															+	} else {
														
 
															+		// --- [begin][read][map](map[string]float64) ---
														
 
															+		h := buff.ReadInt() // map len
														
 
															+		g := make(map[string]float64, h)
														
 
															+		for range h {
														
 
															+			var vv string
														
 
															+			var m string
														
 
															+			if ctx.IsStringTable() {
														
 
															+				n := buff.ReadInt() // read string index
														
 
															+				m = ctx.Table.At(n)
														
 
															+			} else {
														
 
															+				m = buff.ReadString() // read string
														
 
															+			}
														
 
															+			l := m
														
 
															+			vv = l
														
 
															+
														
 
															+			var zz float64
														
 
															+			o := buff.ReadFloat64() // read float64
														
 
															+			zz = o
														
 
															+
														
 
															+			g[vv] = zz
														
 
															+		}
														
 
															+		target.ThrottleReasonRatios = g
														
 
															+		// --- [end][read][map](map[string]float64) ---
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.MemoryUsedRatioAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		p := buff.ReadFloat64() // read float64
														
 
															+		target.MemoryUsedRatioAvg = &p
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.MemoryUsedRatioMax = nil
														
 
															+	} else {
														
 
															+
														
 
															+		q := buff.ReadFloat64() // read float64
														
 
															+		target.MemoryUsedRatioMax = &q
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.MemoryPressureRatio = nil
														
 
															+	} else {
														
 
															+
														
 
															+		r := buff.ReadFloat64() // read float64
														
 
															+		target.MemoryPressureRatio = &r
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.XIDErrorCount = nil
														
 
															+	} else {
														
 
															+
														
 
															+		s := buff.ReadFloat64() // read float64
														
 
															+		target.XIDErrorCount = &s
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.DRAMActiveAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		t := buff.ReadFloat64() // read float64
														
 
															+		target.DRAMActiveAvg = &t
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.DRAMActiveMax = nil
														
 
															+	} else {
														
 
															+
														
 
															+		u := buff.ReadFloat64() // read float64
														
 
															+		target.DRAMActiveMax = &u
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.SMActiveAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		w := buff.ReadFloat64() // read float64
														
 
															+		target.SMActiveAvg = &w
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.SMOccupancyAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		x := buff.ReadFloat64() // read float64
														
 
															+		target.SMOccupancyAvg = &x
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.PCIeTxBytesAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		y := buff.ReadFloat64() // read float64
														
 
															+		target.PCIeTxBytesAvg = &y
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.PCIeRxBytesAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		aa := buff.ReadFloat64() // read float64
														
 
															+		target.PCIeRxBytesAvg = &aa
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.NVLinkTxBytesAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		bb := buff.ReadFloat64() // read float64
														
 
															+		target.NVLinkTxBytesAvg = &bb
														
 
															+
														
 
															+	}
														
 
															+
														
 
															+	if buff.ReadUInt8() == uint8(0) {
														
 
															+		target.NVLinkRxBytesAvg = nil
														
 
															+	} else {
														
 
															+
														
 
															+		cc := buff.ReadFloat64() // read float64
														
 
															+		target.NVLinkRxBytesAvg = &cc
														
 
															+
														
 
															+	}
														
 
															+
														
 
															 	return nil
														
 
															 }
														
@@ -4129,7 +4634,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 		if buff.ReadUInt8() == uint8(0) {
														
 
															 			target.Metadata = nil
														
 
															 		} else {
														
 
															-
														
 
															 			// --- [begin][read][struct](Metadata) ---
														
 
															 			a := new(Metadata)
														
 
															 			buff.ReadInt() // [compatibility, unused]
														
@@ -4147,6 +4651,7 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 	}
														
 
															 	// field version check
														
 
															 	if uint8(1) <= version {
														
 
															+
														
 
															 		// --- [begin][read][struct](Window) ---
														
 
															 		b := new(Window)
														
 
															 		buff.ReadInt() // [compatibility, unused]
														
@@ -4164,7 +4669,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 		if buff.ReadUInt8() == uint8(0) {
														
 
															 			target.Cluster = nil
														
 
															 		} else {
														
 
															-
														
 
															 			// --- [begin][read][struct](Cluster) ---
														
 
															 			c := new(Cluster)
														
 
															 			buff.ReadInt() // [compatibility, unused]
														
@@ -4204,7 +4708,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					z = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](Namespace) ---
														
 
															 					l := new(Namespace)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4295,7 +4798,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](Service) ---
														
 
															 					y := new(Service)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4386,7 +4888,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](StatefulSet) ---
														
 
															 					oo := new(StatefulSet)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4432,7 +4933,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](DaemonSet) ---
														
 
															 					uu := new(DaemonSet)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4478,7 +4978,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](Job) ---
														
 
															 					ccc := new(Job)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4524,7 +5023,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](CronJob) ---
														
 
															 					lll := new(CronJob)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4616,7 +5114,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](Node) ---
														
 
															 					yyy := new(Node)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4707,7 +5204,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzzzzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](PersistentVolumeClaim) ---
														
 
															 					oooo := new(PersistentVolumeClaim)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4844,7 +5340,6 @@ func (target *KubeModelSet) UnmarshalBinaryWithContext(ctx *DecodingContext) (er
 
															 				if buff.ReadUInt8() == uint8(0) {
														
 
															 					zzzzzzzzzzzzzzz = nil
														
 
															 				} else {
														
 
															-
														
 
															 					// --- [begin][read][struct](DCGMDevice) ---
														
 
															 					lllll := new(DCGMDevice)
														
 
															 					buff.ReadInt() // [compatibility, unused]
														
@@ -4923,7 +5418,7 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 		}
														
 
															 		fi = BingenFieldInfo{
														
 
															-			Type: reflect.TypeFor[Metadata](),
														
 
															+			Type: reflect.TypeFor[*Metadata](),
														
 
															 			Name: "Metadata",
														
 
															 		}
														
 
															 		// field version check
														
@@ -4986,7 +5481,7 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 		}
														
 
															 		fi = BingenFieldInfo{
														
 
															-			Type: reflect.TypeFor[Cluster](),
														
 
															+			Type: reflect.TypeFor[*Cluster](),
														
 
															 			Name: "Cluster",
														
 
															 		}
														
 
															 		// field version check
														
@@ -5052,7 +5547,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						z = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](Namespace) ---
														
 
															 						n := new(Namespace)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5171,7 +5665,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](Service) ---
														
 
															 						y := new(Service)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5290,7 +5783,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](StatefulSet) ---
														
 
															 						mm := new(StatefulSet)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5350,7 +5842,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](DaemonSet) ---
														
 
															 						rr := new(DaemonSet)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5410,7 +5901,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](Job) ---
														
 
															 						xx := new(Job)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5470,7 +5960,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](CronJob) ---
														
 
															 						ddd := new(CronJob)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5590,7 +6079,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](Node) ---
														
 
															 						qqq := new(Node)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5709,7 +6197,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzzzzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](PersistentVolumeClaim) ---
														
 
															 						cccc := new(PersistentVolumeClaim)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -5888,7 +6375,6 @@ func (stream *KubeModelSetStream) Stream() iter.Seq2[BingenFieldInfo, *BingenVal
 
															 					if buff.ReadUInt8() == uint8(0) {
														
 
															 						zzzzzzzzzzzzzzz = nil
														
 
															 					} else {
														
 
															-
														
 
															 						// --- [begin][read][struct](DCGMDevice) ---
														
 
															 						uuuu := new(DCGMDevice)
														
 
															 						buff.ReadInt() // [compatibility, unused]
														
@@ -9474,7 +9960,6 @@ func (target *ResourceQuotaSpecHard) UnmarshalBinaryWithContext(ctx *DecodingCon
 
															 	// field version check
														
 
															 	if uint8(1) <= version {
														
 
															-
														
 
															 		// --- [begin][read][alias](ResourceQuantities) ---
														
 
															 		var a map[Resource]ResourceQuantity
														
 
															 		if buff.ReadUInt8() == uint8(0) {
														
@@ -9523,7 +10008,6 @@ func (target *ResourceQuotaSpecHard) UnmarshalBinaryWithContext(ctx *DecodingCon
 
															 	}
														
 
															 	// field version check
														
 
															 	if uint8(1) <= version {
														
 
															-
														
 
															 		// --- [begin][read][alias](ResourceQuantities) ---
														
 
															 		var l map[Resource]ResourceQuantity
														
 
															 		if buff.ReadUInt8() == uint8(0) {
														
@@ -9870,7 +10354,6 @@ func (target *ResourceQuotaStatusUsed) UnmarshalBinaryWithContext(ctx *DecodingC
 
															 	// field version check
														
 
															 	if uint8(1) <= version {
														
 
															-
														
 
															 		// --- [begin][read][alias](ResourceQuantities) ---
														
 
															 		var a map[Resource]ResourceQuantity
														
 
															 		if buff.ReadUInt8() == uint8(0) {
														
@@ -9919,7 +10402,6 @@ func (target *ResourceQuotaStatusUsed) UnmarshalBinaryWithContext(ctx *DecodingC
 
															 	}
														
 
															 	// field version check
														
 
															 	if uint8(1) <= version {
														
 
															-
														
 
															 		// --- [begin][read][alias](ResourceQuantities) ---
														
 
															 		var l map[Resource]ResourceQuantity
														
 
															 		if buff.ReadUInt8() == uint8(0) {
														
--- a/docs/gpu-saturation.md
+++ b/docs/gpu-saturation.md
@@ -0,0 +1,164 @@
 
															+# GPU Saturation Signals
														
 
															+
														
 
															+OpenCost derives GPU **saturation** signals from
														
 
															+[dcgm-exporter](https://github.com/NVIDIA/dcgm-exporter) metrics, following
														
 
															+the [USE method](https://www.brendangregg.com/usemethod.html):
														
 
															+
														
 
															+- **Utilization** — how busy the GPU was (already exposed as
														
 
															+  `gpuAllocation.gpuUsageAverage` from `DCGM_FI_PROF_GR_ENGINE_ACTIVE`).
														
 
															+- **Saturation** — work that was queued, rejected, or slowed because the GPU
														
 
															+  could not service demand. That is what the signals below report.
														
 
															+
														
 
															+Every signal is an independent primitive. OpenCost deliberately does **not**
														
 
															+compute a composite saturation score; consumers combine the primitives as
														
 
															+they see fit.
														
 
															+
														
 
															+## Absence semantics
														
 
															+
														
 
															+A missing field always means *the source metric was unavailable* — no
														
 
															+dcgm-exporter in the cluster, the DCGM field is not in the exporter's
														
 
															+configuration, or the GPU lacks DCP profiling support. OpenCost never emits
														
 
															+a zero in place of missing data, so `0` can be trusted to mean "observed,
														
 
															+and not saturated".
														
 
															+
														
 
															+## Allocation API
														
 
															+
														
 
															+Saturation appears on the Allocation API under
														
 
															+`gpuAllocation.saturation`, per container:
														
 
															+
														
 
															+```json
														
 
															+{
														
 
															+  "gpuAllocation": {
														
 
															+    "gpuDevice": "nvidia0",
														
 
															+    "gpuModel": "Tesla T4",
														
 
															+    "gpuUUID": "GPU-...",
														
 
															+    "saturation": {
														
 
															+      "throttleViolationRatios": { "power": 0.12, "thermal": 0.01 },
														
 
															+      "throttleReasonRatios": { "sw_power_cap": 0.15 },
														
 
															+      "memoryUsedRatioAvg": 0.81,
														
 
															+      "memoryUsedRatioMax": 0.97,
														
 
															+      "memoryPressureRatio": 0.25,
														
 
															+      "xidErrorCount": 0,
														
 
															+      "dramActiveAvg": 0.62,
														
 
															+      "smActiveAvg": 0.55,
														
 
															+      "smOccupancyAvg": 0.31,
														
 
															+      "pcieTxBytesAvg": 1.2e9,
														
 
															+      "pcieRxBytesAvg": 2.0e9
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+Controlled by `GPU_SATURATION_METRICS_ENABLED` (default `true`).
														
 
															+
														
 
															+## Signal reference
														
 
															+
														
 
															+Ratios are fractions of the queried window in `[0, 1]` unless noted.
														
 
															+
														
 
															+| Field | DCGM source | In default dcgm-exporter config? | Needs DCP? | Meaning |
														
 
															+|---|---|---|---|---|
														
 
															+| `throttleViolationRatios` (`power`, `thermal`, `sync_boost`, `board_limit`) | `DCGM_FI_DEV_POWER_VIOLATION`, `DCGM_FI_DEV_THERMAL_VIOLATION`, `DCGM_FI_DEV_SYNC_BOOST_VIOLATION`, `DCGM_FI_DEV_BOARD_LIMIT_VIOLATION` | Yes | No | Fraction of the window the GPU spent throttled for the reason, from cumulative microsecond violation counters. The strongest direct saturation signal available by default. |
														
 
															+| `throttleReasonRatios` (`sw_power_cap`, `hw_slowdown`, `sync_boost`, `sw_thermal`, `hw_thermal`, `hw_power_brake`) | `DCGM_FI_DEV_CLOCK_THROTTLE_REASONS` (renamed `DCGM_FI_DEV_CLOCKS_EVENT_REASONS` in DCGM 3.3+; OpenCost queries both) | **No — must be enabled** | No | Fraction of samples in which each saturation-relevant bit of the NVML throttle-reasons bitmask was set. Richer reason breakdown than the violation counters (hardware slowdown, power brake). Idle/configured-clock bits are excluded by design. Reported for the whole physical GPU even under MIG or time-slicing. |
														
 
															+| `memoryUsedRatioAvg`, `memoryUsedRatioMax` | `DCGM_FI_DEV_FB_USED`, `DCGM_FI_DEV_FB_FREE` | Yes | No | Framebuffer occupancy `used / (used + free)`. Sustained values near 1.0 mean new allocations are likely to fail or force eviction. |
														
 
															+| `memoryPressureRatio` | same | Yes | No | Fraction of the window occupancy was at or above the configured threshold (`GPU_MEMORY_SATURATION_THRESHOLD`, default `0.9`). |
														
 
															+| `xidErrorCount` | `DCGM_FI_DEV_XID_ERRORS` | Yes | No | XID error events observed in the window, a rejected-work signal. The DCGM field reports the *last* XID code, so consecutive identical errors are undercounted. |
														
 
															+| `dramActiveAvg`, `dramActiveMax` | `DCGM_FI_PROF_DRAM_ACTIVE` | Yes | **Yes** | Ratio of cycles the device memory interface was active. Near-ceiling values with low `smOccupancyAvg` indicate a memory-bandwidth-bound workload. |
														
 
															+| `smActiveAvg` | `DCGM_FI_PROF_SM_ACTIVE` | **No — must be enabled** | **Yes** | Ratio of cycles at least one warp was resident on any SM. |
														
 
															+| `smOccupancyAvg` | `DCGM_FI_PROF_SM_OCCUPANCY` | **No — must be enabled** | **Yes** | Ratio of resident warps to the SM maximum. Together with `smActiveAvg` and `dramActiveAvg`, lets consumers distinguish compute-bound vs bandwidth-bound vs latency-bound saturation. |
														
 
															+| `pcieTxBytesAvg`, `pcieRxBytesAvg` | `DCGM_FI_PROF_PCIE_TX_BYTES`, `DCGM_FI_PROF_PCIE_RX_BYTES` | Yes | **Yes** | Average PCIe throughput in bytes/sec. DCGM does not export link capacity, so these are raw rates; deriving a capacity ratio is future work. |
														
 
															+| `nvlinkTxBytesAvg`, `nvlinkRxBytesAvg` | `DCGM_FI_PROF_NVLINK_TX_BYTES`, `DCGM_FI_PROF_NVLINK_RX_BYTES` | **No — must be enabled** | **Yes** | Average NVLink throughput in bytes/sec; same capacity caveat as PCIe. |
														
 
															+
														
 
															+"Needs DCP" means the field comes from DCGM's profiling module (DCP), which
														
 
															+requires Volta or newer GPUs and is unavailable in some virtualized
														
 
															+environments. "Must be enabled" means the field exists in the dcgm-exporter
														
 
															+default configuration file but is commented out (or absent), and must be
														
 
															+uncommented/added for the signal to appear.
														
 
															+
														
 
															+### Interpretation guidance (USE)
														
 
															+
														
 
															+- High utilization with **zero** throttle/pressure ratios: the GPU is busy
														
 
															+  but keeping up — buying a faster GPU may not help.
														
 
															+- `throttleViolationRatios.power` or `sw_power_cap` sustained above zero:
														
 
															+  the GPU is power-limited; demand exceeds what the power envelope can
														
 
															+  service.
														
 
															+- `memoryUsedRatioMax` near 1.0 or `memoryPressureRatio` > 0 alongside
														
 
															+  `xidErrorCount` > 0: work is likely being rejected (OOM-style failures).
														
 
															+- `dramActiveAvg` near ceiling with low `smOccupancyAvg`: bandwidth-bound;
														
 
															+  with high `smOccupancyAvg`: genuinely compute-saturated.
														
 
															+
														
 
															+### Attribution caveat
														
 
															+
														
 
															+Device-level signals (throttling, framebuffer, XID) are attributed to
														
 
															+containers via the pod labels dcgm-exporter attaches. For exclusively
														
 
															+assigned GPUs this is exact. For time-sliced or MPS-shared GPUs, every
														
 
															+sharing container sees the same device-level saturation; the signal tells
														
 
															+you the *device* was saturated, not which tenant caused it. MIG slices are
														
 
															+reported as distinct devices (dcgm-exporter `GPU_I_ID` / `GPU_I_PROFILE`
														
 
															+labels), except the throttle-reasons bitmask, which is physical-GPU-scoped.
														
 
															+
														
 
															+## Scheduler-level saturation metrics
														
 
															+
														
 
															+DCGM cannot see work that never reached a GPU. OpenCost additionally emits
														
 
															+cluster-scoped gauges on `/metrics` from the Kubernetes scheduler's view,
														
 
															+one series per observed GPU resource name (`nvidia.com/gpu`,
														
 
															+`nvidia.com/gpu.shared`, `nvidia.com/mig-*`):
														
 
															+
														
 
															+| Metric | Meaning |
														
 
															+|---|---|
														
 
															+| `cluster_gpu_pending_pod_count{resource}` | Pods in `Pending` phase requesting the resource. |
														
 
															+| `cluster_gpu_pending_request_total{resource}` | GPU units requested by those pending pods. |
														
 
															+| `cluster_gpu_requested_allocatable_ratio{resource}` | Units requested by all non-terminated pods divided by allocatable units. Values near or above 1 indicate scheduler-level saturation, including exhaustion of time-sliced/MPS replicas. Only emitted when allocatable capacity exists. |
														
 
															+
														
 
															+These can be disabled individually through the standard metrics
														
 
															+configuration (the same mechanism as `node_gpu_count` et al.).
														
 
															+
														
 
															+## Configuration
														
 
															+
														
 
															+| Variable | Default | Description |
														
 
															+|---|---|---|
														
 
															+| `GPU_SATURATION_METRICS_ENABLED` | `true` | Query and apply GPU saturation signals in the Allocation pipeline. |
														
 
															+| `GPU_MEMORY_SATURATION_THRESHOLD` | `0.9` | Framebuffer occupancy ratio above which the GPU counts as memory-pressured. Values outside `(0, 1]` fall back to the default. |
														
 
															+
														
 
															+## Data source differences
														
 
															+
														
 
															+- **Prometheus source**: the throttle-bitmask and memory-pressure signals
														
 
															+  use PromQL subqueries at the configured query resolution.
														
 
															+- **Collector source**: framebuffer occupancy is joined from FB_USED and
														
 
															+  FB_FREE per scrape by a metric synthesizer, producing a per-sample ratio
														
 
															+  that the occupancy and pressure aggregations consume. All signals are
														
 
															+  supported.
														
 
															+
														
 
															+## Allocation half: DRA
														
 
															+
														
 
															+Telemetry reports what devices *did*; Dynamic Resource Allocation
														
 
															+(`resource.k8s.io/v1`, k8s 1.34+) reports what was *requested, allocated,
														
 
															+and reserved*. The kubemodel carries both:
														
 
															+
														
 
															+- `ResourceSlices` — driver-advertised device capacity per node/pool,
														
 
															+  including driver-published attributes and capacity quantities.
														
 
															+- `ResourceClaims` — device requests (class, count), scheduler allocations
														
 
															+  (driver/pool/device), and the pods that reserved them
														
 
															+  (`reservedForPodUids`). A reserved-but-idle device appears here even
														
 
															+  though it never shows in DCGM usage.
														
 
															+- Hydration joins the halves: each allocated device's UUID is resolved from
														
 
															+  its slice attributes (`uuid`, or driver-qualified `*/uuid`), matching
														
 
															+  `DCGMDevice.UUID` so claims link to telemetry directly.
														
 
															+
														
 
															+Claims and slices are cluster state, not time series: the model carries the
														
 
															+state observed at hydration time. Clusters without the DRA API (or without
														
 
															+RBAC) hydrate nothing — absence, not zeros.
														
 
															+
														
 
															+**RBAC**: the OpenCost service account needs `list`/`watch` on
														
 
															+`resourceclaims` and `resourceslices` in the `resource.k8s.io` API group
														
 
															+(helm chart follow-up).
														
 
															+
														
 
															+## Future work
														
 
															+
														
 
															+- Non-NVIDIA GPUs (AMD ROCm SMI exporter, Intel XPU manager) — the signal
														
 
															+  taxonomy is vendor-neutral, the queries are not.
														
 
															+- PCIe/NVLink capacity ratios, once link capacity can be derived per model.
														
 
															+- Device-level saturation in the kubemodel pipeline is modeled on the
														
 
															+  vendor-specific device type (`DCGMDevice.Saturation`) behind the
														
 
															+  vendor-neutral `DeviceInfo`/`DevicePerformance`/`DeviceSaturation`
														
 
															+  interfaces, but not yet populated; it will be wired into the DCGM
														
 
															+  hydration path alongside the existing device usage collection.
														
--- a/pkg/env/costmodel.go
+++ b/pkg/env/costmodel.go
@@ -93,6 +93,8 @@ const (
 
															 	CarbonEstimatesEnabledEnvVar = "CARBON_ESTIMATES_ENABLED"
														
 
															+	GPUSaturationMetricsEnabledEnvVar = "GPU_SATURATION_METRICS_ENABLED"
														
 
															+
														
 
															 	KubernetesResourceAccessEnvVar = "KUBERNETES_RESOURCE_ACCESS"
														
 
															 	UseCacheV1                     = "USE_CACHE_V1"
														
@@ -384,6 +386,14 @@ func IsCarbonEstimatesEnabled() bool {
 
															 	return env.GetBool(CarbonEstimatesEnabledEnvVar, false)
														
 
															 }
														
 
															+// IsGPUSaturationMetricsEnabled controls whether the kubemodel hydration
														
 
															+// queries and applies GPU saturation signals (USE method) derived from DCGM
														
 
															+// exporter metrics. Enabled by default; clusters without dcgm-exporter
														
 
															+// simply receive empty results.
														
 
															+func IsGPUSaturationMetricsEnabled() bool {
														
 
															+	return env.GetBool(GPUSaturationMetricsEnabledEnvVar, true)
														
 
															+}
														
 
															+
														
 
															 // HasKubernetesResourceAccess can be set to false if Opencost is run without access to the kubernetes resources
														
 
															 func HasKubernetesResourceAccess() bool { return env.GetBool(KubernetesResourceAccessEnvVar, true) }
														
--- a/pkg/kubemodel/dcgm_saturation.go
+++ b/pkg/kubemodel/dcgm_saturation.go
@@ -0,0 +1,199 @@
 
															+package kubemodel
														
 
															+
														
 
															+import (
														
 
															+	"time"
														
 
															+
														
 
															+	"github.com/opencost/opencost/core/pkg/model/kubemodel"
														
 
															+	"github.com/opencost/opencost/core/pkg/source"
														
 
															+)
														
 
															+
														
 
															+// DCGM device saturation hydration. The saturation queries are
														
 
															+// container-attributed series (dcgm-exporter duplicates each device-level
														
 
															+// value onto every pod sharing the device), so hydration reduces them to
														
 
															+// one value per device UUID. MIG instances surface as their own UUIDs when
														
 
															+// the exporter runs in MIG mode, so slices hydrate as distinct devices and
														
 
															+// PodUsages preserves the container-to-slice association.
														
 
															+
														
 
															+// dcgmSaturationFutures holds the in-flight saturation queries for one
														
 
															+// computeDCGMDevices pass, mirroring the bundle used by ComputeAllocation.
														
 
															+type dcgmSaturationFutures struct {
														
 
															+	throttleViolation *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	throttleReason    *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	memoryUsedAvg     *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	memoryUsedMax     *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	memoryPressure    *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	xidErrorCount     *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	dramActiveAvg     *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	dramActiveMax     *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	smActiveAvg       *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	smOccupancyAvg    *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	pcieTxBytesAvg    *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	pcieRxBytesAvg    *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	nvlinkTxBytesAvg  *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+	nvlinkRxBytesAvg  *source.QueryGroupFuture[source.GPUSaturationResult]
														
 
															+}
														
 
															+
														
 
															+func startDCGMSaturationQueries(grp *source.QueryGroup, metrics source.MetricsQuerier, start, end time.Time) *dcgmSaturationFutures {
														
 
															+	return &dcgmSaturationFutures{
														
 
															+		throttleViolation: source.WithGroup(grp, metrics.QueryGPUThrottleViolationRatio(start, end)),
														
 
															+		throttleReason:    source.WithGroup(grp, metrics.QueryGPUThrottleReasonRatio(start, end)),
														
 
															+		memoryUsedAvg:     source.WithGroup(grp, metrics.QueryGPUMemoryUsedRatioAvg(start, end)),
														
 
															+		memoryUsedMax:     source.WithGroup(grp, metrics.QueryGPUMemoryUsedRatioMax(start, end)),
														
 
															+		memoryPressure:    source.WithGroup(grp, metrics.QueryGPUMemoryPressureRatio(start, end)),
														
 
															+		xidErrorCount:     source.WithGroup(grp, metrics.QueryGPUXIDErrorCount(start, end)),
														
 
															+		dramActiveAvg:     source.WithGroup(grp, metrics.QueryGPUDRAMActiveAvg(start, end)),
														
 
															+		dramActiveMax:     source.WithGroup(grp, metrics.QueryGPUDRAMActiveMax(start, end)),
														
 
															+		smActiveAvg:       source.WithGroup(grp, metrics.QueryGPUSMActiveAvg(start, end)),
														
 
															+		smOccupancyAvg:    source.WithGroup(grp, metrics.QueryGPUSMOccupancyAvg(start, end)),
														
 
															+		pcieTxBytesAvg:    source.WithGroup(grp, metrics.QueryGPUPCIeTxBytesAvg(start, end)),
														
 
															+		pcieRxBytesAvg:    source.WithGroup(grp, metrics.QueryGPUPCIeRxBytesAvg(start, end)),
														
 
															+		nvlinkTxBytesAvg:  source.WithGroup(grp, metrics.QueryGPUNVLinkTxBytesAvg(start, end)),
														
 
															+		nvlinkRxBytesAvg:  source.WithGroup(grp, metrics.QueryGPUNVLinkRxBytesAvg(start, end)),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// awaitAndApply awaits every saturation query and reduces the results onto
														
 
															+// the device map. Per-future errors are recorded in the query group by
														
 
															+// Await, matching how the rest of computeDCGMDevices treats its queries.
														
 
															+func (f *dcgmSaturationFutures) awaitAndApply(deviceMap map[string]*kubemodel.DCGMDevice) {
														
 
															+	if f == nil {
														
 
															+		return
														
 
															+	}
														
 
															+
														
 
															+	resThrottleViolation, _ := f.throttleViolation.Await()
														
 
															+	resThrottleReason, _ := f.throttleReason.Await()
														
 
															+	resMemoryUsedAvg, _ := f.memoryUsedAvg.Await()
														
 
															+	resMemoryUsedMax, _ := f.memoryUsedMax.Await()
														
 
															+	resMemoryPressure, _ := f.memoryPressure.Await()
														
 
															+	resXIDErrorCount, _ := f.xidErrorCount.Await()
														
 
															+	resDRAMActiveAvg, _ := f.dramActiveAvg.Await()
														
 
															+	resDRAMActiveMax, _ := f.dramActiveMax.Await()
														
 
															+	resSMActiveAvg, _ := f.smActiveAvg.Await()
														
 
															+	resSMOccupancyAvg, _ := f.smOccupancyAvg.Await()
														
 
															+	resPCIeTxBytesAvg, _ := f.pcieTxBytesAvg.Await()
														
 
															+	resPCIeRxBytesAvg, _ := f.pcieRxBytesAvg.Await()
														
 
															+	resNVLinkTxBytesAvg, _ := f.nvlinkTxBytesAvg.Await()
														
 
															+	resNVLinkRxBytesAvg, _ := f.nvlinkRxBytesAvg.Await()
														
 
															+
														
 
															+	applyDeviceThrottleRatios(deviceMap, resThrottleViolation, func(sat *kubemodel.DCGMDeviceSaturation) map[string]float64 {
														
 
															+		if sat.ThrottleViolationRatios == nil {
														
 
															+			sat.ThrottleViolationRatios = make(map[string]float64)
														
 
															+		}
														
 
															+		return sat.ThrottleViolationRatios
														
 
															+	})
														
 
															+	applyDeviceThrottleRatios(deviceMap, resThrottleReason, func(sat *kubemodel.DCGMDeviceSaturation) map[string]float64 {
														
 
															+		if sat.ThrottleReasonRatios == nil {
														
 
															+			sat.ThrottleReasonRatios = make(map[string]float64)
														
 
															+		}
														
 
															+		return sat.ThrottleReasonRatios
														
 
															+	})
														
 
															+	applyDeviceSaturationScalar(deviceMap, resMemoryUsedAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.MemoryUsedRatioAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resMemoryUsedMax, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.MemoryUsedRatioMax = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resMemoryPressure, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.MemoryPressureRatio = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resXIDErrorCount, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.XIDErrorCount = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resDRAMActiveAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.DRAMActiveAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resDRAMActiveMax, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.DRAMActiveMax = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resSMActiveAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.SMActiveAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resSMOccupancyAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.SMOccupancyAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resPCIeTxBytesAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.PCIeTxBytesAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resPCIeRxBytesAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.PCIeRxBytesAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resNVLinkTxBytesAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.NVLinkTxBytesAvg = &v })
														
 
															+	applyDeviceSaturationScalar(deviceMap, resNVLinkRxBytesAvg, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.NVLinkRxBytesAvg = &v })
														
 
															+}
														
 
															+
														
 
															+// ensureDeviceSaturation lazily creates the saturation struct, so devices
														
 
															+// with no signals keep Saturation nil (absence is never zero).
														
 
															+func ensureDeviceSaturation(device *kubemodel.DCGMDevice) *kubemodel.DCGMDeviceSaturation {
														
 
															+	if device.Saturation == nil {
														
 
															+		device.Saturation = &kubemodel.DCGMDeviceSaturation{}
														
 
															+	}
														
 
															+	return device.Saturation
														
 
															+}
														
 
															+
														
 
															+// applyDeviceSaturationScalar reduces container-attributed results to one
														
 
															+// value per device UUID. Sharing containers carry duplicates of the same
														
 
															+// device-level value, so last-write-wins is exact; results for UUIDs the
														
 
															+// info query did not report are skipped.
														
 
															+func applyDeviceSaturationScalar(deviceMap map[string]*kubemodel.DCGMDevice, results []*source.GPUSaturationResult, set func(sat *kubemodel.DCGMDeviceSaturation, value float64)) {
														
 
															+	for _, res := range results {
														
 
															+		device, ok := deviceMap[res.UUID]
														
 
															+		if !ok || len(res.Data) == 0 {
														
 
															+			continue
														
 
															+		}
														
 
															+		set(ensureDeviceSaturation(device), res.Data[0].Value)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// applyDeviceThrottleRatios reduces reason-labeled throttle results onto the
														
 
															+// device's reason map.
														
 
															+func applyDeviceThrottleRatios(deviceMap map[string]*kubemodel.DCGMDevice, results []*source.GPUSaturationResult, ratios func(sat *kubemodel.DCGMDeviceSaturation) map[string]float64) {
														
 
															+	for _, res := range results {
														
 
															+		device, ok := deviceMap[res.UUID]
														
 
															+		if !ok || len(res.Data) == 0 || res.Reason == "" {
														
 
															+			continue
														
 
															+		}
														
 
															+		ratios(ensureDeviceSaturation(device))[res.Reason] = res.Data[0].Value
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// dcgmDeviceMetricFutures holds the device-level metric queries backing
														
 
															+// DeviceInfo / DevicePerformance: power, temperature, device-level compute
														
 
															+// utilization, and framebuffer used. Unlike saturation these are not
														
 
															+// feature-gated: they are core device telemetry from the default
														
 
															+// dcgm-exporter configuration.
														
 
															+type dcgmDeviceMetricFutures struct {
														
 
															+	powerAvg      *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+	tempAvg       *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+	usageAvg      *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+	usageMax      *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+	memoryUsedAvg *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+	memoryUsedMax *source.QueryGroupFuture[source.GPUDeviceMetricResult]
														
 
															+}
														
 
															+
														
 
															+func startDCGMDeviceMetricQueries(grp *source.QueryGroup, metrics source.MetricsQuerier, start, end time.Time) *dcgmDeviceMetricFutures {
														
 
															+	return &dcgmDeviceMetricFutures{
														
 
															+		powerAvg:      source.WithGroup(grp, metrics.QueryGPUDevicePowerAvg(start, end)),
														
 
															+		tempAvg:       source.WithGroup(grp, metrics.QueryGPUDeviceTempAvg(start, end)),
														
 
															+		usageAvg:      source.WithGroup(grp, metrics.QueryGPUDeviceUsageAvg(start, end)),
														
 
															+		usageMax:      source.WithGroup(grp, metrics.QueryGPUDeviceUsageMax(start, end)),
														
 
															+		memoryUsedAvg: source.WithGroup(grp, metrics.QueryGPUDeviceMemoryUsedAvg(start, end)),
														
 
															+		memoryUsedMax: source.WithGroup(grp, metrics.QueryGPUDeviceMemoryUsedMax(start, end)),
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+const fbMiB = 1024 * 1024
														
 
															+
														
 
															+// awaitAndApply reduces the device-level metrics onto the device map,
														
 
															+// scaling to the model's units: GR_ENGINE_ACTIVE ratio to percent (0-100),
														
 
															+// FB_USED MiB to bytes.
														
 
															+func (f *dcgmDeviceMetricFutures) awaitAndApply(deviceMap map[string]*kubemodel.DCGMDevice) {
														
 
															+	if f == nil {
														
 
															+		return
														
 
															+	}
														
 
															+
														
 
															+	resPower, _ := f.powerAvg.Await()
														
 
															+	resTemp, _ := f.tempAvg.Await()
														
 
															+	resUsageAvg, _ := f.usageAvg.Await()
														
 
															+	resUsageMax, _ := f.usageMax.Await()
														
 
															+	resMemAvg, _ := f.memoryUsedAvg.Await()
														
 
															+	resMemMax, _ := f.memoryUsedMax.Await()
														
 
															+
														
 
															+	applyDeviceMetric(deviceMap, resPower, func(d *kubemodel.DCGMDevice, v float64) { d.PowerWatts = v })
														
 
															+	applyDeviceMetric(deviceMap, resTemp, func(d *kubemodel.DCGMDevice, v float64) { d.TemperatureCelsius = v })
														
 
															+	applyDeviceMetric(deviceMap, resUsageAvg, func(d *kubemodel.DCGMDevice, v float64) { d.ComputeUtilizationAvg = v * 100 })
														
 
															+	applyDeviceMetric(deviceMap, resUsageMax, func(d *kubemodel.DCGMDevice, v float64) { d.ComputeUtilizationMax = v * 100 })
														
 
															+	applyDeviceMetric(deviceMap, resMemAvg, func(d *kubemodel.DCGMDevice, v float64) { d.MemoryUsedBytesAvg = v * fbMiB })
														
 
															+	applyDeviceMetric(deviceMap, resMemMax, func(d *kubemodel.DCGMDevice, v float64) { d.MemoryUsedBytesMax = v * fbMiB })
														
 
															+}
														
 
															+
														
 
															+// applyDeviceMetric reduces device-keyed results onto device fields;
														
 
															+// unknown UUIDs and empty results are skipped.
														
 
															+func applyDeviceMetric(deviceMap map[string]*kubemodel.DCGMDevice, results []*source.GPUDeviceMetricResult, set func(d *kubemodel.DCGMDevice, value float64)) {
														
 
															+	for _, res := range results {
														
 
															+		device, ok := deviceMap[res.UUID]
														
 
															+		if !ok || len(res.Data) == 0 {
														
 
															+			continue
														
 
															+		}
														
 
															+		set(device, res.Data[0].Value)
														
 
															+	}
														
 
															+}
														
--- a/pkg/kubemodel/dcgm_saturation_test.go
+++ b/pkg/kubemodel/dcgm_saturation_test.go
@@ -0,0 +1,222 @@
 
															+package kubemodel
														
 
															+
														
 
															+import (
														
 
															+	"testing"
														
 
															+
														
 
															+	"github.com/opencost/opencost/core/pkg/model/kubemodel"
														
 
															+	"github.com/opencost/opencost/core/pkg/source"
														
 
															+	"github.com/opencost/opencost/core/pkg/util"
														
 
															+)
														
 
															+
														
 
															+func saturationDeviceMap() map[string]*kubemodel.DCGMDevice {
														
 
															+	return map[string]*kubemodel.DCGMDevice{
														
 
															+		"GPU-1": {UUID: "GPU-1", Device: "nvidia0", ModelName: "Tesla T4"},
														
 
															+		"GPU-2": {UUID: "GPU-2", Device: "nvidia1", ModelName: "Tesla T4"},
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func deviceSaturationResult(uuid, container, reason string, value float64) *source.GPUSaturationResult {
														
 
															+	return &source.GPUSaturationResult{
														
 
															+		UUID:      uuid,
														
 
															+		Container: container,
														
 
															+		Pod:       "pod-" + container,
														
 
															+		Namespace: "ns",
														
 
															+		Reason:    reason,
														
 
															+		Data:      []*util.Vector{{Value: value}},
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func TestApplyDeviceSaturationScalar(t *testing.T) {
														
 
															+	deviceMap := saturationDeviceMap()
														
 
															+
														
 
															+	results := []*source.GPUSaturationResult{
														
 
															+		// dcgm-exporter duplicates the device-level value onto every
														
 
															+		// container sharing the device: both rows carry the same value
														
 
															+		deviceSaturationResult("GPU-1", "container-a", "", 0.85),
														
 
															+		deviceSaturationResult("GPU-1", "container-b", "", 0.85),
														
 
															+		// unknown device must be skipped
														
 
															+		deviceSaturationResult("GPU-9", "container-c", "", 0.5),
														
 
															+	}
														
 
															+	noData := deviceSaturationResult("GPU-2", "container-d", "", 0)
														
 
															+	noData.Data = nil
														
 
															+	results = append(results, noData)
														
 
															+
														
 
															+	applyDeviceSaturationScalar(deviceMap, results, func(sat *kubemodel.DCGMDeviceSaturation, v float64) { sat.MemoryUsedRatioAvg = &v })
														
 
															+
														
 
															+	sat := deviceMap["GPU-1"].Saturation
														
 
															+	if sat == nil || sat.MemoryUsedRatioAvg == nil || *sat.MemoryUsedRatioAvg != 0.85 {
														
 
															+		t.Errorf("GPU-1 saturation = %+v, want MemoryUsedRatioAvg 0.85", sat)
														
 
															+	}
														
 
															+	// device with only an empty-data result must keep Saturation nil
														
 
															+	if deviceMap["GPU-2"].Saturation != nil {
														
 
															+		t.Errorf("GPU-2 saturation should stay nil for empty results, got %+v", deviceMap["GPU-2"].Saturation)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+func TestApplyDeviceThrottleRatios(t *testing.T) {
														
 
															+	deviceMap := saturationDeviceMap()
														
 
															+
														
 
															+	applyDeviceThrottleRatios(deviceMap, []*source.GPUSaturationResult{
														
 
															+		deviceSaturationResult("GPU-1", "container-a", "power", 0.25),
														
 
															+		deviceSaturationResult("GPU-1", "container-a", "thermal", 0.1),
														
 
															+		// duplicate attribution from a sharing container, same value
														
 
															+		deviceSaturationResult("GPU-1", "container-b", "power", 0.25),
														
 
															+		// missing reason must be dropped without creating saturation
														
 
															+		deviceSaturationResult("GPU-2", "container-c", "", 0.5),
														
 
															+	}, func(sat *kubemodel.DCGMDeviceSaturation) map[string]float64 {
														
 
															+		if sat.ThrottleViolationRatios == nil {
														
 
															+			sat.ThrottleViolationRatios = make(map[string]float64)
														
 
															+		}
														
 
															+		return sat.ThrottleViolationRatios
														
 
															+	})
														
 
															+
														
 
															+	sat := deviceMap["GPU-1"].Saturation
														
 
															+	if sat == nil {
														
 
															+		t.Fatalf("expected saturation on GPU-1")
														
 
															+	}
														
 
															+	if len(sat.ThrottleViolationRatios) != 2 || sat.ThrottleViolationRatios["power"] != 0.25 || sat.ThrottleViolationRatios["thermal"] != 0.1 {
														
 
															+		t.Errorf("ThrottleViolationRatios = %v", sat.ThrottleViolationRatios)
														
 
															+	}
														
 
															+	if deviceMap["GPU-2"].Saturation != nil {
														
 
															+		t.Errorf("reasonless result must not create saturation, got %+v", deviceMap["GPU-2"].Saturation)
														
 
															+	}
														
 
															+}
														
 
															+
														
 
															+// TestDCGMSaturationAwaitAndApply runs the full reduction over a fabricated
														
 
															+// future bundle, verifying every signal lands on the right device and that
														
 
															+// devices without signals keep Saturation nil.
														
 
															+func TestDCGMSaturationAwaitAndApply(t *testing.T) {
														
 
															+	makeFuture := func(results ...*source.GPUSaturationResult) *source.QueryGroupFuture[source.GPUSaturationResult] {
														
 
															+		queryResults := source.NewQueryResults("test")
														
 
															+		for _, res := range results {
														
 
															+			metrics := map[string]any{
														
 
															+				"UUID":      res.UUID,
														
 
															+				"container": res.Container,
														
 
															+				"pod":       res.Pod,
														
 
															+				"namespace": res.Namespace,
														
 
															+			}
														
 
															+			if res.Reason != "" {
														
 
															+				metrics["reason"] = res.Reason
														
 
															+			}
														
 
															+			queryResults.Results = append(queryResults.Results, source.NewQueryResult(metrics, res.Data, nil))
														
 
															+		}
														
 
															+		ch := make(source.QueryResultsChan, 1)
														
 
															+		ch <- queryResults
														
 
															+		grp := source.NewQueryGroup()
														
 
															+		return source.WithGroup(grp, source.NewFuture(source.DecodeGPUSaturationResult, ch))
														
 
															+	}
														
 
															+
														
 
															+	deviceMap := saturationDeviceMap()
														
 
															+	futures := &dcgmSaturationFutures{
														
 
															+		throttleViolation: makeFuture(deviceSaturationResult("GPU-1", "c", "power", 0.25)),
														
 
															+		throttleReason:    makeFuture(deviceSaturationResult("GPU-1", "c", "sw_power_cap", 0.2)),
														
 
															+		memoryUsedAvg:     makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.85)),
														
 
															+		memoryUsedMax:     makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.99)),
														
 
															+		memoryPressure:    makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.4)),
														
 
															+		xidErrorCount:     makeFuture(deviceSaturationResult("GPU-1", "c", "", 2)),
														
 
															+		dramActiveAvg:     makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.7)),
														
 
															+		dramActiveMax:     makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.95)),
														
 
															+		smActiveAvg:       makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.6)),
														
 
															+		smOccupancyAvg:    makeFuture(deviceSaturationResult("GPU-1", "c", "", 0.5)),
														
 
															+		pcieTxBytesAvg:    makeFuture(deviceSaturationResult("GPU-1", "c", "", 1.5e9)),
														
 
															+		pcieRxBytesAvg:    makeFuture(deviceSaturationResult("GPU-1", "c", "", 2.5e9)),
														
 
															+		nvlinkTxBytesAvg:  makeFuture(),
														
 
															+		nvlinkRxBytesAvg:  makeFuture(),
														
 
															+	}
														
 
															+
														
 
															+	futures.awaitAndApply(deviceMap)
														
 
															+
														
 
															+	sat := deviceMap["GPU-1"].Saturation
														
 
															+	if sat == nil {
														
 
															+		t.Fatalf("expected saturation on GPU-1")
														
 
															+	}
														
 
															+	if err := sat.Validate(); err != nil {
														
 
															+		t.Fatalf("hydrated saturation invalid: %v", err)
														
 
															+	}
														
 
															+	if sat.ThrottleViolationRatios["power"] != 0.25 || sat.ThrottleReasonRatios["sw_power_cap"] != 0.2 {
														
 
															+		t.Errorf("throttle ratios = %v / %v", sat.ThrottleViolationRatios, sat.ThrottleReasonRatios)
														
 
															+	}
														
 
															+	scalarChecks := map[string]*float64{
														
 
															+		"MemoryUsedRatioAvg=0.85": sat.MemoryUsedRatioAvg,
														
 
															+		"MemoryUsedRatioMax=0.99": sat.MemoryUsedRatioMax,
														
 
															+		"MemoryPressureRatio=0.4": sat.MemoryPressureRatio,
														
 
															+		"XIDErrorCount=2":         sat.XIDErrorCount,
														
 
															+		"DRAMActiveAvg=0.7":       sat.DRAMActiveAvg,
														
 
															+		"DRAMActiveMax=0.95":      sat.DRAMActiveMax,
														
 
															+		"SMActiveAvg=0.6":         sat.SMActiveAvg,
														
 
															+		"SMOccupancyAvg=0.5":      sat.SMOccupancyAvg,
														
 
															+		"PCIeTxBytesAvg=1.5e9":    sat.PCIeTxBytesAvg,
														
 
															+		"PCIeRxBytesAvg=2.5e9":    sat.PCIeRxBytesAvg,
														
 
															+	}
														
 
															+	for name, ptr := range scalarChecks {
														
 
															+		if ptr == nil {
														
 
															+			t.Errorf("%s: signal missing", name)
														
 
															+		}
														
 
															+	}
														
 
															+	// NVLink queries returned nothing: absent, not zero
														
 
															+	if sat.NVLinkTxBytesAvg != nil || sat.NVLinkRxBytesAvg != nil {
														
 
															+		t.Errorf("expected NVLink signals to stay nil, got %v / %v", sat.NVLinkTxBytesAvg, sat.NVLinkRxBytesAvg)
														
 
															+	}
														
 
															+	// no signals targeted GPU-2: Saturation stays nil
														
 
															+	if deviceMap["GPU-2"].Saturation != nil {
														
 
															+		t.Errorf("GPU-2 saturation should be nil, got %+v", deviceMap["GPU-2"].Saturation)
														
 
															+	}
														
 
															+
														
 
															+	// nil bundle (feature disabled) must be a no-op
														
 
															+	var disabled *dcgmSaturationFutures
														
 
															+	disabled.awaitAndApply(deviceMap)
														
 
															+}
														
 
															+
														
 
															+// TestDCGMDeviceMetricAwaitAndApply verifies device-level metrics land with
														
 
															+// correct unit scaling and that devices without series keep zero values.
														
 
															+func TestDCGMDeviceMetricAwaitAndApply(t *testing.T) {
														
 
															+	makeFuture := func(results ...*source.GPUDeviceMetricResult) *source.QueryGroupFuture[source.GPUDeviceMetricResult] {
														
 
															+		queryResults := source.NewQueryResults("test")
														
 
															+		for _, res := range results {
														
 
															+			queryResults.Results = append(queryResults.Results, source.NewQueryResult(map[string]any{
														
 
															+				"UUID": res.UUID,
														
 
															+			}, res.Data, nil))
														
 
															+		}
														
 
															+		ch := make(source.QueryResultsChan, 1)
														
 
															+		ch <- queryResults
														
 
															+		grp := source.NewQueryGroup()
														
 
															+		return source.WithGroup(grp, source.NewFuture(source.DecodeGPUDeviceMetricResult, ch))
														
 
															+	}
														
 
															+	metricResult := func(uuid string, value float64) *source.GPUDeviceMetricResult {
														
 
															+		return &source.GPUDeviceMetricResult{UUID: uuid, Data: []*util.Vector{{Value: value}}}
														
 
															+	}
														
 
															+
														
 
															+	deviceMap := saturationDeviceMap()
														
 
															+	futures := &dcgmDeviceMetricFutures{
														
 
															+		powerAvg:      makeFuture(metricResult("GPU-1", 140)),
														
 
															+		tempAvg:       makeFuture(metricResult("GPU-1", 55)),
														
 
															+		usageAvg:      makeFuture(metricResult("GPU-1", 0.425)),
														
 
															+		usageMax:      makeFuture(metricResult("GPU-1", 0.97)),
														
 
															+		memoryUsedAvg: makeFuture(metricResult("GPU-1", 1024)), // MiB
														
 
															+		memoryUsedMax: makeFuture(metricResult("GPU-1", 2048)),
														
 
															+	}
														
 
															+
														
 
															+	futures.awaitAndApply(deviceMap)
														
 
															+
														
 
															+	d := deviceMap["GPU-1"]
														
 
															+	if d.PowerWatts != 140 || d.TemperatureCelsius != 55 {
														
 
															+		t.Errorf("power/temp = (%v, %v), want (140, 55)", d.PowerWatts, d.TemperatureCelsius)
														
 
															+	}
														
 
															+	// GR_ENGINE_ACTIVE ratio scaled to percent
														
 
															+	if d.ComputeUtilizationAvg != 42.5 || d.ComputeUtilizationMax != 97 {
														
 
															+		t.Errorf("compute util = (%v, %v), want (42.5, 97)", d.ComputeUtilizationAvg, d.ComputeUtilizationMax)
														
 
															+	}
														
 
															+	// FB_USED MiB scaled to bytes
														
 
															+	if d.MemoryUsedBytesAvg != 1024*1024*1024 || d.MemoryUsedBytesMax != 2048*1024*1024 {
														
 
															+		t.Errorf("memory bytes = (%v, %v)", d.MemoryUsedBytesAvg, d.MemoryUsedBytesMax)
														
 
															+	}
														
 
															+
														
 
															+	// no series targeted GPU-2: untouched zeros
														
 
															+	if deviceMap["GPU-2"].PowerWatts != 0 || deviceMap["GPU-2"].ComputeUtilizationAvg != 0 {
														
 
															+		t.Errorf("GPU-2 must stay zero: %+v", deviceMap["GPU-2"])
														
 
															+	}
														
 
															+
														
 
															+	// nil bundle is a no-op
														
 
															+	var disabled *dcgmDeviceMetricFutures
														
 
															+	disabled.awaitAndApply(deviceMap)
														
 
															+}
														
--- a/pkg/kubemodel/kubemodel.go
+++ b/pkg/kubemodel/kubemodel.go
@@ -9,6 +9,7 @@ import (
 
															 	"github.com/opencost/opencost/core/pkg/model/kubemodel"
														
 
															 	"github.com/opencost/opencost/core/pkg/model/shared"
														
 
															 	"github.com/opencost/opencost/core/pkg/source"
														
 
															+	"github.com/opencost/opencost/pkg/env"
														
 
															 )
														
 
															 const logTimeFmt string = "2006-01-02T15:04:05"
														
@@ -1517,6 +1518,13 @@ func (km *KubeModel) computeDCGMDevices(kms *kubemodel.KubeModelSet, start, end
 
															 	dcgmUsageAvgFuture := source.WithGroup(grp, metrics.QueryDCGMContainerUsageAvg(start, end))
														
 
															 	dcgmUsageMaxFuture := source.WithGroup(grp, metrics.QueryDCGMContainerUsageMax(start, end))
														
 
															+	var saturationFutures *dcgmSaturationFutures
														
 
															+	if env.IsGPUSaturationMetricsEnabled() {
														
 
															+		saturationFutures = startDCGMSaturationQueries(grp, metrics, start, end)
														
 
															+	}
														
 
															+
														
 
															+	deviceMetricFutures := startDCGMDeviceMetricQueries(grp, metrics, start, end)
														
 
															+
														
 
															 	deviceMap := make(map[string]*kubemodel.DCGMDevice)
														
 
															 	dcgmInfoResult, _ := dcgmInfoFuture.Await()
														
@@ -1579,6 +1587,13 @@ func (km *KubeModel) computeDCGMDevices(kms *kubemodel.KubeModelSet, start, end
 
															 		device.PodUsages[res.PodUID] = pod
														
 
															 	}
														
 
															+	// reduce container-attributed saturation series onto each device; nil
														
 
															+	// when the feature is disabled
														
 
															+	saturationFutures.awaitAndApply(deviceMap)
														
 
															+
														
 
															+	// device-level power/temperature/utilization/memory
														
 
															+	deviceMetricFutures.awaitAndApply(deviceMap)
														
 
															+
														
 
															 	for _, device := range deviceMap {
														
 
															 		if err := kms.RegisterDCGMDevice(device); err != nil {
														
 
															 			log.Warnf("Failed to register DCGM device: %s", err.Error())