2
0
Эх сурвалжийг харах

ENG-2914: Introduce GPUUsageMax, isShared and GPU device information (#2963)

* ENG-2914: Introduce GPUUsageMax, isShared and GPU device information

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Deprecate GPUUsageAverage and GPURequestAverage (float fields)

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Fix bug found in code review

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Code review comments, use new fields to calculate GPUEfficiency, do not show deprecated fields in json responses

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Code review comments, remove deprecated fields from allocation_json.go

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Found a bug while testing

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

* ENG-2914: Fix UTs

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>

---------

Signed-off-by: Pranav Bhat <pbhat@kubecost.com>
Co-authored-by: Cliff Colvin <ccolvin@kubecost.com>
kubecost-pb 1 жил өмнө
parent
commit
056962e773

+ 173 - 28
core/pkg/opencost/allocation.go

@@ -100,9 +100,64 @@ type Allocation struct {
 	// UnmountedPVCost is used to track how much of the cost in PVs is for an
 	// unmounted PV. It is not additive of PVCost() and need not be sent in API
 	// responses.
-	UnmountedPVCost   float64 `json:"-"`                 //@bingen:field[ignore]
-	GPURequestAverage float64 `json:"gpuRequestAverage"` //@bingen:field[version=22]
-	GPUUsageAverage   float64 `json:"gpuUsageAverage"`   //@bingen:field[version=22]
+	UnmountedPVCost             float64        `json:"-"`             //@bingen:field[ignore]
+	deprecatedGPURequestAverage float64        `json:"-"`             //@bingen:field[version=22]
+	deprecatedGPUUsageAverage   float64        `json:"-"`             //@bingen:field[version=22]
+	GPUAllocation               *GPUAllocation `json:"GPUAllocation"` //@bingen:field[version=23]
+}
+
+// GPUAllocation groups the GPU-specific details attached to an Allocation:
+// identifying strings for the device plus optional sharing, usage and request
+// values.
+type GPUAllocation struct {
+	// Device identification. Each string is left out of the JSON output when
+	// it is empty (omitempty).
+	GPUDevice string `json:"gpuDevice,omitempty"`
+	GPUModel  string `json:"gpuModel,omitempty"`
+	GPUUUID   string `json:"gpuUUID,omitempty"`
+
+	// Optional values. A nil pointer means the value is absent; because these
+	// tags do not use omitempty, a nil pointer is encoded as JSON null.
+	IsGPUShared       *bool    `json:"isGPUShared"`
+	GPUUsageAverage   *float64 `json:"gpuUsageAverage"`
+	GPURequestAverage *float64 `json:"gpuRequestAverage"`
+}
+
+// SanitizeNaN resets any NaN request or usage average to nil. Calling it on a
+// nil receiver is a no-op.
+func (orig *GPUAllocation) SanitizeNaN() {
+	if orig == nil {
+		return
+	}
+	// A nil pointer is already unset, so only non-nil NaN values need clearing.
+	for _, field := range []**float64{&orig.GPURequestAverage, &orig.GPUUsageAverage} {
+		if *field != nil && math.IsNaN(**field) {
+			*field = nil
+		}
+	}
+}
+
+// Clone returns a deep copy of the GPUAllocation, or nil for a nil receiver.
+// The values behind the pointer fields are copied into fresh storage so that
+// in-place updates made through the clone never change the original, and
+// vice versa.
+func (orig *GPUAllocation) Clone() *GPUAllocation {
+	if orig == nil {
+		return nil
+	}
+
+	clone := &GPUAllocation{
+		GPUDevice: orig.GPUDevice,
+		GPUModel:  orig.GPUModel,
+		GPUUUID:   orig.GPUUUID,
+	}
+
+	// Nil pointers stay nil; non-nil pointers get their own copy of the value.
+	if orig.IsGPUShared != nil {
+		shared := *orig.IsGPUShared
+		clone.IsGPUShared = &shared
+	}
+	if orig.GPUUsageAverage != nil {
+		usage := *orig.GPUUsageAverage
+		clone.GPUUsageAverage = &usage
+	}
+	if orig.GPURequestAverage != nil {
+		request := *orig.GPURequestAverage
+		clone.GPURequestAverage = &request
+	}
+
+	return clone
+}
+
+// Equal reports whether two GPUAllocations hold the same data. Two nil
+// receivers are equal; a nil and a non-nil value are not. Optional pointer
+// fields are equal when both are nil, or when both are set and their values
+// match (floats are compared with util.IsApproximately).
+func (orig *GPUAllocation) Equal(that *GPUAllocation) bool {
+	if orig == nil && that == nil {
+		return true
+	}
+	if orig == nil || that == nil {
+		return false
+	}
+
+	if orig.GPUDevice != that.GPUDevice ||
+		orig.GPUModel != that.GPUModel ||
+		orig.GPUUUID != that.GPUUUID {
+		return false
+	}
+
+	// The optional fields are pointers: compare presence first, then the
+	// pointed-to values. Comparing the pointers themselves would only succeed
+	// when both sides share the same address.
+	if (orig.IsGPUShared == nil) != (that.IsGPUShared == nil) {
+		return false
+	}
+	if orig.IsGPUShared != nil && *orig.IsGPUShared != *that.IsGPUShared {
+		return false
+	}
+
+	if (orig.GPUUsageAverage == nil) != (that.GPUUsageAverage == nil) {
+		return false
+	}
+	if orig.GPUUsageAverage != nil && !util.IsApproximately(*orig.GPUUsageAverage, *that.GPUUsageAverage) {
+		return false
+	}
+
+	if (orig.GPURequestAverage == nil) != (that.GPURequestAverage == nil) {
+		return false
+	}
+	if orig.GPURequestAverage != nil && !util.IsApproximately(*orig.GPURequestAverage, *that.GPURequestAverage) {
+		return false
+	}
+
+	return true
 }
 
 type LbAllocations map[string]*LbAllocation
@@ -174,8 +229,9 @@ func (lba *LbAllocation) SanitizeNaN() {
 // then this type would be unnecessary and its fields would go into the regular Allocation
 // and not in the AggregatedAllocation.
 type RawAllocationOnlyData struct {
-	CPUCoreUsageMax  float64 `json:"cpuCoreUsageMax"`
-	RAMBytesUsageMax float64 `json:"ramByteUsageMax"`
+	CPUCoreUsageMax  float64  `json:"cpuCoreUsageMax"`
+	RAMBytesUsageMax float64  `json:"ramByteUsageMax"`
+	GPUUsageMax      *float64 `json:"gpuUsageMax"` //@bingen:field[version=23]
 }
 
 // Clone returns a deep copy of the given RawAllocationOnlyData
@@ -187,6 +243,7 @@ func (r *RawAllocationOnlyData) Clone() *RawAllocationOnlyData {
 	return &RawAllocationOnlyData{
 		CPUCoreUsageMax:  r.CPUCoreUsageMax,
 		RAMBytesUsageMax: r.RAMBytesUsageMax,
+		GPUUsageMax:      r.GPUUsageMax,
 	}
 }
 
@@ -198,8 +255,16 @@ func (r *RawAllocationOnlyData) Equal(that *RawAllocationOnlyData) bool {
 	if r == nil || that == nil {
 		return false
 	}
-	return util.IsApproximately(r.CPUCoreUsageMax, that.CPUCoreUsageMax) &&
+	cmpResult := util.IsApproximately(r.CPUCoreUsageMax, that.CPUCoreUsageMax) &&
 		util.IsApproximately(r.RAMBytesUsageMax, that.RAMBytesUsageMax)
+
+	if r.GPUUsageMax != nil && that.GPUUsageMax != nil {
+		cmpResult = cmpResult && util.IsApproximately(*r.GPUUsageMax, *that.GPUUsageMax)
+	} else if !(r.GPUUsageMax == nil && that.GPUUsageMax == nil) {
+		cmpResult = false
+	}
+
+	return cmpResult
 }
 
 func (r *RawAllocationOnlyData) SanitizeNaN() {
@@ -214,6 +279,10 @@ func (r *RawAllocationOnlyData) SanitizeNaN() {
 		log.DedupedWarningf(5, "RawAllocationOnlyData: Unexpected NaN found for RAMBytesUsageMax")
 		r.RAMBytesUsageMax = 0
 	}
+	// A nil GPUUsageMax means no value was recorded, which is not an error.
+	// Only warn and reset when a value is present and is NaN.
+	if r.GPUUsageMax != nil && math.IsNaN(*r.GPUUsageMax) {
+		log.DedupedWarningf(5, "RawAllocationOnlyData: Unexpected NaN found for GPUUsageMax")
+		r.GPUUsageMax = nil
+	}
 }
 
 // PVAllocations is a map of Disk Asset Identifiers to the
@@ -675,8 +744,8 @@ func (a *Allocation) Clone() *Allocation {
 		CPUCostIdle:                    a.CPUCostIdle,
 		CPUCostAdjustment:              a.CPUCostAdjustment,
 		GPUHours:                       a.GPUHours,
-		GPURequestAverage:              a.GPURequestAverage,
-		GPUUsageAverage:                a.GPUUsageAverage,
+		deprecatedGPURequestAverage:    a.deprecatedGPURequestAverage,
+		deprecatedGPUUsageAverage:      a.deprecatedGPUUsageAverage,
 		GPUCost:                        a.GPUCost,
 		GPUCostIdle:                    a.GPUCostIdle,
 		GPUCostAdjustment:              a.GPUCostAdjustment,
@@ -704,6 +773,7 @@ func (a *Allocation) Clone() *Allocation {
 		SharedCostBreakdown:            a.SharedCostBreakdown.Clone(),
 		LoadBalancers:                  a.LoadBalancers.Clone(),
 		UnmountedPVCost:                a.UnmountedPVCost,
+		GPUAllocation:                  a.GPUAllocation.Clone(),
 	}
 }
 
@@ -816,6 +886,10 @@ func (a *Allocation) Equal(that *Allocation) bool {
 		return false
 	}
 
+	if !a.GPUAllocation.Equal(that.GPUAllocation) {
+		return false
+	}
+
 	return true
 }
 
@@ -963,17 +1037,24 @@ func (a *Allocation) RAMEfficiency() float64 {
 
 // GPUEfficiency is the ratio of usage to request. Note that, without the NVIDIA
 // DCGM exporter providing Prometheus with usage metrics, this will always be
-// zero, as GPUUsageAverage will be zero (the default value).
+// zero, as deprecatedGPUUsageAverage will be zero (the default value).
 func (a *Allocation) GPUEfficiency() float64 {
 	if a == nil {
 		return 0.0
 	}
+	if a.GPUAllocation == nil {
+		return 0.0
+	}
+
+	if a.GPUAllocation.GPURequestAverage == nil || a.GPUAllocation.GPUUsageAverage == nil {
+		return 0.0
+	}
 
-	if a.GPURequestAverage > 0 && a.GPUUsageAverage > 0 {
-		return a.GPUUsageAverage / a.GPURequestAverage
+	if *a.GPUAllocation.GPURequestAverage > 0 && *a.GPUAllocation.GPUUsageAverage > 0 {
+		return *a.GPUAllocation.GPUUsageAverage / *a.GPUAllocation.GPURequestAverage
 	}
 
-	if a.GPUUsageAverage == 0.0 || a.GPUTotalCost() == 0.0 {
+	// Zero usage (for example when no usage metrics are exported) or zero
+	// GPU cost yields zero efficiency, as the function's doc comment states.
+	if *a.GPUAllocation.GPUUsageAverage == 0.0 || a.GPUTotalCost() == 0.0 {
 		return 0.0
 	}
 
@@ -1221,11 +1302,37 @@ func (a *Allocation) add(that *Allocation) {
 	ramUseByteMins := a.RAMBytesUsageAverage * a.Minutes()
 	ramUseByteMins += that.RAMBytesUsageAverage * that.Minutes()
 
-	gpuReqMins := a.GPURequestAverage * a.Minutes()
-	gpuReqMins += that.GPURequestAverage * that.Minutes()
+	var gpuReqMins *float64 = nil
+	if a.GPUAllocation != nil && a.GPUAllocation.GPURequestAverage != nil {
+		result := *a.GPUAllocation.GPURequestAverage * a.Minutes()
+		gpuReqMins = &result
+	}
 
-	gpuUseMins := a.GPUUsageAverage * a.Minutes()
-	gpuUseMins += that.GPUUsageAverage * that.Minutes()
+	if that.GPUAllocation != nil && that.GPUAllocation.GPURequestAverage != nil {
+		if gpuReqMins == nil {
+			result := *that.GPUAllocation.GPURequestAverage * that.Minutes()
+			gpuReqMins = &result
+		} else {
+			result := *gpuReqMins + *that.GPUAllocation.GPURequestAverage*that.Minutes()
+			gpuReqMins = &result
+		}
+	}
+
+	var gpuUseMins *float64 = nil
+	if a.GPUAllocation != nil && a.GPUAllocation.GPUUsageAverage != nil {
+		result := *a.GPUAllocation.GPUUsageAverage * a.Minutes()
+		gpuUseMins = &result
+	}
+
+	if that.GPUAllocation != nil && that.GPUAllocation.GPUUsageAverage != nil {
+		if gpuUseMins == nil {
+			result := *that.GPUAllocation.GPUUsageAverage * that.Minutes()
+			gpuUseMins = &result
+		} else {
+			result := *gpuUseMins + *that.GPUAllocation.GPUUsageAverage*that.Minutes()
+			gpuUseMins = &result
+		}
+	}
 
 	// Expand Start and End to be the "max" of among the given Allocations
 	if that.Start.Before(a.Start) {
@@ -1242,15 +1349,32 @@ func (a *Allocation) add(that *Allocation) {
 		a.CPUCoreUsageAverage = cpuUseCoreMins / a.Minutes()
 		a.RAMBytesRequestAverage = ramReqByteMins / a.Minutes()
 		a.RAMBytesUsageAverage = ramUseByteMins / a.Minutes()
-		a.GPURequestAverage = gpuReqMins / a.Minutes()
-		a.GPUUsageAverage = gpuUseMins / a.Minutes()
+
+		if a.GPUAllocation != nil {
+			if gpuReqMins != nil {
+				gpuReqMinsRes := *gpuReqMins / a.Minutes()
+				a.GPUAllocation.GPURequestAverage = &gpuReqMinsRes
+			} else {
+				a.GPUAllocation.GPURequestAverage = nil
+			}
+
+			if gpuUseMins != nil {
+				gpuUsageMinsRes := *gpuUseMins / a.Minutes()
+				a.GPUAllocation.GPUUsageAverage = &gpuUsageMinsRes
+			} else {
+				a.GPUAllocation.GPUUsageAverage = nil
+			}
+		}
 	} else {
 		a.CPUCoreRequestAverage = 0.0
 		a.CPUCoreUsageAverage = 0.0
 		a.RAMBytesRequestAverage = 0.0
 		a.RAMBytesUsageAverage = 0.0
-		a.GPURequestAverage = 0.0
-		a.GPUUsageAverage = 0.0
+
+		if a.GPUAllocation != nil {
+			a.GPUAllocation.GPURequestAverage = nil
+			a.GPUAllocation.GPUUsageAverage = nil
+		}
 	}
 
 	// Sum all cumulative resource fields
@@ -1290,6 +1414,21 @@ func (a *Allocation) add(that *Allocation) {
 	// Sum LoadBalancer Allocations
 	a.LoadBalancers = a.LoadBalancers.Add(that.LoadBalancers)
 
+	// Sum GPU Allocations
+	if that.GPUAllocation != nil {
+		if a.GPUAllocation == nil {
+			a.GPUAllocation = that.GPUAllocation.Clone()
+		} else {
+			if a.GPUAllocation.GPUUsageAverage != nil && that.GPUAllocation.GPUUsageAverage != nil {
+				*a.GPUAllocation.GPUUsageAverage += *that.GPUAllocation.GPUUsageAverage
+			}
+
+			if a.GPUAllocation.GPURequestAverage != nil && that.GPUAllocation.GPURequestAverage != nil {
+				*a.GPUAllocation.GPURequestAverage += *that.GPUAllocation.GPURequestAverage
+			}
+		}
+	}
+
 	// Any data that is in a "raw allocation only" is not valid in any
 	// sort of cumulative Allocation (like one that is added).
 	a.RawAllocationOnly = nil
@@ -2587,14 +2726,7 @@ func (a *Allocation) SanitizeNaN() {
 		log.DedupedWarningf(5, "Allocation: Unexpected NaN found for GPUHours name:%s, window:%s, properties:%s", a.Name, a.Window.String(), a.Properties.String())
 		a.GPUHours = 0
 	}
-	if math.IsNaN(a.GPURequestAverage) {
-		log.DedupedWarningf(5, "Allocation: Unexpected NaN found for GPURequestAverage name:%s, window:%s, properties:%s", a.Name, a.Window.String(), a.Properties.String())
-		a.GPURequestAverage = 0
-	}
-	if math.IsNaN(a.GPUUsageAverage) {
-		log.DedupedWarningf(5, "Allocation: Unexpected NaN found for GPUUsageAverage name:%s, window:%s, properties:%s", a.Name, a.Window.String(), a.Properties.String())
-		a.GPUUsageAverage = 0
-	}
+
 	if math.IsNaN(a.GPUCost) {
 		log.DedupedWarningf(5, "Allocation: Unexpected NaN found for GPUCost name:%s, window:%s, properties:%s", a.Name, a.Window.String(), a.Properties.String())
 		a.GPUCost = 0
@@ -2682,6 +2814,7 @@ func (a *Allocation) SanitizeNaN() {
 
 	a.PVs.SanitizeNaN()
 	a.RawAllocationOnly.SanitizeNaN()
+	a.GPUAllocation.SanitizeNaN()
 	a.ProportionalAssetResourceCosts.SanitizeNaN()
 	a.SharedCostBreakdown.SanitizeNaN()
 	a.LoadBalancers.SanitizeNaN()
@@ -3570,3 +3703,15 @@ func (asr *AllocationSetRange) Clone() *AllocationSetRange {
 
 	return sasrClone
 }
+
+// migrateAllocation upgrades an Allocation that was decoded from codec version
+// fromVersion so that it matches the layout of toVersion. It does nothing when
+// the versions are equal or when as is nil.
+//
+// Version 22 -> 23+: the legacy float averages are moved into a new
+// GPUAllocation.
+func migrateAllocation(as *Allocation, fromVersion uint8, toVersion uint8) {
+	if as == nil || fromVersion == toVersion {
+		return
+	}
+
+	if fromVersion == 22 && toVersion >= 23 {
+		// Copy the legacy values into their own storage so the new pointers
+		// do not alias fields of the Allocation itself.
+		usageAvg := as.deprecatedGPUUsageAverage
+		requestAvg := as.deprecatedGPURequestAverage
+		as.GPUAllocation = &GPUAllocation{
+			GPUUsageAverage:   &usageAvg,
+			GPURequestAverage: &requestAvg,
+		}
+	}
+}

+ 4 - 4
core/pkg/opencost/allocation_json.go

@@ -26,8 +26,8 @@ type AllocationJSON struct {
 	CPUCostIdle                    *float64                        `json:"cpuCostIdle"`
 	CPUEfficiency                  *float64                        `json:"cpuEfficiency"`
 	GPUCount                       *float64                        `json:"gpuCount"`
-	GPURequestAverage              *float64                        `json:"gpuRequestAverage"`
-	GPUUsageAverage                *float64                        `json:"gpuUsageAverage"`
+	GPURequestAverage              *float64                        `json:"-"`
+	GPUUsageAverage                *float64                        `json:"-"`
 	GPUHours                       *float64                        `json:"gpuHours"`
 	GPUCost                        *float64                        `json:"gpuCost"`
 	GPUCostAdjustment              *float64                        `json:"gpuCostAdjustment"`
@@ -63,6 +63,7 @@ type AllocationJSON struct {
 	ProportionalAssetResourceCosts *ProportionalAssetResourceCosts `json:"proportionalAssetResourceCosts,omitempty"`
 	LoadBalancers                  LbAllocations                   `json:"lbAllocations"`
 	SharedCostBreakdown            *SharedCostBreakdowns           `json:"sharedCostBreakdown,omitempty"`
+	GPUAllocation                  *GPUAllocation                  `json:"gpuAllocation"`
 }
 
 func (aj *AllocationJSON) BuildFromAllocation(a *Allocation) {
@@ -84,8 +85,6 @@ func (aj *AllocationJSON) BuildFromAllocation(a *Allocation) {
 	aj.CPUCostIdle = formatFloat64ForResponse(a.CPUCostIdle)
 	aj.CPUEfficiency = formatFloat64ForResponse(a.CPUEfficiency())
 	aj.GPUCount = formatFloat64ForResponse(a.GPUs())
-	aj.GPURequestAverage = formatFloat64ForResponse(a.GPURequestAverage)
-	aj.GPUUsageAverage = formatFloat64ForResponse(a.GPUUsageAverage)
 	aj.GPUHours = formatFloat64ForResponse(a.GPUHours)
 	aj.GPUCost = formatFloat64ForResponse(a.GPUCost)
 	aj.GPUCostAdjustment = formatFloat64ForResponse(a.GPUCostAdjustment)
@@ -121,6 +120,7 @@ func (aj *AllocationJSON) BuildFromAllocation(a *Allocation) {
 	aj.ProportionalAssetResourceCosts = &a.ProportionalAssetResourceCosts
 	aj.LoadBalancers = a.LoadBalancers
 	aj.SharedCostBreakdown = &a.SharedCostBreakdown
+	aj.GPUAllocation = a.GPUAllocation
 }
 
 // formatFloat64ForResponse - take an existing float64, round it to 6 decimal places and return is possible, or return nil if invalid

+ 3 - 2
core/pkg/opencost/bingen.go

@@ -46,8 +46,8 @@ package opencost
 // @bingen:end
 
 // Allocation Version Set: Includes Allocation pipeline specific resources
-// @bingen:set[name=Allocation,version=22]
-// @bingen:generate:Allocation
+// @bingen:set[name=Allocation,version=23]
+// @bingen:generate[migrate]:Allocation
 // @bingen:generate[stringtable]:AllocationSet
 // @bingen:generate:AllocationSetRange
 // @bingen:generate:AllocationProperties
@@ -60,6 +60,7 @@ package opencost
 // @bingen:generate:PVAllocation
 // @bingen:generate:LbAllocations
 // @bingen:generate:LbAllocation
+// @bingen:generate:GPUAllocation
 // @bingen:end
 
 // @bingen:set[name=CloudCost,version=3]

+ 261 - 8
core/pkg/opencost/opencost_codecs.go

@@ -13,11 +13,12 @@ package opencost
 
 import (
 	"fmt"
-	util "github.com/opencost/opencost/core/pkg/util"
 	"reflect"
 	"strings"
 	"sync"
 	"time"
+
+	"github.com/opencost/opencost/core/pkg/util"
 )
 
 const (
@@ -40,7 +41,7 @@ const (
 	AssetsCodecVersion uint8 = 21
 
 	// AllocationCodecVersion is used for any resources listed in the Allocation version set
-	AllocationCodecVersion uint8 = 22
+	AllocationCodecVersion uint8 = 23
 
 	// CloudCostCodecVersion is used for any resources listed in the CloudCost version set
 	CloudCostCodecVersion uint8 = 3
@@ -72,6 +73,7 @@ var typeMap map[string]reflect.Type = map[string]reflect.Type{
 	"Coverage":              reflect.TypeOf((*Coverage)(nil)).Elem(),
 	"CoverageSet":           reflect.TypeOf((*CoverageSet)(nil)).Elem(),
 	"Disk":                  reflect.TypeOf((*Disk)(nil)).Elem(),
+	"GPUAllocation":         reflect.TypeOf((*GPUAllocation)(nil)).Elem(),
 	"LbAllocation":          reflect.TypeOf((*LbAllocation)(nil)).Elem(),
 	"LoadBalancer":          reflect.TypeOf((*LoadBalancer)(nil)).Elem(),
 	"Network":               reflect.TypeOf((*Network)(nil)).Elem(),
@@ -456,8 +458,22 @@ func (target *Allocation) MarshalBinaryWithContext(ctx *EncodingContext) (err er
 	}
 	// --- [end][write][alias](LbAllocations) ---
 
-	buff.WriteFloat64(target.GPURequestAverage) // write float64
-	buff.WriteFloat64(target.GPUUsageAverage)   // write float64
+	buff.WriteFloat64(target.deprecatedGPURequestAverage) // write float64
+	buff.WriteFloat64(target.deprecatedGPUUsageAverage)   // write float64
+	if target.GPUAllocation == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		// --- [begin][write][struct](GPUAllocation) ---
+		buff.WriteInt(0) // [compatibility, unused]
+		errI := target.GPUAllocation.MarshalBinaryWithContext(ctx)
+		if errI != nil {
+			return errI
+		}
+		// --- [end][write][struct](GPUAllocation) ---
+
+	}
 	return nil
 }
 
@@ -773,19 +789,45 @@ func (target *Allocation) UnmarshalBinaryWithContext(ctx *DecodingContext) (err
 	// field version check
 	if uint8(22) <= version {
 		fff := buff.ReadFloat64() // read float64
-		target.GPURequestAverage = fff
+		target.deprecatedGPURequestAverage = fff
 
 	} else {
-		target.GPURequestAverage = float64(0) // default
+		target.deprecatedGPURequestAverage = float64(0) // default
 	}
 
 	// field version check
 	if uint8(22) <= version {
 		ggg := buff.ReadFloat64() // read float64
-		target.GPUUsageAverage = ggg
+		target.deprecatedGPUUsageAverage = ggg
+
+	} else {
+		target.deprecatedGPUUsageAverage = float64(0) // default
+	}
 
+	// field version check
+	if uint8(23) <= version {
+		if buff.ReadUInt8() == uint8(0) {
+			target.GPUAllocation = nil
+		} else {
+			// --- [begin][read][struct](GPUAllocation) ---
+			hhh := &GPUAllocation{}
+			buff.ReadInt() // [compatibility, unused]
+			errI := hhh.UnmarshalBinaryWithContext(ctx)
+			if errI != nil {
+				return errI
+			}
+			target.GPUAllocation = hhh
+			// --- [end][read][struct](GPUAllocation) ---
+
+		}
 	} else {
-		target.GPUUsageAverage = float64(0) // default
+		target.GPUAllocation = nil
+
+	}
+
+	// execute migration func if version delta detected
+	if version != AllocationCodecVersion {
+		migrateAllocation(target, version, AllocationCodecVersion)
 	}
 
 	return nil
@@ -5486,6 +5528,196 @@ func (target *Disk) UnmarshalBinaryWithContext(ctx *DecodingContext) (err error)
 	return nil
 }
 
+//--------------------------------------------------------------------------
+//  GPUAllocation
+//--------------------------------------------------------------------------
+
+// MarshalBinary serializes the internal properties of this GPUAllocation instance
+// into a byte array
+func (target *GPUAllocation) MarshalBinary() (data []byte, err error) {
+	ctx := &EncodingContext{
+		Buffer: util.NewBuffer(),
+		Table:  nil,
+	}
+
+	e := target.MarshalBinaryWithContext(ctx)
+	if e != nil {
+		return nil, e
+	}
+
+	encBytes := ctx.Buffer.Bytes()
+	return encBytes, nil
+}
+
+// MarshalBinaryWithContext serializes the internal properties of this GPUAllocation instance
+// into a byte array leveraging a predefined context.
+func (target *GPUAllocation) MarshalBinaryWithContext(ctx *EncodingContext) (err error) {
+	// panics are recovered and propagated as errors
+	defer func() {
+		if r := recover(); r != nil {
+			if e, ok := r.(error); ok {
+				err = e
+			} else if s, ok := r.(string); ok {
+				err = fmt.Errorf("Unexpected panic: %s", s)
+			} else {
+				err = fmt.Errorf("Unexpected panic: %+v", r)
+			}
+		}
+	}()
+
+	buff := ctx.Buffer
+	buff.WriteUInt8(AllocationCodecVersion) // version
+
+	if ctx.IsStringTable() {
+		a := ctx.Table.AddOrGet(target.GPUDevice)
+		buff.WriteInt(a) // write table index
+	} else {
+		buff.WriteString(target.GPUDevice) // write string
+	}
+	if ctx.IsStringTable() {
+		b := ctx.Table.AddOrGet(target.GPUModel)
+		buff.WriteInt(b) // write table index
+	} else {
+		buff.WriteString(target.GPUModel) // write string
+	}
+	if ctx.IsStringTable() {
+		c := ctx.Table.AddOrGet(target.GPUUUID)
+		buff.WriteInt(c) // write table index
+	} else {
+		buff.WriteString(target.GPUUUID) // write string
+	}
+	if target.IsGPUShared == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteBool(*target.IsGPUShared) // write bool
+	}
+	if target.GPUUsageAverage == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.GPUUsageAverage) // write float64
+	}
+	if target.GPURequestAverage == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.GPURequestAverage) // write float64
+	}
+	return nil
+}
+
+// UnmarshalBinary uses the data passed byte array to set all the internal properties of
+// the GPUAllocation type
+func (target *GPUAllocation) UnmarshalBinary(data []byte) error {
+	var table []string
+	buff := util.NewBufferFromBytes(data)
+
+	// string table header validation
+	if isBinaryTag(data, BinaryTagStringTable) {
+		buff.ReadBytes(len(BinaryTagStringTable)) // strip tag length
+		tl := buff.ReadInt()                      // table length
+		if tl > 0 {
+			table = make([]string, tl, tl)
+			for i := 0; i < tl; i++ {
+				table[i] = buff.ReadString()
+			}
+		}
+	}
+
+	ctx := &DecodingContext{
+		Buffer: buff,
+		Table:  table,
+	}
+
+	err := target.UnmarshalBinaryWithContext(ctx)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// UnmarshalBinaryWithContext uses the context containing a string table and binary buffer to set all the internal properties of
+// the GPUAllocation type
+func (target *GPUAllocation) UnmarshalBinaryWithContext(ctx *DecodingContext) (err error) {
+	// panics are recovered and propagated as errors
+	defer func() {
+		if r := recover(); r != nil {
+			if e, ok := r.(error); ok {
+				err = e
+			} else if s, ok := r.(string); ok {
+				err = fmt.Errorf("Unexpected panic: %s", s)
+			} else {
+				err = fmt.Errorf("Unexpected panic: %+v", r)
+			}
+		}
+	}()
+
+	buff := ctx.Buffer
+	version := buff.ReadUInt8()
+
+	if version > AllocationCodecVersion {
+		return fmt.Errorf("Invalid Version Unmarshaling GPUAllocation. Expected %d or less, got %d", AllocationCodecVersion, version)
+	}
+
+	var b string
+	if ctx.IsStringTable() {
+		c := buff.ReadInt() // read string index
+		b = ctx.Table[c]
+	} else {
+		b = buff.ReadString() // read string
+	}
+	a := b
+	target.GPUDevice = a
+
+	var e string
+	if ctx.IsStringTable() {
+		f := buff.ReadInt() // read string index
+		e = ctx.Table[f]
+	} else {
+		e = buff.ReadString() // read string
+	}
+	d := e
+	target.GPUModel = d
+
+	var h string
+	if ctx.IsStringTable() {
+		k := buff.ReadInt() // read string index
+		h = ctx.Table[k]
+	} else {
+		h = buff.ReadString() // read string
+	}
+	g := h
+	target.GPUUUID = g
+
+	if buff.ReadUInt8() == uint8(0) {
+		target.IsGPUShared = nil
+	} else {
+		l := buff.ReadBool() // read bool
+		target.IsGPUShared = &l
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.GPUUsageAverage = nil
+	} else {
+		m := buff.ReadFloat64() // read float64
+		target.GPUUsageAverage = &m
+
+	}
+	if buff.ReadUInt8() == uint8(0) {
+		target.GPURequestAverage = nil
+	} else {
+		n := buff.ReadFloat64() // read float64
+		target.GPURequestAverage = &n
+
+	}
+	return nil
+}
+
 //--------------------------------------------------------------------------
 //  LbAllocation
 //--------------------------------------------------------------------------
@@ -7012,6 +7244,13 @@ func (target *RawAllocationOnlyData) MarshalBinaryWithContext(ctx *EncodingConte
 
 	buff.WriteFloat64(target.CPUCoreUsageMax)  // write float64
 	buff.WriteFloat64(target.RAMBytesUsageMax) // write float64
+	if target.GPUUsageMax == nil {
+		buff.WriteUInt8(uint8(0)) // write nil byte
+	} else {
+		buff.WriteUInt8(uint8(1)) // write non-nil byte
+
+		buff.WriteFloat64(*target.GPUUsageMax) // write float64
+	}
 	return nil
 }
 
@@ -7075,6 +7314,20 @@ func (target *RawAllocationOnlyData) UnmarshalBinaryWithContext(ctx *DecodingCon
 	b := buff.ReadFloat64() // read float64
 	target.RAMBytesUsageMax = b
 
+	// field version check
+	if uint8(23) <= version {
+		if buff.ReadUInt8() == uint8(0) {
+			target.GPUUsageMax = nil
+		} else {
+			c := buff.ReadFloat64() // read float64
+			target.GPUUsageMax = &c
+
+		}
+	} else {
+		target.GPUUsageMax = nil
+
+	}
+
 	return nil
 }
 

+ 56 - 12
core/pkg/opencost/summaryallocation.go

@@ -30,8 +30,8 @@ type SummaryAllocation struct {
 	CPUCoreUsageAverage    float64               `json:"cpuCoreUsageAverage"`
 	CPUCost                float64               `json:"cpuCost"`
 	CPUCostIdle            float64               `json:"cpuCostIdle"`
-	GPURequestAverage      float64               `json:"gpuRequestAverage"`
-	GPUUsageAverage        float64               `json:"gpuUsageAverage"`
+	GPURequestAverage      *float64              `json:"gpuRequestAverage"`
+	GPUUsageAverage        *float64              `json:"gpuUsageAverage"`
 	GPUCost                float64               `json:"gpuCost"`
 	GPUCostIdle            float64               `json:"gpuCostIdle"`
 	NetworkCost            float64               `json:"networkCost"`
@@ -57,6 +57,12 @@ func NewSummaryAllocation(alloc *Allocation, reconcile, reconcileNetwork bool) *
 		return nil
 	}
 
+	var gpuRequestAvg, gpuUsageAvg *float64
+	if alloc.GPUAllocation != nil {
+		gpuRequestAvg = alloc.GPUAllocation.GPURequestAverage
+		gpuUsageAvg = alloc.GPUAllocation.GPUUsageAverage
+	}
+
 	sa := &SummaryAllocation{
 		Name:                   alloc.Name,
 		Properties:             alloc.Properties,
@@ -65,8 +71,8 @@ func NewSummaryAllocation(alloc *Allocation, reconcile, reconcileNetwork bool) *
 		CPUCoreRequestAverage:  alloc.CPUCoreRequestAverage,
 		CPUCoreUsageAverage:    alloc.CPUCoreUsageAverage,
 		CPUCost:                alloc.CPUCost + alloc.CPUCostAdjustment,
-		GPURequestAverage:      alloc.GPURequestAverage,
-		GPUUsageAverage:        alloc.GPUUsageAverage,
+		GPURequestAverage:      gpuRequestAvg,
+		GPUUsageAverage:        gpuUsageAvg,
 		GPUCost:                alloc.GPUCost + alloc.GPUCostAdjustment,
 		NetworkCost:            alloc.NetworkCost + alloc.NetworkCostAdjustment,
 		LoadBalancerCost:       alloc.LoadBalancerCost + alloc.LoadBalancerCostAdjustment,
@@ -128,11 +134,37 @@ func (sa *SummaryAllocation) Add(that *SummaryAllocation) error {
 	ramUseByteMins := sa.RAMBytesUsageAverage * sa.Minutes()
 	ramUseByteMins += that.RAMBytesUsageAverage * that.Minutes()
 
-	gpuReqMins := sa.GPURequestAverage * sa.Minutes()
-	gpuReqMins += that.GPURequestAverage * that.Minutes()
+	var gpuReqMins *float64 = nil
+	if sa.GPURequestAverage != nil {
+		result := *sa.GPURequestAverage * sa.Minutes()
+		gpuReqMins = &result
+	}
 
-	gpuUseMins := sa.GPUUsageAverage * sa.Minutes()
-	gpuUseMins += that.GPUUsageAverage * that.Minutes()
+	// Fold in that's request-minutes whenever that has a request value, even
+	// if sa has none; this mirrors the handling of GPUUsageAverage.
+	if that.GPURequestAverage != nil {
+		if gpuReqMins == nil {
+			result := *that.GPURequestAverage * that.Minutes()
+			gpuReqMins = &result
+		} else {
+			result := *gpuReqMins + *that.GPURequestAverage*that.Minutes()
+			gpuReqMins = &result
+		}
+	}
+
+	var gpuUseMins *float64 = nil
+	if sa.GPUUsageAverage != nil {
+		result := *sa.GPUUsageAverage * sa.Minutes()
+		gpuUseMins = &result
+	}
+
+	if that.GPUUsageAverage != nil {
+		if gpuUseMins == nil {
+			result := *that.GPUUsageAverage * that.Minutes()
+			gpuUseMins = &result
+		} else {
+			result := *gpuUseMins + *that.GPUUsageAverage*that.Minutes()
+			gpuUseMins = &result
+		}
+	}
 
 	// Expand Start and End to be the "max" of among the given Allocations
 	if that.Start.Before(sa.Start) {
@@ -148,15 +180,27 @@ func (sa *SummaryAllocation) Add(that *SummaryAllocation) error {
 		sa.CPUCoreUsageAverage = cpuUseCoreMins / sa.Minutes()
 		sa.RAMBytesRequestAverage = ramReqByteMins / sa.Minutes()
 		sa.RAMBytesUsageAverage = ramUseByteMins / sa.Minutes()
-		sa.GPURequestAverage = gpuReqMins / sa.Minutes()
-		sa.GPUUsageAverage = gpuUseMins / sa.Minutes()
+
+		var gpuReqAvgVal, gpuUsageAvgVal *float64
+		if gpuReqMins != nil {
+			result := *gpuReqMins / sa.Minutes()
+			gpuReqAvgVal = &result
+		}
+
+		if gpuUseMins != nil {
+			result := *gpuUseMins / sa.Minutes()
+			gpuUsageAvgVal = &result
+		}
+
+		sa.GPURequestAverage = gpuReqAvgVal
+		sa.GPUUsageAverage = gpuUsageAvgVal
 	} else {
 		sa.CPUCoreRequestAverage = 0.0
 		sa.CPUCoreUsageAverage = 0.0
 		sa.RAMBytesRequestAverage = 0.0
 		sa.RAMBytesUsageAverage = 0.0
-		sa.GPURequestAverage = 0.0
-		sa.GPUUsageAverage = 0.0
+		sa.GPURequestAverage = nil
+		sa.GPUUsageAverage = nil
 	}
 
 	// Sum all cumulative cost fields

+ 2 - 2
core/pkg/opencost/summaryallocation_json.go

@@ -58,8 +58,8 @@ func (sa *SummaryAllocation) ToResponse() *SummaryAllocationResponse {
 		CPUCoreUsageAverage:    formatutil.Float64ToResponse(sa.CPUCoreUsageAverage),
 		CPUCost:                formatutil.Float64ToResponse(sa.CPUCost),
 		CPUCostIdle:            formatutil.Float64ToResponse(sa.CPUCostIdle),
-		GPURequestAverage:      formatutil.Float64ToResponse(sa.GPURequestAverage),
-		GPUUsageAverage:        formatutil.Float64ToResponse(sa.GPUUsageAverage),
+		GPURequestAverage:      sa.GPURequestAverage, // already in *float64
+		GPUUsageAverage:        sa.GPUUsageAverage,   // already in *float64
 		GPUCost:                formatutil.Float64ToResponse(sa.GPUCost),
 		GPUCostIdle:            formatutil.Float64ToResponse(sa.GPUCostIdle),
 		NetworkCost:            formatutil.Float64ToResponse(sa.NetworkCost),

+ 27 - 1
pkg/costmodel/allocation.go

@@ -29,6 +29,7 @@ const (
 	queryFmtCPUUsageAvg                 = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
 	queryFmtGPUsRequested               = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
 	queryFmtGPUsUsageAvg                = `avg(avg_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, %s)`
+	queryFmtGPUsUsageMax                = `max(max_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, %s)`
 	queryFmtGPUsAllocated               = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
 	queryFmtNodeCostPerCPUHr            = `avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (node, %s, instance_type, provider_id)`
 	queryFmtNodeCostPerRAMGiBHr         = `avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (node, %s, instance_type, provider_id)`
@@ -66,6 +67,8 @@ const (
 	queryFmtLBActiveMins                = `count(kubecost_load_balancer_cost{%s}) by (namespace, service_name, %s)[%s:%s]`
 	queryFmtOldestSample                = `min_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
 	queryFmtNewestSample                = `max_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
+	queryFmtIsGPuShared                 = `avg(avg_over_time(kube_pod_container_resource_requests{container!="", node != "", pod != "", container!= "", unit = "integer",  %s}[%s])) by (container, pod, namespace, node, resource)`
+	queryFmtGetGPuInfo                  = `avg(avg_over_time(DCGM_FI_DEV_DEC_UTIL{container!="",%s}[%s])) by (container, pod, namespace, device, modelName, UUID)`
 
 	// Because we use container_cpu_usage_seconds_total to calculate CPU usage
 	// at any given "instant" of time, we need to use an irate or rate. To then
@@ -276,6 +279,10 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 			if alloc.RawAllocationOnly.RAMBytesUsageMax > resultAlloc.RawAllocationOnly.RAMBytesUsageMax {
 				resultAlloc.RawAllocationOnly.RAMBytesUsageMax = alloc.RawAllocationOnly.RAMBytesUsageMax
 			}
+
+			// Keep the larger of the two GPU usage maxima. The guard must look at
+			// GPUUsageMax itself (not CPUCoreUsageMax); the field is a pointer, so
+			// check for nil on both sides before dereferencing.
+			if alloc.RawAllocationOnly.GPUUsageMax != nil {
+				if resultAlloc.RawAllocationOnly.GPUUsageMax == nil || *alloc.RawAllocationOnly.GPUUsageMax > *resultAlloc.RawAllocationOnly.GPUUsageMax {
+					resultAlloc.RawAllocationOnly.GPUUsageMax = alloc.RawAllocationOnly.GPUUsageMax
+				}
+			}
 		}
 	}
 
@@ -426,12 +433,17 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 		}
 	}
 
+	// GPU Queries
+	//queryIsGpuShared := fmt.Sprintf(queryFmtIsGPuShared, durStr)
 	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChGPUsRequested := ctx.QueryAtTime(queryGPUsRequested, end)
 
 	queryGPUsUsageAvg := fmt.Sprintf(queryFmtGPUsUsageAvg, durStr, env.GetPromClusterLabel())
 	resChGPUsUsageAvg := ctx.Query(queryGPUsUsageAvg)
 
+	queryGPUsUsageMax := fmt.Sprintf(queryFmtGPUsUsageMax, durStr, env.GetPromClusterLabel())
+	resChGPUsUsageMax := ctx.Query(queryGPUsUsageMax)
+
 	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChGPUsAllocated := ctx.QueryAtTime(queryGPUsAllocated, end)
 
@@ -492,6 +504,13 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetInternetCostPerGiB := ctx.QueryAtTime(queryNetInternetCostPerGiB, end)
 
+	//GPU Queries
+	queryIsGpuShared := fmt.Sprintf(queryFmtIsGPuShared, env.GetPromClusterFilter(), durStr)
+	resChIsGpuShared := ctx.QueryAtTime(queryIsGpuShared, end)
+
+	queryGetGPUInfo := fmt.Sprintf(queryFmtGetGPuInfo, env.GetPromClusterFilter(), durStr)
+	resChGetGPUInfo := ctx.QueryAtTime(queryGetGPUInfo, end)
+
 	var resChNodeLabels prom.QueryResultsChan
 	if env.GetAllocationNodeLabelsEnabled() {
 		queryNodeLabels := fmt.Sprintf(queryFmtNodeLabels, env.GetPromClusterFilter(), durStr)
@@ -549,8 +568,12 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	resRAMUsageMax, _ := resChRAMUsageMax.Await()
 	resGPUsRequested, _ := resChGPUsRequested.Await()
 	resGPUsUsageAvg, _ := resChGPUsUsageAvg.Await()
+	resGPUsUsageMax, _ := resChGPUsUsageMax.Await()
 	resGPUsAllocated, _ := resChGPUsAllocated.Await()
 
+	resIsGpuShared, _ := resChIsGpuShared.Await()
+	resGetGPUInfo, _ := resChGetGPUInfo.Await()
+
 	resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
 	resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
 	resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
@@ -615,7 +638,10 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	applyRAMBytesRequested(podMap, resRAMRequests, podUIDKeyMap)
 	applyRAMBytesUsedAvg(podMap, resRAMUsageAvg, podUIDKeyMap)
 	applyRAMBytesUsedMax(podMap, resRAMUsageMax, podUIDKeyMap)
-	applyGPUUsageAvg(podMap, resGPUsUsageAvg, podUIDKeyMap)
+	applyGPUUsage(podMap, resGPUsUsageAvg, podUIDKeyMap, GpuUsageAverageMode)
+	applyGPUUsage(podMap, resGPUsUsageMax, podUIDKeyMap, GpuUsageMaxMode)
+	applyGPUUsage(podMap, resIsGpuShared, podUIDKeyMap, GpuIsSharedMode)
+	applyGPUUsage(podMap, resGetGPUInfo, podUIDKeyMap, GpuInfoMode)
 	applyGPUsAllocated(podMap, resGPUsRequested, resGPUsAllocated, podUIDKeyMap)
 	applyNetworkTotals(podMap, resNetTransferBytes, resNetReceiveBytes, podUIDKeyMap)
 	applyNetworkAllocation(podMap, resNetZoneGiB, resNetZoneCostPerGiB, podUIDKeyMap, networkCrossZoneCost)

+ 87 - 6
pkg/costmodel/allocation_helpers.go

@@ -29,6 +29,13 @@ const TiB = 1024.0 * GiB
 const PiB = 1024.0 * TiB
 const PV_USAGE_SANITY_LIMIT_BYTES = 10.0 * PiB
 
+// Modes accepted by applyGPUUsage. The mode selects which allocation field a
+// GPU query result is written to.
+const (
+	// GpuUsageAverageMode stores the result value in GPUAllocation.GPUUsageAverage.
+	GpuUsageAverageMode = "AVERAGE"
+	// GpuUsageMaxMode stores the result value in RawAllocationOnly.GPUUsageMax.
+	GpuUsageMaxMode     = "MAX"
+	// GpuIsSharedMode sets GPUAllocation.IsGPUShared from the result's
+	// "resource" label.
+	GpuIsSharedMode     = "SHARED"
+	// GpuInfoMode copies device, model and UUID labels into GPUAllocation.
+	GpuInfoMode         = "GPU_INFO"
+)
+
 /* Pod Helpers */
 
 func (cm *CostModel) buildPodMap(window opencost.Window, resolution, maxBatchSize time.Duration, podMap map[podKey]*pod, clusterStart, clusterEnd map[string]time.Time, ingestPodUID bool, podUIDKeyMap map[podKey][]podKey) error {
@@ -614,12 +621,13 @@ func applyRAMBytesUsedMax(podMap map[podKey]*pod, resRAMBytesUsedMax []*prom.Que
 	}
 }
 
-func applyGPUUsageAvg(podMap map[podKey]*pod, resGPUUsageAvg []*prom.QueryResult, podUIDKeyMap map[podKey][]podKey) {
+// applyGPUUsage applies one set of GPU query results to the matching pod allocations; mode selects average usage, max usage, shared status, or device info.
+func applyGPUUsage(podMap map[podKey]*pod, resGPUUsageAvgOrMax []*prom.QueryResult, podUIDKeyMap map[podKey][]podKey, mode string) {
 	// Example PromQueryResult: {container="dcgmproftester12", namespace="gpu", pod="dcgmproftester3-deployment-fc89c8dd6-ph7z5"} 0.997307
-	for _, res := range resGPUUsageAvg {
+	for _, res := range resGPUUsageAvgOrMax {
 		key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
 		if err != nil {
-			log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU usage avg result missing field: %s", err)
+			log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU usage avg/max result missing field: %s", err)
 			continue
 		}
 
@@ -642,7 +650,7 @@ func applyGPUUsageAvg(podMap map[podKey]*pod, resGPUUsageAvg []*prom.QueryResult
 		for _, thisPod := range pods {
 			container, err := res.GetString("container")
 			if err != nil {
-				log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU usage avg query result missing 'container': %s", key)
+				log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU usage avg/max query result missing 'container': %s", key)
 				continue
 			}
 			if _, ok := thisPod.Allocations[container]; !ok {
@@ -650,7 +658,58 @@ func applyGPUUsageAvg(podMap map[podKey]*pod, resGPUUsageAvg []*prom.QueryResult
 			}
 
 			// DCGM_FI_PROF_GR_ENGINE_ACTIVE metric is a float between 0-1.
-			thisPod.Allocations[container].GPUUsageAverage = res.Values[0].Value
+			switch mode {
+			case GpuUsageAverageMode:
+
+				if thisPod.Allocations[container].GPUAllocation == nil {
+					thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{GPUUsageAverage: &res.Values[0].Value}
+				} else {
+					thisPod.Allocations[container].GPUAllocation.GPUUsageAverage = &res.Values[0].Value
+				}
+			case GpuUsageMaxMode:
+				thisPod.Allocations[container].RawAllocationOnly.GPUUsageMax = &res.Values[0].Value
+			case GpuIsSharedMode:
+				// if a container is using a GPU and it is shared, isGPUShared will be true
+				// if a container is using GPU and it is NOT shared, isGPUShared will be false
+				// if a container is NOT using a GPU, isGPUShared will be null
+				if res.Metric["resource"] == "nvidia_com_gpu_shared" {
+					trueVal := true
+					if res.Values[0].Value == 1 {
+						if thisPod.Allocations[container].GPUAllocation == nil {
+
+							thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{IsGPUShared: &trueVal}
+						} else {
+							thisPod.Allocations[container].GPUAllocation.IsGPUShared = &trueVal
+						}
+					}
+				} else if res.Metric["resource"] == "nvidia_com_gpu" {
+					falseVal := false
+					if res.Values[0].Value == 1 {
+						if thisPod.Allocations[container].GPUAllocation == nil {
+							thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{IsGPUShared: &falseVal}
+						} else {
+							thisPod.Allocations[container].GPUAllocation.IsGPUShared = &falseVal
+						}
+					}
+				} else {
+					continue
+				}
+			case GpuInfoMode:
+				// The GPU info query groups by the "device" label, so read that
+				// label regardless of whether the GPUAllocation already exists
+				// ("device_name" is not among the grouped labels).
+				if thisPod.Allocations[container].GPUAllocation == nil {
+					thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{}
+				}
+				gpuAlloc := thisPod.Allocations[container].GPUAllocation
+				gpuAlloc.GPUDevice = getSanitizedDeviceName(fmt.Sprintf("%s", res.Metric["device"]))
+				gpuAlloc.GPUModel = fmt.Sprintf("%s", res.Metric["modelName"])
+				gpuAlloc.GPUUUID = fmt.Sprintf("%s", res.Metric["UUID"])
+
+			default:
+				log.DedupedInfof(10, "CostModel.ComputeAllocation: Unknown mode: %s", mode)
+			}
 		}
 	}
 }
@@ -702,7 +761,21 @@ func applyGPUsAllocated(podMap map[podKey]*pod, resGPUsRequested []*prom.QueryRe
 			// Therefore max(usage,request) will always equal request. In the
 			// future this may need to be refactored when building support for
 			// GPU Time Slicing.
-			thisPod.Allocations[container].GPURequestAverage = res.Values[0].Value
+
+			// Make sure a GPUAllocation exists, then record the request average
+			// on it. A single nil check suffices; once the struct has been
+			// dereferenced it cannot be nil.
+			if thisPod.Allocations[container].GPUAllocation == nil {
+				thisPod.Allocations[container].GPUAllocation = &opencost.GPUAllocation{}
+			}
+			thisPod.Allocations[container].GPUAllocation.GPURequestAverage = &res.Values[0].Value
 		}
 	}
 }
@@ -2324,3 +2397,11 @@ func calculateStartAndEnd(result *prom.QueryResult, resolution time.Duration, wi
 
 	return s, e
 }
+
+// getSanitizedDeviceName returns "nvidia" for any device name that contains
+// the substring "nvidia" (case-sensitive); every other name, including the
+// empty string, is returned unchanged.
+func getSanitizedDeviceName(deviceName string) string {
+	if !strings.Contains(deviceName, "nvidia") {
+		return deviceName
+	}
+	return "nvidia"
+}