Sfoglia il codice sorgente

fix: Negative idle cost on promless mode (#3393)

Signed-off-by: Sparsh <sparsh.raj30@gmail.com>
Co-authored-by: Alex Meijer <ameijer@users.noreply.github.com>
Sparsh Raj 6 mesi fa
parent
commit
9bf926c15c

+ 18 - 0
core/pkg/opencost/totals.go

@@ -35,8 +35,10 @@ type AllocationTotals struct {
 	Count                          int       `json:"count"`
 	CPUCost                        float64   `json:"cpuCost"`
 	CPUCostAdjustment              float64   `json:"cpuCostAdjustment"`
+	CPUCoreHours                   float64   `json:"cpuCoreHours"`
 	GPUCost                        float64   `json:"gpuCost"`
 	GPUCostAdjustment              float64   `json:"gpuCostAdjustment"`
+	GPUHours                       float64   `json:"gpuHours"`
 	LoadBalancerCost               float64   `json:"loadBalancerCost"`
 	LoadBalancerCostAdjustment     float64   `json:"loadBalancerCostAdjustment"`
 	NetworkCost                    float64   `json:"networkCost"`
@@ -45,6 +47,7 @@ type AllocationTotals struct {
 	PersistentVolumeCostAdjustment float64   `json:"persistentVolumeCostAdjustment"`
 	RAMCost                        float64   `json:"ramCost"`
 	RAMCostAdjustment              float64   `json:"ramCostAdjustment"`
+	RAMByteHours                   float64   `json:"ramByteHours"`
 	// UnmountedPVCost is used to track how much of the cost in
 	// PersistentVolumeCost is for an unmounted PV. It is not additive of that
 	// field, and need not be sent in API responses.
@@ -71,8 +74,10 @@ func (art *AllocationTotals) Clone() *AllocationTotals {
 		Count:                          art.Count,
 		CPUCost:                        art.CPUCost,
 		CPUCostAdjustment:              art.CPUCostAdjustment,
+		CPUCoreHours:                   art.CPUCoreHours,
 		GPUCost:                        art.GPUCost,
 		GPUCostAdjustment:              art.GPUCostAdjustment,
+		GPUHours:                       art.GPUHours,
 		LoadBalancerCost:               art.LoadBalancerCost,
 		LoadBalancerCostAdjustment:     art.LoadBalancerCostAdjustment,
 		NetworkCost:                    art.NetworkCost,
@@ -81,6 +86,7 @@ func (art *AllocationTotals) Clone() *AllocationTotals {
 		PersistentVolumeCostAdjustment: art.PersistentVolumeCostAdjustment,
 		RAMCost:                        art.RAMCost,
 		RAMCostAdjustment:              art.RAMCostAdjustment,
+		RAMByteHours:                   art.RAMByteHours,
 	}
 }
 
@@ -162,9 +168,11 @@ func ComputeAllocationTotals(as *AllocationSet, prop string) map[string]*Allocat
 
 		arts[key].CPUCost += alloc.CPUCost
 		arts[key].CPUCostAdjustment += alloc.CPUCostAdjustment
+		arts[key].CPUCoreHours += alloc.CPUCoreHours
 
 		arts[key].GPUCost += alloc.GPUCost
 		arts[key].GPUCostAdjustment += alloc.GPUCostAdjustment
+		arts[key].GPUHours += alloc.GPUHours
 
 		arts[key].LoadBalancerCost += alloc.LoadBalancerCost
 		arts[key].LoadBalancerCostAdjustment += alloc.LoadBalancerCostAdjustment
@@ -177,6 +185,7 @@ func ComputeAllocationTotals(as *AllocationSet, prop string) map[string]*Allocat
 
 		arts[key].RAMCost += alloc.RAMCost
 		arts[key].RAMCostAdjustment += alloc.RAMCostAdjustment
+		arts[key].RAMByteHours += alloc.RAMByteHours
 	}
 
 	return arts
@@ -217,14 +226,17 @@ type AssetTotals struct {
 	ClusterManagementCostAdjustment float64   `json:"clusterManagementCostAdjustment"`
 	CPUCost                         float64   `json:"cpuCost"`
 	CPUCostAdjustment               float64   `json:"cpuCostAdjustment"`
+	CPUCoreHours                    float64   `json:"cpuCoreHours"`
 	GPUCost                         float64   `json:"gpuCost"`
 	GPUCostAdjustment               float64   `json:"gpuCostAdjustment"`
+	GPUHours                        float64   `json:"gpuHours"`
 	LoadBalancerCost                float64   `json:"loadBalancerCost"`
 	LoadBalancerCostAdjustment      float64   `json:"loadBalancerCostAdjustment"`
 	PersistentVolumeCost            float64   `json:"persistentVolumeCost"`
 	PersistentVolumeCostAdjustment  float64   `json:"persistentVolumeCostAdjustment"`
 	RAMCost                         float64   `json:"ramCost"`
 	RAMCostAdjustment               float64   `json:"ramCostAdjustment"`
+	RAMByteHours                    float64   `json:"ramByteHours"`
 	PrivateLoadBalancer             bool      `json:"privateLoadBalancer"`
 }
 
@@ -254,14 +266,17 @@ func (art *AssetTotals) Clone() *AssetTotals {
 		ClusterManagementCostAdjustment: art.ClusterManagementCostAdjustment,
 		CPUCost:                         art.CPUCost,
 		CPUCostAdjustment:               art.CPUCostAdjustment,
+		CPUCoreHours:                    art.CPUCoreHours,
 		GPUCost:                         art.GPUCost,
 		GPUCostAdjustment:               art.GPUCostAdjustment,
+		GPUHours:                        art.GPUHours,
 		LoadBalancerCost:                art.LoadBalancerCost,
 		LoadBalancerCostAdjustment:      art.LoadBalancerCostAdjustment,
 		PersistentVolumeCost:            art.PersistentVolumeCost,
 		PersistentVolumeCostAdjustment:  art.PersistentVolumeCostAdjustment,
 		RAMCost:                         art.RAMCost,
 		RAMCostAdjustment:               art.RAMCostAdjustment,
+		RAMByteHours:                    art.RAMByteHours,
 		PrivateLoadBalancer:             art.PrivateLoadBalancer,
 	}
 }
@@ -411,14 +426,17 @@ func ComputeAssetTotals(as *AssetSet, byAsset bool) map[string]*AssetTotals {
 		// TotalCPUCost will be discounted cost + adjustment
 		arts[key].CPUCost += discountedCPUCost
 		arts[key].CPUCostAdjustment += cpuCostAdjustment
+		arts[key].CPUCoreHours += node.CPUCoreHours
 
 		// TotalRAMCost will be discounted cost + adjustment
 		arts[key].RAMCost += discountedRAMCost
 		arts[key].RAMCostAdjustment += ramCostAdjustment
+		arts[key].RAMByteHours += node.RAMByteHours
 
 		// TotalGPUCost will be discounted cost + adjustment
 		arts[key].GPUCost += node.GPUCost
 		arts[key].GPUCostAdjustment += gpuCostAdjustment
+		arts[key].GPUHours += node.GPUHours
 	}
 
 	for _, lb := range as.LoadBalancers {

+ 60 - 28
pkg/costmodel/cluster_helpers.go

@@ -2,6 +2,7 @@ package costmodel
 
 import (
 	"fmt"
+	"math"
 	"strconv"
 	"time"
 
@@ -73,23 +74,39 @@ func buildCPUCostMap(
 
 		var cpuCost float64
 
-		if customPricingEnabled && customPricingConfig != nil {
+		// Start with the value from the data source (e.g., collector or Prometheus)
+		cpuCost = result.Data[0].Value
+
+		// If custom pricing is enabled, or the data source value is invalid (0 or NaN), use custom pricing
+		if (customPricingEnabled && customPricingConfig != nil) || cpuCost == 0 || math.IsNaN(cpuCost) {
+			if customPricingConfig != nil {
+				var customCPUStr string
+				if spot, ok := preemptible[key]; ok && spot {
+					customCPUStr = customPricingConfig.SpotCPU
+				} else {
+					customCPUStr = customPricingConfig.CPU
+				}
 
-			var customCPUStr string
-			if spot, ok := preemptible[key]; ok && spot {
-				customCPUStr = customPricingConfig.SpotCPU
+				customCPUCost, err := strconv.ParseFloat(customCPUStr, 64)
+				if err != nil {
+					log.Warnf("ClusterNodes: error parsing custom CPU price: %s", customCPUStr)
+				} else {
+					// Log the reason for using custom pricing
+					if cpuCost == 0 {
+						log.DedupedInfof(10, "ClusterNodes: node %s has invalid CPU cost (0) from data source; falling back to custom pricing: %f", name, customCPUCost)
+					} else if math.IsNaN(cpuCost) {
+						log.DedupedInfof(10, "ClusterNodes: node %s has invalid CPU cost (NaN) from data source; falling back to custom pricing: %f", name, customCPUCost)
+					} else {
+						log.DedupedInfof(10, "ClusterNodes: node %s using custom pricing: %f", name, customCPUCost)
+					}
+					cpuCost = customCPUCost
+				}
 			} else {
-				customCPUStr = customPricingConfig.CPU
-			}
-
-			customCPUCost, err := strconv.ParseFloat(customCPUStr, 64)
-			if err != nil {
-				log.Warnf("ClusterNodes: error parsing custom CPU price: %s", customCPUStr)
+				// custom pricing config is nil, but we needed it because cpuCost was invalid
+				if cpuCost == 0 || math.IsNaN(cpuCost) {
+					log.Warnf("ClusterNodes: node %s has invalid CPU cost (0 or NaN), but was unable to fall back to custom pricing because it was nil", name)
+				}
 			}
-			cpuCost = customCPUCost
-
-		} else {
-			cpuCost = result.Data[0].Value
 		}
 
 		clusterAndNameToType[keyNon] = nodeType
@@ -141,23 +158,38 @@ func buildRAMCostMap(
 
 		var ramCost float64
 
-		if customPricingEnabled && customPricingConfig != nil {
+		// Start with the value from the data source (e.g., collector or Prometheus)
+		ramCost = result.Data[0].Value
+
+		// If custom pricing is enabled, or the data source value is invalid (0 or NaN), use custom pricing
+		if (customPricingEnabled && customPricingConfig != nil) || ramCost == 0 || math.IsNaN(ramCost) {
+			if customPricingConfig != nil {
+				var customRAMStr string
+				if spot, ok := preemptible[key]; ok && spot {
+					customRAMStr = customPricingConfig.SpotRAM
+				} else {
+					customRAMStr = customPricingConfig.RAM
+				}
 
-			var customRAMStr string
-			if spot, ok := preemptible[key]; ok && spot {
-				customRAMStr = customPricingConfig.SpotRAM
+				customRAMCost, err := strconv.ParseFloat(customRAMStr, 64)
+				if err != nil {
+					log.Warnf("ClusterNodes: error parsing custom RAM price: %s", customRAMStr)
+				} else {
+					// Log the reason for using custom pricing
+					if ramCost == 0 {
+						log.DedupedInfof(10, "ClusterNodes: node %s has invalid RAM cost (0) from data source; falling back to custom pricing: %f", name, customRAMCost)
+					} else if math.IsNaN(ramCost) {
+						log.DedupedInfof(10, "ClusterNodes: node %s has invalid RAM cost (NaN) from data source; falling back to custom pricing: %f", name, customRAMCost)
+					} else {
+						log.DedupedInfof(10, "ClusterNodes: node %s using custom pricing: %f", name, customRAMCost)
+					}
+					ramCost = customRAMCost
+				}
 			} else {
-				customRAMStr = customPricingConfig.RAM
-			}
-
-			customRAMCost, err := strconv.ParseFloat(customRAMStr, 64)
-			if err != nil {
-				log.Warnf("ClusterNodes: error parsing custom RAM price: %s", customRAMStr)
+				if ramCost == 0 || math.IsNaN(ramCost) {
+					log.Warnf("ClusterNodes: node %s has invalid RAM cost (0 or NaN), but was unable to fall back to custom pricing because it was nil", name)
+				}
 			}
-			ramCost = customRAMCost
-
-		} else {
-			ramCost = result.Data[0].Value
 		}
 
 		clusterAndNameToType[keyNon] = nodeType

+ 48 - 21
pkg/costmodel/cluster_helpers_test.go

@@ -901,24 +901,6 @@ func TestAssetCustompricing(t *testing.T) {
 
 	startTimestamp := float64(windowStart.Unix())
 
-	nodePromResult := []*source.QueryResult{
-		source.NewQueryResult(
-			map[string]interface{}{
-				"cluster_id":    "cluster1",
-				"node":          "node1",
-				"instance_type": "type1",
-				"provider_id":   "provider1",
-			},
-			[]*util.Vector{
-				{
-					Timestamp: startTimestamp,
-					Value:     0.5,
-				},
-			},
-			source.DefaultResultKeys(),
-		),
-	}
-
 	pvCostPromResult := []*source.QueryResult{
 		source.NewQueryResult(
 			map[string]interface{}{
@@ -1052,6 +1034,7 @@ func TestAssetCustompricing(t *testing.T) {
 		name             string
 		customPricingMap map[string]string
 		expectedPricing  map[string]float64
+		zeroCollector    bool // If true, simulate collector returning 0 (promless mode)
 	}{
 		{
 			name:             "No custom pricing",
@@ -1062,6 +1045,7 @@ func TestAssetCustompricing(t *testing.T) {
 				"GPU":     1.0,
 				"Storage": 1.0,
 			},
+			zeroCollector: false,
 		},
 		{
 			name: "Custom pricing enabled",
@@ -1078,6 +1062,25 @@ func TestAssetCustompricing(t *testing.T) {
 				"GPU":     1.369864,              // 500.0 / 730 * 2
 				"Storage": 0.000137,              // 0.1 / 730 * (1073741824.0 / 1024 / 1024 / 1024) * (60 / 60) => 0.1 / 730 * 1 * 1
 			},
+			zeroCollector: false,
+		},
+		{
+			name: "Collector returns 0, fallback to custom pricing",
+			customPricingMap: map[string]string{
+				"CPU":     "20.0",
+				"RAM":     "4.0",
+				"GPU":     "500.0",
+				"Storage": "0.1",
+				// NOTE: customPricesEnabled is NOT set to "true"
+				// This tests the fallback behavior when collector returns 0
+			},
+			expectedPricing: map[string]float64{
+				"CPU":     0.027397,              // 20.0 / 730 (fallback from 0)
+				"RAM":     5.102716386318207e-12, // 4.0 / 730 / 1024^3 (fallback from 0)
+				"GPU":     0.0,                   // GPU doesn't have fallback logic
+				"Storage": 1.0,                   // Storage uses separate PV pricing (pvCostPromResult), not affected by node pricing
+			},
+			zeroCollector: true,
 		},
 	}
 
@@ -1088,10 +1091,34 @@ func TestAssetCustompricing(t *testing.T) {
 			}
 			testProvider.UpdateConfigFromConfigMap(testCase.customPricingMap)
 
+			// Create test data - if zeroCollector is true, simulate collector returning 0
+			testValue := 0.5
+			if testCase.zeroCollector {
+				testValue = 0.0
+			}
+
+			zeroCollectorPromResult := []*source.QueryResult{
+				source.NewQueryResult(
+					map[string]interface{}{
+						"cluster_id":    "cluster1",
+						"node":          "node1",
+						"instance_type": "type1",
+						"provider_id":   "provider1",
+					},
+					[]*util.Vector{
+						{
+							Timestamp: startTimestamp,
+							Value:     testValue,
+						},
+					},
+					source.DefaultResultKeys(),
+				),
+			}
+
 			testPreemptible := make(map[NodeIdentifier]bool)
-			nodeCpuResult := source.DecodeAll(nodePromResult, source.DecodeNodeCPUPricePerHrResult)
-			nodeRamResult := source.DecodeAll(nodePromResult, source.DecodeNodeRAMPricePerGiBHrResult)
-			nodeGpuResult := source.DecodeAll(nodePromResult, source.DecodeNodeGPUPricePerHrResult)
+			nodeCpuResult := source.DecodeAll(zeroCollectorPromResult, source.DecodeNodeCPUPricePerHrResult)
+			nodeRamResult := source.DecodeAll(zeroCollectorPromResult, source.DecodeNodeRAMPricePerGiBHrResult)
+			nodeGpuResult := source.DecodeAll(zeroCollectorPromResult, source.DecodeNodeGPUPricePerHrResult)
 
 			cpuMap, _ := buildCPUCostMap(nodeCpuResult, testProvider, testPreemptible)
 			ramMap, _ := buildRAMCostMap(nodeRamResult, testProvider, testPreemptible)

+ 256 - 3
pkg/costmodel/costmodel.go

@@ -1779,11 +1779,236 @@ func (cm *CostModel) QueryAllocation(window opencost.Window, step time.Duration,
 	return asr, nil
 }
 
+// debugAssetAllocationMismatch logs, per node and in aggregate, how node asset costs compare with the allocation costs attributed to those nodes.
+// Output goes through log.Debugf, except resource-cost inversions, which use log.Warnf; it helps diagnose pricing issues and negative idle costs.
+func debugAssetAllocationMismatch(allocSet *opencost.AllocationSet, assetSet *opencost.AssetSet) {
+	log.Debugf("=== Asset-Allocation Debug Analysis for window %s ===", allocSet.Window)
+
+	// Index node assets by provider ID and by node name; assets with nil Properties or an empty key are left out of that index.
+	assetsByProviderID := make(map[string]*opencost.Node)
+	assetsByNode := make(map[string]*opencost.Node)
+	for _, asset := range assetSet.Nodes {
+		if asset.Properties != nil && asset.Properties.ProviderID != "" {
+			assetsByProviderID[asset.Properties.ProviderID] = asset
+		}
+		if asset.Properties != nil && asset.Properties.Name != "" {
+			assetsByNode[asset.Properties.Name] = asset
+		}
+	}
+
+	// 1) Find allocations whose ProviderID matches no node asset; allocations with nil Properties or an empty ProviderID are ignored.
+	allocsWithoutAssets := make([]*opencost.Allocation, 0)
+	for _, alloc := range allocSet.Allocations {
+		if alloc.Properties == nil {
+			continue
+		}
+		providerID := alloc.Properties.ProviderID
+		if providerID == "" {
+			continue
+		}
+		if _, found := assetsByProviderID[providerID]; !found {
+			allocsWithoutAssets = append(allocsWithoutAssets, alloc)
+		}
+	}
+
+	if len(allocsWithoutAssets) > 0 {
+		log.Debugf("Found %d allocations without matching assets:", len(allocsWithoutAssets))
+		for _, alloc := range allocsWithoutAssets {
+			log.Debugf("  - Allocation: %s, Node: %s, ProviderID: %s, TotalCost: %.4f",
+				alloc.Name,
+				alloc.Properties.Node,
+				alloc.Properties.ProviderID,
+				alloc.TotalCost())
+		}
+	}
+
+	// 2) Sum allocation costs and resource hours per node name, then compare them to the asset indexed under the same name.
+	allocTotalsByNode := make(map[string]*struct {
+		CPUCost      float64
+		GPUCost      float64
+		RAMCost      float64
+		TotalCost    float64
+		CPUCoreHours float64
+		GPUHours     float64
+		RAMByteHours float64
+		Count        int
+	})
+
+	for _, alloc := range allocSet.Allocations {
+		if alloc.Properties == nil || alloc.Properties.Node == "" {
+			continue
+		}
+		node := alloc.Properties.Node
+
+		if _, exists := allocTotalsByNode[node]; !exists {
+			allocTotalsByNode[node] = &struct {
+				CPUCost      float64
+				GPUCost      float64
+				RAMCost      float64
+				TotalCost    float64
+				CPUCoreHours float64
+				GPUHours     float64
+				RAMByteHours float64
+				Count        int
+			}{}
+		}
+
+		allocTotalsByNode[node].CPUCost += alloc.CPUCost
+		allocTotalsByNode[node].GPUCost += alloc.GPUCost
+		allocTotalsByNode[node].RAMCost += alloc.RAMCost
+		allocTotalsByNode[node].TotalCost += alloc.TotalCost()
+		allocTotalsByNode[node].CPUCoreHours += alloc.CPUCoreHours
+		allocTotalsByNode[node].GPUHours += alloc.GPUHours
+		allocTotalsByNode[node].RAMByteHours += alloc.RAMByteHours
+		allocTotalsByNode[node].Count++
+	}
+
+	log.Debugf("Per-Node Asset vs Allocation Comparison:")
+	for node, allocTotals := range allocTotalsByNode {
+		asset, hasAsset := assetsByNode[node]
+		if !hasAsset {
+			log.Debugf("  Node %s: Has allocations but NO ASSET (allocations: %d, total cost: %.4f)",
+				node, allocTotals.Count, allocTotals.TotalCost)
+			continue
+		}
+
+		assetCPU := asset.CPUCost
+		assetGPU := asset.GPUCost
+		assetRAM := asset.RAMCost
+		assetTotal := asset.TotalCost()
+
+		cpuDiff := assetCPU - allocTotals.CPUCost
+		gpuDiff := assetGPU - allocTotals.GPUCost
+		ramDiff := assetRAM - allocTotals.RAMCost
+		totalDiff := assetTotal - allocTotals.TotalCost
+
+		status := "OK"
+		if cpuDiff < 0 || gpuDiff < 0 || ramDiff < 0 {
+			status = "NEGATIVE_IDLE"
+		}
+
+		log.Debugf("  Node %s [%s]:", node, status)
+		log.Debugf("    Asset:      CPU=%.4f, GPU=%.4f, RAM=%.4f, Total=%.4f",
+			assetCPU, assetGPU, assetRAM, assetTotal)
+		log.Debugf("    Allocation: CPU=%.4f, GPU=%.4f, RAM=%.4f, Total=%.4f (%d allocs)",
+			allocTotals.CPUCost, allocTotals.GPUCost, allocTotals.RAMCost, allocTotals.TotalCost, allocTotals.Count)
+		log.Debugf("    Difference: CPU=%.4f, GPU=%.4f, RAM=%.4f, Total=%.4f",
+			cpuDiff, gpuDiff, ramDiff, totalDiff)
+
+		if asset.Adjustment != 0 {
+			log.Debugf("    Adjustment: %.4f", asset.Adjustment)
+		}
+
+		// Compare resource amounts vs costs: the side with more resource-hours is expected to carry the higher cost
+		assetCPUHours := asset.CPUCoreHours
+		assetGPUHours := asset.GPUHours
+		assetRAMBytes := asset.RAMByteHours
+
+		allocCPUHours := allocTotals.CPUCoreHours
+		allocGPUHours := allocTotals.GPUHours
+		allocRAMBytes := allocTotals.RAMByteHours
+
+		// Warn if resource amounts and costs are inverted (more resources but lower cost, or the reverse); only checked when both sides report positive hours
+		if assetCPUHours > 0 && allocCPUHours > 0 {
+			if assetCPUHours > allocCPUHours && assetCPU < allocTotals.CPUCost {
+				log.Warnf("Resource-cost inversion for %s CPU: asset has MORE hours (%.2f) but LESS cost (%.4f) than allocations (hours: %.2f, cost: %.4f)",
+					node, assetCPUHours, assetCPU, allocCPUHours, allocTotals.CPUCost)
+			} else if assetCPUHours < allocCPUHours && assetCPU > allocTotals.CPUCost {
+				log.Warnf("Resource-cost inversion for %s CPU: asset has LESS hours (%.2f) but MORE cost (%.4f) than allocations (hours: %.2f, cost: %.4f)",
+					node, assetCPUHours, assetCPU, allocCPUHours, allocTotals.CPUCost)
+			}
+		}
+
+		if assetGPUHours > 0 && allocGPUHours > 0 {
+			if assetGPUHours > allocGPUHours && assetGPU < allocTotals.GPUCost {
+				log.Warnf("Resource-cost inversion for %s GPU: asset has MORE hours (%.2f) but LESS cost (%.4f) than allocations (hours: %.2f, cost: %.4f)",
+					node, assetGPUHours, assetGPU, allocGPUHours, allocTotals.GPUCost)
+			} else if assetGPUHours < allocGPUHours && assetGPU > allocTotals.GPUCost {
+				log.Warnf("Resource-cost inversion for %s GPU: asset has LESS hours (%.2f) but MORE cost (%.4f) than allocations (hours: %.2f, cost: %.4f)",
+					node, assetGPUHours, assetGPU, allocGPUHours, allocTotals.GPUCost)
+			}
+		}
+
+		if assetRAMBytes > 0 && allocRAMBytes > 0 {
+			if assetRAMBytes > allocRAMBytes && assetRAM < allocTotals.RAMCost {
+				log.Warnf("Resource-cost inversion for %s RAM: asset has MORE byte-hours (%.2f) but LESS cost (%.4f) than allocations (byte-hours: %.2f, cost: %.4f)",
+					node, assetRAMBytes, assetRAM, allocRAMBytes, allocTotals.RAMCost)
+			} else if assetRAMBytes < allocRAMBytes && assetRAM > allocTotals.RAMCost {
+				log.Warnf("Resource-cost inversion for %s RAM: asset has LESS byte-hours (%.2f) but MORE cost (%.4f) than allocations (byte-hours: %.2f, cost: %.4f)",
+					node, assetRAMBytes, assetRAM, allocRAMBytes, allocTotals.RAMCost)
+			}
+		}
+
+		// Log the raw resource amounts (core-hours, GPU-hours, RAM byte-hours) for both sides
+		log.Debugf("    Resource Hours:")
+		log.Debugf("      Asset:      CPU=%.2f hours, GPU=%.2f hours, RAM=%.2f byte-hours",
+			assetCPUHours, assetGPUHours, assetRAMBytes)
+		log.Debugf("      Allocation: CPU=%.2f hours, GPU=%.2f hours, RAM=%.2f byte-hours",
+			allocCPUHours, allocGPUHours, allocRAMBytes)
+	}
+
+	// 3) Sum the costs of all node assets, including nodes that have no allocations
+	totalNodeCPU := 0.0
+	totalNodeGPU := 0.0
+	totalNodeRAM := 0.0
+	totalNodeCost := 0.0
+	nodeCount := 0
+
+	for _, asset := range assetSet.Nodes {
+		totalNodeCPU += asset.CPUCost
+		totalNodeGPU += asset.GPUCost
+		totalNodeRAM += asset.RAMCost
+		totalNodeCost += asset.TotalCost()
+		nodeCount++
+	}
+
+	log.Debugf("Total Node Asset Costs:")
+	log.Debugf("  Nodes: %d", nodeCount)
+	log.Debugf("  CPU:   %.4f", totalNodeCPU)
+	log.Debugf("  GPU:   %.4f", totalNodeGPU)
+	log.Debugf("  RAM:   %.4f", totalNodeRAM)
+	log.Debugf("  Total: %.4f", totalNodeCost)
+
+	// 4) Sum the costs of all allocations, including those skipped above for having no node set
+	totalAllocCPU := 0.0
+	totalAllocGPU := 0.0
+	totalAllocRAM := 0.0
+	totalAllocCost := 0.0
+	allocCount := 0
+
+	for _, alloc := range allocSet.Allocations {
+		totalAllocCPU += alloc.CPUCost
+		totalAllocGPU += alloc.GPUCost
+		totalAllocRAM += alloc.RAMCost
+		totalAllocCost += alloc.TotalCost()
+		allocCount++
+	}
+
+	log.Debugf("Total Allocation Costs:")
+	log.Debugf("  Allocations: %d", allocCount)
+	log.Debugf("  CPU:         %.4f", totalAllocCPU)
+	log.Debugf("  GPU:         %.4f", totalAllocGPU)
+	log.Debugf("  RAM:         %.4f", totalAllocRAM)
+	log.Debugf("  Total:       %.4f", totalAllocCost)
+
+	// Overall comparison: asset totals minus allocation totals; a negative difference means allocations exceed the asset cost
+	log.Debugf("Overall Asset vs Allocation:")
+	log.Debugf("  CPU Difference:   %.4f (Asset - Allocation)", totalNodeCPU-totalAllocCPU)
+	log.Debugf("  GPU Difference:   %.4f (Asset - Allocation)", totalNodeGPU-totalAllocGPU)
+	log.Debugf("  RAM Difference:   %.4f (Asset - Allocation)", totalNodeRAM-totalAllocRAM)
+	log.Debugf("  Total Difference: %.4f (Asset - Allocation)", totalNodeCost-totalAllocCost)
+
+	log.Debugf("=== End Asset-Allocation Debug Analysis ===")
+}
+
 func computeIdleAllocations(allocSet *opencost.AllocationSet, assetSet *opencost.AssetSet, idleByNode bool) (*opencost.AllocationSet, error) {
 	if !allocSet.Window.Equal(assetSet.Window) {
 		return nil, fmt.Errorf("cannot compute idle allocations for mismatched sets: %s does not equal %s", allocSet.Window, assetSet.Window)
 	}
 
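+	// Run the asset/allocation debug analysis. The call itself is unconditional; most of its output is logged at debug level, with warnings for resource-cost inversions.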
+	debugAssetAllocationMismatch(allocSet, assetSet)
+
 	var allocTotals map[string]*opencost.AllocationTotals
 	var assetTotals map[string]*opencost.AssetTotals
 
@@ -1817,7 +2042,35 @@ func computeIdleAllocations(allocSet *opencost.AllocationSet, assetSet *opencost
 		// Insert one idle allocation for each key (whether by node or
 		// by cluster), defined as the difference between the total
 		// asset cost and the allocated cost per-resource.
+		// Idle costs are clamped to zero to prevent negative values that can occur
+		// when asset total costs are less than allocated costs. This can happen when:
+		// - Pricing data is unavailable (promless mode, API failures, missing price data)
+		// - Custom pricing is misconfigured or returns zero values
+		// - Cloud billing adjustments reduce asset costs below allocation costs
+		// - Allocation calculations exceed asset costs due to timing or rounding
 		name := fmt.Sprintf("%s/%s", key, opencost.IdleSuffix)
+
+		cpuIdleCost := assetTotal.TotalCPUCost() - allocTotal.TotalCPUCost()
+		gpuIdleCost := assetTotal.TotalGPUCost() - allocTotal.TotalGPUCost()
+		ramIdleCost := assetTotal.TotalRAMCost() - allocTotal.TotalRAMCost()
+
+		// Clamp idle costs to zero to prevent negative idle allocations
+		if cpuIdleCost < 0 {
+			log.Warnf("Negative CPU idle cost detected for %s: asset total (%.4f) < allocation total (%.4f), clamping to 0",
+				key, assetTotal.TotalCPUCost(), allocTotal.TotalCPUCost())
+			cpuIdleCost = 0
+		}
+		if gpuIdleCost < 0 {
+			log.Warnf("Negative GPU idle cost detected for %s: asset total (%.4f) < allocation total (%.4f), clamping to 0",
+				key, assetTotal.TotalGPUCost(), allocTotal.TotalGPUCost())
+			gpuIdleCost = 0
+		}
+		if ramIdleCost < 0 {
+			log.Warnf("Negative RAM idle cost detected for %s: asset total (%.4f) < allocation total (%.4f), clamping to 0",
+				key, assetTotal.TotalRAMCost(), allocTotal.TotalRAMCost())
+			ramIdleCost = 0
+		}
+
 		err := idleSet.Insert(&opencost.Allocation{
 			Name:   name,
 			Window: idleSet.Window.Clone(),
@@ -1828,9 +2081,9 @@ func computeIdleAllocations(allocSet *opencost.AllocationSet, assetSet *opencost
 			},
 			Start:   assetTotal.Start,
 			End:     assetTotal.End,
-			CPUCost: assetTotal.TotalCPUCost() - allocTotal.TotalCPUCost(),
-			GPUCost: assetTotal.TotalGPUCost() - allocTotal.TotalGPUCost(),
-			RAMCost: assetTotal.TotalRAMCost() - allocTotal.TotalRAMCost(),
+			CPUCost: cpuIdleCost,
+			GPUCost: gpuIdleCost,
+			RAMCost: ramIdleCost,
 		})
 		if err != nil {
 			return nil, fmt.Errorf("failed to insert idle allocation %s: %w", name, err)