gpumemory_test.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. package synthetic
  2. import (
  3. "math"
  4. "testing"
  5. "time"
  6. "github.com/opencost/opencost/modules/collector-source/pkg/metric"
  7. )
  8. // capturingUpdater records the UpdateSet handed to the next stage of the
  9. // synthesizer pipeline.
  10. type capturingUpdater struct {
  11. set *metric.UpdateSet
  12. }
  13. func (c *capturingUpdater) Update(set *metric.UpdateSet) {
  14. c.set = set
  15. }
  16. func gpuFBUpdate(name string, uuid, migInstance string, value float64) *metric.Update {
  17. return &metric.Update{
  18. Name: name,
  19. Labels: map[string]string{
  20. "UUID": uuid,
  21. "GPU_I_ID": migInstance,
  22. "pod_uid": "pod-uuid1",
  23. "container": "container1",
  24. "namespace": "namespace1",
  25. "pod": "pod1",
  26. },
  27. Value: value,
  28. }
  29. }
  30. func TestGPUMemoryUsedRatioSynthesizer(t *testing.T) {
  31. now := time.Now()
  32. t.Run("joins used and free into a ratio", func(t *testing.T) {
  33. s := NewGPUMemoryUsedRatioSynthesizer()
  34. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "", 12000))
  35. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "", 4000))
  36. updates := s.Synthesize()
  37. if len(updates) != 1 {
  38. t.Fatalf("expected 1 synthetic update, got %d", len(updates))
  39. }
  40. got := updates[0]
  41. if got.Name != metric.OpencostGPUMemoryUsedRatio {
  42. t.Errorf("Name = %q, want %q", got.Name, metric.OpencostGPUMemoryUsedRatio)
  43. }
  44. if got.Value != 0.75 {
  45. t.Errorf("Value = %v, want 0.75", got.Value)
  46. }
  47. if got.Labels["UUID"] != "GPU-1" || got.Labels["container"] != "container1" {
  48. t.Errorf("labels not carried through: %v", got.Labels)
  49. }
  50. })
  51. t.Run("MIG instances synthesize independently", func(t *testing.T) {
  52. s := NewGPUMemoryUsedRatioSynthesizer()
  53. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "1", 5000))
  54. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "1", 5000))
  55. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "2", 2000))
  56. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "2", 8000))
  57. updates := s.Synthesize()
  58. if len(updates) != 2 {
  59. t.Fatalf("expected 2 synthetic updates, got %d", len(updates))
  60. }
  61. byInstance := map[string]float64{}
  62. for _, u := range updates {
  63. byInstance[u.Labels["GPU_I_ID"]] = u.Value
  64. }
  65. if byInstance["1"] != 0.5 || byInstance["2"] != 0.2 {
  66. t.Errorf("per-instance ratios = %v, want {1:0.5, 2:0.2}", byInstance)
  67. }
  68. })
  69. t.Run("missing half emits nothing", func(t *testing.T) {
  70. s := NewGPUMemoryUsedRatioSynthesizer()
  71. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "", 12000))
  72. if updates := s.Synthesize(); len(updates) != 0 {
  73. t.Errorf("expected no updates without FB_FREE, got %v", updates)
  74. }
  75. })
  76. t.Run("invalid totals emit nothing", func(t *testing.T) {
  77. cases := map[string][2]float64{
  78. "zero total": {0, 0},
  79. "negative used": {-1, 100},
  80. "NaN free": {100, math.NaN()},
  81. "infinite total": {math.Inf(1), 100},
  82. // negative free with positive total would yield ratio > 1
  83. // (100/(100-50) = 2.0) if only the total were validated
  84. "negative free": {100, -50},
  85. }
  86. for name, values := range cases {
  87. s := NewGPUMemoryUsedRatioSynthesizer()
  88. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "", values[0]))
  89. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "", values[1]))
  90. if updates := s.Synthesize(); len(updates) != 0 {
  91. t.Errorf("%s: expected no updates, got %v", name, updates)
  92. }
  93. }
  94. })
  95. t.Run("unrelated metrics are ignored", func(t *testing.T) {
  96. s := NewGPUMemoryUsedRatioSynthesizer()
  97. s.Process(now, gpuFBUpdate(metric.DCGMFIPROFGRENGINEACTIVE, "GPU-1", "", 0.9))
  98. if updates := s.Synthesize(); len(updates) != 0 {
  99. t.Errorf("expected no updates for unrelated metric, got %v", updates)
  100. }
  101. })
  102. t.Run("joins correctly through the MetricSynthesizers pipeline", func(t *testing.T) {
  103. // Exercises the real dispatch path: MetricSynthesizers.Update copies
  104. // each Update into a loop-body variable and passes its address to
  105. // Process. The body-scoped declaration yields a distinct allocation
  106. // per iteration, so stored pointers never alias; this test pins that
  107. // by pushing two devices' used/free pairs through one UpdateSet and
  108. // asserting each synthesized ratio reflects its own samples.
  109. captured := &capturingUpdater{}
  110. pipeline := NewMetricSynthesizers(captured, NewGPUMemoryUsedRatioSynthesizer())
  111. pipeline.Update(&metric.UpdateSet{
  112. Timestamp: now,
  113. Updates: []metric.Update{
  114. *gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "", 12000),
  115. *gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "", 4000),
  116. *gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-2", "", 2000),
  117. *gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-2", "", 8000),
  118. },
  119. })
  120. ratios := map[string]float64{}
  121. for _, u := range captured.set.Updates {
  122. if u.Name == metric.OpencostGPUMemoryUsedRatio {
  123. ratios[u.Labels["UUID"]] = u.Value
  124. }
  125. }
  126. if len(ratios) != 2 {
  127. t.Fatalf("expected 2 synthesized ratios, got %d: %v", len(ratios), ratios)
  128. }
  129. if ratios["GPU-1"] != 0.75 || ratios["GPU-2"] != 0.2 {
  130. t.Errorf("ratios = %v, want {GPU-1:0.75, GPU-2:0.2}", ratios)
  131. }
  132. // original updates must pass through untouched alongside synthetics
  133. if len(captured.set.Updates) != 6 {
  134. t.Errorf("expected 4 originals + 2 synthetics, got %d", len(captured.set.Updates))
  135. }
  136. })
  137. t.Run("Clear resets state between scrapes", func(t *testing.T) {
  138. s := NewGPUMemoryUsedRatioSynthesizer()
  139. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBUSED, "GPU-1", "", 12000))
  140. s.Clear()
  141. s.Process(now, gpuFBUpdate(metric.DCGMFIDEVFBFREE, "GPU-1", "", 4000))
  142. if updates := s.Synthesize(); len(updates) != 0 {
  143. t.Errorf("expected no join across Clear, got %v", updates)
  144. }
  145. })
  146. }