dcgmdevice.go 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. package kubemodel
import (
	"errors"
	"fmt"
	"time"

	"github.com/opencost/opencost/core/pkg/log"
	"github.com/opencost/opencost/core/pkg/model/kubemodel"
	"github.com/opencost/opencost/core/pkg/source"
)
  8. func (km *KubeModel) computeDCGMDevices(kms *kubemodel.KubeModelSet, start, end time.Time) error {
  9. grp := source.NewQueryGroup()
  10. metrics := km.ds.Metrics()
  11. dcgmInfoFuture := source.WithGroup(grp, metrics.QueryDCGMDeviceInfo(start, end))
  12. dcgmUptimeFuture := source.WithGroup(grp, metrics.QueryDCGMDeviceUptime(start, end))
  13. dcgmUsageAvgFuture := source.WithGroup(grp, metrics.QueryDCGMContainerUsageAvg(start, end))
  14. dcgmUsageMaxFuture := source.WithGroup(grp, metrics.QueryDCGMContainerUsageMax(start, end))
  15. deviceMap := make(map[string]*kubemodel.DCGMDevice)
  16. dcgmInfoResult, _ := dcgmInfoFuture.Await()
  17. for _, res := range dcgmInfoResult {
  18. if res.UUID == "" {
  19. continue
  20. }
  21. if _, ok := deviceMap[res.UUID]; ok {
  22. continue
  23. }
  24. deviceMap[res.UUID] = &kubemodel.DCGMDevice{
  25. UUID: res.UUID,
  26. Device: res.Device,
  27. ModelName: res.ModelName,
  28. PodUsages: make(map[string]kubemodel.DCGMPod),
  29. }
  30. }
  31. dcgmUptimeResult, _ := dcgmUptimeFuture.Await()
  32. for _, res := range dcgmUptimeResult {
  33. d, ok := deviceMap[res.UUID]
  34. if !ok {
  35. log.Warnf("DCGM uptime result for unknown device UUID '%s'", res.UUID)
  36. continue
  37. }
  38. s, e := res.GetStartEnd(start, end, km.ds.Resolution())
  39. d.Start = s
  40. d.End = e
  41. }
  42. dcgmUsageAvgResult, _ := dcgmUsageAvgFuture.Await()
  43. for _, res := range dcgmUsageAvgResult {
  44. device, ok := deviceMap[res.UUID]
  45. if !ok || res.PodUID == "" || res.Container == "" {
  46. continue
  47. }
  48. pod, ok := device.PodUsages[res.PodUID]
  49. if !ok {
  50. pod = kubemodel.DCGMPod{ContainerUsages: make(map[string]kubemodel.DCGMContainer)}
  51. }
  52. c := pod.ContainerUsages[res.Container]
  53. c.UsageAvg = res.Value
  54. pod.ContainerUsages[res.Container] = c
  55. device.PodUsages[res.PodUID] = pod
  56. }
  57. dcgmUsageMaxResult, _ := dcgmUsageMaxFuture.Await()
  58. for _, res := range dcgmUsageMaxResult {
  59. device, ok := deviceMap[res.UUID]
  60. if !ok || res.PodUID == "" || res.Container == "" {
  61. continue
  62. }
  63. pod, ok := device.PodUsages[res.PodUID]
  64. if !ok {
  65. pod = kubemodel.DCGMPod{ContainerUsages: make(map[string]kubemodel.DCGMContainer)}
  66. }
  67. c := pod.ContainerUsages[res.Container]
  68. c.UsageMax = res.Value
  69. pod.ContainerUsages[res.Container] = c
  70. device.PodUsages[res.PodUID] = pod
  71. }
  72. for _, device := range deviceMap {
  73. if err := kms.RegisterDCGMDevice(device); err != nil {
  74. log.Warnf("Failed to register DCGM device: %s", err.Error())
  75. }
  76. }
  77. return nil
  78. }