dcgm.go 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. package scrape
  import (
  	"fmt"
  	"net"
  	"regexp"
  	"strconv"

  	"github.com/opencost/opencost/core/pkg/clustercache"
  	"github.com/opencost/opencost/core/pkg/log"
  	"github.com/opencost/opencost/modules/collector-source/pkg/event"
  	"github.com/opencost/opencost/modules/collector-source/pkg/metric"
  	"github.com/opencost/opencost/modules/collector-source/pkg/scrape/target"
  	v1 "k8s.io/api/core/v1"
  )
  // dcgmRegex matches, case-insensitively, any string that contains
  // "dcgm-exporter". It is compiled once at package scope and is consulted by
  // isDCGM when inspecting pod label values.
  var dcgmRegex = regexp.MustCompile(`(?i)(.*dcgm-exporter.*)`)
  13. func newDCGMScrapper(clusterCache clustercache.ClusterCache) Scraper {
  14. tp := newDCGMTargetProvider(clusterCache)
  15. return newDCGMTargetScraper(tp)
  16. }
  17. func newDCGMTargetScraper(provider target.TargetProvider) *TargetScraper {
  18. return newTargetScrapper(
  19. event.DCGMScraperName,
  20. provider,
  21. []string{
  22. metric.DCGMFIPROFGRENGINEACTIVE,
  23. metric.DCGMFIDEVDECUTIL,
  24. },
  25. true)
  26. }
  27. type DCGMTargetProvider struct {
  28. clusterCache clustercache.ClusterCache
  29. port int
  30. }
  31. func newDCGMTargetProvider(clusterCache clustercache.ClusterCache) *DCGMTargetProvider {
  32. return &DCGMTargetProvider{
  33. clusterCache: clusterCache,
  34. port: 9400,
  35. }
  36. }
  37. func (p *DCGMTargetProvider) GetTargets() []target.ScrapeTarget {
  38. // NOTE: The proper way to discover these targets is to first identify a Service that
  39. // NOTE: matches a specific selector. Then, locate the Endpoints kubernetes resource associated
  40. // NOTE: with that Service. This Endpoints resource has a list of all the targetted pods and their
  41. // NOTE: addresses. We do _not_ have the Endpoints resource on our cluster cache at the moment,
  42. // NOTE: so we'll perform this lookup ourselves.
  43. pods := p.clusterCache.GetAllPods()
  44. var targets []target.ScrapeTarget
  45. for _, pod := range pods {
  46. if pod.Status.Phase == v1.PodRunning && isDCGM(pod.Labels) {
  47. log.Debugf("DCGM: found target: http://%s:%d/metrics", pod.Status.PodIP, p.port)
  48. t := target.NewUrlTarget(fmt.Sprintf("http://%s:%d/metrics", pod.Status.PodIP, p.port))
  49. targets = append(targets, t)
  50. }
  51. }
  52. return targets
  53. }
  54. func isDCGM(labels map[string]string) bool {
  55. keys := []string{
  56. "app",
  57. "app.kubernetes.io/name",
  58. "app.kubernetes.io/component",
  59. }
  60. for _, key := range keys {
  61. if value, ok := labels[key]; ok {
  62. if dcgmRegex.MatchString(value) {
  63. return true
  64. }
  65. }
  66. }
  67. return false
  68. }