config.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. package prom
  2. import (
  3. "crypto/tls"
  4. "crypto/x509"
  5. "fmt"
  6. "os"
  7. "time"
  8. coreenv "github.com/opencost/opencost/core/pkg/env"
  9. "github.com/opencost/opencost/core/pkg/log"
  10. "github.com/opencost/opencost/modules/prometheus-source/pkg/env"
  11. restclient "k8s.io/client-go/rest"
  12. certutil "k8s.io/client-go/util/cert"
  13. )
  // ServiceCA is the file path of the service-ca.crt bundle in the pod's
  // service account directory. NewOpenCostPrometheusConfigFromEnv loads it as
  // the root CA pool when kube-rbac-proxy authentication is enabled.
  const ServiceCA = "/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
  17. type OpenCostPrometheusConfig struct {
  18. ServerEndpoint string
  19. Version string
  20. IsOffsetResolution bool
  21. ClientConfig *PrometheusClientConfig
  22. ScrapeInterval time.Duration
  23. JobName string
  24. Offset string
  25. QueryOffset time.Duration
  26. MaxQueryDuration time.Duration
  27. ClusterLabel string
  28. ClusterID string
  29. ClusterFilter string
  30. DataResolution time.Duration
  31. DataResolutionMinutes int
  32. }
  33. func (ocpc *OpenCostPrometheusConfig) IsRateLimitRetryEnabled() bool {
  34. return ocpc.ClientConfig.RateLimitRetryOpts != nil
  35. }
  36. // NewOpenCostPrometheusConfigFromEnv creates a new OpenCostPrometheusConfig from environment variables.
  37. func NewOpenCostPrometheusConfigFromEnv() (*OpenCostPrometheusConfig, error) {
  38. serverEndpoint := env.GetPrometheusServerEndpoint()
  39. if serverEndpoint == "" {
  40. return nil, fmt.Errorf("no address for prometheus set in $%s", env.PrometheusServerEndpointEnvVar)
  41. }
  42. queryConcurrency := env.GetMaxQueryConcurrency()
  43. log.Infof("Prometheus Client Max Concurrency set to %d", queryConcurrency)
  44. timeout := env.GetPrometheusQueryTimeout()
  45. keepAlive := env.GetPrometheusKeepAlive()
  46. tlsHandshakeTimeout := env.GetPrometheusTLSHandshakeTimeout()
  47. jobName := env.GetJobName()
  48. scrapeInterval := env.GetScrapeInterval()
  49. maxQueryDuration := env.GetPrometheusMaxQueryDuration()
  50. clusterId := coreenv.GetClusterID()
  51. clusterLabel := env.GetPromClusterLabel()
  52. clusterFilter := env.GetPromClusterFilter()
  53. var rateLimitRetryOpts *RateLimitRetryOpts = nil
  54. if env.IsPrometheusRetryOnRateLimitResponse() {
  55. rateLimitRetryOpts = &RateLimitRetryOpts{
  56. MaxRetries: env.GetPrometheusRetryOnRateLimitMaxRetries(),
  57. DefaultRetryWait: env.GetPrometheusRetryOnRateLimitDefaultWait(),
  58. }
  59. }
  60. auth := &ClientAuth{
  61. Username: env.GetDBBasicAuthUsername(),
  62. Password: env.GetDBBasicAuthUserPassword(),
  63. BearerToken: env.GetDBBearerToken(),
  64. }
  65. // We will use the service account token and service-ca.crt to authenticate with the Prometheus server via kube-rbac-proxy.
  66. // We need to ensure that the service account has the necessary permissions to access the Prometheus server by binding it to the appropriate role.
  67. var tlsCaCert *x509.CertPool
  68. var tlsClientCertificates []tls.Certificate
  69. if env.IsKubeRbacProxyEnabled() {
  70. restConfig, err := restclient.InClusterConfig()
  71. if err != nil {
  72. log.Errorf("%s was set to true but failed to get in-cluster config: %s", env.KubeRbacProxyEnabledEnvVar, err)
  73. }
  74. auth.BearerToken = restConfig.BearerToken
  75. tlsCaCert, err = certutil.NewPool(ServiceCA)
  76. if err != nil {
  77. log.Errorf("%s was set to true but failed to load service-ca.crt: %s", env.KubeRbacProxyEnabledEnvVar, err)
  78. }
  79. } else if env.IsPromMtlsAuthEnabled() {
  80. tlsCaCert = x509.NewCertPool()
  81. // The /etc/ssl/cert.pem location is correct for Alpine Linux, the container base used here
  82. systemCa, err := os.ReadFile("/etc/ssl/cert.pem")
  83. if err != nil {
  84. log.Errorf("mTLS options were set but failed to load system CAs: %s", err)
  85. } else {
  86. tlsCaCert.AppendCertsFromPEM(systemCa)
  87. }
  88. mTlsCa, err := os.ReadFile(env.GetPromMtlsAuthCAFile())
  89. if err != nil {
  90. log.Errorf("mTLS options were set but failed to load PROM_MTLS_AUTH_CA_FILE: %s", err)
  91. } else {
  92. tlsCaCert.AppendCertsFromPEM(mTlsCa)
  93. }
  94. mTlsKeyPair, err := tls.LoadX509KeyPair(env.GetPromMtlsAuthCrtFile(), env.GetPromMtlsAuthKeyFile())
  95. if err != nil {
  96. log.Errorf("mTLS options were set but failed to load PROM_MTLS_AUTH_CRT_FILE or PROM_MTLS_AUTH_KEY_FILE: %s", err)
  97. } else {
  98. tlsClientCertificates = []tls.Certificate{mTlsKeyPair}
  99. }
  100. }
  101. dataResolution := env.GetPrometheusQueryResolution()
  102. // Ensuring if data resolution is less than 60s default it to 1m
  103. resolutionMinutes := int(dataResolution.Minutes())
  104. if resolutionMinutes == 0 {
  105. resolutionMinutes = 1
  106. }
  107. clientConfig := &PrometheusClientConfig{
  108. Timeout: timeout,
  109. KeepAlive: keepAlive,
  110. TLSHandshakeTimeout: tlsHandshakeTimeout,
  111. TLSInsecureSkipVerify: env.IsInsecureSkipVerify(),
  112. RootCAs: tlsCaCert,
  113. ClientCertificates: tlsClientCertificates,
  114. RateLimitRetryOpts: rateLimitRetryOpts,
  115. Auth: auth,
  116. QueryConcurrency: queryConcurrency,
  117. QueryLogFile: "",
  118. HeaderXScopeOrgId: env.GetPrometheusHeaderXScopeOrgId(),
  119. }
  120. return &OpenCostPrometheusConfig{
  121. ServerEndpoint: serverEndpoint,
  122. Version: "0.0.0",
  123. IsOffsetResolution: false,
  124. ClientConfig: clientConfig,
  125. ScrapeInterval: scrapeInterval,
  126. JobName: jobName,
  127. Offset: "",
  128. QueryOffset: time.Duration(0),
  129. MaxQueryDuration: maxQueryDuration,
  130. ClusterLabel: clusterLabel,
  131. ClusterID: clusterId,
  132. ClusterFilter: clusterFilter,
  133. DataResolution: dataResolution,
  134. DataResolutionMinutes: resolutionMinutes,
  135. }, nil
  136. }