metrics.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704
  1. package prometheus
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "sort"
  8. "strings"
  9. v1 "k8s.io/api/core/v1"
  10. "k8s.io/client-go/kubernetes"
  11. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  12. )
  13. // GetPrometheusService returns the prometheus service name. The prometheus-community/prometheus chart @ v15.5.3 uses non-FQDN labels, unlike v22.6.2. This function checks for both labels.
  14. func GetPrometheusService(clientset kubernetes.Interface) (*v1.Service, bool, error) {
  15. redundantServices, err := clientset.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{
  16. LabelSelector: "app=prometheus,component=server,heritage=Helm",
  17. })
  18. if err != nil {
  19. return nil, false, err
  20. }
  21. upgradedServices, err := clientset.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{
  22. LabelSelector: "app.kubernetes.io/component=server,app.kubernetes.io/instance=prometheus,app.kubernetes.io/managed-by=Helm",
  23. })
  24. if err != nil {
  25. return nil, false, err
  26. }
  27. if len(redundantServices.Items) > 0 {
  28. return &redundantServices.Items[0], true, nil
  29. }
  30. if len(upgradedServices.Items) > 0 {
  31. return &upgradedServices.Items[0], true, nil
  32. }
  33. return nil, false, err
  34. }
  35. // getKubeStateMetricsService returns the prometheus service name
  36. func getKubeStateMetricsService(clientset kubernetes.Interface) (*v1.Service, bool, error) {
  37. services, err := clientset.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{
  38. LabelSelector: "app.kubernetes.io/name=kube-state-metrics",
  39. })
  40. if err != nil {
  41. return nil, false, err
  42. }
  43. if len(services.Items) == 0 {
  44. return nil, false, nil
  45. }
  46. return &services.Items[0], true, nil
  47. }
  48. type SimpleIngress struct {
  49. Name string `json:"name"`
  50. Namespace string `json:"namespace"`
  51. }
  52. // GetIngressesWithNGINXAnnotation gets an array of names for all ingresses controlled by
  53. // NGINX
  54. func GetIngressesWithNGINXAnnotation(clientset kubernetes.Interface) ([]SimpleIngress, error) {
  55. res := make([]SimpleIngress, 0)
  56. foundMap := make(map[string]bool)
  57. v1beta1IngressList, v1beta1Err := clientset.NetworkingV1beta1().Ingresses("").List(context.TODO(), metav1.ListOptions{})
  58. v1IngressList, v1Err := clientset.NetworkingV1().Ingresses("").List(context.TODO(), metav1.ListOptions{})
  59. if v1beta1Err != nil && v1Err != nil {
  60. return nil, fmt.Errorf("List ingresses error: %s, %s", v1beta1Err.Error(), v1Err.Error())
  61. }
  62. if v1beta1Err == nil && len(v1beta1IngressList.Items) > 0 {
  63. for _, ingress := range v1beta1IngressList.Items {
  64. ingressAnn, found := ingress.ObjectMeta.Annotations["kubernetes.io/ingress.class"]
  65. uid := fmt.Sprintf("%s/%s", ingress.ObjectMeta.Namespace, ingress.ObjectMeta.Name)
  66. if _, exists := foundMap[uid]; !exists && ((found && ingressAnn == "nginx") || *ingress.Spec.IngressClassName == "nginx") {
  67. res = append(res, SimpleIngress{
  68. Name: ingress.ObjectMeta.Name,
  69. Namespace: ingress.ObjectMeta.Namespace,
  70. })
  71. foundMap[uid] = true
  72. }
  73. }
  74. }
  75. if v1Err == nil && len(v1IngressList.Items) > 0 {
  76. for _, ingress := range v1IngressList.Items {
  77. ingressAnn, found := ingress.ObjectMeta.Annotations["kubernetes.io/ingress.class"]
  78. uid := fmt.Sprintf("%s/%s", ingress.ObjectMeta.Namespace, ingress.ObjectMeta.Name)
  79. if _, exists := foundMap[uid]; !exists && ((found && ingressAnn == "nginx") || *ingress.Spec.IngressClassName == "nginx") {
  80. res = append(res, SimpleIngress{
  81. Name: ingress.ObjectMeta.Name,
  82. Namespace: ingress.ObjectMeta.Namespace,
  83. })
  84. foundMap[uid] = true
  85. }
  86. }
  87. }
  88. return res, nil
  89. }
  90. type QueryOpts struct {
  91. // the name of the metric being queried for
  92. Metric string `schema:"metric"`
  93. ShouldSum bool `schema:"shouldsum"`
  94. Kind string `schema:"kind"`
  95. PodList []string `schema:"pods"`
  96. Name string `schema:"name"`
  97. Namespace string `schema:"namespace"`
  98. // start time (in unix timestamp) for prometheus results
  99. StartRange uint `schema:"startrange"`
  100. // end time time (in unix timestamp) for prometheus results
  101. EndRange uint `schema:"endrange"`
  102. Resolution string `schema:"resolution"`
  103. Percentile float64 `schema:"percentile"`
  104. }
  105. func QueryPrometheus(
  106. clientset kubernetes.Interface,
  107. service *v1.Service,
  108. opts *QueryOpts,
  109. ) ([]*promParsedSingletonQuery, error) {
  110. if len(service.Spec.Ports) == 0 {
  111. return nil, fmt.Errorf("prometheus service has no exposed ports to query")
  112. }
  113. selectionRegex, err := getSelectionRegex(opts.Kind, opts.Name)
  114. if err != nil {
  115. return nil, err
  116. }
  117. var podSelector string
  118. if len(opts.PodList) > 0 {
  119. podSelector = fmt.Sprintf(`namespace="%s",pod=~"%s",container!="POD",container!=""`, opts.Namespace, strings.Join(opts.PodList, "|"))
  120. } else {
  121. podSelector = fmt.Sprintf(`namespace="%s",pod=~"%s",container!="POD",container!=""`, opts.Namespace, selectionRegex)
  122. }
  123. query := ""
  124. if opts.Metric == "cpu" {
  125. query = fmt.Sprintf("rate(container_cpu_usage_seconds_total{%s}[5m])", podSelector)
  126. } else if opts.Metric == "memory" {
  127. query = fmt.Sprintf("container_memory_usage_bytes{%s}", podSelector)
  128. } else if opts.Metric == "network" {
  129. netPodSelector := fmt.Sprintf(`namespace="%s",pod=~"%s"`, opts.Namespace, selectionRegex)
  130. query = fmt.Sprintf("rate(container_network_receive_bytes_total{%s}[5m])", netPodSelector)
  131. } else if opts.Metric == "nginx:errors" {
  132. num := fmt.Sprintf(`(sum(rate(nginx_ingress_controller_requests{status=~"5.*",exported_namespace="%s",ingress=~"%s"}[5m]) OR sum(rate(nginx_ingress_controller_requests{status=~"5.*",namespace="%s",ingress=~"%s"}[5m])) OR on() vector(0))`, opts.Namespace, selectionRegex, opts.Namespace, selectionRegex)
  133. denom := fmt.Sprintf(`(sum(rate(nginx_ingress_controller_requests{exported_namespace="%s",ingress=~"%s"}[5m]) OR sum(rate(nginx_ingress_controller_requests{namespace="%s",ingress=~"%s"}[5m])) > 0)`, opts.Namespace, selectionRegex, opts.Namespace, selectionRegex)
  134. query = fmt.Sprintf(`%s / %s * 100 OR on() vector(0)`, num, denom)
  135. } else if opts.Metric == "nginx:latency" {
  136. num := fmt.Sprintf(`(sum(rate(nginx_ingress_controller_request_duration_seconds_sum{exported_namespace=~"%s",ingress=~"%s"}[5m]) OR sum(rate(nginx_ingress_controller_request_duration_seconds_sum{namespace=~"%s",ingress=~"%s"}[5m])) OR on() vector(0))`, opts.Namespace, selectionRegex, opts.Namespace, selectionRegex)
  137. denom := fmt.Sprintf(`(sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace=~"%s",ingress=~"%s"}[5m])) OR sum(rate(nginx_ingress_controller_request_duration_seconds_count{namespace=~"%s",ingress=~"%s"}[5m])))`, opts.Namespace, selectionRegex, opts.Namespace, selectionRegex)
  138. query = fmt.Sprintf(`%s / %s OR on() vector(0)`, num, denom)
  139. } else if opts.Metric == "nginx:latency-histogram" {
  140. query = fmt.Sprintf(`histogram_quantile(%f, (sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{status!="404",status!="500",exported_namespace=~"%s",ingress=~"%s"}[5m])) OR sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{status!="404",status!="500",namespace=~"%s",ingress=~"%s"}[5m]))) by (le, ingress))`, opts.Percentile, opts.Namespace, selectionRegex, opts.Namespace, selectionRegex)
  141. } else if opts.Metric == "nginx:status" {
  142. query, err = getNginxStatusQuery(opts, selectionRegex)
  143. if err != nil {
  144. return nil, err
  145. }
  146. } else if opts.Metric == "cpu_hpa_threshold" {
  147. // get the name of the kube hpa metric
  148. metricName, hpaMetricName := getKubeHPAMetricName(clientset, service, opts, "spec_target_metric")
  149. cpuMetricName := getKubeCPUMetricName(clientset, service, opts)
  150. ksmSvc, found, _ := getKubeStateMetricsService(clientset)
  151. appLabel := ""
  152. if found {
  153. appLabel = ksmSvc.ObjectMeta.Labels["app.kubernetes.io/instance"]
  154. }
  155. query = createHPAAbsoluteCPUThresholdQuery(cpuMetricName, metricName, selectionRegex, opts.Name, opts.Namespace, appLabel, hpaMetricName)
  156. } else if opts.Metric == "memory_hpa_threshold" {
  157. metricName, hpaMetricName := getKubeHPAMetricName(clientset, service, opts, "spec_target_metric")
  158. memMetricName := getKubeMemoryMetricName(clientset, service, opts)
  159. ksmSvc, found, _ := getKubeStateMetricsService(clientset)
  160. appLabel := ""
  161. if found {
  162. appLabel = ksmSvc.ObjectMeta.Labels["app.kubernetes.io/instance"]
  163. }
  164. query = createHPAAbsoluteMemoryThresholdQuery(memMetricName, metricName, selectionRegex, opts.Name, opts.Namespace, appLabel, hpaMetricName)
  165. } else if opts.Metric == "hpa_replicas" {
  166. metricName, hpaMetricName := getKubeHPAMetricName(clientset, service, opts, "status_current_replicas")
  167. ksmSvc, found, _ := getKubeStateMetricsService(clientset)
  168. appLabel := ""
  169. if found {
  170. appLabel = ksmSvc.ObjectMeta.Labels["app.kubernetes.io/instance"]
  171. }
  172. query = createHPACurrentReplicasQuery(metricName, opts.Name, opts.Namespace, appLabel, hpaMetricName)
  173. }
  174. if opts.ShouldSum {
  175. query = fmt.Sprintf("sum(%s)", query)
  176. }
  177. queryParams := map[string]string{
  178. "query": query,
  179. "start": fmt.Sprintf("%d", opts.StartRange),
  180. "end": fmt.Sprintf("%d", opts.EndRange),
  181. "step": opts.Resolution,
  182. }
  183. resp := clientset.CoreV1().Services(service.Namespace).ProxyGet(
  184. "http",
  185. service.Name,
  186. fmt.Sprintf("%d", service.Spec.Ports[0].Port),
  187. "/api/v1/query_range",
  188. queryParams,
  189. )
  190. rawQuery, err := resp.DoRaw(context.TODO())
  191. if err != nil {
  192. // in this case, it's very likely that prometheus doesn't contain any data for the given labels
  193. if strings.Contains(err.Error(), "rejected our request for an unknown reason") {
  194. return []*promParsedSingletonQuery{}, nil
  195. }
  196. return nil, err
  197. }
  198. return parseQuery(rawQuery, opts.Metric)
  199. }
  200. func getNginxStatusQuery(opts *QueryOpts, selectionRegex string) (string, error) {
  201. query := fmt.Sprintf(`round(sum by (status_code, ingress)(label_replace(increase(nginx_ingress_controller_requests{exported_namespace=~"%s",ingress="%s",service="%s"}[2m]), "status_code", "${1}xx", "status", "(.)..")), 0.001)`, opts.Namespace, selectionRegex, opts.Name)
  202. return query, nil
  203. }
  204. type promRawQuery struct {
  205. Data struct {
  206. Result []struct {
  207. Metric struct {
  208. Pod string `json:"pod,omitempty"`
  209. StatusCode string `json:"status_code,omitempty"`
  210. } `json:"metric,omitempty"`
  211. Values [][]interface{} `json:"values"`
  212. } `json:"result"`
  213. } `json:"data"`
  214. }
  215. type promParsedSingletonQueryResult struct {
  216. Date interface{} `json:"date,omitempty"`
  217. CPU interface{} `json:"cpu,omitempty"`
  218. Replicas interface{} `json:"replicas,omitempty"`
  219. Memory interface{} `json:"memory,omitempty"`
  220. Bytes interface{} `json:"bytes,omitempty"`
  221. ErrorPct interface{} `json:"error_pct,omitempty"`
  222. Latency interface{} `json:"latency,omitempty"`
  223. StatusCode1xx interface{} `json:"1xx,omitempty"`
  224. StatusCode2xx interface{} `json:"2xx,omitempty"`
  225. StatusCode3xx interface{} `json:"3xx,omitempty"`
  226. StatusCode4xx interface{} `json:"4xx,omitempty"`
  227. StatusCode5xx interface{} `json:"5xx,omitempty"`
  228. }
  229. type promParsedSingletonQuery struct {
  230. Pod string `json:"pod,omitempty"`
  231. Results []promParsedSingletonQueryResult `json:"results"`
  232. }
  233. func parseQuery(rawQuery []byte, metric string) ([]*promParsedSingletonQuery, error) {
  234. if metric == "nginx:status" {
  235. return parseNginxStatusQuery(rawQuery)
  236. }
  237. rawQueryObj := &promRawQuery{}
  238. err := json.Unmarshal(rawQuery, rawQueryObj)
  239. if err != nil {
  240. return nil, err
  241. }
  242. res := make([]*promParsedSingletonQuery, 0)
  243. for _, result := range rawQueryObj.Data.Result {
  244. singleton := &promParsedSingletonQuery{
  245. Pod: result.Metric.Pod,
  246. }
  247. singletonResults := make([]promParsedSingletonQueryResult, 0)
  248. for _, values := range result.Values {
  249. singletonResult := &promParsedSingletonQueryResult{
  250. Date: values[0],
  251. }
  252. if metric == "cpu" {
  253. singletonResult.CPU = values[1]
  254. } else if metric == "memory" {
  255. singletonResult.Memory = values[1]
  256. } else if metric == "network" {
  257. singletonResult.Bytes = values[1]
  258. } else if metric == "nginx:errors" {
  259. singletonResult.ErrorPct = values[1]
  260. } else if metric == "cpu_hpa_threshold" {
  261. singletonResult.CPU = values[1]
  262. } else if metric == "memory_hpa_threshold" {
  263. singletonResult.Memory = values[1]
  264. } else if metric == "hpa_replicas" {
  265. singletonResult.Replicas = values[1]
  266. } else if metric == "nginx:latency" || metric == "nginx:latency-histogram" {
  267. singletonResult.Latency = values[1]
  268. }
  269. singletonResults = append(singletonResults, *singletonResult)
  270. }
  271. singleton.Results = singletonResults
  272. res = append(res, singleton)
  273. }
  274. return res, nil
  275. }
  276. func parseNginxStatusQuery(rawQuery []byte) ([]*promParsedSingletonQuery, error) {
  277. rawQueryObj := &promRawQuery{}
  278. err := json.Unmarshal(rawQuery, rawQueryObj)
  279. if err != nil {
  280. return nil, err
  281. }
  282. singletonResultsByDate := make(map[string]*promParsedSingletonQueryResult, 0)
  283. keys := make([]string, 0)
  284. for _, result := range rawQueryObj.Data.Result {
  285. for _, values := range result.Values {
  286. date := values[0]
  287. dateKey := fmt.Sprintf("%v", date)
  288. if _, ok := singletonResultsByDate[dateKey]; !ok {
  289. keys = append(keys, dateKey)
  290. singletonResultsByDate[dateKey] = &promParsedSingletonQueryResult{
  291. Date: date,
  292. }
  293. }
  294. switch result.Metric.StatusCode {
  295. case "1xx":
  296. singletonResultsByDate[dateKey].StatusCode1xx = values[1]
  297. case "2xx":
  298. singletonResultsByDate[dateKey].StatusCode2xx = values[1]
  299. case "3xx":
  300. singletonResultsByDate[dateKey].StatusCode3xx = values[1]
  301. case "4xx":
  302. singletonResultsByDate[dateKey].StatusCode4xx = values[1]
  303. case "5xx":
  304. singletonResultsByDate[dateKey].StatusCode5xx = values[1]
  305. default:
  306. return nil, errors.New("invalid nginx status code")
  307. }
  308. }
  309. }
  310. sort.Strings(keys)
  311. singletonResults := make([]promParsedSingletonQueryResult, 0)
  312. for _, k := range keys {
  313. singletonResults = append(singletonResults, *singletonResultsByDate[k])
  314. }
  315. singleton := &promParsedSingletonQuery{
  316. Results: singletonResults,
  317. }
  318. res := make([]*promParsedSingletonQuery, 0)
  319. res = append(res, singleton)
  320. return res, nil
  321. }
  322. func getSelectionRegex(kind, name string) (string, error) {
  323. var suffix string
  324. switch strings.ToLower(kind) {
  325. case "deployment":
  326. suffix = "[a-z0-9]+(-[a-z0-9]+)*"
  327. case "statefulset":
  328. suffix = "[0-9]+"
  329. case "job":
  330. suffix = "[a-z0-9]+"
  331. case "cronjob":
  332. suffix = "[a-z0-9]+-[a-z0-9]+"
  333. case "ingress":
  334. return name, nil
  335. case "daemonset":
  336. suffix = "[a-z0-9]+"
  337. default:
  338. return "", fmt.Errorf("not a supported controller to query for metrics")
  339. }
  340. return fmt.Sprintf("%s-%s", name, suffix), nil
  341. }
  342. func createHPAAbsoluteCPUThresholdQuery(cpuMetricName, metricName, podSelectionRegex, hpaName, namespace, appLabel, hpaMetricName string) string {
  343. kubeMetricsPodSelectorOne := getKubeMetricsPodSelector(podSelectionRegex, namespace, "namespace")
  344. kubeMetricsPodSelectorTwo := getKubeMetricsPodSelector(podSelectionRegex, namespace, "exported_namespace")
  345. kubeMetricsHPASelectorOne := fmt.Sprintf(
  346. `%s="%s",namespace="%s",metric_name="cpu",metric_target_type="utilization"`,
  347. hpaMetricName,
  348. hpaName,
  349. namespace,
  350. )
  351. kubeMetricsHPASelectorTwo := fmt.Sprintf(
  352. `%s="%s",exported_namespace="%s",metric_name="cpu",metric_target_type="utilization"`,
  353. hpaMetricName,
  354. hpaName,
  355. namespace,
  356. )
  357. if cpuMetricName == "kube_pod_container_resource_requests" {
  358. kubeMetricsPodSelectorOne += `,resource="cpu",unit="core"`
  359. kubeMetricsPodSelectorTwo += `,resource="cpu",unit="core"`
  360. }
  361. // the kube-state-metrics queries are less prone to error if the field app_kubernetes_io_instance is matched
  362. // as well
  363. if appLabel != "" {
  364. kubeMetricsPodSelectorOne += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  365. kubeMetricsPodSelectorTwo += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  366. kubeMetricsHPASelectorOne += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  367. kubeMetricsHPASelectorTwo += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  368. }
  369. requestCPUOne := fmt.Sprintf(
  370. `avg by (%s) (label_replace(%s{%s},"%s", "%s", "", ""))`,
  371. hpaMetricName,
  372. cpuMetricName,
  373. kubeMetricsPodSelectorOne,
  374. hpaMetricName,
  375. hpaName,
  376. )
  377. targetCPUUtilThresholdOne := fmt.Sprintf(
  378. `%s{%s} / 50`,
  379. metricName,
  380. kubeMetricsHPASelectorOne,
  381. )
  382. requestCPUTwo := fmt.Sprintf(
  383. `avg by (%s) (label_replace(%s{%s},"%s", "%s", "", ""))`,
  384. hpaMetricName,
  385. cpuMetricName,
  386. kubeMetricsPodSelectorTwo,
  387. hpaMetricName,
  388. hpaName,
  389. )
  390. targetCPUUtilThresholdTwo := fmt.Sprintf(
  391. `%s{%s} / 50`,
  392. metricName,
  393. kubeMetricsHPASelectorTwo,
  394. )
  395. return fmt.Sprintf(
  396. `(%s * on(%s) %s) or (%s * on(%s) %s)`,
  397. requestCPUOne, hpaMetricName, targetCPUUtilThresholdOne,
  398. requestCPUTwo, hpaMetricName, targetCPUUtilThresholdTwo,
  399. )
  400. }
  401. func createHPAAbsoluteMemoryThresholdQuery(memMetricName, metricName, podSelectionRegex, hpaName, namespace, appLabel, hpaMetricName string) string {
  402. kubeMetricsPodSelectorOne := getKubeMetricsPodSelector(podSelectionRegex, namespace, "namespace")
  403. kubeMetricsPodSelectorTwo := getKubeMetricsPodSelector(podSelectionRegex, namespace, "exported_namespace")
  404. kubeMetricsHPASelectorOne := fmt.Sprintf(
  405. `%s="%s",namespace="%s",metric_name="memory",metric_target_type="utilization"`,
  406. hpaMetricName,
  407. hpaName,
  408. namespace,
  409. )
  410. kubeMetricsHPASelectorTwo := fmt.Sprintf(
  411. `%s="%s",exported_namespace="%s",metric_name="memory",metric_target_type="utilization"`,
  412. hpaMetricName,
  413. hpaName,
  414. namespace,
  415. )
  416. if memMetricName == "kube_pod_container_resource_requests" {
  417. kubeMetricsPodSelectorOne += `,resource="memory",unit="byte"`
  418. kubeMetricsPodSelectorTwo += `,resource="memory",unit="byte"`
  419. }
  420. // the kube-state-metrics queries are less prone to error if the field app_kubernetes_io_instance is matched
  421. // as well
  422. if appLabel != "" {
  423. kubeMetricsPodSelectorOne += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  424. kubeMetricsPodSelectorTwo += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  425. kubeMetricsHPASelectorOne += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  426. kubeMetricsHPASelectorTwo += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  427. }
  428. requestMemOne := fmt.Sprintf(
  429. `avg by (%s) (label_replace(%s{%s},"%s", "%s", "", ""))`,
  430. hpaMetricName,
  431. memMetricName,
  432. kubeMetricsPodSelectorOne,
  433. hpaMetricName,
  434. hpaName,
  435. )
  436. targetMemUtilThresholdOne := fmt.Sprintf(
  437. `%s{%s} / 50`,
  438. metricName,
  439. kubeMetricsHPASelectorOne,
  440. )
  441. requestMemTwo := fmt.Sprintf(
  442. `avg by (%s) (label_replace(%s{%s},"%s", "%s", "", ""))`,
  443. hpaMetricName,
  444. memMetricName,
  445. kubeMetricsPodSelectorTwo,
  446. hpaMetricName,
  447. hpaName,
  448. )
  449. targetMemUtilThresholdTwo := fmt.Sprintf(
  450. `%s{%s} / 50`,
  451. metricName,
  452. kubeMetricsHPASelectorTwo,
  453. )
  454. return fmt.Sprintf(
  455. `(%s * on(%s) %s) or (%s * on(%s) %s)`,
  456. requestMemOne, hpaMetricName, targetMemUtilThresholdOne,
  457. requestMemTwo, hpaMetricName, targetMemUtilThresholdTwo,
  458. )
  459. }
  460. func getKubeMetricsPodSelector(podSelectionRegex, namespace, namespaceLabel string) string {
  461. return fmt.Sprintf(
  462. `pod=~"%s",%s="%s",container!="POD",container!=""`,
  463. podSelectionRegex,
  464. namespaceLabel,
  465. namespace,
  466. )
  467. }
  468. func createHPACurrentReplicasQuery(metricName, hpaName, namespace, appLabel, hpaMetricName string) string {
  469. kubeMetricsHPASelectorOne := fmt.Sprintf(
  470. `%s="%s",namespace="%s"`,
  471. hpaMetricName,
  472. hpaName,
  473. namespace,
  474. )
  475. kubeMetricsHPASelectorTwo := fmt.Sprintf(
  476. `%s="%s",exported_namespace="%s"`,
  477. hpaMetricName,
  478. hpaName,
  479. namespace,
  480. )
  481. // the kube-state-metrics queries are less prone to error if the field app_kubernetes_io_instance is matched
  482. // as well
  483. if appLabel != "" {
  484. kubeMetricsHPASelectorOne += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  485. kubeMetricsHPASelectorTwo += fmt.Sprintf(`,app_kubernetes_io_instance="%s"`, appLabel)
  486. }
  487. return fmt.Sprintf(
  488. `(%s{%s}) or (%s{%s})`,
  489. metricName,
  490. kubeMetricsHPASelectorOne,
  491. metricName,
  492. kubeMetricsHPASelectorTwo,
  493. )
  494. }
  495. type promRawValuesQuery struct {
  496. Status string `json:"status"`
  497. Data []string `json:"data"`
  498. }
  499. // getKubeHPAMetricName performs a "best guess" for the name of the kube HPA metric,
  500. // which was renamed to kube_horizontalpodautoscaler... in later versions of kube-state-metrics.
  501. // we query Prometheus for a list of metric names to see if any match the new query
  502. // value, otherwise we return the deprecated name.
  503. func getKubeHPAMetricName(
  504. clientset kubernetes.Interface,
  505. service *v1.Service,
  506. opts *QueryOpts,
  507. suffix string,
  508. ) (string, string) {
  509. queryParams := map[string]string{
  510. "match[]": fmt.Sprintf("kube_horizontalpodautoscaler_%s", suffix),
  511. "start": fmt.Sprintf("%d", opts.StartRange),
  512. "end": fmt.Sprintf("%d", opts.EndRange),
  513. }
  514. resp := clientset.CoreV1().Services(service.Namespace).ProxyGet(
  515. "http",
  516. service.Name,
  517. fmt.Sprintf("%d", service.Spec.Ports[0].Port),
  518. "/api/v1/label/__name__/values",
  519. queryParams,
  520. )
  521. rawQuery, err := resp.DoRaw(context.TODO())
  522. if err != nil {
  523. return fmt.Sprintf("kube_hpa_%s", suffix), "hpa"
  524. }
  525. rawQueryObj := &promRawValuesQuery{}
  526. json.Unmarshal(rawQuery, rawQueryObj)
  527. if rawQueryObj.Status == "success" && len(rawQueryObj.Data) == 1 {
  528. return fmt.Sprintf("kube_horizontalpodautoscaler_%s", suffix), "horizontalpodautoscaler"
  529. }
  530. return fmt.Sprintf("kube_hpa_%s", suffix), "hpa"
  531. }
  532. func getKubeCPUMetricName(
  533. clientset kubernetes.Interface,
  534. service *v1.Service,
  535. opts *QueryOpts,
  536. ) string {
  537. queryParams := map[string]string{
  538. "match[]": "kube_pod_container_resource_requests",
  539. "start": fmt.Sprintf("%d", opts.StartRange),
  540. "end": fmt.Sprintf("%d", opts.EndRange),
  541. }
  542. resp := clientset.CoreV1().Services(service.Namespace).ProxyGet(
  543. "http",
  544. service.Name,
  545. fmt.Sprintf("%d", service.Spec.Ports[0].Port),
  546. "/api/v1/label/__name__/values",
  547. queryParams,
  548. )
  549. rawQuery, err := resp.DoRaw(context.TODO())
  550. if err != nil {
  551. return "kube_pod_container_resource_requests_cpu_cores"
  552. }
  553. rawQueryObj := &promRawValuesQuery{}
  554. json.Unmarshal(rawQuery, rawQueryObj)
  555. if rawQueryObj.Status == "success" && len(rawQueryObj.Data) == 1 {
  556. return "kube_pod_container_resource_requests"
  557. }
  558. return "kube_pod_container_resource_requests_cpu_cores"
  559. }
  560. func getKubeMemoryMetricName(
  561. clientset kubernetes.Interface,
  562. service *v1.Service,
  563. opts *QueryOpts,
  564. ) string {
  565. queryParams := map[string]string{
  566. "match[]": "kube_pod_container_resource_requests",
  567. "start": fmt.Sprintf("%d", opts.StartRange),
  568. "end": fmt.Sprintf("%d", opts.EndRange),
  569. }
  570. resp := clientset.CoreV1().Services(service.Namespace).ProxyGet(
  571. "http",
  572. service.Name,
  573. fmt.Sprintf("%d", service.Spec.Ports[0].Port),
  574. "/api/v1/label/__name__/values",
  575. queryParams,
  576. )
  577. rawQuery, err := resp.DoRaw(context.TODO())
  578. if err != nil {
  579. return "kube_pod_container_resource_requests_memory_bytes"
  580. }
  581. rawQueryObj := &promRawValuesQuery{}
  582. json.Unmarshal(rawQuery, rawQueryObj)
  583. if rawQueryObj.Status == "success" && len(rawQueryObj.Data) == 1 {
  584. return "kube_pod_container_resource_requests"
  585. }
  586. return "kube_pod_container_resource_requests_memory_bytes"
  587. }