瀏覽代碼

Merge pull request #1981 from nik-kc/nik/core-320

Fix prometheus diagnostics errors
Niko Kovacevic 2 年之前
父節點
當前提交
97f0cd3a14
共有 1 個文件被更改,包括 8 次插入和 8 次刪除
  1. 8 8
      pkg/prom/diagnostics.go

+ 8 - 8
pkg/prom/diagnostics.go

@@ -60,14 +60,14 @@ const DocumentationBaseURL = "https://github.com/kubecost/docs/blob/master/diagn
 var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnosticDefinition{
 	CAdvisorDiagnosticMetricID: {
 		ID:          CAdvisorDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total[5m] %s)`,
+		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total{%s}[5m] %s)`,
 		Label:       "cAdvisor metrics available",
 		Description: "Determine if cAdvisor metrics are available during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#cadvisor-metrics-available", DocumentationBaseURL),
 	},
 	KSMDiagnosticMetricID: {
 		ID:          KSMDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte"}[5m] %s)`,
+		QueryFmt:    `absent_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", %s}[5m] %s)`,
 		Label:       "Kube-state-metrics available",
 		Description: "Determine if metrics from kube-state-metrics are available during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#kube-state-metrics-metrics-available", DocumentationBaseURL),
@@ -87,7 +87,7 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
 	},
 	CAdvisorLabelDiagnosticMetricID: {
 		ID:          CAdvisorLabelDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total{container!="",pod!="",%s}[5m] %s)`,
+		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total{container!="",pod!="", %s}[5m] %s)`,
 		Label:       "Expected cAdvisor labels available",
 		Description: "Determine if expected cAdvisor labels are present during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#cadvisor-metrics-available", DocumentationBaseURL),
@@ -107,33 +107,33 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
 	},
 	CPUThrottlingDiagnosticMetricID: {
 		ID: CPUThrottlingDiagnosticMetricID,
-		QueryFmt: `avg(increase(container_cpu_cfs_throttled_periods_total{container="cost-model",%s}[10m] %s)) by (container_name, pod_name, namespace)
+		QueryFmt: `avg(increase(container_cpu_cfs_throttled_periods_total{container="cost-model", %s}[10m] %s)) by (container_name, pod_name, namespace)
 	/ avg(increase(container_cpu_cfs_periods_total{container="cost-model",%s}[10m] %s)) by (container_name, pod_name, namespace) > 0.2`,
 		Label:       "Kubecost is not CPU throttled",
 		Description: "Kubecost loading slowly? A kubecost component might be CPU throttled",
 	},
 	KubecostRecordingRuleCPUUsageID: {
 		ID:          KubecostRecordingRuleCPUUsageID,
-		QueryFmt:    `absent_over_time(kubecost_container_cpu_usage_irate[5m] %s)`,
+		QueryFmt:    `absent_over_time(kubecost_container_cpu_usage_irate{%s}[5m] %s)`,
 		Label:       "Kubecost's CPU usage recording rule is set up",
 		Description: "If the 'kubecost_container_cpu_usage_irate' recording rule is not set up, Allocation pipeline build may put pressure on your Prometheus due to the use of a subquery.",
 		DocLink:     "https://docs.kubecost.com/install-and-configure/install/custom-prom",
 	},
 	CAdvisorWorkingSetBytesMetricID: {
 		ID:          CAdvisorWorkingSetBytesMetricID,
-		QueryFmt:    `absent_over_time(container_memory_working_set_bytes{container="cost-model", container!="POD", instance!=""}[5m] %s)`,
+		QueryFmt:    `absent_over_time(container_memory_working_set_bytes{container="cost-model", container!="POD", instance!="", %s}[5m] %s)`,
 		Label:       "cAdvisor working set bytes metrics available",
 		Description: "Determine if cAdvisor working set bytes metrics are available during last 5 minutes.",
 	},
 	KSMCPUCapacityMetricID: {
 		ID:          KSMCPUCapacityMetricID,
-		QueryFmt:    `absent_over_time(kube_node_status_capacity_cpu_cores[5m] %s)`,
+		QueryFmt:    `absent_over_time(kube_node_status_capacity_cpu_cores{%s}[5m] %s)`,
 		Label:       "KSM had CPU capacity during the last 5 minutes",
 		Description: "Determine if KSM had CPU capacity during the last 5 minutes",
 	},
 	KSMAllocatableCPUCoresMetricID: {
 		ID:          KSMAllocatableCPUCoresMetricID,
-		QueryFmt:    `absent_over_time(kube_node_status_allocatable_cpu_cores[5m] %s)`,
+		QueryFmt:    `absent_over_time(kube_node_status_allocatable_cpu_cores{%s}[5m] %s)`,
 		Label:       "KSM had allocatable CPU cores during the last 5 minutes",
 		Description: "Determine if KSM had allocatable CPU cores during the last 5 minutes",
 	},