| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
- package allocation
- // Description
- // Check that Pod Labels from the Allocation API match the results from Prometheus
- import (
- "testing"
- "time"
- "github.com/opencost/opencost-integration-tests/pkg/api"
- "github.com/opencost/opencost-integration-tests/pkg/prometheus"
- )
- func TestPodLabels(t *testing.T) {
- apiObj := api.NewAPI()
- testCases := []struct {
- name string
- window string
- aggregate string
- accumulate string
- includeAggregatedMetadata string
- }{
- {
- name: "Today",
- window: "24h",
- aggregate: "pod",
- accumulate: "true",
- includeAggregatedMetadata: "true",
- },
- }
- t.Logf("testCases: %v", testCases)
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
- endTime := queryEnd.Unix()
- // -------------------------------
- // Pod Running Time
- // avg(avg_over_time(kube_pod_container_status_running{%s}[%s])) by (pod)
- // -------------------------------
- client := prometheus.NewClient()
- promPodRunningInfoInput := prometheus.PrometheusInput{}
- promPodRunningInfoInput.Metric = "kube_pod_container_status_running"
- promPodRunningInfoInput.Function = []string{"avg_over_time", "avg"}
- promPodRunningInfoInput.QueryWindow = tc.window
- promPodRunningInfoInput.AggregateBy = []string{"pod"}
- promPodRunningInfoInput.Time = &endTime
- promPodRunningInfo, err := client.RunPromQLQuery(promPodRunningInfoInput, t)
- if err != nil {
- t.Fatalf("Error while calling Prometheus API %v", err)
- }
- podRunningStatus := make(map[string]int)
- for _, promPodRunningInfoItem := range promPodRunningInfo.Data.Result {
- pod := promPodRunningInfoItem.Metric.Pod
- runningStatus := int(promPodRunningInfoItem.Value.Value)
- // kube_pod_labels and kube_nodespace_labels might hold labels for dead pods as well
- // filter the ones that are running because allocation filters for that
- podRunningStatus[pod] = runningStatus
- }
- // Pod Info - narrow the "running" set to pods that were actually
- // running at the query endTime using a 1m resolution subquery,
- // matching the pattern used in pod_annotations_test.go.
- // Pods that only briefly existed earlier in the 24h window may
- // not appear in /allocation, and comparing their labels yields
- // false negatives that have nothing to do with label
- // propagation.
- promPodInfoInput := prometheus.PrometheusInput{}
- promPodInfoInput.Metric = "kube_pod_container_status_running"
- promPodInfoInput.MetricNotEqualTo = "0"
- promPodInfoInput.AggregateBy = []string{"container", "pod", "namespace", "node"}
- promPodInfoInput.Function = []string{"avg"}
- promPodInfoInput.AggregateWindow = tc.window
- promPodInfoInput.AggregateResolution = podStatusResolution
- promPodInfoInput.Time = &endTime
- podInfo, err := client.RunPromQLQuery(promPodInfoInput, t)
- if err != nil {
- t.Fatalf("Error while calling Prometheus API %v", err)
- }
- alive := make(map[string]bool)
- for _, r := range podInfo.Data.Result {
- alive[r.Metric.Pod] = true
- }
- // -------------------------------
- // Pod Labels
- // avg_over_time(kube_pod_labels{%s}[%s])
- // -------------------------------
- promLabelInfoInput := prometheus.PrometheusInput{}
- promLabelInfoInput.Metric = "kube_pod_labels"
- promLabelInfoInput.Function = []string{"avg_over_time"}
- promLabelInfoInput.QueryWindow = tc.window
- promLabelInfoInput.Time = &endTime
- promlabelInfo, err := client.RunPromQLQuery(promLabelInfoInput, t)
- if err != nil {
- t.Fatalf("Error while calling Prometheus API %v", err)
- }
- // Store Results in a Pod Map
- type PodData struct {
- Pod string
- Alive bool
- InAlloc bool
- PromLabels map[string]string
- AllocLabels map[string]string
- }
- podMap := make(map[string]*PodData)
- // Store Prometheus Pod Prometheus Results
- for _, promlabel := range promlabelInfo.Data.Result {
- pod := promlabel.Metric.Pod
- labels := promlabel.Metric.Labels
- // Skip Dead Pods
- if podRunningStatus[pod] == 0 {
- continue
- }
- podMap[pod] = &PodData{
- Pod: pod,
- Alive: alive[pod],
- PromLabels: labels,
- }
- }
- // API Response
- apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
- Window: tc.window,
- Aggregate: tc.aggregate,
- Accumulate: tc.accumulate,
- IncludeAggregatedMetadata: tc.includeAggregatedMetadata,
- })
- if err != nil {
- t.Fatalf("Error while calling Allocation API %v", err)
- }
- if apiResponse.Code != 200 {
- t.Errorf("API returned non-200 code")
- }
- // Store Allocation Pod Label Results
- for pod, allocationResponseItem := range apiResponse.Data[0] {
- podLabels, ok := podMap[pod]
- if !ok {
- t.Logf("Pod Information Missing from Prometheus %s", pod)
- continue
- }
- podLabels.InAlloc = true
- podLabels.AllocLabels = allocationResponseItem.Properties.Labels
- }
- // Compare Results
- for pod, podLabels := range podMap {
- t.Logf("Pod: %s", pod)
- // Skip pods that were not alive at the query end. They
- // may have been running earlier in the window but
- // /allocation only reports pods with coincident usage
- // metrics, so label comparisons would be noisy.
- if !podLabels.Alive {
- t.Logf("Skipping %s. Pod Dead at query end.", pod)
- continue
- }
- // Skip pods that were not returned by /allocation. A pod
- // can show up in kube_pod_labels but not in /allocation
- // when it was very short lived or lacked CPU/memory
- // usage samples, which is a window-boundary race rather
- // than a label-propagation bug.
- if !podLabels.InAlloc {
- t.Logf("Skipping %s. Pod not present in /allocation response.", pod)
- continue
- }
- // Prometheus Result will have fewer labels.
- // Allocation has oracle and feature related labels
- for promLabel, promLabelValue := range podLabels.PromLabels {
- allocLabelValue, ok := podLabels.AllocLabels[promLabel]
- if !ok {
- t.Errorf(" - [Fail]: Prometheus Label %s not found in Allocation", promLabel)
- continue
- }
- if allocLabelValue != promLabelValue {
- t.Errorf(" - [Fail]: Alloc %s != Prom %s", allocLabelValue, promLabelValue)
- } else {
- t.Logf(" - [Pass]: Label: %s", promLabel)
- }
- }
- }
- })
- }
- }
|