pod_labels_test.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. package allocation
  2. // Description
// Check Pod Labels from API match results from Prometheus
  4. import (
  5. "testing"
  6. "time"
  7. "github.com/opencost/opencost-integration-tests/pkg/api"
  8. "github.com/opencost/opencost-integration-tests/pkg/prometheus"
  9. )
  10. func TestPodLabels(t *testing.T) {
  11. apiObj := api.NewAPI()
  12. testCases := []struct {
  13. name string
  14. window string
  15. aggregate string
  16. accumulate string
  17. includeAggregatedMetadata string
  18. }{
  19. {
  20. name: "Today",
  21. window: "24h",
  22. aggregate: "pod",
  23. accumulate: "true",
  24. includeAggregatedMetadata: "true",
  25. },
  26. }
  27. t.Logf("testCases: %v", testCases)
  28. for _, tc := range testCases {
  29. t.Run(tc.name, func(t *testing.T) {
  30. queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
  31. endTime := queryEnd.Unix()
  32. // -------------------------------
  33. // Pod Running Time
  34. // avg(avg_over_time(kube_pod_container_status_running{%s}[%s])) by (pod)
  35. // -------------------------------
  36. client := prometheus.NewClient()
  37. promPodRunningInfoInput := prometheus.PrometheusInput{}
  38. promPodRunningInfoInput.Metric = "kube_pod_container_status_running"
  39. promPodRunningInfoInput.Function = []string{"avg_over_time", "avg"}
  40. promPodRunningInfoInput.QueryWindow = tc.window
  41. promPodRunningInfoInput.AggregateBy = []string{"pod"}
  42. promPodRunningInfoInput.Time = &endTime
  43. promPodRunningInfo, err := client.RunPromQLQuery(promPodRunningInfoInput, t)
  44. if err != nil {
  45. t.Fatalf("Error while calling Prometheus API %v", err)
  46. }
  47. podRunningStatus := make(map[string]int)
  48. for _, promPodRunningInfoItem := range promPodRunningInfo.Data.Result {
  49. pod := promPodRunningInfoItem.Metric.Pod
  50. runningStatus := int(promPodRunningInfoItem.Value.Value)
  51. // kube_pod_labels and kube_nodespace_labels might hold labels for dead pods as well
  52. // filter the ones that are running because allocation filters for that
  53. podRunningStatus[pod] = runningStatus
  54. }
  55. // Pod Info - narrow the "running" set to pods that were actually
  56. // running at the query endTime using a 1m resolution subquery,
  57. // matching the pattern used in pod_annotations_test.go.
  58. // Pods that only briefly existed earlier in the 24h window may
  59. // not appear in /allocation, and comparing their labels yields
  60. // false negatives that have nothing to do with label
  61. // propagation.
  62. promPodInfoInput := prometheus.PrometheusInput{}
  63. promPodInfoInput.Metric = "kube_pod_container_status_running"
  64. promPodInfoInput.MetricNotEqualTo = "0"
  65. promPodInfoInput.AggregateBy = []string{"container", "pod", "namespace", "node"}
  66. promPodInfoInput.Function = []string{"avg"}
  67. promPodInfoInput.AggregateWindow = tc.window
  68. promPodInfoInput.AggregateResolution = podStatusResolution
  69. promPodInfoInput.Time = &endTime
  70. podInfo, err := client.RunPromQLQuery(promPodInfoInput, t)
  71. if err != nil {
  72. t.Fatalf("Error while calling Prometheus API %v", err)
  73. }
  74. alive := make(map[string]bool)
  75. for _, r := range podInfo.Data.Result {
  76. alive[r.Metric.Pod] = true
  77. }
  78. // -------------------------------
  79. // Pod Labels
  80. // avg_over_time(kube_pod_labels{%s}[%s])
  81. // -------------------------------
  82. promLabelInfoInput := prometheus.PrometheusInput{}
  83. promLabelInfoInput.Metric = "kube_pod_labels"
  84. promLabelInfoInput.Function = []string{"avg_over_time"}
  85. promLabelInfoInput.QueryWindow = tc.window
  86. promLabelInfoInput.Time = &endTime
  87. promlabelInfo, err := client.RunPromQLQuery(promLabelInfoInput, t)
  88. if err != nil {
  89. t.Fatalf("Error while calling Prometheus API %v", err)
  90. }
  91. // Store Results in a Pod Map
  92. type PodData struct {
  93. Pod string
  94. Alive bool
  95. InAlloc bool
  96. PromLabels map[string]string
  97. AllocLabels map[string]string
  98. }
  99. podMap := make(map[string]*PodData)
  100. // Store Prometheus Pod Prometheus Results
  101. for _, promlabel := range promlabelInfo.Data.Result {
  102. pod := promlabel.Metric.Pod
  103. labels := promlabel.Metric.Labels
  104. // Skip Dead Pods
  105. if podRunningStatus[pod] == 0 {
  106. continue
  107. }
  108. podMap[pod] = &PodData{
  109. Pod: pod,
  110. Alive: alive[pod],
  111. PromLabels: labels,
  112. }
  113. }
  114. // API Response
  115. apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
  116. Window: tc.window,
  117. Aggregate: tc.aggregate,
  118. Accumulate: tc.accumulate,
  119. IncludeAggregatedMetadata: tc.includeAggregatedMetadata,
  120. })
  121. if err != nil {
  122. t.Fatalf("Error while calling Allocation API %v", err)
  123. }
  124. if apiResponse.Code != 200 {
  125. t.Errorf("API returned non-200 code")
  126. }
  127. // Store Allocation Pod Label Results
  128. for pod, allocationResponseItem := range apiResponse.Data[0] {
  129. podLabels, ok := podMap[pod]
  130. if !ok {
  131. t.Logf("Pod Information Missing from Prometheus %s", pod)
  132. continue
  133. }
  134. podLabels.InAlloc = true
  135. podLabels.AllocLabels = allocationResponseItem.Properties.Labels
  136. }
  137. // Compare Results
  138. for pod, podLabels := range podMap {
  139. t.Logf("Pod: %s", pod)
  140. // Skip pods that were not alive at the query end. They
  141. // may have been running earlier in the window but
  142. // /allocation only reports pods with coincident usage
  143. // metrics, so label comparisons would be noisy.
  144. if !podLabels.Alive {
  145. t.Logf("Skipping %s. Pod Dead at query end.", pod)
  146. continue
  147. }
  148. // Skip pods that were not returned by /allocation. A pod
  149. // can show up in kube_pod_labels but not in /allocation
  150. // when it was very short lived or lacked CPU/memory
  151. // usage samples, which is a window-boundary race rather
  152. // than a label-propagation bug.
  153. if !podLabels.InAlloc {
  154. t.Logf("Skipping %s. Pod not present in /allocation response.", pod)
  155. continue
  156. }
  157. // Prometheus Result will have fewer labels.
  158. // Allocation has oracle and feature related labels
  159. for promLabel, promLabelValue := range podLabels.PromLabels {
  160. allocLabelValue, ok := podLabels.AllocLabels[promLabel]
  161. if !ok {
  162. t.Errorf(" - [Fail]: Prometheus Label %s not found in Allocation", promLabel)
  163. continue
  164. }
  165. if allocLabelValue != promLabelValue {
  166. t.Errorf(" - [Fail]: Alloc %s != Prom %s", allocLabelValue, promLabelValue)
  167. } else {
  168. t.Logf(" - [Pass]: Label: %s", promLabel)
  169. }
  170. }
  171. }
  172. })
  173. }
  174. }