Просмотр исходного кода

Merge branch 'develop' into alex-core-135-unified-asset

Alex Meijer 3 лет назад
Родитель
Сommit
fa204086d3
5 измененных файлов с 131 добавлено и 60 удалено
  1. 35 0
      SECURITY.md
  2. 14 5
      pkg/cloud/provider.go
  3. 51 46
      pkg/costmodel/allocation.go
  4. 25 9
      pkg/costmodel/allocation_helpers.go
  5. 6 0
      pkg/costmodel/key.go

+ 35 - 0
SECURITY.md

@@ -0,0 +1,35 @@
+# OpenCost Security Policy
+
+The OpenCost project greatly appreciates the need for security and timely updates, given our proximity to cloud billing. We are very grateful to the users, security researchers, and developers for reporting security vulnerabilities to us. All reported security vulnerabilities will be carefully assessed, addressed, and responded to.
+
+## Code Security
+
+Application code is version controlled using GitHub. All code changes are tracked with full revision history and are attributable to a specific individual. Code must be reviewed and accepted by a different engineer than the author of the change.
+
+### Dependabot
+
+OpenCost has [Dependabot](https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-supply-chain-security#what-is-dependabot) enabled for assessing dependencies in the project.
+
+## Supported Versions
+
+OpenCost provides security updates for the two most recent minor versions released on GitHub.
+
+For example, if `v1.102.0` is the most recent stable version, we will address security updates for `v1.101.0` and later. Once `v1.103.0` is released, we will no longer provide updates for `v1.101.x` releases.
+
+## Reporting a Vulnerability
+
+The OpenCost project has enabled [Private vulnerability reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability) for our repositories which allows for direct reporting of issues to administrators and maintainers in a secure fashion. Please include a thorough description of the issue, the steps you took to create the issue, affected versions, and, if known, mitigations for the issue. The team will help diagnose the severity of the issue and determine how to address the issue. Issues deemed to be non-critical will be filed as GitHub issues. Critical issues will receive immediate attention and be fixed as quickly as possible.
+
+### Kubecost Bug Bounty
+
+Kubecost offers a Bug Bounty program that pays $250 USD for unique, not previously disclosed publicly available CVEs, and accepted security bug reports submitted to vulnerability-report@kubecost.com.
+
+## Disclosure policy
+
+For known public security vulnerabilities, we will disclose the disclosure as soon as possible after receiving the report. Vulnerabilities discovered for the first time will be disclosed in accordance with the following process:
+
+1. The received security vulnerability report shall be handed over to the security team for follow-up coordination and repair work.
+2. After the vulnerability is confirmed, we will create a draft Security Advisory on GitHub that lists the details of the vulnerability.
+3. Invite related personnel to discuss the fix.
+4. Fork the temporary private repository on GitHub, and collaborate to fix the vulnerability.
+5. After the fixed code is merged into all supported versions, the vulnerability will be publicly posted in the GitHub Advisory Database.

+ 14 - 5
pkg/cloud/provider.go

@@ -4,9 +4,8 @@ import (
 	"database/sql"
 	"errors"
 	"fmt"
-	"golang.org/x/text/cases"
-	"golang.org/x/text/language"
 	"io"
+	"net"
 	"net/http"
 	"regexp"
 	"strconv"
@@ -14,6 +13,9 @@ import (
 	"sync"
 	"time"
 
+	"golang.org/x/text/cases"
+	"golang.org/x/text/language"
+
 	"github.com/opencost/opencost/pkg/kubecost"
 
 	"github.com/opencost/opencost/pkg/util"
@@ -506,9 +508,16 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 			clusterRegion:    cp.region,
 			clusterAccountID: cp.accountID,
 			clusterProjectID: cp.projectID,
-			metadataClient: metadata.NewClient(&http.Client{
-				Transport: httputil.NewUserAgentTransport("kubecost", http.DefaultTransport),
-			}),
+			metadataClient: metadata.NewClient(
+				&http.Client{
+					Transport: httputil.NewUserAgentTransport("kubecost", &http.Transport{
+						Dial: (&net.Dialer{
+							Timeout:   2 * time.Second,
+							KeepAlive: 30 * time.Second,
+						}).Dial,
+					}),
+					Timeout: 5 * time.Second,
+				}),
 		}, nil
 	case kubecost.AWSProvider:
 		log.Info("Found ProviderID starting with \"aws\", using AWS Provider")

+ 51 - 46
pkg/costmodel/allocation.go

@@ -13,51 +13,52 @@ import (
 )
 
 const (
-	queryFmtPods                     = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]`
-	queryFmtPodsUID                  = `avg(kube_pod_container_status_running{}) by (pod, namespace, uid, %s)[%s:%s]`
-	queryFmtRAMBytesAllocated        = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s, provider_id)`
-	queryFmtRAMRequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtRAMUsageAvg              = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtRAMUsageMax              = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtCPUCoresAllocated        = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtCPURequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtCPUUsageAvg              = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtGPUsRequested            = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtGPUsAllocated            = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtNodeCostPerCPUHr         = `avg(avg_over_time(node_cpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerRAMGiBHr      = `avg(avg_over_time(node_ram_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerGPUHr         = `avg(avg_over_time(node_gpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeIsSpot               = `avg_over_time(kubecost_node_is_spot[%s])`
-	queryFmtPVCInfo                  = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]`
-	queryFmtPodPVCAllocation         = `avg(avg_over_time(pod_pvc_allocation[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
-	queryFmtPVCBytesRequested        = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s])) by (persistentvolumeclaim, namespace, %s)`
-	queryFmtPVActiveMins             = `count(kube_persistentvolume_capacity_bytes) by (persistentvolume, %s)[%s:%s]`
-	queryFmtPVBytes                  = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (persistentvolume, %s)`
-	queryFmtPVCostPerGiBHour         = `avg(avg_over_time(pv_hourly_cost[%s])) by (volumename, %s)`
-	queryFmtNetZoneGiB               = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetZoneCostPerGiB        = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s])) by (%s)`
-	queryFmtNetRegionGiB             = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetRegionCostPerGiB      = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s])) by (%s)`
-	queryFmtNetInternetGiB           = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetInternetCostPerGiB    = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s])) by (%s)`
-	queryFmtNetReceiveBytes          = `sum(increase(container_network_receive_bytes_total{pod!=""}[%s])) by (pod_name, pod, namespace, %s)`
-	queryFmtNetTransferBytes         = `sum(increase(container_network_transmit_bytes_total{pod!=""}[%s])) by (pod_name, pod, namespace, %s)`
-	queryFmtNodeLabels               = `avg_over_time(kube_node_labels[%s])`
-	queryFmtNamespaceLabels          = `avg_over_time(kube_namespace_labels[%s])`
-	queryFmtNamespaceAnnotations     = `avg_over_time(kube_namespace_annotations[%s])`
-	queryFmtPodLabels                = `avg_over_time(kube_pod_labels[%s])`
-	queryFmtPodAnnotations           = `avg_over_time(kube_pod_annotations[%s])`
-	queryFmtServiceLabels            = `avg_over_time(service_selector_labels[%s])`
-	queryFmtDeploymentLabels         = `avg_over_time(deployment_match_labels[%s])`
-	queryFmtStatefulSetLabels        = `avg_over_time(statefulSet_match_labels[%s])`
-	queryFmtDaemonSetLabels          = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s])) by (pod, owner_name, namespace, %s)`
-	queryFmtJobLabels                = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s])) by (pod, owner_name, namespace ,%s)`
-	queryFmtPodsWithReplicaSetOwner  = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet"}[%s])) by (pod, owner_name, namespace ,%s)`
-	queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>"}[%s])) by (replicaset, namespace, %s)`
-	queryFmtLBCostPerHr              = `avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s)`
-	queryFmtLBActiveMins             = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]`
-	queryFmtOldestSample             = `min_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
-	queryFmtNewestSample             = `max_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
+	queryFmtPods                        = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]`
+	queryFmtPodsUID                     = `avg(kube_pod_container_status_running{}) by (pod, namespace, uid, %s)[%s:%s]`
+	queryFmtRAMBytesAllocated           = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s, provider_id)`
+	queryFmtRAMRequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtRAMUsageAvg                 = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtRAMUsageMax                 = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUCoresAllocated           = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPURequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPUUsageAvg                 = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtGPUsRequested               = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtGPUsAllocated               = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtNodeCostPerCPUHr            = `avg(avg_over_time(node_cpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerRAMGiBHr         = `avg(avg_over_time(node_ram_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerGPUHr            = `avg(avg_over_time(node_gpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeIsSpot                  = `avg_over_time(kubecost_node_is_spot[%s])`
+	queryFmtPVCInfo                     = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]`
+	queryFmtPodPVCAllocation            = `avg(avg_over_time(pod_pvc_allocation[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
+	queryFmtPVCBytesRequested           = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s])) by (persistentvolumeclaim, namespace, %s)`
+	queryFmtPVActiveMins                = `count(kube_persistentvolume_capacity_bytes) by (persistentvolume, %s)[%s:%s]`
+	queryFmtPVBytes                     = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (persistentvolume, %s)`
+	queryFmtPVCostPerGiBHour            = `avg(avg_over_time(pv_hourly_cost[%s])) by (volumename, %s)`
+	queryFmtNetZoneGiB                  = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetZoneCostPerGiB           = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s])) by (%s)`
+	queryFmtNetRegionGiB                = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetRegionCostPerGiB         = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s])) by (%s)`
+	queryFmtNetInternetGiB              = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetInternetCostPerGiB       = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s])) by (%s)`
+	queryFmtNetReceiveBytes             = `sum(increase(container_network_receive_bytes_total{pod!="", container="POD"}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNetTransferBytes            = `sum(increase(container_network_transmit_bytes_total{pod!="", container="POD"}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNodeLabels                  = `avg_over_time(kube_node_labels[%s])`
+	queryFmtNamespaceLabels             = `avg_over_time(kube_namespace_labels[%s])`
+	queryFmtNamespaceAnnotations        = `avg_over_time(kube_namespace_annotations[%s])`
+	queryFmtPodLabels                   = `avg_over_time(kube_pod_labels[%s])`
+	queryFmtPodAnnotations              = `avg_over_time(kube_pod_annotations[%s])`
+	queryFmtServiceLabels               = `avg_over_time(service_selector_labels[%s])`
+	queryFmtDeploymentLabels            = `avg_over_time(deployment_match_labels[%s])`
+	queryFmtStatefulSetLabels           = `avg_over_time(statefulSet_match_labels[%s])`
+	queryFmtDaemonSetLabels             = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s])) by (pod, owner_name, namespace, %s)`
+	queryFmtJobLabels                   = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtPodsWithReplicaSetOwner     = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet"}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtReplicaSetsWithoutOwners    = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>"}[%s])) by (replicaset, namespace, %s)`
+	queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout"}[%s])) by (replicaset, namespace, owner_kind, owner_name, %s)`
+	queryFmtLBCostPerHr                 = `avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s)`
+	queryFmtLBActiveMins                = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]`
+	queryFmtOldestSample                = `min_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
+	queryFmtNewestSample                = `max_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
 
 
 	// Because we use container_cpu_usage_seconds_total to calculate CPU usage
@@ -504,6 +505,9 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, durStr, env.GetPromClusterLabel())
 	resChReplicaSetsWithoutOwners := ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end)
 
+	queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, durStr, env.GetPromClusterLabel())
+	resChReplicaSetsWithRolloutOwner := ctx.QueryAtTime(queryReplicaSetsWithRolloutOwner, end)
+
 	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, env.GetPromClusterLabel())
 	resChJobLabels := ctx.QueryAtTime(queryJobLabels, end)
 
@@ -561,6 +565,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
 	resPodsWithReplicaSetOwner, _ := resChPodsWithReplicaSetOwner.Await()
 	resReplicaSetsWithoutOwners, _ := resChReplicaSetsWithoutOwners.Await()
+	resReplicaSetsWithRolloutOwner, _ := resChReplicaSetsWithRolloutOwner.Await()
 	resJobLabels, _ := resChJobLabels.Await()
 	resLBCostPerHr, _ := resChLBCostPerHr.Await()
 	resLBActiveMins, _ := resChLBActiveMins.Await()
@@ -616,7 +621,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
 	podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels, podUIDKeyMap, ingestPodUID)
 	podJobMap := resToPodJobMap(resJobLabels, podUIDKeyMap, ingestPodUID)
-	podReplicaSetMap := resToPodReplicaSetMap(resPodsWithReplicaSetOwner, resReplicaSetsWithoutOwners, podUIDKeyMap, ingestPodUID)
+	podReplicaSetMap := resToPodReplicaSetMap(resPodsWithReplicaSetOwner, resReplicaSetsWithoutOwners, resReplicaSetsWithRolloutOwner, podUIDKeyMap, ingestPodUID)
 	applyControllersToPods(podMap, podDeploymentMap)
 	applyControllersToPods(podMap, podStatefulSetMap)
 	applyControllersToPods(podMap, podDaemonSetMap)

+ 25 - 9
pkg/costmodel/allocation_helpers.go

@@ -1156,13 +1156,16 @@ func resToPodJobMap(resJobLabels []*prom.QueryResult, podUIDKeyMap map[podKey][]
 	return jobLabels
 }
 
-func resToPodReplicaSetMap(resPodsWithReplicaSetOwner []*prom.QueryResult, resReplicaSetsWithoutOwners []*prom.QueryResult, podUIDKeyMap map[podKey][]podKey, ingestPodUID bool) map[podKey]controllerKey {
+func resToPodReplicaSetMap(resPodsWithReplicaSetOwner []*prom.QueryResult, resReplicaSetsWithoutOwners []*prom.QueryResult, resReplicaSetsWithRolloutOwner []*prom.QueryResult, podUIDKeyMap map[podKey][]podKey, ingestPodUID bool) map[podKey]controllerKey {
 	// Build out set of ReplicaSets that have no owners, themselves, such that
 	// the ReplicaSet should be used as the owner of the Pods it controls.
 	// (This should exclude, for example, ReplicaSets that are controlled by
 	// Deployments, in which case the Deployment should be the pod's owner.)
+	// Additionally, add to this set of ReplicaSets those ReplicaSets that
+	// are owned by a Rollout
 	replicaSets := map[controllerKey]struct{}{}
 
+	// Create unowned ReplicaSet controller keys
 	for _, res := range resReplicaSetsWithoutOwners {
 		controllerKey, err := resultReplicaSetKey(res, env.GetPromClusterLabel(), "namespace", "replicaset")
 		if err != nil {
@@ -1172,17 +1175,34 @@ func resToPodReplicaSetMap(resPodsWithReplicaSetOwner []*prom.QueryResult, resRe
 		replicaSets[controllerKey] = struct{}{}
 	}
 
+	// Create Rollout-owned ReplicaSet controller keys
+	for _, res := range resReplicaSetsWithRolloutOwner {
+		controllerKey, err := resultReplicaSetRolloutKey(res, env.GetPromClusterLabel(), "namespace", "replicaset")
+		if err != nil {
+			continue
+		}
+
+		replicaSets[controllerKey] = struct{}{}
+	}
+
 	// Create the mapping of Pods to ReplicaSets, ignoring any ReplicaSets that
-	// to not appear in the set of uncontrolled ReplicaSets above.
+	// do not appear in the set of unowned/Rollout-owned ReplicaSets above.
 	podToReplicaSet := map[podKey]controllerKey{}
 
 	for _, res := range resPodsWithReplicaSetOwner {
+		// First, check if this pod is owned by an unowned ReplicaSet
 		controllerKey, err := resultReplicaSetKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
 		if err != nil {
 			continue
-		}
-		if _, ok := replicaSets[controllerKey]; !ok {
-			continue
+		} else if _, ok := replicaSets[controllerKey]; !ok {
+			// If the pod is not owned by an unowned ReplicaSet, check if
+			// it's owned by a Rollout-owned ReplicaSet
+			controllerKey, err = resultReplicaSetRolloutKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
+			if err != nil {
+				continue
+			} else if _, ok := replicaSets[controllerKey]; !ok {
+				continue
+			}
 		}
 
 		pod, err := res.GetString("pod")
@@ -1196,18 +1216,14 @@ func resToPodReplicaSetMap(resPodsWithReplicaSetOwner []*prom.QueryResult, resRe
 
 		if ingestPodUID {
 			if uidKeys, ok := podUIDKeyMap[key]; ok {
-
 				keys = append(keys, uidKeys...)
-
 			}
 		} else {
 			keys = []podKey{key}
 		}
 
 		for _, key := range keys {
-
 			podToReplicaSet[key] = controllerKey
-
 		}
 	}
 

+ 6 - 0
pkg/costmodel/key.go

@@ -243,6 +243,12 @@ func resultReplicaSetKey(res *prom.QueryResult, clusterLabel, namespaceLabel, co
 	return resultControllerKey("replicaset", res, clusterLabel, namespaceLabel, controllerLabel)
 }
 
+// resultReplicaSetRolloutKey creates a controllerKey for a Job.
+// (See resultControllerKey for more.)
+func resultReplicaSetRolloutKey(res *prom.QueryResult, clusterLabel, namespaceLabel, controllerLabel string) (controllerKey, error) {
+	return resultControllerKey("rollout", res, clusterLabel, namespaceLabel, controllerLabel)
+}
+
 type serviceKey struct {
 	Cluster   string
 	Namespace string