Преглед изворни кода

Merge branch 'develop' into enable-image-to-run-nonroot

Jorge García пре 2 година
родитељ
комит
5fd4957e28
52 измењених фајлова са 2237 додато и 457 уклоњено
  1. 63 16
      .github/workflows/pr.yaml
  2. 2 0
      .gitignore
  3. 22 9
      CONTRIBUTING.md
  4. 18 0
      Dockerfile.cross
  5. 141 0
      GOVERNANCE.md
  6. 7 4
      MAINTAINERS.md
  7. 1 0
      config/invalid.json
  8. 2 0
      configs/pricing_schema_pv_storageclass.csv
  9. 2 0
      configs/pricing_schema_special_char.csv
  10. 1 1
      go.mod
  11. 2 6
      go.sum
  12. 63 0
      justfile
  13. 8 9
      pkg/cloud/aws/athenaintegration.go
  14. 1 1
      pkg/cloud/aws/athenaintegration_test.go
  15. 99 0
      pkg/cloud/azure/azurestorageintegration.go
  16. 69 0
      pkg/cloud/azure/azurestorageintegration_test.go
  17. 369 0
      pkg/cloud/gcp/bigqueryintegration.go
  18. 58 0
      pkg/cloud/gcp/bigqueryintegration_test.go
  19. 11 80
      pkg/cloud/gcp/bigqueryquerier.go
  20. 3 3
      pkg/cloud/gcp/provider.go
  21. 1 0
      pkg/cloud/models/models.go
  22. 11 4
      pkg/cloud/provider/csvprovider.go
  23. 94 94
      pkg/costmodel/allocation.go
  24. 2 2
      pkg/costmodel/allocation_helpers.go
  25. 9 5
      pkg/costmodel/assets.go
  26. 65 65
      pkg/costmodel/cluster.go
  27. 5 0
      pkg/costmodel/cluster_helpers.go
  28. 9 0
      pkg/costmodel/cluster_helpers_test.go
  29. 1 1
      pkg/costmodel/clusters/clustermap.go
  30. 66 62
      pkg/costmodel/costmodel.go
  31. 10 10
      pkg/costmodel/metrics.go
  32. 12 1
      pkg/env/costmodelenv.go
  33. 9 1
      pkg/filemanager/filemanager_test.go
  34. 53 17
      pkg/filter/util/cloudcost.go
  35. 134 0
      pkg/filter/util/cloudcost_test.go
  36. 87 0
      pkg/kubecost/allocation.go
  37. 2 0
      pkg/kubecost/allocation_json.go
  38. 121 0
      pkg/kubecost/allocation_test.go
  39. 15 0
      pkg/metrics/kubemetrics.go
  40. 10 9
      pkg/prom/diagnostics.go
  41. 145 38
      pkg/util/allocationfilterutil/queryfilters.go
  42. 209 0
      pkg/util/allocationfilterutil/queryfilters_test.go
  43. 18 14
      pkg/util/allocationfilterutil/v2/parser.go
  44. 10 0
      pkg/util/compat.go
  45. 56 0
      pkg/util/compat_test.go
  46. 69 3
      test/cloud_test.go
  47. 10 0
      ui/Dockerfile.cross
  48. 31 0
      ui/justfile
  49. 11 2
      ui/src/Reports.js
  50. 15 0
      ui/src/components/Controls/Edit.js
  51. 4 0
      ui/src/components/Controls/index.js
  52. 1 0
      ui/src/constants/currencyCodes.js

+ 63 - 16
.github/workflows/pr.yaml

@@ -1,4 +1,4 @@
-name: Develop PR - build, test
+name: Develop PR - build test
 
 on:
   pull_request:
@@ -6,27 +6,74 @@ on:
       - develop
 
 jobs:
-  build:
-    strategy:
-      matrix:
-        include:
-          - component: Frontend
-            location: ui
-          - component: Backend
-            location: .
+  backend:
     runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          path: ./
+
+      -
+        name: Install just
+        uses: extractions/setup-just@v1
+
+      -
+        name: Install Go
+        uses: actions/setup-go@v4
+        with:
+          go-version: 'stable'
+
+      # Saves us from having to redownload all modules
+      - name: Go Mod cache
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+
+      -
+        name: Test
+        run: |
+          just test
 
+      -
+        name: Build
+        run: |
+          just build-local
+
+  frontend:
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
         with:
           path: ./
 
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
+      -
+        name: Install just
+        uses: extractions/setup-just@v1
 
-      - name: Build ${{ matrix.component }}
-        uses: docker/build-push-action@v2
+      -
+        name: Install node
+        uses: actions/setup-node@v3
         with:
-          context: ${{ matrix.location }}/
-          file: ${{ matrix.location }}/Dockerfile
-          push: false
+          node-version: '18.3.0'
+
+      - name: Get npm cache directory
+        id: npm-cache-dir
+        shell: bash
+        run: echo "dir=$(npm config get cache)" >> ${GITHUB_OUTPUT}
+
+      - uses: actions/cache@v3
+        id: npm-cache # use this to check for `cache-hit` ==> if: steps.npm-cache.outputs.cache-hit != 'true'
+        with:
+          path: ${{ steps.npm-cache-dir.outputs.dir }}
+          key: ${{ runner.os }}-node-${{ hashFiles('./ui/**/package-lock.json') }}
+          restore-keys: |
+            ${{ runner.os }}-node-
+
+      -
+        name: Build
+        working-directory: ./ui
+        run: |
+          just build-local

+ 2 - 0
.gitignore

@@ -7,4 +7,6 @@ ui/.cache
 ui/dist
 ui/node_modules/
 cmd/costmodel/costmodel
+cmd/costmodel/costmodel-amd64
+cmd/costmodel/costmodel-arm64
 pkg/cloud/azureorphan_test.go

+ 22 - 9
CONTRIBUTING.md

@@ -24,18 +24,31 @@ This repository's contribution workflow follows a typical open-source model:
 
 ## Building OpenCost
 
-Follow these steps to build the OpenCost cost-model and UI from source and deploy:
+Follow these steps to build the OpenCost cost-model and UI from source and
+deploy. The provided build tooling is natively multi-architecture (built images
+will run on both AMD64 and ARM64 clusters).
 
-1. `docker build --rm -f "Dockerfile" -t <repo>/opencost:<tag> .`
+Dependencies:
+1. Docker (with `buildx`)
+2. [just](https://github.com/casey/just) (if you don't want to install Just, read the `justfile` and run the commands manually)
+3. Multi-arch `buildx` builders set up via https://github.com/tonistiigi/binfmt
+4. `npm` (if you want to build the UI)
+
+### Build the backend
+
+1. `just build "<repo>/opencost:<tag>"`
 2. Edit the [pulled image](https://github.com/opencost/opencost/blob/develop/kubernetes/opencost.yaml#L145) in the `kubernetes/opencost.yaml` to `<repo>/opencost:<tag>`
 3. Set [this environment variable](https://github.com/opencost/opencost/blob/develop/kubernetes/opencost.yaml#L155) to the address of your Prometheus server
-4. `cd ui`
-5. `docker build --rm -f "Dockerfile" -t <repo>/opencost-ui:<tag> .`
-6. `cd ..`
-7. Edit the [pulled image](https://github.com/opencost/opencost/blob/develop/kubernetes/opencost.yaml#L162) in the `kubernetes/opencost.yaml` to `<repo>/opencost-ui:<tag>`
-8. `kubectl create namespace opencost`
-9. `kubectl apply -f kubernetes/opencost --namespace opencost`
-10. `kubectl -n opencost port-forward service/opencost 9090 9003`
+
+### Build the frontend
+1. `cd ui && just build-ui "<repo>/opencost-ui:<tag>"`
+2. Edit the [pulled image](https://github.com/opencost/opencost/blob/develop/kubernetes/opencost.yaml#L162) in the `kubernetes/opencost.yaml` to `<repo>/opencost-ui:<tag>`
+
+### Deploy to a cluster
+
+1. `kubectl create namespace opencost`
+2. `kubectl apply -f kubernetes/opencost --namespace opencost`
+3. `kubectl -n opencost port-forward service/opencost 9090 9003`
 
 To test, build the OpenCost containers and then push them to a Kubernetes cluster with a running Prometheus.
 

+ 18 - 0
Dockerfile.cross

@@ -0,0 +1,18 @@
+FROM alpine:latest
+
+# The prebuilt binary path. This Dockerfile assumes the binary will be built
+# outside of Docker.
+ARG binarypath
+
+RUN apk add --update --no-cache ca-certificates
+
+ADD --chmod=644 ./configs/default.json /models/default.json
+ADD --chmod=644 ./configs/azure.json /models/azure.json
+ADD --chmod=644 ./configs/aws.json /models/aws.json
+ADD --chmod=644 ./configs/gcp.json /models/gcp.json
+ADD --chmod=644 ./configs/alibaba.json /models/alibaba.json
+
+COPY ${binarypath} /go/bin/app
+
+USER 1001
+ENTRYPOINT ["/go/bin/app"]

+ 141 - 0
GOVERNANCE.md

@@ -0,0 +1,141 @@
+# OpenCost Governance
+
+> **Note**
+> OpenCost community governance is a work in progress as we expand beyond the initial Kubecost stewardship. Expanding the committers and maintainers to additional organizations will allow the project to become more self-managing.
+
+This document attempts to clarify how the [OpenCost](https://github.com/opencost/) projects are maintained. Anyone interested in improving the project may join the community, contribute to the project, and participate in shaping future releases. This document attempts to outline the general participation structure and set expectations within the project community.
+
+## Code of Conduct
+
+OpenCost community members are expected to adhere to our published [Code of Conduct](CODE_OF_CONDUCT.md).
+
+## Roles and Responsibilities
+
+There are 4 levels of membership in the OpenCost community.
+
+### Contributor
+
+Contributors are community members who contribute in concrete ways to the project. Anyone can contribute to the project and become a contributor, regardless of their skillset. There is no expectation of commitment to the project, no specific skill requirements, and no selection process. There are many ways to contribute to the project, which may be one or more of the following (but not limited to):
+
+- Participate in community discussions in the `#opencost` channel in [CNCF Slack](https://slack.cncf.io/) or at [community meetings](https://bit.ly/opencost-meeting)
+- Report, comment on, and sometimes resolve Issues
+- Occasionally submit PRs
+- Try out new releases
+- Contribute to the documentation
+- Improve the OpenCost website
+- Promote the project in public
+- Help other users
+
+For first-time contributors, it’s recommended to start by going through [Contributing to OpenCost](CONTRIBUTING.md) and joining our community Slack channel.
+
+### Member
+
+[Members](https://github.com/orgs/opencost/people) are continuously active contributors in the community. There are multiple ways to stay "active" and engaged with us - contributing to code, raising issues, writing tutorials and case studies, and even answering questions.
+
+To become an OpenCost member, you are expected to:
+
+- Make multiple contributions, which may be one or more of the following (but not limited to):
+    - Authored PRs on GitHub.
+    - Filed, or commented on Issues on GitHub.
+    - Join community discussions (e.g. community meetings, Slack).
+- Sponsored by at least 1 OpenCost [maintainer or committer](MAINTAINERS.md)
+
+Contributors that meet the above requirements will then be invited to the GitHub organization "OpenCost" by a sponsor, and an announcement will be published in the Slack channel [(#opencost)](https://slack.cncf.io/).
+
+Members are expected to respond to issues and PRs assigned to them, and be the owners of the code or docs they have contributed. Members that have not contributed to the project or community for over 6 months may lose their membership.
+
+### Committer
+
+Committers are active community members who have shown that they are committed to the success of the project through ongoing engagement with the community. Committership allows contributors to more easily carry on with their project-related activities by giving them direct access to the project’s resources.
+
+Committers are granted `Triage` permissions for the OpenCost repository.
+
+Typically, a potential committer needs to show that they have a sufficient understanding of the project, its objectives, and its strategy. To become a committer, you are expected to:
+
+- Be an OpenCost member.
+- Express interest to the existing maintainers that you are interested in becoming a committer.
+- Have contributed 5 or more substantial PRs.
+- Have an above-average understanding of the project codebase, its goals, and directions.
+
+Members that meet the above requirements will be nominated by an existing maintainer to become a committer. It is recommended to describe the reasons for the nomination and the contribution of the nominee in the PR. The existing maintainers will confer and decide whether to grant committer status or not.
+
+Committers are expected to review issues and PRs. While committership indicates a valued member of the community who has demonstrated a healthy respect for the project’s aims and objectives, their work continues to be reviewed by the community before acceptance in an official release.
+
+### Maintainer
+
+Maintainers are first and foremost committers that have shown they are committed to the long term success of a project. They are the planners and designers of the OpenCost project. Maintainership is about building trust with the current maintainers of the project and being a person that they can depend on to make decisions in the best interest of the project in a consistent manner.
+
+Maintainers are granted `Write` permissions for the OpenCost repository.
+
+Committers wanting to become maintainers are expected to:
+
+- Enable adoptions or ecosystems.
+- Collaborate well.
+- Demonstrate a deep and comprehensive understanding of OpenCost's architecture, technical goals, and directions.
+- Actively engage with major OpenCost feature proposals and implementations.
+
+A new maintainer must be nominated by an existing maintainer. The nominating maintainer will create a PR to update the [Maintainers List](MAINTAINERS.md). It is recommended to describe the reasons for the nomination and the contribution of the nominee in the PR. Upon consensus of incumbent maintainers, the PR will be approved and the new maintainer becomes active.
+
+## Policies and Procedures
+
+### Community Meetings
+
+The [OpenCost Community Meeting](https://bit.ly/opencost-meeting) is every 2 weeks at 1pm Pacific. Community Members are encouraged to attend and discuss development, events, and any other OpenCost community items of interest.
+
+### Issue/PR Timelines
+
+Best efforts will be given to respond to new Issues and PRs within 24 hours on weekdays.
+
+### Approving PRs
+
+PRs may be merged only after receiving at least one approval from committers or maintainers. However, maintainers can sidestep this rule under justifiable circumstances. For example:
+
+- If a CI tool is broken, a maintainer may override the tool to still submit the change.
+- Minor typos or fixes for broken tests.
+- The change was approved through other means than the standard process.
+
+### Decision Making Process
+
+Ideally, all project decisions are resolved by consensus via a PR or GitHub issue. Any of the day-to-day project maintenance can be done by a [lazy consensus model](https://communitymgt.fandom.com/wiki/Lazy_consensus).
+
+Community or project level decisions such as RFC submission, creating a new project, maintainer promotion, and major updates on GOVERNANCE must be brought to broader awareness of the community via community meetings, GitHub discussions, and the Slack channel. A supermajority (2/3) approval from Maintainers is required for such approvals.
+
+In general, we prefer that technical issues and maintainer membership are amicably worked out between the persons involved. If a dispute cannot be decided independently, the maintainers can be called in to resolve the issue by voting. For voting, a specific statement of what is being voted on should be added to the relevant GitHub issue or PR, and a link to that issue or PR added to the maintainers meeting agenda document. Maintainers should indicate their yes/no vote on that issue or PR, and after a suitable period of time, the votes will be tallied and the outcome noted.
+
+### Conflict resolution and voting
+
+In general, we prefer that technical issues and membership are amicably worked out between the persons involved. If a dispute cannot be decided independently, the sponsors and core maintainers can be called in to decide an issue. If the sponsors and maintainers themselves cannot decide an issue, the issue will be resolved by voting.
+
+In all cases in this document where voting is mentioned, the voting process is a simple majority in which each sponsor receives two votes and each core maintainer receives one vote. If such a majority is reached, the vote is said to have _passed_.
+
+### Proposal process
+
+> **Note**
+> We intend to use a Request for Comments (RFC) process for any substantial changes to OpenCost, but this has yet to be developed.
+
+### Inactivity
+
+It is important for contributors to be and stay active to set an example and show commitment to the project. Inactivity is harmful to the project as it may lead to unexpected delays, contributor attrition, and a loss of trust in the project.
+
+Inactivity is measured by periods of no contributions without explanation, for longer than:
+
+- Maintainer: 3 months
+- Committer: 6 months
+- Member: 12 months
+
+Consequences of being inactive include:
+
+- Involuntary removal or demotion
+- Being asked to move to Emeritus status
+
+### Involuntary Removal or Demotion
+
+Involuntary removal/demotion of a contributor happens when responsibilities and requirements aren't being met. This may include repeated patterns of inactivity, an extended period of inactivity, a period of failing to meet the requirements of your role, and/or a violation of the Code of Conduct. This process is important because it protects the community and its deliverables while also opening up opportunities for new contributors to step in.
+
+Involuntary removal or demotion is handled through a vote by a majority of the current Maintainers.
+
+### Stepping Down/Emeritus Process
+
+If and when Contributors' commitment levels change, Contributors can consider stepping down (moving down the Contributor ladder) vs moving to emeritus status (completely stepping away from the project).
+
+Contact the Maintainers about changing to Emeritus status, or reducing your contributor level. An Emeritus list will be added to the [MAINTAINERS.md](MAINTAINERS.md).

+ 7 - 4
MAINTAINERS.md

@@ -1,8 +1,11 @@
-# OpenCost Maintainers
+# OpenCost Committers and Maintainers
 
-Official list of OpenCost Maintainers.
+Official list of OpenCost Committers and Maintainers. This is managed as documented in the [GOVERNANCE.md](GOVERNANCE.md).
 
-Please keep the below list sorted in ascending order.
+## Committers
+
+| Committer | GitHub ID | Affiliation | Email |
+| --------------- | --------- | ----------- | ----------- |
 
 ## Maintainers
 
@@ -10,8 +13,8 @@ Please keep the below list sorted in ascending order.
 | --------------- | --------- | ----------- | ----------- |
 | Ajay Tripathy | @AjayTripathy | Kubecost | <Ajay@kubecost.com> |
 | Matt Bolt | @​mbolt35 | Kubecost | <matt@kubecost.com> |
+| Matt Ray | @mattray | Kubecost | <mattray@kubecost.com> |
 | Michael Dresser | @michaelmdresser | Kubecost | <michael@kubecost.com> |
 | Niko Kovacevic | @nikovacevic | Kubecost | <niko@kubecost.com> |
 | Sean Holcomb | @Sean-Holcomb | Kubecost | <Sean@kubecost.com> |
 | Thomas Evans | @teevans | Kubecost | <thomas@kubecost.com> |
-| Matt Ray | @mattray | Kubecost | <mattray@kubecost.com> |

+ 1 - 0
config/invalid.json

@@ -0,0 +1 @@
+{"provider":"base","description":"Default prices based on GCP us-central1","CPU":"0.031611","spotCPU":"0.006655","RAM":"0.004237","spotRAM":"0.000892","GPU":"0.95","spotGPU":"0.308","storage":"0.00005479452","zoneNetworkEgress":"0.01","regionNetworkEgress":"0.01","internetNetworkEgress":"0.12","firstFiveForwardingRulesCost":"","additionalForwardingRuleCost":"","LBIngressDataCost":"","athenaBucketName":"","athenaRegion":"","athenaDatabase":"","athenaTable":"","athenaWorkgroup":"","masterPayerARN":"","customPricesEnabled":"false","defaultIdle":"","azureSubscriptionID":"","azureClientID":"","azureClientSecret":"","azureTenantID":"","azureBillingRegion":"","azureOfferDurableID":"","azureStorageSubscriptionID":"","azureStorageAccount":"","azureStorageAccessKey":"","azureStorageContainer":"","azureContainerPath":"","azureCloud":"","currencyCode":"","discount":"","negotiatedDiscount":"","sharedOverhead":"","clusterName":"","sharedNamespaces":"","sharedLabelNames":"","sharedLabelValues":"","shareTenancyCosts":"true","readOnly":"","editorAccess":"","kubecostToken":"","googleAnalyticsTag":"","excludeProviderID":""}

+ 2 - 0
configs/pricing_schema_pv_storageclass.csv

@@ -0,0 +1,2 @@
+EndTimestamp,InstanceID,Region,AssetClass,InstanceIDField,InstanceType,MarketPriceHourly,Version
+2019-04-17 23:34:22 UTC,storageClass0,,pv,spec.storageClassName,,0.1338,

+ 2 - 0
configs/pricing_schema_special_char.csv

@@ -0,0 +1,2 @@
+EndTimestamp,InstanceID,Region,AssetClass,InstanceIDField,InstanceType,MarketPriceHourly,Version
+2019-04-17 23:34:22 UTC,gke-standard-cluster-1-pool-1-91dc432d-cg69,,node,metadata.labels.<http://metadata.label.servers.com/label|metadata.label.servers.com/label>,,0.1337,

+ 1 - 1
go.mod

@@ -97,7 +97,7 @@ require (
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
 	github.com/dimchansky/utfbom v1.1.1 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/emicklei/go-restful/v3 v3.8.0 // indirect
+	github.com/emicklei/go-restful/v3 v3.10.2 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
 	github.com/go-logr/logr v1.2.3 // indirect
 	github.com/go-openapi/jsonpointer v0.19.5 // indirect

+ 2 - 6
go.sum

@@ -56,14 +56,10 @@ github.com/Azure/azure-pipeline-go v0.2.3 h1:7U9HBg1JFK3jHl5qmo4CTZKFTVgMwdFHMVt
 github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k=
 github.com/Azure/azure-sdk-for-go v65.0.0+incompatible h1:HzKLt3kIwMm4KeJYTdx9EbjRYTySD/t8i1Ee/W5EGXw=
 github.com/Azure/azure-sdk-for-go v65.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
-github.com/Azure/azure-sdk-for-go/sdk/azcore v1.4.1-0.20230323231529-14c481f239ec h1:S83Dzhd3VLyvN2bgFI7/Lgk1etamk3Pk8QQhn3iXt4s=
-github.com/Azure/azure-sdk-for-go/sdk/azcore v1.4.1-0.20230323231529-14c481f239ec/go.mod h1:IoxiGSzhL1QHFXa/mlAXCD+sUaP0rxg//yn2w/JH7wg=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.5.0 h1:xGLAFFd9D3iLGxYiUGPdITSzsFmU1K8VtfuUHWAoN7M=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.5.0/go.mod h1:bjGvMhVMb+EEm3VRNQawDMUyMMjo+S5ewNjflkep/0Q=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.2 h1:uqM+VoHjVH6zdlkLF2b6O0ZANcHoj3rO0PoQ3jglUJA=
 github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.2.2/go.mod h1:twTKAa1E6hLmSDjLhaCkbTMQKc7p/rNLU40rLxGEOCI=
-github.com/Azure/azure-sdk-for-go/sdk/internal v1.2.0 h1:leh5DwKv6Ihwi+h60uHtn6UWAxBbZ0q8DwQVMzf61zw=
-github.com/Azure/azure-sdk-for-go/sdk/internal v1.2.0/go.mod h1:eWRD7oawr1Mu1sLCawqVc0CUiF43ia3qQMxLscsKQ9w=
 github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 h1:sXr+ck84g/ZlZUOZiNELInmMgOsuGwdjjVkEIde0OtY=
 github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0/go.mod h1:okt5dMMTOFjX/aovMlrjvvXoPMBVSPzk9185BT0+eZM=
 github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.0.0 h1:u/LLAOFgsMv7HmNL4Qufg58y+qElGOt5qv0z1mURkRY=
@@ -224,8 +220,8 @@ github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25Kn
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
 github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM=
-github.com/emicklei/go-restful/v3 v3.8.0 h1:eCZ8ulSerjdAiaNpF7GxXIE7ZCMo1moN1qX+S609eVw=
-github.com/emicklei/go-restful/v3 v3.8.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
+github.com/emicklei/go-restful/v3 v3.10.2 h1:hIovbnmBTLjHXkqEBUz3HGpXZdM7ZrE9fJIZIqlJLqE=
+github.com/emicklei/go-restful/v3 v3.10.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
 github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
 github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=

+ 63 - 0
justfile

@@ -0,0 +1,63 @@
+commonenv := "CGO_ENABLED=0"
+
+version := "dev"
+commit := `git rev-parse --short HEAD`
+
+default:
+    just --list
+
+# Run unit tests
+test:
+    {{commonenv}} go test ./...
+
+# Compile a local binary
+build-local:
+    cd ./cmd/costmodel && \
+        {{commonenv}} go build \
+        -ldflags \
+          "-X github.com/opencost/opencost/pkg/version.Version={{version}} \
+           -X github.com/opencost/opencost/pkg/version.GitCommit={{commit}}" \
+        -o ./costmodel
+
+# Build multiarch binaries
+build-binary VERSION=version:
+    cd ./cmd/costmodel && \
+        {{commonenv}} GOOS=linux GOARCH=amd64 go build \
+        -ldflags \
+          "-X github.com/opencost/opencost/pkg/version.Version={{VERSION}} \
+           -X github.com/opencost/opencost/pkg/version.GitCommit={{commit}}" \
+        -o ./costmodel-amd64
+
+    cd ./cmd/costmodel && \
+        {{commonenv}} GOOS=linux GOARCH=arm64 go build \
+        -ldflags \
+          "-X github.com/opencost/opencost/pkg/version.Version={{VERSION}} \
+           -X github.com/opencost/opencost/pkg/version.GitCommit={{commit}}" \
+        -o ./costmodel-arm64
+
+# Build and push a multi-arch Docker image
+build IMAGETAG VERSION=version: test (build-binary VERSION)
+    docker buildx build \
+        --rm \
+        --platform "linux/amd64" \
+        -f 'Dockerfile.cross' \
+        --build-arg binarypath=./cmd/costmodel/costmodel-amd64 \
+        --provenance=false \
+        -t {{IMAGETAG}}-amd64 \
+        --push \
+        .
+
+    docker buildx build \
+        --rm \
+        --platform "linux/arm64" \
+        -f 'Dockerfile.cross' \
+        --build-arg binarypath=./cmd/costmodel/costmodel-arm64 \
+        --provenance=false \
+        -t {{IMAGETAG}}-arm64 \
+        --push \
+        .
+
+    manifest-tool push from-args \
+        --platforms "linux/amd64,linux/arm64" \
+        --template {{IMAGETAG}}-ARCH \
+        --target {{IMAGETAG}}

+ 8 - 9
pkg/cloud/aws/athenaintegration.go

@@ -69,7 +69,7 @@ type AthenaIntegration struct {
 
 // Query Athena for CUR data and build a new CloudCostSetRange containing the info
 func (ai *AthenaIntegration) GetCloudCost(start, end time.Time) (*kubecost.CloudCostSetRange, error) {
-	log.Infof("AthenaIntegration[%s]: StoreCloudCost: %s", ai.Key(), kubecost.NewWindow(&start, &end).String())
+	log.Infof("AthenaIntegration[%s]: GetCloudCost: %s", ai.Key(), kubecost.NewWindow(&start, &end).String())
 	// Query for all column names
 	allColumns, err := ai.GetColumns()
 	if err != nil {
@@ -185,31 +185,30 @@ func (ai *AthenaIntegration) GetCloudCost(start, end time.Time) (*kubecost.Cloud
 	`
 	aqi.Query = fmt.Sprintf(queryStr, columnStr, ai.Table, whereClause, groupByStr)
 
-	CCSR, err := kubecost.NewCloudCostSetRange(start, end, timeutil.Day, ai.Key())
+	ccsr, err := kubecost.NewCloudCostSetRange(start, end, timeutil.Day, ai.Key())
 	if err != nil {
 		return nil, err
 	}
 
 	// Generate row handling function.
 	rowHandler := func(row types.Row) {
-		err2 := ai.RowToCloudCost(row, aqi, CCSR)
+		err2 := ai.RowToCloudCost(row, aqi, ccsr)
 		if err2 != nil {
-			log.Errorf("AthenaIntegration: queryCloudCostCompute: error while parsing row: %s", err2.Error())
+			log.Errorf("AthenaIntegration: GetCloudCost: error while parsing row: %s", err2.Error())
 		}
 	}
-	log.Debugf("AthenaIntegration[%s]: queryCloudCostCompute: querying: %s", ai.Key(), aqi.Query)
+	log.Debugf("AthenaIntegration[%s]: GetCloudCost: querying: %s", ai.Key(), aqi.Query)
 	// Query CUR data and fill out CCSR
 	err = ai.Query(context.TODO(), aqi.Query, GetAthenaQueryFunc(rowHandler))
 	if err != nil {
 		return nil, err
 	}
 
-	// TODO: May not be needed anymore?
-	for _, ccs := range CCSR.CloudCostSets {
-		log.Debugf("AthenaIntegration[%s]: queryCloudCostCompute: writing compute items for window %s: %d", ai.Key(), ccs.Window, len(ccs.CloudCosts))
+	for _, ccs := range ccsr.CloudCostSets {
+		log.Debugf("AthenaIntegration[%s]: GetCloudCost: writing compute items for window %s: %d", ai.Key(), ccs.Window, len(ccs.CloudCosts))
 		ai.ConnectionStatus = ai.GetConnectionStatusFromResult(ccs, ai.ConnectionStatus)
 	}
-	return CCSR, nil
+	return ccsr, nil
 
 }
 

+ 1 - 1
pkg/cloud/aws/athenaintegration_test.go

@@ -9,7 +9,7 @@ import (
 	"github.com/opencost/opencost/pkg/util/timeutil"
 )
 
-func GetCloudCost_Test(t *testing.T) {
+func TestAthenaIntegration_GetCloudCost(t *testing.T) {
 	athenaConfigPath := os.Getenv("ATHENA_CONFIGURATION")
 	if athenaConfigPath == "" {
 		t.Skip("skipping integration test, set environment variable ATHENA_CONFIGURATION")

+ 99 - 0
pkg/cloud/azure/azurestorageintegration.go

@@ -0,0 +1,99 @@
+package azure
+
+import (
+	"strings"
+	"time"
+
+	"github.com/opencost/opencost/pkg/cloud"
+	"github.com/opencost/opencost/pkg/kubecost"
+	"github.com/opencost/opencost/pkg/util/timeutil"
+)
+
+type AzureStorageIntegration struct {
+	AzureStorageBillingParser
+	ConnectionStatus cloud.ConnectionStatus
+}
+
+func (asi *AzureStorageIntegration) GetCloudCost(start, end time.Time) (*kubecost.CloudCostSetRange, error) {
+	ccsr, err := kubecost.NewCloudCostSetRange(start, end, timeutil.Day, asi.Key())
+	if err != nil {
+		return nil, err
+	}
+
+	status, err := asi.ParseBillingData(start, end, func(abv *BillingRowValues) error {
+		s := abv.Date
+		e := abv.Date.Add(timeutil.Day)
+		window := kubecost.NewWindow(&s, &e)
+
+		k8sPtc := 0.0
+		if AzureIsK8s(abv.Tags) {
+			k8sPtc = 1.0
+		}
+
+		// Create CloudCost
+		// Using the NetCost as a 'placeholder' for Invoiced and Amortized Net costs now,
+		// until we can revisit and spend the time to do the calculations correctly
+		cc := &kubecost.CloudCost{
+			Properties: &kubecost.CloudCostProperties{
+				ProviderID:      AzureSetProviderID(abv),
+				Provider:        kubecost.AzureProvider,
+				AccountID:       abv.SubscriptionID,
+				InvoiceEntityID: abv.InvoiceEntityID,
+				Service:         abv.Service,
+				Category:        SelectAzureCategory(abv.MeterCategory),
+				Labels:          abv.Tags,
+			},
+			Window: window,
+			AmortizedNetCost: kubecost.CostMetric{
+				Cost:              abv.NetCost,
+				KubernetesPercent: k8sPtc,
+			},
+			InvoicedCost: kubecost.CostMetric{
+				Cost:              abv.NetCost,
+				KubernetesPercent: k8sPtc,
+			},
+			ListCost: kubecost.CostMetric{
+				Cost:              abv.Cost,
+				KubernetesPercent: k8sPtc,
+			},
+			NetCost: kubecost.CostMetric{
+				Cost:              abv.NetCost,
+				KubernetesPercent: k8sPtc,
+			},
+			// NOTE: on Azure, there is no "AmortizedCost" per se, so we use
+			// AmortizedNetCost, or NetCost, instead.
+			AmortizedCost: kubecost.CostMetric{
+				Cost:              abv.NetCost,
+				KubernetesPercent: k8sPtc,
+			},
+		}
+
+		// Check if Item
+		if abv.IsCompute(cc.Properties.Category) {
+			// TODO: Will need to split VMSS for other features
+			ccsr.LoadCloudCost(cc)
+		}
+		return nil
+	})
+	if err != nil {
+		asi.ConnectionStatus = status
+		return nil, err
+	}
+	return ccsr, nil
+}
+
+// Check for the presence of k8s labels
+func AzureIsK8s(labels map[string]string) bool {
+	for key := range labels {
+		if strings.HasPrefix(key, "aks-managed-") {
+			return true
+		}
+		if strings.HasPrefix(key, "kubernetes.io-created-") {
+			return true
+		}
+		if strings.HasPrefix(key, "k8s-azure-created-") {
+			return true
+		}
+	}
+	return false
+}

+ 69 - 0
pkg/cloud/azure/azurestorageintegration_test.go

@@ -0,0 +1,69 @@
+package azure
+
+import (
+	"os"
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/pkg/util/json"
+	"github.com/opencost/opencost/pkg/util/timeutil"
+)
+
+func GetCloudCost_Test(t *testing.T) {
+	azureConfigPath := os.Getenv("AZURE_CONFIGURATION")
+	if azureConfigPath == "" {
+		t.Skip("skipping integration test, set environment variable AZURE_CONFIGURATION")
+	}
+	azureConfigBin, err := os.ReadFile(azureConfigPath)
+	if err != nil {
+		t.Fatalf("failed to read config file: %s", err.Error())
+	}
+	var azureConfig StorageConfiguration
+	err = json.Unmarshal(azureConfigBin, &azureConfig)
+	if err != nil {
+		t.Fatalf("failed to unmarshal config from JSON: %s", err.Error())
+	}
+	testCases := map[string]struct {
+		integration *AzureStorageIntegration
+		start       time.Time
+		end         time.Time
+		expected    bool
+	}{
+		// No CUR data is expected within 2 days of now
+		"too_recent_window": {
+			integration: &AzureStorageIntegration{
+				AzureStorageBillingParser: AzureStorageBillingParser{
+					StorageConnection: StorageConnection{
+						StorageConfiguration: azureConfig,
+					},
+				},
+			},
+			end:      time.Now(),
+			start:    time.Now().Add(-timeutil.Day),
+			expected: true,
+		},
+		// CUR data should be available
+		"last week window": {
+			integration: &AzureStorageIntegration{
+				AzureStorageBillingParser: AzureStorageBillingParser{
+					StorageConnection: StorageConnection{
+						StorageConfiguration: azureConfig,
+					},
+				},
+			},
+			end:      time.Now().Add(-7 * timeutil.Day),
+			start:    time.Now().Add(-8 * timeutil.Day),
+			expected: false,
+		},
+	}
+	for name, testCase := range testCases {
+		t.Run(name, func(t *testing.T) {
+			actual, err := testCase.integration.GetCloudCost(testCase.start, testCase.end)
+			if err != nil {
+				t.Errorf("Other error during testing %s", err)
+			} else if actual.IsEmpty() != testCase.expected {
+				t.Errorf("Incorrect result, actual emptiness: %t, expected: %t", actual.IsEmpty(), testCase.expected)
+			}
+		})
+	}
+}

+ 369 - 0
pkg/cloud/gcp/bigqueryintegration.go

@@ -0,0 +1,369 @@
+package gcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"regexp"
+	"strings"
+	"time"
+
+	"cloud.google.com/go/bigquery"
+	"github.com/opencost/opencost/pkg/kubecost"
+	"github.com/opencost/opencost/pkg/log"
+	"github.com/opencost/opencost/pkg/util/timeutil"
+	"google.golang.org/api/iterator"
+)
+
// BigQueryIntegration queries a GCP billing export dataset via BigQuery and
// converts the result rows into kubecost CloudCost structures.
type BigQueryIntegration struct {
	BigQueryQuerier
}

// Column aliases used in the SELECT statement built by GetCloudCost; the
// CloudCostLoader.Load method switches on these names when parsing each row.
const (
	UsageDateColumnName          = "usage_date"
	BillingAccountIDColumnName   = "billing_id"
	ProjectIDColumnName          = "project_id"
	ServiceDescriptionColumnName = "service"
	SKUDescriptionColumnName     = "description"
	LabelsColumnName             = "labels"
	ResourceNameColumnName       = "resource"
	CostColumnName               = "cost"
	CreditsColumnName            = "credits"
)

// WHERE-clause templates; the partition filter limits which _PARTITIONTIME
// partitions are scanned, the date filter bounds actual usage time.
// NOTE(review): "BiqQuery" is a typo for "BigQuery" in both names — left
// unchanged here because renaming would touch call sites as well.
const BiqQueryWherePartitionFmt = `DATE(_PARTITIONTIME) >= "%s" AND DATE(_PARTITIONTIME) < "%s"`
const BiqQueryWhereDateFmt = `usage_start_time >= "%s" AND usage_start_time < "%s"`
+
+func (bqi *BigQueryIntegration) GetCloudCost(start time.Time, end time.Time) (*kubecost.CloudCostSetRange, error) {
+	// Build Query
+
+	selectColumns := []string{
+		fmt.Sprintf("TIMESTAMP_TRUNC(usage_start_time, day) as %s", UsageDateColumnName),
+		fmt.Sprintf("billing_account_id as %s", BillingAccountIDColumnName),
+		fmt.Sprintf("project.id as %s", ProjectIDColumnName),
+		fmt.Sprintf("service.description as %s", ServiceDescriptionColumnName),
+		fmt.Sprintf("sku.description as %s", SKUDescriptionColumnName),
+		fmt.Sprintf("resource.name as %s", ResourceNameColumnName),
+		fmt.Sprintf("TO_JSON_STRING(labels) as %s", LabelsColumnName),
+		fmt.Sprintf("SUM(cost) as %s", CostColumnName),
+		fmt.Sprintf("IFNULL(SUM((Select SUM(amount) FROM bd.credits)),0) as %s", CreditsColumnName),
+	}
+
+	groupByColumns := []string{
+		UsageDateColumnName,
+		BillingAccountIDColumnName,
+		ProjectIDColumnName,
+		ServiceDescriptionColumnName,
+		SKUDescriptionColumnName,
+		LabelsColumnName,
+		ResourceNameColumnName,
+	}
+
+	partitionStart := start
+	partitionEnd := end.AddDate(0, 0, 2)
+	wherePartition := fmt.Sprintf(BiqQueryWherePartitionFmt, partitionStart.Format("2006-01-02"), partitionEnd.Format("2006-01-02"))
+	whereDate := fmt.Sprintf(BiqQueryWhereDateFmt, start.Format("2006-01-02"), end.Format("2006-01-02"))
+
+	whereConjuncts := []string{
+		wherePartition,
+		whereDate,
+	}
+
+	columnStr := strings.Join(selectColumns, ", ")
+	table := fmt.Sprintf(" `%s` bd ", bqi.GetBillingDataDataset())
+	whereClause := strings.Join(whereConjuncts, " AND ")
+	groupByStr := strings.Join(groupByColumns, ", ")
+	queryStr := `
+		SELECT %s
+		FROM %s
+		WHERE %s
+		GROUP BY %s
+	`
+
+	querystr := fmt.Sprintf(queryStr, columnStr, table, whereClause, groupByStr)
+
+	// Perform Query and parse values
+
+	ccsr, err := kubecost.NewCloudCostSetRange(start, end, timeutil.Day, bqi.Key())
+	if err != nil {
+		return ccsr, fmt.Errorf("error creating new CloudCostSetRange: %s", err)
+	}
+
+	iter, err := bqi.Query(context.Background(), querystr)
+	if err != nil {
+		return ccsr, fmt.Errorf("error querying: %s", err)
+	}
+
+	// Parse query into CloudCostSetRange
+	for {
+		var ccl CloudCostLoader
+		err = iter.Next(&ccl)
+		if err == iterator.Done {
+			break
+		}
+		if err != nil {
+			return ccsr, err
+		}
+		if ccl.CloudCost == nil {
+			continue
+		}
+		ccsr.LoadCloudCost(ccl.CloudCost)
+
+	}
+	return ccsr, nil
+
+}
+
// CloudCostLoader adapts a BigQuery result row into a kubecost CloudCost;
// its Load method implements the bigquery.ValueLoader interface. CloudCost
// remains nil if Load fails before building the cost item.
type CloudCostLoader struct {
	CloudCost *kubecost.CloudCost
}
+
+// Load populates the fields of a CloudCostValues with bigquery.Value from provided slice
+func (ccl *CloudCostLoader) Load(values []bigquery.Value, schema bigquery.Schema) error {
+
+	// Create Cloud Cost Properties
+	properties := kubecost.CloudCostProperties{
+		Provider: kubecost.GCPProvider,
+	}
+	var window kubecost.Window
+	var description string
+	var listCost float64
+	var credits float64
+
+	for i, field := range schema {
+		if field == nil {
+			log.DedupedErrorf(5, "GCP: BigQuery: found nil field in schema")
+			continue
+		}
+
+		switch field.Name {
+		case UsageDateColumnName:
+			usageDate, ok := values[i].(time.Time)
+			if !ok {
+				// It would be very surprising if an unparsable time came back from the API, so it should be ok to return here.
+				return fmt.Errorf("error parsing usage date: %v", values[0])
+			}
+			// start and end will be the day that the usage occurred on
+			s := usageDate
+			e := s.Add(timeutil.Day)
+			window = kubecost.NewWindow(&s, &e)
+		case BillingAccountIDColumnName:
+			invoiceEntityID, ok := values[i].(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", BillingAccountIDColumnName, values[i])
+				invoiceEntityID = ""
+			}
+			properties.InvoiceEntityID = invoiceEntityID
+		case ProjectIDColumnName:
+			accountID, ok := values[i].(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", ProjectIDColumnName, values[i])
+				accountID = ""
+			}
+			properties.AccountID = accountID
+		case ServiceDescriptionColumnName:
+			service, ok := values[i].(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", ServiceDescriptionColumnName, values[i])
+				service = ""
+			}
+			properties.Service = service
+		case SKUDescriptionColumnName:
+			d, ok := values[i].(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", SKUDescriptionColumnName, values[i])
+				d = ""
+			}
+			description = d
+		case LabelsColumnName:
+			labelJSON, ok := values[i].(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", LabelsColumnName, values[i])
+			}
+			labelList := []map[string]string{}
+			err := json.Unmarshal([]byte(labelJSON), &labelList)
+			if err != nil {
+				log.Warnf("GCP Cloud Assets: error unmarshaling GCP CloudCost labels: %s", err)
+			}
+			labels := map[string]string{}
+			for _, pair := range labelList {
+				key := pair["key"]
+				value := pair["value"]
+				labels[key] = value
+			}
+			properties.Labels = labels
+		case ResourceNameColumnName:
+			resouceNameValue := values[i]
+			if resouceNameValue == nil {
+				properties.ProviderID = ""
+				continue
+			}
+			resource, ok := resouceNameValue.(string)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", ResourceNameColumnName, values[i])
+				properties.ProviderID = ""
+				continue
+			}
+
+			properties.ProviderID = ParseProviderID(resource)
+		case CostColumnName:
+			cost, ok := values[i].(float64)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", CostColumnName, values[i])
+				cost = 0.0
+			}
+			listCost = cost
+		case CreditsColumnName:
+			creditSum, ok := values[i].(float64)
+			if !ok {
+				log.DedupedErrorf(5, "error parsing GCP CloudCost %s: %v", CreditsColumnName, values[i])
+				creditSum = 0.0
+			}
+			credits = creditSum
+		default:
+			log.DedupedErrorf(5, "GCP: BigQuery: found unrecognized column name %s", field.Name)
+		}
+	}
+
+	// Check required Fields
+	if window.IsOpen() {
+		return fmt.Errorf("GCP: BigQuery: error parsing, item had invalid window")
+	}
+
+	// Determine Category
+	properties.Category = SelectCategory(properties.Service, description)
+
+	// sum credit and cost for NetCost
+	netCost := listCost + credits
+
+	// Using the NetCost as a 'placeholder' for these costs now, until we can revisit and spend the time to do
+	// the calculations correctly
+	amortizedCost := netCost
+	amortizedNetCost := netCost
+	invoicedCost := netCost
+
+	// percent k8s is determined by the presence of labels
+	k8sPercent := 0.0
+	if IsK8s(properties.Labels) {
+		k8sPercent = 1.0
+	}
+
+	ccl.CloudCost = &kubecost.CloudCost{
+		Properties: &properties,
+		Window:     window,
+		ListCost: kubecost.CostMetric{
+			Cost:              listCost,
+			KubernetesPercent: k8sPercent,
+		},
+		AmortizedCost: kubecost.CostMetric{
+			Cost:              amortizedCost,
+			KubernetesPercent: k8sPercent,
+		},
+		AmortizedNetCost: kubecost.CostMetric{
+			Cost:              amortizedNetCost,
+			KubernetesPercent: k8sPercent,
+		},
+		InvoicedCost: kubecost.CostMetric{
+			Cost:              invoicedCost,
+			KubernetesPercent: k8sPercent,
+		},
+		NetCost: kubecost.CostMetric{
+			Cost:              netCost,
+			KubernetesPercent: k8sPercent,
+		},
+	}
+
+	return nil
+}
+
// IsK8s reports whether the label set contains any of the GKE-managed label
// keys that identify a GCP resource as belonging to a Kubernetes cluster.
func IsK8s(labels map[string]string) bool {
	gkeKeys := []string{
		"goog-gke-volume",
		"goog-gke-node",
		"goog-k8s-cluster-name",
	}
	for _, key := range gkeKeys {
		if _, found := labels[key]; found {
			return true
		}
	}
	return false
}
+
// parseProviderIDRx captures the final path segment of a GCP resource name,
// e.g. "gke-cluster-3-default-pool-xxxx-yy" from
// "projects/###/instances/gke-cluster-3-default-pool-xxxx-yy".
// Written as a raw string literal so the regex needs no double escaping.
var parseProviderIDRx = regexp.MustCompile(`^.+/(.+)?`)

// ParseProviderID returns the trailing path segment of a GCP resource name;
// inputs containing no "/" are returned unchanged.
func ParseProviderID(id string) string {
	match := parseProviderIDRx.FindStringSubmatch(id)
	if len(match) == 0 {
		return id
	}
	return match[len(match)-1]
}
+
+func SelectCategory(service, description string) string {
+	s := strings.ToLower(service)
+	d := strings.ToLower(description)
+
+	// Network descriptions
+	if strings.Contains(d, "download") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "network") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "ingress") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "egress") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "static ip") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "external ip") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "load balanced") {
+		return kubecost.NetworkCategory
+	}
+	if strings.Contains(d, "licensing fee") {
+		return kubecost.OtherCategory
+	}
+
+	// Storage Descriptions
+	if strings.Contains(d, "storage") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(d, "pd capacity") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(d, "pd iops") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(d, "pd snapshot") {
+		return kubecost.StorageCategory
+	}
+
+	// Service Defaults
+	if strings.Contains(s, "storage") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(s, "compute") {
+		return kubecost.ComputeCategory
+	}
+	if strings.Contains(s, "sql") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(s, "bigquery") {
+		return kubecost.StorageCategory
+	}
+	if strings.Contains(s, "kubernetes") {
+		return kubecost.ManagementCategory
+	} else if strings.Contains(s, "pub/sub") {
+		return kubecost.NetworkCategory
+	}
+
+	return kubecost.OtherCategory
+}

+ 58 - 0
pkg/cloud/gcp/bigqueryintegration_test.go

@@ -0,0 +1,58 @@
+package gcp
+
+import (
+	"encoding/json"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/opencost/opencost/pkg/kubecost"
+	"github.com/opencost/opencost/pkg/util/timeutil"
+)
+
+func TestBigQueryIntegration_GetCloudCost(t *testing.T) {
+	bigQueryConfigPath := os.Getenv("BIGQUERY_CONFIGURATION")
+	if bigQueryConfigPath == "" {
+		t.Skip("skipping integration test, set environment variable ATHENA_CONFIGURATION")
+	}
+	bigQueryConfigBin, err := os.ReadFile(bigQueryConfigPath)
+	if err != nil {
+		t.Fatalf("failed to read config file: %s", err.Error())
+	}
+	var bigQueryConfig BigQueryConfiguration
+	err = json.Unmarshal(bigQueryConfigBin, &bigQueryConfig)
+	if err != nil {
+		t.Fatalf("failed to unmarshal config from JSON: %s", err.Error())
+	}
+
+	today := kubecost.RoundBack(time.Now().UTC(), timeutil.Day)
+
+	testCases := map[string]struct {
+		integration *BigQueryIntegration
+		start       time.Time
+		end         time.Time
+		expected    bool
+	}{
+
+		"last week window": {
+			integration: &BigQueryIntegration{
+				BigQueryQuerier: BigQueryQuerier{
+					BigQueryConfiguration: bigQueryConfig,
+				},
+			},
+			end:      today.Add(-7 * timeutil.Day),
+			start:    today.Add(-8 * timeutil.Day),
+			expected: false,
+		},
+	}
+	for name, testCase := range testCases {
+		t.Run(name, func(t *testing.T) {
+			actual, err := testCase.integration.GetCloudCost(testCase.start, testCase.end)
+			if err != nil {
+				t.Errorf("Other error during testing %s", err)
+			} else if actual.IsEmpty() != testCase.expected {
+				t.Errorf("Incorrect result, actual emptiness: %t, expected: %t", actual.IsEmpty(), testCase.expected)
+			}
+		})
+	}
+}

+ 11 - 80
pkg/cloud/gcp/bigqueryquerier.go

@@ -2,16 +2,15 @@ package gcp
 
 import (
 	"context"
-	"regexp"
-	"strings"
 
 	"cloud.google.com/go/bigquery"
+	"github.com/opencost/opencost/pkg/cloud"
 	cloudconfig "github.com/opencost/opencost/pkg/cloud/config"
-	"github.com/opencost/opencost/pkg/kubecost"
 )
 
 type BigQueryQuerier struct {
 	BigQueryConfiguration
+	ConnectionStatus cloud.ConnectionStatus
 }
 
 func (bqq *BigQueryQuerier) Equals(config cloudconfig.Config) bool {
@@ -23,88 +22,20 @@ func (bqq *BigQueryQuerier) Equals(config cloudconfig.Config) bool {
 	return bqq.BigQueryConfiguration.Equals(&thatConfig.BigQueryConfiguration)
 }
 
-func (bqq *BigQueryQuerier) QueryBigQuery(ctx context.Context, queryStr string) (*bigquery.RowIterator, error) {
+func (bqq *BigQueryQuerier) Query(ctx context.Context, queryStr string) (*bigquery.RowIterator, error) {
+	err := bqq.Validate()
+
+	if err != nil {
+		bqq.ConnectionStatus = cloud.InvalidConfiguration
+		return nil, err
+	}
+
 	client, err := bqq.GetBigQueryClient(ctx)
 	if err != nil {
+		bqq.ConnectionStatus = cloud.FailedConnection
 		return nil, err
 	}
 
 	query := client.Query(queryStr)
 	return query.Read(ctx)
 }
-
-func GCPSelectCategory(service, description string) string {
-	s := strings.ToLower(service)
-	d := strings.ToLower(description)
-
-	// Network descriptions
-	if strings.Contains(d, "download") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "network") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "ingress") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "egress") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "static ip") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "external ip") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "load balanced") {
-		return kubecost.NetworkCategory
-	}
-	if strings.Contains(d, "licensing fee") {
-		return kubecost.OtherCategory
-	}
-
-	// Storage Descriptions
-	if strings.Contains(d, "storage") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(d, "pd capacity") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(d, "pd iops") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(d, "pd snapshot") {
-		return kubecost.StorageCategory
-	}
-
-	// Service Defaults
-	if strings.Contains(s, "storage") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(s, "compute") {
-		return kubecost.ComputeCategory
-	}
-	if strings.Contains(s, "sql") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(s, "bigquery") {
-		return kubecost.StorageCategory
-	}
-	if strings.Contains(s, "kubernetes") {
-		return kubecost.ManagementCategory
-	} else if strings.Contains(s, "pub/sub") {
-		return kubecost.NetworkCategory
-	}
-
-	return kubecost.OtherCategory
-}
-
-var parseProviderIDRx = regexp.MustCompile("^.+\\/(.+)?") // Capture "gke-cluster-3-default-pool-xxxx-yy" from "projects/###/instances/gke-cluster-3-default-pool-xxxx-yy"
-
-func GCPParseProviderID(id string) string {
-	match := parseProviderIDRx.FindStringSubmatch(id)
-	if len(match) == 0 {
-		return id
-	}
-	return match[len(match)-1]
-}

+ 3 - 3
pkg/cloud/gcp/provider.go

@@ -139,11 +139,11 @@ func (gcp *GCP) GetLocalStorageQuery(window, offset time.Duration, rate bool, us
 	fmtOffset := timeutil.DurationToPromOffsetString(offset)
 
 	fmtCumulativeQuery := `sum(
-		sum_over_time(%s{device!="tmpfs", id="/"}[%s:1m]%s)
+		sum_over_time(%s{device!="tmpfs", id="/", %s}[%s:1m]%s)
 	) by (%s) / 60 / 730 / 1024 / 1024 / 1024 * %f`
 
 	fmtMonthlyQuery := `sum(
-		avg_over_time(%s{device!="tmpfs", id="/"}[%s:1m]%s)
+		avg_over_time(%s{device!="tmpfs", id="/", %s}[%s:1m]%s)
 	) by (%s) / 1024 / 1024 / 1024 * %f`
 
 	fmtQuery := fmtCumulativeQuery
@@ -152,7 +152,7 @@ func (gcp *GCP) GetLocalStorageQuery(window, offset time.Duration, rate bool, us
 	}
 	fmtWindow := timeutil.DurationString(window)
 
-	return fmt.Sprintf(fmtQuery, baseMetric, fmtWindow, fmtOffset, env.GetPromClusterLabel(), localStorageCost)
+	return fmt.Sprintf(fmtQuery, env.GetPromClusterFilter(), baseMetric, fmtWindow, fmtOffset, env.GetPromClusterLabel(), localStorageCost)
 }
 
 func (gcp *GCP) GetConfig() (*models.CustomPricing, error) {

+ 1 - 0
pkg/cloud/models/models.go

@@ -60,6 +60,7 @@ type Node struct {
 	Reserved         *ReservedInstanceData `json:"reserved,omitempty"`
 	ProviderID       string                `json:"providerID,omitempty"`
 	PricingType      PricingType           `json:"pricingType,omitempty"`
+	ArchType         string                `json:"archType,omitempty"`
 }
 
 // IsSpot determines whether or not a Node uses spot by usage type

+ 11 - 4
pkg/cloud/provider/csvprovider.go

@@ -308,10 +308,10 @@ func NodeValueFromMapField(m string, n *v1.Node, useRegion bool) string {
 		if mf[1] == "name" {
 			return toReturn + n.Name
 		} else if mf[1] == "labels" {
-			lkey := strings.Join(mf[2:], "")
+			lkey := strings.Join(mf[2:len(mf)], ".")
 			return toReturn + n.Labels[lkey]
 		} else if mf[1] == "annotations" {
-			akey := strings.Join(mf[2:], "")
+			akey := strings.Join(mf[2:len(mf)], ".")
 			return toReturn + n.Annotations[akey]
 		} else {
 			log.Errorf("Unsupported InstanceIDField %s in CSV For Node", m)
@@ -329,10 +329,10 @@ func PVValueFromMapField(m string, n *v1.PersistentVolume) string {
 		if mf[1] == "name" {
 			return n.Name
 		} else if mf[1] == "labels" {
-			lkey := strings.Join(mf[2:], "")
+			lkey := strings.Join(mf[2:len(mf)], "")
 			return n.Labels[lkey]
 		} else if mf[1] == "annotations" {
-			akey := strings.Join(mf[2:], "")
+			akey := strings.Join(mf[2:len(mf)], "")
 			return n.Annotations[akey]
 		} else {
 			log.Errorf("Unsupported InstanceIDField %s in CSV For PV", m)
@@ -346,6 +346,13 @@ func PVValueFromMapField(m string, n *v1.PersistentVolume) string {
 			log.Infof("[ERROR] Unsupported InstanceIDField %s in CSV For PV", m)
 			return ""
 		}
+	} else if len(mf) > 1 && mf[0] == "spec" {
+		if mf[1] == "storageClassName" {
+			return n.Spec.StorageClassName
+		} else {
+			log.Infof("[ERROR] Unsupported InstanceIDField %s in CSV For PV", m)
+			return ""
+		}
 	} else {
 		log.Errorf("Unsupported InstanceIDField %s in CSV For PV", m)
 		return ""

+ 94 - 94
pkg/costmodel/allocation.go

@@ -13,52 +13,52 @@ import (
 )
 
 const (
-	queryFmtPods                        = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]`
-	queryFmtPodsUID                     = `avg(kube_pod_container_status_running{}) by (pod, namespace, uid, %s)[%s:%s]`
-	queryFmtRAMBytesAllocated           = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s, provider_id)`
-	queryFmtRAMRequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtRAMUsageAvg                 = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtRAMUsageMax                 = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtCPUCoresAllocated           = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtCPURequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtCPUUsageAvg                 = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtGPUsRequested               = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtGPUsAllocated               = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
-	queryFmtNodeCostPerCPUHr            = `avg(avg_over_time(node_cpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerRAMGiBHr         = `avg(avg_over_time(node_ram_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerGPUHr            = `avg(avg_over_time(node_gpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeIsSpot                  = `avg_over_time(kubecost_node_is_spot[%s])`
-	queryFmtPVCInfo                     = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]`
-	queryFmtPodPVCAllocation            = `avg(avg_over_time(pod_pvc_allocation[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
-	queryFmtPVCBytesRequested           = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s])) by (persistentvolumeclaim, namespace, %s)`
-	queryFmtPVActiveMins                = `count(kube_persistentvolume_capacity_bytes) by (persistentvolume, %s)[%s:%s]`
-	queryFmtPVBytes                     = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (persistentvolume, %s)`
-	queryFmtPVCostPerGiBHour            = `avg(avg_over_time(pv_hourly_cost[%s])) by (volumename, %s)`
-	queryFmtNetZoneGiB                  = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetZoneCostPerGiB           = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s])) by (%s)`
-	queryFmtNetRegionGiB                = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetRegionCostPerGiB         = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s])) by (%s)`
-	queryFmtNetInternetGiB              = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetInternetCostPerGiB       = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s])) by (%s)`
-	queryFmtNetReceiveBytes             = `sum(increase(container_network_receive_bytes_total{pod!=""}[%s])) by (pod_name, pod, namespace, %s)`
-	queryFmtNetTransferBytes            = `sum(increase(container_network_transmit_bytes_total{pod!=""}[%s])) by (pod_name, pod, namespace, %s)`
-	queryFmtNodeLabels                  = `avg_over_time(kube_node_labels[%s])`
-	queryFmtNamespaceLabels             = `avg_over_time(kube_namespace_labels[%s])`
-	queryFmtNamespaceAnnotations        = `avg_over_time(kube_namespace_annotations[%s])`
-	queryFmtPodLabels                   = `avg_over_time(kube_pod_labels[%s])`
-	queryFmtPodAnnotations              = `avg_over_time(kube_pod_annotations[%s])`
-	queryFmtServiceLabels               = `avg_over_time(service_selector_labels[%s])`
-	queryFmtDeploymentLabels            = `avg_over_time(deployment_match_labels[%s])`
-	queryFmtStatefulSetLabels           = `avg_over_time(statefulSet_match_labels[%s])`
-	queryFmtDaemonSetLabels             = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s])) by (pod, owner_name, namespace, %s)`
-	queryFmtJobLabels                   = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s])) by (pod, owner_name, namespace ,%s)`
-	queryFmtPodsWithReplicaSetOwner     = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet"}[%s])) by (pod, owner_name, namespace ,%s)`
-	queryFmtReplicaSetsWithoutOwners    = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>"}[%s])) by (replicaset, namespace, %s)`
-	queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout"}[%s])) by (replicaset, namespace, owner_kind, owner_name, %s)`
-	queryFmtLBCostPerHr                 = `avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s)`
-	queryFmtLBActiveMins                = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]`
-	queryFmtOldestSample                = `min_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
-	queryFmtNewestSample                = `max_over_time(timestamp(group(node_cpu_hourly_cost))[%s:%s])`
+	queryFmtPods                        = `avg(kube_pod_container_status_running{%s}) by (pod, namespace, %s)[%s:%s]`
+	queryFmtPodsUID                     = `avg(kube_pod_container_status_running{%s}) by (pod, namespace, uid, %s)[%s:%s]`
+	queryFmtRAMBytesAllocated           = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s, provider_id)`
+	queryFmtRAMRequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtRAMUsageAvg                 = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtRAMUsageMax                 = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUCoresAllocated           = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPURequests                 = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPUUsageAvg                 = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtGPUsRequested               = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtGPUsAllocated               = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtNodeCostPerCPUHr            = `avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerRAMGiBHr         = `avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerGPUHr            = `avg(avg_over_time(node_gpu_hourly_cost{%s}[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeIsSpot                  = `avg_over_time(kubecost_node_is_spot{%s}[%s])`
+	queryFmtPVCInfo                     = `avg(kube_persistentvolumeclaim_info{volumename != "", %s}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]`
+	queryFmtPodPVCAllocation            = `avg(avg_over_time(pod_pvc_allocation{%s}[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
+	queryFmtPVCBytesRequested           = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{%s}[%s])) by (persistentvolumeclaim, namespace, %s)`
+	queryFmtPVActiveMins                = `count(kube_persistentvolume_capacity_bytes{%s}) by (persistentvolume, %s)[%s:%s]`
+	queryFmtPVBytes                     = `avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s])) by (persistentvolume, %s)`
+	queryFmtPVCostPerGiBHour            = `avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (volumename, %s)`
+	queryFmtNetZoneGiB                  = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true", %s}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetZoneCostPerGiB           = `avg(avg_over_time(kubecost_network_zone_egress_cost{%s}[%s])) by (%s)`
+	queryFmtNetRegionGiB                = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false", %s}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetRegionCostPerGiB         = `avg(avg_over_time(kubecost_network_region_egress_cost{%s}[%s])) by (%s)`
+	queryFmtNetInternetGiB              = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetInternetCostPerGiB       = `avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)`
+	queryFmtNetReceiveBytes             = `sum(increase(container_network_receive_bytes_total{pod!="", %s}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNetTransferBytes            = `sum(increase(container_network_transmit_bytes_total{pod!="", %s}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNodeLabels                  = `avg_over_time(kube_node_labels{%s}[%s])`
+	queryFmtNamespaceLabels             = `avg_over_time(kube_namespace_labels{%s}[%s])`
+	queryFmtNamespaceAnnotations        = `avg_over_time(kube_namespace_annotations{%s}[%s])`
+	queryFmtPodLabels                   = `avg_over_time(kube_pod_labels{%s}[%s])`
+	queryFmtPodAnnotations              = `avg_over_time(kube_pod_annotations{%s}[%s])`
+	queryFmtServiceLabels               = `avg_over_time(service_selector_labels{%s}[%s])`
+	queryFmtDeploymentLabels            = `avg_over_time(deployment_match_labels{%s}[%s])`
+	queryFmtStatefulSetLabels           = `avg_over_time(statefulSet_match_labels{%s}[%s])`
+	queryFmtDaemonSetLabels             = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet", %s}[%s])) by (pod, owner_name, namespace, %s)`
+	queryFmtJobLabels                   = `sum(avg_over_time(kube_pod_owner{owner_kind="Job", %s}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtPodsWithReplicaSetOwner     = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet", %s}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtReplicaSetsWithoutOwners    = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>", %s}[%s])) by (replicaset, namespace, %s)`
+	queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout", %s}[%s])) by (replicaset, namespace, owner_kind, owner_name, %s)`
+	queryFmtLBCostPerHr                 = `avg(avg_over_time(kubecost_load_balancer_cost{%s}[%s])) by (namespace, service_name, %s)`
+	queryFmtLBActiveMins                = `count(kubecost_load_balancer_cost{%s}) by (namespace, service_name, %s)[%s:%s]`
+	queryFmtOldestSample                = `min_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
+	queryFmtNewestSample                = `max_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
 
 	// Because we use container_cpu_usage_seconds_total to calculate CPU usage
 	// at any given "instant" of time, we need to use an irate or rate. To then
@@ -73,7 +73,7 @@ const (
 	//
 	// If changing the name of the recording rule, make sure to update the
 	// corresponding diagnostic query to avoid confusion.
-	queryFmtCPUUsageMaxRecordingRule = `max(max_over_time(kubecost_container_cpu_usage_irate{}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUUsageMaxRecordingRule = `max(max_over_time(kubecost_container_cpu_usage_irate{%s}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
 	// This is the subquery equivalent of the above recording rule query. It is
 	// more expensive, but does not require the recording rule. It should be
 	// used as a fallback query if the recording rule data does not exist.
@@ -84,7 +84,7 @@ const (
 	// the resolution, to make sure the irate always has two points to query
 	// in case the Prom scrape duration has been reduced to be equal to the
 	// ETL resolution.
-	queryFmtCPUUsageMaxSubquery = `max(max_over_time(irate(container_cpu_usage_seconds_total{container_name!="POD", container_name!=""}[%s])[%s:%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUUsageMaxSubquery = `max(max_over_time(irate(container_cpu_usage_seconds_total{container_name!="POD", container_name!="", %s}[%s])[%s:%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
 )
 
 // Constants for Network Cost Subtype
@@ -287,7 +287,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 func (cm *CostModel) DateRange() (time.Time, time.Time, error) {
 	ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)
 
-	resOldest, _, err := ctx.QuerySync(fmt.Sprintf(queryFmtOldestSample, "90d", "1h"))
+	resOldest, _, err := ctx.QuerySync(fmt.Sprintf(queryFmtOldestSample, env.GetPromClusterFilter(), "90d", "1h"))
 	if err != nil {
 		return time.Time{}, time.Time{}, fmt.Errorf("querying oldest sample: %w", err)
 	}
@@ -296,7 +296,7 @@ func (cm *CostModel) DateRange() (time.Time, time.Time, error) {
 	}
 	oldest := time.Unix(int64(resOldest[0].Values[0].Value), 0)
 
-	resNewest, _, err := ctx.QuerySync(fmt.Sprintf(queryFmtNewestSample, "90d", "1h"))
+	resNewest, _, err := ctx.QuerySync(fmt.Sprintf(queryFmtNewestSample, env.GetPromClusterFilter(), "90d", "1h"))
 	if err != nil {
 		return time.Time{}, time.Time{}, fmt.Errorf("querying newest sample: %w", err)
 	}
@@ -373,28 +373,28 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 
 	ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)
 
-	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, env.GetPromClusterLabel())
+	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChRAMBytesAllocated := ctx.QueryAtTime(queryRAMBytesAllocated, end)
 
-	queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, env.GetPromClusterLabel())
+	queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChRAMRequests := ctx.QueryAtTime(queryRAMRequests, end)
 
-	queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, env.GetPromClusterLabel())
+	queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChRAMUsageAvg := ctx.QueryAtTime(queryRAMUsageAvg, end)
 
-	queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, env.GetPromClusterLabel())
+	queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChRAMUsageMax := ctx.QueryAtTime(queryRAMUsageMax, end)
 
-	queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, env.GetPromClusterLabel())
+	queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChCPUCoresAllocated := ctx.QueryAtTime(queryCPUCoresAllocated, end)
 
-	queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, env.GetPromClusterLabel())
+	queryCPURequests := fmt.Sprintf(queryFmtCPURequests, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChCPURequests := ctx.QueryAtTime(queryCPURequests, end)
 
-	queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, env.GetPromClusterLabel())
+	queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChCPUUsageAvg := ctx.QueryAtTime(queryCPUUsageAvg, end)
 
-	queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMaxRecordingRule, durStr, env.GetPromClusterLabel())
+	queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMaxRecordingRule, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChCPUUsageMax := ctx.QueryAtTime(queryCPUUsageMax, end)
 	resCPUUsageMax, _ := resChCPUUsageMax.Await()
 	// If the recording rule has no data, try to fall back to the subquery.
@@ -404,7 +404,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 		// in case the Prom scrape duration has been reduced to be equal to the
 		// resolution.
 		doubleResStr := timeutil.DurationString(2 * resolution)
-		queryCPUUsageMax = fmt.Sprintf(queryFmtCPUUsageMaxSubquery, doubleResStr, durStr, resStr, env.GetPromClusterLabel())
+		queryCPUUsageMax = fmt.Sprintf(queryFmtCPUUsageMaxSubquery, env.GetPromClusterFilter(), doubleResStr, durStr, resStr, env.GetPromClusterLabel())
 		resChCPUUsageMax = ctx.QueryAtTime(queryCPUUsageMax, end)
 		resCPUUsageMax, _ = resChCPUUsageMax.Await()
 
@@ -415,112 +415,112 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 		}
 	}
 
-	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, env.GetPromClusterLabel())
+	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChGPUsRequested := ctx.QueryAtTime(queryGPUsRequested, end)
 
-	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, env.GetPromClusterLabel())
+	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChGPUsAllocated := ctx.QueryAtTime(queryGPUsAllocated, end)
 
-	queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, env.GetPromClusterLabel())
+	queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNodeCostPerCPUHr := ctx.QueryAtTime(queryNodeCostPerCPUHr, end)
 
-	queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, env.GetPromClusterLabel())
+	queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNodeCostPerRAMGiBHr := ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end)
 
-	queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, env.GetPromClusterLabel())
+	queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNodeCostPerGPUHr := ctx.QueryAtTime(queryNodeCostPerGPUHr, end)
 
-	queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr)
+	queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, env.GetPromClusterFilter(), durStr)
 	resChNodeIsSpot := ctx.QueryAtTime(queryNodeIsSpot, end)
 
-	queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr)
+	queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, resStr)
 	resChPVCInfo := ctx.QueryAtTime(queryPVCInfo, end)
 
-	queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, env.GetPromClusterLabel())
+	queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChPodPVCAllocation := ctx.QueryAtTime(queryPodPVCAllocation, end)
 
-	queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, env.GetPromClusterLabel())
+	queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChPVCBytesRequested := ctx.QueryAtTime(queryPVCBytesRequested, end)
 
-	queryPVActiveMins := fmt.Sprintf(queryFmtPVActiveMins, env.GetPromClusterLabel(), durStr, resStr)
+	queryPVActiveMins := fmt.Sprintf(queryFmtPVActiveMins, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, resStr)
 	resChPVActiveMins := ctx.QueryAtTime(queryPVActiveMins, end)
 
-	queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, env.GetPromClusterLabel())
+	queryPVBytes := fmt.Sprintf(queryFmtPVBytes, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChPVBytes := ctx.QueryAtTime(queryPVBytes, end)
 
-	queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, env.GetPromClusterLabel())
+	queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChPVCostPerGiBHour := ctx.QueryAtTime(queryPVCostPerGiBHour, end)
 
-	queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, env.GetPromClusterLabel())
+	queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetTransferBytes := ctx.QueryAtTime(queryNetTransferBytes, end)
 
-	queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, env.GetPromClusterLabel())
+	queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetReceiveBytes := ctx.QueryAtTime(queryNetReceiveBytes, end)
 
-	queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, env.GetPromClusterLabel())
+	queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetZoneGiB := ctx.QueryAtTime(queryNetZoneGiB, end)
 
-	queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, env.GetPromClusterLabel())
+	queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetZoneCostPerGiB := ctx.QueryAtTime(queryNetZoneCostPerGiB, end)
 
-	queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, env.GetPromClusterLabel())
+	queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetRegionGiB := ctx.QueryAtTime(queryNetRegionGiB, end)
 
-	queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, env.GetPromClusterLabel())
+	queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetRegionCostPerGiB := ctx.QueryAtTime(queryNetRegionCostPerGiB, end)
 
-	queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, env.GetPromClusterLabel())
+	queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetInternetGiB := ctx.QueryAtTime(queryNetInternetGiB, end)
 
-	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, env.GetPromClusterLabel())
+	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChNetInternetCostPerGiB := ctx.QueryAtTime(queryNetInternetCostPerGiB, end)
 
 	var resChNodeLabels prom.QueryResultsChan
 	if env.GetAllocationNodeLabelsEnabled() {
-		queryNodeLabels := fmt.Sprintf(queryFmtNodeLabels, durStr)
+		queryNodeLabels := fmt.Sprintf(queryFmtNodeLabels, env.GetPromClusterFilter(), durStr)
 		resChNodeLabels = ctx.QueryAtTime(queryNodeLabels, end)
 	}
 
-	queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr)
+	queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, env.GetPromClusterFilter(), durStr)
 	resChNamespaceLabels := ctx.QueryAtTime(queryNamespaceLabels, end)
 
-	queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr)
+	queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, env.GetPromClusterFilter(), durStr)
 	resChNamespaceAnnotations := ctx.QueryAtTime(queryNamespaceAnnotations, end)
 
-	queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr)
+	queryPodLabels := fmt.Sprintf(queryFmtPodLabels, env.GetPromClusterFilter(), durStr)
 	resChPodLabels := ctx.QueryAtTime(queryPodLabels, end)
 
-	queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr)
+	queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, env.GetPromClusterFilter(), durStr)
 	resChPodAnnotations := ctx.QueryAtTime(queryPodAnnotations, end)
 
-	queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr)
+	queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, env.GetPromClusterFilter(), durStr)
 	resChServiceLabels := ctx.QueryAtTime(queryServiceLabels, end)
 
-	queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr)
+	queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, env.GetPromClusterFilter(), durStr)
 	resChDeploymentLabels := ctx.QueryAtTime(queryDeploymentLabels, end)
 
-	queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr)
+	queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, env.GetPromClusterFilter(), durStr)
 	resChStatefulSetLabels := ctx.QueryAtTime(queryStatefulSetLabels, end)
 
-	queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, env.GetPromClusterLabel())
+	queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChDaemonSetLabels := ctx.QueryAtTime(queryDaemonSetLabels, end)
 
-	queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, durStr, env.GetPromClusterLabel())
+	queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChPodsWithReplicaSetOwner := ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end)
 
-	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, durStr, env.GetPromClusterLabel())
+	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChReplicaSetsWithoutOwners := ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end)
 
-	queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, durStr, env.GetPromClusterLabel())
+	queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChReplicaSetsWithRolloutOwner := ctx.QueryAtTime(queryReplicaSetsWithRolloutOwner, end)
 
-	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, env.GetPromClusterLabel())
+	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChJobLabels := ctx.QueryAtTime(queryJobLabels, end)
 
-	queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, env.GetPromClusterLabel())
+	queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
 	resChLBCostPerHr := ctx.QueryAtTime(queryLBCostPerHr, end)
 
-	queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr)
+	queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, resStr)
 	resChLBActiveMins := ctx.QueryAtTime(queryLBActiveMins, end)
 
 	resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()

+ 2 - 2
pkg/costmodel/allocation_helpers.go

@@ -69,9 +69,9 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 			var queryPods string
 			// If ingesting UIDs, avg on them
 			if ingestPodUID {
-				queryPods = fmt.Sprintf(queryFmtPodsUID, env.GetPromClusterLabel(), durStr, resStr)
+				queryPods = fmt.Sprintf(queryFmtPodsUID, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, resStr)
 			} else {
-				queryPods = fmt.Sprintf(queryFmtPods, env.GetPromClusterLabel(), durStr, resStr)
+				queryPods = fmt.Sprintf(queryFmtPods, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, resStr)
 			}
 
 			queryProfile := time.Now()

+ 9 - 5
pkg/costmodel/assets.go

@@ -134,11 +134,15 @@ func (cm *CostModel) ComputeAssets(start, end time.Time) (*kubecost.AssetSet, er
 		node.GPUCount = n.GPUCount
 		node.RAMCost = n.RAMCost
 
-		node.Overhead = &kubecost.NodeOverhead{
-			RamOverheadFraction: n.Overhead.RamOverheadFraction,
-			CpuOverheadFraction: n.Overhead.CpuOverheadFraction,
-			OverheadCostFraction: ((n.Overhead.CpuOverheadFraction * n.CPUCost) +
-				(n.Overhead.RamOverheadFraction * n.RAMCost)) / node.TotalCost(),
+		if n.Overhead != nil {
+			node.Overhead = &kubecost.NodeOverhead{
+				RamOverheadFraction: n.Overhead.RamOverheadFraction,
+				CpuOverheadFraction: n.Overhead.CpuOverheadFraction,
+				OverheadCostFraction: ((n.Overhead.CpuOverheadFraction * n.CPUCost) +
+					(n.Overhead.RamOverheadFraction * n.RAMCost)) / node.TotalCost(),
+			}
+		} else {
+			node.Overhead = &kubecost.NodeOverhead{}
 		}
 		node.Discount = n.Discount
 		if n.Preemptible {

+ 65 - 65
pkg/costmodel/cluster.go

@@ -19,26 +19,26 @@ import (
 
 const (
 	queryClusterCores = `sum(
-		avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s] %s)) by (node, %s) * avg(avg_over_time(node_cpu_hourly_cost[%s] %s)) by (node, %s) * 730 +
-		avg(avg_over_time(node_gpu_hourly_cost[%s] %s)) by (node, %s) * 730
+		avg(avg_over_time(kube_node_status_capacity_cpu_cores{%s}[%s] %s)) by (node, %s) * avg(avg_over_time(node_cpu_hourly_cost{%s}[%s] %s)) by (node, %s) * 730 +
+		avg(avg_over_time(node_gpu_hourly_cost{%s}[%s] %s)) by (node, %s) * 730
 	  ) by (%s)`
 
 	queryClusterRAM = `sum(
-		avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s] %s)) by (node, %s) / 1024 / 1024 / 1024 * avg(avg_over_time(node_ram_hourly_cost[%s] %s)) by (node, %s) * 730
+		avg(avg_over_time(kube_node_status_capacity_memory_bytes{%s}[%s] %s)) by (node, %s) / 1024 / 1024 / 1024 * avg(avg_over_time(node_ram_hourly_cost{%s}[%s] %s)) by (node, %s) * 730
 	  ) by (%s)`
 
 	queryStorage = `sum(
-		avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, %s) * 730
-		* avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, %s) / 1024 / 1024 / 1024
+		avg(avg_over_time(pv_hourly_cost{%s}[%s] %s)) by (persistentvolume, %s) * 730
+		* avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s] %s)) by (persistentvolume, %s) / 1024 / 1024 / 1024
 	  ) by (%s) %s`
 
-	queryTotal = `sum(avg(node_total_hourly_cost) by (node, %s)) * 730 +
+	queryTotal = `sum(avg(node_total_hourly_cost{%s}) by (node, %s)) * 730 +
 	  sum(
-		avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, %s) * 730
-		* avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, %s) / 1024 / 1024 / 1024
+		avg(avg_over_time(pv_hourly_cost{%s}[1h])) by (persistentvolume, %s) * 730
+		* avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[1h])) by (persistentvolume, %s) / 1024 / 1024 / 1024
 	  ) by (%s) %s`
 
-	queryNodes = `sum(avg(node_total_hourly_cost) by (node, %s)) * 730 %s`
+	queryNodes = `sum(avg(node_total_hourly_cost{%s}) by (node, %s)) * 730 %s`
 )
 
 const maxLocalDiskSize = 200 // AWS limits root disks to 100 Gi, and occasional metric errors in filesystem size should not contribute to large costs.
@@ -171,19 +171,19 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
 	costPerGBHr := 0.04 / 730.0
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
-	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s])) by (%s, persistentvolume,provider_id)`, durStr, env.GetPromClusterLabel())
-	queryPVSize := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (%s, persistentvolume)`, durStr, env.GetPromClusterLabel())
-	queryActiveMins := fmt.Sprintf(`avg(kube_persistentvolume_capacity_bytes) by (%s, persistentvolume)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
-	queryPVStorageClass := fmt.Sprintf(`avg(avg_over_time(kubecost_pv_info[%s])) by (%s, persistentvolume, storageclass)`, durStr, env.GetPromClusterLabel())
-	queryPVUsedAvg := fmt.Sprintf(`avg(avg_over_time(kubelet_volume_stats_used_bytes[%s])) by (%s, persistentvolumeclaim, namespace)`, durStr, env.GetPromClusterLabel())
-	queryPVUsedMax := fmt.Sprintf(`max(max_over_time(kubelet_volume_stats_used_bytes[%s])) by (%s, persistentvolumeclaim, namespace)`, durStr, env.GetPromClusterLabel())
-	queryPVCInfo := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolumeclaim_info[%s])) by (%s, volumename, persistentvolumeclaim, namespace)`, durStr, env.GetPromClusterLabel())
-	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageUsedAvg := fmt.Sprintf(`avg(avg_over_time(container_fs_usage_bytes{device!="tmpfs", id="/"}[%s])) by (instance, %s)`, durStr, env.GetPromClusterLabel())
-	queryLocalStorageUsedMax := fmt.Sprintf(`max(max_over_time(container_fs_usage_bytes{device!="tmpfs", id="/"}[%s])) by (instance, %s)`, durStr, env.GetPromClusterLabel())
-	queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm])`, env.GetPromClusterLabel(), durStr, minsPerResolution)
-	queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost) by (%s, node)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (%s, persistentvolume,provider_id)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryPVSize := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s])) by (%s, persistentvolume)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`avg(kube_persistentvolume_capacity_bytes{%s}) by (%s, persistentvolume)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryPVStorageClass := fmt.Sprintf(`avg(avg_over_time(kubecost_pv_info{%s}[%s])) by (%s, persistentvolume, storageclass)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryPVUsedAvg := fmt.Sprintf(`avg(avg_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryPVUsedMax := fmt.Sprintf(`max(max_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryPVCInfo := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolumeclaim_info{%s}[%s])) by (%s, volumename, persistentvolumeclaim, namespace)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+	queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+	queryLocalStorageUsedAvg := fmt.Sprintf(`avg(avg_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryLocalStorageUsedMax := fmt.Sprintf(`max(max_over_time(container_fs_usage_bytes{device!="tmpfs", id="/", %s}[%s])) by (instance, %s)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/", %s}) by (instance, %s)[%s:%dm])`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost{%s}) by (%s, node)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
 
 	resChPVCost := ctx.QueryAtTime(queryPVCost, t)
 	resChPVSize := ctx.QueryAtTime(queryPVSize, t)
@@ -571,20 +571,20 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
 	requiredCtx := prom.NewNamedContext(client, prom.ClusterContextName)
 	optionalCtx := prom.NewNamedContext(client, prom.ClusterOptionalContextName)
 
-	queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
-	queryNodeCPUCoresCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
-	queryNodeCPUCoresAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
-	queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s])) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, durStr, env.GetPromClusterLabel())
-	queryNodeRAMBytesCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
-	queryNodeRAMBytesAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
-	queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count[%s])) by (%s, node, provider_id)`, durStr, env.GetPromClusterLabel())
-	queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
-	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm])) by (kubernetes_node, %s, mode)`, durStr, minsPerResolution, env.GetPromClusterLabel())
-	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durStr, minsPerResolution, env.GetPromClusterLabel(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system"}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durStr, minsPerResolution, env.GetPromClusterLabel(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node, %s, provider_id)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
-	queryIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s:%dm])`, durStr, minsPerResolution)
-	queryLabels := fmt.Sprintf(`count_over_time(kube_node_labels[%s:%dm])`, durStr, minsPerResolution)
+	queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (%s, node, instance_type, provider_id)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeCPUCoresCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores{%s}[%s])) by (%s, node)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeCPUCoresAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_cpu_cores{%s}[%s])) by (%s, node)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeRAMBytesCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes{%s}[%s])) by (%s, node)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeRAMBytesAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_memory_bytes{%s}[%s])) by (%s, node)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count{%s}[%s])) by (%s, node, provider_id)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost{%s}[%s])) by (%s, node, instance_type, provider_id)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total{%s}[%s:%dm])) by (kubernetes_node, %s, mode)`, env.GetPromClusterFilter(), durStr, minsPerResolution, env.GetPromClusterLabel())
+	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system", %s}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, env.GetPromClusterFilter(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterFilter(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system", %s}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, env.GetPromClusterFilter(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterFilter(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost{%s}) by (node, %s, provider_id)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot{%s}[%s:%dm])`, env.GetPromClusterFilter(), durStr, minsPerResolution)
+	queryLabels := fmt.Sprintf(`count_over_time(kube_node_labels{%s}[%s:%dm])`, env.GetPromClusterFilter(), durStr, minsPerResolution)
 
 	// Return errors if these fail
 	resChNodeCPUHourlyCost := requiredCtx.QueryAtTime(queryNodeCPUHourlyCost, t)
@@ -742,8 +742,8 @@ func ClusterLoadBalancers(client prometheus.Client, start, end time.Time) (map[L
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
 
-	queryLBCost := fmt.Sprintf(`avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s, ingress_ip)`, durStr, env.GetPromClusterLabel())
-	queryActiveMins := fmt.Sprintf(`avg(kubecost_load_balancer_cost) by (namespace, service_name, %s, ingress_ip)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryLBCost := fmt.Sprintf(`avg(avg_over_time(kubecost_load_balancer_cost{%s}[%s])) by (namespace, service_name, %s, ingress_ip)`, env.GetPromClusterFilter(), durStr, env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`avg(kubecost_load_balancer_cost{%s}) by (namespace, service_name, %s, ingress_ip)[%s:%dm]`, env.GetPromClusterFilter(), env.GetPromClusterLabel(), durStr, minsPerResolution)
 
 	resChLBCost := ctx.QueryAtTime(queryLBCost, t)
 	resChActiveMins := ctx.QueryAtTime(queryActiveMins, t)
@@ -878,49 +878,49 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider models
 	hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
 
 	const fmtQueryDataCount = `
-		count_over_time(sum(kube_node_status_capacity_cpu_cores) by (%s)[%s:%dm]%s) * %d
+		count_over_time(sum(kube_node_status_capacity_cpu_cores{%s}) by (%s)[%s:%dm]%s) * %d
 	`
 
 	const fmtQueryTotalGPU = `
 		sum(
-			sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
+			sum_over_time(node_gpu_hourly_cost{%s}[%s:%dm]%s) * %f
 		) by (%s)
 	`
 
 	const fmtQueryTotalCPU = `
 		sum(
-			sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, %s)[%s:%dm]%s) *
-			avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, %s) * %f
+			sum_over_time(avg(kube_node_status_capacity_cpu_cores{%s}) by (node, %s)[%s:%dm]%s) *
+			avg(avg_over_time(node_cpu_hourly_cost{%s}[%s:%dm]%s)) by (node, %s) * %f
 		) by (%s)
 	`
 
 	const fmtQueryTotalRAM = `
 		sum(
-			sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 *
-			avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, %s) * %f
+			sum_over_time(avg(kube_node_status_capacity_memory_bytes{%s}) by (node, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 *
+			avg(avg_over_time(node_ram_hourly_cost{%s}[%s:%dm]%s)) by (node, %s) * %f
 		) by (%s)
 	`
 
 	const fmtQueryTotalStorage = `
 		sum(
-			sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 *
-			avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, %s) * %f
+			sum_over_time(avg(kube_persistentvolume_capacity_bytes{%s}) by (persistentvolume, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 *
+			avg(avg_over_time(pv_hourly_cost{%s}[%s:%dm]%s)) by (persistentvolume, %s) * %f
 		) by (%s)
 	`
 
 	const fmtQueryCPUModePct = `
-		sum(rate(node_cpu_seconds_total[%s]%s)) by (%s, mode) / ignoring(mode)
-		group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (%s)
+		sum(rate(node_cpu_seconds_total{%s}[%s]%s)) by (%s, mode) / ignoring(mode)
+		group_left sum(rate(node_cpu_seconds_total{%s}[%s]%s)) by (%s)
 	`
 
 	const fmtQueryRAMSystemPct = `
-		sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (%s)
-		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (%s)
+		sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system", %s}[%s:%dm]%s)) by (%s)
+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm]%s)) by (%s)
 	`
 
 	const fmtQueryRAMUserPct = `
-		sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (%s)
-		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (%s)
+		sum(sum_over_time(kubecost_cluster_memory_working_set_bytes{%s}[%s:%dm]%s)) by (%s)
+		/ sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm]%s)) by (%s)
 	`
 
 	// TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
@@ -936,11 +936,11 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider models
 
 	fmtOffset := timeutil.DurationToPromOffsetString(offset)
 
-	queryDataCount := fmt.Sprintf(fmtQueryDataCount, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, minsPerResolution)
-	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, windowStr, minsPerResolution, fmtOffset, hourlyToCumulative, env.GetPromClusterLabel())
-	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
-	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
-	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
+	queryDataCount := fmt.Sprintf(fmtQueryDataCount, env.GetPromClusterFilter(), env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, minsPerResolution)
+	queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, hourlyToCumulative, env.GetPromClusterLabel())
+	queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, env.GetPromClusterFilter(), env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
+	queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, env.GetPromClusterFilter(), env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
+	queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, env.GetPromClusterFilter(), env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), hourlyToCumulative, env.GetPromClusterLabel())
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
 
@@ -962,9 +962,9 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider models
 	}
 
 	if withBreakdown {
-		queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, windowStr, fmtOffset, env.GetPromClusterLabel(), windowStr, fmtOffset, env.GetPromClusterLabel())
-		queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel())
-		queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel())
+		queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, env.GetPromClusterFilter(), windowStr, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), windowStr, fmtOffset, env.GetPromClusterLabel())
+		queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel())
+		queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), windowStr, minsPerResolution, fmtOffset, env.GetPromClusterLabel())
 
 		bdResChs := ctx.QueryAll(
 			queryCPUModePct,
@@ -1236,10 +1236,10 @@ func ClusterCostsOverTime(cli prometheus.Client, provider models.Provider, start
 
 	fmtOffset := timeutil.DurationToPromOffsetString(offset)
 
-	qCores := fmt.Sprintf(queryClusterCores, fmtWindow, fmtOffset, env.GetPromClusterLabel(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	qRAM := fmt.Sprintf(queryClusterRAM, fmtWindow, fmtOffset, env.GetPromClusterLabel(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	qStorage := fmt.Sprintf(queryStorage, fmtWindow, fmtOffset, env.GetPromClusterLabel(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel(), localStorageQuery)
-	qTotal := fmt.Sprintf(queryTotal, env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), localStorageQuery)
+	qCores := fmt.Sprintf(queryClusterCores, env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	qRAM := fmt.Sprintf(queryClusterRAM, env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	qStorage := fmt.Sprintf(queryStorage, env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterFilter(), fmtWindow, fmtOffset, env.GetPromClusterLabel(), env.GetPromClusterLabel(), localStorageQuery)
+	qTotal := fmt.Sprintf(queryTotal, env.GetPromClusterFilter(), env.GetPromClusterLabel(), env.GetPromClusterFilter(), env.GetPromClusterLabel(), env.GetPromClusterFilter(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), localStorageQuery)
 
 	ctx := prom.NewNamedContext(cli, prom.ClusterContextName)
 	resChClusterCores := ctx.QueryRange(qCores, start, end, window)
@@ -1289,7 +1289,7 @@ func ClusterCostsOverTime(cli prometheus.Client, provider models.Provider, start
 		// If clusterTotal query failed, it's likely because there are no PVs, which
 		// causes the qTotal query to return no data. Instead, query only node costs.
 		// If that fails, return an error because something is actually wrong.
-		qNodes := fmt.Sprintf(queryNodes, env.GetPromClusterLabel(), localStorageQuery)
+		qNodes := fmt.Sprintf(queryNodes, env.GetPromClusterFilter(), env.GetPromClusterLabel(), localStorageQuery)
 
 		resultNodes, warnings, err := ctx.QueryRangeSync(qNodes, start, end, window)
 		for _, warning := range warnings {

+ 5 - 0
pkg/costmodel/cluster_helpers.go

@@ -825,6 +825,11 @@ func buildNodeMap(
 
 		if overhead, ok := overheadMap[clusterAndNameID]; ok {
 			nodePtr.Overhead = overhead
+		} else {
+			// we were unable to compute overhead for this node
+			// assume default case of no overhead
+			nodePtr.Overhead = &NodeOverhead{}
+			log.Warnf("unable to compute overhead for node %s - defaulting to no overhead", clusterAndNameID.Name)
 		}
 
 	}

+ 9 - 0
pkg/costmodel/cluster_helpers_test.go

@@ -184,6 +184,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCost:      0.048,
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 			},
 		},
@@ -212,6 +213,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCost:      0.048,
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 			},
 		},
@@ -248,6 +250,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCost:      0.048,
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 				{
 					Cluster:    "cluster1",
@@ -261,6 +264,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCost:      0.087,
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 			},
 		},
@@ -490,6 +494,7 @@ func TestBuildNodeMap(t *testing.T) {
 					End:         time.Date(2020, 6, 16, 9, 20, 39, 0, time.UTC),
 					Minutes:     5*60 + 35 + (11.0 / 60.0),
 					Preemptible: true,
+					Overhead:    &NodeOverhead{},
 					Labels: map[string]string{
 						"labelname1_A": "labelvalue1_A",
 						"labelname1_B": "labelvalue1_B",
@@ -526,6 +531,7 @@ func TestBuildNodeMap(t *testing.T) {
 						"labelname1_A": "labelvalue1_A",
 						"labelname1_B": "labelvalue1_B",
 					},
+					Overhead: &NodeOverhead{},
 				},
 				{
 					Cluster:    "cluster1",
@@ -558,6 +564,7 @@ func TestBuildNodeMap(t *testing.T) {
 						"labelname2_A": "labelvalue2_A",
 						"labelname2_B": "labelvalue2_B",
 					},
+					Overhead: &NodeOverhead{},
 				},
 			},
 		},
@@ -596,6 +603,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCores:     partialCPUMap["e2-micro"],
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 			},
 		},
@@ -634,6 +642,7 @@ func TestBuildNodeMap(t *testing.T) {
 					CPUCores:     partialCPUMap["e2-small"],
 					CPUBreakdown: &ClusterCostsBreakdown{},
 					RAMBreakdown: &ClusterCostsBreakdown{},
+					Overhead:     &NodeOverhead{},
 				},
 			},
 		},

+ 1 - 1
pkg/costmodel/clusters/clustermap.go

@@ -143,7 +143,7 @@ func NewClusterMap(client prometheus.Client, cip ClusterInfoProvider, refresh ti
 
 // clusterInfoQuery returns the query string to load cluster info
 func clusterInfoQuery(offset string) string {
-	return fmt.Sprintf("kubecost_cluster_info%s", offset)
+	return fmt.Sprintf("kubecost_cluster_info{%s}%s", env.GetPromClusterFilter(), offset)
 }
 
 // loadClusters loads all the cluster info to map

+ 66 - 62
pkg/costmodel/costmodel.go

@@ -144,9 +144,9 @@ const (
 		label_replace(
 			label_replace(
 				avg(
-					count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!=""}[%s] %s)
+					count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", %s}[%s] %s)
 					*
-					avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!=""}[%s] %s)
+					avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="",container!="POD", node!="", %s}[%s] %s)
 				) by (namespace,container,pod,node,%s) , "container_name","$1","container","(.+)"
 			), "pod_name","$1","pod","(.+)"
 		)
@@ -156,7 +156,7 @@ const (
 			label_replace(
 				label_replace(
 					label_replace(
-						count_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!=""}[%s] %s), "node", "$1", "instance", "(.+)"
+						count_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", %s}[%s] %s), "node", "$1", "instance", "(.+)"
 					), "container_name", "$1", "container", "(.+)"
 				), "pod_name", "$1", "pod", "(.+)"
 			)
@@ -164,7 +164,7 @@ const (
 			label_replace(
 				label_replace(
 					label_replace(
-						avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!=""}[%s] %s), "node", "$1", "instance", "(.+)"
+						avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", instance!="", %s}[%s] %s), "node", "$1", "instance", "(.+)"
 					), "container_name", "$1", "container", "(.+)"
 				), "pod_name", "$1", "pod", "(.+)"
 			)
@@ -174,9 +174,9 @@ const (
 		label_replace(
 			label_replace(
 				avg(
-					count_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="",container!="POD", node!=""}[%s] %s)
+					count_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="",container!="POD", node!="", %s}[%s] %s)
 					*
-					avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="",container!="POD", node!=""}[%s] %s)
+					avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="",container!="POD", node!="", %s}[%s] %s)
 				) by (namespace,container,pod,node,%s) , "container_name","$1","container","(.+)"
 			), "pod_name","$1","pod","(.+)"
 		)
@@ -186,7 +186,7 @@ const (
 			label_replace(
 				label_replace(
 					rate(
-						container_cpu_usage_seconds_total{container!="", container!="POD", instance!=""}[%s] %s
+						container_cpu_usage_seconds_total{container!="", container!="POD", instance!="", %s}[%s] %s
 					), "node", "$1", "instance", "(.+)"
 				), "container_name", "$1", "container", "(.+)"
 			), "pod_name", "$1", "pod", "(.+)"
@@ -196,19 +196,19 @@ const (
 		label_replace(
 			label_replace(
 				avg(
-					count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s] %s)
+					count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s] %s)
 					*
-					avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s] %s)
+					avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s] %s)
 					* %f
 				) by (namespace,container,pod,node,%s) , "container_name","$1","container","(.+)"
 			), "pod_name","$1","pod","(.+)"
 		)
 	) by (namespace,container_name,pod_name,node,%s)
-	* on (pod_name, namespace, %s) group_left(container) label_replace(avg(avg_over_time(kube_pod_status_phase{phase="Running"}[%s] %s)) by (pod,namespace,%s), "pod_name","$1","pod","(.+)")`
-	queryPVRequestsStr = `avg(avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, namespace, volumename, %s, kubernetes_node)
+	* on (pod_name, namespace, %s) group_left(container) label_replace(avg(avg_over_time(kube_pod_status_phase{phase="Running", %s}[%s] %s)) by (pod,namespace,%s), "pod_name","$1","pod","(.+)")`
+	queryPVRequestsStr = `avg(avg(kube_persistentvolumeclaim_info{volumename != "", %s}) by (persistentvolumeclaim, storageclass, namespace, volumename, %s, kubernetes_node)
 	*
 	on (persistentvolumeclaim, namespace, %s, kubernetes_node) group_right(storageclass, volumename)
-	sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{}) by (persistentvolumeclaim, namespace, %s, kubernetes_node, kubernetes_name)) by (persistentvolumeclaim, storageclass, namespace, %s, volumename, kubernetes_node)`
+	sum(kube_persistentvolumeclaim_resource_requests_storage_bytes{%s}) by (persistentvolumeclaim, namespace, %s, kubernetes_node, kubernetes_name)) by (persistentvolumeclaim, storageclass, namespace, %s, volumename, kubernetes_node)`
 	// queryRAMAllocationByteHours yields the total byte-hour RAM allocation over the given
 	// window, aggregated by container.
 	//  [line 3]  sum_over_time(each byte) = [byte*scrape] by metric
@@ -218,7 +218,7 @@ const (
 	queryRAMAllocationByteHours = `
 		label_replace(label_replace(
 			sum(
-				sum_over_time(container_memory_allocation_bytes{container!="",container!="POD", node!=""}[%s])
+				sum_over_time(container_memory_allocation_bytes{container!="",container!="POD", node!="", %s}[%s])
 			) by (namespace,container,pod,node,%s) * %f / 60 / 60
 		, "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")`
 	// queryCPUAllocationVCPUHours yields the total VCPU-hour CPU allocation over the given
@@ -230,35 +230,35 @@ const (
 	queryCPUAllocationVCPUHours = `
 		label_replace(label_replace(
 			sum(
-				sum_over_time(container_cpu_allocation{container!="",container!="POD", node!=""}[%s])
+				sum_over_time(container_cpu_allocation{container!="",container!="POD", node!="", %s}[%s])
 			) by (namespace,container,pod,node,%s) * %f / 60 / 60
 		, "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)")`
 	// queryPVCAllocationFmt yields the total byte-hour PVC allocation over the given window.
 	// sum_over_time(each byte) = [byte*scrape] by metric *(scalar(avg(prometheus_target_interval_length_seconds)) = [seconds/scrape] / 60 / 60 =  [hours/scrape] by pod
-	queryPVCAllocationFmt     = `sum(sum_over_time(pod_pvc_allocation[%s])) by (%s, namespace, pod, persistentvolume, persistentvolumeclaim) * %f/60/60`
-	queryPVHourlyCostFmt      = `avg_over_time(pv_hourly_cost[%s])`
-	queryNSLabels             = `avg_over_time(kube_namespace_labels[%s])`
-	queryPodLabels            = `avg_over_time(kube_pod_labels[%s])`
-	queryNSAnnotations        = `avg_over_time(kube_namespace_annotations[%s])`
-	queryPodAnnotations       = `avg_over_time(kube_pod_annotations[%s])`
-	queryDeploymentLabels     = `avg_over_time(deployment_match_labels[%s])`
-	queryStatefulsetLabels    = `avg_over_time(statefulSet_match_labels[%s])`
-	queryPodDaemonsets        = `sum(kube_pod_owner{owner_kind="DaemonSet"}) by (namespace,pod,owner_name,%s)`
-	queryPodJobs              = `sum(kube_pod_owner{owner_kind="Job"}) by (namespace,pod,owner_name,%s)`
-	queryServiceLabels        = `avg_over_time(service_selector_labels[%s])`
-	queryZoneNetworkUsage     = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
-	queryRegionNetworkUsage   = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
-	queryInternetNetworkUsage = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
-	normalizationStr          = `max(count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte"}[%s] %s))`
+	queryPVCAllocationFmt     = `sum(sum_over_time(pod_pvc_allocation{%s}[%s])) by (%s, namespace, pod, persistentvolume, persistentvolumeclaim) * %f/60/60`
+	queryPVHourlyCostFmt      = `avg_over_time(pv_hourly_cost{%s}[%s])`
+	queryNSLabels             = `avg_over_time(kube_namespace_labels{%s}[%s])`
+	queryPodLabels            = `avg_over_time(kube_pod_labels{%s}[%s])`
+	queryNSAnnotations        = `avg_over_time(kube_namespace_annotations{%s}[%s])`
+	queryPodAnnotations       = `avg_over_time(kube_pod_annotations{%s}[%s])`
+	queryDeploymentLabels     = `avg_over_time(deployment_match_labels{%s}[%s])`
+	queryStatefulsetLabels    = `avg_over_time(statefulSet_match_labels{%s}[%s])`
+	queryPodDaemonsets        = `sum(kube_pod_owner{owner_kind="DaemonSet", %s}) by (namespace,pod,owner_name,%s)`
+	queryPodJobs              = `sum(kube_pod_owner{owner_kind="Job", %s}) by (namespace,pod,owner_name,%s)`
+	queryServiceLabels        = `avg_over_time(service_selector_labels{%s}[%s])`
+	queryZoneNetworkUsage     = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true", %s}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
+	queryRegionNetworkUsage   = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false", %s}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
+	queryInternetNetworkUsage = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s] %s)) by (namespace,pod_name,%s) / 1024 / 1024 / 1024`
+	normalizationStr          = `max(count_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", %s}[%s] %s))`
 )
 
 func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyzerCloud.Provider, window string, offset string, filterNamespace string) (map[string]*CostData, error) {
-	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, window, offset, window, offset, env.GetPromClusterLabel())
-	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, window, offset, env.GetPromClusterLabel())
-	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, window, "", env.GetPromClusterLabel())
-	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, window, "", env.GetPromClusterLabel())
-	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, window, "", env.GetPromClusterLabel())
-	queryNormalization := fmt.Sprintf(normalizationStr, window, offset)
+	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, env.GetPromClusterFilter(), window, offset, env.GetPromClusterFilter(), window, offset, env.GetPromClusterLabel())
+	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, env.GetPromClusterFilter(), window, offset, env.GetPromClusterLabel())
+	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, env.GetPromClusterFilter(), window, "", env.GetPromClusterLabel())
+	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, env.GetPromClusterFilter(), window, "", env.GetPromClusterLabel())
+	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, env.GetPromClusterFilter(), window, "", env.GetPromClusterLabel())
+	queryNormalization := fmt.Sprintf(normalizationStr, env.GetPromClusterFilter(), window, offset)
 
 	// Cluster ID is specific to the source cluster
 	clusterID := env.GetClusterID()
@@ -734,7 +734,7 @@ func findUnmountedPVCostData(clusterMap clusters.ClusterMap, unmountedPVs map[st
 
 func findDeletedPodInfo(cli prometheusClient.Client, missingContainers map[string]*CostData, window string) error {
 	if len(missingContainers) > 0 {
-		queryHistoricalPodLabels := fmt.Sprintf(`kube_pod_labels{}[%s]`, window)
+		queryHistoricalPodLabels := fmt.Sprintf(`kube_pod_labels{%s}[%s]`, env.GetPromClusterFilter(), window)
 
 		podLabelsResult, _, err := prom.NewNamedContext(cli, prom.ComputeCostDataContextName).QuerySync(queryHistoricalPodLabels)
 		if err != nil {
@@ -773,9 +773,9 @@ func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*c
 			offsetStr = fmt.Sprintf("offset %s", offset)
 		}
 
-		queryHistoricalCPUCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s] %s)) by (node, instance, %s)`, window, offsetStr, env.GetPromClusterLabel())
-		queryHistoricalRAMCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s] %s)) by (node, instance, %s)`, window, offsetStr, env.GetPromClusterLabel())
-		queryHistoricalGPUCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s] %s)) by (node, instance, %s)`, window, offsetStr, env.GetPromClusterLabel())
+		queryHistoricalCPUCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost{%s}[%s] %s)) by (node, instance, %s)`, env.GetPromClusterFilter(), window, offsetStr, env.GetPromClusterLabel())
+		queryHistoricalRAMCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost{%s}[%s] %s)) by (node, instance, %s)`, env.GetPromClusterFilter(), window, offsetStr, env.GetPromClusterLabel())
+		queryHistoricalGPUCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost{%s}[%s] %s)) by (node, instance, %s)`, env.GetPromClusterFilter(), window, offsetStr, env.GetPromClusterLabel())
 
 		ctx := prom.NewNamedContext(cli, prom.ComputeCostDataContextName)
 		cpuCostResCh := ctx.Query(queryHistoricalCPUCost)
@@ -1002,6 +1002,10 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			region, _ := util.GetRegion(n.Labels)
 			newCnode.Region = region
 		}
+		if newCnode.ArchType == "" {
+			arch, _ := util.GetArchType(n.Labels)
+			newCnode.ArchType = arch
+		}
 		newCnode.ProviderID = n.Spec.ProviderID
 
 		var cpu float64
@@ -1625,20 +1629,20 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	ctx := prom.NewNamedContext(cli, prom.ComputeCostDataRangeContextName)
 
-	queryRAMAlloc := fmt.Sprintf(queryRAMAllocationByteHours, resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
-	queryCPUAlloc := fmt.Sprintf(queryCPUAllocationVCPUHours, resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
-	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, resStr, "", resStr, "", env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, resStr, "", resStr, "", env.GetPromClusterLabel())
-	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, resStr, "", resStr, "", env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, resStr, "", env.GetPromClusterLabel())
-	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, resStr, "", resStr, "", resolution.Hours(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), resStr, "", env.GetPromClusterLabel())
-	queryPVRequests := fmt.Sprintf(queryPVRequestsStr, env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryPVCAllocation := fmt.Sprintf(queryPVCAllocationFmt, resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
-	queryPVHourlyCost := fmt.Sprintf(queryPVHourlyCostFmt, resStr)
-	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, resStr, "", env.GetPromClusterLabel())
-	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, resStr, "", env.GetPromClusterLabel())
-	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, resStr, "", env.GetPromClusterLabel())
-	queryNormalization := fmt.Sprintf(normalizationStr, resStr, "")
+	queryRAMAlloc := fmt.Sprintf(queryRAMAllocationByteHours, env.GetPromClusterFilter(), resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
+	queryCPUAlloc := fmt.Sprintf(queryCPUAllocationVCPUHours, env.GetPromClusterFilter(), resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
+	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryCPUUsage := fmt.Sprintf(queryCPUUsageStr, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryGPURequests := fmt.Sprintf(queryGPURequestsStr, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterFilter(), resStr, "", resolution.Hours(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryPVRequests := fmt.Sprintf(queryPVRequestsStr, env.GetPromClusterFilter(), env.GetPromClusterLabel(), env.GetPromClusterLabel(), env.GetPromClusterFilter(), env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryPVCAllocation := fmt.Sprintf(queryPVCAllocationFmt, env.GetPromClusterFilter(), resStr, env.GetPromClusterLabel(), scrapeIntervalSeconds)
+	queryPVHourlyCost := fmt.Sprintf(queryPVHourlyCostFmt, env.GetPromClusterFilter(), resStr)
+	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, env.GetPromClusterFilter(), resStr, "", env.GetPromClusterLabel())
+	queryNormalization := fmt.Sprintf(normalizationStr, env.GetPromClusterFilter(), resStr, "")
 
 	// Submit all queries for concurrent evaluation
 	resChRAMRequests := ctx.QueryRange(queryRAMRequests, start, end, resolution)
@@ -1654,15 +1658,15 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 	resChNetZoneRequests := ctx.QueryRange(queryNetZoneRequests, start, end, resolution)
 	resChNetRegionRequests := ctx.QueryRange(queryNetRegionRequests, start, end, resolution)
 	resChNetInternetRequests := ctx.QueryRange(queryNetInternetRequests, start, end, resolution)
-	resChNSLabels := ctx.QueryRange(fmt.Sprintf(queryNSLabels, resStr), start, end, resolution)
-	resChPodLabels := ctx.QueryRange(fmt.Sprintf(queryPodLabels, resStr), start, end, resolution)
-	resChNSAnnotations := ctx.QueryRange(fmt.Sprintf(queryNSAnnotations, resStr), start, end, resolution)
-	resChPodAnnotations := ctx.QueryRange(fmt.Sprintf(queryPodAnnotations, resStr), start, end, resolution)
-	resChServiceLabels := ctx.QueryRange(fmt.Sprintf(queryServiceLabels, resStr), start, end, resolution)
-	resChDeploymentLabels := ctx.QueryRange(fmt.Sprintf(queryDeploymentLabels, resStr), start, end, resolution)
-	resChStatefulsetLabels := ctx.QueryRange(fmt.Sprintf(queryStatefulsetLabels, resStr), start, end, resolution)
-	resChJobs := ctx.QueryRange(fmt.Sprintf(queryPodJobs, env.GetPromClusterLabel()), start, end, resolution)
-	resChDaemonsets := ctx.QueryRange(fmt.Sprintf(queryPodDaemonsets, env.GetPromClusterLabel()), start, end, resolution)
+	resChNSLabels := ctx.QueryRange(fmt.Sprintf(queryNSLabels, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChPodLabels := ctx.QueryRange(fmt.Sprintf(queryPodLabels, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChNSAnnotations := ctx.QueryRange(fmt.Sprintf(queryNSAnnotations, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChPodAnnotations := ctx.QueryRange(fmt.Sprintf(queryPodAnnotations, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChServiceLabels := ctx.QueryRange(fmt.Sprintf(queryServiceLabels, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChDeploymentLabels := ctx.QueryRange(fmt.Sprintf(queryDeploymentLabels, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChStatefulsetLabels := ctx.QueryRange(fmt.Sprintf(queryStatefulsetLabels, env.GetPromClusterFilter(), resStr), start, end, resolution)
+	resChJobs := ctx.QueryRange(fmt.Sprintf(queryPodJobs, env.GetPromClusterFilter(), env.GetPromClusterLabel()), start, end, resolution)
+	resChDaemonsets := ctx.QueryRange(fmt.Sprintf(queryPodDaemonsets, env.GetPromClusterFilter(), env.GetPromClusterLabel()), start, end, resolution)
 	resChNormalization := ctx.QueryRange(queryNormalization, start, end, resolution)
 
 	// Pull k8s pod, controller, service, and namespace details

+ 10 - 10
pkg/costmodel/metrics.go

@@ -147,7 +147,7 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 		cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "node_cpu_hourly_cost",
 			Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+		}, []string{"instance", "node", "instance_type", "region", "provider_id", "arch"})
 		if _, disabled := disabledMetrics["node_cpu_hourly_cost"]; !disabled {
 			toRegisterGV = append(toRegisterGV, cpuGv)
 		}
@@ -155,7 +155,7 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 		ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "node_ram_hourly_cost",
 			Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+		}, []string{"instance", "node", "instance_type", "region", "provider_id", "arch"})
 		if _, disabled := disabledMetrics["node_ram_hourly_cost"]; !disabled {
 			toRegisterGV = append(toRegisterGV, ramGv)
 		}
@@ -163,7 +163,7 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 		gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "node_gpu_hourly_cost",
 			Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+		}, []string{"instance", "node", "instance_type", "region", "provider_id", "arch"})
 		if _, disabled := disabledMetrics["node_gpu_hourly_cost"]; !disabled {
 			toRegisterGV = append(toRegisterGV, gpuGv)
 		}
@@ -171,7 +171,7 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 		gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "node_gpu_count",
 			Help: "node_gpu_count count of gpu on this node",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+		}, []string{"instance", "node", "instance_type", "region", "provider_id", "arch"})
 		if _, disabled := disabledMetrics["node_gpu_count"]; !disabled {
 			toRegisterGV = append(toRegisterGV, gpuCountGv)
 		}
@@ -195,7 +195,7 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 		totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "node_total_hourly_cost",
 			Help: "node_total_hourly_cost Total node cost per hour",
-		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
+		}, []string{"instance", "node", "instance_type", "region", "provider_id", "arch"})
 		if _, disabled := disabledMetrics["node_total_hourly_cost"]; !disabled {
 			toRegisterGV = append(toRegisterGV, totalGv)
 		}
@@ -528,22 +528,22 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 					nodeCostAverages[labelKey] = avgCosts
 				}
 
-				cmme.GPUCountRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(gpu)
-				cmme.GPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(gpuCost)
+				cmme.GPUCountRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType).Set(gpu)
+				cmme.GPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType).Set(gpuCost)
 
 				const outlierFactor float64 = 30
 				// don't record cpuCost, ramCost, or gpuCost in the case of wild outliers
 				// k8s api sometimes causes cost spikes as described here:
 				// https://github.com/opencost/opencost/issues/927
 				if cpuCost < outlierFactor*avgCosts.CpuCostAverage {
-					cmme.CPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(cpuCost)
+					cmme.CPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType).Set(cpuCost)
 					avgCosts.CpuCostAverage = (avgCosts.CpuCostAverage*avgCosts.NumCpuDataPoints + cpuCost) / (avgCosts.NumCpuDataPoints + 1)
 					avgCosts.NumCpuDataPoints += 1
 				} else {
 					log.Warnf("CPU cost outlier detected; skipping data point.")
 				}
 				if ramCost < outlierFactor*avgCosts.RamCostAverage {
-					cmme.RAMPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(ramCost)
+					cmme.RAMPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType).Set(ramCost)
 					avgCosts.RamCostAverage = (avgCosts.RamCostAverage*avgCosts.NumRamDataPoints + ramCost) / (avgCosts.NumRamDataPoints + 1)
 					avgCosts.NumRamDataPoints += 1
 				} else {
@@ -552,7 +552,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 				// skip redording totalCost if any constituent costs were outliers
 				if cpuCost < outlierFactor*avgCosts.CpuCostAverage &&
 					ramCost < outlierFactor*avgCosts.RamCostAverage {
-					cmme.NodeTotalPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(totalCost)
+					cmme.NodeTotalPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID, node.ArchType).Set(totalCost)
 				}
 
 				nodeCostAverages[labelKey] = avgCosts

+ 12 - 1
pkg/env/costmodelenv.go

@@ -1,6 +1,7 @@
 package env
 
 import (
+	"fmt"
 	"regexp"
 	"strconv"
 	"time"
@@ -67,7 +68,8 @@ const (
 
 	KubeConfigPathEnvVar = "KUBECONFIG_PATH"
 
-	UTCOffsetEnvVar = "UTC_OFFSET"
+	UTCOffsetEnvVar                  = "UTC_OFFSET"
+	CurrentClusterIdFilterEnabledVar = "CURRENT_CLUSTER_ID_FILTER_ENABLED"
 
 	CacheWarmingEnabledEnvVar            = "CACHE_WARMING_ENABLED"
 	ETLEnabledEnvVar                     = "ETL_ENABLED"
@@ -284,6 +286,15 @@ func GetClusterID() string {
 	return Get(ClusterIDEnvVar, "")
 }
 
+// GetPromClusterFilter returns an additional Prometheus label filter (clusterLabel="clusterID")
+// applied to all metric queries for the current cluster when the CurrentClusterIdFilterEnabledVar
+// environment variable is enabled; otherwise it returns an empty string.
+func GetPromClusterFilter() string {
+	if GetBool(CurrentClusterIdFilterEnabledVar, false) {
+		return fmt.Sprintf("%s=\"%s\"", GetPromClusterLabel(), GetClusterID())
+	}
+	return ""
+}
+
 // GetPrometheusServerEndpoint returns the environment variable value for PrometheusServerEndpointEnvVar which
 // represents the prometheus server endpoint used to execute prometheus queries.
 func GetPrometheusServerEndpoint() string {

+ 9 - 1
pkg/filemanager/filemanager_test.go

@@ -14,25 +14,33 @@ import (
 
 func Test_NewFileManager(t *testing.T) {
 	t.Run("file system", func(t *testing.T) {
+		// Create File Manager
 		tmpPath := filepath.Join(os.TempDir(), fmt.Sprintf("opencost-test-file-manager-%d", rand.Int31()))
+		defer os.Remove(tmpPath) // remove managed file on completion to ensure it does not exist for next run
 		fm, err := NewFileManager(tmpPath)
 		require.NoError(t, err)
 
+		// Attempt to download the managed file into a new file; this should fail because the managed file has not been initialized
 		downloadFile, err := os.CreateTemp("", "opencost-test-file-manager-*")
 		require.NoError(t, err)
 		err = fm.Download(context.TODO(), downloadFile)
 		require.ErrorIs(t, err, ErrNotFound)
 
+		// Create a file and add content to it
 		uploadFile, err := os.CreateTemp("", "opencost-test-file-manager-*")
 		require.NoError(t, err)
 		_, err = uploadFile.WriteString("test-content")
 		require.NoError(t, err)
-		require.NoError(t, err)
+
+		// Upload file to managed file
 		err = fm.Upload(context.TODO(), uploadFile)
 		require.NoError(t, err)
 
+		// Download managed file to original file
 		err = fm.Download(context.TODO(), downloadFile)
 		require.NoError(t, err)
+
+		// Check that content matches
 		_, err = downloadFile.Seek(0, io.SeekStart)
 		require.NoError(t, err)
 		data, err := io.ReadAll(downloadFile)

+ 53 - 17
pkg/filter/util/cloudcost.go

@@ -1,6 +1,7 @@
 package util
 
 import (
+	"reflect"
 	"strings"
 
 	"github.com/opencost/opencost/pkg/filter"
@@ -9,48 +10,83 @@ import (
 	"github.com/opencost/opencost/pkg/util/mapper"
 )
 
+type CloudCostFilter struct {
+	AccountIDs       []string `json:"accountIDs,omitempty"`
+	Categories       []string `json:"categories,omitempty"`
+	InvoiceEntityIDs []string `json:"invoiceEntityIDs,omitempty"`
+	Labels           []string `json:"labels,omitempty"`
+	Providers        []string `json:"providers,omitempty"`
+	ProviderIDs      []string `json:"providerIDs,omitempty"`
+	Services         []string `json:"services,omitempty"`
+}
+
+func (g *CloudCostFilter) Equals(that CloudCostFilter) bool {
+	return reflect.DeepEqual(g.AccountIDs, that.AccountIDs) &&
+		reflect.DeepEqual(g.Categories, that.Categories) &&
+		reflect.DeepEqual(g.InvoiceEntityIDs, that.InvoiceEntityIDs) &&
+		reflect.DeepEqual(g.Labels, that.Labels) &&
+		reflect.DeepEqual(g.Providers, that.Providers) &&
+		reflect.DeepEqual(g.ProviderIDs, that.ProviderIDs) &&
+		reflect.DeepEqual(g.Services, that.Services)
+}
 func parseWildcardEnd(rawFilterValue string) (string, bool) {
 	return strings.TrimSuffix(rawFilterValue, "*"), strings.HasSuffix(rawFilterValue, "*")
 }
 
 func CloudCostFilterFromParams(pmr mapper.PrimitiveMapReader) filter.Filter[*kubecost.CloudCost] {
-	filter := filter.And[*kubecost.CloudCost]{
+	ccFilter := convertFilterQueryParams(pmr)
+	return ParseCloudCostFilter(ccFilter)
+}
+
+func convertFilterQueryParams(pmr mapper.PrimitiveMapReader) CloudCostFilter {
+	return CloudCostFilter{
+		AccountIDs:       pmr.GetList("filterAccountIDs", ","),
+		Categories:       pmr.GetList("filterCategories", ","),
+		InvoiceEntityIDs: pmr.GetList("filterInvoiceEntityIDs", ","),
+		Labels:           pmr.GetList("filterLabels", ","),
+		Providers:        pmr.GetList("filterProviders", ","),
+		ProviderIDs:      pmr.GetList("filterProviderIDs", ","),
+		Services:         pmr.GetList("filterServices", ","),
+	}
+}
+func ParseCloudCostFilter(filters CloudCostFilter) filter.Filter[*kubecost.CloudCost] {
+	result := filter.And[*kubecost.CloudCost]{
 		Filters: []filter.Filter[*kubecost.CloudCost]{},
 	}
 
-	if raw := pmr.GetList("filterInvoiceEntityIDs", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostInvoiceEntityIDProp))
+	if len(filters.InvoiceEntityIDs) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.InvoiceEntityIDs, kubecost.CloudCostInvoiceEntityIDProp))
 	}
 
-	if raw := pmr.GetList("filterAccountIDs", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostAccountIDProp))
+	if len(filters.AccountIDs) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.AccountIDs, kubecost.CloudCostAccountIDProp))
 	}
 
-	if raw := pmr.GetList("filterProviders", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostProviderProp))
+	if len(filters.Providers) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.Providers, kubecost.CloudCostProviderProp))
 	}
 
-	if raw := pmr.GetList("filterProviderIDs", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostProviderIDProp))
+	if len(filters.ProviderIDs) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.ProviderIDs, kubecost.CloudCostProviderIDProp))
 	}
 
-	if raw := pmr.GetList("filterServices", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostServiceProp))
+	if len(filters.Services) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.Services, kubecost.CloudCostServiceProp))
 	}
 
-	if raw := pmr.GetList("filterCategories", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.CloudCostCategoryProp))
+	if len(filters.Categories) > 0 {
+		result.Filters = append(result.Filters, filterV1SingleValueFromList(filters.Categories, kubecost.CloudCostCategoryProp))
 	}
 
-	if raw := pmr.GetList("filterLabels", ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1DoubleValueFromList(raw, kubecost.CloudCostLabelProp))
+	if len(filters.Labels) > 0 {
+		result.Filters = append(result.Filters, filterV1DoubleValueFromList(filters.Labels, kubecost.CloudCostLabelProp))
 	}
 
-	if len(filter.Filters) == 0 {
+	if len(result.Filters) == 0 {
 		return nil
 	}
 
-	return filter
+	return result
 }
 
 func filterV1SingleValueFromList(rawFilterValues []string, field string) filter.Filter[*kubecost.CloudCost] {

+ 134 - 0
pkg/filter/util/cloudcost_test.go

@@ -0,0 +1,134 @@
+package util
+
+import (
+	"testing"
+)
+
+type CloudCostFilterEqualsTestcase struct {
+	name     string
+	this     CloudCostFilter
+	that     CloudCostFilter
+	expected bool
+}
+
+func TestCloudCostFilter_Equals(t *testing.T) {
+	testCases := []CloudCostFilterEqualsTestcase{
+		{
+			name: "both filters nil",
+			this: CloudCostFilter{
+				AccountIDs:       nil,
+				Categories:       nil,
+				InvoiceEntityIDs: nil,
+				Labels:           nil,
+				Providers:        nil,
+				ProviderIDs:      nil,
+				Services:         nil,
+			},
+			that: CloudCostFilter{
+				AccountIDs:       nil,
+				Categories:       nil,
+				InvoiceEntityIDs: nil,
+				Labels:           nil,
+				Providers:        nil,
+				ProviderIDs:      nil,
+				Services:         nil,
+			},
+			expected: true,
+		},
+		{
+			name: "both filters not nil and matching",
+			this: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			that: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			expected: true,
+		},
+		{
+			name: "both filters diff count",
+			this: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			that: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			expected: false,
+		},
+		{
+			name: "slight mismatch",
+			this: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			that: CloudCostFilter{
+				AccountIDs:       []string{"account10", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			expected: false,
+		},
+		{
+			name: "one nil, one not",
+			this: CloudCostFilter{
+				AccountIDs:       []string{"account1", "account2", "account3"},
+				Categories:       []string{"category1", "category2"},
+				InvoiceEntityIDs: []string{"invoice1", "invoice2"},
+				Labels:           []string{"label1", "label2"},
+				Providers:        []string{"provider1", "provider2"},
+				ProviderIDs:      []string{"provider1", "provider2"},
+				Services:         []string{"s1"},
+			},
+			that: CloudCostFilter{
+				AccountIDs:       nil,
+				Categories:       nil,
+				InvoiceEntityIDs: nil,
+				Labels:           nil,
+				Providers:        nil,
+				ProviderIDs:      nil,
+				Services:         nil,
+			},
+			expected: false,
+		},
+	}
+
+	for _, tc := range testCases {
+		got := tc.this.Equals(tc.that)
+		if got != tc.expected {
+			t.Fatalf("expected %t, got: %t for test case: %s", tc.expected, got, tc.name)
+		}
+	}
+}

+ 87 - 0
pkg/kubecost/allocation.go

@@ -87,6 +87,7 @@ type Allocation struct {
 	// asset on which the allocation was run. It is optionally computed
 	// and appended to an Allocation, and so by default is is nil.
 	ProportionalAssetResourceCosts ProportionalAssetResourceCosts `json:"proportionalAssetResourceCosts"` //@bingen:field[ignore]
+	SharedCostBreakdown            SharedCostBreakdowns           `json:"sharedCostBreakdown"`            //@bingen:field[ignore]
 }
 
 // RawAllocationOnlyData is information that only belong in "raw" Allocations,
@@ -356,6 +357,53 @@ func (parcs ProportionalAssetResourceCosts) Add(that ProportionalAssetResourceCo
 	}
 }
 
+type SharedCostBreakdown struct {
+	Name         string  `json:"name"`
+	TotalCost    float64 `json:"totalCost"`
+	CPUCost      float64 `json:"cpuCost,omitempty"`
+	GPUCost      float64 `json:"gpuCost,omitempty"`
+	RAMCost      float64 `json:"ramCost,omitempty"`
+	PVCost       float64 `json:"pvCost,omitempty"`
+	NetworkCost  float64 `json:"networkCost,omitempty"`
+	LBCost       float64 `json:"loadBalancerCost,omitempty"`
+	ExternalCost float64 `json:"externalCost,omitempty"`
+}
+
+type SharedCostBreakdowns map[string]SharedCostBreakdown
+
+func (scbs SharedCostBreakdowns) Clone() SharedCostBreakdowns {
+	cloned := SharedCostBreakdowns{}
+
+	for key, scb := range scbs {
+		cloned[key] = scb
+	}
+	return cloned
+}
+
+func (scbs SharedCostBreakdowns) Insert(scb SharedCostBreakdown) {
+	if curr, ok := scbs[scb.Name]; ok {
+		scbs[scb.Name] = SharedCostBreakdown{
+			Name:         curr.Name,
+			TotalCost:    curr.TotalCost + scb.TotalCost,
+			CPUCost:      curr.CPUCost + scb.CPUCost,
+			GPUCost:      curr.GPUCost + scb.GPUCost,
+			RAMCost:      curr.RAMCost + scb.RAMCost,
+			PVCost:       curr.PVCost + scb.PVCost,
+			NetworkCost:  curr.NetworkCost + scb.NetworkCost,
+			LBCost:       curr.LBCost + scb.LBCost,
+			ExternalCost: curr.ExternalCost + scb.ExternalCost,
+		}
+	} else {
+		scbs[scb.Name] = scb
+	}
+}
+
+func (scbs SharedCostBreakdowns) Add(that SharedCostBreakdowns) {
+	for _, scb := range that {
+		scbs.Insert(scb)
+	}
+}
+
 // GetWindow returns the window of the struct
 func (a *Allocation) GetWindow() Window {
 	return a.Window
@@ -424,6 +472,7 @@ func (a *Allocation) Clone() *Allocation {
 		ExternalCost:                   a.ExternalCost,
 		RawAllocationOnly:              a.RawAllocationOnly.Clone(),
 		ProportionalAssetResourceCosts: a.ProportionalAssetResourceCosts.Clone(),
+		SharedCostBreakdown:            a.SharedCostBreakdown.Clone(),
 	}
 }
 
@@ -844,6 +893,18 @@ func (a *Allocation) add(that *Allocation) {
 		a.ProportionalAssetResourceCosts.Add(that.ProportionalAssetResourceCosts)
 	}
 
+	// If both Allocations have SharedCostBreakdowns, then
+	// add those from the given Allocation into the receiver.
+	if a.SharedCostBreakdown != nil || that.SharedCostBreakdown != nil {
+		if a.SharedCostBreakdown == nil {
+			a.SharedCostBreakdown = SharedCostBreakdowns{}
+		}
+		if that.SharedCostBreakdown == nil {
+			that.SharedCostBreakdown = SharedCostBreakdowns{}
+		}
+		a.SharedCostBreakdown.Add(that.SharedCostBreakdown)
+	}
+
 	// Overwrite regular intersection logic for the controller name property in the
 	// case that the Allocation keys are the same but the controllers are not.
 	if leftKey == rightKey &&
@@ -987,6 +1048,7 @@ type AllocationAggregationOptions struct {
 	ShareIdle                             string
 	ShareSplit                            string
 	SharedHourlyCosts                     map[string]float64
+	IncludeSharedCostBreakdown            bool
 	SplitIdle                             bool
 	IncludeAggregatedMetadata             bool
 }
@@ -1473,6 +1535,31 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 					continue
 				}
 
+				if options.IncludeSharedCostBreakdown {
+					if alloc.SharedCostBreakdown == nil {
+						alloc.SharedCostBreakdown = map[string]SharedCostBreakdown{}
+					}
+					sharedCostName := sharedAlloc.generateKey(aggregateBy, options.LabelConfig)
+					// check if current allocation is a shared flat overhead cost
+					if strings.Contains(sharedAlloc.Name, SharedSuffix) {
+						sharedCostName = "overheadCost"
+					}
+
+					scb := SharedCostBreakdown{
+						Name:         sharedCostName,
+						TotalCost:    sharedAlloc.TotalCost() * shareCoefficients[alloc.Name],
+						CPUCost:      sharedAlloc.CPUTotalCost() * shareCoefficients[alloc.Name],
+						GPUCost:      sharedAlloc.GPUTotalCost() * shareCoefficients[alloc.Name],
+						RAMCost:      sharedAlloc.RAMTotalCost() * shareCoefficients[alloc.Name],
+						PVCost:       sharedAlloc.PVCost() * shareCoefficients[alloc.Name],
+						NetworkCost:  sharedAlloc.NetworkTotalCost() * shareCoefficients[alloc.Name],
+						LBCost:       sharedAlloc.LBTotalCost() * shareCoefficients[alloc.Name],
+						ExternalCost: sharedAlloc.ExternalCost * shareCoefficients[alloc.Name],
+					}
+					// fmt.Printf("shared cost: %+v", scb)
+					alloc.SharedCostBreakdown.Insert(scb)
+				}
+
 				alloc.SharedCost += sharedAlloc.TotalCost() * shareCoefficients[alloc.Name]
 			}
 		}

+ 2 - 0
pkg/kubecost/allocation_json.go

@@ -55,6 +55,7 @@ type AllocationJSON struct {
 	TotalEfficiency                *float64                        `json:"totalEfficiency"`
 	RawAllocationOnly              *RawAllocationOnlyData          `json:"rawAllocationOnly,omitempty"`
 	ProportionalAssetResourceCosts *ProportionalAssetResourceCosts `json:"proportionalAssetResourceCosts,omitempty"`
+	SharedCostBreakdown            *SharedCostBreakdowns           `json:"sharedCostBreakdown,omitempty"`
 }
 
 func (aj *AllocationJSON) BuildFromAllocation(a *Allocation) {
@@ -105,6 +106,7 @@ func (aj *AllocationJSON) BuildFromAllocation(a *Allocation) {
 	aj.TotalEfficiency = formatFloat64ForResponse(a.TotalEfficiency())
 	aj.RawAllocationOnly = a.RawAllocationOnly
 	aj.ProportionalAssetResourceCosts = &a.ProportionalAssetResourceCosts
+	aj.SharedCostBreakdown = &a.SharedCostBreakdown
 
 }
 

+ 121 - 0
pkg/kubecost/allocation_test.go

@@ -12,6 +12,7 @@ import (
 	"github.com/opencost/opencost/pkg/log"
 	"github.com/opencost/opencost/pkg/util"
 	"github.com/opencost/opencost/pkg/util/json"
+	"github.com/opencost/opencost/pkg/util/timeutil"
 )
 
 func TestAllocation_Add(t *testing.T) {
@@ -1723,6 +1724,126 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 	}
 }
 
+func TestAllocationSet_AggregateBy_SharedCostBreakdown(t *testing.T) {
+	// Set generated by GenerateMockAllocationSet
+	// | Hierarchy                              | Cost |  CPU |  RAM |  GPU |   PV |  Net |  LB  |
+	// +----------------------------------------+------+------+------+------+------+------+------+
+	//   cluster1:
+	//     idle:                                  20.00   5.00  15.00   0.00   0.00   0.00   0.00
+	//     namespace1:
+	//       pod1:
+	//         container1: [app1, env1]   16.00   1.00  11.00   1.00   1.00   1.00   1.00
+	//       pod-abc: (deployment1)
+	//         container2:                         6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//       pod-def: (deployment1)
+	//         container3:                         6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//     namespace2:
+	//       pod-ghi: (deployment2)
+	//         container4: [app2, env2]    6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//         container5: [app2, env2]    6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//       pod-jkl: (daemonset1)
+	//         container6: {service1}              6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	// +-----------------------------------------+------+------+------+------+------+------+------+
+	//   cluster1 subtotal                        66.00  11.00  31.00   6.00   6.00   6.00   6.00
+	// +-----------------------------------------+------+------+------+------+------+------+------+
+	//   cluster2:
+	//     idle:                                  10.00   5.00   5.00   0.00   0.00   0.00   0.00
+	//     namespace2:
+	//       pod-mno: (deployment2)
+	//         container4: [app2]              6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//         container5: [app2]              6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//       pod-pqr: (daemonset1)
+	//         container6: {service1}              6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//     namespace3:
+	//       pod-stu: (deployment3)
+	//         container7: an[team1]          6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//       pod-vwx: (statefulset1)
+	//         container8: an[team2]          6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	//         container9: an[team1]          6.00   1.00   1.00   1.00   1.00   1.00   1.00
+	// +----------------------------------------+------+------+------+------+------+------+------+
+	//   cluster2 subtotal                        46.00  11.00  11.00   6.00   6.00   6.00   6.00
+	// +----------------------------------------+------+------+------+------+------+------+------+
+	//   total                                   112.00  22.00  42.00  12.00  12.00  12.00  12.00
+	// +----------------------------------------+------+------+------+------+------+------+------+
+	end := time.Now().UTC().Truncate(day)
+	start := end.Add(-day)
+
+	isNamespace1 := func(a *Allocation) bool {
+		ns := a.Properties.Namespace
+		return ns == "namespace1"
+	}
+
+	isNamespace3 := func(a *Allocation) bool {
+		ns := a.Properties.Namespace
+		return ns == "namespace3"
+	}
+
+	cases := map[string]struct {
+		start   time.Time
+		aggBy   []string
+		aggOpts *AllocationAggregationOptions
+	}{
+		"agg cluster, flat shared cost": {
+			start: start,
+			aggBy: []string{"cluster"},
+			aggOpts: &AllocationAggregationOptions{
+				SharedHourlyCosts:          map[string]float64{"share_hourly": 10.0 / timeutil.HoursPerDay},
+				IncludeSharedCostBreakdown: true,
+			},
+		},
+		"agg namespace, shared namespace: namespace1": {
+			start: start,
+			aggBy: []string{"namespace"},
+			aggOpts: &AllocationAggregationOptions{
+				ShareFuncs: []AllocationMatchFunc{
+					isNamespace1,
+				},
+				IncludeSharedCostBreakdown: true,
+			},
+		},
+		"agg namespace, shared namespace: namespace3": {
+			start: start,
+			aggBy: []string{"namespace"},
+			aggOpts: &AllocationAggregationOptions{
+				ShareFuncs: []AllocationMatchFunc{
+					isNamespace3,
+				},
+				IncludeSharedCostBreakdown: true,
+			},
+		},
+	}
+
+	for name, tc := range cases {
+		t.Run(name, func(t *testing.T) {
+			as := GenerateMockAllocationSetClusterIdle(tc.start)
+			err := as.AggregateBy(tc.aggBy, tc.aggOpts)
+			if err != nil {
+				t.Fatalf("error aggregating: %s", err)
+			}
+			for _, alloc := range as.Allocations {
+				var breakdownTotal float64
+				// ignore idle since it should never have shared costs
+				if strings.Contains(alloc.Name, IdleSuffix) {
+					continue
+				}
+				for _, sharedAlloc := range alloc.SharedCostBreakdown {
+					breakdownTotal += sharedAlloc.TotalCost
+					totalInternal := sharedAlloc.CPUCost + sharedAlloc.GPUCost + sharedAlloc.RAMCost + sharedAlloc.NetworkCost + sharedAlloc.LBCost + sharedAlloc.PVCost + sharedAlloc.ExternalCost
+					// check that the total cost of a single item in the breakdown equals the sum of its parts
+					// we can ignore the overheadCost breakdown since it only has a total
+					if totalInternal != sharedAlloc.TotalCost && sharedAlloc.Name != "overheadCost" {
+						t.Errorf("expected internal total: %f; got %f", sharedAlloc.TotalCost, totalInternal)
+					}
+				}
+				// check that the totals of all shared cost breakdowns equal the allocation's SharedCost
+				if breakdownTotal != alloc.SharedCost {
+					t.Errorf("expected breakdown total: %f; got %f", alloc.SharedCost, breakdownTotal)
+				}
+			}
+		})
+	}
+}
+
 // TODO niko/etl
 //func TestAllocationSet_Clone(t *testing.T) {}
 

+ 15 - 0
pkg/metrics/kubemetrics.go

@@ -108,6 +108,9 @@ func InitKubeMetrics(clusterCache clustercache.ClusterCache, metricsConfig *Metr
 				metricsConfig:    *metricsConfig,
 			})
 		} else if opts.EmitKubeStateMetricsV1Only {
+			// We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
+			forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
+			metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
 			prometheus.MustRegister(KubeNodeCollector{
 				KubeClusterCache: clusterCache,
 				metricsConfig:    *metricsConfig,
@@ -120,6 +123,18 @@ func InitKubeMetrics(clusterCache clustercache.ClusterCache, metricsConfig *Metr
 				KubeClusterCache: clusterCache,
 				metricsConfig:    *metricsConfig,
 			})
+			prometheus.MustRegister(KubePVCollector{
+				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
+			})
+		} else {
+			// We still need the kubecost_pv_info metric to look up storageclass on legacy clusters.
+			forceDisabled := []string{"kube_persistentvolume_capacity_bytes", "kube_persistentvolume_status_phase"}
+			metricsConfig.DisabledMetrics = append(metricsConfig.DisabledMetrics, forceDisabled...)
+			prometheus.MustRegister(KubePVCollector{
+				KubeClusterCache: clusterCache,
+				metricsConfig:    *metricsConfig,
+			})
 		}
 	})
 }

+ 10 - 9
pkg/prom/diagnostics.go

@@ -74,41 +74,41 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
 	},
 	KubecostDiagnosticMetricID: {
 		ID:          KubecostDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(node_cpu_hourly_cost[5m] %s)`,
+		QueryFmt:    `absent_over_time(node_cpu_hourly_cost{%s}[5m] %s)`,
 		Label:       "Kubecost metrics available",
 		Description: "Determine if metrics from Kubecost are available during last 5 minutes.",
 	},
 	NodeExporterDiagnosticMetricID: {
 		ID:          NodeExporterDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(node_cpu_seconds_total[5m] %s)`,
+		QueryFmt:    `absent_over_time(node_cpu_seconds_total{%s}[5m] %s)`,
 		Label:       "Node-exporter metrics available",
 		Description: "Determine if metrics from node-exporter are available during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#node-exporter-metrics-available", DocumentationBaseURL),
 	},
 	CAdvisorLabelDiagnosticMetricID: {
 		ID:          CAdvisorLabelDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total{container!="",pod!=""}[5m] %s)`,
+		QueryFmt:    `absent_over_time(container_cpu_usage_seconds_total{container!="",pod!="",%s}[5m] %s)`,
 		Label:       "Expected cAdvisor labels available",
 		Description: "Determine if expected cAdvisor labels are present during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#cadvisor-metrics-available", DocumentationBaseURL),
 	},
 	KSMVersionDiagnosticMetricID: {
 		ID:          KSMVersionDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(kube_persistentvolume_capacity_bytes[5m] %s)`,
+		QueryFmt:    `absent_over_time(kube_persistentvolume_capacity_bytes{%s}[5m] %s)`,
 		Label:       "Expected kube-state-metrics version found",
 		Description: "Determine if metric in required kube-state-metrics version are present during last 5 minutes.",
 		DocLink:     fmt.Sprintf("%s#expected-kube-state-metrics-version-found", DocumentationBaseURL),
 	},
 	ScrapeIntervalDiagnosticMetricID: {
 		ID:          ScrapeIntervalDiagnosticMetricID,
-		QueryFmt:    `absent_over_time(prometheus_target_interval_length_seconds[5m]  %s)`,
+		QueryFmt:    `absent_over_time(prometheus_target_interval_length_seconds{%s}[5m]  %s)`,
 		Label:       "Expected Prometheus self-scrape metrics available",
 		Description: "Determine if prometheus has its own self-scraped metrics during the last 5 minutes.",
 	},
 	CPUThrottlingDiagnosticMetricID: {
 		ID: CPUThrottlingDiagnosticMetricID,
-		QueryFmt: `avg(increase(container_cpu_cfs_throttled_periods_total{container="cost-model"}[10m] %s)) by (container_name, pod_name, namespace)
-	/ avg(increase(container_cpu_cfs_periods_total{container="cost-model"}[10m] %s)) by (container_name, pod_name, namespace) > 0.2`,
+		QueryFmt: `avg(increase(container_cpu_cfs_throttled_periods_total{container="cost-model",%s}[10m] %s)) by (container_name, pod_name, namespace)
+	/ avg(increase(container_cpu_cfs_periods_total{container="cost-model",%s}[10m] %s)) by (container_name, pod_name, namespace) > 0.2`,
 		Label:       "Kubecost is not CPU throttled",
 		Description: "Kubecost loading slowly? A kubecost component might be CPU throttled",
 	},
@@ -266,10 +266,11 @@ func (pdd *diagnosticDefinition) NewDiagnostic(offset string) *PrometheusDiagnos
 	// FIXME: Any reasonable way to get the total number of replacements required in the query?
 	// FIXME: All of the other queries require a single offset replace, but CPUThrottle requires two.
 	var query string
+	filter := env.GetPromClusterFilter()
 	if pdd.ID == CPUThrottlingDiagnosticMetricID {
-		query = fmt.Sprintf(pdd.QueryFmt, offset, offset)
+		query = fmt.Sprintf(pdd.QueryFmt, filter, offset, filter, offset)
 	} else {
-		query = fmt.Sprintf(pdd.QueryFmt, offset)
+		query = fmt.Sprintf(pdd.QueryFmt, filter, offset)
 	}
 
 	return &PrometheusDiagnostic{

+ 145 - 38
pkg/util/allocationfilterutil/queryfilters.go

@@ -1,6 +1,8 @@
 package allocationfilterutil
 
 import (
+	"fmt"
+	"reflect"
 	"strings"
 
 	"github.com/opencost/opencost/pkg/costmodel/clusters"
@@ -30,6 +32,31 @@ const (
 	ParamFilterServices    = "filterServices"
 )
 
+var allocationFilterFieldMap = map[string]string{
+	kubecost.AllocationClusterProp:        ParamFilterClusters,
+	kubecost.FilterNode:                   ParamFilterNodes,
+	kubecost.AllocationNamespaceProp:      ParamFilterNamespaces,
+	kubecost.AllocationControllerKindProp: ParamFilterControllerKinds,
+	kubecost.AllocationControllerProp:     ParamFilterControllers,
+	kubecost.AllocationPodProp:            ParamFilterPods,
+	kubecost.AllocationContainerProp:      ParamFilterContainers,
+	kubecost.AllocationDepartmentProp:     ParamFilterDepartments,
+	kubecost.AllocationEnvironmentProp:    ParamFilterEnvironments,
+	kubecost.AllocationOwnerProp:          ParamFilterOwners,
+	kubecost.AllocationProductProp:        ParamFilterProducts,
+	kubecost.AllocationTeamProp:           ParamFilterTeams,
+	kubecost.AllocationAnnotationProp:     ParamFilterAnnotations,
+	kubecost.AllocationLabelProp:          ParamFilterLabels,
+	kubecost.AllocationServiceProp:        ParamFilterServices,
+}
+
+func GetAllocationFilterForTheAllocationProperty(allocationProp string) (string, error) {
+	if _, ok := allocationFilterFieldMap[allocationProp]; !ok {
+		return "", fmt.Errorf("unknown allocation property %s", allocationProp)
+	}
+	return allocationFilterFieldMap[allocationProp], nil
+}
+
 // AllHTTPParamKeys returns all HTTP GET parameters used for v1 filters. It is
 // intended to help validate HTTP queries in handlers to help avoid e.g.
 // spelling errors.
@@ -55,6 +82,42 @@ func AllHTTPParamKeys() []string {
 	}
 }
 
+type FilterV1 struct {
+	Annotations     []string `json:"annotations,omitempty"`
+	Containers      []string `json:"containers,omitempty"`
+	Controllers     []string `json:"controllers,omitempty"`
+	ControllerKinds []string `json:"controllerKinds,omitempty"`
+	Clusters        []string `json:"clusters,omitempty"`
+	Departments     []string `json:"departments,omitempty"`
+	Environments    []string `json:"environments,omitempty"`
+	Labels          []string `json:"labels,omitempty"`
+	Namespaces      []string `json:"namespaces,omitempty"`
+	Nodes           []string `json:"nodes,omitempty"`
+	Owners          []string `json:"owners,omitempty"`
+	Pods            []string `json:"pods,omitempty"`
+	Products        []string `json:"products,omitempty"`
+	Services        []string `json:"services,omitempty"`
+	Teams           []string `json:"teams,omitempty"`
+}
+
+func (f FilterV1) Equals(that FilterV1) bool {
+	return reflect.DeepEqual(f.Annotations, that.Annotations) &&
+		reflect.DeepEqual(f.Containers, that.Containers) &&
+		reflect.DeepEqual(f.Controllers, that.Controllers) &&
+		reflect.DeepEqual(f.ControllerKinds, that.ControllerKinds) &&
+		reflect.DeepEqual(f.Clusters, that.Clusters) &&
+		reflect.DeepEqual(f.Departments, that.Departments) &&
+		reflect.DeepEqual(f.Environments, that.Environments) &&
+		reflect.DeepEqual(f.Labels, that.Labels) &&
+		reflect.DeepEqual(f.Namespaces, that.Namespaces) &&
+		reflect.DeepEqual(f.Nodes, that.Nodes) &&
+		reflect.DeepEqual(f.Owners, that.Owners) &&
+		reflect.DeepEqual(f.Pods, that.Pods) &&
+		reflect.DeepEqual(f.Products, that.Products) &&
+		reflect.DeepEqual(f.Services, that.Services) &&
+		reflect.DeepEqual(f.Teams, that.Teams)
+}
+
 // ============================================================================
 // This file contains:
 // Parsing (HTTP query params -> AllocationFilter) for V1 of filters
@@ -72,7 +135,7 @@ func parseWildcardEnd(rawFilterValue string) (string, bool) {
 	return strings.TrimSuffix(rawFilterValue, "*"), strings.HasSuffix(rawFilterValue, "*")
 }
 
-// AllocationFilterFromParamsV1 takes a set of HTTP query parameters and
+// ParseAllocationFilterV1 takes a FilterV1 struct and
 // converts them to an AllocationFilter, which is a structured in-Go
 // representation of a set of filters.
 //
@@ -85,12 +148,7 @@ func parseWildcardEnd(rawFilterValue string) (string, bool) {
 // It takes an optional ClusterMap, which if provided enables cluster name
 // filtering. This turns all `filterClusters=foo` arguments into the equivalent
 // of `clusterID = "foo" OR clusterName = "foo"`.
-func AllocationFilterFromParamsV1(
-	qp mapper.PrimitiveMapReader,
-	labelConfig *kubecost.LabelConfig,
-	clusterMap clusters.ClusterMap,
-) kubecost.AllocationFilter {
-
+func ParseAllocationFilterV1(filters FilterV1, labelConfig *kubecost.LabelConfig, clusterMap clusters.ClusterMap) kubecost.AllocationFilter {
 	filter := kubecost.AllocationFilterAnd{
 		Filters: []kubecost.AllocationFilter{},
 	}
@@ -128,15 +186,15 @@ func AllocationFilterFromParamsV1(
 	// filter structs (they evaluate to true always) there could be overhead
 	// when calling Matches() repeatedly for no purpose.
 
-	if filterClusters := qp.GetList(ParamFilterClusters, ","); len(filterClusters) > 0 {
+	if len(filters.Clusters) > 0 {
 		clustersOr := kubecost.AllocationFilterOr{
 			Filters: []kubecost.AllocationFilter{},
 		}
 
-		if idFilters := filterV1SingleValueFromList(filterClusters, kubecost.FilterClusterID); len(idFilters.Filters) > 0 {
+		if idFilters := filterV1SingleValueFromList(filters.Clusters, kubecost.FilterClusterID); len(idFilters.Filters) > 0 {
 			clustersOr.Filters = append(clustersOr.Filters, idFilters)
 		}
-		for _, rawFilterValue := range filterClusters {
+		for _, rawFilterValue := range filters.Clusters {
 			clusterNameFilter, wildcard := parseWildcardEnd(rawFilterValue)
 
 			clusterIDsToFilter := []string{}
@@ -161,27 +219,27 @@ func AllocationFilterFromParamsV1(
 		filter.Filters = append(filter.Filters, clustersOr)
 	}
 
-	if raw := qp.GetList(ParamFilterNodes, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.FilterNode))
+	if len(filters.Nodes) > 0 {
+		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(filters.Nodes, kubecost.FilterNode))
 	}
 
-	if raw := qp.GetList(ParamFilterNamespaces, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.FilterNamespace))
+	if len(filters.Namespaces) > 0 {
+		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(filters.Namespaces, kubecost.FilterNamespace))
 	}
 
-	if raw := qp.GetList(ParamFilterControllerKinds, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.FilterControllerKind))
+	if len(filters.ControllerKinds) > 0 {
+		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(filters.ControllerKinds, kubecost.FilterControllerKind))
 	}
 
 	// filterControllers= accepts controllerkind:controllername filters, e.g.
 	// "deployment:kubecost-cost-analyzer"
 	//
 	// Thus, we have to make a custom OR filter for this condition.
-	if filterControllers := qp.GetList(ParamFilterControllers, ","); len(filterControllers) > 0 {
+	if len(filters.Controllers) > 0 {
 		controllersOr := kubecost.AllocationFilterOr{
 			Filters: []kubecost.AllocationFilter{},
 		}
-		for _, rawFilterValue := range filterControllers {
+		for _, rawFilterValue := range filters.Controllers {
 			split := strings.Split(rawFilterValue, ":")
 			if len(split) == 1 {
 				filterValue, wildcard := parseWildcardEnd(split[0])
@@ -228,49 +286,49 @@ func AllocationFilterFromParamsV1(
 		}
 	}
 
-	if raw := qp.GetList(ParamFilterPods, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.FilterPod))
+	if len(filters.Pods) > 0 {
+		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(filters.Pods, kubecost.FilterPod))
 	}
 
-	if raw := qp.GetList(ParamFilterContainers, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(raw, kubecost.FilterContainer))
+	if len(filters.Containers) > 0 {
+		filter.Filters = append(filter.Filters, filterV1SingleValueFromList(filters.Containers, kubecost.FilterContainer))
 	}
 
 	// Label-mapped queries require a label config to be present.
 	if labelConfig != nil {
-		if raw := qp.GetList(ParamFilterDepartments, ","); len(raw) > 0 {
-			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(raw, labelConfig.DepartmentLabel))
+		if len(filters.Departments) > 0 {
+			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(filters.Departments, labelConfig.DepartmentLabel))
 		}
-		if raw := qp.GetList(ParamFilterEnvironments, ","); len(raw) > 0 {
-			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(raw, labelConfig.EnvironmentLabel))
+		if len(filters.Environments) > 0 {
+			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(filters.Environments, labelConfig.EnvironmentLabel))
 		}
-		if raw := qp.GetList(ParamFilterOwners, ","); len(raw) > 0 {
-			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(raw, labelConfig.OwnerLabel))
+		if len(filters.Owners) > 0 {
+			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(filters.Owners, labelConfig.OwnerLabel))
 		}
-		if raw := qp.GetList(ParamFilterProducts, ","); len(raw) > 0 {
-			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(raw, labelConfig.ProductLabel))
+		if len(filters.Products) > 0 {
+			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(filters.Products, labelConfig.ProductLabel))
 		}
-		if raw := qp.GetList(ParamFilterTeams, ","); len(raw) > 0 {
-			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(raw, labelConfig.TeamLabel))
+		if len(filters.Teams) > 0 {
+			filter.Filters = append(filter.Filters, filterV1LabelAliasMappedFromList(filters.Teams, labelConfig.TeamLabel))
 		}
 	} else {
 		log.Debugf("No label config is available. Not creating filters for label-mapped 'fields'.")
 	}
 
-	if raw := qp.GetList(ParamFilterAnnotations, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1DoubleValueFromList(raw, kubecost.FilterAnnotation))
+	if len(filters.Annotations) > 0 {
+		filter.Filters = append(filter.Filters, filterV1DoubleValueFromList(filters.Annotations, kubecost.FilterAnnotation))
 	}
 
-	if raw := qp.GetList(ParamFilterLabels, ","); len(raw) > 0 {
-		filter.Filters = append(filter.Filters, filterV1DoubleValueFromList(raw, kubecost.FilterLabel))
+	if len(filters.Labels) > 0 {
+		filter.Filters = append(filter.Filters, filterV1DoubleValueFromList(filters.Labels, kubecost.FilterLabel))
 	}
 
-	if filterServices := qp.GetList(ParamFilterServices, ","); len(filterServices) > 0 {
+	if len(filters.Services) > 0 {
 		// filterServices= is the only filter that uses the "contains" operator.
 		servicesFilter := kubecost.AllocationFilterOr{
 			Filters: []kubecost.AllocationFilter{},
 		}
-		for _, filterValue := range filterServices {
+		for _, filterValue := range filters.Services {
 			// TODO: wildcard support
 			filterValue, wildcard := parseWildcardEnd(filterValue)
 			subFilter := kubecost.AllocationFilterCondition{
@@ -289,6 +347,28 @@ func AllocationFilterFromParamsV1(
 	return filter
 }
 
+// AllocationFilterFromParamsV1 takes a set of HTTP query parameters and
+// converts them to an AllocationFilter, which is a structured in-Go
+// representation of a set of filters.
+//
+// The HTTP query parameters are the "v1" filters attached to the Allocation
+// API: "filterNamespaces=", "filterNodes=", etc.
+//
+// It takes an optional LabelConfig, which if provided enables "label-mapped"
+// filters like "filterDepartments".
+//
+// It takes an optional ClusterMap, which if provided enables cluster name
+// filtering. This turns all `filterClusters=foo` arguments into the equivalent
+// of `clusterID = "foo" OR clusterName = "foo"`.
+func AllocationFilterFromParamsV1(
+	qp mapper.PrimitiveMapReader,
+	labelConfig *kubecost.LabelConfig,
+	clusterMap clusters.ClusterMap,
+) kubecost.AllocationFilter {
+	filter := ConvertFilterQueryParams(qp, labelConfig)
+	return ParseAllocationFilterV1(filter, labelConfig, clusterMap)
+}
+
 // filterV1SingleValueFromList creates an OR of equality filters for a given
 // filter field.
 //
@@ -320,6 +400,33 @@ func filterV1SingleValueFromList(rawFilterValues []string, filterField kubecost.
 	return filter
 }
 
+func ConvertFilterQueryParams(qp mapper.PrimitiveMapReader, labelConfig *kubecost.LabelConfig) FilterV1 {
+	filter := FilterV1{
+		Annotations:     qp.GetList(ParamFilterAnnotations, ","),
+		Containers:      qp.GetList(ParamFilterContainers, ","),
+		Controllers:     qp.GetList(ParamFilterControllers, ","),
+		ControllerKinds: qp.GetList(ParamFilterControllerKinds, ","),
+		Clusters:        qp.GetList(ParamFilterClusters, ","),
+		Labels:          qp.GetList(ParamFilterLabels, ","),
+		Namespaces:      qp.GetList(ParamFilterNamespaces, ","),
+		Nodes:           qp.GetList(ParamFilterNodes, ","),
+		Pods:            qp.GetList(ParamFilterPods, ","),
+		Services:        qp.GetList(ParamFilterServices, ","),
+	}
+
+	if labelConfig != nil {
+		filter.Departments = qp.GetList(ParamFilterDepartments, ",")
+		filter.Environments = qp.GetList(ParamFilterEnvironments, ",")
+		filter.Owners = qp.GetList(ParamFilterOwners, ",")
+		filter.Products = qp.GetList(ParamFilterProducts, ",")
+		filter.Teams = qp.GetList(ParamFilterTeams, ",")
+	} else {
+		log.Debugf("No label config is available. Not creating filters for label-mapped 'fields'.")
+	}
+
+	return filter
+}
+
 // filterV1LabelAliasMappedFromList is like filterV1SingleValueFromList but is
 // explicitly for labels and annotations because "label-mapped" filters (like filterTeams=)
 // are actually label filters with a fixed label key.

+ 209 - 0
pkg/util/allocationfilterutil/queryfilters_test.go

@@ -719,3 +719,212 @@ func TestFiltersFromParamsV1(t *testing.T) {
 		})
 	}
 }
+
+type FilterV1EqualsTestcase struct {
+	name     string
+	this     FilterV1
+	that     FilterV1
+	expected bool
+}
+
+func TestFilterV1_Equals(t *testing.T) {
+	testCases := []FilterV1EqualsTestcase{
+		{
+			name: "both filters nil",
+			this: FilterV1{
+				Annotations:     nil,
+				Containers:      nil,
+				Controllers:     nil,
+				ControllerKinds: nil,
+				Clusters:        nil,
+				Departments:     nil,
+				Environments:    nil,
+				Labels:          nil,
+				Namespaces:      nil,
+				Nodes:           nil,
+				Owners:          nil,
+				Pods:            nil,
+				Products:        nil,
+				Services:        nil,
+				Teams:           nil,
+			},
+			that: FilterV1{
+				Annotations:     nil,
+				Containers:      nil,
+				Controllers:     nil,
+				ControllerKinds: nil,
+				Clusters:        nil,
+				Departments:     nil,
+				Environments:    nil,
+				Labels:          nil,
+				Namespaces:      nil,
+				Nodes:           nil,
+				Owners:          nil,
+				Pods:            nil,
+				Products:        nil,
+				Services:        nil,
+				Teams:           nil,
+			},
+			expected: true,
+		},
+		{
+			name: "both filters not nil and matching",
+			this: FilterV1{
+				Annotations:     []string{"a1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			that: FilterV1{
+				Annotations:     []string{"a1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			expected: true,
+		},
+		{
+			name: "both filters diff count",
+			this: FilterV1{
+				Annotations:     []string{"a1", "b1", "c1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			that: FilterV1{
+				Annotations:     []string{"a1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			expected: false,
+		},
+		{
+			name: "slight mismatch",
+			this: FilterV1{
+				Annotations:     []string{"x1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			that: FilterV1{
+				Annotations:     []string{"a1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			expected: false,
+		},
+		{
+			name: "one nil",
+			this: FilterV1{
+				Annotations:     []string{"x1", "b1"},
+				Containers:      []string{"a1", "b1"},
+				Controllers:     []string{"a1", "b1"},
+				ControllerKinds: []string{"a1", "b1"},
+				Clusters:        []string{"a1", "b1"},
+				Departments:     []string{"a1", "b1"},
+				Environments:    []string{"a1", "b1"},
+				Labels:          []string{"a1", "b1"},
+				Namespaces:      []string{"a1", "b1"},
+				Nodes:           []string{"a1", "b1"},
+				Owners:          []string{"a1", "b1"},
+				Pods:            []string{"a1", "b1"},
+				Products:        []string{"a1", "b1"},
+				Services:        []string{"a1", "b1"},
+				Teams:           []string{"a1", "b1"},
+			},
+			that: FilterV1{
+				Annotations:     nil,
+				Containers:      nil,
+				Controllers:     nil,
+				ControllerKinds: nil,
+				Clusters:        nil,
+				Departments:     nil,
+				Environments:    nil,
+				Labels:          nil,
+				Namespaces:      nil,
+				Nodes:           nil,
+				Owners:          nil,
+				Pods:            nil,
+				Products:        nil,
+				Services:        nil,
+				Teams:           nil,
+			},
+			expected: false,
+		},
+	}
+
+	for _, tc := range testCases {
+		got := tc.this.Equals(tc.that)
+		if got != tc.expected {
+			t.Fatalf("expected %t, got: %t for test case: %s", tc.expected, got, tc.name)
+		}
+	}
+}

+ 18 - 14
pkg/util/allocationfilterutil/v2/parser.go

@@ -15,10 +15,11 @@ import (
 // into a kubecost.AllocationFilter.
 //
 // Example queries:
-//   namespace:"kubecost"
-//   label[app]:"cost-analyzer"
-//   node!:"node1","node2"
-//   cluster:"cluster-one"+namespace!:"kube-system"
+//
+//	namespace:"kubecost"
+//	label[app]:"cost-analyzer"
+//	node!:"node1","node2"
+//	cluster:"cluster-one"+namespace!:"kube-system"
 //
 // The grammar is approximately as follows:
 //
@@ -28,18 +29,20 @@ import (
 // [1] https://docs.google.com/document/d/1HKkp2bv3mnvfQoBZlpHjfZwQ0FzDLOHKpnwV9gQ_KgU/edit?pli=1
 //
 // <filter> ::= <comparison> ('+' <comparison>)*
-//              NOTE: Language can be extended to support ORs between
-//              comparisons by adding a '|' operator in between comparisons,
-//              though precedence will have to be carefully defined and it may
-//              require adding support for ()-enclosed statements to deal with
-//              precedence.
-//              This would allow for queries like:
-//                namespace:"x"|label[app]="foo"
+//
+//	NOTE: Language can be extended to support ORs between
+//	comparisons by adding a '|' operator in between comparisons,
+//	though precedence will have to be carefully defined and it may
+//	require adding support for ()-enclosed statements to deal with
+//	precedence.
+//	This would allow for queries like:
+//	  namespace:"x"|label[app]="foo"
 //
 // <comparison> ::= <filter-key> <filter-op> <filter-value>
 //
 // <filter-key> ::= <filter-field-2> <keyed-access>
-//                | <filter-field-1>
+//
+//	| <filter-field-1>
 //
 // <filter-op> ::= ':' | '!:'
 //
@@ -48,8 +51,9 @@ import (
 // <filter-field-2> ::= 'label' | 'annotation'
 //
 // <filter-field-1> ::= 'cluster' | 'node' | 'namespace'
-//                    | 'controllerName' | 'controllerKind'
-//                    | 'container' | 'pod' | 'services'
+//
+//	| 'controllerName' | 'controllerKind'
+//	| 'container' | 'pod' | 'services'
 //
 // <keyed-access> ::= '[' <identifier> ']'
 //

+ 10 - 0
pkg/util/compat.go

@@ -45,3 +45,13 @@ func GetOperatingSystem(labels map[string]string) (string, bool) {
 		return "", false
 	}
 }
+
+func GetArchType(labels map[string]string) (string, bool) {
+	if _, ok := labels[v1.LabelArchStable]; ok {
+		return labels[v1.LabelArchStable], true
+	} else if _, ok := labels["beta.kubernetes.io/arch"]; ok {
+		return labels["beta.kubernetes.io/arch"], true
+	} else {
+		return "", false
+	}
+}

+ 56 - 0
pkg/util/compat_test.go

@@ -0,0 +1,56 @@
+package util
+
+import (
+	"testing"
+)
+
+func TestGetArchType(t *testing.T) {
+	type args struct {
+		labels map[string]string
+	}
+	tests := map[string]struct {
+		args  args
+		want  string
+		found bool
+	}{
+		"amd64 beta": {
+			args: args{
+				labels: map[string]string{
+					"beta.kubernetes.io/arch": "amd64",
+				},
+			},
+			want:  "amd64",
+			found: true,
+		},
+		"arm64 beta": {
+			args: args{
+				labels: map[string]string{
+					"beta.kubernetes.io/arch": "arm64",
+				},
+			},
+			want:  "arm64",
+			found: true,
+		},
+		"amd64": {
+			args: args{
+				labels: map[string]string{
+					"kubernetes.io/arch": "amd64",
+				},
+			},
+			want:  "amd64",
+			found: true,
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			got, found := GetArchType(tt.args.labels)
+			if found != tt.found {
+				t.Errorf("GetArchType() error = %v, wantErr %v", found, tt.found)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("GetArchType() got = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}

+ 69 - 3
test/cloud_test.go

@@ -20,9 +20,10 @@ import (
 )
 
 const (
-	providerIDMap = "spec.providerID"
-	nameMap       = "metadata.name"
-	labelMapFoo   = "metadata.labels.foo"
+	providerIDMap  = "spec.providerID"
+	nameMap        = "metadata.name"
+	labelMapFoo    = "metadata.labels.foo"
+	labelMapFooBar = "metadata.labels.foo.bar"
 )
 
 func TestRegionValueFromMapField(t *testing.T) {
@@ -124,6 +125,38 @@ func TestPVPriceFromCSV(t *testing.T) {
 
 }
 
+func TestPVPriceFromCSVStorageClass(t *testing.T) {
+	nameWant := "pvc-08e1f205-d7a9-4430-90fc-7b3965a18c4d"
+	storageClassWant := "storageclass0"
+	pv := &v1.PersistentVolume{}
+	pv.Name = nameWant
+	pv.Spec.StorageClassName = storageClassWant
+
+	confMan := config.NewConfigFileManager(&config.ConfigFileManagerOpts{
+		LocalConfigPath: "./",
+	})
+
+	wantPrice := "0.1338"
+	c := &provider.CSVProvider{
+		CSVLocation: "../configs/pricing_schema_pv_storageclass.csv",
+		CustomProvider: &provider.CustomProvider{
+			Config: provider.NewProviderConfig(confMan, "../configs/default.json"),
+		},
+	}
+	c.DownloadPricingData()
+	k := c.GetPVKey(pv, make(map[string]string), "")
+	resPV, err := c.PVPricing(k)
+	if err != nil {
+		t.Errorf("Error in NodePricing: %s", err.Error())
+	} else {
+		gotPrice := resPV.Cost
+		if gotPrice != wantPrice {
+			t.Errorf("Wanted price '%s' got price '%s'", wantPrice, gotPrice)
+		}
+	}
+
+}
+
 func TestNodePriceFromCSVWithGPU(t *testing.T) {
 	providerIDWant := "providerid"
 	nameWant := "gke-standard-cluster-1-pool-1-91dc432d-cg69"
@@ -194,6 +227,39 @@ func TestNodePriceFromCSVWithGPU(t *testing.T) {
 
 }
 
+func TestNodePriceFromCSVSpecialChar(t *testing.T) {
+	nameWant := "gke-standard-cluster-1-pool-1-91dc432d-cg69"
+
+	confMan := config.NewConfigFileManager(&config.ConfigFileManagerOpts{
+		LocalConfigPath: "./",
+	})
+
+	n := &v1.Node{}
+	n.Name = nameWant
+	n.Labels = make(map[string]string)
+	n.Labels["metadata.label.servers.com/label"] = nameWant
+
+	wantPrice := "0.133700"
+
+	c := &provider.CSVProvider{
+		CSVLocation: "../configs/pricing_schema_special_char.csv",
+		CustomProvider: &provider.CustomProvider{
+			Config: provider.NewProviderConfig(confMan, "../configs/default.json"),
+		},
+	}
+	c.DownloadPricingData()
+	k := c.GetKey(n.Labels, n)
+	resN, err := c.NodePricing(k)
+	if err != nil {
+		t.Errorf("Error in NodePricing: %s", err.Error())
+	} else {
+		gotPrice := resN.Cost
+		if gotPrice != wantPrice {
+			t.Errorf("Wanted price '%s' got price '%s'", wantPrice, gotPrice)
+		}
+	}
+}
+
 func TestNodePriceFromCSV(t *testing.T) {
 	providerIDWant := "providerid"
 	nameWant := "gke-standard-cluster-1-pool-1-91dc432d-cg69"

+ 10 - 0
ui/Dockerfile.cross

@@ -0,0 +1,10 @@
+FROM nginx:alpine
+COPY ./dist /var/www
+COPY default.nginx.conf /etc/nginx/conf.d/
+COPY nginx.conf /etc/nginx/
+
+ENV BASE_URL=/model
+
+COPY ./docker-entrypoint.sh /usr/local/bin/
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+CMD ["nginx", "-g", "daemon off;"]

+ 31 - 0
ui/justfile

@@ -0,0 +1,31 @@
+default:
+    just --list
+
+build-local:
+    npm install
+
+    npx parcel build src/index.html
+
+build IMAGETAG: build-local
+    docker buildx build \
+        --rm \
+        --platform "linux/amd64" \
+        -f 'Dockerfile.cross' \
+        --provenance=false \
+        -t {{IMAGETAG}}-amd64 \
+        --push \
+        .
+
+    docker buildx build \
+        --rm \
+        --platform "linux/arm64" \
+        -f 'Dockerfile.cross' \
+        --provenance=false \
+        -t {{IMAGETAG}}-arm64 \
+        --push \
+        .
+
+    manifest-tool push from-args \
+        --platforms "linux/amd64,linux/arm64" \
+        --template {{IMAGETAG}}-ARCH \
+        --target {{IMAGETAG}}

+ 11 - 2
ui/src/Reports.js

@@ -17,6 +17,7 @@ import Subtitle from './components/Subtitle';
 import Warnings from './components/Warnings';
 import AllocationService from './services/allocation';
 import { checkCustomWindow, cumulativeToTotals, rangeToCumulative, toVerboseTimeRange } from './util';
+import { currencyCodes } from './constants/currencyCodes'
 
 const windowOptions = [
   { name: 'Today', value: 'today' },
@@ -103,6 +104,7 @@ const ReportsPage = () => {
   const [window, setWindow] = useState(windowOptions[0].value)
   const [aggregateBy, setAggregateBy] = useState(aggregationOptions[0].value)
   const [accumulate, setAccumulate] = useState(accumulateOptions[0].value)
+  const [currency, setCurrency] = useState('USD')
 
   // Report state, including current report and saved options
   const [title, setTitle] = useState('Last 7 days by namespace daily')
@@ -119,8 +121,7 @@ const ReportsPage = () => {
   const [fetch, setFetch] = useState(false)
   const [loading, setLoading] = useState(true)
   const [errors, setErrors] = useState([])
-  const [currency, setCurrency] = useState('USD')
-
+
   // Initialize once, then fetch report each time setFetch(true) is called
   useEffect(() => {
     if (!init) {
@@ -139,6 +140,7 @@ const ReportsPage = () => {
     setWindow(searchParams.get('window') || '6d');
     setAggregateBy(searchParams.get('agg') || 'namespace');
     setAccumulate((searchParams.get('acc') === 'true') || false);
+    setCurrency(searchParams.get('currency') || 'USD');
   }, [routerLocation]);
 
   async function initialize() {
@@ -245,6 +247,13 @@ const ReportsPage = () => {
             title={title}
             cumulativeData={cumulativeData}
             currency={currency}
+            currencyOptions={currencyCodes}
+            setCurrency={(curr) => {
+              searchParams.set('currency', curr);
+              routerHistory.push({
+                search: `?${searchParams.toString()}`
+              });
+            }}
           />
         </div>
 

+ 15 - 0
ui/src/components/Controls/Edit.js

@@ -22,6 +22,7 @@ function EditControl({
   windowOptions, window, setWindow,
   aggregationOptions, aggregateBy, setAggregateBy,
   accumulateOptions, accumulate, setAccumulate,
+  currencyOptions, currency, setCurrency,
 }) {
   const classes = useStyles();
   return (
@@ -52,6 +53,20 @@ function EditControl({
           {accumulateOptions.map((opt) => <MenuItem key={opt.value} value={opt.value}>{opt.name}</MenuItem>)}
         </Select>
       </FormControl>
+      <FormControl className={classes.formControl}>
+        <InputLabel id="currency-label">Currency</InputLabel>
+        <Select
+          id="currency"
+          value={currency}
+          onChange={e => setCurrency(e.target.value)}
+        >
+          {currencyOptions?.map((currency) => (
+            <MenuItem key={currency} value={currency}>
+              {currency}
+            </MenuItem>
+          ))}
+        </Select>
+      </FormControl>
     </div>
   );
 }

+ 4 - 0
ui/src/components/Controls/index.js

@@ -16,6 +16,8 @@ const Controls = ({
   title,
   cumulativeData,
   currency,
+  currencyOptions,
+  setCurrency,
 }) => {
 
   return (
@@ -31,6 +33,8 @@ const Controls = ({
         accumulate={accumulate}
         setAccumulate={setAccumulate}
         currency={currency}
+        currencyOptions={currencyOptions}
+        setCurrency={setCurrency}
       />
 
       <DownloadControl

+ 1 - 0
ui/src/constants/currencyCodes.js

@@ -0,0 +1 @@
+export const currencyCodes = ["AED", "AFN", "ALL", "AMD", "ANG", "AOA", "ARS", "AUD", "AWG", "AZN", "BAM", "BBD", "BDT", "BGN", "BHD", "BIF", "BMD", "BND", "BOB", "BOV", "BRL", "BSD", "BTN", "BWP", "BYR", "BZD", "CAD", "CDF", "CHE", "CHF", "CHW", "CLF", "CLP", "CNY", "COP", "COU", "CRC", "CUC", "CUP", "CVE", "CZK", "DJF", "DKK", "DOP", "DZD", "EGP", "ERN", "ETB", "EUR", "FJD", "FKP", "GBP", "GEL", "GHS", "GIP", "GMD", "GNF", "GTQ", "GYD", "HKD", "HNL", "HRK", "HTG", "HUF", "IDR", "ILS", "INR", "IQD", "IRR", "ISK", "JMD", "JOD", "JPY", "KES", "KGS", "KHR", "KMF", "KPW", "KRW", "KWD", "KYD", "KZT", "LAK", "LBP", "LKR", "LRD", "LSL", "LTL", "LVL", "LYD", "MAD", "MDL", "MGA", "MKD", "MMK", "MNT", "MOP", "MRO", "MUR", "MVR", "MWK", "MXN", "MXV", "MYR", "MZN", "NAD", "NGN", "NIO", "NOK", "NPR", "NZD", "OMR", "PAB", "PEN", "PGK", "PHP", "PKR", "PLN", "PYG", "QAR", "RON", "RSD", "RUB", "RWF", "SAR", "SBD", "SCR", "SDG", "SEK", "SGD", "SHP", "SLL", "SOS", "SRD", "SSP", "STD", "SYP", "SZL", "THB", "TJS", "TMT", "TND", "TOP", "TRY", "TTD", "TWD", "TZS", "UAH", "UGX", "USD", "USN", "USS", "UYI", "UYU", "UZS", "VEF", "VND", "VUV", "WST", "XAF", "XAG", "XAU", "XBA", "XBB", "XBC", "XBD", "XCD", "XDR", "XFU", "XOF", "XPD", "XPF", "XPT", "XTS", "XXX", "YER", "ZAR", "ZMW"];