Browse Source

Merge branch 'develop' into kaelan-ingest-uid

Kaelan Patel 4 years ago
Parent
Commit
ef41f3e76a
56 changed files with 2470 additions and 1564 deletions
  1. + 12 - 17  .github/PULL_REQUEST_TEMPLATE.md
  2. + 1 - 0  ROADMAP.md
  3. + 2 - 5  cmd/costmodel/main.go
  4. + 92 - 13  go.mod
  5. + 45 - 22  go.sum
  6. + 78 - 53  pkg/cloud/awsprovider.go
  7. + 19 - 18  pkg/cloud/azureprovider.go
  8. + 43 - 37  pkg/cloud/gcpprovider.go
  9. + 9 - 8  pkg/cloud/provider.go
  10. + 6 - 8  pkg/cloud/providerconfig.go
  11. + 3 - 3  pkg/clustercache/clustercache.go
  12. + 2 - 2  pkg/clustercache/clusterexporter.go
  13. + 3 - 3  pkg/clustercache/clusterimporter.go
  14. + 6 - 6  pkg/clustercache/watchcontroller.go
  15. + 3 - 4  pkg/cmd/agent/agent.go
  16. + 17 - 9  pkg/cmd/commands.go
  17. + 2 - 2  pkg/config/configfile.go
  18. + 2 - 2  pkg/config/configmanager.go
  19. + 31 - 32  pkg/costmodel/aggregation.go
  20. + 333 - 174  pkg/costmodel/allocation.go
  21. + 157 - 125  pkg/costmodel/cluster.go
  22. + 20 - 19  pkg/costmodel/cluster_helpers.go
  23. + 9 - 4  pkg/costmodel/cluster_helpers_test.go
  24. + 5 - 6  pkg/costmodel/clusterinfo.go
  25. + 2 - 2  pkg/costmodel/clusters/clustermap.go
  26. + 104 - 103  pkg/costmodel/costmodel.go
  27. + 22 - 24  pkg/costmodel/metrics.go
  28. + 1 - 1  pkg/costmodel/promparsers.go
  29. + 65 - 34  pkg/costmodel/router.go
  30. + 4 - 5  pkg/costmodel/settings.go
  31. + 2 - 3  pkg/costmodel/sql.go
  32. + 10 - 14  pkg/env/costmodelenv.go
  33. + 177 - 369  pkg/kubecost/allocation.go
  34. + 45 - 273  pkg/kubecost/allocation_test.go
  35. + 1 - 1  pkg/kubecost/allocationprops.go
  36. + 72 - 20  pkg/kubecost/asset.go
  37. + 12 - 0  pkg/kubecost/mock.go
  38. + 159 - 0  pkg/kubecost/query.go
  39. + 9 - 10  pkg/kubecost/status.go
  40. + 120 - 44  pkg/kubecost/summaryallocation.go
  41. + 86 - 15  pkg/kubecost/totals.go
  42. + 26 - 0  pkg/kubecost/totals_test.go
  43. + 10 - 0  pkg/kubecost/window.go
  44. + 43 - 9  pkg/log/log.go
  45. + 34 - 18  pkg/prom/query.go
  46. + 2 - 2  pkg/prom/result.go
  47. + 5 - 5  pkg/services/clusters/clustermanager.go
  48. + 2 - 2  pkg/services/clusters/clustersendpoints.go
  49. + 2 - 2  pkg/services/clusterservice.go
  50. + 1 - 1  pkg/services/services.go
  51. + 57 - 5  pkg/storage/s3storage.go
  52. + 157 - 0  pkg/util/cache/cachegroup.go
  53. + 187 - 0  pkg/util/cache/cachegroup_test.go
  54. + 72 - 0  pkg/util/interval/intervalrunner.go
  55. + 2 - 2  pkg/util/watcher/configwatchers.go
  56. + 79 - 28  ui/package-lock.json

+ 12 - 17
.github/PULL_REQUEST_TEMPLATE.md

@@ -1,25 +1,20 @@
 ## What does this PR change?
+* 
 
+## Does this PR relate to any other PRs?
+* 
 
+## How will this PR impact users?
+* 
 
-## Does this PR rely on any other PRs?
-
-- 
-- 
-
-
-## How does this PR impact users? (This is the kind of thing that goes in release notes!)
-
-
-
-## Links to Issues or ZD tickets this PR addresses or fixes
-
-- 
-- 
-
+## Does this PR address any GitHub or Zendesk issues?
+* Closes ...
 
 ## How was this PR tested?
+* 
 
+## Does this PR require changes to documentation?
+* 
 
-## Have you made an update to documentation?
-
+## Have you labeled this PR and its corresponding Issue as "next release" if it should be part of the next Kubecost release? If not, why not?
+* 

+ 1 - 0
ROADMAP.md

@@ -3,6 +3,7 @@ The following items are outstanding for the open source cost-model:
 __2022 roadmap__
 
 * Improved testing frameworks for backend APIs as well as frontend UI
+* Add conformance tests to confirm implementation meets standards
 * Deeper billing integrations with other cloud providers
 * More accessible & improved user interface
 * Improved support from open source community helm chart

+ 2 - 5
cmd/costmodel/main.go

@@ -1,17 +1,14 @@
 package main
 
 import (
-	"os"
-
 	"github.com/kubecost/cost-model/pkg/cmd"
-	"k8s.io/klog"
+	"github.com/rs/zerolog/log"
 )
 
 func main() {
 	// runs the appropriate application mode using the default cost-model command
 	// see: github.com/kubecost/cost-model/pkg/cmd package for details
 	if err := cmd.Execute(nil); err != nil {
-		klog.Fatal(err)
-		os.Exit(1)
+		log.Fatal().Err(err)
 	}
 }
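
A note on the zerolog call introduced above: a zerolog Event only fires once it is finalized with Msg, Msgf, or Send, and Fatal's os.Exit(1) happens inside that finalizer, so log.Fatal().Err(err) on its own neither logs nor exits. A minimal sketch of the finalized form (the message string is illustrative, not from this commit):

package main

import (
	"github.com/kubecost/cost-model/pkg/cmd"
	"github.com/rs/zerolog/log"
)

func main() {
	if err := cmd.Execute(nil); err != nil {
		// Msg finalizes the event: it writes the log line and, for
		// Fatal, calls os.Exit(1).
		log.Fatal().Err(err).Msg("cost-model exited with error")
	}
}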

+ 92 - 13
go.mod

@@ -6,11 +6,8 @@ require (
 	cloud.google.com/go v0.81.0
 	cloud.google.com/go/bigquery v1.8.0
 	github.com/Azure/azure-sdk-for-go v61.6.0+incompatible
-	github.com/Azure/go-autorest/autorest v0.11.17
-	github.com/Azure/go-autorest/autorest/azure/auth v0.5.6
-	github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect
-	github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect
-	github.com/Azure/go-autorest/tracing v0.6.0 // indirect
+	github.com/Azure/go-autorest/autorest v0.11.27
+	github.com/Azure/go-autorest/autorest/azure/auth v0.5.11
 	github.com/aws/aws-sdk-go v1.28.9
 	github.com/aws/aws-sdk-go-v2 v1.13.0
 	github.com/aws/aws-sdk-go-v2/config v1.13.1
@@ -23,34 +20,116 @@ require (
 	github.com/davecgh/go-spew v1.1.1
 	github.com/getsentry/sentry-go v0.6.1
 	github.com/goccy/go-json v0.9.4
-	github.com/gofrs/uuid v4.2.0+incompatible // indirect
 	github.com/google/uuid v1.3.0
-	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/json-iterator/go v1.1.12
 	github.com/jszwec/csvutil v1.2.1
 	github.com/julienschmidt/httprouter v1.3.0
 	github.com/lib/pq v1.2.0
 	github.com/microcosm-cc/bluemonday v1.0.16
 	github.com/minio/minio-go/v7 v7.0.15
-	github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
 	github.com/patrickmn/go-cache v2.1.0+incompatible
 	github.com/pkg/errors v0.9.1
 	github.com/prometheus/client_golang v1.0.0
 	github.com/prometheus/client_model v0.2.0
 	github.com/rs/cors v1.7.0
-	github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24 // indirect
+	github.com/rs/zerolog v1.26.1
 	github.com/spf13/cobra v1.2.1
-	github.com/spf13/pflag v1.0.5
+	github.com/spf13/viper v1.8.1
 	go.etcd.io/bbolt v1.3.5
 	golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602
 	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 	google.golang.org/api v0.44.0
-	gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
 	gopkg.in/yaml.v2 v2.4.0
 	k8s.io/api v0.20.4
 	k8s.io/apimachinery v0.20.4
 	k8s.io/client-go v0.20.4
-	k8s.io/klog v0.4.0
 	sigs.k8s.io/yaml v1.2.0
 )
 
-go 1.16
+require (
+	github.com/Azure/go-autorest v14.2.0+incompatible // indirect
+	github.com/Azure/go-autorest/autorest/adal v0.9.18 // indirect
+	github.com/Azure/go-autorest/autorest/azure/cli v0.4.5 // indirect
+	github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
+	github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect
+	github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect
+	github.com/Azure/go-autorest/logger v0.2.1 // indirect
+	github.com/Azure/go-autorest/tracing v0.6.0 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.2.0 // indirect
+	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.10.0 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.4 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.2.0 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/ini v1.3.5 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.7.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.7.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.11.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sso v1.9.0 // indirect
+	github.com/aws/smithy-go v1.10.0 // indirect
+	github.com/aymerick/douceur v0.2.0 // indirect
+	github.com/beorn7/perks v1.0.0 // indirect
+	github.com/dimchansky/utfbom v1.1.1 // indirect
+	github.com/dustin/go-humanize v1.0.0 // indirect
+	github.com/fsnotify/fsnotify v1.4.9 // indirect
+	github.com/go-logr/logr v0.2.0 // indirect
+	github.com/gofrs/uuid v4.2.0+incompatible // indirect
+	github.com/gogo/protobuf v1.3.2 // indirect
+	github.com/golang-jwt/jwt/v4 v4.4.1 // indirect
+	github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/google/go-cmp v0.5.6 // indirect
+	github.com/google/gofuzz v1.1.0 // indirect
+	github.com/googleapis/gax-go/v2 v2.0.5 // indirect
+	github.com/googleapis/gnostic v0.4.1 // indirect
+	github.com/gorilla/css v1.0.0 // indirect
+	github.com/hashicorp/golang-lru v0.5.1 // indirect
+	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/imdario/mergo v0.3.5 // indirect
+	github.com/inconshreveable/mousetrap v1.0.0 // indirect
+	github.com/jmespath/go-jmespath v0.4.0 // indirect
+	github.com/jstemmer/go-junit-report v0.9.1 // indirect
+	github.com/klauspost/compress v1.13.5 // indirect
+	github.com/klauspost/cpuid v1.3.1 // indirect
+	github.com/magiconair/properties v1.8.5 // indirect
+	github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
+	github.com/minio/md5-simd v1.1.0 // indirect
+	github.com/minio/sha256-simd v0.1.1 // indirect
+	github.com/mitchellh/go-homedir v1.1.0 // indirect
+	github.com/mitchellh/mapstructure v1.4.1 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
+	github.com/pelletier/go-toml v1.9.3 // indirect
+	github.com/prometheus/common v0.4.1 // indirect
+	github.com/prometheus/procfs v0.0.2 // indirect
+	github.com/rs/xid v1.3.0 // indirect
+	github.com/shopspring/decimal v0.0.0-20180709203117-cd690d0c9e24 // indirect
+	github.com/sirupsen/logrus v1.8.1 // indirect
+	github.com/spf13/afero v1.6.0 // indirect
+	github.com/spf13/cast v1.3.1 // indirect
+	github.com/spf13/jwalterweatherman v1.1.0 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/subosito/gotenv v1.2.0 // indirect
+	go.opencensus.io v0.23.0 // indirect
+	golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect
+	golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
+	golang.org/x/mod v0.4.2 // indirect
+	golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 // indirect
+	golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect
+	golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 // indirect
+	golang.org/x/text v0.3.6 // indirect
+	golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e // indirect
+	golang.org/x/tools v0.1.7 // indirect
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
+	google.golang.org/appengine v1.6.7 // indirect
+	google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect
+	google.golang.org/grpc v1.38.0 // indirect
+	google.golang.org/protobuf v1.26.0 // indirect
+	gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
+	gopkg.in/inf.v0 v0.9.1 // indirect
+	gopkg.in/ini.v1 v1.62.0 // indirect
+	k8s.io/klog/v2 v2.4.0 // indirect
+	k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect
+	sigs.k8s.io/structured-merge-diff/v4 v4.0.2 // indirect
+)
+
+go 1.18

+ 45 - 22
go.sum

@@ -46,27 +46,30 @@ github.com/Azure/azure-sdk-for-go v61.6.0+incompatible/go.mod h1:9XXNKU+eRnpl9mo
 github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
 github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
 github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
-github.com/Azure/go-autorest/autorest v0.11.17 h1:2zCdHwNgRH+St1J+ZMf66xI8aLr/5KMy+wWLH97zwYM=
-github.com/Azure/go-autorest/autorest v0.11.17/go.mod h1:eipySxLmqSyC5s5k1CLupqet0PSENBEDP93LQ9a8QYw=
+github.com/Azure/go-autorest/autorest v0.11.24/go.mod h1:G6kyRlFnTuSbEYkQGawPfsCswgme4iYf6rfSKUDzbCc=
+github.com/Azure/go-autorest/autorest v0.11.27 h1:F3R3q42aWytozkV8ihzcgMO4OA4cuqr3bNlsEuF6//A=
+github.com/Azure/go-autorest/autorest v0.11.27/go.mod h1:7l8ybrIdUmGqZMTD0sRtAr8NvbHjfofbf8RSP2q7w7U=
 github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg=
 github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A=
-github.com/Azure/go-autorest/autorest/adal v0.9.10 h1:r6fZHMaHD8B6LDCn0o5vyBFHIHrM6Ywwx7mb49lPItI=
-github.com/Azure/go-autorest/autorest/adal v0.9.10/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A=
-github.com/Azure/go-autorest/autorest/azure/auth v0.5.6 h1:cgiBtUxatlt/e3qY6fQJioqbocWHr5osz259MomF5M0=
-github.com/Azure/go-autorest/autorest/azure/auth v0.5.6/go.mod h1:nYlP+G+n8MhD5CjIi6W8nFTIJn/PnTHes5nUbK6BxD0=
-github.com/Azure/go-autorest/autorest/azure/cli v0.4.2 h1:dMOmEJfkLKW/7JsokJqkyoYSgmR08hi9KrhjZb+JALY=
-github.com/Azure/go-autorest/autorest/azure/cli v0.4.2/go.mod h1:7qkJkT+j6b+hIpzMOwPChJhTqS8VbsqqgULzMNRugoM=
+github.com/Azure/go-autorest/autorest/adal v0.9.18 h1:kLnPsRjzZZUF3K5REu/Kc+qMQrvuza2bwSnNdhmzLfQ=
+github.com/Azure/go-autorest/autorest/adal v0.9.18/go.mod h1:XVVeme+LZwABT8K5Lc3hA4nAe8LDBVle26gTrguhhPQ=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.11 h1:P6bYXFoao05z5uhOQzbC3Qd8JqF3jUoocoTeIxkp2cA=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.11/go.mod h1:84w/uV8E37feW2NCJ08uT9VBfjfUHpgLVnG2InYD6cg=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.5 h1:0W/yGmFdTIT77fvdlGZ0LMISoLHFJ7Tx4U0yeB+uFs4=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.5/go.mod h1:ADQAXrkgm7acgWVUNamOgh8YNrv4p27l3Wc55oVfpzg=
 github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
 github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
 github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
-github.com/Azure/go-autorest/autorest/mocks v0.4.1 h1:K0laFcLE6VLTOwNgSxaGbUcLPuGXlNkbVvq4cW4nIHk=
 github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw=
+github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU=
 github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk=
 github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE=
 github.com/Azure/go-autorest/autorest/validation v0.3.1 h1:AgyqjAd94fwNAoTjl/WQXg4VvFeRFpO+UhNyRXqF1ac=
 github.com/Azure/go-autorest/autorest/validation v0.3.1/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E=
-github.com/Azure/go-autorest/logger v0.2.0 h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE=
 github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
+github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg=
+github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
 github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
 github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
@@ -156,7 +159,6 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
 github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
 github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
-github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
 github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
 github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
 github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
@@ -179,9 +181,9 @@ github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod
 github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
 github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
 github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
-github.com/form3tech-oss/jwt-go v3.2.2+incompatible h1:TcekIExNqud5crz4xD2pavyTgWiPvpYe4Xau31I0PRk=
 github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
 github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
 github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
 github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc=
 github.com/getsentry/sentry-go v0.6.1 h1:K84dY1/57OtWhdyr5lbU78Q/+qgzkEyGc/ud+Sipi5k=
@@ -222,6 +224,10 @@ github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7a
 github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
 github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
+github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
+github.com/golang-jwt/jwt/v4 v4.4.1 h1:pC5DB52sCeK48Wlb9oPcdhnjkz1TKt1D/P7WKJ0kUcQ=
+github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
 github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
 github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
@@ -323,6 +329,7 @@ github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA
 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
 github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+dAcgU=
 github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
+github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
 github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
 github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
 github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
@@ -347,7 +354,6 @@ github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGw
 github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
-github.com/json-iterator/go v1.1.11 h1:uVUAXhF2To8cbw/3xN3pxj6kk7TYKs98NIrTqPlMWAQ=
 github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
 github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
@@ -394,6 +400,7 @@ github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL
 github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0=
 github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
+github.com/magiconair/properties v1.8.5 h1:b6kJs+EmPFMYGkow9GiUyCyOvIwYetYJ3fSaWak/Gls=
 github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
 github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@@ -427,12 +434,12 @@ github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS4
 github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY=
 github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
 github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
+github.com/mitchellh/mapstructure v1.4.1 h1:CpVNEelQCZBooIPDn+AR3NpivK/TIKU8bDxdASFVQag=
 github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
-github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
@@ -456,6 +463,7 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI
 github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
 github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
 github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
+github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ=
 github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
 github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
 github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
@@ -485,8 +493,11 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
 github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
-github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc=
 github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
+github.com/rs/xid v1.3.0 h1:6NjYksEUlhurdVehpc7S7dk6DAmcKv8V9gG0FsVN2U4=
+github.com/rs/xid v1.3.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.26.1 h1:/ihwxqH+4z8UxyI70wM1z9yCvkWcfz/a3mj48k/Zngc=
+github.com/rs/zerolog v1.26.1/go.mod h1:/wSSJWX7lVrsOwlbyTRSOJvqRlc+WjWlfes+CiJ+tmc=
 github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
@@ -505,19 +516,23 @@ github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIK
 github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
 github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
+github.com/spf13/afero v1.6.0 h1:xoax2sJ2DT8S8xA2paPFjDCScCNeWsg75VG0DLRreiY=
 github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
 github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
+github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
 github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
 github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
 github.com/spf13/cobra v1.2.1 h1:+KmjbUw1hriSNMF55oPrkZcb27aECyrj8V2ytv7kWDw=
 github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
 github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
+github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
 github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
 github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
+github.com/spf13/viper v1.8.1 h1:Kq1fyeebqsBfbjZj4EL7gj2IO0mMaiyjYUWcUsl2O44=
 github.com/spf13/viper v1.8.1/go.mod h1:o0Pch8wJ9BVSWGQMbra6iw0oQ5oktSIBaujf1rJH9Ns=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -529,6 +544,7 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
 github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
 github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
 github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
@@ -552,6 +568,7 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+github.com/yuin/goldmark v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
 go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
 go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
 go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
@@ -581,8 +598,11 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/crypto v0.0.0-20201216223049-8b5274cf687f/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
-golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad h1:DN0cp81fZ3njFcrLCytUHRSUkqBjfTo4Tx9RJTWs0EY=
-golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.0.0-20211215165025-cf75a172585e/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
+golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA=
+golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -662,8 +682,10 @@ golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc=
 golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
-golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
 golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -741,8 +763,10 @@ golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7w
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e h1:WUoyKPm6nCo1BnNUvPGnFG3T5DUVem42yDJZZ4CNxMA=
+golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -814,8 +838,9 @@ golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
-golang.org/x/tools v0.1.2 h1:kRBLX7v7Af8W7Gdbbc908OJcdgtK8bOz9Uaj8/F1ACA=
 golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ=
+golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -969,8 +994,6 @@ k8s.io/apimachinery v0.20.4/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRp
 k8s.io/client-go v0.20.4 h1:85crgh1IotNkLpKYKZHVNI1JT86nr/iDCvq2iWKsql4=
 k8s.io/client-go v0.20.4/go.mod h1:LiMv25ND1gLUdBeYxBIwKpkSC5IsozMMmOOeSJboP+k=
 k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
-k8s.io/klog v0.4.0 h1:lCJCxf/LIowc2IGS9TPjWDyXY4nOmdGdfcwwDQCOURQ=
-k8s.io/klog v0.4.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
 k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE=
 k8s.io/klog/v2 v2.4.0 h1:7+X0fUguPyrKEC4WjH8iGDg3laWgMo5tMnRTIGTTxGQ=
 k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y=

+ 78 - 53
pkg/cloud/awsprovider.go

@@ -6,7 +6,6 @@ import (
 	"context"
 	"encoding/csv"
 	"fmt"
-
 	"io"
 	"io/ioutil"
 	"net/http"
@@ -16,8 +15,6 @@ import (
 	"sync"
 	"time"
 
-	"k8s.io/klog"
-
 	"github.com/kubecost/cost-model/pkg/clustercache"
 	"github.com/kubecost/cost-model/pkg/env"
 	"github.com/kubecost/cost-model/pkg/errors"
@@ -56,23 +53,32 @@ func (aws *AWS) PricingSourceStatus() map[string]*PricingSource {
 	sources := make(map[string]*PricingSource)
 
 	sps := &PricingSource{
-		Name: SpotPricingSource,
+		Name:    SpotPricingSource,
+		Enabled: true,
 	}
-	sps.Error = ""
-	if aws.SpotPricingError != nil {
-		sps.Error = aws.SpotPricingError.Error()
-	}
-	if sps.Error != "" {
+
+	if !aws.SpotRefreshEnabled() {
 		sps.Available = false
-	} else if len(aws.SpotPricingByInstanceID) > 0 {
-		sps.Available = true
+		sps.Error = "Spot instances not set up"
+		sps.Enabled = false
 	} else {
-		sps.Error = "No spot instances detected"
+		sps.Error = ""
+		if aws.SpotPricingError != nil {
+			sps.Error = aws.SpotPricingError.Error()
+		}
+		if sps.Error != "" {
+			sps.Available = false
+		} else if len(aws.SpotPricingByInstanceID) > 0 {
+			sps.Available = true
+		} else {
+			sps.Error = "No spot instances detected"
+		}
 	}
 	sources[SpotPricingSource] = sps
 
 	rps := &PricingSource{
-		Name: ReservedInstancePricingSource,
+		Name:    ReservedInstancePricingSource,
+		Enabled: true,
 	}
 	rps.Error = ""
 	if aws.RIPricingError != nil {
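
The hunk above starts populating a new Enabled field next to the existing Name, Available, and Error fields. The PricingSource definition itself is not part of this diff; a sketch of its assumed shape, inferred from the fields used here:

// Assumed shape of PricingSource (inferred from usage in this diff;
// the actual definition lives elsewhere in pkg/cloud).
type PricingSource struct {
	Name      string
	Enabled   bool   // false when the source is not configured (e.g. no spot feed)
	Available bool   // true once pricing data has been fetched successfully
	Error     string // last error message, if any
}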
@@ -394,6 +400,7 @@ type AwsAthenaInfo struct {
 	AthenaRegion     string `json:"athenaRegion"`
 	AthenaDatabase   string `json:"athenaDatabase"`
 	AthenaTable      string `json:"athenaTable"`
+	AthenaWorkgroup  string `json:"athenaWorkgroup"`
 	ServiceKeyName   string `json:"serviceKeyName"`
 	ServiceKeySecret string `json:"serviceKeySecret"`
 	AccountID        string `json:"projectID"`
@@ -406,6 +413,7 @@ func (aai *AwsAthenaInfo) IsEmpty() bool {
 		aai.AthenaRegion == "" &&
 		aai.AthenaDatabase == "" &&
 		aai.AthenaTable == "" &&
+		aai.AthenaWorkgroup == "" &&
 		aai.ServiceKeyName == "" &&
 		aai.ServiceKeySecret == "" &&
 		aai.AccountID == "" &&
@@ -505,6 +513,7 @@ func (aws *AWS) GetAWSAthenaInfo() (*AwsAthenaInfo, error) {
 		AthenaRegion:     config.AthenaRegion,
 		AthenaDatabase:   config.AthenaDatabase,
 		AthenaTable:      config.AthenaTable,
+		AthenaWorkgroup:  config.AthenaWorkgroup,
 		ServiceKeyName:   aak.AccessKeyID,
 		ServiceKeySecret: aak.SecretAccessKey,
 		AccountID:        config.AthenaProjectID,
@@ -546,6 +555,7 @@ func (aws *AWS) UpdateConfig(r io.Reader, updateType string) (*CustomPricing, er
 			c.AthenaRegion = aai.AthenaRegion
 			c.AthenaDatabase = aai.AthenaDatabase
 			c.AthenaTable = aai.AthenaTable
+			c.AthenaWorkgroup = aai.AthenaWorkgroup
 			c.ServiceKeyName = aai.ServiceKeyName
 			if aai.ServiceKeySecret != "" {
 				c.ServiceKeySecret = aai.ServiceKeySecret
@@ -602,7 +612,7 @@ func (k *awsKey) ID() string {
 			return group
 		}
 	}
-	klog.V(3).Infof("Could not find instance ID in \"%s\"", k.ProviderID)
+	log.Warnf("Could not find instance ID in \"%s\"", k.ProviderID)
 	return ""
 }
 
@@ -627,7 +637,7 @@ func (k *awsKey) Features() string {
 func (aws *AWS) PVPricing(pvk PVKey) (*PV, error) {
 	pricing, ok := aws.Pricing[pvk.Features()]
 	if !ok {
-		klog.V(4).Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Debugf("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &PV{}, nil
 	}
 	return pricing.PV, nil
@@ -681,7 +691,7 @@ func (key *awsPVKey) Features() string {
 	}
 	class, ok := volTypes[storageClass]
 	if !ok {
-		klog.V(4).Infof("No voltype mapping for %s's storageClass: %s", key.Name, storageClass)
+		log.Debugf("No voltype mapping for %s's storageClass: %s", key.Name, storageClass)
 	}
 	return region + "," + class
 }
@@ -742,22 +752,28 @@ func (aws *AWS) getRegionPricing(nodeList []*v1.Node) (*http.Response, string, e
 
 	pricingURL += "index.json"
 
-	klog.V(2).Infof("starting download of \"%s\", which is quite large ...", pricingURL)
+	log.Infof("starting download of \"%s\", which is quite large ...", pricingURL)
 	resp, err := http.Get(pricingURL)
 	if err != nil {
-		klog.V(2).Infof("Bogus fetch of \"%s\": %v", pricingURL, err)
+		log.Errorf("Bogus fetch of \"%s\": %v", pricingURL, err)
 		return nil, pricingURL, err
 	}
 	return resp, pricingURL, err
 }
 
+// SpotRefreshEnabled determines whether the required configs to run the spot feed query have been set up
+func (aws *AWS) SpotRefreshEnabled() bool {
+	// Need a valid value for at least one of these fields to consider spot pricing as enabled
+	return len(aws.SpotDataBucket) != 0 || len(aws.SpotDataRegion) != 0 || len(aws.ProjectID) != 0
+}
+
 // DownloadPricingData fetches data from the AWS Pricing API
 func (aws *AWS) DownloadPricingData() error {
 	aws.DownloadPricingDataLock.Lock()
 	defer aws.DownloadPricingDataLock.Unlock()
 	c, err := aws.Config.GetCustomPricingData()
 	if err != nil {
-		klog.V(1).Infof("Error downloading default pricing data: %s", err.Error())
+		log.Errorf("Error downloading default pricing data: %s", err.Error())
 	}
 	aws.BaseCPUPrice = c.CPU
 	aws.BaseRAMPrice = c.RAM
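
Design note on the additions above: SpotRefreshEnabled treats the spot feed as configured when any one of SpotDataBucket, SpotDataRegion, or ProjectID is set, and DownloadPricingData (further down in this diff) returns early before starting the spot refresh loop when none of them are.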
@@ -775,7 +791,7 @@ func (aws *AWS) DownloadPricingData() error {
 	aws.ConfigureAuthWith(c) // load aws authentication from configuration or secret
 
 	if len(aws.SpotDataBucket) != 0 && len(aws.ProjectID) == 0 {
-		klog.V(1).Infof("using SpotDataBucket \"%s\" without ProjectID will not end well", aws.SpotDataBucket)
+		log.Warnf("using SpotDataBucket \"%s\" without ProjectID will not end well", aws.SpotDataBucket)
 	}
 	nodeList := aws.Clientset.GetAllNodes()
 
@@ -810,7 +826,7 @@ func (aws *AWS) DownloadPricingData() error {
 	for _, pv := range pvList {
 		params, ok := storageClassMap[pv.Spec.StorageClassName]
 		if !ok {
-			klog.V(2).Infof("Unable to find params for storageClassName %s, falling back to default pricing", pv.Spec.StorageClassName)
+			log.Infof("Unable to find params for storageClassName %s, falling back to default pricing", pv.Spec.StorageClassName)
 			continue
 		}
 		key := aws.GetPVKey(pv, params, "")
@@ -822,18 +838,18 @@ func (aws *AWS) DownloadPricingData() error {
 	if !aws.RIDataRunning {
 		err = aws.GetReservationDataFromAthena() // Block until one run has completed.
 		if err != nil {
-			klog.V(1).Infof("Failed to lookup reserved instance data: %s", err.Error())
+			log.Errorf("Failed to lookup reserved instance data: %s", err.Error())
 		} else { // If we make one successful run, check on new reservation data every hour
 			go func() {
 				defer errors.HandlePanic()
 				aws.RIDataRunning = true
 
 				for {
-					klog.Infof("Reserved Instance watcher running... next update in 1h")
+					log.Infof("Reserved Instance watcher running... next update in 1h")
 					time.Sleep(time.Hour)
 					err := aws.GetReservationDataFromAthena()
 					if err != nil {
-						klog.Infof("Error updating RI data: %s", err.Error())
+						log.Infof("Error updating RI data: %s", err.Error())
 					}
 				}
 			}()
@@ -842,17 +858,17 @@ func (aws *AWS) DownloadPricingData() error {
 	if !aws.SavingsPlanDataRunning {
 		err = aws.GetSavingsPlanDataFromAthena()
 		if err != nil {
-			klog.V(1).Infof("Failed to lookup savings plan data: %s", err.Error())
+			log.Errorf("Failed to lookup savings plan data: %s", err.Error())
 		} else {
 			go func() {
 				defer errors.HandlePanic()
 				aws.SavingsPlanDataRunning = true
 				for {
-					klog.Infof("Savings Plan watcher running... next update in 1h")
+					log.Infof("Savings Plan watcher running... next update in 1h")
 					time.Sleep(time.Hour)
 					err := aws.GetSavingsPlanDataFromAthena()
 					if err != nil {
-						klog.Infof("Error updating Savings Plan data: %s", err.Error())
+						log.Infof("Error updating Savings Plan data: %s", err.Error())
 					}
 				}
 			}()
@@ -871,10 +887,10 @@ func (aws *AWS) DownloadPricingData() error {
 	for {
 		t, err := dec.Token()
 		if err == io.EOF {
-			klog.V(2).Infof("done loading \"%s\"\n", pricingURL)
+			log.Infof("done loading \"%s\"\n", pricingURL)
 			break
 		} else if err != nil {
-			klog.V(2).Infof("error parsing response json %v", resp.Body)
+			log.Errorf("error parsing response json %v", resp.Body)
 			break
 		}
 		if t == "products" {
@@ -891,7 +907,7 @@ func (aws *AWS) DownloadPricingData() error {
 
 				err = dec.Decode(&product)
 				if err != nil {
-					klog.V(1).Infof("Error parsing response from \"%s\": %v", pricingURL, err.Error())
+					log.Errorf("Error parsing response from \"%s\": %v", pricingURL, err.Error())
 					break
 				}
 
@@ -968,7 +984,7 @@ func (aws *AWS) DownloadPricingData() error {
 					offerTerm := &AWSOfferTerm{}
 					err = dec.Decode(&offerTerm)
 					if err != nil {
-						klog.V(1).Infof("Error decoding AWS Offer Term: " + err.Error())
+						log.Errorf("Error decoding AWS Offer Term: " + err.Error())
 					}
 
 					key, ok := skusToKeys[sku.(string)]
@@ -1009,7 +1025,11 @@ func (aws *AWS) DownloadPricingData() error {
 			}
 		}
 	}
-	klog.V(2).Infof("Finished downloading \"%s\"", pricingURL)
+	log.Infof("Finished downloading \"%s\"", pricingURL)
+
+	if !aws.SpotRefreshEnabled() {
+		return nil
+	}
 
 	// Always run spot pricing refresh when performing download
 	aws.refreshSpotPricing(true)
@@ -1022,7 +1042,7 @@ func (aws *AWS) DownloadPricingData() error {
 			defer errors.HandlePanic()
 
 			for {
-				klog.Infof("Spot Pricing Refresh scheduled in %.2f minutes.", SpotRefreshDuration.Minutes())
+				log.Infof("Spot Pricing Refresh scheduled in %.2f minutes.", SpotRefreshDuration.Minutes())
 				time.Sleep(SpotRefreshDuration)
 
 				// Reoccurring refresh checks update times
@@ -1048,7 +1068,7 @@ func (aws *AWS) refreshSpotPricing(force bool) {
 
 	sp, err := aws.parseSpotData(aws.SpotDataBucket, aws.SpotDataPrefix, aws.ProjectID, aws.SpotDataRegion)
 	if err != nil {
-		klog.V(1).Infof("Skipping AWS spot data download: %s", err.Error())
+		log.Warnf("Skipping AWS spot data download: %s", err.Error())
 		aws.SpotPricingError = err
 		return
 	}
@@ -1145,7 +1165,7 @@ func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k Key) (*No
 		if len(arr) == 2 {
 			spotcost = arr[0]
 		} else {
-			klog.V(2).Infof("Spot data for node %s is missing", k.ID())
+			log.Infof("Spot data for node %s is missing", k.ID())
 		}
 		return &Node{
 			Cost:         spotcost,
@@ -1301,11 +1321,11 @@ func (awsProvider *AWS) ClusterInfo() (map[string]string, error) {
 
 	maybeClusterId := env.GetAWSClusterID()
 	if len(maybeClusterId) != 0 {
-		klog.V(2).Infof("Returning \"%s\" as ClusterName", maybeClusterId)
+		log.Infof("Returning \"%s\" as ClusterName", maybeClusterId)
 		return makeStructure(maybeClusterId)
 	}
 
-	klog.V(2).Infof("Unable to sniff out cluster ID, perhaps set $%s to force one", env.AWSClusterIDEnvVar)
+	log.Infof("Unable to sniff out cluster ID, perhaps set $%s to force one", env.AWSClusterIDEnvVar)
 	return makeStructure(defaultClusterName)
 }
 
@@ -1313,7 +1333,7 @@ func (awsProvider *AWS) ClusterInfo() (map[string]string, error) {
 func (aws *AWS) ConfigureAuth() error {
 	c, err := aws.Config.GetCustomPricingData()
 	if err != nil {
-		klog.V(1).Infof("Error downloading default pricing data: %s", err.Error())
+		log.Errorf("Error downloading default pricing data: %s", err.Error())
 	}
 	return aws.ConfigureAuthWith(c)
 }
@@ -1596,6 +1616,11 @@ func (aws *AWS) QueryAthenaPaginated(ctx context.Context, query string, fn func(
 		ResultConfiguration:   resultConfiguration,
 	}
 
+	// Only set if there is a value, the default input is nil which defaults to the 'primary' workgroup
+	if awsAthenaInfo.AthenaWorkgroup != "" {
+		startQueryExecutionInput.WorkGroup = awsSDK.String(awsAthenaInfo.AthenaWorkgroup)
+	}
+
 	// Create Athena Client
 	cfg, err := awsAthenaInfo.CreateConfig()
 	if err != nil {
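
The optional-workgroup pattern above, sketched standalone against aws-sdk-go-v2's Athena client (the helper name is hypothetical, not from this commit):

import (
	awsSDK "github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/athena"
)

// newStartInput leaves WorkGroup nil unless a workgroup was configured;
// Athena treats a nil WorkGroup as the default "primary" workgroup.
func newStartInput(query, workgroup string) *athena.StartQueryExecutionInput {
	in := &athena.StartQueryExecutionInput{
		QueryString: awsSDK.String(query),
	}
	if workgroup != "" {
		in.WorkGroup = awsSDK.String(workgroup)
	}
	return in
}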
@@ -1712,7 +1737,7 @@ func (aws *AWS) GetSavingsPlanDataFromAthena() error {
 			}
 			cost, err := strconv.ParseFloat(*r.Data[3].VarCharValue, 64)
 			if err != nil {
-				klog.Infof("Error converting `%s` from float ", *r.Data[3].VarCharValue)
+				log.Infof("Error converting `%s` from float ", *r.Data[3].VarCharValue)
 			}
 			r := &SavingsPlanData{
 				ResourceID:     *r.Data[2].VarCharValue,
@@ -1722,7 +1747,7 @@ func (aws *AWS) GetSavingsPlanDataFromAthena() error {
 			}
 			aws.SavingsPlanDataByInstanceID[r.ResourceID] = r
 		}
-		klog.V(1).Infof("Found %d savings plan applied instances", len(aws.SavingsPlanDataByInstanceID))
+		log.Debugf("Found %d savings plan applied instances", len(aws.SavingsPlanDataByInstanceID))
 		for k, r := range aws.SavingsPlanDataByInstanceID {
 			log.DedupedInfof(5, "Savings Plan Instance Data found for node %s : %f at time %s", k, r.EffectiveCost, r.MostRecentDate)
 		}
@@ -1732,7 +1757,7 @@ func (aws *AWS) GetSavingsPlanDataFromAthena() error {
 
 	query := fmt.Sprintf(q, cfg.AthenaTable, start, end)
 
-	klog.V(3).Infof("Running Query: %s", query)
+	log.Debugf("Running Query: %s", query)
 
 	err = aws.QueryAthenaPaginated(context.TODO(), query, processResults)
 	if err != nil {
@@ -1814,7 +1839,7 @@ func (aws *AWS) GetReservationDataFromAthena() error {
 			}
 			cost, err := strconv.ParseFloat(*r.Data[3].VarCharValue, 64)
 			if err != nil {
-				klog.Infof("Error converting `%s` from float ", *r.Data[3].VarCharValue)
+				log.Infof("Error converting `%s` from float ", *r.Data[3].VarCharValue)
 			}
 			r := &RIData{
 				ResourceID:     *r.Data[2].VarCharValue,
@@ -1824,7 +1849,7 @@ func (aws *AWS) GetReservationDataFromAthena() error {
 			}
 			aws.RIPricingByInstanceID[r.ResourceID] = r
 		}
-		klog.V(1).Infof("Found %d reserved instances", len(aws.RIPricingByInstanceID))
+		log.Debugf("Found %d reserved instances", len(aws.RIPricingByInstanceID))
 		for k, r := range aws.RIPricingByInstanceID {
 			log.DedupedInfof(5, "Reserved Instance Data found for node %s : %f at time %s", k, r.EffectiveCost, r.MostRecentDate)
 		}
@@ -1834,7 +1859,7 @@ func (aws *AWS) GetReservationDataFromAthena() error {
 
 	query := fmt.Sprintf(q, cfg.AthenaTable, start, end)
 
-	klog.V(3).Infof("Running Query: %s", query)
+	log.Debugf("Running Query: %s", query)
 
 	err = aws.QueryAthenaPaginated(context.TODO(), query, processResults)
 	if err != nil {
@@ -1945,18 +1970,18 @@ func (aws *AWS) parseSpotData(bucket string, prefix string, projectID string, re
 		})
 	}
 	lsoLen := len(lso.Contents)
-	klog.V(2).Infof("Found %d spot data files from yesterday", lsoLen)
+	log.Debugf("Found %d spot data files from yesterday", lsoLen)
 	if lsoLen == 0 {
-		klog.V(5).Infof("ListObjects \"s3://%s/%s\" produced no keys", *ls.Bucket, *ls.Prefix)
+		log.Debugf("ListObjects \"s3://%s/%s\" produced no keys", *ls.Bucket, *ls.Prefix)
 	}
 	lso2, err := cli.ListObjects(context.TODO(), ls2)
 	if err != nil {
 		return nil, err
 	}
 	lso2Len := len(lso2.Contents)
-	klog.V(2).Infof("Found %d spot data files from today", lso2Len)
+	log.Debugf("Found %d spot data files from today", lso2Len)
 	if lso2Len == 0 {
-		klog.V(5).Infof("ListObjects \"s3://%s/%s\" produced no keys", *ls2.Bucket, *ls2.Prefix)
+		log.Debugf("ListObjects \"s3://%s/%s\" produced no keys", *ls2.Bucket, *ls2.Prefix)
 	}
 
 	// TODO: Worth it to use LastModifiedDate to determine if we should reparse the spot data?
@@ -2028,17 +2053,17 @@ func (aws *AWS) parseSpotData(bucket string, prefix string, projectID string, re
 				// the first of which is "#Version"
 				// the second of which is "#Fields: "
 				if len(rec) != 1 {
-					klog.V(2).Infof("Expected %d spot info fields but received %d: %s", fieldsPerRecord, len(rec), rec)
+					log.Infof("Expected %d spot info fields but received %d: %s", fieldsPerRecord, len(rec), rec)
 					continue
 				}
 				if len(foundVersion) == 0 {
 					spotFeedVersion := rec[0]
-					klog.V(4).Infof("Spot feed version is \"%s\"", spotFeedVersion)
+					log.Debugf("Spot feed version is \"%s\"", spotFeedVersion)
 					matches := versionRx.FindStringSubmatch(spotFeedVersion)
 					if matches != nil {
 						foundVersion = matches[1]
 						if foundVersion != supportedSpotFeedVersion {
-							klog.V(2).Infof("Unsupported spot info feed version: wanted \"%s\" got \"%s\"", supportedSpotFeedVersion, foundVersion)
+							log.Infof("Unsupported spot info feed version: wanted \"%s\" got \"%s\"", supportedSpotFeedVersion, foundVersion)
 							break
 						}
 					}
@@ -2046,11 +2071,11 @@ func (aws *AWS) parseSpotData(bucket string, prefix string, projectID string, re
 				} else if strings.Index(rec[0], "#") == 0 {
 					continue
 				} else {
-					klog.V(3).Infof("skipping non-TSV line: %s", rec)
+					log.Infof("skipping non-TSV line: %s", rec)
 					continue
 				}
 			} else if err != nil {
-				klog.V(2).Infof("Error during spot info decode: %+v", err)
+				log.Warnf("Error during spot info decode: %+v", err)
 				continue
 			}
 

+ 19 - 18
pkg/cloud/azureprovider.go

@@ -13,10 +13,9 @@ import (
 	"sync"
 	"time"
 
-	"github.com/kubecost/cost-model/pkg/log"
-
 	"github.com/kubecost/cost-model/pkg/clustercache"
 	"github.com/kubecost/cost-model/pkg/env"
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util"
 	"github.com/kubecost/cost-model/pkg/util/fileutil"
 	"github.com/kubecost/cost-model/pkg/util/json"
@@ -28,7 +27,6 @@ import (
 	"github.com/Azure/go-autorest/autorest/azure"
 	"github.com/Azure/go-autorest/autorest/azure/auth"
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog"
 )
 
 const (
@@ -196,7 +194,7 @@ func getRegions(service string, subscriptionsClient subscriptions.Client, provid
 						if loc, ok := allLocations[displName]; ok {
 							supLocations[loc] = displName
 						} else {
-							klog.V(1).Infof("unsupported cloud region %s", loc)
+							log.Warnf("unsupported cloud region %s", loc)
 						}
 					}
 					break
@@ -214,7 +212,7 @@ func getRegions(service string, subscriptionsClient subscriptions.Client, provid
 						if loc, ok := allLocations[displName]; ok {
 							supLocations[loc] = displName
 						} else {
-							klog.V(1).Infof("unsupported cloud region %s", loc)
+							log.Warnf("unsupported cloud region %s", loc)
 						}
 					}
 					break
@@ -468,6 +466,7 @@ type AzureStorageConfig struct {
 	AccountName    string `json:"azureStorageAccount"`
 	AccessKey      string `json:"azureStorageAccessKey"`
 	ContainerName  string `json:"azureStorageContainer"`
+	ContainerPath  string `json:"azureContainerPath"`
 	AzureCloud     string `json:"azureCloud"`
 }
 
@@ -477,6 +476,7 @@ func (asc *AzureStorageConfig) IsEmpty() bool {
 		asc.AccountName == "" &&
 		asc.AccessKey == "" &&
 		asc.ContainerName == "" &&
+		asc.ContainerPath == "" &&
 		asc.AzureCloud == ""
 }
 
@@ -626,7 +626,7 @@ func (az *Azure) loadAzureStorageConfig(force bool) (*AzureStorageConfig, error)
 func (az *Azure) GetKey(labels map[string]string, n *v1.Node) Key {
 	cfg, err := az.GetConfig()
 	if err != nil {
-		klog.Infof("Error loading azure custom pricing information")
+		log.Infof("Error loading azure custom pricing information")
 	}
 	// azure defaults, see https://docs.microsoft.com/en-us/azure/aks/gpu-cluster
 	gpuLabel := "accelerator"
@@ -784,17 +784,17 @@ func (az *Azure) DownloadPricingData() error {
 
 	rateCardFilter := fmt.Sprintf("OfferDurableId eq '%s' and Currency eq '%s' and Locale eq 'en-US' and RegionInfo eq '%s'", config.AzureOfferDurableID, config.CurrencyCode, config.AzureBillingRegion)
 
-	klog.Infof("Using ratecard query %s", rateCardFilter)
+	log.Infof("Using ratecard query %s", rateCardFilter)
 	result, err := rcClient.Get(context.TODO(), rateCardFilter)
 	if err != nil {
-		klog.Warningf("Error in pricing download query from API")
+		log.Warnf("Error in pricing download query from API")
 		az.RateCardPricingError = err
 		return err
 	}
 
 	regions, err := getRegions("compute", sClient, providersClient, config.AzureSubscriptionID)
 	if err != nil {
-		klog.Warningf("Error in pricing download regions from API")
+		log.Warnf("Error in pricing download regions from API")
 		az.RateCardPricingError = err
 		return err
 	}
@@ -832,7 +832,7 @@ func (az *Azure) DownloadPricingData() error {
 						var priceInUsd float64
 
 						if len(v.MeterRates) < 1 {
-							klog.V(1).Infof("missing rate info %+v", map[string]interface{}{"MeterSubCategory": *v.MeterSubCategory, "region": region})
+							log.Warnf("missing rate info %+v", map[string]interface{}{"MeterSubCategory": *v.MeterSubCategory, "region": region})
 							continue
 						}
 						for _, rate := range v.MeterRates {
@@ -843,7 +843,7 @@ func (az *Azure) DownloadPricingData() error {
 						priceStr := fmt.Sprintf("%f", pricePerHour)
 
 						key := region + "," + storageClass
-						klog.V(4).Infof("Adding PV.Key: %s, Cost: %s", key, priceStr)
+						log.Debugf("Adding PV.Key: %s, Cost: %s", key, priceStr)
 						allPrices[key] = &AzurePricing{
 							PV: &PV{
 								Cost:   priceStr,
@@ -881,7 +881,7 @@ func (az *Azure) DownloadPricingData() error {
 				var priceInUsd float64
 
 				if len(v.MeterRates) < 1 {
-					klog.V(1).Infof("missing rate info %+v", map[string]interface{}{"MeterSubCategory": *v.MeterSubCategory, "region": region})
+					log.Warnf("missing rate info %+v", map[string]interface{}{"MeterSubCategory": *v.MeterSubCategory, "region": region})
 					continue
 				}
 				for _, rate := range v.MeterRates {
@@ -910,7 +910,7 @@ func (az *Azure) DownloadPricingData() error {
 	zeroPrice := "0.0"
 	for region := range regions {
 		key := region + "," + AzureFileStandardStorageClass
-		klog.V(4).Infof("Adding PV.Key: %s, Cost: %s", key, zeroPrice)
+		log.Debugf("Adding PV.Key: %s, Cost: %s", key, zeroPrice)
 		allPrices[key] = &AzurePricing{
 			PV: &PV{
 				Cost:   zeroPrice,
@@ -1009,13 +1009,13 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 	}
 
 	if n, ok := az.Pricing[azKey.Features()]; ok {
-		klog.V(4).Infof("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
+		log.Debugf("Returning pricing for node %s: %+v from key %s", azKey, n, azKey.Features())
 		if azKey.isValidGPUNode() {
 			n.Node.GPU = azKey.GetGPUCount()
 		}
 		return n.Node, nil
 	}
-	klog.V(1).Infof("[Warning] no pricing data found for %s: %s", azKey.Features(), azKey)
+	log.Warnf("no pricing data found for %s: %s", azKey.Features(), azKey)
 	c, err := az.GetConfig()
 	if err != nil {
 		return nil, fmt.Errorf("No default pricing data available")
@@ -1244,7 +1244,7 @@ func (az *Azure) PVPricing(pvk PVKey) (*PV, error) {
 
 	pricing, ok := az.Pricing[pvk.Features()]
 	if !ok {
-		klog.V(4).Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Debugf("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &PV{}, nil
 	}
 	return pricing.PV, nil
@@ -1268,8 +1268,9 @@ func (az *Azure) PricingSourceStatus() map[string]*PricingSource {
 		errMsg = az.RateCardPricingError.Error()
 	}
 	rcps := &PricingSource{
-		Name:  rateCardPricingSource,
-		Error: errMsg,
+		Name:    rateCardPricingSource,
+		Enabled: true,
+		Error:   errMsg,
 	}
 	if rcps.Error != "" {
 		rcps.Available = false

+ 43 - 37
pkg/cloud/gcpprovider.go

@@ -20,6 +20,7 @@ import (
 	"github.com/kubecost/cost-model/pkg/util/fileutil"
 	"github.com/kubecost/cost-model/pkg/util/json"
 	"github.com/kubecost/cost-model/pkg/util/timeutil"
+	"github.com/rs/zerolog"
 
 	"cloud.google.com/go/bigquery"
 	"cloud.google.com/go/compute/metadata"
@@ -27,7 +28,6 @@ import (
 	"golang.org/x/oauth2/google"
 	compute "google.golang.org/api/compute/v1"
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog"
 )
 
 const GKE_GPU_TAG = "cloud.google.com/gke-accelerator"
@@ -194,7 +194,7 @@ func (*GCP) loadGCPAuthSecret() {
 	keyPath := path + "key.json"
 	keyExists, _ := fileutil.FileExists(keyPath)
 	if keyExists {
-		klog.V(1).Infof("GCP Auth Key already exists, no need to load from secret")
+		log.Info("GCP Auth Key already exists, no need to load from secret")
 		return
 	}
 
@@ -205,19 +205,19 @@ func (*GCP) loadGCPAuthSecret() {
 			errMessage = err.Error()
 		}
 
-		klog.V(4).Infof("[Warning] Failed to load auth secret, or was not mounted: %s", errMessage)
+		log.Warnf("Failed to load auth secret, or was not mounted: %s", errMessage)
 		return
 	}
 
 	result, err := ioutil.ReadFile(authSecretPath)
 	if err != nil {
-		klog.V(4).Infof("[Warning] Failed to load auth secret, or was not mounted: %s", err.Error())
+		log.Warnf("Failed to load auth secret, or was not mounted: %s", err.Error())
 		return
 	}
 
 	err = ioutil.WriteFile(keyPath, result, 0644)
 	if err != nil {
-		klog.V(4).Infof("[Warning] Failed to copy auth secret to %s: %s", keyPath, err.Error())
+		log.Warnf("Failed to copy auth secret to %s: %s", keyPath, err.Error())
 	}
 }
 
@@ -262,6 +262,7 @@ func (gcp *GCP) UpdateConfig(r io.Reader, updateType string) (*CustomPricing, er
 			c.AthenaRegion = a.AthenaRegion
 			c.AthenaDatabase = a.AthenaDatabase
 			c.AthenaTable = a.AthenaTable
+			c.AthenaWorkgroup = a.AthenaWorkgroup
 			c.ServiceKeyName = a.ServiceKeyName
 			c.ServiceKeySecret = a.ServiceKeySecret
 			c.AthenaProjectID = a.AccountID
@@ -307,12 +308,12 @@ func (gcp *GCP) ClusterInfo() (map[string]string, error) {
 
 	attribute, err := metadataClient.InstanceAttributeValue("cluster-name")
 	if err != nil {
-		klog.Infof("Error loading metadata cluster-name: %s", err.Error())
+		log.Infof("Error loading metadata cluster-name: %s", err.Error())
 	}
 
 	c, err := gcp.GetConfig()
 	if err != nil {
-		klog.V(1).Infof("Error opening config: %s", err.Error())
+		log.Errorf("Error opening config: %s", err.Error())
 	}
 	if c.ClusterName != "" {
 		attribute = c.ClusterName
@@ -451,6 +452,8 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 		t, err := dec.Token()
 		if err == io.EOF {
 			break
+		} else if err != nil {
+			return nil, "", fmt.Errorf("Error parsing GCP pricing page: %s", err)
 		}
 		if t == "skus" {
 			_, err := dec.Token() // consumes [
@@ -591,7 +594,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 				for matchnum, group := range provIdRx.FindStringSubmatch(product.Description) {
 					if matchnum == 1 {
 						gpuType = strings.ToLower(strings.Join(strings.Split(group, " "), "-"))
-						klog.V(4).Info("GPU type found: " + gpuType)
+						log.Debug("GPU type found: " + gpuType)
 					}
 				}
 
@@ -632,8 +635,8 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 						for k, key := range inputKeys {
 							if key.GPUType() == gpuType+","+usageType {
 								if region == strings.Split(k, ",")[0] {
-									klog.V(3).Infof("Matched GPU to node in region \"%s\"", region)
-									klog.V(4).Infof("PRODUCT DESCRIPTION: %s", product.Description)
+									log.Infof("Matched GPU to node in region \"%s\"", region)
+									log.Debugf("PRODUCT DESCRIPTION: %s", product.Description)
 									matchedKey := key.Features()
 									if pl, ok := gcpPricingList[matchedKey]; ok {
 										pl.Node.GPUName = gpuType
@@ -647,7 +650,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 										}
 										gcpPricingList[matchedKey] = product
 									}
-									klog.V(3).Infof("Added data for " + matchedKey)
+									log.Infof("Added data for %s", matchedKey)
 								}
 							}
 						}
@@ -668,7 +671,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 								continue
 							} else if strings.Contains(strings.ToUpper(product.Description), "RAM") {
 								if instanceType == "custom" {
-									klog.V(4).Infof("RAM custom sku is: " + product.Name)
+									log.Debug("RAM custom sku is: " + product.Name)
 								}
 								if _, ok := gcpPricingList[candidateKey]; ok {
 									gcpPricingList[candidateKey].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
@@ -685,10 +688,10 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 									gcpPricingList[candidateKey] = product
 								}
 								if _, ok := gcpPricingList[candidateKeyGPU]; ok {
-									klog.V(1).Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
+									log.Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
 									gcpPricingList[candidateKeyGPU].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
 								} else {
-									klog.V(1).Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
+									log.Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
 									product = &GCPPricing{}
 									product.Node = &Node{
 										RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
@@ -740,7 +743,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[stri
 		if t == "nextPageToken" {
 			pageToken, err := dec.Token()
 			if err != nil {
-				klog.V(2).Infof("Error parsing nextpage token: " + err.Error())
+				log.Errorf("Error parsing nextPageToken: %s", err.Error())
 				return nil, "", err
 			}
 			if pageToken.(string) != "" {
@@ -760,7 +763,7 @@ func (gcp *GCP) parsePages(inputKeys map[string]Key, pvKeys map[string]PVKey) (m
 		return nil, err
 	}
 	url := "https://cloudbilling.googleapis.com/v1/services/6F81-5844-456A/skus?key=" + gcp.APIKey + "&currencyCode=" + c.CurrencyCode
-	klog.V(2).Infof("Fetch GCP Billing Data from URL: %s", url)
+	log.Infof("Fetch GCP Billing Data from URL: %s", url)
 	var parsePagesHelper func(string) error
 	parsePagesHelper = func(pageToken string) error {
 		if pageToken == "done" {
@@ -810,13 +813,13 @@ func (gcp *GCP) parsePages(inputKeys map[string]Key, pvKeys map[string]PVKey) (m
 			}
 		}
 	}
-	klog.V(1).Infof("ALL PAGES: %+v", returnPages)
+	log.Debugf("ALL PAGES: %+v", returnPages)
 	for k, v := range returnPages {
 		if v.Node != nil {
-			klog.V(1).Infof("Returned Page: %s : %+v", k, v.Node)
+			log.Debugf("Returned Page: %s : %+v", k, v.Node)
 		}
 		if v.PV != nil {
-			klog.V(1).Infof("Returned Page: %s : %+v", k, v.PV)
+			log.Debugf("Returned Page: %s : %+v", k, v.PV)
 		}
 	}
 	return returnPages, err
@@ -828,7 +831,7 @@ func (gcp *GCP) DownloadPricingData() error {
 	defer gcp.DownloadPricingDataLock.Unlock()
 	c, err := gcp.Config.GetCustomPricingData()
 	if err != nil {
-		klog.V(2).Infof("Error downloading default pricing data: %s", err.Error())
+		log.Errorf("Error downloading default pricing data: %s", err.Error())
 		return err
 	}
 	gcp.loadGCPAuthSecret()
@@ -880,12 +883,15 @@ func (gcp *GCP) DownloadPricingData() error {
 
 	reserved, err := gcp.getReservedInstances()
 	if err != nil {
-		klog.V(1).Infof("Failed to lookup reserved instance data: %s", err.Error())
+		log.Errorf("Failed to lookup reserved instance data: %s", err.Error())
 	} else {
-		klog.V(1).Infof("Found %d reserved instances", len(reserved))
 		gcp.ReservedInstances = reserved
-		for _, r := range reserved {
-			klog.V(1).Infof("%s", r)
+
+		if zerolog.GlobalLevel() <= zerolog.DebugLevel {
+			log.Debugf("Found %d reserved instances", len(reserved))
+			for _, r := range reserved {
+				log.Debugf("%s", r)
+			}
 		}
 	}
 
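The reserved-instance hunk above also introduces a useful guard: since the per-instance loop exists only to produce debug output, it is wrapped in a `zerolog.GlobalLevel()` check so no iteration or formatting happens when debug logging is disabled. A runnable sketch of the same guard (the instance IDs are made up):

```go
package main

import (
	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
)

func main() {
	zerolog.SetGlobalLevel(zerolog.InfoLevel)

	reserved := []string{"ri-1", "ri-2", "ri-3"} // hypothetical instance IDs

	// Skip the whole loop when debug output would be discarded anyway.
	if zerolog.GlobalLevel() <= zerolog.DebugLevel {
		log.Debug().Msgf("Found %d reserved instances", len(reserved))
		for _, r := range reserved {
			log.Debug().Msg(r)
		}
	}
}
```
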
@@ -903,7 +909,7 @@ func (gcp *GCP) PVPricing(pvk PVKey) (*PV, error) {
 	defer gcp.DownloadPricingDataLock.RUnlock()
 	pricing, ok := gcp.Pricing[pvk.Features()]
 	if !ok {
-		klog.V(3).Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
+		log.Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
 		return &PV{}, nil
 	}
 	return pricing.PV, nil
@@ -1014,7 +1020,7 @@ func (gcp *GCP) ApplyReservedInstancePricing(nodes map[string]*Node) {
 
 	// Early return if no reserved instance data loaded
 	if numReserved == 0 {
-		klog.V(4).Infof("[Reserved] No Reserved Instances")
+		log.Debug("[Reserved] No Reserved Instances")
 		return
 	}
 
@@ -1023,7 +1029,7 @@ func (gcp *GCP) ApplyReservedInstancePricing(nodes map[string]*Node) {
 	counters := make(map[string][]*GCPReservedCounter)
 	for _, r := range gcp.ReservedInstances {
 		if now.Before(r.StartDate) || now.After(r.EndDate) {
-			klog.V(1).Infof("[Reserved] Skipped Reserved Instance due to dates")
+			log.Infof("[Reserved] Skipped Reserved Instance due to dates")
 			continue
 		}
 
@@ -1051,19 +1057,19 @@ func (gcp *GCP) ApplyReservedInstancePricing(nodes map[string]*Node) {
 
 		kNode, ok := gcpNodes[nodeName]
 		if !ok {
-			klog.V(4).Infof("[Reserved] Could not find K8s Node with name: %s", nodeName)
+			log.Debugf("[Reserved] Could not find K8s Node with name: %s", nodeName)
 			continue
 		}
 
 		nodeRegion, ok := util.GetRegion(kNode.Labels)
 		if !ok {
-			klog.V(4).Infof("[Reserved] Could not find node region")
+			log.Debug("[Reserved] Could not find node region")
 			continue
 		}
 
 		reservedCounters, ok := counters[nodeRegion]
 		if !ok {
-			klog.V(4).Infof("[Reserved] Could not find counters for region: %s", nodeRegion)
+			log.Debugf("[Reserved] Could not find counters for region: %s", nodeRegion)
 			continue
 		}
 
@@ -1134,7 +1140,7 @@ func (gcp *GCP) getReservedInstances() ([]*GCPReservedInstance, error) {
 				case GCPReservedInstanceResourceTypeCPU:
 					vcpu = resource.Amount
 				default:
-					klog.V(4).Infof("Failed to handle resource type: %s", resource.Type)
+					log.Debugf("Failed to handle resource type: %s", resource.Type)
 				}
 			}
 
@@ -1147,13 +1153,13 @@ func (gcp *GCP) getReservedInstances() ([]*GCPReservedInstance, error) {
 			timeLayout := "2006-01-02T15:04:05Z07:00"
 			startTime, err := time.Parse(timeLayout, commit.StartTimestamp)
 			if err != nil {
-				klog.V(1).Infof("Failed to parse start date: %s", commit.StartTimestamp)
+				log.Warnf("Failed to parse start date: %s", commit.StartTimestamp)
 				continue
 			}
 
 			endTime, err := time.Parse(timeLayout, commit.EndTimestamp)
 			if err != nil {
-				klog.V(1).Infof("Failed to parse end date: %s", commit.EndTimestamp)
+				log.Warnf("Failed to parse end date: %s", commit.EndTimestamp)
 				continue
 			}
 
@@ -1244,7 +1250,7 @@ func (gcp *gcpKey) GPUType() string {
 		} else {
 			usageType = "ondemand"
 		}
-		klog.V(4).Infof("GPU of type: \"%s\" found", t)
+		log.Debugf("GPU of type: \"%s\" found", t)
 		return t + "," + usageType
 	}
 	return ""
@@ -1328,7 +1334,7 @@ func (gcp *GCP) isValidPricingKey(key Key) bool {
 // NodePricing returns GCP pricing data for a single node
 func (gcp *GCP) NodePricing(key Key) (*Node, error) {
 	if n, ok := gcp.getPricing(key); ok {
-		klog.V(4).Infof("Returning pricing for node %s: %+v from SKU %s", key, n.Node, n.Name)
+		log.Debugf("Returning pricing for node %s: %+v from SKU %s", key, n.Node, n.Name)
 		n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 		return n.Node, nil
 	} else if ok := gcp.isValidPricingKey(key); ok {
@@ -1337,11 +1343,11 @@ func (gcp *GCP) NodePricing(key Key) (*Node, error) {
 			return nil, fmt.Errorf("Download pricing data failed: %s", err.Error())
 		}
 		if n, ok := gcp.getPricing(key); ok {
-			klog.V(4).Infof("Returning pricing for node %s: %+v from SKU %s", key, n.Node, n.Name)
+			log.Debugf("Returning pricing for node %s: %+v from SKU %s", key, n.Node, n.Name)
 			n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 			return n.Node, nil
 		}
-		klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
+		log.Warnf("no pricing data found for %s: %s", key.Features(), key)
 		return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
 	}
 	return nil, fmt.Errorf("Warning: no pricing data found for %s", key)

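The `parsePage` fix above matters more than it looks: the token loop previously broke only on `io.EOF`, so a persistent decode error would be returned again and again and the loop would spin forever. A self-contained sketch of the corrected pattern against `encoding/json`'s streaming decoder (the sample payload is invented):

```go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"strings"
)

func main() {
	r := strings.NewReader(`{"skus": [{"name": "A"}], "nextPageToken": ""}`)
	dec := json.NewDecoder(r)

	for {
		t, err := dec.Token()
		if err == io.EOF {
			break // clean end of stream
		} else if err != nil {
			// Without this branch, a malformed page would loop forever.
			fmt.Println("error parsing GCP pricing page:", err)
			return
		}
		if t == "nextPageToken" {
			tok, err := dec.Token()
			if err != nil {
				fmt.Println("error parsing nextPageToken:", err)
				return
			}
			fmt.Printf("next page token: %q\n", tok)
		}
	}
}
```
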
+ 9 - 8
pkg/cloud/provider.go

@@ -13,8 +13,6 @@ import (
 
 	"github.com/kubecost/cost-model/pkg/util"
 
-	"k8s.io/klog"
-
 	"cloud.google.com/go/compute/metadata"
 
 	"github.com/kubecost/cost-model/pkg/clustercache"
@@ -167,6 +165,7 @@ type CustomPricing struct {
 	AthenaRegion                 string `json:"athenaRegion"`
 	AthenaDatabase               string `json:"athenaDatabase"`
 	AthenaTable                  string `json:"athenaTable"`
+	AthenaWorkgroup              string `json:"athenaWorkgroup"`
 	MasterPayerARN               string `json:"masterPayerARN"`
 	BillingDataDataset           string `json:"billingDataDataset,omitempty"`
 	CustomPricesEnabled          string `json:"customPricesEnabled"`
@@ -188,6 +187,7 @@ type CustomPricing struct {
 	ShareTenancyCosts            string `json:"shareTenancyCosts"` // TODO clean up configuration so we can use a type other that string (this should be a bool, but the app panics if it's not a string)
 	ReadOnly                     string `json:"readOnly"`
 	KubecostToken                string `json:"kubecostToken"`
+	GoogleAnalyticsTag           string `json:"googleAnalyticsTag"`
 }
 
 // GetSharedOverheadCostPerMonth parses and returns a float64 representation
@@ -257,6 +257,7 @@ type PricingSources struct {
 
 type PricingSource struct {
 	Name      string `json:"name"`
+	Enabled   bool   `json:"enabled"`
 	Available bool   `json:"available"`
 	Error     string `json:"error"`
 }
@@ -395,7 +396,7 @@ func SharedLabels(p Provider) ([]string, []string) {
 	ks := strings.Split(config.SharedLabelNames, ",")
 	vs := strings.Split(config.SharedLabelValues, ",")
 	if len(ks) != len(vs) {
-		klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
+		log.Warnf("Shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
 		return names, values
 	}
 
@@ -429,7 +430,7 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 
 	switch cp.provider {
 	case "CSV":
-		klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
+		log.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
 		return &CSVProvider{
 			CSVLocation: env.GetCSVPath(),
 			CustomProvider: &CustomProvider{
@@ -438,7 +439,7 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 			},
 		}, nil
 	case "GCP":
-		klog.V(3).Info("metadata reports we are in GCE")
+		log.Info("metadata reports we are in GCE")
 		if apiKey == "" {
 			return nil, errors.New("Supply a GCP Key to start getting data")
 		}
@@ -450,7 +451,7 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 			clusterProjectId: cp.projectID,
 		}, nil
 	case "AWS":
-		klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
+		log.Info("Found ProviderID starting with \"aws\", using AWS Provider")
 		return &AWS{
 			Clientset:            cache,
 			Config:               NewProviderConfig(config, cp.configFileName),
@@ -459,7 +460,7 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 			serviceAccountChecks: NewServiceAccountChecks(),
 		}, nil
 	case "AZURE":
-		klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
+		log.Info("Found ProviderID starting with \"azure\", using Azure Provider")
 		return &Azure{
 			Clientset:            cache,
 			Config:               NewProviderConfig(config, cp.configFileName),
@@ -468,7 +469,7 @@ func NewProvider(cache clustercache.ClusterCache, apiKey string, config *config.
 			serviceAccountChecks: NewServiceAccountChecks(),
 		}, nil
 	default:
-		klog.V(2).Info("Unsupported provider, falling back to default")
+		log.Info("Unsupported provider, falling back to default")
 		return &CustomProvider{
 			Clientset: cache,
 			Config:    NewProviderConfig(config, cp.configFileName),

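The `CustomPricing` change is purely additive: `athenaWorkgroup` (and `googleAnalyticsTag`) join the existing JSON tags, so older config files still unmarshal and the new fields default to empty. A round-trip sketch with a trimmed stand-in struct (only the Athena fields; the values are examples, though "primary" is Athena's default workgroup name):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed stand-in for cloud.CustomPricing.
type customPricing struct {
	AthenaRegion    string `json:"athenaRegion"`
	AthenaDatabase  string `json:"athenaDatabase"`
	AthenaTable     string `json:"athenaTable"`
	AthenaWorkgroup string `json:"athenaWorkgroup"`
}

func main() {
	raw := []byte(`{
		"athenaRegion": "us-east-1",
		"athenaDatabase": "athenacurcfn_kubecost",
		"athenaTable": "kubecost_cur",
		"athenaWorkgroup": "primary"
	}`)

	var cp customPricing
	if err := json.Unmarshal(raw, &cp); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cp)
}
```
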
+ 6 - 8
pkg/cloud/providerconfig.go

@@ -13,8 +13,6 @@ import (
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util/json"
 	"github.com/microcosm-cc/bluemonday"
-
-	"k8s.io/klog"
 )
 
 var sanitizePolicy = bluemonday.UGCPolicy()
@@ -61,7 +59,7 @@ func (pc *ProviderConfig) onConfigFileUpdated(changeType config.ChangeType, data
 		customPricing := new(CustomPricing)
 		err := json.Unmarshal(data, customPricing)
 		if err != nil {
-			klog.Infof("Could not decode Custom Pricing file at path %s. Using default.", pc.configFile.Path())
+			log.Infof("Could not decode Custom Pricing file at path %s. Using default.", pc.configFile.Path())
 			customPricing = DefaultPricing()
 		}
 
@@ -86,13 +84,13 @@ func (pc *ProviderConfig) loadConfig(writeIfNotExists bool) (*CustomPricing, err
 	exists, err := pc.configFile.Exists()
 	// File Error other than NotExists
 	if err != nil {
-		klog.Infof("Custom Pricing file at path '%s' read error: '%s'", pc.configFile.Path(), err.Error())
+		log.Infof("Custom Pricing file at path '%s' read error: '%s'", pc.configFile.Path(), err.Error())
 		return DefaultPricing(), err
 	}
 
 	// File Doesn't Exist
 	if !exists {
-		klog.Infof("Could not find Custom Pricing file at path '%s'", pc.configFile.Path())
+		log.Infof("Could not find Custom Pricing file at path '%s'", pc.configFile.Path())
 		pc.customPricing = DefaultPricing()
 
 		// Only write the file if flag enabled
@@ -104,7 +102,7 @@ func (pc *ProviderConfig) loadConfig(writeIfNotExists bool) (*CustomPricing, err
 
 			err = pc.configFile.Write(cj)
 			if err != nil {
-				klog.Infof("Could not write Custom Pricing file to path '%s'", pc.configFile.Path())
+				log.Infof("Could not write Custom Pricing file to path '%s'", pc.configFile.Path())
 				return pc.customPricing, err
 			}
 		}
@@ -115,7 +113,7 @@ func (pc *ProviderConfig) loadConfig(writeIfNotExists bool) (*CustomPricing, err
 	// File Exists - Read all contents of file, unmarshal json
 	byteValue, err := pc.configFile.Read()
 	if err != nil {
-		klog.Infof("Could not read Custom Pricing file at path %s", pc.configFile.Path())
+		log.Infof("Could not read Custom Pricing file at path %s", pc.configFile.Path())
 		// If read fails, we don't want to cache default, assuming that the file is valid
 		return DefaultPricing(), err
 	}
@@ -123,7 +121,7 @@ func (pc *ProviderConfig) loadConfig(writeIfNotExists bool) (*CustomPricing, err
 	var customPricing CustomPricing
 	err = json.Unmarshal(byteValue, &customPricing)
 	if err != nil {
-		klog.Infof("Could not decode Custom Pricing file at path %s", pc.configFile.Path())
+		log.Infof("Could not decode Custom Pricing file at path %s", pc.configFile.Path())
 		return DefaultPricing(), err
 	}
 

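`loadConfig` follows a load-or-default policy worth spelling out: a missing file returns the default (optionally writing it back), while a failed read or failed unmarshal returns the default together with the error, so the caller can tell the cases apart and avoid caching a bad result. A condensed sketch under stated assumptions (the `pricing` struct and its default values are illustrative, not the real `DefaultPricing`):

```go
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

type pricing struct {
	CPU string `json:"CPU"`
	RAM string `json:"RAM"`
}

func defaultPricing() *pricing { return &pricing{CPU: "0.031611", RAM: "0.004237"} }

// loadConfig mirrors ProviderConfig.loadConfig's fallback behavior.
func loadConfig(path string, writeIfNotExists bool) (*pricing, error) {
	data, err := os.ReadFile(path)
	if os.IsNotExist(err) {
		p := defaultPricing()
		if writeIfNotExists {
			b, _ := json.Marshal(p)
			if werr := os.WriteFile(path, b, 0644); werr != nil {
				return p, werr
			}
		}
		return p, nil
	}
	if err != nil {
		return defaultPricing(), err // read failed: default, but surface the error
	}
	p := new(pricing)
	if err := json.Unmarshal(data, p); err != nil {
		return defaultPricing(), err // decode failed: default, but surface the error
	}
	return p, nil
}

func main() {
	p, err := loadConfig("/tmp/pricing.json", true)
	fmt.Printf("%+v err=%v\n", p, err)
}
```
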
+ 3 - 3
pkg/clustercache/clustercache.go

@@ -4,7 +4,7 @@ import (
 	"sync"
 
 	"github.com/kubecost/cost-model/pkg/env"
-	"k8s.io/klog"
+	"github.com/kubecost/cost-model/pkg/log"
 
 	appsv1 "k8s.io/api/apps/v1"
 	autoscaling "k8s.io/api/autoscaling/v2beta1"
@@ -111,7 +111,7 @@ func NewKubernetesClusterCache(client kubernetes.Interface) ClusterCache {
 	pdbClient := client.PolicyV1beta1().RESTClient()
 
 	kubecostNamespace := env.GetKubecostNamespace()
-	klog.Infof("NAMESPACE: %s", kubecostNamespace)
+	log.Infof("NAMESPACE: %s", kubecostNamespace)
 
 	kcc := &KubernetesClusterCache{
 		client:                     client,
@@ -158,7 +158,7 @@ func NewKubernetesClusterCache(client kubernetes.Interface) ClusterCache {
 
 	wg.Wait()
 
-	klog.Infof("Done waiting")
+	log.Infof("Done waiting")
 
 	return kcc
 }

+ 2 - 2
pkg/clustercache/clusterexporter.go

@@ -59,7 +59,7 @@ func (ce *ClusterExporter) Run() {
 	ce.runState.WaitForReset()
 
 	if !ce.runState.Start() {
-		log.Warningf("ClusterExporter already running")
+		log.Warnf("ClusterExporter already running")
 		return
 	}
 
@@ -67,7 +67,7 @@ func (ce *ClusterExporter) Run() {
 		for {
 			err := ce.Export()
 			if err != nil {
-				log.Warningf("Failed to export cluster: %s", err)
+				log.Warnf("Failed to export cluster: %s", err)
 			}
 
 			select {

+ 3 - 3
pkg/clustercache/clusterimporter.go

@@ -49,7 +49,7 @@ func (ci *ClusterImporter) update(data []byte) {
 	ce := new(clusterEncoding)
 	err := json.Unmarshal(data, ce)
 	if err != nil {
-		log.Warningf("Failed to unmarshal cluster during import: %s", err)
+		log.Warnf("Failed to unmarshal cluster during import: %s", err)
 		return
 	}
 
@@ -74,7 +74,7 @@ func (ci *ClusterImporter) Run() {
 	if exists {
 		data, err := ci.source.Read()
 		if err != nil {
-			log.Warningf("Failed to import cluster: %s", err)
+			log.Warnf("Failed to import cluster: %s", err)
 		} else {
 			ci.update(data)
 		}
@@ -319,5 +319,5 @@ func (ci *ClusterImporter) GetAllReplicationControllers() []*v1.ReplicationContr
 func (ci *ClusterImporter) SetConfigMapUpdateFunc(_ func(interface{})) {
 	// TODO: (bolt) This function is still a bit strange to me for the ClusterCache interface.
 	// TODO: (bolt) no-op for now.
-	log.Warningf("SetConfigMapUpdateFunc is disabled for imported cluster data.")
+	log.Warnf("SetConfigMapUpdateFunc is disabled for imported cluster data.")
 }

+ 6 - 6
pkg/clustercache/watchcontroller.go

@@ -5,7 +5,7 @@ import (
 	"reflect"
 	"time"
 
-	"k8s.io/klog"
+	"github.com/kubecost/cost-model/pkg/log"
 
 	"k8s.io/apimachinery/pkg/fields"
 	rt "k8s.io/apimachinery/pkg/runtime"
@@ -135,7 +135,7 @@ func (c *CachingWatchController) processNextItem() bool {
 func (c *CachingWatchController) handle(key string) error {
 	obj, exists, err := c.indexer.GetByKey(key)
 	if err != nil {
-		klog.Errorf("Fetching %s with key %s from store failed with %v", c.resourceType, key, err)
+		log.Errorf("Fetching %s with key %s from store failed with %v", c.resourceType, key, err)
 		return err
 	}
 
@@ -163,7 +163,7 @@ func (c *CachingWatchController) handleErr(err error, key interface{}) {
 
 	// This controller retries 5 times if something goes wrong. After that, it stops trying.
 	if c.queue.NumRequeues(key) < 5 {
-		klog.V(3).Infof("Error syncing %s %v: %v", c.resourceType, key, err)
+		log.Errorf("Error syncing %s %v: %v", c.resourceType, key, err)
 
 		// Re-enqueue the key rate limited. Based on the rate limiter on the
 		// queue and the re-enqueue history, the key will be processed later again.
@@ -174,7 +174,7 @@ func (c *CachingWatchController) handleErr(err error, key interface{}) {
 	c.queue.Forget(key)
 	// Report to an external entity that, even after several retries, we could not successfully process this key
 	runtime.HandleError(err)
-	klog.Infof("Dropping %s %q out of the queue: %v", c.resourceType, key, err)
+	log.Infof("Dropping %s %q out of the queue: %v", c.resourceType, key, err)
 }
 
 func (c *CachingWatchController) WarmUp(cancelCh chan struct{}) {
@@ -192,14 +192,14 @@ func (c *CachingWatchController) Run(threadiness int, stopCh chan struct{}) {
 
 	// Let the workers stop when we are done
 	defer c.queue.ShutDown()
-	klog.V(3).Infof("Starting %s controller", c.resourceType)
+	log.Infof("Starting %s controller", c.resourceType)
 
 	for i := 0; i < threadiness; i++ {
 		go wait.Until(c.runWorker, time.Second, stopCh)
 	}
 
 	<-stopCh
-	klog.V(3).Infof("Stopping %s controller", c.resourceType)
+	log.Infof("Stopping %s controller", c.resourceType)
 }
 
 func (c *CachingWatchController) runWorker() {

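`handleErr` above implements the standard client-go retry policy: requeue the key with rate limiting up to five times, then `Forget` it and drop it. A condensed, runnable sketch against the workqueue API (the key and error are invented):

```go
package main

import (
	"fmt"

	"k8s.io/client-go/util/workqueue"
)

// handleErr mirrors the controller's policy: rate-limited retries up to 5
// times, then forget the key so its retry history is cleared.
func handleErr(queue workqueue.RateLimitingInterface, err error, key interface{}) {
	if err == nil {
		queue.Forget(key)
		return
	}
	if queue.NumRequeues(key) < 5 {
		queue.AddRateLimited(key) // retried later with backoff
		return
	}
	queue.Forget(key)
	fmt.Printf("dropping %q out of the queue: %v\n", key, err)
}

func main() {
	q := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
	defer q.ShutDown()
	handleErr(q, fmt.Errorf("sync failed"), "default/my-pod")
}
```
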
+ 3 - 4
pkg/cmd/agent/agent.go

@@ -20,7 +20,6 @@ import (
 	prometheusAPI "github.com/prometheus/client_golang/api/prometheus/v1"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/klog"
 
 	"github.com/rs/cors"
 	"k8s.io/client-go/kubernetes"
@@ -152,7 +151,7 @@ func Execute(opts *AgentOpts) error {
 		scrapeInterval = si
 	}
 
-	klog.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
+	log.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
 
 	// initialize kubernetes client and cluster cache
 	k8sClient, clusterCache, err := newKubernetesClusterCache()
@@ -183,7 +182,7 @@ func Execute(opts *AgentOpts) error {
 	for _, cw := range watchedConfigs {
 		configs, err := k8sClient.CoreV1().ConfigMaps(kubecostNamespace).Get(context.Background(), cw, metav1.GetOptions{})
 		if err != nil {
-			klog.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
+			log.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
 		} else {
 			watchConfigFunc(configs)
 		}
@@ -220,7 +219,7 @@ func Execute(opts *AgentOpts) error {
 	// download pricing data
 	err = cloudProvider.DownloadPricingData()
 	if err != nil {
-		klog.Errorf("Error downloading pricing data: %s", err)
+		log.Errorf("Error downloading pricing data: %s", err)
 	}
 
 	// start emitting metrics

+ 17 - 9
pkg/cmd/commands.go

@@ -1,15 +1,15 @@
 package cmd
 
 import (
-	"flag"
 	"fmt"
 	"os"
+	"strings"
 
 	"github.com/kubecost/cost-model/pkg/cmd/agent"
 	"github.com/kubecost/cost-model/pkg/cmd/costmodel"
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/spf13/cobra"
-	"github.com/spf13/pflag"
-	"k8s.io/klog"
+	"github.com/spf13/viper"
 )
 
 const (
@@ -41,12 +41,6 @@ func Execute(costModelCmd *cobra.Command) error {
 
 	rootCmd := newRootCommand(costModelCmd)
 
-	// initialize klog and make cobra aware of all the go flags
-	klog.InitFlags(nil)
-	pflag.CommandLine.AddGoFlag(flag.CommandLine.Lookup("v"))
-	pflag.CommandLine.AddGoFlag(flag.CommandLine.Lookup("logtostderr"))
-	pflag.CommandLine.Set("v", "3")
-
 	// in the event that no directive/command is passed, we want to default to using the cost-model command
 	// cobra doesn't provide a way within the API to do this, so we'll prepend the command if it is omitted.
 	if len(os.Args) > 1 {
@@ -71,12 +65,26 @@ func newRootCommand(costModelCmd *cobra.Command) *cobra.Command {
 		SilenceUsage: true,
 	}
 
+	// Add our persistent flags, these are global and available anywhere
+	cmd.PersistentFlags().String("log-level", "info", "Set the log level")
+	cmd.PersistentFlags().String("log-format", "pretty", "Set the log format - Can be either 'JSON' or 'pretty'")
+	cmd.PersistentFlags().String("log-format", "pretty", "Set the log format; either 'JSON' or 'pretty'")
+	viper.BindPFlag("log-level", cmd.PersistentFlags().Lookup("log-level"))
+	viper.BindPFlag("log-format", cmd.PersistentFlags().Lookup("log-format"))
+
+	// Set up viper to read from the env as well as the command line, so a flag
+	// like 'log-level' can also be supplied as the env var 'LOG_LEVEL'
+	viper.AutomaticEnv()
+	viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
+
 	// add the modes of operation
 	cmd.AddCommand(
 		costModelCmd,
 		newAgentCommand(),
 	)
 
+	log.InitLogging()
+
 	return cmd
 }
 

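With klog's flag plumbing gone, logging configuration flows through cobra persistent flags bound to viper, and `AutomaticEnv` plus the `-`→`_` replacer lets the same setting arrive as an env var. A minimal sketch of that wiring (the `demo` command is hypothetical); note viper's precedence: an explicitly set flag wins, otherwise the env var, otherwise the flag default:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/spf13/cobra"
	"github.com/spf13/viper"
)

func main() {
	cmd := &cobra.Command{
		Use: "demo",
		Run: func(cmd *cobra.Command, args []string) {
			fmt.Println("log-level:", viper.GetString("log-level"))
		},
	}

	cmd.PersistentFlags().String("log-level", "info", "Set the log level")
	viper.BindPFlag("log-level", cmd.PersistentFlags().Lookup("log-level"))

	// Allow LOG_LEVEL=debug ./demo as an alternative to --log-level=debug.
	viper.AutomaticEnv()
	viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))

	cmd.Execute()
}
```
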
+ 2 - 2
pkg/config/configfile.go

@@ -244,7 +244,7 @@ func (cf *ConfigFile) runWatcher() {
 	// if start fails after waiting for a reset, it means that another thread
 	// beat this thread to the start
 	if !cf.runState.Start() {
-		log.Warningf("Run watcher already running for file: %s", cf.file)
+		log.Warnf("Run watcher already running for file: %s", cf.file)
 		return
 	}
 
@@ -297,7 +297,7 @@ func (cf *ConfigFile) runWatcher() {
 			if !exists {
 				data, err := cf.internalRead(true)
 				if err != nil {
-					log.Warningf("Read() Error: %s\n", err)
+					log.Warnf("Read() Error: %s\n", err)
 					continue
 				}
 				exists = true

+ 2 - 2
pkg/config/configmanager.go

@@ -61,11 +61,11 @@ func NewConfigFileManager(opts *ConfigFileManagerOpts) *ConfigFileManager {
 	if opts.IsBucketStorageEnabled() {
 		bucketConfig, err := ioutil.ReadFile(opts.BucketStoreConfig)
 		if err != nil {
-			log.Warningf("Failed to initialize config bucket storage: %s", err)
+			log.Warnf("Failed to initialize config bucket storage: %s", err)
 		} else {
 			bucketStore, err := storage.NewBucketStorage(bucketConfig)
 			if err != nil {
-				log.Warningf("Failed to create config bucket storage: %s", err)
+				log.Warnf("Failed to create config bucket storage: %s", err)
 			} else {
 				configStore = bucketStore
 			}

+ 31 - 32
pkg/costmodel/aggregation.go

@@ -25,7 +25,6 @@ import (
 	"github.com/kubecost/cost-model/pkg/util/json"
 	"github.com/patrickmn/go-cache"
 	prometheusClient "github.com/prometheus/client_golang/api"
-	"k8s.io/klog"
 )
 
 const (
@@ -221,7 +220,7 @@ func (a *Accesses) ComputeIdleCoefficient(costData map[string]*CostData, cli pro
 
 	for cid, costs := range clusterCosts {
 		if costs.CPUCumulative == 0 && costs.RAMCumulative == 0 && costs.StorageCumulative == 0 {
-			klog.V(1).Infof("[Warning] No ClusterCosts data for cluster '%s'. Is it emitting data?", cid)
+			log.Warnf("No ClusterCosts data for cluster '%s'. Is it emitting data?", cid)
 			coefficients[cid] = 1.0
 			continue
 		}
@@ -272,13 +271,13 @@ type AggregationOptions struct {
 func clampAverage(requestsAvg float64, usedAverage float64, allocationAvg float64, resource string) (float64, float64) {
 	rAvg := requestsAvg
 	if rAvg > allocationAvg {
-		klog.V(4).Infof("[Warning] Average %s Requested (%f) > Average %s Allocated (%f). Clamping.", resource, rAvg, resource, allocationAvg)
+		log.Debugf("Average %s Requested (%f) > Average %s Allocated (%f). Clamping.", resource, rAvg, resource, allocationAvg)
 		rAvg = allocationAvg
 	}
 
 	uAvg := usedAverage
 	if uAvg > allocationAvg {
-		klog.V(4).Infof("[Warning]: Average %s Used (%f) > Average %s Allocated (%f). Clamping.", resource, uAvg, resource, allocationAvg)
+		log.Debugf("Average %s Used (%f) > Average %s Allocated (%f). Clamping.", resource, uAvg, resource, allocationAvg)
 		uAvg = allocationAvg
 	}
 
@@ -414,7 +413,7 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 		if opts.SharedSplit == SplitTypeWeighted {
 			d := opts.TotalContainerCost - sharedResourceCost
 			if d == 0 {
-				klog.V(1).Infof("[Warning] Total container cost '%f' and shared resource cost '%f are the same'. Setting sharedCoefficient to 1", opts.TotalContainerCost, sharedResourceCost)
+				log.Warnf("Total container cost '%f' and shared resource cost '%f' are the same. Setting sharedCoefficient to 1", opts.TotalContainerCost, sharedResourceCost)
 				sharedCoefficient = 1.0
 			} else {
 				sharedCoefficient = (agg.CPUCost + agg.RAMCost + agg.GPUCost + agg.PVCost + agg.NetworkCost) / d
@@ -525,59 +524,59 @@ func AggregateCostData(costData map[string]*CostData, field string, subfields []
 		}
 		// Typesafety checks
 		if math.IsNaN(agg.CPUAllocationHourlyAverage) || math.IsInf(agg.CPUAllocationHourlyAverage, 0) {
-			klog.V(1).Infof("[Warning] CPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.CPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("CPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.CPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.CPUAllocationHourlyAverage = 0
 		}
 		if math.IsNaN(agg.CPUCost) || math.IsInf(agg.CPUCost, 0) {
-			klog.V(1).Infof("[Warning] CPUCost is %f for '%s: %s/%s'", agg.CPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("CPUCost is %f for '%s: %s/%s'", agg.CPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.CPUCost = 0
 		}
 		if math.IsNaN(agg.CPUEfficiency) || math.IsInf(agg.CPUEfficiency, 0) {
-			klog.V(1).Infof("[Warning] CPUEfficiency is %f for '%s: %s/%s'", agg.CPUEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("CPUEfficiency is %f for '%s: %s/%s'", agg.CPUEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.CPUEfficiency = 0
 		}
 		if math.IsNaN(agg.Efficiency) || math.IsInf(agg.Efficiency, 0) {
-			klog.V(1).Infof("[Warning] Efficiency is %f for '%s: %s/%s'", agg.Efficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("Efficiency is %f for '%s: %s/%s'", agg.Efficiency, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.Efficiency = 0
 		}
 		if math.IsNaN(agg.GPUAllocationHourlyAverage) || math.IsInf(agg.GPUAllocationHourlyAverage, 0) {
-			klog.V(1).Infof("[Warning] GPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.GPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("GPUAllocationHourlyAverage is %f for '%s: %s/%s'", agg.GPUAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.GPUAllocationHourlyAverage = 0
 		}
 		if math.IsNaN(agg.GPUCost) || math.IsInf(agg.GPUCost, 0) {
-			klog.V(1).Infof("[Warning] GPUCost is %f for '%s: %s/%s'", agg.GPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("GPUCost is %f for '%s: %s/%s'", agg.GPUCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.GPUCost = 0
 		}
 		if math.IsNaN(agg.RAMAllocationHourlyAverage) || math.IsInf(agg.RAMAllocationHourlyAverage, 0) {
-			klog.V(1).Infof("[Warning] RAMAllocationHourlyAverage is %f for '%s: %s/%s'", agg.RAMAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("RAMAllocationHourlyAverage is %f for '%s: %s/%s'", agg.RAMAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.RAMAllocationHourlyAverage = 0
 		}
 		if math.IsNaN(agg.RAMCost) || math.IsInf(agg.RAMCost, 0) {
-			klog.V(1).Infof("[Warning] RAMCost is %f for '%s: %s/%s'", agg.RAMCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("RAMCost is %f for '%s: %s/%s'", agg.RAMCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.RAMCost = 0
 		}
 		if math.IsNaN(agg.RAMEfficiency) || math.IsInf(agg.RAMEfficiency, 0) {
-			klog.V(1).Infof("[Warning] RAMEfficiency is %f for '%s: %s/%s'", agg.RAMEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("RAMEfficiency is %f for '%s: %s/%s'", agg.RAMEfficiency, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.RAMEfficiency = 0
 		}
 		if math.IsNaN(agg.PVAllocationHourlyAverage) || math.IsInf(agg.PVAllocationHourlyAverage, 0) {
-			klog.V(1).Infof("[Warning] PVAllocationHourlyAverage is %f for '%s: %s/%s'", agg.PVAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("PVAllocationHourlyAverage is %f for '%s: %s/%s'", agg.PVAllocationHourlyAverage, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.PVAllocationHourlyAverage = 0
 		}
 		if math.IsNaN(agg.PVCost) || math.IsInf(agg.PVCost, 0) {
-			klog.V(1).Infof("[Warning] PVCost is %f for '%s: %s/%s'", agg.PVCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("PVCost is %f for '%s: %s/%s'", agg.PVCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.PVCost = 0
 		}
 		if math.IsNaN(agg.NetworkCost) || math.IsInf(agg.NetworkCost, 0) {
-			klog.V(1).Infof("[Warning] NetworkCost is %f for '%s: %s/%s'", agg.NetworkCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("NetworkCost is %f for '%s: %s/%s'", agg.NetworkCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.NetworkCost = 0
 		}
 		if math.IsNaN(agg.SharedCost) || math.IsInf(agg.SharedCost, 0) {
-			klog.V(1).Infof("[Warning] SharedCost is %f for '%s: %s/%s'", agg.SharedCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("SharedCost is %f for '%s: %s/%s'", agg.SharedCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.SharedCost = 0
 		}
 		if math.IsNaN(agg.TotalCost) || math.IsInf(agg.TotalCost, 0) {
-			klog.V(1).Infof("[Warning] TotalCost is %f for '%s: %s/%s'", agg.TotalCost, agg.Cluster, agg.Aggregator, agg.Environment)
+			log.Warnf("TotalCost is %f for '%s: %s/%s'", agg.TotalCost, agg.Cluster, agg.Aggregator, agg.Environment)
 			agg.TotalCost = 0
 		}
 	}
@@ -662,13 +661,13 @@ func getDiscounts(costDatum *CostData, cpuCost float64, ramCost float64, discoun
 	if reserved != nil && reserved.CPUCost > 0 && reserved.RAMCost > 0 {
 		reservedCPUDiscount := 0.0
 		if cpuCost == 0 {
-			klog.V(1).Infof("[Warning] No cpu cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+			log.Warnf("No cpu cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
 		} else {
 			reservedCPUDiscount = 1.0 - (reserved.CPUCost / cpuCost)
 		}
 		reservedRAMDiscount := 0.0
 		if ramCost == 0 {
-			klog.V(1).Infof("[Warning] No ram cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+			log.Warnf("No ram cost found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
 		} else {
 			reservedRAMDiscount = 1.0 - (reserved.RAMCost / ramCost)
 		}
@@ -690,7 +689,7 @@ func getDiscounts(costDatum *CostData, cpuCost float64, ramCost float64, discoun
 					blendedCPUDiscount = reservedCPUDiscount
 				} else {
 					if nodeCPU == 0 {
-						klog.V(1).Infof("[Warning] No ram found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+						log.Warnf("No cpu found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
 					} else {
 						blendedCPUDiscount = (float64(reserved.ReservedCPU) * reservedCPUDiscount) + (float64(nonReservedCPU)*discount)/float64(nodeCPU)
 					}
@@ -700,7 +699,7 @@ func getDiscounts(costDatum *CostData, cpuCost float64, ramCost float64, discoun
 					blendedRAMDiscount = reservedRAMDiscount
 				} else {
 					if nodeRAMGB == 0 {
-						klog.V(1).Infof("[Warning] No ram found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
+						log.Warnf("No ram found for cluster '%s' node '%s'", costDatum.ClusterID, costDatum.NodeName)
 					} else {
 						blendedRAMDiscount = (reservedRAMGB * reservedRAMDiscount) + (nonReservedRAM*discount)/nodeRAMGB
 					}
@@ -759,7 +758,7 @@ func getPriceVectors(cp cloud.Provider, costDatum *CostData, rate string, discou
 	// default cost values with custom values
 	customPricing, err := cp.GetConfig()
 	if err != nil {
-		klog.Errorf("failed to load custom pricing: %s", err)
+		log.Errorf("failed to load custom pricing: %s", err)
 	}
 	if cloud.CustomPricesEnabled(cp) && err == nil {
 		var cpuCostStr string
@@ -797,9 +796,9 @@ func getPriceVectors(cp cloud.Provider, costDatum *CostData, rate string, discou
 
 	cpuDiscount, ramDiscount := getDiscounts(costDatum, cpuCost, ramCost, discount)
 
-	klog.V(4).Infof("Node Name: %s", costDatum.NodeName)
-	klog.V(4).Infof("Blended CPU Discount: %f", cpuDiscount)
-	klog.V(4).Infof("Blended RAM Discount: %f", ramDiscount)
+	log.Debugf("Node Name: %s", costDatum.NodeName)
+	log.Debugf("Blended CPU Discount: %f", cpuDiscount)
+	log.Debugf("Blended RAM Discount: %f", ramDiscount)
 
 	// TODO should we try to apply the rate coefficient here or leave it as a totals-only metric?
 	rateCoeff := 1.0
@@ -1263,7 +1262,7 @@ func (a *Accesses) ComputeAggregateCostModel(promClient prometheusClient.Client,
 				labelValues[ln] = append(labelValues[ln], lv)
 			} else {
 				// label is not of the form name=value, so log it and move on
-				log.Warningf("ComputeAggregateCostModel: skipping illegal label filter: %s", l)
+				log.Warnf("ComputeAggregateCostModel: skipping illegal label filter: %s", l)
 			}
 		}
 
@@ -1312,7 +1311,7 @@ func (a *Accesses) ComputeAggregateCostModel(promClient prometheusClient.Client,
 				annotationValues[an] = append(annotationValues[an], av)
 			} else {
 				// annotation is not of the form name=value, so log it and move on
-				log.Warningf("ComputeAggregateCostModel: skipping illegal annotation filter: %s", annot)
+				log.Warnf("ComputeAggregateCostModel: skipping illegal annotation filter: %s", annot)
 			}
 		}
 
@@ -1682,7 +1681,7 @@ func GenerateAggKey(window kubecost.Window, field string, subfields []string, op
 				lFilters = append(lFilters, fmt.Sprintf("%s=%s", lfn, lfv))
 			} else {
 				// label is not of the form name=value, so log it and move on
-				klog.V(2).Infof("[Warning] GenerateAggKey: skipping illegal label filter: %s", lf)
+				log.Warnf("GenerateAggKey: skipping illegal label filter: %s", lf)
 			}
 		}
 	}
@@ -1703,7 +1702,7 @@ func GenerateAggKey(window kubecost.Window, field string, subfields []string, op
 				aFilters = append(aFilters, fmt.Sprintf("%s=%s", afn, afv))
 			} else {
 				// annotation is not of the form name=value, so log it and move on
-				klog.V(2).Infof("[Warning] GenerateAggKey: skipping illegal annotation filter: %s", af)
+				log.Warnf("GenerateAggKey: skipping illegal annotation filter: %s", af)
 			}
 		}
 	}
@@ -1800,7 +1799,7 @@ func (a *Accesses) warmAggregateCostModelCache() {
 			a.ClusterCostsCache.Set(key, totals, a.GetCacheExpiration(window.Duration()))
 			log.Infof("caching %s cluster costs for %s", fmtDuration, a.GetCacheExpiration(window.Duration()))
 		} else {
-			log.Warningf("not caching %s cluster costs: no data or less than %f minutes data ", fmtDuration, clusterCostsCacheMinutes)
+			log.Warnf("not caching %s cluster costs: no data or less than %f minutes of data", fmtDuration, clusterCostsCacheMinutes)
 		}
 		return aggErr, err
 	}

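The long run of NaN/Inf guards converted above all share one shape: if a computed aggregate is NaN or ±Inf, warn and reset it to zero so bad arithmetic never leaks into API responses. That repeated guard could be factored into a helper; `sanitize` below is hypothetical, not part of the codebase:

```go
package main

import (
	"fmt"
	"math"
)

// sanitize captures the repeated guard in AggregateCostData: NaN/Inf values
// are reported and replaced with 0.
func sanitize(v float64, field string) float64 {
	if math.IsNaN(v) || math.IsInf(v, 0) {
		fmt.Printf("warn: %s is %f; resetting to 0\n", field, v)
		return 0
	}
	return v
}

func main() {
	fmt.Println(sanitize(math.NaN(), "CPUCost")) // 0, with a warning
	fmt.Println(sanitize(4.2, "RAMCost"))        // 4.2, untouched
}
```
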
+ 333 - 174
pkg/costmodel/allocation.go

@@ -18,48 +18,48 @@ import (
 )
 
 const (
-	queryFmtPods                     = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]%s`
-	queryFmtPodsUID                  = `avg(kube_pod_container_status_running{}) by (pod, namespace, uid, %s)[%s:%s]%s`
-	queryFmtRAMBytesAllocated        = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s, provider_id)`
-	queryFmtRAMRequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
-	queryFmtRAMUsageAvg              = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtRAMUsageMax              = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtCPUCoresAllocated        = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
-	queryFmtCPURequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
-	queryFmtCPUUsageAvg              = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtCPUUsageMax              = `max(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
-	queryFmtGPUsRequested            = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
-	queryFmtGPUsAllocated            = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
-	queryFmtNodeCostPerCPUHr         = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerRAMGiBHr      = `avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeCostPerGPUHr         = `avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
-	queryFmtNodeIsSpot               = `avg_over_time(kubecost_node_is_spot[%s]%s)`
-	queryFmtPVCInfo                  = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]%s`
-	queryFmtPVBytes                  = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, %s)`
-	queryFmtPodPVCAllocation         = `avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
-	queryFmtPVCBytesRequested        = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, %s)`
-	queryFmtPVCostPerGiBHour         = `avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, %s)`
-	queryFmtNetZoneGiB               = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetZoneCostPerGiB        = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (%s)`
-	queryFmtNetRegionGiB             = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetRegionCostPerGiB      = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (%s)`
-	queryFmtNetInternetGiB           = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
-	queryFmtNetInternetCostPerGiB    = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (%s)`
-	queryFmtNetReceiveBytes          = `sum(increase(container_network_receive_bytes_total{pod!="", container="POD"}[%s]%s)) by (pod_name, pod, namespace, %s)`
-	queryFmtNetTransferBytes         = `sum(increase(container_network_transmit_bytes_total{pod!="", container="POD"}[%s]%s)) by (pod_name, pod, namespace, %s)`
-	queryFmtNamespaceLabels          = `avg_over_time(kube_namespace_labels[%s]%s)`
-	queryFmtNamespaceAnnotations     = `avg_over_time(kube_namespace_annotations[%s]%s)`
-	queryFmtPodLabels                = `avg_over_time(kube_pod_labels[%s]%s)`
-	queryFmtPodAnnotations           = `avg_over_time(kube_pod_annotations[%s]%s)`
-	queryFmtServiceLabels            = `avg_over_time(service_selector_labels[%s]%s)`
-	queryFmtDeploymentLabels         = `avg_over_time(deployment_match_labels[%s]%s)`
-	queryFmtStatefulSetLabels        = `avg_over_time(statefulSet_match_labels[%s]%s)`
-	queryFmtDaemonSetLabels          = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, %s)`
-	queryFmtJobLabels                = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,%s)`
-	queryFmtPodsWithReplicaSetOwner  = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet"}[%s]%s)) by (pod, owner_name, namespace ,%s)`
-	queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>"}[%s]%s)) by (replicaset, namespace, %s)`
-	queryFmtLBCostPerHr              = `avg(avg_over_time(kubecost_load_balancer_cost[%s]%s)) by (namespace, service_name, %s)`
-	queryFmtLBActiveMins             = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
+	queryFmtPods                     = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]`
+	queryFmtPodsUID                  = `avg(kube_pod_container_status_running{}) by (pod, namespace, uid, %s)[%s:%s]`
+	queryFmtRAMBytesAllocated        = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s, provider_id)`
+	queryFmtRAMRequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtRAMUsageAvg              = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtRAMUsageMax              = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUCoresAllocated        = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPURequests              = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtCPUUsageAvg              = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtCPUUsageMax              = `max(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s])) by (container_name, container, pod_name, pod, namespace, instance, %s)`
+	queryFmtGPUsRequested            = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtGPUsAllocated            = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`
+	queryFmtNodeCostPerCPUHr         = `avg(avg_over_time(node_cpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerRAMGiBHr      = `avg(avg_over_time(node_ram_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeCostPerGPUHr         = `avg(avg_over_time(node_gpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`
+	queryFmtNodeIsSpot               = `avg_over_time(kubecost_node_is_spot[%s])`
+	queryFmtPVCInfo                  = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]`
+	queryFmtPVBytes                  = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (persistentvolume, %s)`
+	queryFmtPodPVCAllocation         = `avg(avg_over_time(pod_pvc_allocation[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
+	queryFmtPVCBytesRequested        = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s])) by (persistentvolumeclaim, namespace, %s)`
+	queryFmtPVCostPerGiBHour         = `avg(avg_over_time(pv_hourly_cost[%s])) by (volumename, %s)`
+	queryFmtNetZoneGiB               = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetZoneCostPerGiB        = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s])) by (%s)`
+	queryFmtNetRegionGiB             = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetRegionCostPerGiB      = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s])) by (%s)`
+	queryFmtNetInternetGiB           = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s])) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
+	queryFmtNetInternetCostPerGiB    = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s])) by (%s)`
+	queryFmtNetReceiveBytes          = `sum(increase(container_network_receive_bytes_total{pod!="", container="POD"}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNetTransferBytes         = `sum(increase(container_network_transmit_bytes_total{pod!="", container="POD"}[%s])) by (pod_name, pod, namespace, %s)`
+	queryFmtNamespaceLabels          = `avg_over_time(kube_namespace_labels[%s])`
+	queryFmtNamespaceAnnotations     = `avg_over_time(kube_namespace_annotations[%s])`
+	queryFmtPodLabels                = `avg_over_time(kube_pod_labels[%s])`
+	queryFmtPodAnnotations           = `avg_over_time(kube_pod_annotations[%s])`
+	queryFmtServiceLabels            = `avg_over_time(service_selector_labels[%s])`
+	queryFmtDeploymentLabels         = `avg_over_time(deployment_match_labels[%s])`
+	queryFmtStatefulSetLabels        = `avg_over_time(statefulSet_match_labels[%s])`
+	queryFmtDaemonSetLabels          = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s])) by (pod, owner_name, namespace, %s)`
+	queryFmtJobLabels                = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtPodsWithReplicaSetOwner  = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet"}[%s])) by (pod, owner_name, namespace ,%s)`
+	queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>"}[%s])) by (replicaset, namespace, %s)`
+	queryFmtLBCostPerHr              = `avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s)`
+	queryFmtLBActiveMins             = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]`
 )
 
 // This is a bit of a hack to work around garbage data from cadvisor
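Note that the query constants above lose their offset `%s` placeholder: with the (duration, offset) conversion removed (see the `DurationOffsetForPrometheus` deletion below), each format now takes only the window duration plus the cluster label, and range-style queries additionally take the resolution. A small illustration of how one of these constants is filled in (values are examples; in the code they come from `timeutil.DurationString` and `env.GetPromClusterLabel`):

```go
package main

import "fmt"

const queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s])) by (container, pod, namespace, node, %s)`

func main() {
	durStr := "24h"              // e.g. timeutil.DurationString(end.Sub(start))
	clusterLabel := "cluster_id" // e.g. env.GetPromClusterLabel()

	query := fmt.Sprintf(queryFmtRAMRequests, durStr, clusterLabel)
	fmt.Println(query)
}
```
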
@@ -84,6 +84,168 @@ func (cm *CostModel) Name() string {
 // for the window defined by the given start and end times. The Allocations
 // returned are unaggregated (i.e. down to the container level).
 func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
+	// If the duration is short enough, compute the AllocationSet directly
+	if end.Sub(start) <= cm.MaxPrometheusQueryDuration {
+		return cm.computeAllocation(start, end, resolution)
+	}
+
+	// If the duration exceeds the configured MaxPrometheusQueryDuration, then
+	// query for maximum-sized AllocationSets, collect them, and accumulate.
+
+	// s and e track the coverage of the entire given window over multiple
+	// internal queries.
+	s, e := start, start
+
+	// Collect AllocationSets in a range, then accumulate
+	// TODO optimize by collecting consecutive AllocationSets, accumulating as we go
+	asr := kubecost.NewAllocationSetRange()
+
+	for e.Before(end) {
+		// By default, query for the full remaining duration. But do not let
+		// any individual query duration exceed the configured max Prometheus
+		// query duration.
+		duration := end.Sub(e)
+		if duration > cm.MaxPrometheusQueryDuration {
+			duration = cm.MaxPrometheusQueryDuration
+		}
+
+		// Set start and end parameters (s, e) for next individual computation.
+		e = s.Add(duration)
+
+		// Compute the individual AllocationSet for just (s, e)
+		as, err := cm.computeAllocation(s, e, resolution)
+		if err != nil {
+			return kubecost.NewAllocationSet(start, end), fmt.Errorf("error computing allocation for %s: %s", kubecost.NewClosedWindow(s, e), err)
+		}
+
+		// Append to the range
+		asr.Append(as)
+
+		// Set s equal to e to set up the next query, if one exists.
+		s = e
+	}
+
+	// Populate annotations, labels, and services on each Allocation. This is
+	// necessary because Properties.Intersection does not propagate any values
+	// stored in maps or slices for performance reasons. In this case, however,
+	// it is both acceptable and necessary to do so.
+	allocationAnnotations := map[string]map[string]string{}
+	allocationLabels := map[string]map[string]string{}
+	allocationServices := map[string]map[string]bool{}
+
+	// Also record errors and warnings, then append them to the results later.
+	errors := []string{}
+	warnings := []string{}
+
+	asr.Each(func(i int, as *kubecost.AllocationSet) {
+		as.Each(func(k string, a *kubecost.Allocation) {
+			if len(a.Properties.Annotations) > 0 {
+				if _, ok := allocationAnnotations[k]; !ok {
+					allocationAnnotations[k] = map[string]string{}
+				}
+				for name, val := range a.Properties.Annotations {
+					allocationAnnotations[k][name] = val
+				}
+			}
+
+			if len(a.Properties.Labels) > 0 {
+				if _, ok := allocationLabels[k]; !ok {
+					allocationLabels[k] = map[string]string{}
+				}
+				for name, val := range a.Properties.Labels {
+					allocationLabels[k][name] = val
+				}
+			}
+
+			if len(a.Properties.Services) > 0 {
+				if _, ok := allocationServices[k]; !ok {
+					allocationServices[k] = map[string]bool{}
+				}
+				for _, val := range a.Properties.Services {
+					allocationServices[k][val] = true
+				}
+			}
+		})
+
+		errors = append(errors, as.Errors...)
+		warnings = append(warnings, as.Warnings...)
+	})
+
+	// Accumulate to yield the result AllocationSet. After this step, we will
+	// be nearly complete, but without the raw allocation data, which must be
+	// recomputed.
+	result, err := asr.Accumulate()
+	if err != nil {
+		return kubecost.NewAllocationSet(start, end), fmt.Errorf("error accumulating data for %s: %s", kubecost.NewClosedWindow(s, e), err)
+	}
+
+	// Apply the annotations, labels, and services to the post-accumulation
+	// results. (See above for why this is necessary.)
+	result.Each(func(k string, a *kubecost.Allocation) {
+		if annotations, ok := allocationAnnotations[k]; ok {
+			a.Properties.Annotations = annotations
+		}
+
+		if labels, ok := allocationLabels[k]; ok {
+			a.Properties.Labels = labels
+		}
+
+		if services, ok := allocationServices[k]; ok {
+			a.Properties.Services = []string{}
+			for s := range services {
+				a.Properties.Services = append(a.Properties.Services, s)
+			}
+		}
+
+		// Expand the Window of all Allocations within the AllocationSet
+		// to match the Window of the AllocationSet, which gets expanded
+		// at the end of this function.
+		a.Window = a.Window.ExpandStart(start).ExpandEnd(end)
+	})
+
+	// Maintain RAM and CPU max usage values by iterating over the range,
+	// computing maximums on a rolling basis, and setting them on the result set.
+	asr.Each(func(i int, as *kubecost.AllocationSet) {
+		as.Each(func(key string, alloc *kubecost.Allocation) {
+			resultAlloc := result.Get(key)
+			if resultAlloc == nil {
+				return
+			}
+
+			if resultAlloc.RawAllocationOnly == nil {
+				resultAlloc.RawAllocationOnly = &kubecost.RawAllocationOnlyData{}
+			}
+
+			if alloc.RawAllocationOnly == nil {
+				// This will happen inevitably for unmounted disks, but should
+				// ideally not happen for any allocation with CPU and RAM data.
+				if !alloc.IsUnmounted() {
+					log.DedupedWarningf(10, "ComputeAllocation: raw allocation data missing for %s", key)
+				}
+				return
+			}
+
+			if alloc.RawAllocationOnly.CPUCoreUsageMax > resultAlloc.RawAllocationOnly.CPUCoreUsageMax {
+				resultAlloc.RawAllocationOnly.CPUCoreUsageMax = alloc.RawAllocationOnly.CPUCoreUsageMax
+			}
+
+			if alloc.RawAllocationOnly.RAMBytesUsageMax > resultAlloc.RawAllocationOnly.RAMBytesUsageMax {
+				resultAlloc.RawAllocationOnly.RAMBytesUsageMax = alloc.RawAllocationOnly.RAMBytesUsageMax
+			}
+		})
+	})
+
+	// Expand the window to match the queried time range.
+	result.Window = result.Window.ExpandStart(start).ExpandEnd(end)
+
+	// Append errors and warnings
+	result.Errors = errors
+	result.Warnings = warnings
+
+	return result, nil
+}
+
+func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
 	// 1. Build out Pod map from resolution-tuned, batched Pod start/end query
 	// 2. Run and apply the results of the remaining queries to
 	// 3. Build out AllocationSet from completed Pod map
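
`ComputeAllocation` above caps any single Prometheus query at `MaxPrometheusQueryDuration` by walking the requested window in consecutive chunks and accumulating the per-chunk sets. A minimal, self-contained sketch of that chunking loop follows; `chunkWindow` is purely illustrative and not part of the package:

```go
package main

import (
	"fmt"
	"time"
)

// chunkWindow splits [start, end) into consecutive sub-windows, each no
// longer than maxDur, mirroring the loop at the top of ComputeAllocation.
func chunkWindow(start, end time.Time, maxDur time.Duration) [][2]time.Time {
	var chunks [][2]time.Time
	s, e := start, start
	for e.Before(end) {
		d := end.Sub(e)
		if d > maxDur {
			d = maxDur
		}
		e = s.Add(d)
		chunks = append(chunks, [2]time.Time{s, e})
		s = e
	}
	return chunks
}

func main() {
	start := time.Date(2021, 9, 1, 0, 0, 0, 0, time.UTC)
	end := start.Add(26 * time.Hour)
	// A 26h window with a 12h cap yields chunks of 12h, 12h, and 2h.
	for _, c := range chunkWindow(start, end, 12*time.Hour) {
		fmt.Println(c[0].Format("Jan 2 15:04"), "→", c[1].Format("Jan 2 15:04"))
	}
}
```
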
@@ -130,16 +292,15 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 		log.Debugf("CostModel.ComputeAllocation: ingesting UID data from KSM metrics...")
 	}
 
-	cm.buildPodMap(window, resolution, env.GetETLMaxBatchDuration(), podMap, clusterStart, clusterEnd, ingestPodUID, podUIDKeyMap)
+	// TODO:CLEANUP remove "max batch" idea and clusterStart/End
+	cm.buildPodMap(window, resolution, env.GetETLMaxPrometheusQueryDuration(), podMap, clusterStart, clusterEnd, ingestPodUID, podUIDKeyMap)
 
 	// (2) Run and apply remaining queries
 
-	// Convert window (start, end) to (duration, offset) for querying Prometheus,
-	// including handling Thanos offset
-	durStr, offStr, err := window.DurationOffsetForPrometheus()
-	if err != nil {
-		// Negative duration, so return empty set
-		return allocSet, nil
+	// Query for the duration between start and end
+	durStr := timeutil.DurationString(end.Sub(start))
+	if durStr == "" {
+		return allocSet, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
 	}
 
 	// Convert resolution duration to a query-ready string
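
The hunk above drops the old `(duration, offset)` Prometheus addressing in favor of rendering the window as a plain duration string and evaluating the query at `end`. A sketch of that conversion, where `durationString` is a hypothetical stand-in for `timeutil.DurationString` (its exact formatting rules are assumed here):

```go
package main

import (
	"fmt"
	"time"
)

// durationString is a hypothetical stand-in for timeutil.DurationString:
// render a positive duration as whole minutes for a PromQL range selector,
// and return "" for non-positive durations so callers can reject them.
func durationString(d time.Duration) string {
	mins := int64(d.Minutes())
	if mins <= 0 {
		return ""
	}
	return fmt.Sprintf("%dm", mins)
}

func main() {
	end := time.Now()
	start := end.Add(-90 * time.Minute)

	durStr := durationString(end.Sub(start)) // "90m"
	// Evaluating metric[90m] at instant `end` covers exactly [start, end],
	// with no "offset" clause needed.
	fmt.Printf("avg_over_time(node_cpu_hourly_cost[%s]) @ %s\n", durStr, end.Format(time.RFC3339))
}
```
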
@@ -147,125 +308,125 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 
 	ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)
 
-	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr, env.GetPromClusterLabel())
-	resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
+	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, env.GetPromClusterLabel())
+	resChRAMBytesAllocated := ctx.QueryAtTime(queryRAMBytesAllocated, end)
 
-	queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, offStr, env.GetPromClusterLabel())
-	resChRAMRequests := ctx.Query(queryRAMRequests)
+	queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, env.GetPromClusterLabel())
+	resChRAMRequests := ctx.QueryAtTime(queryRAMRequests, end)
 
-	queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, offStr, env.GetPromClusterLabel())
-	resChRAMUsageAvg := ctx.Query(queryRAMUsageAvg)
+	queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, env.GetPromClusterLabel())
+	resChRAMUsageAvg := ctx.QueryAtTime(queryRAMUsageAvg, end)
 
-	queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, offStr, env.GetPromClusterLabel())
-	resChRAMUsageMax := ctx.Query(queryRAMUsageMax)
+	queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, env.GetPromClusterLabel())
+	resChRAMUsageMax := ctx.QueryAtTime(queryRAMUsageMax, end)
 
-	queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, offStr, env.GetPromClusterLabel())
-	resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
+	queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, env.GetPromClusterLabel())
+	resChCPUCoresAllocated := ctx.QueryAtTime(queryCPUCoresAllocated, end)
 
-	queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, offStr, env.GetPromClusterLabel())
-	resChCPURequests := ctx.Query(queryCPURequests)
+	queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, env.GetPromClusterLabel())
+	resChCPURequests := ctx.QueryAtTime(queryCPURequests, end)
 
-	queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, offStr, env.GetPromClusterLabel())
-	resChCPUUsageAvg := ctx.Query(queryCPUUsageAvg)
+	queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, env.GetPromClusterLabel())
+	resChCPUUsageAvg := ctx.QueryAtTime(queryCPUUsageAvg, end)
 
-	queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, offStr, env.GetPromClusterLabel())
-	resChCPUUsageMax := ctx.Query(queryCPUUsageMax)
+	queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, env.GetPromClusterLabel())
+	resChCPUUsageMax := ctx.QueryAtTime(queryCPUUsageMax, end)
 
-	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, offStr, env.GetPromClusterLabel())
-	resChGPUsRequested := ctx.Query(queryGPUsRequested)
+	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, env.GetPromClusterLabel())
+	resChGPUsRequested := ctx.QueryAtTime(queryGPUsRequested, end)
 
-	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, offStr, env.GetPromClusterLabel())
-	resChGPUsAllocated := ctx.Query(queryGPUsAllocated)
+	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, env.GetPromClusterLabel())
+	resChGPUsAllocated := ctx.QueryAtTime(queryGPUsAllocated, end)
 
-	queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, offStr, env.GetPromClusterLabel())
-	resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
+	queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, env.GetPromClusterLabel())
+	resChNodeCostPerCPUHr := ctx.QueryAtTime(queryNodeCostPerCPUHr, end)
 
-	queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, offStr, env.GetPromClusterLabel())
-	resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
+	queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, env.GetPromClusterLabel())
+	resChNodeCostPerRAMGiBHr := ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end)
 
-	queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, offStr, env.GetPromClusterLabel())
-	resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
+	queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, env.GetPromClusterLabel())
+	resChNodeCostPerGPUHr := ctx.QueryAtTime(queryNodeCostPerGPUHr, end)
 
-	queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr, offStr)
-	resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
+	queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr)
+	resChNodeIsSpot := ctx.QueryAtTime(queryNodeIsSpot, end)
 
-	queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr, offStr)
-	resChPVCInfo := ctx.Query(queryPVCInfo)
+	queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr)
+	resChPVCInfo := ctx.QueryAtTime(queryPVCInfo, end)
 
-	queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, offStr, env.GetPromClusterLabel())
-	resChPVBytes := ctx.Query(queryPVBytes)
+	queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, env.GetPromClusterLabel())
+	resChPVBytes := ctx.QueryAtTime(queryPVBytes, end)
 
-	queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, offStr, env.GetPromClusterLabel())
-	resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
+	queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, env.GetPromClusterLabel())
+	resChPodPVCAllocation := ctx.QueryAtTime(queryPodPVCAllocation, end)
 
-	queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, offStr, env.GetPromClusterLabel())
-	resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
+	queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, env.GetPromClusterLabel())
+	resChPVCBytesRequested := ctx.QueryAtTime(queryPVCBytesRequested, end)
 
-	queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, offStr, env.GetPromClusterLabel())
-	resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
+	queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, env.GetPromClusterLabel())
+	resChPVCostPerGiBHour := ctx.QueryAtTime(queryPVCostPerGiBHour, end)
 
-	queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, offStr, env.GetPromClusterLabel())
-	resChNetTransferBytes := ctx.Query(queryNetTransferBytes)
+	queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, env.GetPromClusterLabel())
+	resChNetTransferBytes := ctx.QueryAtTime(queryNetTransferBytes, end)
 
-	queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, offStr, env.GetPromClusterLabel())
-	resChNetReceiveBytes := ctx.Query(queryNetReceiveBytes)
+	queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, env.GetPromClusterLabel())
+	resChNetReceiveBytes := ctx.QueryAtTime(queryNetReceiveBytes, end)
 
-	queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
+	queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, env.GetPromClusterLabel())
+	resChNetZoneGiB := ctx.QueryAtTime(queryNetZoneGiB, end)
 
-	queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
+	queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, env.GetPromClusterLabel())
+	resChNetZoneCostPerGiB := ctx.QueryAtTime(queryNetZoneCostPerGiB, end)
 
-	queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
+	queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, env.GetPromClusterLabel())
+	resChNetRegionGiB := ctx.QueryAtTime(queryNetRegionGiB, end)
 
-	queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
+	queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, env.GetPromClusterLabel())
+	resChNetRegionCostPerGiB := ctx.QueryAtTime(queryNetRegionCostPerGiB, end)
 
-	queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
+	queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, env.GetPromClusterLabel())
+	resChNetInternetGiB := ctx.QueryAtTime(queryNetInternetGiB, end)
 
-	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
-	resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
+	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, env.GetPromClusterLabel())
+	resChNetInternetCostPerGiB := ctx.QueryAtTime(queryNetInternetCostPerGiB, end)
 
-	queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr, offStr)
-	resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
+	queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr)
+	resChNamespaceLabels := ctx.QueryAtTime(queryNamespaceLabels, end)
 
-	queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr, offStr)
-	resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
+	queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr)
+	resChNamespaceAnnotations := ctx.QueryAtTime(queryNamespaceAnnotations, end)
 
-	queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr, offStr)
-	resChPodLabels := ctx.Query(queryPodLabels)
+	queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr)
+	resChPodLabels := ctx.QueryAtTime(queryPodLabels, end)
 
-	queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr, offStr)
-	resChPodAnnotations := ctx.Query(queryPodAnnotations)
+	queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr)
+	resChPodAnnotations := ctx.QueryAtTime(queryPodAnnotations, end)
 
-	queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr, offStr)
-	resChServiceLabels := ctx.Query(queryServiceLabels)
+	queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr)
+	resChServiceLabels := ctx.QueryAtTime(queryServiceLabels, end)
 
-	queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr, offStr)
-	resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
+	queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr)
+	resChDeploymentLabels := ctx.QueryAtTime(queryDeploymentLabels, end)
 
-	queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr, offStr)
-	resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
+	queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr)
+	resChStatefulSetLabels := ctx.QueryAtTime(queryStatefulSetLabels, end)
 
-	queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, offStr, env.GetPromClusterLabel())
-	resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
+	queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, env.GetPromClusterLabel())
+	resChDaemonSetLabels := ctx.QueryAtTime(queryDaemonSetLabels, end)
 
-	queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, durStr, offStr, env.GetPromClusterLabel())
-	resChPodsWithReplicaSetOwner := ctx.Query(queryPodsWithReplicaSetOwner)
+	queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, durStr, env.GetPromClusterLabel())
+	resChPodsWithReplicaSetOwner := ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end)
 
-	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, durStr, offStr, env.GetPromClusterLabel())
-	resChReplicaSetsWithoutOwners := ctx.Query(queryReplicaSetsWithoutOwners)
+	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, durStr, env.GetPromClusterLabel())
+	resChReplicaSetsWithoutOwners := ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end)
 
-	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr, env.GetPromClusterLabel())
-	resChJobLabels := ctx.Query(queryJobLabels)
+	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, env.GetPromClusterLabel())
+	resChJobLabels := ctx.QueryAtTime(queryJobLabels, end)
 
-	queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, offStr, env.GetPromClusterLabel())
-	resChLBCostPerHr := ctx.Query(queryLBCostPerHr)
+	queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, env.GetPromClusterLabel())
+	resChLBCostPerHr := ctx.QueryAtTime(queryLBCostPerHr, end)
 
-	queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr, offStr)
-	resChLBActiveMins := ctx.Query(queryLBActiveMins)
+	queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr)
+	resChLBActiveMins := ctx.QueryAtTime(queryLBActiveMins, end)
 
 	resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
 	resCPURequests, _ := resChCPURequests.Await()
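
All of the queries above are dispatched before any `Await` call blocks, so total latency is roughly that of the slowest query rather than the sum. A generic sketch of the same fan-out/await shape using plain channels; this is illustrative only, not the actual `prom` package API:

```go
package main

import (
	"fmt"
	"time"
)

// query fires a request asynchronously and returns a handle (here just a
// buffered channel) that will yield the result; "Await" is a plain receive.
func query(q string) <-chan string {
	ch := make(chan string, 1)
	go func() {
		time.Sleep(10 * time.Millisecond) // stand-in for network latency
		ch <- "result of " + q
	}()
	return ch
}

func main() {
	// Fan out: both queries are in flight before either receive blocks.
	resChRAM := query("ram_bytes_allocated")
	resChCPU := query("cpu_cores_allocated")

	// Await: wall time is ~max(latency), not the sum of latencies.
	fmt.Println(<-resChRAM)
	fmt.Println(<-resChCPU)
}
```
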
@@ -498,7 +659,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 					if pvcInterval, ok := pvcPodIntervalMap[pvcKey][podKey]; ok {
 						s, e = *pvcInterval.Start(), *pvcInterval.End()
 					} else {
-						log.Warningf("CostModel.ComputeAllocation: allocation %s and PVC %s have no associated active window", alloc.Name, pvc.Name)
+						log.Warnf("CostModel.ComputeAllocation: allocation %s and PVC %s have no associated active window", alloc.Name, pvc.Name)
 					}
 
 					minutes := e.Sub(s).Minutes()
@@ -516,7 +677,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 					if coeffComponents, ok := sharedPVCCostCoefficientMap[pvcKey][podKey]; ok {
 						cost *= getCoefficientFromComponents(coeffComponents)
 					} else {
-						log.Warningf("CostModel.ComputeAllocation: allocation %s and PVC %s have relation but no coeff", alloc.Name, pvc.Name)
+						log.Warnf("CostModel.ComputeAllocation: allocation %s and PVC %s have relation but no coeff", alloc.Name, pvc.Name)
 					}
 
 					// Apply the size and cost of the PV to the allocation, each
@@ -571,7 +732,6 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 		if batchEnd.After(end) {
 			batchEnd = end
 		}
-		batchWindow := kubecost.NewWindow(&batchStart, &batchEnd)
 
 		var resPods []*prom.QueryResult
 		var err error
@@ -580,10 +740,9 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 		for resPods == nil && numTries < maxTries {
 			numTries++
 
-			// Convert window (start, end) to (duration, offset) for querying Prometheus,
-			// including handling Thanos offset
-			durStr, offStr, err := batchWindow.DurationOffsetForPrometheus()
-			if err != nil || durStr == "" {
+			// Query for the duration between start and end
+			durStr := timeutil.DurationString(batchEnd.Sub(batchStart))
+			if durStr == "" {
 				// Negative duration, so set empty results and don't query
 				resPods = []*prom.QueryResult{}
 				err = nil
@@ -595,13 +754,13 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 			var queryPods string
 			// If ingesting UIDs, avg on them
 			if ingestPodUID {
-				queryPods = fmt.Sprintf(queryFmtPodsUID, env.GetPromClusterLabel(), durStr, resStr, offStr)
+				queryPods = fmt.Sprintf(queryFmtPodsUID, env.GetPromClusterLabel(), durStr, resStr)
 			} else {
-				queryPods = fmt.Sprintf(queryFmtPods, env.GetPromClusterLabel(), durStr, resStr, offStr)
+				queryPods = fmt.Sprintf(queryFmtPods, env.GetPromClusterLabel(), durStr, resStr)
 			}
 
 			queryProfile := time.Now()
-			resPods, err = ctx.Query(queryPods).Await()
+			resPods, err = ctx.QueryAtTime(queryPods, batchEnd).Await()
 			if err != nil {
 				log.Profile(queryProfile, fmt.Sprintf("CostModel.ComputeAllocation: pod query %d try %d failed: %s", numQuery, numTries, queryPods))
 				resPods = nil
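
`buildPodMap` wraps its pod query in a bounded retry loop: a failed attempt resets the result and tries again until `maxTries` is exhausted. A stripped-down sketch of that shape; `runWithRetries` is an illustrative consolidation, not a helper in the codebase:

```go
package main

import (
	"errors"
	"fmt"
)

// runWithRetries keeps invoking doQuery until it yields a non-nil result
// or maxTries attempts have been made, mirroring the loop in buildPodMap.
func runWithRetries(doQuery func() ([]string, error), maxTries int) ([]string, error) {
	var res []string
	var err error
	for tries := 0; res == nil && tries < maxTries; tries++ {
		res, err = doQuery()
		if err != nil {
			res = nil // a failure forces another attempt
		}
	}
	return res, err
}

func main() {
	attempt := 0
	res, err := runWithRetries(func() ([]string, error) {
		attempt++
		if attempt < 3 {
			return nil, errors.New("transient Prometheus error")
		}
		return []string{"pod-a", "pod-b"}, nil
	}, 5)
	fmt.Println(res, err) // succeeds on the third attempt
}
```
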
@@ -645,7 +804,7 @@ func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSiz
 func applyPodResults(window kubecost.Window, resolution time.Duration, podMap map[podKey]*Pod, clusterStart, clusterEnd map[string]time.Time, resPods []*prom.QueryResult, ingestPodUID bool, podUIDKeyMap map[podKey][]podKey) {
 	for _, res := range resPods {
 		if len(res.Values) == 0 {
-			log.Warningf("CostModel.ComputeAllocation: empty minutes result")
+			log.Warnf("CostModel.ComputeAllocation: empty minutes result")
 			continue
 		}
 
@@ -656,7 +815,7 @@ func applyPodResults(window kubecost.Window, resolution time.Duration, podMap ma
 
 		labels, err := res.GetStrings("namespace", "pod")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
 			continue
 		}
 
@@ -844,7 +1003,7 @@ func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom
 
 			node, err := res.GetString("node")
 			if err != nil {
-				log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
+				log.Warnf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
 				continue
 			}
 			pod.Allocations[container].Properties.Node = node
@@ -903,7 +1062,7 @@ func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom
 			}
 			node, err := res.GetString("node")
 			if err != nil {
-				log.Warningf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
+				log.Warnf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
 				continue
 			}
 			pod.Allocations[container].Properties.Node = node
@@ -1057,7 +1216,7 @@ func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom
 
 			node, err := res.GetString("node")
 			if err != nil {
-				log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
+				log.Warnf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
 				continue
 			}
 			pod.Allocations[container].Properties.Node = node
@@ -1113,7 +1272,7 @@ func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom
 
 			node, err := res.GetString("node")
 			if err != nil {
-				log.Warningf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
+				log.Warnf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
 				continue
 			}
 			pod.Allocations[container].Properties.Node = node
@@ -1680,7 +1839,7 @@ func resToPodDaemonSetMap(resDaemonSetLabels []*prom.QueryResult, podUIDKeyMap m
 
 		pod, err := res.GetString("pod")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
+			log.Warnf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
 		}
 
 		key := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
@@ -1723,7 +1882,7 @@ func resToPodJobMap(resJobLabels []*prom.QueryResult, podUIDKeyMap map[podKey][]
 
 		pod, err := res.GetString("pod")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
+			log.Warnf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
 		}
 
 		key := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
@@ -1779,7 +1938,7 @@ func resToPodReplicaSetMap(resPodsWithReplicaSetOwner []*prom.QueryResult, resRe
 
 		pod, err := res.GetString("pod")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: ReplicaSet result without pod: %s", controllerKey)
+			log.Warnf("CostModel.ComputeAllocation: ReplicaSet result without pod: %s", controllerKey)
 		}
 
 		key := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
@@ -1869,19 +2028,19 @@ func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerCPUHr
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
 			continue
 		}
 
@@ -1907,19 +2066,19 @@ func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerRA
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
 			continue
 		}
 
@@ -1945,19 +2104,19 @@ func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerGPUHr
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
 			continue
 		}
 
 		instanceType, err := res.GetString("instance_type")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
 			continue
 		}
 
 		providerID, err := res.GetString("provider_id")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
 			continue
 		}
 
@@ -1983,13 +2142,13 @@ func applyNodeSpot(nodeMap map[nodeKey]*NodePricing, resNodeIsSpot []*prom.Query
 
 		node, err := res.GetString("node")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
 			continue
 		}
 
 		key := newNodeKey(cluster, node)
 		if _, ok := nodeMap[key]; !ok {
-			log.Warningf("CostModel.ComputeAllocation: Node spot  query result for missing node: %s", key)
+			log.Warnf("CostModel.ComputeAllocation: Node spot  query result for missing node: %s", key)
 			continue
 		}
 
@@ -2037,7 +2196,7 @@ func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
 
 		name, err := res.GetString("volumename")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
+			log.Warnf("CostModel.ComputeAllocation: PV cost without volumename")
 			continue
 		}
 
@@ -2055,12 +2214,12 @@ func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
 	for _, res := range resPVBytes {
 		key, err := resultPVKey(res, env.GetPromClusterLabel(), "persistentvolume")
 		if err != nil {
-			log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
 			continue
 		}
 
 		if _, ok := pvMap[key]; !ok {
-			log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
+			log.Warnf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
 			continue
 		}
 
@@ -2105,7 +2264,7 @@ func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey
 			}
 		}
 		if pvcStart.IsZero() || pvcEnd.IsZero() {
-			log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
+			log.Warnf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
 		}
 		pvcStart = pvcStart.Add(-time.Minute)
 
@@ -2344,7 +2503,7 @@ func getLoadBalancerCosts(resLBCost, resLBActiveMins []*prom.QueryResult, resolu
 			continue
 		}
 		if _, ok := lbHourlyCosts[serviceKey]; !ok {
-			log.Warningf("CostModel: failed to find hourly cost for Load Balancer: %v", serviceKey)
+			log.Warnf("CostModel: failed to find hourly cost for Load Balancer: %v", serviceKey)
 			continue
 		}
 
@@ -2415,7 +2574,7 @@ func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*NodePricing, nodeKey no
 	// node pricing with the custom values.
 	customPricingConfig, err := cm.Provider.GetConfig()
 	if err != nil {
-		log.Warningf("CostModel: failed to load custom pricing: %s", err)
+		log.Warnf("CostModel: failed to load custom pricing: %s", err)
 	}
 	if cloud.CustomPricesEnabled(cm.Provider) && customPricingConfig != nil {
 		return cm.getCustomNodePricing(node.Preemptible)
@@ -2429,42 +2588,42 @@ func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*NodePricing, nodeKey no
 	// them as strings like this?
 
 	if node.CostPerCPUHr == 0 || math.IsNaN(node.CostPerCPUHr) {
-		log.Warningf("CostModel: node pricing has illegal CostPerCPUHr; replacing with custom pricing: %s", nodeKey)
+		log.Warnf("CostModel: node pricing has illegal CostPerCPUHr; replacing with custom pricing: %s", nodeKey)
 		cpuCostStr := customPricingConfig.CPU
 		if node.Preemptible {
 			cpuCostStr = customPricingConfig.SpotCPU
 		}
 		costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
 		if err != nil {
-			log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
+			log.Warnf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
 		}
 		node.CostPerCPUHr = costPerCPUHr
 		node.Source += "/customCPU"
 	}
 
 	if math.IsNaN(node.CostPerGPUHr) {
-		log.Warningf("CostModel: node pricing has illegal CostPerGPUHr; replacing with custom pricing: %s", nodeKey)
+		log.Warnf("CostModel: node pricing has illegal CostPerGPUHr; replacing with custom pricing: %s", nodeKey)
 		gpuCostStr := customPricingConfig.GPU
 		if node.Preemptible {
 			gpuCostStr = customPricingConfig.SpotGPU
 		}
 		costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
 		if err != nil {
-			log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
+			log.Warnf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
 		}
 		node.CostPerGPUHr = costPerGPUHr
 		node.Source += "/customGPU"
 	}
 
 	if node.CostPerRAMGiBHr == 0 || math.IsNaN(node.CostPerRAMGiBHr) {
-		log.Warningf("CostModel: node pricing has illegal CostPerRAMHr; replacing with custom pricing: %s", nodeKey)
+		log.Warnf("CostModel: node pricing has illegal CostPerRAMHr; replacing with custom pricing: %s", nodeKey)
 		ramCostStr := customPricingConfig.RAM
 		if node.Preemptible {
 			ramCostStr = customPricingConfig.SpotRAM
 		}
 		costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
 		if err != nil {
-			log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
+			log.Warnf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
 		}
 		node.CostPerRAMGiBHr = costPerRAMHr
 		node.Source += "/customRAM"
@@ -2494,19 +2653,19 @@ func (cm *CostModel) getCustomNodePricing(spot bool) *NodePricing {
 
 	costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
 	if err != nil {
-		log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
+		log.Warnf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
 	}
 	node.CostPerCPUHr = costPerCPUHr
 
 	costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
 	if err != nil {
-		log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
+		log.Warnf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
 	}
 	node.CostPerGPUHr = costPerGPUHr
 
 	costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
 	if err != nil {
-		log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
+		log.Warnf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
 	}
 	node.CostPerRAMGiBHr = costPerRAMHr
 

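One more note on the `ComputeAllocation` accumulation path above: because `Properties.Intersection` drops map- and slice-valued fields, annotations, labels, and services are snapshotted before `Accumulate` and re-applied afterwards. A stripped-down sketch of that save-and-restore, using a hypothetical `props` type:

```go
package main

import "fmt"

type props struct {
	Labels map[string]string
}

func main() {
	chunks := []map[string]props{
		{"ns/pod": {Labels: map[string]string{"team": "infra"}}},
		{"ns/pod": {Labels: map[string]string{"env": "prod"}}},
	}

	// 1) Snapshot map-valued fields before accumulation, merging across chunks.
	saved := map[string]map[string]string{}
	for _, chunk := range chunks {
		for k, p := range chunk {
			if saved[k] == nil {
				saved[k] = map[string]string{}
			}
			for name, val := range p.Labels {
				saved[k][name] = val
			}
		}
	}

	// 2) Accumulate (which, like Properties.Intersection, drops map fields).
	result := map[string]props{"ns/pod": {}}

	// 3) Restore the snapshot onto the accumulated result.
	for k, labels := range saved {
		if p, ok := result[k]; ok {
			p.Labels = labels
			result[k] = p
		}
	}

	fmt.Println(result["ns/pod"].Labels) // map[env:prod team:infra]
}
```
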
+ 157 - 125
pkg/costmodel/cluster.go

@@ -5,6 +5,7 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/kubecost/cost-model/pkg/kubecost"
 	"github.com/kubecost/cost-model/pkg/util/timeutil"
 
 	"github.com/kubecost/cost-model/pkg/cloud"
@@ -13,7 +14,6 @@ import (
 	"github.com/kubecost/cost-model/pkg/prom"
 
 	prometheus "github.com/prometheus/client_golang/api"
-	"k8s.io/klog"
 )
 
 const (
@@ -118,13 +118,21 @@ type Disk struct {
 	Breakdown  *ClusterCostsBreakdown
 }
 
-func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, offset time.Duration) (map[string]*Disk, error) {
-	durationStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
-	offsetStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
-	if offset < time.Minute {
-		offsetStr = ""
+type DiskIdentifier struct {
+	Cluster string
+	Name    string
+}
+
+func ClusterDisks(client prometheus.Client, provider cloud.Provider, start, end time.Time) (map[DiskIdentifier]*Disk, error) {
+	// Query for the duration between start and end
+	durStr := timeutil.DurationString(end.Sub(start))
+	if durStr == "" {
+		return nil, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
 	}
 
+	// Start from the time "end", querying backwards
+	t := end
+
 	// minsPerResolution determines accuracy and resource use for the following
 	// queries. Smaller values (higher resolution) result in better accuracy,
 	// but more expensive queries, and vice versa.
@@ -140,22 +148,22 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 	costPerGBHr := 0.04 / 730.0
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
-	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s]%s)) by (%s, persistentvolume,provider_id)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryPVSize := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (%s, persistentvolume)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryActiveMins := fmt.Sprintf(`count(pv_hourly_cost) by (%s, persistentvolume)[%s:%dm]%s`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
-
-	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]%s) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, hourlyToCumulative, costPerGBHr)
-	queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]%s)`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
-	queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost) by (%s, node)[%s:%dm]%s`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
-
-	resChPVCost := ctx.Query(queryPVCost)
-	resChPVSize := ctx.Query(queryPVSize)
-	resChActiveMins := ctx.Query(queryActiveMins)
-	resChLocalStorageCost := ctx.Query(queryLocalStorageCost)
-	resChLocalStorageUsedCost := ctx.Query(queryLocalStorageUsedCost)
-	resChLocalStorageBytes := ctx.Query(queryLocalStorageBytes)
-	resChLocalActiveMins := ctx.Query(queryLocalActiveMins)
+	queryPVCost := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s])) by (%s, persistentvolume,provider_id)`, durStr, env.GetPromClusterLabel())
+	queryPVSize := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s])) by (%s, persistentvolume)`, durStr, env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`count(pv_hourly_cost) by (%s, persistentvolume)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+
+	queryLocalStorageCost := fmt.Sprintf(`sum_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+	queryLocalStorageUsedCost := fmt.Sprintf(`sum_over_time(sum(container_fs_usage_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`, env.GetPromClusterLabel(), durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
+	queryLocalStorageBytes := fmt.Sprintf(`avg_over_time(sum(container_fs_limit_bytes{device!="tmpfs", id="/"}) by (instance, %s)[%s:%dm])`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryLocalActiveMins := fmt.Sprintf(`count(node_total_hourly_cost) by (%s, node)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+
+	resChPVCost := ctx.QueryAtTime(queryPVCost, t)
+	resChPVSize := ctx.QueryAtTime(queryPVSize, t)
+	resChActiveMins := ctx.QueryAtTime(queryActiveMins, t)
+	resChLocalStorageCost := ctx.QueryAtTime(queryLocalStorageCost, t)
+	resChLocalStorageUsedCost := ctx.QueryAtTime(queryLocalStorageUsedCost, t)
+	resChLocalStorageBytes := ctx.QueryAtTime(queryLocalStorageBytes, t)
+	resChLocalActiveMins := ctx.QueryAtTime(queryLocalActiveMins, t)
 
 	resPVCost, _ := resChPVCost.Await()
 	resPVSize, _ := resChPVSize.Await()
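
`ClusterDisks` now keys its map with the comparable `DiskIdentifier` struct instead of a `fmt.Sprintf("%s/%s", ...)` string. Struct keys keep the fields individually typed and cannot be confused by a `/` appearing inside either field; a small sketch:

```go
package main

import "fmt"

// DiskIdentifier-style struct keys are comparable, so they can key a map
// directly, with no string concatenation or delimiter parsing required.
type DiskIdentifier struct {
	Cluster string
	Name    string
}

func main() {
	diskMap := map[DiskIdentifier]float64{}

	key := DiskIdentifier{Cluster: "cluster-one", Name: "pv-abc123"}
	diskMap[key] = 0.04

	// Lookup uses plain value equality on the struct.
	if cost, ok := diskMap[DiskIdentifier{"cluster-one", "pv-abc123"}]; ok {
		fmt.Println("cost per GiB-hour:", cost)
	}
}
```
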
@@ -168,7 +176,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 		return nil, ctx.ErrorCollection()
 	}
 
-	diskMap := map[string]*Disk{}
+	diskMap := map[DiskIdentifier]*Disk{}
 
 	pvCosts(diskMap, resolution, resActiveMins, resPVSize, resPVCost, provider)
 
@@ -180,12 +188,12 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage data missing instance")
+			log.Warnf("ClusterDisks: local storage data missing instance")
 			continue
 		}
 
 		cost := result.Values[0].Value
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,
@@ -205,12 +213,12 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage usage data missing instance")
+			log.Warnf("ClusterDisks: local storage usage data missing instance")
 			continue
 		}
 
 		cost := result.Values[0].Value
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,
@@ -230,12 +238,12 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterDisks: local storage data missing instance")
+			log.Warnf("ClusterDisks: local storage data missing instance")
 			continue
 		}
 
 		bytes := result.Values[0].Value
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,
@@ -263,7 +271,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 			continue
 		}
 
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			log.DedupedWarningf(5, "ClusterDisks: local active mins for unidentified disk or disk deleted from analysis")
 			continue
@@ -274,7 +282,7 @@ func ClusterDisks(client prometheus.Client, provider cloud.Provider, duration, o
 		}
 
 		s := time.Unix(int64(result.Values[0].Timestamp), 0)
-		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
+		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
 		mins := e.Sub(s).Minutes()
 
 		// TODO niko/assets if mins >= threshold, interpolate for missing data?
@@ -369,13 +377,16 @@ func costTimesMinute(activeDataMap map[NodeIdentifier]activeData, costMap map[No
 	}
 }
 
-func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset time.Duration) (map[NodeIdentifier]*Node, error) {
-	durationStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
-	offsetStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
-	if offset < time.Minute {
-		offsetStr = ""
+func ClusterNodes(cp cloud.Provider, client prometheus.Client, start, end time.Time) (map[NodeIdentifier]*Node, error) {
+	// Query for the duration between start and end
+	durStr := timeutil.DurationString(end.Sub(start))
+	if durStr == "" {
+		return nil, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
 	}
 
+	// Start from the time "end", querying backwards
+	t := end
+
 	// minsPerResolution determines accuracy and resource use for the following
 	// queries. Smaller values (higher resolution) result in better accuracy,
 	// but more expensive queries, and vice versa.
@@ -385,34 +396,34 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 	requiredCtx := prom.NewNamedContext(client, prom.ClusterContextName)
 	optionalCtx := prom.NewNamedContext(client, prom.ClusterOptionalContextName)
 
-	queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (%s, node, instance_type, provider_id)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeCPUCores := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s]%s)) by (%s, node)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeRAMBytes := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s]%s)) by (%s, node)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count[%s]%s)) by (%s, node, provider_id)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (%s, node, instance_type, provider_id)`, durationStr, offsetStr, env.GetPromClusterLabel())
-	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm]%s)) by (kubernetes_node, %s, mode)`, durationStr, minsPerResolution, offsetStr, env.GetPromClusterLabel())
-	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durationStr, minsPerResolution, offsetStr, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system"}[%s:%dm]%s)) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durationStr, minsPerResolution, offsetStr, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, env.GetPromClusterLabel(), env.GetPromClusterLabel())
-	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node, %s, provider_id)[%s:%dm]%s`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
-	queryIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
-	queryLabels := fmt.Sprintf(`count_over_time(kube_node_labels[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
+	queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
+	queryNodeCPUCores := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
+	queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s])) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, durStr, env.GetPromClusterLabel())
+	queryNodeRAMBytes := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
+	queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count[%s])) by (%s, node, provider_id)`, durStr, env.GetPromClusterLabel())
+	queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
+	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm])) by (kubernetes_node, %s, mode)`, durStr, minsPerResolution, env.GetPromClusterLabel())
+	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durStr, minsPerResolution, env.GetPromClusterLabel(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system"}[%s:%dm])) by (instance, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm])) by (node, %s), "instance", "$1", "node", "(.*)")) by (instance, %s)`, durStr, minsPerResolution, env.GetPromClusterLabel(), durStr, minsPerResolution, env.GetPromClusterLabel(), env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node, %s, provider_id)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+	queryIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s:%dm])`, durStr, minsPerResolution)
+	queryLabels := fmt.Sprintf(`count_over_time(kube_node_labels[%s:%dm])`, durStr, minsPerResolution)
 
 	// Return errors if these fail
-	resChNodeCPUHourlyCost := requiredCtx.Query(queryNodeCPUHourlyCost)
-	resChNodeCPUCores := requiredCtx.Query(queryNodeCPUCores)
-	resChNodeRAMHourlyCost := requiredCtx.Query(queryNodeRAMHourlyCost)
-	resChNodeRAMBytes := requiredCtx.Query(queryNodeRAMBytes)
-	resChNodeGPUCount := requiredCtx.Query(queryNodeGPUCount)
-	resChNodeGPUHourlyCost := requiredCtx.Query(queryNodeGPUHourlyCost)
-	resChActiveMins := requiredCtx.Query(queryActiveMins)
-	resChIsSpot := requiredCtx.Query(queryIsSpot)
+	resChNodeCPUHourlyCost := requiredCtx.QueryAtTime(queryNodeCPUHourlyCost, t)
+	resChNodeCPUCores := requiredCtx.QueryAtTime(queryNodeCPUCores, t)
+	resChNodeRAMHourlyCost := requiredCtx.QueryAtTime(queryNodeRAMHourlyCost, t)
+	resChNodeRAMBytes := requiredCtx.QueryAtTime(queryNodeRAMBytes, t)
+	resChNodeGPUCount := requiredCtx.QueryAtTime(queryNodeGPUCount, t)
+	resChNodeGPUHourlyCost := requiredCtx.QueryAtTime(queryNodeGPUHourlyCost, t)
+	resChActiveMins := requiredCtx.QueryAtTime(queryActiveMins, t)
+	resChIsSpot := requiredCtx.QueryAtTime(queryIsSpot, t)
 
 	// Do not return errors if these fail, but log warnings
-	resChNodeCPUModeTotal := optionalCtx.Query(queryNodeCPUModeTotal)
-	resChNodeRAMSystemPct := optionalCtx.Query(queryNodeRAMSystemPct)
-	resChNodeRAMUserPct := optionalCtx.Query(queryNodeRAMUserPct)
-	resChLabels := optionalCtx.Query(queryLabels)
+	resChNodeCPUModeTotal := optionalCtx.QueryAtTime(queryNodeCPUModeTotal, t)
+	resChNodeRAMSystemPct := optionalCtx.QueryAtTime(queryNodeRAMSystemPct, t)
+	resChNodeRAMUserPct := optionalCtx.QueryAtTime(queryNodeRAMUserPct, t)
+	resChLabels := optionalCtx.QueryAtTime(queryLabels, t)
 
 	resNodeCPUHourlyCost, _ := resChNodeCPUHourlyCost.Await()
 	resNodeCPUCores, _ := resChNodeCPUCores.Await()
@@ -429,7 +440,7 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 
 	if optionalCtx.HasErrors() {
 		for _, err := range optionalCtx.Errors() {
-			log.Warningf("ClusterNodes: %s", err)
+			log.Warnf("ClusterNodes: %s", err)
 		}
 	}
 	if requiredCtx.HasErrors() {
@@ -475,6 +486,7 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 		preemptibleMap,
 		labelsMap,
 		clusterAndNameToType,
+		resolution,
 	)
 
 	c, err := cp.GetConfig()
@@ -504,38 +516,45 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 	return nodeMap, nil
 }
 
+type LoadBalancerIdentifier struct {
+	Cluster   string
+	Namespace string
+	Name      string
+}
+
 type LoadBalancer struct {
 	Cluster    string
+	Namespace  string
 	Name       string
 	ProviderID string
 	Cost       float64
 	Start      time.Time
+	End        time.Time
 	Minutes    float64
 }
 
-func ClusterLoadBalancers(client prometheus.Client, duration, offset time.Duration) (map[string]*LoadBalancer, error) {
-	durationStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
-	offsetStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
-	if offset < time.Minute {
-		offsetStr = ""
+func ClusterLoadBalancers(client prometheus.Client, start, end time.Time) (map[LoadBalancerIdentifier]*LoadBalancer, error) {
+	// Query for the duration between start and end
+	durStr := timeutil.DurationString(end.Sub(start))
+	if durStr == "" {
+		return nil, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
 	}
 
+	// Start from the time "end", querying backwards
+	t := end
+
 	// minsPerResolution determines accuracy and resource use for the following
 	// queries. Smaller values (higher resolution) result in better accuracy,
 	// but more expensive queries, and vice versa.
-	minsPerResolution := 5
-
-	// hourlyToCumulative is a scaling factor that, when multiplied by an hourly
-	// value, converts it to a cumulative value; i.e.
-	// [$/hr] * [min/res]*[hr/min] = [$/res]
-	hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
+	minsPerResolution := 1
 
 	ctx := prom.NewNamedContext(client, prom.ClusterContextName)
-	queryLBCost := fmt.Sprintf(`sum_over_time((avg(kubecost_load_balancer_cost) by (namespace, service_name, %s, ingress_ip))[%s:%dm]%s) * %f`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr, hourlyToCumulative)
-	queryActiveMins := fmt.Sprintf(`count(kubecost_load_balancer_cost) by (namespace, service_name, %s, ingress_ip)[%s:%dm]%s`, env.GetPromClusterLabel(), durationStr, minsPerResolution, offsetStr)
 
-	resChLBCost := ctx.Query(queryLBCost)
-	resChActiveMins := ctx.Query(queryActiveMins)
+	queryLBCost := fmt.Sprintf(`avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s, ingress_ip)`, durStr, env.GetPromClusterLabel())
+	queryActiveMins := fmt.Sprintf(`avg(kubecost_load_balancer_cost) by (namespace, service_name, %s, ingress_ip)[%s:%dm]`, env.GetPromClusterLabel(), durStr, minsPerResolution)
+
+	resChLBCost := ctx.QueryAtTime(queryLBCost, t)
+	resChActiveMins := ctx.QueryAtTime(queryActiveMins, t)
 
 	resLBCost, _ := resChLBCost.Await()
 	resActiveMins, _ := resChActiveMins.Await()
@@ -544,21 +563,21 @@ func ClusterLoadBalancers(client prometheus.Client, duration, offset time.Durati
 		return nil, ctx.ErrorCollection()
 	}
 
-	loadBalancerMap := map[string]*LoadBalancer{}
+	loadBalancerMap := make(map[LoadBalancerIdentifier]*LoadBalancer, len(resActiveMins))
 
-	for _, result := range resLBCost {
+	for _, result := range resActiveMins {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
 		}
 		namespace, err := result.GetString("namespace")
 		if err != nil {
-			log.Warningf("ClusterLoadBalancers: LB cost data missing namespace")
+			log.Warnf("ClusterLoadBalancers: LB cost data missing namespace")
 			continue
 		}
-		serviceName, err := result.GetString("service_name")
+		name, err := result.GetString("service_name")
 		if err != nil {
-			log.Warningf("ClusterLoadBalancers: LB cost data missing service_name")
+			log.Warnf("ClusterLoadBalancers: LB cost data missing service_name")
 			continue
 		}
 		providerID, err := result.GetString("ingress_ip")
@@ -566,56 +585,74 @@ func ClusterLoadBalancers(client prometheus.Client, duration, offset time.Durati
 			log.DedupedWarningf(5, "ClusterLoadBalancers: LB cost data missing ingress_ip")
 			providerID = ""
 		}
-		lbCost := result.Values[0].Value
 
-		key := fmt.Sprintf("%s/%s/%s", cluster, namespace, serviceName)
+		key := LoadBalancerIdentifier{
+			Cluster:   cluster,
+			Namespace: namespace,
+			Name:      name,
+		}
+
+		// Skip if there is no data
+		if len(result.Values) == 0 {
+			continue
+		}
+
+		// Add load balancer to the set of load balancers
 		if _, ok := loadBalancerMap[key]; !ok {
 			loadBalancerMap[key] = &LoadBalancer{
 				Cluster:    cluster,
-				Name:       namespace + "/" + serviceName,
+				Namespace:  namespace,
+				Name:       fmt.Sprintf("%s/%s", namespace, name), // TODO:ETL this is kept for backwards-compatibility, but is not ideal
 				ProviderID: cloud.ParseLBID(providerID),
 			}
 		}
+
+		// Record start, end, and minutes. This must happen before costs are applied below.
+		s := time.Unix(int64(result.Values[0].Timestamp), 0)
+		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
+		loadBalancerMap[key].Start = s
+		loadBalancerMap[key].End = e
+		loadBalancerMap[key].Minutes = e.Sub(s).Minutes()
+
 		// Fill in Provider ID if it is available and missing in the loadBalancerMap
 		// Prevents duplicate LoadBalancers on the same day
 		if providerID != "" && loadBalancerMap[key].ProviderID == "" {
 			loadBalancerMap[key].ProviderID = providerID
 		}
-		loadBalancerMap[key].Cost += lbCost
 	}
 
-	for _, result := range resActiveMins {
+	for _, result := range resLBCost {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
 			cluster = env.GetClusterID()
 		}
 		namespace, err := result.GetString("namespace")
 		if err != nil {
-			log.Warningf("ClusterLoadBalancers: LB cost data missing namespace")
+			log.Warnf("ClusterLoadBalancers: LB cost data missing namespace")
 			continue
 		}
-		serviceName, err := result.GetString("service_name")
+		name, err := result.GetString("service_name")
 		if err != nil {
-			log.Warningf("ClusterLoadBalancers: LB cost data missing service_name")
+			log.Warnf("ClusterLoadBalancers: LB cost data missing service_name")
 			continue
 		}
-		key := fmt.Sprintf("%s/%s/%s", cluster, namespace, serviceName)
 
-		if len(result.Values) == 0 {
-			continue
+		key := LoadBalancerIdentifier{
+			Cluster:   cluster,
+			Namespace: namespace,
+			Name:      name,
 		}
 
+		// Apply cost as price-per-hour * hours
 		if lb, ok := loadBalancerMap[key]; ok {
-			s := time.Unix(int64(result.Values[0].Timestamp), 0)
-			e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
-			mins := e.Sub(s).Minutes()
-
-			lb.Start = s
-			lb.Minutes = mins
+			lbPricePerHr := result.Values[0].Value
+			hrs := lb.Minutes / 60.0
+			lb.Cost += lbPricePerHr * hrs
 		} else {
 			log.DedupedWarningf(20, "ClusterLoadBalancers: found cost for key that does not exist: %s", key)
 		}
 	}
+
 	return loadBalancerMap, nil
 }
 
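The rewritten `ClusterLoadBalancers` runs two passes: the active-minutes results establish each balancer's `Start`, `End`, and `Minutes` first, and only then is cost applied as average price-per-hour times active hours. A worked sketch of that arithmetic with made-up numbers:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Pass 1: active window from the first and last sample timestamps.
	first := time.Date(2021, 9, 1, 0, 0, 0, 0, time.UTC)
	last := time.Date(2021, 9, 1, 18, 30, 0, 0, time.UTC)
	minutes := last.Sub(first).Minutes() // 1110

	// Pass 2: cost = average price-per-hour over the window * active hours.
	lbPricePerHr := 0.025
	cost := lbPricePerHr * (minutes / 60.0)

	fmt.Printf("active %.0f min, cost $%.4f\n", minutes, cost) // 1110 min, $0.4625
}
```
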
@@ -770,7 +807,7 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider cloud.
 		if len(result.Values) > 0 {
 			dataMins = result.Values[0].Value
 		} else {
-			klog.V(3).Infof("[Warning] cluster cost data count returned no results for cluster %s", clusterID)
+			log.Warnf("Cluster cost data count returned no results for cluster %s", clusterID)
 		}
 		dataMinsByCluster[clusterID] = dataMins
 	}
@@ -846,7 +883,7 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider cloud.
 
 			mode, err := result.GetString("mode")
 			if err != nil {
-				klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
+				log.Warnf("ComputeClusterCosts: unable to read CPU mode: %s", err)
 				mode = "other"
 			}
 
@@ -920,11 +957,11 @@ func (a *Accesses) ComputeClusterCosts(client prometheus.Client, provider cloud.
 		dataMins, ok := dataMinsByCluster[id]
 		if !ok {
 			dataMins = mins
-			klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
+			log.Warnf("Cluster cost data count not found for cluster %s", id)
 		}
 		costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/timeutil.MinsPerHour)
 		if err != nil {
-			klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
+			log.Warnf("Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
 			return nil, err
 		}
 
@@ -983,19 +1020,19 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 
 	start, err := time.Parse(layout, startString)
 	if err != nil {
-		klog.V(1).Infof("Error parsing time %s. Error: %s", startString, err.Error())
+		log.Errorf("Error parsing time %s. Error: %s", startString, err.Error())
 		return nil, err
 	}
 	end, err := time.Parse(layout, endString)
 	if err != nil {
-		klog.V(1).Infof("Error parsing time %s. Error: %s", endString, err.Error())
+		log.Errorf("Error parsing time %s. Error: %s", endString, err.Error())
 		return nil, err
 	}
 	fmtWindow := timeutil.DurationString(window)
 
 	if fmtWindow == "" {
 		err := fmt.Errorf("window value invalid or missing")
-		klog.V(1).Infof("Error parsing time %v. Error: %s", window, err.Error())
+		log.Errorf("Error parsing time %v. Error: %s", window, err.Error())
 		return nil, err
 	}
 
@@ -1034,19 +1071,19 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 
 	coreTotal, err := resultToTotals(resultClusterCores)
 	if err != nil {
-		klog.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
+		log.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
 		return nil, err
 	}
 
 	ramTotal, err := resultToTotals(resultClusterRAM)
 	if err != nil {
-		klog.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
+		log.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
 		return nil, err
 	}
 
 	storageTotal, err := resultToTotals(resultStorage)
 	if err != nil {
-		klog.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
+		log.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
 	}
 
 	clusterTotal, err := resultToTotals(resultTotal)
@@ -1058,7 +1095,7 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 
 		resultNodes, warnings, err := ctx.QueryRangeSync(qNodes, start, end, window)
 		for _, warning := range warnings {
-			log.Warningf(warning)
+			log.Warnf("%s", warning)
 		}
 		if err != nil {
 			return nil, err
@@ -1066,7 +1103,7 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 
 		clusterTotal, err = resultToTotals(resultNodes)
 		if err != nil {
-			klog.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
+			log.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
 			return nil, err
 		}
 	}
@@ -1079,7 +1116,7 @@ func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startS
 	}, nil
 }
 
-func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins, resPVSize, resPVCost []*prom.QueryResult, cp cloud.Provider) {
+func pvCosts(diskMap map[DiskIdentifier]*Disk, resolution time.Duration, resActiveMins, resPVSize, resPVCost []*prom.QueryResult, cp cloud.Provider) {
 	for _, result := range resActiveMins {
 		cluster, err := result.GetString(env.GetPromClusterLabel())
 		if err != nil {
@@ -1088,7 +1125,7 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 
 		name, err := result.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("ClusterDisks: active mins missing pv name")
+			log.Warnf("ClusterDisks: active mins missing pv name")
 			continue
 		}
 
@@ -1096,7 +1133,7 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 			continue
 		}
 
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,
@@ -1105,7 +1142,7 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 			}
 		}
 		s := time.Unix(int64(result.Values[0].Timestamp), 0)
-		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
+		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
 		mins := e.Sub(s).Minutes()
 
 		// TODO niko/assets if mins >= threshold, interpolate for missing data?
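Here, as in buildActiveDataMap below, the window end is now the last sample's timestamp rather than that timestamp plus one scrape resolution, so the computed active minutes no longer include the final scrape interval. A small sketch of the difference, assuming evenly spaced samples:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		resolution := time.Minute
		ts := []int64{0, 60, 120, 180} // four samples, one per minute

		s := time.Unix(ts[0], 0)
		e := time.Unix(ts[len(ts)-1], 0)

		fmt.Println(e.Sub(s).Minutes())                 // 3: first sample to last sample
		fmt.Println(e.Add(resolution).Sub(s).Minutes()) // 4: old form, counting the final interval
	}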
@@ -1123,14 +1160,14 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 
 		name, err := result.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("ClusterDisks: PV size data missing persistentvolume")
+			log.Warnf("ClusterDisks: PV size data missing persistentvolume")
 			continue
 		}
 
 		// TODO niko/assets storage class
 
 		bytes := result.Values[0].Value
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,
@@ -1144,7 +1181,7 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 	customPricingEnabled := cloud.CustomPricesEnabled(cp)
 	customPricingConfig, err := cp.GetConfig()
 	if err != nil {
-		log.Warningf("ClusterDisks: failed to load custom pricing: %s", err)
+		log.Warnf("ClusterDisks: failed to load custom pricing: %s", err)
 	}
 
 	for _, result := range resPVCost {
@@ -1155,32 +1192,27 @@ func pvCosts(diskMap map[string]*Disk, resolution time.Duration, resActiveMins,
 
 		name, err := result.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("ClusterDisks: PV cost data missing persistentvolume")
+			log.Warnf("ClusterDisks: PV cost data missing persistentvolume")
 			continue
 		}
 
 		// TODO niko/assets storage class
 
 		var cost float64
-
 		if customPricingEnabled && customPricingConfig != nil {
-
 			customPVCostStr := customPricingConfig.Storage
 
 			customPVCost, err := strconv.ParseFloat(customPVCostStr, 64)
 			if err != nil {
-				log.Warningf("ClusterDisks: error parsing custom PV price: %s", customPVCostStr)
+				log.Warnf("ClusterDisks: error parsing custom PV price: %s", customPVCostStr)
 			}
 
 			cost = customPVCost
-
 		} else {
-
 			cost = result.Values[0].Value
-
 		}
 
-		key := fmt.Sprintf("%s/%s", cluster, name)
+		key := DiskIdentifier{cluster, name}
 		if _, ok := diskMap[key]; !ok {
 			diskMap[key] = &Disk{
 				Cluster:   cluster,

+ 20 - 19
pkg/costmodel/cluster_helpers.go

@@ -43,7 +43,7 @@ func buildCPUCostMap(
 	customPricingEnabled := cloud.CustomPricesEnabled(cp)
 	customPricingConfig, err := cp.GetConfig()
 	if err != nil {
-		log.Warningf("ClusterNodes: failed to load custom pricing: %s", err)
+		log.Warnf("ClusterNodes: failed to load custom pricing: %s", err)
 	}
 
 	for _, result := range resNodeCPUCost {
@@ -54,7 +54,7 @@ func buildCPUCostMap(
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: CPU cost data missing node")
+			log.Warnf("ClusterNodes: CPU cost data missing node")
 			continue
 		}
 
@@ -84,7 +84,7 @@ func buildCPUCostMap(
 
 			customCPUCost, err := strconv.ParseFloat(customCPUStr, 64)
 			if err != nil {
-				log.Warningf("ClusterNodes: error parsing custom CPU price: %s", customCPUStr)
+				log.Warnf("ClusterNodes: error parsing custom CPU price: %s", customCPUStr)
 			}
 			cpuCost = customCPUCost
 
@@ -117,7 +117,7 @@ func buildRAMCostMap(
 	customPricingEnabled := cloud.CustomPricesEnabled(cp)
 	customPricingConfig, err := cp.GetConfig()
 	if err != nil {
-		log.Warningf("ClusterNodes: failed to load custom pricing: %s", err)
+		log.Warnf("ClusterNodes: failed to load custom pricing: %s", err)
 	}
 
 	for _, result := range resNodeRAMCost {
@@ -128,7 +128,7 @@ func buildRAMCostMap(
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: RAM cost data missing node")
+			log.Warnf("ClusterNodes: RAM cost data missing node")
 			continue
 		}
 
@@ -158,7 +158,7 @@ func buildRAMCostMap(
 
 			customRAMCost, err := strconv.ParseFloat(customRAMStr, 64)
 			if err != nil {
-				log.Warningf("ClusterNodes: error parsing custom RAM price: %s", customRAMStr)
+				log.Warnf("ClusterNodes: error parsing custom RAM price: %s", customRAMStr)
 			}
 			ramCost = customRAMCost / 1024 / 1024 / 1024
 
@@ -192,7 +192,7 @@ func buildGPUCostMap(
 	customPricingEnabled := cloud.CustomPricesEnabled(cp)
 	customPricingConfig, err := cp.GetConfig()
 	if err != nil {
-		log.Warningf("ClusterNodes: failed to load custom pricing: %s", err)
+		log.Warnf("ClusterNodes: failed to load custom pricing: %s", err)
 	}
 
 	for _, result := range resNodeGPUCost {
@@ -203,7 +203,7 @@ func buildGPUCostMap(
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: GPU cost data missing node")
+			log.Warnf("ClusterNodes: GPU cost data missing node")
 			continue
 		}
 
@@ -233,7 +233,7 @@ func buildGPUCostMap(
 
 			customGPUCost, err := strconv.ParseFloat(customGPUStr, 64)
 			if err != nil {
-				log.Warningf("ClusterNodes: error parsing custom GPU price: %s", customGPUStr)
+				log.Warnf("ClusterNodes: error parsing custom GPU price: %s", customGPUStr)
 			}
 			gpuCost = customGPUCost
 
@@ -271,7 +271,7 @@ func buildGPUCountMap(
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: GPU count data missing node")
+			log.Warnf("ClusterNodes: GPU count data missing node")
 			continue
 		}
 
@@ -303,7 +303,7 @@ func buildCPUCoresMap(
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: CPU cores data missing node")
+			log.Warnf("ClusterNodes: CPU cores data missing node")
 			continue
 		}
 
@@ -331,7 +331,7 @@ func buildRAMBytesMap(resNodeRAMBytes []*prom.QueryResult) map[nodeIdentifierNoP
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: RAM bytes data missing node")
+			log.Warnf("ClusterNodes: RAM bytes data missing node")
 			continue
 		}
 
@@ -373,7 +373,7 @@ func buildCPUBreakdownMap(resNodeCPUModeTotal []*prom.QueryResult) map[nodeIdent
 
 		mode, err := result.GetString("mode")
 		if err != nil {
-			log.Warningf("ClusterNodes: unable to read CPU mode: %s", err)
+			log.Warnf("ClusterNodes: unable to read CPU mode: %s", err)
 			mode = "other"
 		}
 
@@ -437,7 +437,7 @@ func buildRAMUserPctMap(resNodeRAMUserPct []*prom.QueryResult) map[nodeIdentifie
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterNodes: RAM user percent missing node")
+			log.Warnf("ClusterNodes: RAM user percent missing node")
 			continue
 		}
 
@@ -466,7 +466,7 @@ func buildRAMSystemPctMap(resNodeRAMSystemPct []*prom.QueryResult) map[nodeIdent
 
 		name, err := result.GetString("instance")
 		if err != nil {
-			log.Warningf("ClusterNodes: RAM system percent missing node")
+			log.Warnf("ClusterNodes: RAM system percent missing node")
 			continue
 		}
 
@@ -501,7 +501,7 @@ func buildActiveDataMap(resActiveMins []*prom.QueryResult, resolution time.Durat
 
 		name, err := result.GetString("node")
 		if err != nil {
-			log.Warningf("ClusterNodes: active mins missing node")
+			log.Warnf("ClusterNodes: active mins missing node")
 			continue
 		}
 
@@ -518,7 +518,7 @@ func buildActiveDataMap(resActiveMins []*prom.QueryResult, resolution time.Durat
 		}
 
 		s := time.Unix(int64(result.Values[0].Timestamp), 0)
-		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
+		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0)
 		mins := e.Sub(s).Minutes()
 
 		// TODO niko/assets if mins >= threshold, interpolate for missing data?
@@ -625,7 +625,7 @@ func checkForKeyAndInitIfMissing(
 		}]; ok {
 			nodeType = t
 		} else {
-			log.Warningf("ClusterNodes: Type does not exist for node identifier %s", key)
+			log.Warnf("ClusterNodes: Type does not exist for node identifier %s", key)
 		}
 
 		nodeMap[key] = &Node{
@@ -705,6 +705,7 @@ func buildNodeMap(
 	preemptibleMap map[NodeIdentifier]bool,
 	labelsMap map[nodeIdentifierNoProviderID]map[string]string,
 	clusterAndNameToType map[nodeIdentifierNoProviderID]string,
+	res time.Duration,
 ) map[NodeIdentifier]*Node {
 
 	nodeMap := make(map[NodeIdentifier]*Node)
@@ -740,7 +741,7 @@ func buildNodeMap(
 		checkForKeyAndInitIfMissing(nodeMap, id, clusterAndNameToType)
 		nodeMap[id].Start = activeData.start
 		nodeMap[id].End = activeData.end
-		nodeMap[id].Minutes = activeData.minutes
+		nodeMap[id].Minutes = nodeMap[id].End.Sub(nodeMap[id].Start).Minutes()
 	}
 
 	// We now merge in data that doesn't have a provider id by looping over

+ 9 - 4
pkg/costmodel/cluster_helpers_test.go

@@ -1,12 +1,12 @@
 package costmodel
 
 import (
-	"github.com/kubecost/cost-model/pkg/config"
 	"reflect"
 	"testing"
 	"time"
 
 	"github.com/kubecost/cost-model/pkg/cloud"
+	"github.com/kubecost/cost-model/pkg/config"
 	"github.com/kubecost/cost-model/pkg/prom"
 	"github.com/kubecost/cost-model/pkg/util"
 
@@ -687,6 +687,7 @@ func TestBuildNodeMap(t *testing.T) {
 				testCase.preemptibleMap,
 				testCase.labelsMap,
 				testCase.clusterAndNameToType,
+				time.Minute,
 			)
 
 			if !reflect.DeepEqual(result, testCase.expected) {
@@ -925,7 +926,11 @@ func TestAssetCustompricing(t *testing.T) {
 			Values: []*util.Vector{
 				&util.Vector{
 					Timestamp: 0,
-					Value:     60.0,
+					Value:     1.0,
+				},
+				&util.Vector{
+					Timestamp: 3600.0,
+					Value:     1.0,
 				},
 			},
 		},
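If this series carries a price per hour, the expectation is easy to verify by hand: samples at t=0 and t=3600 bracket a 60-minute active window, so a constant 1.0/hr price accrues 1.0 × 60/60 = 1.0 in cost.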
@@ -994,10 +999,10 @@ func TestAssetCustompricing(t *testing.T) {
 			ramResult := ramMap[nodeKey]
 			gpuResult := gpuMap[nodeKey]
 
-			diskMap := map[string]*Disk{}
+			diskMap := map[DiskIdentifier]*Disk{}
 			pvCosts(diskMap, time.Hour, pvMinsPromResult, pvSizePromResult, pvCostPromResult, testProvider)
 
-			diskResult := diskMap["cluster1/pvc1"].Cost
+			diskResult := diskMap[DiskIdentifier{"cluster1", "pvc1"}].Cost
 
 			if !util.IsApproximately(cpuResult, testCase.expectedPricing["CPU"]) {
 				t.Errorf("CPU custom pricing error in %s. Got %v but expected %v", testCase.name, cpuResult, testCase.expectedPricing["CPU"])

+ 5 - 6
pkg/costmodel/clusterinfo.go

@@ -12,7 +12,6 @@ import (
 	"github.com/kubecost/cost-model/pkg/util/json"
 
 	"k8s.io/client-go/kubernetes"
-	"k8s.io/klog"
 )
 
 var (
@@ -64,12 +63,12 @@ func (dlcip *localClusterInfoProvider) GetClusterInfo() map[string]string {
 	if ok && data != nil {
 		v, err := kc.ServerVersion()
 		if err != nil {
-			klog.Infof("Could not get k8s version info: %s", err.Error())
+			log.Infof("Could not get k8s version info: %s", err.Error())
 		} else if v != nil {
 			data["version"] = v.Major + "." + v.Minor
 		}
 	} else {
-		klog.Infof("Could not get k8s version info: %s", err.Error())
+		log.Infof("Could not get k8s version info: %s", err.Error())
 	}
 
 	writeClusterProfile(data)
@@ -104,7 +103,7 @@ func (ccip *configuredClusterInfoProvider) GetClusterInfo() map[string]string {
 
 	err = json.Unmarshal(data, &clusterInfo)
 	if err != nil {
-		log.Warningf("ClusterInfo failed to load from configuration: %s", err)
+		log.Warnf("ClusterInfo failed to load from configuration: %s", err)
 		return clusterInfo
 	}
 
@@ -131,13 +130,13 @@ func (ciw *clusterInfoWriteOnRequest) GetClusterInfo() map[string]string {
 
 	result, err := json.Marshal(cInfo)
 	if err != nil {
-		log.Warningf("Failed to write the cluster info: %s", err)
+		log.Warnf("Failed to write the cluster info: %s", err)
 		return cInfo
 	}
 
 	err = ciw.config.Write(result)
 	if err != nil {
-		log.Warningf("Failed to write the cluster info to config: %s", err)
+		log.Warnf("Failed to write the cluster info to config: %s", err)
 	}
 
 	return cInfo

+ 2 - 2
pkg/costmodel/clusters/clustermap.go

@@ -156,13 +156,13 @@ func (pcm *PrometheusClusterMap) loadClusters() (map[string]*ClusterInfo, error)
 	for _, result := range qr {
 		id, err := result.GetString("id")
 		if err != nil {
-			log.Warningf("Failed to load 'id' field for ClusterInfo")
+			log.Warnf("Failed to load 'id' field for ClusterInfo")
 			continue
 		}
 
 		name, err := result.GetString("name")
 		if err != nil {
-			log.Warningf("Failed to load 'name' field for ClusterInfo")
+			log.Warnf("Failed to load 'name' field for ClusterInfo")
 			continue
 		}
 

+ 104 - 103
pkg/costmodel/costmodel.go

@@ -21,7 +21,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/klog"
 
 	"golang.org/x/sync/singleflight"
 )
@@ -47,13 +46,14 @@ const (
 var isCron = regexp.MustCompile(`^(.+)-\d{10}$`)
 
 type CostModel struct {
-	Cache            clustercache.ClusterCache
-	ClusterMap       clusters.ClusterMap
-	RequestGroup     *singleflight.Group
-	ScrapeInterval   time.Duration
-	PrometheusClient prometheus.Client
-	Provider         costAnalyzerCloud.Provider
-	pricingMetadata  *costAnalyzerCloud.PricingMatchMetadata
+	Cache                      clustercache.ClusterCache
+	ClusterMap                 clusters.ClusterMap
+	MaxPrometheusQueryDuration time.Duration
+	RequestGroup               *singleflight.Group
+	ScrapeInterval             time.Duration
+	PrometheusClient           prometheus.Client
+	Provider                   costAnalyzerCloud.Provider
+	pricingMetadata            *costAnalyzerCloud.PricingMatchMetadata
 }
 
 func NewCostModel(client prometheus.Client, provider costAnalyzerCloud.Provider, cache clustercache.ClusterCache, clusterMap clusters.ClusterMap, scrapeInterval time.Duration) *CostModel {
@@ -61,12 +61,13 @@ func NewCostModel(client prometheus.Client, provider costAnalyzerCloud.Provider,
 	requestGroup := new(singleflight.Group)
 
 	return &CostModel{
-		Cache:            cache,
-		ClusterMap:       clusterMap,
-		PrometheusClient: client,
-		Provider:         provider,
-		RequestGroup:     requestGroup,
-		ScrapeInterval:   scrapeInterval,
+		Cache:                      cache,
+		ClusterMap:                 clusterMap,
+		MaxPrometheusQueryDuration: env.GetETLMaxPrometheusQueryDuration(),
+		PrometheusClient:           client,
+		Provider:                   provider,
+		RequestGroup:               requestGroup,
+		ScrapeInterval:             scrapeInterval,
 	}
 }
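MaxPrometheusQueryDuration is populated from env.GetETLMaxPrometheusQueryDuration(); a natural use for such a cap (not shown in this diff) is splitting a long ETL window into bounded sub-queries. An illustrative sketch, where chunkWindow is a hypothetical helper:

	package main

	import (
		"fmt"
		"time"
	)

	// chunkWindow partitions [start, end) into windows no longer than max.
	func chunkWindow(start, end time.Time, max time.Duration) [][2]time.Time {
		var chunks [][2]time.Time
		for s := start; s.Before(end); s = s.Add(max) {
			e := s.Add(max)
			if e.After(end) {
				e = end
			}
			chunks = append(chunks, [2]time.Time{s, e})
		}
		return chunks
	}

	func main() {
		start := time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC)
		fmt.Println(len(chunkWindow(start, start.Add(36*time.Hour), 24*time.Hour))) // 2
	}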
 
@@ -297,7 +298,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 		// ErrorCollection is a collection of errors wrapped in a single error implementation
 		// We opt to not return an error for the sake of running as a pure exporter.
-		log.Warningf("ComputeCostData: continuing despite prometheus errors: %s", ctx.ErrorCollection().Error())
+		log.Warnf("ComputeCostData: continuing despite prometheus errors: %s", ctx.ErrorCollection().Error())
 	}
 
 	defer measureTime(time.Now(), profileThreshold, "ComputeCostData: Processing Query Data")
@@ -305,7 +306,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	normalizationValue, err := getNormalization(resNormalization)
 	if err != nil {
 		// We opt to not return an error for the sake of running as a pure exporter.
-		log.Warningf("ComputeCostData: continuing despite error parsing normalization values from %s: %s", queryNormalization, err.Error())
+		log.Warnf("ComputeCostData: continuing despite error parsing normalization values from %s: %s", queryNormalization, err.Error())
 	}
 
 	// Determine if there are vgpus configured and if so get the total allocatable number
@@ -318,7 +319,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 	nodes, err := cm.GetNodeCost(cp)
 	if err != nil {
-		log.Warningf("GetNodeCost: no node cost model available: " + err.Error())
+		log.Warnf("GetNodeCost: no node cost model available: " + err.Error())
 		return nil, err
 	}
 
@@ -326,7 +327,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	unmountedPVs := make(map[string][]*PersistentVolumeClaimData)
 	pvClaimMapping, err := GetPVInfoLocal(cm.Cache, clusterID)
 	if err != nil {
-		log.Warningf("GetPVInfo: unable to get PV data: %s", err.Error())
+		log.Warnf("GetPVInfo: unable to get PV data: %s", err.Error())
 	}
 	if pvClaimMapping != nil {
 		err = addPVData(cm.Cache, pvClaimMapping, cp)
@@ -341,7 +342,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 	networkUsageMap, err := GetNetworkUsageData(resNetZoneRequests, resNetRegionRequests, resNetInternetRequests, clusterID)
 	if err != nil {
-		klog.V(1).Infof("[Warning] Unable to get Network Cost Data: %s", err.Error())
+		log.Warnf("Unable to get Network Cost Data: %s", err.Error())
 		networkUsageMap = make(map[string]*NetworkUsageData)
 	}
 
@@ -450,7 +451,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 			if usage, ok := networkUsageMap[ns+","+podName+","+clusterID]; ok {
 				netCosts, err := GetNetworkCost(usage, cp)
 				if err != nil {
-					klog.V(4).Infof("Error pulling network costs: %s", err.Error())
+					log.Debugf("Error pulling network costs: %s", err.Error())
 				} else {
 					podNetCosts = netCosts
 				}
@@ -519,13 +520,13 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 				RAMUsedV, ok := RAMUsedMap[newKey]
 				if !ok {
-					klog.V(4).Info("no RAM usage for " + newKey)
+					log.Debug("no RAM usage for " + newKey)
 					RAMUsedV = []*util.Vector{{}}
 				}
 
 				CPUUsedV, ok := CPUUsedMap[newKey]
 				if !ok {
-					klog.V(4).Info("no CPU usage for " + newKey)
+					log.Debug("no CPU usage for " + newKey)
 					CPUUsedV = []*util.Vector{{}}
 				}
 
@@ -570,7 +571,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 			}
 		} else {
 			// The container has been deleted. Not all information is sent to prometheus via ksm, so fill out what we can without k8s api
-			klog.V(4).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
+			log.Debug("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
 			c, err := NewContainerMetricFromKey(key)
 			if err != nil {
 				return nil, err
@@ -588,19 +589,19 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 			RAMUsedV, ok := RAMUsedMap[key]
 			if !ok {
-				klog.V(4).Info("no RAM usage for " + key)
+				log.Debug("no RAM usage for " + key)
 				RAMUsedV = []*util.Vector{{}}
 			}
 
 			CPUUsedV, ok := CPUUsedMap[key]
 			if !ok {
-				klog.V(4).Info("no CPU usage for " + key)
+				log.Debug("no CPU usage for " + key)
 				CPUUsedV = []*util.Vector{{}}
 			}
 
 			node, ok := nodes[c.NodeName]
 			if !ok {
-				klog.V(4).Infof("Node \"%s\" has been deleted from Kubernetes. Query historical data to get it.", c.NodeName)
+				log.Debugf("Node \"%s\" has been deleted from Kubernetes. Query historical data to get it.", c.NodeName)
 				if n, ok := missingNodes[c.NodeName]; ok {
 					node = n
 				} else {
@@ -643,7 +644,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 	// to pass along the cost data
 	unmounted := findUnmountedPVCostData(cm.ClusterMap, unmountedPVs, namespaceLabelsMapping, namespaceAnnotationsMapping)
 	for k, costs := range unmounted {
-		klog.V(4).Infof("Unmounted PVs in Namespace/ClusterID: %s/%s", costs.Namespace, costs.ClusterID)
+		log.Debugf("Unmounted PVs in Namespace/ClusterID: %s/%s", costs.Namespace, costs.ClusterID)
 
 		if filterNamespace == "" {
 			containerNameCost[k] = costs
@@ -654,12 +655,12 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, cp costAnalyze
 
 	err = findDeletedNodeInfo(cli, missingNodes, window, "")
 	if err != nil {
-		klog.V(1).Infof("Error fetching historical node data: %s", err.Error())
+		log.Errorf("Error fetching historical node data: %s", err.Error())
 	}
 
 	err = findDeletedPodInfo(cli, missingContainers, window)
 	if err != nil {
-		klog.V(1).Infof("Error fetching historical pod data: %s", err.Error())
+		log.Errorf("Error fetching historical pod data: %s", err.Error())
 	}
 	return containerNameCost, err
 }
@@ -673,7 +674,7 @@ func findUnmountedPVCostData(clusterMap clusters.ClusterMap, unmountedPVs map[st
 	for k, pv := range unmountedPVs {
 		keyParts := strings.Split(k, ",")
 		if len(keyParts) != 3 {
-			klog.V(1).Infof("Unmounted PV used key with incorrect parts: %s", k)
+			log.Warnf("Unmounted PV used key with incorrect parts: %s", k)
 			continue
 		}
 
@@ -781,7 +782,7 @@ func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*c
 		}
 
 		if len(cpuCosts) == 0 {
-			klog.V(1).Infof("Kubecost prometheus metrics not currently available. Ingest this server's /metrics endpoint to get that data.")
+			log.Infof("Kubecost prometheus metrics not currently available. Ingest this server's /metrics endpoint to get that data.")
 		}
 
 		for node, costv := range cpuCosts {
@@ -812,12 +813,12 @@ func getContainerAllocation(req []*util.Vector, used []*util.Vector, allocationT
 		if x != nil && y != nil {
 			x1 := *x
 			if math.IsNaN(x1) {
-				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for requests.", allocationType)
+				log.Warnf("NaN value found during %s allocation calculation for requests.", allocationType)
 				x1 = 0.0
 			}
 			y1 := *y
 			if math.IsNaN(y1) {
-				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for used.", allocationType)
+				log.Warnf("NaN value found during %s allocation calculation for used.", allocationType)
 				y1 = 0.0
 			}
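The same NaN guard recurs throughout the GetNodeCost hunks below and could be hoisted into a small helper; a sketch, where nanToZero is a hypothetical name and log is this package's leveled logger:

	// nanToZero guards a parsed float, logging and zeroing NaN values.
	func nanToZero(v float64, what string) float64 {
		if math.IsNaN(v) {
			log.Warnf("%s parsed as NaN. Setting to 0.", what)
			return 0
		}
		return v
	}

	// e.g. defaultCPU = nanToZero(defaultCPU, "defaultCPU")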
 
@@ -862,7 +863,7 @@ func addPVData(cache clustercache.ClusterCache, pvClaimMapping map[string]*Persi
 	for _, pv := range pvs {
 		parameters, ok := storageClassMap[pv.Spec.StorageClassName]
 		if !ok {
-			klog.V(4).Infof("Unable to find parameters for storage class \"%s\". Does pv \"%s\" have a storageClassName?", pv.Spec.StorageClassName, pv.Name)
+			log.Debugf("Unable to find parameters for storage class \"%s\". Does pv \"%s\" have a storageClassName?", pv.Spec.StorageClassName, pv.Name)
 		}
 		var region string
 		if r, ok := util.GetRegion(pv.Labels); ok {
@@ -887,7 +888,7 @@ func addPVData(cache clustercache.ClusterCache, pvClaimMapping map[string]*Persi
 		if vol, ok := pvMap[pvc.VolumeName]; ok {
 			pvc.Volume = vol
 		} else {
-			klog.V(4).Infof("PV not found, using default")
+			log.Debugf("PV not found, using default")
 			pvc.Volume = &costAnalyzerCloud.PV{
 				Cost: cfg.Storage,
 			}
@@ -953,7 +954,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 
 		cnode, err := cp.NodePricing(cp.GetKey(nodeLabels, n))
 		if err != nil {
-			klog.Infof("Error getting node pricing. Error: %s", err.Error())
+			log.Infof("Error getting node pricing. Error: %s", err.Error())
 			if cnode != nil {
 				nodes[name] = cnode
 				continue
@@ -989,11 +990,11 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 		} else {
 			cpu, err = strconv.ParseFloat(newCnode.VCPU, 64)
 			if err != nil {
-				klog.V(1).Infof("[Warning] parsing VCPU value: \"%s\" as float64", newCnode.VCPU)
+				log.Warnf("parsing VCPU value: \"%s\" as float64", newCnode.VCPU)
 			}
 		}
 		if math.IsNaN(cpu) {
-			klog.V(1).Infof("[Warning] cpu parsed as NaN. Setting to 0.")
+			log.Warnf("cpu parsed as NaN. Setting to 0.")
 			cpu = 0
 		}
 
@@ -1003,7 +1004,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 		}
 		ram = float64(n.Status.Capacity.Memory().Value())
 		if math.IsNaN(ram) {
-			klog.V(1).Infof("[Warning] ram parsed as NaN. Setting to 0.")
+			log.Warnf("ram parsed as NaN. Setting to 0.")
 			ram = 0
 		}
 
@@ -1033,65 +1034,65 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			}
 		}
 		if math.IsNaN(gpuc) {
-			klog.V(1).Infof("[Warning] gpu count parsed as NaN. Setting to 0.")
+			log.Warnf("gpu count parsed as NaN. Setting to 0.")
 			gpuc = 0.0
 		}
 
 		if newCnode.GPU != "" && newCnode.GPUCost == "" {
 			// We couldn't find a gpu cost, so fix cpu and ram prices, then allocate gpu cost accordingly
-			klog.V(4).Infof("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
+			log.Debugf("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
 
 			defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
 			if err != nil {
-				klog.V(3).Infof("Could not parse default cpu price")
+				log.Errorf("Could not parse default cpu price")
 				defaultCPU = 0
 			}
 			if math.IsNaN(defaultCPU) {
-				klog.V(1).Infof("[Warning] defaultCPU parsed as NaN. Setting to 0.")
+				log.Warnf("defaultCPU parsed as NaN. Setting to 0.")
 				defaultCPU = 0
 			}
 
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			if err != nil {
-				klog.V(3).Infof("Could not parse default ram price")
+				log.Errorf("Could not parse default ram price")
 				defaultRAM = 0
 			}
 			if math.IsNaN(defaultRAM) {
-				klog.V(1).Infof("[Warning] defaultRAM parsed as NaN. Setting to 0.")
+				log.Warnf("defaultRAM parsed as NaN. Setting to 0.")
 				defaultRAM = 0
 			}
 
 			defaultGPU, err := strconv.ParseFloat(cfg.GPU, 64)
 			if err != nil {
-				klog.V(3).Infof("Could not parse default gpu price")
+				log.Errorf("Could not parse default gpu price")
 				defaultGPU = 0
 			}
 			if math.IsNaN(defaultGPU) {
-				klog.V(1).Infof("[Warning] defaultGPU parsed as NaN. Setting to 0.")
+				log.Warnf("defaultGPU parsed as NaN. Setting to 0.")
 				defaultGPU = 0
 			}
 
 			cpuToRAMRatio := defaultCPU / defaultRAM
 			if math.IsNaN(cpuToRAMRatio) {
-				klog.V(1).Infof("[Warning] cpuToRAMRatio[defaultCPU: %f / defaultRAM: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
+				log.Warnf("cpuToRAMRatio[defaultCPU: %f / defaultRAM: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
 				cpuToRAMRatio = 0
 			}
 
 			gpuToRAMRatio := defaultGPU / defaultRAM
 			if math.IsNaN(gpuToRAMRatio) {
-				klog.V(1).Infof("[Warning] gpuToRAMRatio is NaN. Setting to 0.")
+				log.Warnf("gpuToRAMRatio is NaN. Setting to 0.")
 				gpuToRAMRatio = 0
 			}
 
 			ramGB := ram / 1024 / 1024 / 1024
 			if math.IsNaN(ramGB) {
-				klog.V(1).Infof("[Warning] ramGB is NaN. Setting to 0.")
+				log.Warnf("ramGB is NaN. Setting to 0.")
 				ramGB = 0
 			}
 
 			ramMultiple := gpuc*gpuToRAMRatio + cpu*cpuToRAMRatio + ramGB
 			if math.IsNaN(ramMultiple) {
-				klog.V(1).Infof("[Warning] ramMultiple is NaN. Setting to 0.")
+				log.Warnf("ramMultiple is NaN. Setting to 0.")
 				ramMultiple = 0
 			}
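The ratio arithmetic above spreads one node price across RAM, CPU, and GPU: capacity is converted into RAM-equivalent units, priced per unit, then converted back. A worked example with illustrative numbers:

	package main

	import "fmt"

	func main() {
		nodePrice := 1.00 // $/hr for the whole node
		cpu, ramGB, gpus := 4.0, 16.0, 1.0

		cpuToRAM, gpuToRAM := 2.0, 10.0 // defaultCPU/defaultRAM, defaultGPU/defaultRAM

		ramMultiple := gpus*gpuToRAM + cpu*cpuToRAM + ramGB // node size in RAM-equivalent GB
		ramPrice := nodePrice / ramMultiple                 // $/GB-hr

		// 16*ramPrice + 4*(ramPrice*cpuToRAM) + 1*(ramPrice*gpuToRAM) recovers nodePrice.
		fmt.Println(ramPrice, ramPrice*cpuToRAM, ramPrice*gpuToRAM)
	}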
 
@@ -1099,24 +1100,24 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			if newCnode.Cost != "" {
 				nodePrice, err = strconv.ParseFloat(newCnode.Cost, 64)
 				if err != nil {
-					klog.V(3).Infof("Could not parse total node price")
+					log.Errorf("Could not parse total node price")
 					return nil, err
 				}
 			} else {
 				nodePrice, err = strconv.ParseFloat(newCnode.VCPUCost, 64) // all the price was allocated to the CPU
 				if err != nil {
-					klog.V(3).Infof("Could not parse node vcpu price")
+					log.Errorf("Could not parse node vcpu price")
 					return nil, err
 				}
 			}
 			if math.IsNaN(nodePrice) {
-				klog.V(1).Infof("[Warning] nodePrice parsed as NaN. Setting to 0.")
+				log.Warnf("nodePrice parsed as NaN. Setting to 0.")
 				nodePrice = 0
 			}
 
 			ramPrice := (nodePrice / ramMultiple)
 			if math.IsNaN(ramPrice) {
-				klog.V(1).Infof("[Warning] ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
+				log.Warnf("ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
 				ramPrice = 0
 			}
 
@@ -1129,43 +1130,43 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			newCnode.GPUCost = fmt.Sprintf("%f", gpuPrice)
 		} else if newCnode.RAMCost == "" {
 			// We couldn't find a ramcost, so fix cpu and allocate ram accordingly
-			klog.V(4).Infof("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
+			log.Debugf("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
 
 			defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
 			if err != nil {
-				klog.V(3).Infof("Could not parse default cpu price")
+				log.Warnf("Could not parse default cpu price")
 				defaultCPU = 0
 			}
 			if math.IsNaN(defaultCPU) {
-				klog.V(1).Infof("[Warning] defaultCPU parsed as NaN. Setting to 0.")
+				log.Warnf("defaultCPU parsed as NaN. Setting to 0.")
 				defaultCPU = 0
 			}
 
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			if err != nil {
-				klog.V(3).Infof("Could not parse default ram price")
+				log.Warnf("Could not parse default ram price")
 				defaultRAM = 0
 			}
 			if math.IsNaN(defaultRAM) {
-				klog.V(1).Infof("[Warning] defaultRAM parsed as NaN. Setting to 0.")
+				log.Warnf("defaultRAM parsed as NaN. Setting to 0.")
 				defaultRAM = 0
 			}
 
 			cpuToRAMRatio := defaultCPU / defaultRAM
 			if math.IsNaN(cpuToRAMRatio) {
-				klog.V(1).Infof("[Warning] cpuToRAMRatio[defaultCPU: %f / defaultRAM: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
+				log.Warnf("cpuToRAMRatio[defaultCPU: %f / defaultRAM: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
 				cpuToRAMRatio = 0
 			}
 
 			ramGB := ram / 1024 / 1024 / 1024
 			if math.IsNaN(ramGB) {
-				klog.V(1).Infof("[Warning] ramGB is NaN. Setting to 0.")
+				log.Warnf("ramGB is NaN. Setting to 0.")
 				ramGB = 0
 			}
 
 			ramMultiple := cpu*cpuToRAMRatio + ramGB
 			if math.IsNaN(ramMultiple) {
-				klog.V(1).Infof("[Warning] ramMultiple is NaN. Setting to 0.")
+				log.Warnf("ramMultiple is NaN. Setting to 0.")
 				ramMultiple = 0
 			}
 
@@ -1173,24 +1174,24 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			if newCnode.Cost != "" {
 				nodePrice, err = strconv.ParseFloat(newCnode.Cost, 64)
 				if err != nil {
-					klog.V(3).Infof("Could not parse total node price")
+					log.Warnf("Could not parse total node price")
 					return nil, err
 				}
 			} else {
 				nodePrice, err = strconv.ParseFloat(newCnode.VCPUCost, 64) // all the price was allocated to the CPU
 				if err != nil {
-					klog.V(3).Infof("Could not parse node vcpu price")
+					log.Warnf("Could not parse node vcpu price")
 					return nil, err
 				}
 			}
 			if math.IsNaN(nodePrice) {
-				klog.V(1).Infof("[Warning] nodePrice parsed as NaN. Setting to 0.")
+				log.Warnf("nodePrice parsed as NaN. Setting to 0.")
 				nodePrice = 0
 			}
 
 			ramPrice := (nodePrice / ramMultiple)
 			if math.IsNaN(ramPrice) {
-				klog.V(1).Infof("[Warning] ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
+				log.Warnf("ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
 				ramPrice = 0
 			}
 
@@ -1208,7 +1209,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			}
 			newCnode.RAMBytes = fmt.Sprintf("%f", ram)
 
-			klog.V(4).Infof("Computed \"%s\" RAM Cost := %v", name, newCnode.RAMCost)
+			log.Debugf("Computed \"%s\" RAM Cost := %v", name, newCnode.RAMCost)
 		}
 
 		nodes[name] = &newCnode
@@ -1298,7 +1299,7 @@ func getPodStatefulsets(cache clustercache.ClusterCache, podList []*v1.Pod, clus
 		}
 		s, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector)
 		if err != nil {
-			klog.V(2).Infof("Error doing deployment label conversion: " + err.Error())
+			log.Errorf("Error doing deployment label conversion: " + err.Error())
 		}
 		for _, pod := range podList {
 			labelSet := labels.Set(pod.GetObjectMeta().GetLabels())
@@ -1329,7 +1330,7 @@ func getPodDeployments(cache clustercache.ClusterCache, podList []*v1.Pod, clust
 		}
 		s, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector)
 		if err != nil {
-			klog.V(2).Infof("Error doing deployment label conversion: " + err.Error())
+			log.Errorf("Error doing deployment label conversion: " + err.Error())
 		}
 		for _, pod := range podList {
 			labelSet := labels.Set(pod.GetObjectMeta().GetLabels())
@@ -1528,7 +1529,7 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, cp costAn
 	// for the specific inputs to prevent multiple queries for identical data.
 	key := requestKeyFor(window, resolution, filterNamespace, filterCluster, remoteEnabled)
 
-	klog.V(4).Infof("ComputeCostDataRange with Key: %s", key)
+	log.Debugf("ComputeCostDataRange with Key: %s", key)
 
 	// If there is already a request out that uses the same data, wait for it to return to share the results.
 	// Otherwise, start executing.
@@ -1564,7 +1565,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	// Warn if resolution does not evenly divide window
 	if int64(window.Minutes())%int64(resolution.Minutes()) != 0 {
-		log.Warningf("CostDataRange: window should be divisible by resolution or else samples may be missed: %s %% %s = %dm", window, resolution, int64(window.Minutes())%int64(resolution.Minutes()))
+		log.Warnf("CostDataRange: window should be divisible by resolution or else samples may be missed: %s %% %s = %dm", window, resolution, int64(window.Minutes())%int64(resolution.Minutes()))
 	}
 
 	// Convert to Prometheus-style duration string in terms of m or h
@@ -1577,7 +1578,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 		remoteLayout := "2006-01-02T15:04:05Z"
 		remoteStartStr := window.Start().Format(remoteLayout)
 		remoteEndStr := window.End().Format(remoteLayout)
-		klog.V(1).Infof("Using remote database for query from %s to %s with window %s", remoteStartStr, remoteEndStr, resolution)
+		log.Infof("Using remote database for query from %s to %s with window %s", remoteStartStr, remoteEndStr, resolution)
 		return CostDataRangeFromSQL("", "", resolution.String(), remoteStartStr, remoteEndStr)
 	}
 
@@ -1705,7 +1706,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 	pvClaimMapping, err := GetPVInfo(resPVRequests, clusterID)
 	if err != nil {
 		// Just log for compatibility with KSM less than 1.6
-		klog.Infof("Unable to get PV Data: %s", err.Error())
+		log.Infof("Unable to get PV Data: %s", err.Error())
 	}
 	if pvClaimMapping != nil {
 		err = addPVData(cm.Cache, pvClaimMapping, cp)
@@ -1716,13 +1717,13 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	pvCostMapping, err := GetPVCostMetrics(resPVHourlyCost, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get PV Hourly Cost Data: %s", err.Error())
+		log.Errorf("Unable to get PV Hourly Cost Data: %s", err.Error())
 	}
 
 	unmountedPVs := make(map[string][]*PersistentVolumeClaimData)
 	pvAllocationMapping, err := GetPVAllocationMetrics(resPVCAlloc, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get PV Allocation Cost Data: %s", err.Error())
+		log.Errorf("Unable to get PV Allocation Cost Data: %s", err.Error())
 	}
 	if pvAllocationMapping != nil {
 		addMetricPVData(pvAllocationMapping, pvCostMapping, cp)
@@ -1733,7 +1734,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	nsLabels, err := GetNamespaceLabelsMetrics(resNSLabels, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Namespace Labels for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Namespace Labels for Metrics: %s", err.Error())
 	}
 	if nsLabels != nil {
 		mergeStringMap(namespaceLabelsMapping, nsLabels)
@@ -1741,12 +1742,12 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	podLabels, err := GetPodLabelsMetrics(resPodLabels, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Pod Labels for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Pod Labels for Metrics: %s", err.Error())
 	}
 
 	nsAnnotations, err := GetNamespaceAnnotationsMetrics(resNSAnnotations, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Namespace Annotations for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Namespace Annotations for Metrics: %s", err.Error())
 	}
 	if nsAnnotations != nil {
 		mergeStringMap(namespaceAnnotationsMapping, nsAnnotations)
@@ -1754,55 +1755,55 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	podAnnotations, err := GetPodAnnotationsMetrics(resPodAnnotations, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Pod Annotations for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Pod Annotations for Metrics: %s", err.Error())
 	}
 
 	serviceLabels, err := GetServiceSelectorLabelsMetrics(resServiceLabels, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Service Selector Labels for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Service Selector Labels for Metrics: %s", err.Error())
 	}
 
 	deploymentLabels, err := GetDeploymentMatchLabelsMetrics(resDeploymentLabels, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
 	}
 
 	statefulsetLabels, err := GetStatefulsetMatchLabelsMetrics(resStatefulsetLabels, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
 	}
 
 	podStatefulsetMetricsMapping, err := getPodDeploymentsWithMetrics(statefulsetLabels, podLabels)
 	if err != nil {
-		klog.V(1).Infof("Unable to get match Statefulset Labels Metrics to Pods: %s", err.Error())
+		log.Errorf("Unable to get match Statefulset Labels Metrics to Pods: %s", err.Error())
 	}
 	appendLabelsList(podStatefulsetsMapping, podStatefulsetMetricsMapping)
 
 	podDeploymentsMetricsMapping, err := getPodDeploymentsWithMetrics(deploymentLabels, podLabels)
 	if err != nil {
-		klog.V(1).Infof("Unable to get match Deployment Labels Metrics to Pods: %s", err.Error())
+		log.Errorf("Unable to get match Deployment Labels Metrics to Pods: %s", err.Error())
 	}
 	appendLabelsList(podDeploymentsMapping, podDeploymentsMetricsMapping)
 
 	podDaemonsets, err := GetPodDaemonsetsWithMetrics(resDaemonsets, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Pod Daemonsets for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Pod Daemonsets for Metrics: %s", err.Error())
 	}
 
 	podJobs, err := GetPodJobsWithMetrics(resJobs, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Pod Jobs for Metrics: %s", err.Error())
+		log.Errorf("Unable to get Pod Jobs for Metrics: %s", err.Error())
 	}
 
 	podServicesMetricsMapping, err := getPodServicesWithMetrics(serviceLabels, podLabels)
 	if err != nil {
-		klog.V(1).Infof("Unable to get match Service Labels Metrics to Pods: %s", err.Error())
+		log.Errorf("Unable to get match Service Labels Metrics to Pods: %s", err.Error())
 	}
 	appendLabelsList(podServicesMapping, podServicesMetricsMapping)
 
 	networkUsageMap, err := GetNetworkUsageData(resNetZoneRequests, resNetRegionRequests, resNetInternetRequests, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Network Cost Data: %s", err.Error())
+		log.Errorf("Unable to get Network Cost Data: %s", err.Error())
 		networkUsageMap = make(map[string]*NetworkUsageData)
 	}
 
@@ -1883,37 +1884,37 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 		c, _ := NewContainerMetricFromKey(key)
 		RAMReqV, ok := RAMReqMap[key]
 		if !ok {
-			klog.V(4).Info("no RAM requests for " + key)
+			log.Debug("no RAM requests for " + key)
 			RAMReqV = []*util.Vector{}
 		}
 		RAMUsedV, ok := RAMUsedMap[key]
 		if !ok {
-			klog.V(4).Info("no RAM usage for " + key)
+			log.Debug("no RAM usage for " + key)
 			RAMUsedV = []*util.Vector{}
 		}
 		CPUReqV, ok := CPUReqMap[key]
 		if !ok {
-			klog.V(4).Info("no CPU requests for " + key)
+			log.Debug("no CPU requests for " + key)
 			CPUReqV = []*util.Vector{}
 		}
 		CPUUsedV, ok := CPUUsedMap[key]
 		if !ok {
-			klog.V(4).Info("no CPU usage for " + key)
+			log.Debug("no CPU usage for " + key)
 			CPUUsedV = []*util.Vector{}
 		}
 		RAMAllocsV, ok := RAMAllocMap[key]
 		if !ok {
-			klog.V(4).Info("no RAM allocation for " + key)
+			log.Debug("no RAM allocation for " + key)
 			RAMAllocsV = []*util.Vector{}
 		}
 		CPUAllocsV, ok := CPUAllocMap[key]
 		if !ok {
-			klog.V(4).Info("no CPU allocation for " + key)
+			log.Debug("no CPU allocation for " + key)
 			CPUAllocsV = []*util.Vector{}
 		}
 		GPUReqV, ok := GPUReqMap[key]
 		if !ok {
-			klog.V(4).Info("no GPU requests for " + key)
+			log.Debug("no GPU requests for " + key)
 			GPUReqV = []*util.Vector{}
 		}
 
@@ -1990,7 +1991,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 		// the pod_pvc_allocation metric
 		podPVData, ok := pvAllocationMapping[podKey]
 		if !ok {
-			klog.V(4).Infof("Failed to locate pv allocation mapping for missing pod.")
+			log.Debugf("Failed to locate pv allocation mapping for missing pod.")
 		}
 
 		// Delete the current pod key from potentially unmounted pvs
@@ -2002,7 +2003,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 		if usage, ok := networkUsageMap[podKey]; ok {
 			netCosts, err := GetNetworkCost(usage, cp)
 			if err != nil {
-				klog.V(3).Infof("Error pulling network costs: %s", err.Error())
+				log.Errorf("Error pulling network costs: %s", err.Error())
 			} else {
 				podNetworkCosts = netCosts
 			}
@@ -2062,7 +2063,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 
 	unmounted := findUnmountedPVCostData(cm.ClusterMap, unmountedPVs, namespaceLabelsMapping, namespaceAnnotationsMapping)
 	for k, costs := range unmounted {
-		klog.V(4).Infof("Unmounted PVs in Namespace/ClusterID: %s/%s", costs.Namespace, costs.ClusterID)
+		log.Debugf("Unmounted PVs in Namespace/ClusterID: %s/%s", costs.Namespace, costs.ClusterID)
 
 		if costDataPassesFilters(cm.ClusterMap, costs, filterNamespace, filterCluster) {
 			containerNameCost[k] = costs
@@ -2073,7 +2074,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, cp costAnalyzerC
 		dur, off := window.DurationOffsetStrings()
 		err = findDeletedNodeInfo(cli, missingNodes, dur, off)
 		if err != nil {
-			klog.V(1).Infof("Error fetching historical node data: %s", err.Error())
+			log.Errorf("Error fetching historical node data: %s", err.Error())
 		}
 	}
 
@@ -2116,7 +2117,7 @@ func applyAllocationToRequests(allocationMap map[string][]*util.Vector, requestM
 func addMetricPVData(pvAllocationMap map[string][]*PersistentVolumeClaimData, pvCostMap map[string]*costAnalyzerCloud.PV, cp costAnalyzerCloud.Provider) {
 	cfg, err := cp.GetConfig()
 	if err != nil {
-		klog.V(1).Infof("Failed to get provider config while adding pv metrics data.")
+		log.Errorf("Failed to get provider config while adding pv metrics data.")
 		return
 	}
 
@@ -2216,7 +2217,7 @@ func getAllocatableVGPUs(cache clustercache.ClusterCache) (float64, error) {
 					if strings.Contains(arg, "--vgpu=") {
 						vgpus, err := strconv.ParseFloat(arg[strings.IndexByte(arg, '=')+1:], 64)
 						if err != nil {
-							klog.V(1).Infof("failed to parse vgpu allocation string %s: %v", arg, err)
+							log.Errorf("failed to parse vgpu allocation string %s: %v", arg, err)
 							continue
 						}
 						vgpuCount = vgpus
@@ -2244,7 +2245,7 @@ type PersistentVolumeClaimData struct {
 func measureTime(start time.Time, threshold time.Duration, name string) {
 	elapsed := time.Since(start)
 	if elapsed > threshold {
-		klog.V(3).Infof("[Profiler] %s: %s", elapsed, name)
+		log.Infof("[Profiler] %s: %s", elapsed, name)
 	}
 }
 

+ 22 - 24
pkg/costmodel/metrics.go

@@ -22,8 +22,6 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	dto "github.com/prometheus/client_model/go"
 	v1 "k8s.io/api/core/v1"
-
-	"k8s.io/klog"
 )
 
 //--------------------------------------------------------------------------
@@ -426,7 +424,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 		}
 
 		for {
-			klog.V(4).Info("Recording prices...")
+			log.Debugf("Recording prices...")
 			podlist := cmme.KubeClusterCache.GetAllPods()
 			podStatus := make(map[string]v1.PodPhase)
 			for _, pod := range podlist {
@@ -437,14 +435,14 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 
 			provisioner, clusterManagementCost, err := cmme.CloudProvider.ClusterManagementPricing()
 			if err != nil {
-				klog.V(1).Infof("Error getting cluster management cost %s", err.Error())
+				log.Errorf("Error getting cluster management cost %s", err.Error())
 			}
 			cmme.ClusterManagementCostRecorder.WithLabelValues(provisioner).Set(clusterManagementCost)
 
 			// Record network pricing at global scope
 			networkCosts, err := cmme.CloudProvider.NetworkPricing()
 			if err != nil {
-				klog.V(4).Infof("Failed to retrieve network costs: %s", err.Error())
+				log.Debugf("Failed to retrieve network costs: %s", err.Error())
 			} else {
 				cmme.NetworkZoneEgressRecorder.Set(networkCosts.ZoneNetworkEgressCost)
 				cmme.NetworkRegionEgressRecorder.Set(networkCosts.RegionNetworkEgressCost)
@@ -471,7 +469,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 			// TODO: Pass CloudProvider into CostModel on instantiation so this isn't so awkward
 			nodes, err := cmme.Model.GetNodeCost(cmme.CloudProvider)
 			if err != nil {
-				log.Warningf("Metric emission: error getting Node cost: %s", err)
+				log.Warnf("Metric emission: error getting Node cost: %s", err)
 			}
 			for nodeName, node := range nodes {
 				// Emit costs, guarding against NaN inputs for custom pricing.
@@ -540,14 +538,14 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 					avgCosts.CpuCostAverage = (avgCosts.CpuCostAverage*avgCosts.NumCpuDataPoints + cpuCost) / (avgCosts.NumCpuDataPoints + 1)
 					avgCosts.NumCpuDataPoints += 1
 				} else {
-					log.Warningf("CPU cost outlier detected; skipping data point.")
+					log.Warnf("CPU cost outlier detected; skipping data point.")
 				}
 				if ramCost < outlierFactor*avgCosts.RamCostAverage {
 					cmme.RAMPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(ramCost)
 					avgCosts.RamCostAverage = (avgCosts.RamCostAverage*avgCosts.NumRamDataPoints + ramCost) / (avgCosts.NumRamDataPoints + 1)
 					avgCosts.NumRamDataPoints += 1
 				} else {
-					log.Warningf("RAM cost outlier detected; skipping data point.")
+					log.Warnf("RAM cost outlier detected; skipping data point.")
 				}
 				// skip recording totalCost if any constituent costs were outliers
 				if cpuCost < outlierFactor*avgCosts.CpuCostAverage &&
@@ -568,7 +566,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 			// TODO: Pass CloudProvider into CostModel on instantiation so this isn't so awkward
 			loadBalancers, err := cmme.Model.GetLBCost(cmme.CloudProvider)
 			if err != nil {
-				log.Warningf("Metric emission: error getting LoadBalancer cost: %s", err)
+				log.Warnf("Metric emission: error getting LoadBalancer cost: %s", err)
 			}
 			for lbKey, lb := range loadBalancers {
 				// TODO: parse (if necessary) and calculate cost associated with loadBalancer based on dynamic cloud prices fetched into each lb struct on GetLBCost() call
@@ -644,7 +642,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 
 				parameters, ok := storageClassMap[pv.Spec.StorageClassName]
 				if !ok {
-					klog.V(4).Infof("Unable to find parameters for storage class \"%s\". Does pv \"%s\" have a storageClassName?", pv.Spec.StorageClassName, pv.Name)
+					log.Debugf("Unable to find parameters for storage class \"%s\". Does pv \"%s\" have a storageClassName?", pv.Spec.StorageClassName, pv.Name)
 				}
 				var region string
 				if r, ok := util.GetRegion(pv.Labels); ok {
@@ -668,43 +666,43 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 
 			for labelString, seen := range nodeSeen {
 				if !seen {
-					klog.V(4).Infof("Removing %s from nodes", labelString)
+					log.Debugf("Removing %s from nodes", labelString)
 					labels := getLabelStringsFromKey(labelString)
 					ok := cmme.NodeTotalPriceRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from totalprice", labelString)
+						log.Debugf("removed %s from totalprice", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from totalprice", labelString)
+						log.Infof("FAILURE TO REMOVE %s from totalprice", labelString)
 					}
 					ok = cmme.NodeSpotRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from spot records", labelString)
+						log.Debugf("removed %s from spot records", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from spot records", labelString)
+						log.Infof("FAILURE TO REMOVE %s from spot records", labelString)
 					}
 					ok = cmme.CPUPriceRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from cpuprice", labelString)
+						log.Debugf("removed %s from cpuprice", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from cpuprice", labelString)
+						log.Infof("FAILURE TO REMOVE %s from cpuprice", labelString)
 					}
 					ok = cmme.GPUPriceRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from gpuprice", labelString)
+						log.Debugf("removed %s from gpuprice", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from gpuprice", labelString)
+						log.Infof("FAILURE TO REMOVE %s from gpuprice", labelString)
 					}
 					ok = cmme.GPUCountRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from gpucount", labelString)
+						log.Debugf("removed %s from gpucount", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from gpucount", labelString)
+						log.Infof("FAILURE TO REMOVE %s from gpucount", labelString)
 					}
 					ok = cmme.RAMPriceRecorder.DeleteLabelValues(labels...)
 					if ok {
-						klog.V(4).Infof("removed %s from ramprice", labelString)
+						log.Debugf("removed %s from ramprice", labelString)
 					} else {
-						klog.Infof("FAILURE TO REMOVE %s from ramprice", labelString)
+						log.Infof("FAILURE TO REMOVE %s from ramprice", labelString)
 					}
 					delete(nodeSeen, labelString)
 					delete(nodeCostAverages, labelString)
@@ -717,7 +715,7 @@ func (cmme *CostModelMetricsEmitter) Start() bool {
 					labels := getLabelStringsFromKey(labelString)
 					ok := cmme.LBCostRecorder.DeleteLabelValues(labels...)
 					if !ok {
-						log.Warningf("Metric emission: failed to delete LoadBalancer with labels: %v", labels)
+						log.Warnf("Metric emission: failed to delete LoadBalancer with labels: %v", labels)
 					}
 					delete(loadBalancerSeen, labelString)
 				} else {

+ 1 - 1
pkg/costmodel/promparsers.go

@@ -118,7 +118,7 @@ func GetPVAllocationMetrics(qrs []*prom.QueryResult, defaultClusterID string) (m
 
 		pvName, err := val.GetString("persistentvolume")
 		if err != nil {
-			log.Warningf("persistentvolume field does not exist for pv %s", pvcName) // This is possible for an unfulfilled claim
+			log.Warnf("persistentvolume field does not exist for pv %s", pvcName) // This is possible for an unfulfilled claim
 			continue
 		}
 

+ 65 - 34
pkg/costmodel/router.go

@@ -21,7 +21,6 @@ import (
 	"github.com/microcosm-cc/bluemonday"
 
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog"
 
 	"github.com/julienschmidt/httprouter"
 
@@ -200,7 +199,7 @@ func filterFields(fields string, data map[string]*CostData) map[string]CostData
 	fmap := make(map[string]bool)
 	for _, f := range fs {
 		fieldNameLower := strings.ToLower(f) // lowercase the field name for case-insensitive matching
-		klog.V(1).Infof("to delete: %s", fieldNameLower)
+		log.Debugf("to delete: %s", fieldNameLower)
 		fmap[fieldNameLower] = true
 	}
 	filteredData := make(map[string]CostData)
@@ -264,7 +263,7 @@ func WrapData(data interface{}, err error) []byte {
 	var resp []byte
 
 	if err != nil {
-		klog.V(1).Infof("Error returned to client: %s", err.Error())
+		log.Errorf("Error returned to client: %s", err.Error())
 		resp, _ = json.Marshal(&Response{
 			Code:    http.StatusInternalServerError,
 			Status:  "error",
@@ -286,7 +285,7 @@ func WrapDataWithMessage(data interface{}, err error, message string) []byte {
 	var resp []byte
 
 	if err != nil {
-		klog.V(1).Infof("Error returned to client: %s", err.Error())
+		log.Errorf("Error returned to client: %s", err.Error())
 		resp, _ = json.Marshal(&Response{
 			Code:    http.StatusInternalServerError,
 			Status:  "error",
@@ -309,7 +308,7 @@ func WrapDataWithWarning(data interface{}, err error, warning string) []byte {
 	var resp []byte
 
 	if err != nil {
-		klog.V(1).Infof("Error returned to client: %s", err.Error())
+		log.Errorf("Error returned to client: %s", err.Error())
 		resp, _ = json.Marshal(&Response{
 			Code:    http.StatusInternalServerError,
 			Status:  "error",
@@ -333,7 +332,7 @@ func WrapDataWithMessageAndWarning(data interface{}, err error, message, warning
 	var resp []byte
 
 	if err != nil {
-		klog.V(1).Infof("Error returned to client: %s", err.Error())
+		log.Errorf("Error returned to client: %s", err.Error())
 		resp, _ = json.Marshal(&Response{
 			Code:    http.StatusInternalServerError,
 			Status:  "error",
@@ -369,7 +368,7 @@ func (a *Accesses) RefreshPricingData(w http.ResponseWriter, r *http.Request, ps
 
 	err := a.CloudProvider.DownloadPricingData()
 	if err != nil {
-		klog.V(1).Infof("Error refreshing pricing data: %s", err.Error())
+		log.Errorf("Error refreshing pricing data: %s", err.Error())
 	}
 
 	w.Write(WrapData(nil, err))
@@ -581,7 +580,7 @@ func (a *Accesses) UpdateSpotInfoConfigs(w http.ResponseWriter, r *http.Request,
 	w.Write(WrapData(data, err))
 	err = a.CloudProvider.DownloadPricingData()
 	if err != nil {
-		klog.V(1).Infof("Error redownloading data on config update: %s", err.Error())
+		log.Errorf("Error redownloading data on config update: %s", err.Error())
 	}
 	return
 }
@@ -692,8 +691,24 @@ func (a *Accesses) PrometheusQuery(w http.ResponseWriter, r *http.Request, _ htt
 		return
 	}
 
+	// Attempt to parse time as either a unix timestamp or as an RFC3339 value
+	var timeVal time.Time
+	timeStr := qp.Get("time", "")
+	if len(timeStr) > 0 {
+		if t, err := strconv.ParseInt(timeStr, 10, 64); err == nil {
+			timeVal = time.Unix(t, 0)
+		} else if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
+			timeVal = t
+		}
+
+		// If time is given but not parseable, return an error
+		if timeVal.IsZero() {
+			http.Error(w, fmt.Sprintf("time must be a unix timestamp or RFC3339 value; illegal value given: %s", timeStr), http.StatusBadRequest)
+			return
+		}
+	}
+
 	ctx := prom.NewNamedContext(a.PrometheusClient, prom.FrontendContextName)
-	body, err := ctx.RawQuery(query)
+	body, err := ctx.RawQuery(query, timeVal)
 	if err != nil {
 		w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
 		return
@@ -745,8 +760,24 @@ func (a *Accesses) ThanosQuery(w http.ResponseWriter, r *http.Request, _ httprou
 		return
 	}
 
+	// Attempt to parse time as either a unix timestamp or as an RFC3339 value
+	var timeVal time.Time
+	timeStr := qp.Get("time", "")
+	if len(timeStr) > 0 {
+		if t, err := strconv.ParseInt(timeStr, 10, 64); err == nil {
+			timeVal = time.Unix(t, 0)
+		} else if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
+			timeVal = t
+		}
+
+		// If time is given but not parseable, return an error
+		if timeVal.IsZero() {
+			http.Error(w, fmt.Sprintf("time must be a unix timestamp or RFC3339 value; illegal value given: %s", timeStr), http.StatusBadRequest)
+			return
+		}
+	}
+
 	ctx := prom.NewNamedContext(a.ThanosClient, prom.FrontendContextName)
-	body, err := ctx.RawQuery(query)
+	body, err := ctx.RawQuery(query, timeVal)
 	if err != nil {
 		w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
 		return
@@ -833,7 +864,7 @@ func (a *Accesses) GetPrometheusQueueState(w http.ResponseWriter, _ *http.Reques
 	if thanos.IsEnabled() {
 		thanosQueueState, err := prom.GetPrometheusQueueState(a.ThanosClient)
 		if err != nil {
-			log.Warningf("Error getting Thanos queue state: %s", err)
+			log.Warnf("Error getting Thanos queue state: %s", err)
 		} else {
 			result["thanos"] = thanosQueueState
 		}
@@ -860,7 +891,7 @@ func (a *Accesses) GetPrometheusMetrics(w http.ResponseWriter, _ *http.Request,
 	if thanos.IsEnabled() {
 		thanosMetrics, err := prom.GetPrometheusMetrics(a.ThanosClient, thanos.QueryOffset())
 		if err != nil {
-			log.Warningf("Error getting Thanos queue state: %s", err)
+			log.Warnf("Error getting Thanos queue state: %s", err)
 		} else {
 			result["thanos"] = thanosMetrics
 		}
@@ -1288,7 +1319,7 @@ func (a *Accesses) Status(w http.ResponseWriter, r *http.Request, _ httprouter.P
 // captures the panic event in sentry
 func capturePanicEvent(err string, stack string) {
 	msg := fmt.Sprintf("Panic: %s\nStackTrace: %s\n", err, stack)
-	klog.V(1).Infoln(msg)
+	log.Infof("%s", msg)
 	sentry.CurrentHub().CaptureEvent(&sentry.Event{
 		Level:   sentry.LevelError,
 		Message: msg,
@@ -1316,7 +1347,7 @@ func handlePanic(p errors.Panic) bool {
 }
 
 func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses {
-	klog.V(1).Infof("Starting cost-model (git commit \"%s\")", env.GetAppVersion())
+	log.Infof("Starting cost-model (git commit \"%s\")", env.GetAppVersion())
 
 	configWatchers := watcher.NewConfigMapWatchers(additionalConfigWatchers...)
 
@@ -1324,22 +1355,22 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 	if errorReportingEnabled {
 		err = sentry.Init(sentry.ClientOptions{Release: env.GetAppVersion()})
 		if err != nil {
-			klog.Infof("Failed to initialize sentry for error reporting")
+			log.Infof("Failed to initialize sentry for error reporting")
 		} else {
 			err = errors.SetPanicHandler(handlePanic)
 			if err != nil {
-				klog.Infof("Failed to set panic handler: %s", err)
+				log.Infof("Failed to set panic handler: %s", err)
 			}
 		}
 	}
 
 	address := env.GetPrometheusServerEndpoint()
 	if address == "" {
-		klog.Fatalf("No address for prometheus set in $%s. Aborting.", env.PrometheusServerEndpointEnvVar)
+		log.Fatalf("No address for prometheus set in $%s. Aborting.", env.PrometheusServerEndpointEnvVar)
 	}
 
 	queryConcurrency := env.GetMaxQueryConcurrency()
-	klog.Infof("Prometheus/Thanos Client Max Concurrency set to %d", queryConcurrency)
+	log.Infof("Prometheus/Thanos Client Max Concurrency set to %d", queryConcurrency)
 
 	timeout := 120 * time.Second
 	keepAlive := 120 * time.Second
@@ -1369,26 +1400,26 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 		QueryLogFile:     "",
 	})
 	if err != nil {
-		klog.Fatalf("Failed to create prometheus client, Error: %v", err)
+		log.Fatalf("Failed to create prometheus client, Error: %v", err)
 	}
 
 	m, err := prom.Validate(promCli)
 	if err != nil || !m.Running {
 		if err != nil {
-			klog.Errorf("Failed to query prometheus at %s. Error: %s . Troubleshooting help available at: %s", address, err.Error(), prom.PrometheusTroubleshootingURL)
+			log.Errorf("Failed to query prometheus at %s. Error: %s . Troubleshooting help available at: %s", address, err.Error(), prom.PrometheusTroubleshootingURL)
 		} else if !m.Running {
-			klog.Errorf("Prometheus at %s is not running. Troubleshooting help available at: %s", address, prom.PrometheusTroubleshootingURL)
+			log.Errorf("Prometheus at %s is not running. Troubleshooting help available at: %s", address, prom.PrometheusTroubleshootingURL)
 		}
 	} else {
-		klog.V(1).Info("Success: retrieved the 'up' query against prometheus at: " + address)
+		log.Infof("Success: retrieved the 'up' query against prometheus at: " + address)
 	}
 
 	api := prometheusAPI.NewAPI(promCli)
 	_, err = api.Config(context.Background())
 	if err != nil {
-		klog.Infof("No valid prometheus config file at %s. Error: %s . Troubleshooting help available at: %s. Ignore if using cortex/thanos here.", address, err.Error(), prom.PrometheusTroubleshootingURL)
+		log.Infof("No valid prometheus config file at %s. Error: %s . Troubleshooting help available at: %s. Ignore if using cortex/thanos here.", address, err.Error(), prom.PrometheusTroubleshootingURL)
 	} else {
-		klog.Infof("Retrieved a prometheus config file from: %s", address)
+		log.Infof("Retrieved a prometheus config file from: %s", address)
 	}
 
 	// Lookup scrape interval for kubecost job, update if found
@@ -1397,7 +1428,7 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 		scrapeInterval = si
 	}
 
-	klog.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
+	log.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
 
 	// Kubernetes API setup
 	var kc *rest.Config
@@ -1449,9 +1480,9 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 	for _, cw := range watchedConfigs {
 		configs, err := kubeClientset.CoreV1().ConfigMaps(kubecostNamespace).Get(context.Background(), cw, metav1.GetOptions{})
 		if err != nil {
-			klog.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
+			log.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
 		} else {
-			klog.Infof("Found configmap %s, watching...", configs.Name)
+			log.Infof("Found configmap %s, watching...", configs.Name)
 			watchConfigFunc(configs)
 		}
 	}
@@ -1461,13 +1492,13 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 	remoteEnabled := env.IsRemoteEnabled()
 	if remoteEnabled {
 		info, err := cloudProvider.ClusterInfo()
-		klog.Infof("Saving cluster  with id:'%s', and name:'%s' to durable storage", info["id"], info["name"])
+		log.Infof("Saving cluster  with id:'%s', and name:'%s' to durable storage", info["id"], info["name"])
 		if err != nil {
-			klog.Infof("Error saving cluster id %s", err.Error())
+			log.Infof("Error saving cluster id %s", err.Error())
 		}
 		_, _, err = cloud.GetOrCreateClusterMeta(info["id"], info["name"])
 		if err != nil {
-			klog.Infof("Unable to set cluster id '%s' for cluster '%s', %s", info["id"], info["name"], err.Error())
+			log.Infof("Unable to set cluster id '%s' for cluster '%s', %s", info["id"], info["name"], err.Error())
 		}
 	}
 
@@ -1494,16 +1525,16 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 
 			_, err = prom.Validate(thanosCli)
 			if err != nil {
-				klog.V(1).Infof("[Warning] Failed to query Thanos at %s. Error: %s.", thanosAddress, err.Error())
+				log.Warnf("Failed to query Thanos at %s. Error: %s.", thanosAddress, err.Error())
 				thanosClient = thanosCli
 			} else {
-				klog.V(1).Info("Success: retrieved the 'up' query against Thanos at: " + thanosAddress)
+				log.Infof("Success: retrieved the 'up' query against Thanos at: " + thanosAddress)
 
 				thanosClient = thanosCli
 			}
 
 		} else {
-			klog.Infof("Error resolving environment variable: $%s", env.ThanosQueryUrlEnvVar)
+			log.Infof("Error resolving environment variable: $%s", env.ThanosQueryUrlEnvVar)
 		}
 	}
 
@@ -1583,7 +1614,7 @@ func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses
 
 	err = a.CloudProvider.DownloadPricingData()
 	if err != nil {
-		klog.V(1).Info("Failed to download pricing data: " + err.Error())
+		log.Infof("Failed to download pricing data: " + err.Error())
 	}
 
 	// Warm the aggregate cache unless explicitly set to false

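The time-parameter handling added to PrometheusQuery and ThanosQuery above accepts either a unix timestamp or an RFC3339 string. Distilled into a standalone helper for illustration (parseQueryTime is our name, not the repo's):

    package main

    import (
        "fmt"
        "strconv"
        "time"
    )

    // parseQueryTime mirrors the handler logic: try a unix timestamp first,
    // then fall back to RFC3339; anything else is rejected.
    func parseQueryTime(timeStr string) (time.Time, error) {
        if t, err := strconv.ParseInt(timeStr, 10, 64); err == nil {
            return time.Unix(t, 0), nil
        }
        if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
            return t, nil
        }
        return time.Time{}, fmt.Errorf("time must be a unix timestamp or RFC3339 value; illegal value given: %s", timeStr)
    }

    func main() {
        for _, s := range []string{"1609459200", "2021-01-01T00:00:00Z", "yesterday"} {
            t, err := parseQueryTime(s)
            fmt.Println(s, "=>", t, err)
        }
    }
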
+ 4 - 5
pkg/costmodel/settings.go

@@ -7,7 +7,6 @@ import (
 	"github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/patrickmn/go-cache"
-	"k8s.io/klog"
 )
 
 // InitializeSettingsPubSub sets up the pub/sub mechanisms and kicks of
@@ -82,7 +81,7 @@ func (a *Accesses) SubscribeToDiscountChanges(ch chan string) {
 func (a *Accesses) customPricingHasChanged() bool {
 	customPricing, err := a.CloudProvider.GetConfig()
 	if err != nil || customPricing == nil {
-		klog.Errorf("error accessing cloud provider configuration: %s", err)
+		log.Errorf("error accessing cloud provider configuration: %s", err)
 		return false
 	}
 
@@ -105,7 +104,7 @@ func (a *Accesses) customPricingHasChanged() bool {
 	}
 	cpStrCached, ok := val.(string)
 	if !ok {
-		klog.Errorf("caching error: failed to cast custom pricing to string")
+		log.Errorf("caching error: failed to cast custom pricing to string")
 	}
 	if cpStr == cpStrCached {
 		return false
@@ -122,7 +121,7 @@ func (a *Accesses) customPricingHasChanged() bool {
 func (a *Accesses) discountHasChanged() bool {
 	customPricing, err := a.CloudProvider.GetConfig()
 	if err != nil || customPricing == nil {
-		klog.Errorf("error accessing cloud provider configuration: %s", err)
+		log.Errorf("error accessing cloud provider configuration: %s", err)
 		return false
 	}
 
@@ -144,7 +143,7 @@ func (a *Accesses) discountHasChanged() bool {
 	}
 	discStrCached, ok := val.(string)
 	if !ok {
-		klog.Errorf("caching error: failed to cast discount to string")
+		log.Errorf("caching error: failed to cast discount to string")
 	}
 	if discStr == discStrCached {
 		return false

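Both customPricingHasChanged and discountHasChanged follow the same compare-against-cache pattern: serialize the current config, compare it to the cached copy, and refresh the cache. A rough sketch of the pattern with go-cache (the key name and first-observation behavior here are our assumptions):

    package main

    import (
        "fmt"
        "time"

        gocache "github.com/patrickmn/go-cache"
    )

    // hasChanged reports whether value differs from the copy cached under key,
    // caching the new value either way.
    func hasChanged(c *gocache.Cache, key, value string) bool {
        cached, found := c.Get(key)
        c.Set(key, value, gocache.NoExpiration)
        if !found {
            return false // first observation; nothing to compare against
        }
        prev, ok := cached.(string)
        if !ok {
            fmt.Println("caching error: failed to cast cached value to string")
            return false
        }
        return prev != value
    }

    func main() {
        c := gocache.New(5*time.Minute, 10*time.Minute)
        fmt.Println(hasChanged(c, "customPricing", `{"discount":"10%"}`)) // false
        fmt.Println(hasChanged(c, "customPricing", `{"discount":"20%"}`)) // true
    }
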
+ 2 - 3
pkg/costmodel/sql.go

@@ -5,10 +5,9 @@ import (
 	"fmt"
 	"time"
 
-	"k8s.io/klog"
-
 	costAnalyzerCloud "github.com/kubecost/cost-model/pkg/cloud"
 	"github.com/kubecost/cost-model/pkg/env"
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util"
 	"github.com/kubecost/cost-model/pkg/util/json"
 
@@ -302,7 +301,7 @@ func CostDataRangeFromSQL(field string, value string, window string, start strin
 
 	volumes, err := getPVCosts(db)
 	if err != nil {
-		klog.Infof("Error fetching pv data from sql: %s. Skipping PVData", err.Error())
+		log.Infof("Error fetching pv data from sql: %s. Skipping PVData", err.Error())
 	} else {
 		query = `SELECT time_bucket($1, time) AS bucket, name, avg(value), labels->>'persistentvolumeclaim' AS claim, labels->>'pod' AS pod,labels->>'namespace' AS namespace, labels->>'persistentvolume' AS volumename, labels->>'cluster_id' AS clusterid
 		FROM metrics

+ 10 - 14
pkg/env/costmodelenv.go

@@ -62,11 +62,11 @@ const (
 
 	UTCOffsetEnvVar = "UTC_OFFSET"
 
-	CacheWarmingEnabledEnvVar    = "CACHE_WARMING_ENABLED"
-	ETLEnabledEnvVar             = "ETL_ENABLED"
-	ETLMaxBatchHours             = "ETL_MAX_BATCH_HOURS"
-	ETLResolutionSeconds         = "ETL_RESOLUTION_SECONDS"
-	LegacyExternalAPIDisabledVar = "LEGACY_EXTERNAL_API_DISABLED"
+	CacheWarmingEnabledEnvVar            = "CACHE_WARMING_ENABLED"
+	ETLEnabledEnvVar                     = "ETL_ENABLED"
+	ETLMaxPrometheusQueryDurationMinutes = "ETL_MAX_PROMETHEUS_QUERY_DURATION_MINUTES"
+	ETLResolutionSeconds                 = "ETL_RESOLUTION_SECONDS"
+	LegacyExternalAPIDisabledVar         = "LEGACY_EXTERNAL_API_DISABLED"
 
 	PromClusterIDLabelEnvVar = "PROM_CLUSTER_ID_LABEL"
 
@@ -397,7 +397,7 @@ func GetParsedUTCOffset() time.Duration {
 		regex := regexp.MustCompile(`^(\+|-)(\d\d):(\d\d)$`)
 		match := regex.FindStringSubmatch(offsetStr)
 		if match == nil {
-			log.Warningf("Illegal UTC offset: %s", offsetStr)
+			log.Warnf("Illegal UTC offset: %s", offsetStr)
 			return offset
 		}
 
@@ -431,14 +431,10 @@ func IsETLEnabled() bool {
 	return GetBool(ETLEnabledEnvVar, true)
 }
 
-// GetETLMaxBatchDuration limits the window duration of the most expensive ETL
-// queries to a maximum batch size, such that queries can be tuned to avoid
-// timeout for large windows; e.g. if a 24h query is expected to timeout, but
-// a 6h query is expected to complete in 1m, then 6h could be a good value.
-func GetETLMaxBatchDuration() time.Duration {
-	// Default to 6h
-	hrs := time.Duration(GetInt64(ETLMaxBatchHours, 6))
-	return hrs * time.Hour
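+// GetETLMaxPrometheusQueryDuration returns the maximum duration of a single
+// ETL Prometheus query, configured in minutes and defaulting to 24 hours.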
+func GetETLMaxPrometheusQueryDuration() time.Duration {
+	dayMins := 60 * 24
+	mins := time.Duration(GetInt64(ETLMaxPrometheusQueryDurationMinutes, int64(dayMins)))
+	return mins * time.Minute
 }
 
 // GetETLResolution determines the resolution of ETL queries. The smaller the

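For reference, the query-duration limit is now configured in minutes rather than hours. A hedged sketch of the lookup behavior, with getInt64 standing in for the repo's env.GetInt64:

    package main

    import (
        "fmt"
        "os"
        "strconv"
        "time"
    )

    // getInt64 reads an int64 env var, falling back to a default when the
    // variable is unset or unparseable.
    func getInt64(key string, def int64) int64 {
        if s := os.Getenv(key); s != "" {
            if v, err := strconv.ParseInt(s, 10, 64); err == nil {
                return v
            }
        }
        return def
    }

    func main() {
        os.Setenv("ETL_MAX_PROMETHEUS_QUERY_DURATION_MINUTES", "360")

        dayMins := int64(60 * 24)
        mins := time.Duration(getInt64("ETL_MAX_PROMETHEUS_QUERY_DURATION_MINUTES", dayMins))
        fmt.Println(mins * time.Minute) // 6h0m0s
    }
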
+ 177 - 369
pkg/kubecost/allocation.go

@@ -403,7 +403,13 @@ func (a *Allocation) NetworkTotalCost() float64 {
 }
 
 // LBTotalCost calculates total LB cost of Allocation including adjustment
+// TODO deprecate
 func (a *Allocation) LBTotalCost() float64 {
+	return a.LoadBalancerTotalCost()
+}
+
+// LoadBalancerTotalCost calculates total LB cost of Allocation including adjustment
+func (a *Allocation) LoadBalancerTotalCost() float64 {
 	if a == nil {
 		return 0.0
 	}
@@ -679,7 +685,7 @@ func (a *Allocation) String() string {
 
 func (a *Allocation) add(that *Allocation) {
 	if a == nil {
-		log.Warningf("Allocation.AggregateBy: trying to add a nil receiver")
+		log.Warnf("Allocation.AggregateBy: trying to add a nil receiver")
 		return
 	}
 
@@ -828,6 +834,8 @@ type AllocationAggregationOptions struct {
 	IdleByNode            bool
 	LabelConfig           *LabelConfig
 	MergeUnallocated      bool
+	Reconcile             bool
+	ReconcileNetwork      bool
 	ShareFuncs            []AllocationMatchFunc
 	ShareIdle             string
 	ShareSplit            string
@@ -872,14 +880,14 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 	//     the output (i.e. they can be used to generate a valid key for
 	//     the given properties) then aggregate; otherwise... ignore them?
 	//
-	// 10. If the merge idle option is enabled, merge any remaining idle
+	// 10. Distribute any undistributed idle, in the case that idle
+	//     coefficients end up being zero and some idle is not shared.
+	//
+	// 11. If the merge idle option is enabled, merge any remaining idle
 	//     allocations into a single idle allocation. If there was any idle
 	//	   whose costs were not distributed because there was no usage of a
 	//     specific resource type, re-add the idle to the aggregation with
 	//     only that type.
-	//
-	// 11. Distribute any undistributed idle, in the case that idle
-	//     coefficients end up being zero and some idle is not shared.
 
 	if as.IsEmpty() {
 		return nil
@@ -893,7 +901,12 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 		options.LabelConfig = NewLabelConfig()
 	}
 
-	var undistributedIdleMap map[string]bool
+	// idleFiltrationCoefficients relies on this being explicitly set
+	if options.ShareIdle != ShareWeighted {
+		options.ShareIdle = ShareNone
+	}
+
+	var allocatedTotalsMap map[string]map[string]float64
 
 	// If aggregateBy is nil, we don't aggregate anything. On the other hand,
 	// an empty slice implies that we should aggregate everything. See
@@ -1018,9 +1031,9 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 	// the shared allocations).
 	var idleCoefficients map[string]map[string]map[string]float64
 	if idleSet.Length() > 0 && options.ShareIdle != ShareNone {
-		idleCoefficients, undistributedIdleMap, err = computeIdleCoeffs(options, as, shareSet)
+		idleCoefficients, allocatedTotalsMap, err = computeIdleCoeffs(options, as, shareSet)
 		if err != nil {
-			log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
+			log.Warnf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
 			return fmt.Errorf("error computing idle coefficients: %s", err)
 		}
 	}
@@ -1144,11 +1157,11 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 
 				// Make sure idle coefficients exist
 				if _, ok := idleCoefficients[idleId]; !ok {
-					log.Warningf("AllocationSet.AggregateBy: error getting idle coefficient: no idleId '%s' for '%s'", idleId, alloc.Name)
+					log.Warnf("AllocationSet.AggregateBy: error getting idle coefficient: no idleId '%s' for '%s'", idleId, alloc.Name)
 					continue
 				}
 				if _, ok := idleCoefficients[idleId][alloc.Name]; !ok {
-					log.Warningf("AllocationSet.AggregateBy: error getting idle coefficient for '%s'", alloc.Name)
+					log.Warnf("AllocationSet.AggregateBy: error getting idle coefficient for '%s'", alloc.Name)
 					continue
 				}
 
@@ -1200,11 +1213,11 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 
 				// Make sure idle coefficients exist
 				if _, ok := idleCoefficients[idleId]; !ok {
-					log.Warningf("AllocationSet.AggregateBy: error getting idle coefficient: no idleId '%s' for '%s'", idleId, alloc.Name)
+					log.Warnf("AllocationSet.AggregateBy: error getting idle coefficient: no idleId '%s' for '%s'", idleId, alloc.Name)
 					continue
 				}
 				if _, ok := idleCoefficients[idleId][alloc.Name]; !ok {
-					log.Warningf("AllocationSet.AggregateBy: error getting idle coefficient for '%s'", alloc.Name)
+					log.Warnf("AllocationSet.AggregateBy: error getting idle coefficient for '%s'", alloc.Name)
 					continue
 				}
 
@@ -1264,6 +1277,8 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 				idleAlloc.CPUCoreHours *= resourceCoeffs["cpu"]
 				idleAlloc.RAMCost *= resourceCoeffs["ram"]
 				idleAlloc.RAMByteHours *= resourceCoeffs["ram"]
+				idleAlloc.GPUCost *= resourceCoeffs["gpu"]
+				idleAlloc.GPUHours *= resourceCoeffs["gpu"]
 			}
 		}
 	}
@@ -1274,7 +1289,7 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 			for _, sharedAlloc := range shareSet.allocations {
 				if _, ok := shareCoefficients[alloc.Name]; !ok {
 					if !alloc.IsIdle() && !alloc.IsUnmounted() {
-						log.Warningf("AllocationSet.AggregateBy: error getting share coefficienct for '%s'", alloc.Name)
+						log.Warnf("AllocationSet.AggregateBy: error getting share coefficienct for '%s'", alloc.Name)
 					}
 					continue
 				}
@@ -1304,16 +1319,7 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 		}
 	}
 
-	// (10) Combine all idle allocations into a single "__idle__" allocation
-	if !options.SplitIdle {
-		for _, idleAlloc := range aggSet.IdleAllocations() {
-			aggSet.Delete(idleAlloc.Name)
-			idleAlloc.Name = IdleSuffix
-			aggSet.Insert(idleAlloc)
-		}
-	}
-
-	// (11) In the edge case that some idle has not been distributed because
+	// (10) In the edge case that some idle has not been distributed because
 	// there is no usage of that resource type, add idle back to
 	// aggregations with only that cost applied.
 
@@ -1332,9 +1338,7 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 	// Name		CPU		GPU		RAM
 	// __idle__ $0      $12     $0
 	// kubecost $12     $0      $7
-
-	hasUndistributedIdle := undistributedIdleMap["cpu"] || undistributedIdleMap["gpu"] || undistributedIdleMap["ram"]
-	if idleSet.Length() > 0 && hasUndistributedIdle {
+	if idleSet.Length() > 0 {
 		for _, idleAlloc := range idleSet.allocations {
 			// if the idle does not apply to the non-filtered values, skip it
 			skip := false
@@ -1348,25 +1352,97 @@ func (as *AllocationSet) AggregateBy(aggregateBy []string, options *AllocationAg
 				continue
 			}
 
-			// if the idle doesn't have a cost to be shared, also skip it
-			if idleAlloc.CPUCost != 0 && idleAlloc.GPUCost != 0 && idleAlloc.RAMCost != 0 {
-				// artificially set the already shared costs to zero
-				if !undistributedIdleMap["cpu"] {
-					idleAlloc.CPUCost = 0
-				}
-				if !undistributedIdleMap["gpu"] {
-					idleAlloc.GPUCost = 0
-				}
-				if !undistributedIdleMap["ram"] {
-					idleAlloc.RAMCost = 0
-				}
+			idleId, err := idleAlloc.getIdleId(options)
+			if err != nil {
+				log.Errorf("AllocationSet.AggregateBy: idle allocation is missing idleId %s", idleAlloc.Name)
+				continue
+			}
+
+			hasUndistributableCost := false
+
+			if idleAlloc.CPUCost > 0 && allocatedTotalsMap[idleId]["cpu"] == 0 {
+				hasUndistributableCost = true
+			} else {
+				idleAlloc.CPUCost = 0
+			}
+
+			if idleAlloc.GPUCost > 0 && allocatedTotalsMap[idleId]["gpu"] == 0 {
+				hasUndistributableCost = true
+			} else {
+				idleAlloc.GPUCost = 0
+			}
+
+			if idleAlloc.RAMCost > 0 && allocatedTotalsMap[idleId]["ram"] == 0 {
+				hasUndistributableCost = true
+			} else {
+				idleAlloc.RAMCost = 0
+			}
 
-				idleAlloc.Name = IdleSuffix
+			if hasUndistributableCost {
+				idleAlloc.Name = fmt.Sprintf("%s/%s", idleId, IdleSuffix)
 				aggSet.Insert(idleAlloc)
 			}
 		}
 	}
 
+	// (11) Combine all idle allocations into a single "__idle__" allocation
+	if !options.SplitIdle {
+		for _, idleAlloc := range aggSet.IdleAllocations() {
+			aggSet.Delete(idleAlloc.Name)
+			idleAlloc.Name = IdleSuffix
+			aggSet.Insert(idleAlloc)
+		}
+	}
+
+	// TODO revisit this (ideally we just remove sharing from this function!)
+	// If filters and shared resources and shared idle are all enabled then
+	// we will over-count idle by exactly the portion that gets shared with the
+	// filtered allocations -- and idle filtration will miss this because it
+	// only filters the non-idle filtered costs.
+	//
+	// Consider the following example, from unit tests:
+	// - namespace1     28.000
+	// - namespace2     36.000
+	// - namespace3     18.000
+	// - cluster1/idle  20.000
+	// - cluster2/idle  10.000
+	//
+	// Now, we want to share namespace1, filter namespace2, and share idle:
+	//
+	// 1. Distribute idle
+	//                 ns1     ns2     ns3
+	//    non-idle  28.000  36.000  18.000
+	//        idle  14.688  10.312   5.000
+	//
+	// 2. Share namespace1
+	//
+	//                        ns2     ns3
+	//           non-idle  36.000  18.000
+	//               idle  10.312   5.000
+	//    shared non-idle  18.667   9.333
+	//    shared     idle   9.792   4.896 (***)
+	//
+	// 3. Filter out all but namespace2
+	//
+	//    ns2 = 36.000 + 10.312 + 18.667 + 9.792 = 74.771
+	//
+	// So, if we had NOT shared idle, we would expect something like this:
+	//
+	//    ns2 = 36.000 + 18.667 = 54.667
+	//   idle = 10.312 + 9.792  = 20.104
+	//
+	// But we will instead get this:
+	//
+	//    ns2 = 36.000 + 18.667 = 54.667
+	//   idle = 10.312 + 14.688 = 25.000
+	//
+	// Which over-shoots idle by 4.896 (***), i.e. precisely the amount of idle
+	// cost corresponding to namespace1 AND shared with namespace3. Phew.
+	//
+	// I originally wanted to fix this, but after 2 days, I'm punting with the
+	// recommendation that we rewrite this function soon. Too difficult.
+	// - Niko
+
 	as.allocations = aggSet.allocations
 
 	return nil
@@ -1423,8 +1499,8 @@ func computeShareCoeffs(aggregateBy []string, options *AllocationAggregationOpti
 		} else {
 			// Both are additive for weighted distribution, where each
 			// cumulative coefficient will be divided by the total.
-			coeffs[name] += alloc.TotalCost()
-			total += alloc.TotalCost()
+			coeffs[name] += alloc.TotalCost() - alloc.SharedCost
+			total += alloc.TotalCost() - alloc.SharedCost
 		}
 	}
 
@@ -1433,7 +1509,7 @@ func computeShareCoeffs(aggregateBy []string, options *AllocationAggregationOpti
 		if coeffs[a] > 0 && total > 0 {
 			coeffs[a] /= total
 		} else {
-			log.Warningf("ETL: invalid values for shared coefficients: %d, %d", coeffs[a], total)
+			log.Warnf("ETL: invalid values for shared coefficients: %v, %v", coeffs[a], total)
 			coeffs[a] = 0.0
 		}
 	}
@@ -1441,24 +1517,17 @@ func computeShareCoeffs(aggregateBy []string, options *AllocationAggregationOpti
 	return coeffs, nil
 }
 
-func computeIdleCoeffs(options *AllocationAggregationOptions, as *AllocationSet, shareSet *AllocationSet) (map[string]map[string]map[string]float64, map[string]bool, error) {
+func computeIdleCoeffs(options *AllocationAggregationOptions, as *AllocationSet, shareSet *AllocationSet) (map[string]map[string]map[string]float64, map[string]map[string]float64, error) {
 	types := []string{"cpu", "gpu", "ram"}
-	undistributedIdleMap := map[string]bool{
-		"cpu": true,
-		"gpu": true,
-		"ram": true,
-	}
 
 	// Compute idle coefficients, then save them in AllocationAggregationOptions
+	// [idle_id][allocation name][resource] = [coeff]
 	coeffs := map[string]map[string]map[string]float64{}
 
 	// Compute totals per resource for CPU, GPU, RAM, and PV
+	// [idle_id][resource] = [total]
 	totals := map[string]map[string]float64{}
 
-	// ShareEven counts each allocation with even weight, whereas ShareWeighted
-	// counts each allocation proportionally to its respective costs
-	shareType := options.ShareIdle
-
 	// Record allocation values first, then normalize by totals to get percentages
 	for _, alloc := range as.allocations {
 		if alloc.IsIdle() {
@@ -1486,24 +1555,13 @@ func computeIdleCoeffs(options *AllocationAggregationOptions, as *AllocationSet,
 			coeffs[idleId][name] = map[string]float64{}
 		}
 
-		if shareType == ShareEven {
-			for _, r := range types {
-				// Not additive - hard set to 1.0
-				coeffs[idleId][name][r] = 1.0
-
-				// totals are additive
-				totals[idleId][r] += 1.0
-			}
-		} else {
-			coeffs[idleId][name]["cpu"] += alloc.CPUTotalCost()
-			coeffs[idleId][name]["gpu"] += alloc.GPUTotalCost()
-			coeffs[idleId][name]["ram"] += alloc.RAMTotalCost()
-
-			totals[idleId]["cpu"] += alloc.CPUTotalCost()
-			totals[idleId]["gpu"] += alloc.GPUTotalCost()
-			totals[idleId]["ram"] += alloc.RAMTotalCost()
-		}
+		coeffs[idleId][name]["cpu"] += alloc.CPUTotalCost()
+		coeffs[idleId][name]["gpu"] += alloc.GPUTotalCost()
+		coeffs[idleId][name]["ram"] += alloc.RAMTotalCost()
 
+		totals[idleId]["cpu"] += alloc.CPUTotalCost()
+		totals[idleId]["gpu"] += alloc.GPUTotalCost()
+		totals[idleId]["ram"] += alloc.RAMTotalCost()
 	}
 
 	// Do the same for shared allocations
@@ -1534,38 +1592,27 @@ func computeIdleCoeffs(options *AllocationAggregationOptions, as *AllocationSet,
 			coeffs[idleId][name] = map[string]float64{}
 		}
 
-		if shareType == ShareEven {
-			for _, r := range types {
-				// Not additive - hard set to 1.0
-				coeffs[idleId][name][r] = 1.0
-
-				// totals are additive
-				totals[idleId][r] += 1.0
-			}
-		} else {
-			coeffs[idleId][name]["cpu"] += alloc.CPUTotalCost()
-			coeffs[idleId][name]["gpu"] += alloc.GPUTotalCost()
-			coeffs[idleId][name]["ram"] += alloc.RAMTotalCost()
+		coeffs[idleId][name]["cpu"] += alloc.CPUTotalCost()
+		coeffs[idleId][name]["gpu"] += alloc.GPUTotalCost()
+		coeffs[idleId][name]["ram"] += alloc.RAMTotalCost()
 
-			totals[idleId]["cpu"] += alloc.CPUTotalCost()
-			totals[idleId]["gpu"] += alloc.GPUTotalCost()
-			totals[idleId]["ram"] += alloc.RAMTotalCost()
-		}
+		totals[idleId]["cpu"] += alloc.CPUTotalCost()
+		totals[idleId]["gpu"] += alloc.GPUTotalCost()
+		totals[idleId]["ram"] += alloc.RAMTotalCost()
 	}
 
 	// Normalize coefficients by totals
-	for c := range coeffs {
-		for a := range coeffs[c] {
+	for id := range coeffs {
+		for a := range coeffs[id] {
 			for _, r := range types {
-				if coeffs[c][a][r] > 0 && totals[c][r] > 0 {
-					coeffs[c][a][r] /= totals[c][r]
-					undistributedIdleMap[r] = false
+				if coeffs[id][a][r] > 0 && totals[id][r] > 0 {
+					coeffs[id][a][r] /= totals[id][r]
 				}
 			}
 		}
 	}
 
-	return coeffs, undistributedIdleMap, nil
+	return coeffs, totals, nil
 }
 
 // getIdleId returns the providerId or cluster of an Allocation depending on the IdleByNode
@@ -1574,7 +1621,7 @@ func (a *Allocation) getIdleId(options *AllocationAggregationOptions) (string, e
 	var idleId string
 	if options.IdleByNode {
-		// Key allocations to ProviderId to match against node
-		idleId = a.Properties.ProviderID
-		if idleId == "" {
-			return idleId, fmt.Errorf("ProviderId is not set")
-		}
+		// Key allocations to cluster/node so they match against per-node idle
+		idleId = fmt.Sprintf("%s/%s", a.Properties.Cluster, a.Properties.Node)
+		if a.Properties.Cluster == "" || a.Properties.Node == "" {
+			return idleId, fmt.Errorf("cluster or node is not set")
+		}
@@ -1629,279 +1676,6 @@ func (as *AllocationSet) Clone() *AllocationSet {
 	}
 }
 
-// ComputeIdleAllocations computes the idle allocations for the AllocationSet,
-// given a set of Assets. Ideally, assetSet should contain only Nodes, but if
-// it contains other Assets, they will be ignored; only CPU, GPU and RAM are
-// considered for idle allocation. If the Nodes have adjustments, then apply
-// the adjustments proportionally to each of the resources so that total
-// allocation with idle reflects the adjusted node costs. One idle allocation
-// per-cluster will be computed and returned, keyed by cluster_id.
-func (as *AllocationSet) ComputeIdleAllocations(assetSet *AssetSet) (map[string]*Allocation, error) {
-	if as == nil {
-		return nil, fmt.Errorf("cannot compute idle allocation for nil AllocationSet")
-	}
-
-	if assetSet == nil {
-		return nil, fmt.Errorf("cannot compute idle allocation with nil AssetSet")
-	}
-
-	if !as.Window.Equal(assetSet.Window) {
-		return nil, fmt.Errorf("cannot compute idle allocation for sets with mismatched windows: %s != %s", as.Window, assetSet.Window)
-	}
-
-	window := as.Window
-
-	// Build a map of cumulative cluster asset costs, per resource; i.e.
-	// cluster-to-{cpu|gpu|ram}-to-cost.
-	assetClusterResourceCosts := map[string]map[string]float64{}
-	assetSet.Each(func(key string, a Asset) {
-		if node, ok := a.(*Node); ok {
-			if _, ok := assetClusterResourceCosts[node.Properties().Cluster]; !ok {
-				assetClusterResourceCosts[node.Properties().Cluster] = map[string]float64{}
-			}
-
-			// adjustmentRate is used to scale resource costs proportionally
-			// by the adjustment. This is necessary because we only get one
-			// adjustment per Node, not one per-resource-per-Node.
-			//
-			// e.g. total cost = $90, adjustment = -$10 => 0.9
-			// e.g. total cost = $150, adjustment = -$300 => 0.3333
-			// e.g. total cost = $150, adjustment = $50 => 1.5
-			adjustmentRate := 1.0
-			if node.TotalCost()-node.Adjustment() == 0 {
-				// If (totalCost - adjustment) is 0.0 then adjustment cancels
-				// the entire node cost and we should make everything 0
-				// without dividing by 0.
-				adjustmentRate = 0.0
-				log.DedupedWarningf(5, "Compute Idle Allocations: Node Cost Adjusted to $0.00 for %s", node.properties.Name)
-			} else if node.Adjustment() != 0.0 {
-				// adjustmentRate is the ratio of cost-with-adjustment (i.e. TotalCost)
-				// to cost-without-adjustment (i.e. TotalCost - Adjustment).
-				adjustmentRate = node.TotalCost() / (node.TotalCost() - node.Adjustment())
-			}
-
-			cpuCost := node.CPUCost * (1.0 - node.Discount) * adjustmentRate
-			ramCost := node.RAMCost * (1.0 - node.Discount) * adjustmentRate
-			gpuCost := node.GPUCost * (1.0) * adjustmentRate
-
-			assetClusterResourceCosts[node.Properties().Cluster]["cpu"] += cpuCost
-			assetClusterResourceCosts[node.Properties().Cluster]["ram"] += ramCost
-			assetClusterResourceCosts[node.Properties().Cluster]["gpu"] += gpuCost
-		}
-	})
-
-	// Determine start, end on a per-cluster basis
-	clusterStarts := map[string]time.Time{}
-	clusterEnds := map[string]time.Time{}
-
-	// Subtract allocated costs from asset costs, leaving only the remaining
-	// idle costs.
-	as.Each(func(name string, a *Allocation) {
-		cluster := a.Properties.Cluster
-		if cluster == "" {
-			// Failed to find allocation's cluster
-			return
-		}
-
-		if _, ok := assetClusterResourceCosts[cluster]; !ok {
-			// Failed to find assets for allocation's cluster
-			return
-		}
-
-		// Set cluster (start, end) if they are either not currently set,
-		// or if the detected (start, end) of the current allocation falls
-		// before or after, respectively, the current values.
-		if s, ok := clusterStarts[cluster]; !ok || a.Start.Before(s) {
-			clusterStarts[cluster] = a.Start
-		}
-		if e, ok := clusterEnds[cluster]; !ok || a.End.After(e) {
-			clusterEnds[cluster] = a.End
-		}
-
-		assetClusterResourceCosts[cluster]["cpu"] -= a.CPUTotalCost()
-		assetClusterResourceCosts[cluster]["gpu"] -= a.GPUTotalCost()
-		assetClusterResourceCosts[cluster]["ram"] -= a.RAMTotalCost()
-	})
-
-	// Turn remaining un-allocated asset costs into idle allocations
-	idleAllocs := map[string]*Allocation{}
-	for cluster, resources := range assetClusterResourceCosts {
-		// Default start and end to the (start, end) of the given window, but
-		// use the actual, detected (start, end) pair if they are available.
-		start := *window.Start()
-		if s, ok := clusterStarts[cluster]; ok && window.Contains(s) {
-			start = s
-		}
-		end := *window.End()
-		if e, ok := clusterEnds[cluster]; ok && window.Contains(e) {
-			end = e
-		}
-
-		idleAlloc := &Allocation{
-			Name:       fmt.Sprintf("%s/%s", cluster, IdleSuffix),
-			Window:     window.Clone(),
-			Properties: &AllocationProperties{Cluster: cluster},
-			Start:      start,
-			End:        end,
-			CPUCost:    resources["cpu"],
-			GPUCost:    resources["gpu"],
-			RAMCost:    resources["ram"],
-		}
-
-		// Do not continue if multiple idle allocations are computed for a
-		// single cluster.
-		if _, ok := idleAllocs[cluster]; ok {
-			return nil, fmt.Errorf("duplicate idle allocations for cluster %s", cluster)
-		}
-
-		idleAllocs[cluster] = idleAlloc
-	}
-
-	return idleAllocs, nil
-}
-
-// ComputeIdleAllocationsByNode computes the idle allocations for the AllocationSet,
-// given a set of Assets. Ideally, assetSet should contain only Nodes, but if
-// it contains other Assets, they will be ignored; only CPU, GPU and RAM are
-// considered for idle allocation. If the Nodes have adjustments, then apply
-// the adjustments proportionally to each of the resources so that total
-// allocation with idle reflects the adjusted node costs. One idle allocation
-// per-node will be computed and returned, keyed by cluster_id.
-func (as *AllocationSet) ComputeIdleAllocationsByNode(assetSet *AssetSet) (map[string]*Allocation, error) {
-
-	if as == nil {
-		return nil, fmt.Errorf("cannot compute idle allocation for nil AllocationSet")
-	}
-
-	if assetSet == nil {
-		return nil, fmt.Errorf("cannot compute idle allocation with nil AssetSet")
-	}
-
-	if !as.Window.Equal(assetSet.Window) {
-		return nil, fmt.Errorf("cannot compute idle allocation for sets with mismatched windows: %s != %s", as.Window, assetSet.Window)
-	}
-
-	window := as.Window
-
-	// Build a map of cumulative cluster asset costs, per resource; i.e.
-	// cluster-to-{cpu|gpu|ram}-to-cost.
-	assetNodeResourceCosts := map[string]map[string]float64{}
-	nodesByProviderId := map[string]*Node{}
-	assetSet.Each(func(key string, a Asset) {
-		if node, ok := a.(*Node); ok {
-			if _, ok := assetNodeResourceCosts[node.Properties().ProviderID]; ok || node.Properties().ProviderID == "" {
-				log.DedupedWarningf(5, "Compute Idle Allocations By Node: Node missing providerId: %s", node.properties.Name)
-				return
-			}
-
-			nodesByProviderId[node.Properties().ProviderID] = node
-			assetNodeResourceCosts[node.Properties().ProviderID] = map[string]float64{}
-
-			// adjustmentRate is used to scale resource costs proportionally
-			// by the adjustment. This is necessary because we only get one
-			// adjustment per Node, not one per-resource-per-Node.
-			//
-			// e.g. total cost = $90, adjustment = -$10 => 0.9
-			// e.g. total cost = $150, adjustment = -$300 => 0.3333
-			// e.g. total cost = $150, adjustment = $50 => 1.5
-			adjustmentRate := 1.0
-			if node.TotalCost()-node.Adjustment() == 0 {
-				// If (totalCost - adjustment) is 0.0 then adjustment cancels
-				// the entire node cost and we should make everything 0
-				// without dividing by 0.
-				adjustmentRate = 0.0
-				log.DedupedWarningf(5, "Compute Idle Allocations: Node Cost Adjusted to $0.00 for %s", node.properties.Name)
-			} else if node.Adjustment() != 0.0 {
-				// adjustmentRate is the ratio of cost-with-adjustment (i.e. TotalCost)
-				// to cost-without-adjustment (i.e. TotalCost - Adjustment).
-				adjustmentRate = node.TotalCost() / (node.TotalCost() - node.Adjustment())
-			}
-
-			cpuCost := node.CPUCost * (1.0 - node.Discount) * adjustmentRate
-			ramCost := node.RAMCost * (1.0 - node.Discount) * adjustmentRate
-			gpuCost := node.GPUCost * adjustmentRate
-
-			assetNodeResourceCosts[node.Properties().ProviderID]["cpu"] += cpuCost
-			assetNodeResourceCosts[node.Properties().ProviderID]["ram"] += ramCost
-			assetNodeResourceCosts[node.Properties().ProviderID]["gpu"] += gpuCost
-		}
-	})
-
-	// Determine start, end on a per-cluster basis
-	nodeStarts := map[string]time.Time{}
-	nodeEnds := map[string]time.Time{}
-
-	// Subtract allocated costs from asset costs, leaving only the remaining
-	// idle costs.
-	as.Each(func(name string, a *Allocation) {
-		providerId := a.Properties.ProviderID
-		if providerId == "" {
-			// Failed to find allocation's node
-			log.DedupedWarningf(5, "Compute Idle Allocations By Node: Allocation missing providerId: %s", a.Name)
-			return
-		}
-
-		if _, ok := assetNodeResourceCosts[providerId]; !ok {
-			// Failed to find assets for allocation's node
-			return
-		}
-
-		// Set cluster (start, end) if they are either not currently set,
-		// or if the detected (start, end) of the current allocation falls
-		// before or after, respectively, the current values.
-		if s, ok := nodeStarts[providerId]; !ok || a.Start.Before(s) {
-			nodeStarts[providerId] = a.Start
-		}
-		if e, ok := nodeEnds[providerId]; !ok || a.End.After(e) {
-			nodeEnds[providerId] = a.End
-		}
-
-		assetNodeResourceCosts[providerId]["cpu"] -= a.CPUTotalCost()
-		assetNodeResourceCosts[providerId]["gpu"] -= a.GPUTotalCost()
-		assetNodeResourceCosts[providerId]["ram"] -= a.RAMTotalCost()
-	})
-
-	// Turn remaining un-allocated asset costs into idle allocations
-	idleAllocs := map[string]*Allocation{}
-	for providerId, resources := range assetNodeResourceCosts {
-		// Default start and end to the (start, end) of the given window, but
-		// use the actual, detected (start, end) pair if they are available.
-		start := *window.Start()
-		if s, ok := nodeStarts[providerId]; ok && window.Contains(s) {
-			start = s
-		}
-		end := *window.End()
-		if e, ok := nodeEnds[providerId]; ok && window.Contains(e) {
-			end = e
-		}
-		node := nodesByProviderId[providerId]
-		idleAlloc := &Allocation{
-			Name:   fmt.Sprintf("%s/%s", node.properties.Name, IdleSuffix),
-			Window: window.Clone(),
-			Properties: &AllocationProperties{
-				Cluster:    node.properties.Cluster,
-				Node:       node.properties.Name,
-				ProviderID: providerId,
-			},
-			Start:   start,
-			End:     end,
-			CPUCost: resources["cpu"],
-			GPUCost: resources["gpu"],
-			RAMCost: resources["ram"],
-		}
-
-		// Do not continue if multiple idle allocations are computed for a
-		// single node.
-		if _, ok := idleAllocs[providerId]; ok {
-			return nil, fmt.Errorf("duplicate idle allocations for node Provider ID: %s", providerId)
-		}
-
-		idleAllocs[providerId] = idleAlloc
-	}
-
-	return idleAllocs, nil
-}
-
 // Delete removes the allocation with the given name from the set
 func (as *AllocationSet) Delete(name string) {
 	if as == nil {
@@ -1929,11 +1703,11 @@ func (as *AllocationSet) Each(f func(string, *Allocation)) {
 // End returns the End time of the AllocationSet window
 func (as *AllocationSet) End() time.Time {
 	if as == nil {
-		log.Warningf("AllocationSet: calling End on nil AllocationSet")
+		log.Warnf("AllocationSet: calling End on nil AllocationSet")
 		return time.Unix(0, 0)
 	}
 	if as.Window.End() == nil {
-		log.Warningf("AllocationSet: AllocationSet with illegal window: End is nil; len(as.allocations)=%d", len(as.allocations))
+		log.Warnf("AllocationSet: AllocationSet with illegal window: End is nil; len(as.allocations)=%d", len(as.allocations))
 		return time.Unix(0, 0)
 	}
 	return *as.Window.End()
@@ -2101,6 +1875,24 @@ func (as *AllocationSet) MarshalJSON() ([]byte, error) {
 	return json.Marshal(as.allocations)
 }
 
+// ResetAdjustments sets all cost adjustment fields to zero
+func (as *AllocationSet) ResetAdjustments() {
+	if as == nil {
+		return
+	}
+
+	as.Lock()
+	defer as.Unlock()
+
+	as.resetAdjustments()
+}
+
+func (as *AllocationSet) resetAdjustments() {
+	for _, a := range as.allocations {
+		a.ResetAdjustments()
+	}
+}
+
 // Resolution returns the AllocationSet's window duration
 func (as *AllocationSet) Resolution() time.Duration {
 	return as.Window.Duration()
@@ -2138,11 +1930,11 @@ func (as *AllocationSet) Set(alloc *Allocation) error {
 // Start returns the Start time of the AllocationSet window
 func (as *AllocationSet) Start() time.Time {
 	if as == nil {
-		log.Warningf("AllocationSet: calling Start on nil AllocationSet")
+		log.Warnf("AllocationSet: calling Start on nil AllocationSet")
 		return time.Unix(0, 0)
 	}
 	if as.Window.Start() == nil {
-		log.Warningf("AllocationSet: AllocationSet with illegal window: Start is nil; len(as.allocations)=%d", len(as.allocations))
+		log.Warnf("AllocationSet: AllocationSet with illegal window: Start is nil; len(as.allocations)=%d", len(as.allocations))
 		return time.Unix(0, 0)
 	}
 	return *as.Window.Start()
@@ -2533,3 +2325,19 @@ func (asr *AllocationSetRange) Minutes() float64 {
 
 	return duration.Minutes()
 }
+
+// TotalCost returns the sum of the TotalCosts of each AllocationSet in the range
+func (asr *AllocationSetRange) TotalCost() float64 {
+	if asr == nil || len(asr.allocations) == 0 {
+		return 0.0
+	}
+
+	asr.RLock()
+	defer asr.RUnlock()
+
+	tc := 0.0
+	for _, as := range asr.allocations {
+		tc += as.TotalCost()
+	}
+	return tc
+}

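computeIdleCoeffs now returns the raw allocated totals alongside the normalized coefficients, so step (10) of AggregateBy can detect resource types with zero allocated cost. A toy illustration of the normalization, with made-up numbers:

    package main

    import "fmt"

    func main() {
        // coeffs[idleId][allocation name][resource] holds cumulative allocated
        // cost; totals[idleId][resource] holds the per-idle-id sums.
        coeffs := map[string]map[string]map[string]float64{
            "cluster1": {
                "namespace1": {"cpu": 6.0, "gpu": 0.0, "ram": 2.0},
                "namespace2": {"cpu": 18.0, "gpu": 0.0, "ram": 6.0},
            },
        }
        totals := map[string]map[string]float64{
            "cluster1": {"cpu": 24.0, "gpu": 0.0, "ram": 8.0},
        }

        // Normalize each coefficient by its idle-id total; a zero total is
        // left alone and later surfaces as undistributable idle cost.
        for id := range coeffs {
            for name := range coeffs[id] {
                for _, r := range []string{"cpu", "gpu", "ram"} {
                    if coeffs[id][name][r] > 0 && totals[id][r] > 0 {
                        coeffs[id][name][r] /= totals[id][r]
                    }
                }
            }
        }

        fmt.Println(coeffs["cluster1"]["namespace1"]) // map[cpu:0.25 gpu:0 ram:0.25]
    }
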
+ 45 - 273
pkg/kubecost/allocation_test.go

@@ -1086,25 +1086,9 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 3b AggregationProperties=(Namespace) ShareIdle=ShareEven
-		// namespace1: 38.0000 = 28.00 + 5.00*(1.0/2.0) + 15.0*(1.0/2.0)
-		// namespace2: 51.0000 = 36.00 + 5.0*(1.0/2.0) + 15.0*(1.0/2.0) + 5.0*(1.0/2.0) + 5.0*(1.0/2.0)
-		// namespace3: 23.0000 = 18.00 + 5.0*(1.0/2.0) + 5.0*(1.0/2.0)
-		"3b": {
-			start:      start,
-			aggBy:      []string{AllocationNamespaceProp},
-			aggOpts:    &AllocationAggregationOptions{ShareIdle: ShareEven},
-			numResults: numNamespaces,
-			totalCost:  activeTotalCost + idleTotalCost,
-			results: map[string]float64{
-				"namespace1": 38.00,
-				"namespace2": 51.00,
-				"namespace3": 23.00,
-			},
-			windowStart: startYesterday,
-			windowEnd:   endYesterday,
-			expMinutes:  1440.0,
-		},
+
+		// 3b: sharing idle evenly is deprecated
+
 		// 4  Share resources
 
 		// 4a Share namespace ShareEven
@@ -1315,30 +1299,14 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6c Share idle even with filters
-		// Should match values from unfiltered aggregation (3b)
-		// namespace2: 51.0000 = 36.00 + 5.0*(1.0/2.0) + 15.0*(1.0/2.0) + 5.0*(1.0/2.0) + 5.0*(1.0/2.0)
-		"6c": {
-			start: start,
-			aggBy: []string{AllocationNamespaceProp},
-			aggOpts: &AllocationAggregationOptions{
-				FilterFuncs: []AllocationMatchFunc{isNamespace("namespace2")},
-				ShareIdle:   ShareEven,
-			},
-			numResults: 1,
-			totalCost:  51.00,
-			results: map[string]float64{
-				"namespace2": 51.00,
-			},
-			windowStart: startYesterday,
-			windowEnd:   endYesterday,
-			expMinutes:  1440.0,
-		},
+
+		// 6c Share idle even with filters (share idle even is deprecated)
+
 		// 6d Share overhead with filters
 		// namespace1: 85.366 = 28.00 + (7.0*24.0)*(28.00/82.00)
 		// namespace2: 109.756 = 36.00 + (7.0*24.0)*(36.00/82.00)
 		// namespace3: 54.878 = 18.00 + (7.0*24.0)*(18.00/82.00)
-		// idle:       30.0000
+		// idle:       10.3125 = % of idle paired with namespace2
 		// Then namespace 2 is filtered.
 		"6d": {
 			start: start,
@@ -1349,23 +1317,16 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 				ShareSplit:        ShareWeighted,
 			},
 			numResults: 1 + numIdle,
-			totalCost:  139.756,
+			totalCost:  120.0686,
 			results: map[string]float64{
-				"namespace2": 109.756,
-				IdleSuffix:   30.00,
+				"namespace2": 109.7561,
+				IdleSuffix:   10.3125,
 			},
 			windowStart: startYesterday,
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
 		// 6e Share resources with filters
-		// --- Shared ---
-		// namespace1: 28.00 (gets shared among namespace2 and namespace3)
-		// --- Filtered ---
-		// namespace3: 27.33 = 18.00 + (28.00)*(18.00/54.00) (filtered out)
-		// --- Results ---
-		// namespace2: 54.667 = 36.00 + (28.00)*(36.00/54.00)
-		// idle:       30.0000
 		"6e": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
@@ -1375,16 +1336,35 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 				ShareSplit:  ShareWeighted,
 			},
 			numResults: 1 + numIdle,
-			totalCost:  84.667,
+			totalCost:  79.6667, // should be 74.7708, but I'm punting -- too difficult (NK)
 			results: map[string]float64{
-				"namespace2": 54.667,
-				IdleSuffix:   30.00,
+				"namespace2": 54.6667,
+				IdleSuffix:   25.000, // should be 20.1042, but I'm punting -- too difficult (NK)
+			},
+			windowStart: startYesterday,
+			windowEnd:   endYesterday,
+			expMinutes:  1440.0,
+		},
+		// 6f Share resources with filters and share idle
+		"6f": {
+			start: start,
+			aggBy: []string{AllocationNamespaceProp},
+			aggOpts: &AllocationAggregationOptions{
+				FilterFuncs: []AllocationMatchFunc{isNamespace("namespace2")},
+				ShareFuncs:  []AllocationMatchFunc{isNamespace("namespace1")},
+				ShareSplit:  ShareWeighted,
+				ShareIdle:   ShareWeighted,
+			},
+			numResults: 1,
+			totalCost:  74.77083,
+			results: map[string]float64{
+				"namespace2": 74.77083,
 			},
 			windowStart: startYesterday,
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6f Share idle weighted and share resources weighted
+		// 6g Share idle weighted and share resources weighted
 		//
 		// First, share idle weighted produces:
 		//
@@ -1418,7 +1398,7 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 		//   initial cost   18.0000
 		//   idle cost       5.0000
 		//   shared cost    14.2292 = (42.6875)*(18.0/54.0)
-		"6f": {
+		"6g": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
 			aggOpts: &AllocationAggregationOptions{
@@ -1436,7 +1416,7 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6g Share idle, share resources, and filter
+		// 6h Share idle, share resources, and filter
 		//
 		// First, share idle weighted produces:
 		//
@@ -1472,7 +1452,7 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 		//   shared cost    14.2292 = (42.6875)*(18.0/54.0)
 		//
 		// Then, filter for namespace2: 74.7708
-		"6g": {
+		"6h": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
 			aggOpts: &AllocationAggregationOptions{
@@ -1490,7 +1470,7 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6h Share idle, share resources, share overhead
+		// 6i Share idle, share resources, share overhead
 		//
 		// Share idle weighted:
 		//
@@ -1518,7 +1498,7 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 		// namespace3:      59.8780 = 23.0000 + (7.0*24.0)*(18.00/82.00)
 		//
 		// Then namespace 2 is filtered.
-		"6h": {
+		"6i": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
 			aggOpts: &AllocationAggregationOptions{
@@ -1536,8 +1516,8 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6i Idle by Node
-		"6i": {
+		// 6j Idle by Node
+		"6j": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
 			aggOpts: &AllocationAggregationOptions{
@@ -1555,8 +1535,8 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6j Split Idle, Idle by Node
-		"6j": {
+		// 6k Split Idle, Idle by Node
+		"6k": {
 			start: start,
 			aggBy: []string{AllocationNamespaceProp},
 			aggOpts: &AllocationAggregationOptions{
@@ -1578,26 +1558,9 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 			windowEnd:   endYesterday,
 			expMinutes:  1440.0,
 		},
-		// 6k Share idle Even Idle by Node
-		// Should match values from unfiltered aggregation (3b)
-		"6k": {
-			start: start,
-			aggBy: []string{AllocationNamespaceProp},
-			aggOpts: &AllocationAggregationOptions{
-				ShareIdle:  ShareEven,
-				IdleByNode: true,
-			},
-			numResults: 3,
-			totalCost:  112.00,
-			results: map[string]float64{
-				"namespace1": 38.00,
-				"namespace2": 51.00,
-				"namespace3": 23.00,
-			},
-			windowStart: startYesterday,
-			windowEnd:   endYesterday,
-			expMinutes:  1440.0,
-		},
+
+		// Old 6k Share idle Even Idle by Node (share idle even deprecated)
+
 		// 6l Share idle weighted with filters, Idle by Node
 		// Should match values from unfiltered aggregation (3a)
 		// namespace2: 46.3125 = 36.00 + 5.0*(3.0/6.0) + 15.0*(3.0/16.0) + 5.0*(3.0/6.0) + 5.0*(3.0/6.0)
@@ -1644,197 +1607,6 @@ func TestAllocationSet_AggregateBy(t *testing.T) {
 // TODO niko/etl
 //func TestAllocationSet_Clone(t *testing.T) {}
 
-func TestAllocationSet_ComputeIdleAllocations(t *testing.T) {
-	var as *AllocationSet
-	var err error
-	var idles map[string]*Allocation
-
-	end := time.Now().UTC().Truncate(day)
-	start := end.Add(-day)
-
-	// Generate AllocationSet without idle allocations
-	as = GenerateMockAllocationSet(start)
-
-	assetSets := GenerateMockAssetSets(start, end)
-
-	cases := map[string]struct {
-		allocationSet *AllocationSet
-		assetSet      *AssetSet
-		clusters      map[string]Allocation
-	}{
-		"1a": {
-			allocationSet: as,
-			assetSet:      assetSets[0],
-			clusters: map[string]Allocation{
-				"cluster1": {
-					CPUCost: 44.0,
-					RAMCost: 24.0,
-					GPUCost: 4.0,
-				},
-				"cluster2": {
-					CPUCost: 44.0,
-					RAMCost: 34.0,
-					GPUCost: 4.0,
-				},
-			},
-		},
-		"1b": {
-			allocationSet: as,
-			assetSet:      assetSets[1],
-			clusters: map[string]Allocation{
-				"cluster1": {
-					CPUCost: 44.0,
-					RAMCost: 24.0,
-					GPUCost: 4.0,
-				},
-				"cluster2": {
-					CPUCost: 44.0,
-					RAMCost: 34.0,
-					GPUCost: 4.0,
-				},
-			},
-		},
-	}
-
-	for name, testcase := range cases {
-		t.Run(name, func(t *testing.T) {
-			idles, err = as.ComputeIdleAllocations(testcase.assetSet)
-			if err != nil {
-				t.Fatalf("unexpected error: %s", err)
-			}
-
-			if len(idles) != len(testcase.clusters) {
-				t.Fatalf("idles: expected length %d; got length %d", len(testcase.clusters), len(idles))
-			}
-
-			for clusterName, cluster := range testcase.clusters {
-				if idle, ok := idles[clusterName]; !ok {
-					t.Fatalf("expected idle cost for %s", clusterName)
-				} else {
-					if !util.IsApproximately(idle.TotalCost(), cluster.TotalCost()) {
-						t.Fatalf("%s idle: expected total cost %f; got total cost %f", clusterName, cluster.TotalCost(), idle.TotalCost())
-					}
-				}
-				if !util.IsApproximately(idles[clusterName].CPUCost, cluster.CPUCost) {
-					t.Fatalf("expected idle CPU cost for %s to be %.2f; got %.2f", clusterName, cluster.CPUCost, idles[clusterName].CPUCost)
-				}
-				if !util.IsApproximately(idles[clusterName].RAMCost, cluster.RAMCost) {
-					t.Fatalf("expected idle RAM cost for %s to be %.2f; got %.2f", clusterName, cluster.RAMCost, idles[clusterName].RAMCost)
-				}
-				if !util.IsApproximately(idles[clusterName].GPUCost, cluster.GPUCost) {
-					t.Fatalf("expected idle GPU cost for %s to be %.2f; got %.2f", clusterName, cluster.GPUCost, idles[clusterName].GPUCost)
-				}
-			}
-		})
-	}
-}
-
-func TestAllocationSet_ComputeIdleAllocationsPerNode(t *testing.T) {
-
-	var as *AllocationSet
-	var err error
-	var idles map[string]*Allocation
-
-	end := time.Now().UTC().Truncate(day)
-	start := end.Add(-day)
-
-	// Generate AllocationSet without idle allocations
-	as = GenerateMockAllocationSet(start)
-
-	assetSets := GenerateMockAssetSets(start, end)
-
-	cases := map[string]struct {
-		allocationSet *AllocationSet
-		assetSet      *AssetSet
-		nodes         map[string]Allocation
-	}{
-		"1a": {
-			allocationSet: as,
-			assetSet:      assetSets[0],
-			nodes: map[string]Allocation{
-				"c1nodes": {
-					CPUCost: 44.0,
-					RAMCost: 24.0,
-					GPUCost: 4.0,
-				},
-				"node1": {
-					CPUCost: 18.0,
-					RAMCost: 13.0,
-					GPUCost: -2.0,
-				},
-				"node2": {
-					CPUCost: 18.0,
-					RAMCost: 13.0,
-					GPUCost: -2.0,
-				},
-				"node3": {
-					CPUCost: 8.0,
-					RAMCost: 8.0,
-					GPUCost: 8.0,
-				},
-			},
-		},
-		"1b": {
-			allocationSet: as,
-			assetSet:      assetSets[1],
-			nodes: map[string]Allocation{
-				"c1nodes": {
-					CPUCost: 44.0,
-					RAMCost: 24.0,
-					GPUCost: 4.0,
-				},
-				"node1": {
-					CPUCost: 18.0,
-					RAMCost: 13.0,
-					GPUCost: -2.0,
-				},
-				"node2": {
-					CPUCost: 18.0,
-					RAMCost: 13.0,
-					GPUCost: -2.0,
-				},
-				"node3": {
-					CPUCost: 8.0,
-					RAMCost: 8.0,
-					GPUCost: 8.0,
-				},
-			},
-		},
-	}
-
-	for name, testcase := range cases {
-		t.Run(name, func(t *testing.T) {
-			idles, err = as.ComputeIdleAllocationsByNode(testcase.assetSet)
-			if err != nil {
-				t.Fatalf("unexpected error: %s", err)
-			}
-
-			if len(idles) != len(testcase.nodes) {
-				t.Fatalf("idles: expected length %d; got length %d", len(testcase.nodes), len(idles))
-			}
-
-			for nodeName, node := range testcase.nodes {
-				if idle, ok := idles[nodeName]; !ok {
-					t.Fatalf("expected idle cost for %s", nodeName)
-				} else {
-					if !util.IsApproximately(idle.TotalCost(), node.TotalCost()) {
-						t.Fatalf("%s idle: expected total cost %f; got total cost %f", nodeName, node.TotalCost(), idle.TotalCost())
-					}
-				}
-				if !util.IsApproximately(idles[nodeName].CPUCost, node.CPUCost) {
-					t.Fatalf("expected idle CPU cost for %s to be %.2f; got %.2f", nodeName, node.CPUCost, idles[nodeName].CPUCost)
-				}
-				if !util.IsApproximately(idles[nodeName].RAMCost, node.RAMCost) {
-					t.Fatalf("expected idle RAM cost for %s to be %.2f; got %.2f", nodeName, node.RAMCost, idles[nodeName].RAMCost)
-				}
-				if !util.IsApproximately(idles[nodeName].GPUCost, node.GPUCost) {
-					t.Fatalf("expected idle GPU cost for %s to be %.2f; got %.2f", nodeName, node.GPUCost, idles[nodeName].GPUCost)
-				}
-			}
-		})
-	}
-}
-
 // TODO niko/etl
 //func TestAllocationSet_Delete(t *testing.T) {}
 

+ 1 - 1
pkg/kubecost/allocationprops.go

@@ -394,7 +394,7 @@ func (p *AllocationProperties) GenerateKey(aggregateBy []string, labelConfig *La
 			// This case should never be reached, as input up until this point
 			// should be checked and rejected if invalid. But if we do get a
 			// value we don't recognize, log a warning.
-			log.Warningf("generateKey: illegal aggregation parameter: %s", agg)
+			log.Warnf("generateKey: illegal aggregation parameter: %s", agg)
 		}
 	}
 

+ 72 - 20
pkg/kubecost/asset.go

@@ -246,7 +246,7 @@ func AssetToExternalAllocation(asset Asset, aggregateBy []string, labelConfig *L
 // Valid values of `aggregateBy` elements are strings which are an `AssetProperty`, and strings prefixed
 // with `"label:"`.
 func key(a Asset, aggregateBy []string) (string, error) {
-	keys := []string{}
+	var buffer strings.Builder
 
 	if aggregateBy == nil {
 		aggregateBy = []string{
@@ -262,7 +262,7 @@ func key(a Asset, aggregateBy []string) (string, error) {
 		}
 	}
 
-	for _, s := range aggregateBy {
+	for i, s := range aggregateBy {
 		key := ""
 		switch true {
 		case s == string(AssetProviderProp):
@@ -300,12 +300,15 @@ func key(a Asset, aggregateBy []string) (string, error) {
 		}
 
 		if key != "" {
-			keys = append(keys, key)
+			buffer.WriteString(key)
 		} else {
-			keys = append(keys, UndefinedKey)
+			buffer.WriteString(UndefinedKey)
+		}
+		if i != (len(aggregateBy) - 1) {
+			buffer.WriteString("/")
 		}
 	}
-	return strings.Join(keys, "/"), nil
+	return buffer.String(), nil
 }
 
 func toString(a Asset) string {
@@ -355,6 +358,23 @@ func (al AssetLabels) Merge(that AssetLabels) AssetLabels {
 	return result
 }
 
+// Append merges the given map of labels into the AssetLabels, overwriting existing keys only when overwrite is true
+func (al AssetLabels) Append(newLabels map[string]string, overwrite bool) {
+	if len(newLabels) == 0 {
+		return
+	}
+
+	for label, value := range newLabels {
+		if _, ok := al[label]; ok {
+			if overwrite {
+				al[label] = value
+			}
+		} else {
+			al[label] = value
+		}
+	}
+}
+
 // AssetMatchFunc is a function that can be used to match Assets by
 // returning true for any given Asset if a condition is met.
 type AssetMatchFunc func(Asset) bool
@@ -516,13 +536,13 @@ func (a *Any) SetStartEnd(start, end time.Time) {
 	if a.Window().Contains(start) {
 		a.start = start
 	} else {
-		log.Warningf("Any.SetStartEnd: start %s not in %s", start, a.Window())
+		log.Warnf("Any.SetStartEnd: start %s not in %s", start, a.Window())
 	}
 
 	if a.Window().Contains(end) {
 		a.end = end
 	} else {
-		log.Warningf("Any.SetStartEnd: end %s not in %s", end, a.Window())
+		log.Warnf("Any.SetStartEnd: end %s not in %s", end, a.Window())
 	}
 }
 
@@ -702,13 +722,13 @@ func (ca *Cloud) SetStartEnd(start, end time.Time) {
 	if ca.Window().Contains(start) {
 		ca.start = start
 	} else {
-		log.Warningf("Cloud.SetStartEnd: start %s not in %s", start, ca.Window())
+		log.Warnf("Cloud.SetStartEnd: start %s not in %s", start, ca.Window())
 	}
 
 	if ca.Window().Contains(end) {
 		ca.end = end
 	} else {
-		log.Warningf("Cloud.SetStartEnd: end %s not in %s", end, ca.Window())
+		log.Warnf("Cloud.SetStartEnd: end %s not in %s", end, ca.Window())
 	}
 }
 
@@ -1104,7 +1124,7 @@ func (d *Disk) Minutes() float64 {
 	windowMins := d.window.Minutes()
 
 	if diskMins > windowMins {
-		log.Warningf("Asset ETL: Disk.Minutes exceeds window: %.2f > %.2f", diskMins, windowMins)
+		log.Warnf("Asset ETL: Disk.Minutes exceeds window: %.2f > %.2f", diskMins, windowMins)
 		diskMins = windowMins
 	}
 
@@ -1130,13 +1150,13 @@ func (d *Disk) SetStartEnd(start, end time.Time) {
 	if d.Window().Contains(start) {
 		d.start = start
 	} else {
-		log.Warningf("Disk.SetStartEnd: start %s not in %s", start, d.Window())
+		log.Warnf("Disk.SetStartEnd: start %s not in %s", start, d.Window())
 	}
 
 	if d.Window().Contains(end) {
 		d.end = end
 	} else {
-		log.Warningf("Disk.SetStartEnd: end %s not in %s", end, d.Window())
+		log.Warnf("Disk.SetStartEnd: end %s not in %s", end, d.Window())
 	}
 }
 
@@ -1425,7 +1445,7 @@ func (n *Network) Minutes() float64 {
 	windowMins := n.window.Minutes()
 
 	if netMins > windowMins {
-		log.Warningf("Asset ETL: Network.Minutes exceeds window: %.2f > %.2f", netMins, windowMins)
+		log.Warnf("Asset ETL: Network.Minutes exceeds window: %.2f > %.2f", netMins, windowMins)
 		netMins = windowMins
 	}
 
@@ -1451,13 +1471,13 @@ func (n *Network) SetStartEnd(start, end time.Time) {
 	if n.Window().Contains(start) {
 		n.start = start
 	} else {
-		log.Warningf("Disk.SetStartEnd: start %s not in %s", start, n.Window())
+		log.Warnf("Disk.SetStartEnd: start %s not in %s", start, n.Window())
 	}
 
 	if n.Window().Contains(end) {
 		n.end = end
 	} else {
-		log.Warningf("Disk.SetStartEnd: end %s not in %s", end, n.Window())
+		log.Warnf("Disk.SetStartEnd: end %s not in %s", end, n.Window())
 	}
 }
 
@@ -1677,7 +1697,7 @@ func (n *Node) Minutes() float64 {
 	windowMins := n.window.Minutes()
 
 	if nodeMins > windowMins {
-		log.Warningf("Asset ETL: Node.Minutes exceeds window: %.2f > %.2f", nodeMins, windowMins)
+		log.Warnf("Asset ETL: Node.Minutes exceeds window: %.2f > %.2f", nodeMins, windowMins)
 		nodeMins = windowMins
 	}
 
@@ -1703,13 +1723,13 @@ func (n *Node) SetStartEnd(start, end time.Time) {
 	if n.Window().Contains(start) {
 		n.start = start
 	} else {
-		log.Warningf("Disk.SetStartEnd: start %s not in %s", start, n.Window())
+		log.Warnf("Disk.SetStartEnd: start %s not in %s", start, n.Window())
 	}
 
 	if n.Window().Contains(end) {
 		n.end = end
 	} else {
-		log.Warningf("Disk.SetStartEnd: end %s not in %s", end, n.Window())
+		log.Warnf("Disk.SetStartEnd: end %s not in %s", end, n.Window())
 	}
 }
 
@@ -2075,13 +2095,13 @@ func (lb *LoadBalancer) SetStartEnd(start, end time.Time) {
 	if lb.Window().Contains(start) {
 		lb.start = start
 	} else {
-		log.Warningf("Disk.SetStartEnd: start %s not in %s", start, lb.Window())
+		log.Warnf("Disk.SetStartEnd: start %s not in %s", start, lb.Window())
 	}
 
 	if lb.Window().Contains(end) {
 		lb.end = end
 	} else {
-		log.Warningf("Disk.SetStartEnd: end %s not in %s", end, lb.Window())
+		log.Warnf("Disk.SetStartEnd: end %s not in %s", end, lb.Window())
 	}
 }
 
@@ -2950,6 +2970,21 @@ func (asr *AssetSetRange) InsertRange(that *AssetSetRange) error {
 	return err
 }
 
+// IsEmpty returns true if the AssetSetRange is nil, has no AssetSets, or contains only empty AssetSets; otherwise it returns false
+func (asr *AssetSetRange) IsEmpty() bool {
+	if asr == nil || asr.Length() == 0 {
+		return true
+	}
+	asr.RLock()
+	defer asr.RUnlock()
+	for _, asset := range asr.assets {
+		if !asset.IsEmpty() {
+			return false
+		}
+	}
+	return true
+}
+
 func (asr *AssetSetRange) MarshalJSON() ([]byte, error) {
 	asr.RLock()
 	defer asr.RUnlock()
@@ -3054,6 +3089,23 @@ func (asr *AssetSetRange) Minutes() float64 {
 	return duration.Minutes()
 }
 
+// TotalCost returns the AssetSetRange's total cost
+func (asr *AssetSetRange) TotalCost() float64 {
+	if asr == nil {
+		return 0.0
+	}
+
+	asr.RLock()
+	defer asr.RUnlock()
+
+	tc := 0.0
+	for _, as := range asr.assets {
+		tc += as.TotalCost()
+	}
+
+	return tc
+}
+
 // This is a helper type. The Asset API returns a json which cannot be natively
 // unmarshaled into any Asset struct. Therefore, this struct IN COMBINATION WITH
 // DESERIALIZATION LOGIC DEFINED IN asset_unmarshal.go can unmarshal a json directly

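A minimal sketch of the new AssetLabels.Append semantics (the example package wrapper and label values are illustrative; the import path follows the module path used elsewhere in this diff):

```go
package example

import "github.com/kubecost/cost-model/pkg/kubecost"

// appendExample shows the overwrite semantics of AssetLabels.Append.
func appendExample() kubecost.AssetLabels {
	labels := kubecost.AssetLabels{"team": "infra", "env": "prod"}

	// overwrite=false: existing keys are preserved; only new keys are added.
	labels.Append(map[string]string{"env": "dev", "owner": "sre"}, false)
	// labels: {"team": "infra", "env": "prod", "owner": "sre"}

	// overwrite=true: incoming values replace existing keys.
	labels.Append(map[string]string{"env": "dev"}, true)
	// labels: {"team": "infra", "env": "dev", "owner": "sre"}

	return labels
}
```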
+ 12 - 0
pkg/kubecost/mock.go

@@ -156,6 +156,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod1",
 		Container:  "container1",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 	a1111.RAMCost = 11.00
 
@@ -165,6 +166,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-abc",
 		Container:  "container2",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 
 	a11def3 := NewMockUnitAllocation("cluster1/namespace1/pod-def/container3", start, day, &AllocationProperties{
@@ -173,6 +175,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-def",
 		Container:  "container3",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 
 	a12ghi4 := NewMockUnitAllocation("cluster1/namespace2/pod-ghi/container4", start, day, &AllocationProperties{
@@ -181,6 +184,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-ghi",
 		Container:  "container4",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 
 	a12ghi5 := NewMockUnitAllocation("cluster1/namespace2/pod-ghi/container5", start, day, &AllocationProperties{
@@ -189,6 +193,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-ghi",
 		Container:  "container5",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 
 	a12jkl6 := NewMockUnitAllocation("cluster1/namespace2/pod-jkl/container6", start, day, &AllocationProperties{
@@ -197,6 +202,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-jkl",
 		Container:  "container6",
 		ProviderID: "c1nodes",
+		Node:       "c1nodes",
 	})
 
 	a22mno4 := NewMockUnitAllocation("cluster2/namespace2/pod-mno/container4", start, day, &AllocationProperties{
@@ -205,6 +211,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-mno",
 		Container:  "container4",
 		ProviderID: "node1",
+		Node:       "node1",
 	})
 
 	a22mno5 := NewMockUnitAllocation("cluster2/namespace2/pod-mno/container5", start, day, &AllocationProperties{
@@ -213,6 +220,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-mno",
 		Container:  "container5",
 		ProviderID: "node1",
+		Node:       "node1",
 	})
 
 	a22pqr6 := NewMockUnitAllocation("cluster2/namespace2/pod-pqr/container6", start, day, &AllocationProperties{
@@ -221,6 +229,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-pqr",
 		Container:  "container6",
 		ProviderID: "node2",
+		Node:       "node2",
 	})
 
 	a23stu7 := NewMockUnitAllocation("cluster2/namespace3/pod-stu/container7", start, day, &AllocationProperties{
@@ -229,6 +238,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-stu",
 		Container:  "container7",
 		ProviderID: "node2",
+		Node:       "node2",
 	})
 
 	a23vwx8 := NewMockUnitAllocation("cluster2/namespace3/pod-vwx/container8", start, day, &AllocationProperties{
@@ -237,6 +247,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-vwx",
 		Container:  "container8",
 		ProviderID: "node3",
+		Node:       "node3",
 	})
 
 	a23vwx9 := NewMockUnitAllocation("cluster2/namespace3/pod-vwx/container9", start, day, &AllocationProperties{
@@ -245,6 +256,7 @@ func GenerateMockAllocationSet(start time.Time) *AllocationSet {
 		Pod:        "pod-vwx",
 		Container:  "container9",
 		ProviderID: "node3",
+		Node:       "node3",
 	})
 
 	// Controllers

+ 159 - 0
pkg/kubecost/query.go

@@ -0,0 +1,159 @@
+package kubecost
+
+import (
+	"time"
+)
+
+// Querier is an aggregate interface which has the ability to query each Kubecost store type
+type Querier interface {
+	AllocationQuerier
+	SummaryAllocationQuerier
+	AssetQuerier
+	CloudUsageQuerier
+}
+
+// AllocationQuerier is the interface defining the API for requesting Allocation data
+type AllocationQuerier interface {
+	QueryAllocation(start, end time.Time, opts *AllocationQueryOptions) (*AllocationSetRange, error)
+}
+
+// SummaryAllocationQuerier is the interface defining the API for requesting SummaryAllocation data
+type SummaryAllocationQuerier interface {
+	QuerySummaryAllocation(start, end time.Time, opts *AllocationQueryOptions) (*SummaryAllocationSetRange, error)
+}
+
+// AssetQuerier is the interface defining the API for requesting Asset data
+type AssetQuerier interface {
+	QueryAsset(start, end time.Time, opts *AssetQueryOptions) (*AssetSetRange, error)
+}
+
+// CloudUsageQuerier is the interface defining the API for requesting CloudUsage data
+type CloudUsageQuerier interface {
+	QueryCloudUsage(start, end time.Time, opts *CloudUsageQueryOptions) (*CloudUsageSetRange, error)
+}
+
+// AllocationQueryOptions defines optional parameters for querying an Allocation Store
+type AllocationQueryOptions struct {
+	Accumulate        bool
+	AccumulateBy      time.Duration
+	AggregateBy       []string
+	Compute           bool
+	FilterFuncs       []AllocationMatchFunc
+	IdleByNode        bool
+	IncludeExternal   bool
+	IncludeIdle       bool
+	LabelConfig       *LabelConfig
+	MergeUnallocated  bool
+	Reconcile         bool
+	ReconcileNetwork  bool
+	ShareFuncs        []AllocationMatchFunc
+	SharedHourlyCosts map[string]float64
+	ShareIdle         string
+	ShareSplit        string
+	ShareTenancyCosts bool
+	SplitIdle         bool
+	Step              time.Duration
+}
+
+// AssetQueryOptions defines optional parameters for querying an Asset Store
+type AssetQueryOptions struct {
+	Accumulate         bool
+	AggregateBy        []string
+	Compute            bool
+	DisableAdjustments bool
+	FilterFuncs        []AssetMatchFunc
+	IncludeCloud       bool
+	SharedHourlyCosts  map[string]float64
+	Step               time.Duration
+}
+
+// CloudUsageQueryOptions defines optional parameters for querying a CloudUsage Store
+type CloudUsageQueryOptions struct {
+	Accumulate  bool
+	AggregateBy []string
+	FilterFuncs []CloudUsageMatchFunc
+}
+
+// QueryAllocationAsync provides a function for retrieving results from any AllocationQuerier asynchronously
+func QueryAllocationAsync(allocationQuerier AllocationQuerier, start, end time.Time, opts *AllocationQueryOptions) (chan *AllocationSetRange, chan error) {
+	asrCh := make(chan *AllocationSetRange)
+	errCh := make(chan error)
+
+	go func(asrCh chan *AllocationSetRange, errCh chan error) {
+		defer close(asrCh)
+		defer close(errCh)
+
+		asr, err := allocationQuerier.QueryAllocation(start, end, opts)
+		if err != nil {
+			errCh <- err
+			return
+		}
+
+		asrCh <- asr
+	}(asrCh, errCh)
+
+	return asrCh, errCh
+}
+
+// QuerySummaryAllocationAsync provides a function for retrieving results from any SummaryAllocationQuerier asynchronously
+func QuerySummaryAllocationAsync(summaryAllocationQuerier SummaryAllocationQuerier, start, end time.Time, opts *AllocationQueryOptions) (chan *SummaryAllocationSetRange, chan error) {
+	asrCh := make(chan *SummaryAllocationSetRange)
+	errCh := make(chan error)
+
+	go func(asrCh chan *SummaryAllocationSetRange, errCh chan error) {
+		defer close(asrCh)
+		defer close(errCh)
+
+		asr, err := summaryAllocationQuerier.QuerySummaryAllocation(start, end, opts)
+		if err != nil {
+			errCh <- err
+			return
+		}
+
+		asrCh <- asr
+	}(asrCh, errCh)
+
+	return asrCh, errCh
+}
+
+// QueryAssetAsync provides a function for retrieving results from any AssetQuerier asynchronously
+func QueryAssetAsync(assetQuerier AssetQuerier, start, end time.Time, opts *AssetQueryOptions) (chan *AssetSetRange, chan error) {
+	asrCh := make(chan *AssetSetRange)
+	errCh := make(chan error)
+
+	go func(asrCh chan *AssetSetRange, errCh chan error) {
+		defer close(asrCh)
+		defer close(errCh)
+
+		asr, err := assetQuerier.QueryAsset(start, end, opts)
+		if err != nil {
+			errCh <- err
+			return
+		}
+
+		asrCh <- asr
+	}(asrCh, errCh)
+
+	return asrCh, errCh
+}
+
+// QueryCloudUsageAsync provides a function for retrieving results from any CloudUsageQuerier asynchronously
+func QueryCloudUsageAsync(cloudUsageQuerier CloudUsageQuerier, start, end time.Time, opts *CloudUsageQueryOptions) (chan *CloudUsageSetRange, chan error) {
+	cusrCh := make(chan *CloudUsageSetRange)
+	errCh := make(chan error)
+
+	go func(cusrCh chan *CloudUsageSetRange, errCh chan error) {
+		defer close(cusrCh)
+		defer close(errCh)
+
+		cusr, err := cloudUsageQuerier.QueryCloudUsage(start, end, opts)
+		if err != nil {
+			errCh <- err
+			return
+		}
+
+		cusrCh <- cusr
+	}(cusrCh, errCh)
+
+	return cusrCh, errCh
+}

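The async helpers above pair a result channel with an error channel; the originating goroutine sends on exactly one of them and then closes both. A minimal sketch of consuming QueryAllocationAsync, assuming some AllocationQuerier implementation is available (the helper name and option values are illustrative):

```go
package example

import (
	"fmt"
	"time"

	"github.com/kubecost/cost-model/pkg/kubecost"
)

// queryLastDay issues an asynchronous allocation query against any
// AllocationQuerier and waits on whichever channel yields first.
func queryLastDay(q kubecost.AllocationQuerier) error {
	end := time.Now().UTC().Truncate(24 * time.Hour)
	start := end.Add(-24 * time.Hour)

	asrCh, errCh := kubecost.QueryAllocationAsync(q, start, end, &kubecost.AllocationQueryOptions{
		Accumulate:  true,
		AggregateBy: []string{"namespace"},
	})

	select {
	case asr := <-asrCh:
		fmt.Printf("received allocation data for %s to %s: %v\n", start, end, asr != nil)
		return nil
	case err := <-errCh:
		return err
	}
}
```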
+ 9 - 10
pkg/kubecost/status.go

@@ -4,15 +4,15 @@ import "time"
 
 // ETLStatus describes ETL metadata
 type ETLStatus struct {
-	Coverage    Window           `json:"coverage"`
-	LastRun     time.Time        `json:"lastRun"`
-	Progress    float64          `json:"progress"`
-	RefreshRate string           `json:"refreshRate"`
-	Resolution  string           `json:"resolution"`
-	MaxBatch    string           `json:"maxBatch"`
-	StartTime   time.Time        `json:"startTime"`
-	UTCOffset   string           `json:"utcOffset"`
-	Backup      *DirectoryStatus `json:"backup,omitempty"`
+	Coverage                   Window           `json:"coverage"`
+	LastRun                    time.Time        `json:"lastRun"`
+	Progress                   float64          `json:"progress"`
+	RefreshRate                string           `json:"refreshRate"`
+	Resolution                 string           `json:"resolution"`
+	MaxPrometheusQueryDuration string           `json:"maxPrometheusQueryDuration"`
+	StartTime                  time.Time        `json:"startTime"`
+	UTCOffset                  string           `json:"utcOffset"`
+	Backup                     *DirectoryStatus `json:"backup,omitempty"`
 }
 
 // DirectoryStatus describes metadata of a directory of files
@@ -39,7 +39,6 @@ type FileStatus struct {
 type CloudStatus struct {
 	CloudConnectionStatus string                `json:"cloudConnectionStatus"`
 	CloudUsage            *CloudAssetStatus     `json:"cloudUsage,omitempty"`
-	CloudAssets           *CloudAssetStatus     `json:"cloudAssets,omitempty"`
 	Reconciliation        *ReconciliationStatus `json:"reconciliation,omitempty"`
 }
 

+ 120 - 44
pkg/kubecost/summaryallocation.go

@@ -49,7 +49,7 @@ func NewSummaryAllocation(alloc *Allocation, reconcile, reconcileNetwork bool) *
 
 	sa := &SummaryAllocation{
 		Name:                   alloc.Name,
-		Properties:             alloc.Properties.Clone(),
+		Properties:             alloc.Properties,
 		Start:                  alloc.Start,
 		End:                    alloc.End,
 		CPUCoreRequestAverage:  alloc.CPUCoreRequestAverage,
@@ -297,11 +297,11 @@ type SummaryAllocationSet struct {
 }
 
 // NewSummaryAllocationSet converts an AllocationSet to a SummaryAllocationSet.
-// Filter functions, sharing functions, and reconciliation parameters are
+// Filter functions, keep functions, and reconciliation parameters are
 // required for unfortunate reasons to do with performance and legacy order-of-
 // operations details, as well as the fact that reconciliation has been
 // pushed down to the conversion step between Allocation and SummaryAllocation.
-func NewSummaryAllocationSet(as *AllocationSet, ffs, sfs []AllocationMatchFunc, reconcile, reconcileNetwork bool) *SummaryAllocationSet {
+func NewSummaryAllocationSet(as *AllocationSet, ffs, kfs []AllocationMatchFunc, reconcile, reconcileNetwork bool) *SummaryAllocationSet {
 	if as == nil {
 		return nil
 	}
@@ -309,7 +309,7 @@ func NewSummaryAllocationSet(as *AllocationSet, ffs, sfs []AllocationMatchFunc,
 	// If we can know the exact size of the map, use it. If filters or sharing
 	// functions are present, we can't know the size, so we make a default map.
 	var sasMap map[string]*SummaryAllocation
-	if len(ffs) == 0 && len(sfs) == 0 {
+	if len(ffs) == 0 && len(kfs) == 0 {
 		// No filters, so make the map of summary allocations exactly the size
 		// of the origin allocation set.
 		sasMap = make(map[string]*SummaryAllocation, len(as.allocations))
@@ -324,16 +324,16 @@ func NewSummaryAllocationSet(as *AllocationSet, ffs, sfs []AllocationMatchFunc,
 	}
 
 	for _, alloc := range as.allocations {
-		// First, detect if the allocation should be shared. If so, mark it as
+		// First, detect if the allocation should be kept. If so, mark it as
 		// such, insert it, and continue.
-		shouldShare := false
-		for _, sf := range sfs {
-			if sf(alloc) {
-				shouldShare = true
+		shouldKeep := false
+		for _, kf := range kfs {
+			if kf(alloc) {
+				shouldKeep = true
 				break
 			}
 		}
-		if shouldShare {
+		if shouldKeep {
 			sa := NewSummaryAllocation(alloc, reconcile, reconcileNetwork)
 			sa.Share = true
 			sas.Insert(sa)
@@ -454,8 +454,8 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 	// an empty slice implies that we should aggregate everything. (See
 	// generateKey for why that makes sense.)
 	shouldAggregate := aggregateBy != nil
-	shouldShare := len(options.SharedHourlyCosts) > 0 || len(options.ShareFuncs) > 0
-	if !shouldAggregate && !shouldShare {
+	shouldKeep := len(options.SharedHourlyCosts) > 0 || len(options.ShareFuncs) > 0
+	if !shouldAggregate && !shouldKeep {
 		return nil
 	}
 
@@ -492,7 +492,6 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 	//     by the proportion of allocation resources remaining after filters
 	//     have been applied.
 	//
-	//
 	// 11. Distribute shared resources according to sharing coefficients.
 	//
 	// 12. Insert external allocations into the result set.
@@ -611,15 +610,13 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 	// option. (See IdleByNode documentation; defaults to idle-by-cluster.)
 	var allocTotals map[string]*AllocationTotals
 	var ok bool
-	if options.IdleByNode {
-		if options.AllocationTotalsStore != nil {
+	if options.AllocationTotalsStore != nil {
+		if options.IdleByNode {
 			allocTotals, ok = options.AllocationTotalsStore.GetAllocationTotalsByNode(*sas.Window.Start(), *sas.Window.End())
 			if !ok {
 				return fmt.Errorf("nil allocation resource totals by node for %s", sas.Window)
 			}
-		}
-	} else {
-		if options.AllocationTotalsStore != nil {
+		} else {
 			allocTotals, ok = options.AllocationTotalsStore.GetAllocationTotalsByCluster(*sas.Window.Start(), *sas.Window.End())
 			if !ok {
 				return fmt.Errorf("nil allocation resource totals by cluster for %s", sas.Window)
@@ -627,6 +624,20 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 		}
 	}
 
+	// If reconciliation has been fully or partially disabled, clear the
+	// relevant adjustments from the alloc totals
+	if allocTotals != nil && (!options.Reconcile || !options.ReconcileNetwork) {
+		if !options.Reconcile {
+			for _, tot := range allocTotals {
+				tot.ClearAdjustments()
+			}
+		} else if !options.ReconcileNetwork {
+			for _, tot := range allocTotals {
+				tot.NetworkCostAdjustment = 0.0
+			}
+		}
+	}
+
 	// If filters have been applied, then we need to record allocation resource
 	// totals after filtration (i.e. the allocations that are present) so that
 	// we can identify the proportion of idle cost to keep. That is, we should
@@ -705,13 +716,13 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 		// NOTE: SummaryAllocation does not support ShareEven, so only record
 		// by cost for cost-weighted distribution.
 		if sharingCoeffs != nil {
-			sharingCoeffs[key] += sa.TotalCost()
+			sharingCoeffs[key] += sa.TotalCost() - sa.SharedCost
 		}
 
 		// 6. Distribute idle allocations according to the idle coefficients.
-		// NOTE: if idle allocation is off (i.e. ShareIdle == ShareNone) then
-		// all idle allocations will be in the resultSet at this point, so idleSet
-		// will be empty and we won't enter this block.
+		// NOTE: if idle allocation is off (i.e. options.ShareIdle: ShareNone)
+		// then all idle allocations will be in the resultSet at this point, so
+		// idleSet will be empty and we won't enter this block.
 		if len(idleSet.SummaryAllocations) > 0 {
 			for _, idle := range idleSet.SummaryAllocations {
 				// Idle key is either cluster or node, as determined by the
@@ -784,11 +795,11 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 				// the relevant property matches (i.e. Cluster or Node,
 				// depending on which idle sharing option is selected)
 				if options.IdleByNode {
-					if idle.Properties.Node != sa.Properties.Node {
+					if idle.Properties.Cluster != sa.Properties.Cluster || idle.Properties.Node != sa.Properties.Node {
 						continue
 					}
 
-					key = idle.Properties.Node
+					key = fmt.Sprintf("%s/%s", idle.Properties.Cluster, idle.Properties.Node)
 				} else {
 					if idle.Properties.Cluster != sa.Properties.Cluster {
 						continue
@@ -879,7 +890,7 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 		sharingCoeffDenominator -= totalUnmountedCost
 
 		if sharingCoeffDenominator <= 0.0 {
-			log.Warningf("SummaryAllocation: sharing coefficient denominator is %f", sharingCoeffDenominator)
+			log.Warnf("SummaryAllocation: sharing coefficient denominator is %f", sharingCoeffDenominator)
 		} else {
 			// Compute sharing coeffs by dividing the thus-far accumulated
 			// numerators by the now-finalized denominator.
@@ -887,7 +898,7 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 				if sharingCoeffs[key] > 0.0 {
 					sharingCoeffs[key] /= sharingCoeffDenominator
 				} else {
-					log.Warningf("SummaryAllocation: detected illegal sharing coefficient for %s: %v (setting to zero)", key, sharingCoeffs[key])
+					log.Warnf("SummaryAllocation: detected illegal sharing coefficient for %s: %v (setting to zero)", key, sharingCoeffs[key])
 					sharingCoeffs[key] = 0.0
 				}
 			}
@@ -919,12 +930,12 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 	for _, sa := range externalSet.SummaryAllocations {
 		skip := false
 
+		// Make an allocation with the same properties and test that
+		// against the FilterFunc to see if the external allocation should
+		// be filtered or not.
+		// TODO:CLEANUP do something about external cost, this stinks
+		ea := &Allocation{Properties: sa.Properties}
 		for _, ff := range options.FilterFuncs {
-			// Make an allocation with the same properties and test that
-			// against the FilterFunc to see if the external allocation should
-			// be filtered or not.
-			// TODO:CLEANUP do something about external cost, this stinks
-			ea := &Allocation{Properties: sa.Properties}
 			if !ff(ea) {
 				skip = true
 				break
@@ -942,50 +953,67 @@ func (sas *SummaryAllocationSet) AggregateBy(aggregateBy []string, options *Allo
 	// 13. Distribute remaining, undistributed idle. Undistributed idle is any
 	// per-resource idle cost for which there can be no idle coefficient
 	// computed because there is zero usage across all allocations.
-	for _, ia := range idleSet.SummaryAllocations {
-		key := ia.Properties.Cluster
+	for _, isa := range idleSet.SummaryAllocations {
+		// if the idle does not apply to the non-filtered values, skip it
+		skip := false
+		// Make an allocation with the same properties and test it
+		// against the FilterFuncs to see if the idle allocation should
+		// be filtered or not.
+		// TODO:CLEANUP this filter-by-proxy approach stinks
+		ia := &Allocation{Properties: isa.Properties}
+		for _, ff := range options.FilterFuncs {
+			if !ff(ia) {
+				skip = true
+				break
+			}
+		}
+		if skip {
+			continue
+		}
+
+		key := isa.Properties.Cluster
 		if options.IdleByNode {
-			key = fmt.Sprintf("%s/%s", ia.Properties.Cluster, ia.Properties.Node)
+			key = fmt.Sprintf("%s/%s", isa.Properties.Cluster, isa.Properties.Node)
 		}
 
 		rt, ok := allocTotals[key]
 		if !ok {
-			log.Warningf("SummaryAllocation: AggregateBy: cannot handle undistributed idle for '%s'", key)
+			log.Warnf("SummaryAllocation: AggregateBy: cannot handle undistributed idle for '%s'", key)
 			continue
 		}
 
 		hasUndistributableCost := false
 
-		if ia.CPUCost > 0.0 && rt.CPUCost == 0.0 {
+		if isa.CPUCost > 0.0 && rt.CPUCost == 0.0 {
 			// There is idle CPU cost, but no allocated CPU cost, so that cost
 			// is undistributable and must be inserted.
 			hasUndistributableCost = true
 		} else {
 			// Cost was entirely distributed, so zero it out
-			ia.CPUCost = 0.0
+			isa.CPUCost = 0.0
 		}
 
-		if ia.GPUCost > 0.0 && rt.GPUCost == 0.0 {
+		if isa.GPUCost > 0.0 && rt.GPUCost == 0.0 {
 			// There is idle GPU cost, but no allocated GPU cost, so that cost
 			// is undistributable and must be inserted.
 			hasUndistributableCost = true
 		} else {
 			// Cost was entirely distributed, so zero it out
-			ia.GPUCost = 0.0
+			isa.GPUCost = 0.0
 		}
 
-		if ia.RAMCost > 0.0 && rt.RAMCost == 0.0 {
+		if isa.RAMCost > 0.0 && rt.RAMCost == 0.0 {
 			// There is idle RAM cost, but no allocated RAM cost, so that cost
 			// is undistributable and must be inserted.
 			hasUndistributableCost = true
 		} else {
 			// Cost was entirely distributed, so zero it out
-			ia.RAMCost = 0.0
+			isa.RAMCost = 0.0
 		}
 
 		if hasUndistributableCost {
-			ia.Name = fmt.Sprintf("%s/%s", key, IdleSuffix)
-			resultSet.Insert(ia)
+			isa.Name = fmt.Sprintf("%s/%s", key, IdleSuffix)
+			resultSet.Insert(isa)
 		}
 	}
 
@@ -1101,6 +1129,22 @@ func (sas *SummaryAllocationSet) Insert(sa *SummaryAllocation) error {
 	return nil
 }
 
+func (sas *SummaryAllocationSet) TotalCost() float64 {
+	if sas == nil {
+		return 0.0
+	}
+
+	sas.RLock()
+	defer sas.RUnlock()
+
+	tc := 0.0
+	for _, sa := range sas.SummaryAllocations {
+		tc += sa.TotalCost()
+	}
+
+	return tc
+}
+
 // SummaryAllocationSetRange is a thread-safe slice of SummaryAllocationSets.
 type SummaryAllocationSetRange struct {
 	sync.RWMutex
@@ -1130,7 +1174,7 @@ func NewSummaryAllocationSetRange(sass ...*SummaryAllocationSet) *SummaryAllocat
 		if step == 0 {
 			step = sas.Window.Duration()
 		} else if step != sas.Window.Duration() {
-			log.Warningf("instantiating range with step %s using set of step %s is illegal", step, sas.Window.Duration())
+			log.Warnf("instantiating range with step %s using set of step %s is illegal", step, sas.Window.Duration())
 		}
 	}
 
@@ -1268,3 +1312,35 @@ func (sasr *SummaryAllocationSetRange) InsertExternalAllocations(that *Allocatio
 	// err might be nil
 	return err
 }
+
+func (sasr *SummaryAllocationSetRange) TotalCost() float64 {
+	if sasr == nil {
+		return 0.0
+	}
+
+	sasr.RLock()
+	defer sasr.RUnlock()
+
+	tc := 0.0
+	for _, sas := range sasr.SummaryAllocationSets {
+		tc += sas.TotalCost()
+	}
+
+	return tc
+}
+
+// TODO remove after testing
+func (sasr *SummaryAllocationSetRange) Print(verbose bool) {
+	fmt.Printf("%s (dur=%s, len=%d, cost=%.5f)\n", sasr.Window, sasr.Window.Duration(), len(sasr.SummaryAllocationSets), sasr.TotalCost())
+	for _, sas := range sasr.SummaryAllocationSets {
+		fmt.Printf(" > %s (dur=%s, len=%d, cost=%.5f) \n", sas.Window, sas.Window.Duration(), len(sas.SummaryAllocations), sas.TotalCost())
+		for key, sa := range sas.SummaryAllocations {
+			if verbose {
+				fmt.Printf("   {\"%s\", cpu: %.5f, gpu: %.5f, lb: %.5f, net: %.5f, pv: %.5f, ram: %.5f, shared: %.5f, external: %.5f}\n",
+					key, sa.CPUCost, sa.GPUCost, sa.LoadBalancerCost, sa.NetworkCost, sa.PVCost, sa.RAMCost, sa.SharedCost, sa.ExternalCost)
+			} else {
+				fmt.Printf("   - \"%s\": %.5f\n", key, sa.TotalCost())
+			}
+		}
+	}
+}

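A minimal sketch of the renamed keep-func parameter (kfs) in NewSummaryAllocationSet, using the mock generator from this diff; the helper name and namespace value are illustrative:

```go
package example

import (
	"fmt"
	"time"

	"github.com/kubecost/cost-model/pkg/kubecost"
)

// summarize converts a mock AllocationSet to a SummaryAllocationSet, passing a
// keep-func so that matching allocations are marked as shared (sa.Share).
func summarize() {
	day := 24 * time.Hour
	start := time.Now().UTC().Truncate(day).Add(-day)
	as := kubecost.GenerateMockAllocationSet(start)

	// Keep-func: any allocation in namespace2 is kept and flagged for sharing.
	keepNamespace2 := func(a *kubecost.Allocation) bool {
		return a.Properties != nil && a.Properties.Namespace == "namespace2"
	}

	sas := kubecost.NewSummaryAllocationSet(as, nil, []kubecost.AllocationMatchFunc{keepNamespace2}, true, true)
	fmt.Printf("summary total cost: %.2f\n", sas.TotalCost())
}
```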
+ 86 - 15
pkg/kubecost/totals.go

@@ -41,9 +41,35 @@ type AllocationTotals struct {
 func (art *AllocationTotals) ClearAdjustments() {
 	art.CPUCostAdjustment = 0.0
 	art.GPUCostAdjustment = 0.0
+	art.LoadBalancerCostAdjustment = 0.0
+	art.NetworkCostAdjustment = 0.0
+	art.PersistentVolumeCostAdjustment = 0.0
 	art.RAMCostAdjustment = 0.0
 }
 
+// Clone deep copies the AllocationTotals
+func (art *AllocationTotals) Clone() *AllocationTotals {
+	return &AllocationTotals{
+		Start:                          art.Start,
+		End:                            art.End,
+		Cluster:                        art.Cluster,
+		Node:                           art.Node,
+		Count:                          art.Count,
+		CPUCost:                        art.CPUCost,
+		CPUCostAdjustment:              art.CPUCostAdjustment,
+		GPUCost:                        art.GPUCost,
+		GPUCostAdjustment:              art.GPUCostAdjustment,
+		LoadBalancerCost:               art.LoadBalancerCost,
+		LoadBalancerCostAdjustment:     art.LoadBalancerCostAdjustment,
+		NetworkCost:                    art.NetworkCost,
+		NetworkCostAdjustment:          art.NetworkCostAdjustment,
+		PersistentVolumeCost:           art.PersistentVolumeCost,
+		PersistentVolumeCostAdjustment: art.PersistentVolumeCostAdjustment,
+		RAMCost:                        art.RAMCost,
+		RAMCostAdjustment:              art.RAMCostAdjustment,
+	}
+}
+
 // TotalCPUCost returns CPU cost with adjustment.
 func (art *AllocationTotals) TotalCPUCost() float64 {
 	return art.CPUCost + art.CPUCostAdjustment
@@ -181,6 +207,31 @@ func (art *AssetTotals) ClearAdjustments() {
 	art.RAMCostAdjustment = 0.0
 }
 
+// Clone deep copies the AssetTotals
+func (art *AssetTotals) Clone() *AssetTotals {
+	return &AssetTotals{
+		Start:                           art.Start,
+		End:                             art.End,
+		Cluster:                         art.Cluster,
+		Node:                            art.Node,
+		Count:                           art.Count,
+		AttachedVolumeCost:              art.AttachedVolumeCost,
+		AttachedVolumeCostAdjustment:    art.AttachedVolumeCostAdjustment,
+		ClusterManagementCost:           art.ClusterManagementCost,
+		ClusterManagementCostAdjustment: art.ClusterManagementCostAdjustment,
+		CPUCost:                         art.CPUCost,
+		CPUCostAdjustment:               art.CPUCostAdjustment,
+		GPUCost:                         art.GPUCost,
+		GPUCostAdjustment:               art.GPUCostAdjustment,
+		LoadBalancerCost:                art.LoadBalancerCost,
+		LoadBalancerCostAdjustment:      art.LoadBalancerCostAdjustment,
+		PersistentVolumeCost:            art.PersistentVolumeCost,
+		PersistentVolumeCostAdjustment:  art.PersistentVolumeCostAdjustment,
+		RAMCost:                         art.RAMCost,
+		RAMCostAdjustment:               art.RAMCostAdjustment,
+	}
+}
+
 // TotalAttachedVolumeCost returns attached volume cost with adjustment.
 func (art *AssetTotals) TotalAttachedVolumeCost() float64 {
 	return art.AttachedVolumeCost + art.AttachedVolumeCostAdjustment
@@ -433,15 +484,15 @@ func ComputeIdleCoefficients(shareSplit, key string, cpuCost, gpuCost, ramCost f
 		return coeff, coeff, coeff
 	}
 
-	if allocationTotals[key].CPUCost > 0 {
+	if allocationTotals[key].TotalCPUCost() > 0 {
 		cpuCoeff = cpuCost / allocationTotals[key].TotalCPUCost()
 	}
 
-	if allocationTotals[key].GPUCost > 0 {
+	if allocationTotals[key].TotalGPUCost() > 0 {
 		gpuCoeff = gpuCost / allocationTotals[key].TotalGPUCost()
 	}
 
-	if allocationTotals[key].RAMCost > 0 {
+	if allocationTotals[key].TotalRAMCost() > 0 {
 		ramCoeff = ramCost / allocationTotals[key].TotalRAMCost()
 	}
 
@@ -554,10 +605,15 @@ func NewMemoryTotalsStore() *MemoryTotalsStore {
 // by cluster for the given start and end times.
 func (mts *MemoryTotalsStore) GetAllocationTotalsByCluster(start time.Time, end time.Time) (map[string]*AllocationTotals, bool) {
 	k := storeKey(start, end)
-	if raw, ok := mts.allocTotalsByCluster.Get(k); ok {
-		return raw.(map[string]*AllocationTotals), true
-	} else {
+	if raw, ok := mts.allocTotalsByCluster.Get(k); !ok {
 		return map[string]*AllocationTotals{}, false
+	} else {
+		original := raw.(map[string]*AllocationTotals)
+		totals := make(map[string]*AllocationTotals, len(original))
+		for k, v := range original {
+			totals[k] = v.Clone()
+		}
+		return totals, true
 	}
 }
 
@@ -565,10 +621,15 @@ func (mts *MemoryTotalsStore) GetAllocationTotalsByCluster(start time.Time, end
 // by node for the given start and end times.
 func (mts *MemoryTotalsStore) GetAllocationTotalsByNode(start time.Time, end time.Time) (map[string]*AllocationTotals, bool) {
 	k := storeKey(start, end)
-	if raw, ok := mts.allocTotalsByNode.Get(k); ok {
-		return raw.(map[string]*AllocationTotals), true
-	} else {
+	if raw, ok := mts.allocTotalsByNode.Get(k); !ok {
 		return map[string]*AllocationTotals{}, false
+	} else {
+		original := raw.(map[string]*AllocationTotals)
+		totals := make(map[string]*AllocationTotals, len(original))
+		for k, v := range original {
+			totals[k] = v.Clone()
+		}
+		return totals, true
 	}
 }
 
@@ -590,10 +651,15 @@ func (mts *MemoryTotalsStore) SetAllocationTotalsByNode(start time.Time, end tim
 // by cluster for the given start and end times.
 func (mts *MemoryTotalsStore) GetAssetTotalsByCluster(start time.Time, end time.Time) (map[string]*AssetTotals, bool) {
 	k := storeKey(start, end)
-	if raw, ok := mts.assetTotalsByCluster.Get(k); ok {
-		return raw.(map[string]*AssetTotals), true
-	} else {
+	if raw, ok := mts.assetTotalsByCluster.Get(k); !ok {
 		return map[string]*AssetTotals{}, false
+	} else {
+		original := raw.(map[string]*AssetTotals)
+		totals := make(map[string]*AssetTotals, len(original))
+		for k, v := range original {
+			totals[k] = v.Clone()
+		}
+		return totals, true
 	}
 }
 
@@ -601,10 +667,15 @@ func (mts *MemoryTotalsStore) GetAssetTotalsByCluster(start time.Time, end time.
 // by node for the given start and end times.
 func (mts *MemoryTotalsStore) GetAssetTotalsByNode(start time.Time, end time.Time) (map[string]*AssetTotals, bool) {
 	k := storeKey(start, end)
-	if raw, ok := mts.assetTotalsByNode.Get(k); ok {
-		return raw.(map[string]*AssetTotals), true
-	} else {
+	if raw, ok := mts.assetTotalsByNode.Get(k); !ok {
 		return map[string]*AssetTotals{}, false
+	} else {
+		original := raw.(map[string]*AssetTotals)
+		totals := make(map[string]*AssetTotals, len(original))
+		for k, v := range original {
+			totals[k] = v.Clone()
+		}
+		return totals, true
 	}
 }
 

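The new Clone methods let the MemoryTotalsStore getters hand out deep copies instead of shared references. A minimal sketch of why that matters; the setter's map argument is assumed to mirror the getter's return type, and the helper name and values are illustrative:

```go
package example

import (
	"fmt"
	"time"

	"github.com/kubecost/cost-model/pkg/kubecost"
)

// cloneExample shows that mutating a returned totals map can no longer
// corrupt the underlying store, since getters return clones.
func cloneExample() {
	store := kubecost.NewMemoryTotalsStore()
	end := time.Now().UTC().Truncate(24 * time.Hour)
	start := end.Add(-24 * time.Hour)

	store.SetAllocationTotalsByCluster(start, end, map[string]*kubecost.AllocationTotals{
		"cluster1": {Cluster: "cluster1", CPUCost: 10.0},
	})

	totals, _ := store.GetAllocationTotalsByCluster(start, end)
	totals["cluster1"].CPUCost = 0.0 // mutate the returned clone only

	again, _ := store.GetAllocationTotalsByCluster(start, end)
	fmt.Println(again["cluster1"].CPUCost) // still 10.0
}
```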
+ 26 - 0
pkg/kubecost/totals_test.go

@@ -0,0 +1,26 @@
+package kubecost
+
+import (
+	"math"
+	"testing"
+)
+
+func TestComputeIdleCoefficients(t *testing.T) {
+	// test that passing totals where total + adjustment == 0 returns a 0 coefficient
+	at := make(map[string]*AllocationTotals)
+
+	at["item1"] = &AllocationTotals{
+		CPUCost:           1,
+		CPUCostAdjustment: -1,
+		RAMCost:           2,
+		RAMCostAdjustment: -2,
+		GPUCost:           3,
+		GPUCostAdjustment: -3,
+	}
+
+	cpu, gpu, ram := ComputeIdleCoefficients("weighted", "item1", 100, 100, 100, at)
+
+	if math.IsNaN(cpu) || math.IsNaN(gpu) || math.IsNaN(ram) || math.IsInf(cpu, 0) || math.IsInf(gpu, 0) || math.IsInf(ram, 0) {
+		t.Errorf("Idle coefficients should not be NaN or Inf")
+	}
+}

+ 10 - 0
pkg/kubecost/window.go

@@ -343,6 +343,16 @@ func (w Window) Contains(t time.Time) bool {
 	return true
 }
 
+func (w Window) ContainsWindow(that Window) bool {
+	// only support containing closed windows for now
+	// could check if openness is compatible with closure
+	if that.IsOpen() {
+		return false
+	}
+
+	return w.Contains(*that.start) && w.Contains(*that.end)
+}
+
 func (w Window) Duration() time.Duration {
 	if w.IsOpen() {
 		// TODO test

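A minimal sketch of the new ContainsWindow, assuming the existing NewWindow(start, end *time.Time) constructor from window.go; the helper name and durations are illustrative:

```go
package example

import (
	"time"

	"github.com/kubecost/cost-model/pkg/kubecost"
)

// containsExample checks window containment in both directions.
func containsExample() (bool, bool, bool) {
	end := time.Now().UTC().Truncate(24 * time.Hour)
	start := end.Add(-24 * time.Hour)
	outer := kubecost.NewWindow(&start, &end)

	innerStart := start.Add(6 * time.Hour)
	innerEnd := start.Add(12 * time.Hour)
	inner := kubecost.NewWindow(&innerStart, &innerEnd)

	// Open windows (nil start or end) are never contained, by design.
	open := kubecost.NewWindow(&innerStart, nil)

	return outer.ContainsWindow(inner), // true
		inner.ContainsWindow(outer), // false
		outer.ContainsWindow(open) // false
}
```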
+ 43 - 9
pkg/log/log.go

@@ -2,9 +2,13 @@ package log
 
 import (
 	"fmt"
+	"os"
+	"strings"
 	"time"
 
-	"k8s.io/klog"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+	"github.com/spf13/viper"
 )
 
 // TODO for deduped functions, if timeLogged > logTypeLimit, should we log once
@@ -13,8 +17,26 @@ import (
 // concurrency-safe counter
 var ctr = newCounter()
 
+func InitLogging() {
+	zerolog.TimeFieldFormat = time.RFC3339Nano
+	// Default to using pretty formatting
+	if strings.ToLower(viper.GetString("log-format")) != "json" {
+		log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr, TimeFormat: time.RFC3339Nano})
+	}
+
+	level, err := zerolog.ParseLevel(viper.GetString("log-level"))
+	if err != nil {
+		zerolog.SetGlobalLevel(zerolog.InfoLevel)
+		log.Warn().Msg("Error parsing log-level, setting level to 'info'")
+		return
+	}
+	zerolog.SetGlobalLevel(level)
+	log.Log().Msgf("Log level set to %v", level)
+}
+
 func Errorf(format string, a ...interface{}) {
-	klog.Errorf(fmt.Sprintf("[Error] %s", format), a...)
+	log.Error().Msgf(format, a...)
 }
 
 func DedupedErrorf(logTypeLimit int, format string, a ...interface{}) {
@@ -28,23 +50,27 @@ func DedupedErrorf(logTypeLimit int, format string, a ...interface{}) {
 	}
 }
 
-func Warningf(format string, a ...interface{}) {
-	klog.V(2).Infof(fmt.Sprintf("[Warning] %s", format), a...)
+func Warnf(format string, a ...interface{}) {
+	log.Warn().Msgf(format, a...)
 }
 
 func DedupedWarningf(logTypeLimit int, format string, a ...interface{}) {
 	timesLogged := ctr.increment(format)
 
 	if timesLogged < logTypeLimit {
-		Warningf(format, a...)
+		Warnf(format, a...)
 	} else if timesLogged == logTypeLimit {
-		Warningf(format, a...)
+		Warnf(format, a...)
 		Infof("%s logged %d times: suppressing future logs", format, logTypeLimit)
 	}
 }
 
+func Info(msg string) {
+	log.Info().Msg(msg)
+}
+
 func Infof(format string, a ...interface{}) {
-	klog.V(3).Infof(fmt.Sprintf("[Info] %s", format), a...)
+	log.Info().Msgf(format, a...)
 }
 
 func DedupedInfof(logTypeLimit int, format string, a ...interface{}) {
@@ -59,11 +85,19 @@ func DedupedInfof(logTypeLimit int, format string, a ...interface{}) {
 }
 
 func Profilef(format string, a ...interface{}) {
-	klog.V(3).Infof(fmt.Sprintf("[Profiler] %s", format), a...)
+	log.Info().Msgf(fmt.Sprintf("[Profiler] %s", format), a...)
+}
+
+func Debug(msg string) {
+	log.Debug().Msg(msg)
 }
 
 func Debugf(format string, a ...interface{}) {
-	klog.V(5).Infof(fmt.Sprintf("[Debug] %s", format), a...)
+	log.Debug().Msgf(format, a...)
+}
+
+func Fatalf(format string, a ...interface{}) {
+	log.Fatal().Msgf(format, a...)
 }
 
 func Profile(start time.Time, name string) {

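The new InitLogging above replaces klog with zerolog and is driven by two viper keys. A minimal sketch of wiring it up (the key values are illustrative):

```go
package main

import (
	"github.com/spf13/viper"

	"github.com/kubecost/cost-model/pkg/log"
)

func main() {
	// InitLogging reads these viper keys; any log-format other than
	// "json" keeps the pretty console writer.
	viper.Set("log-format", "json")
	viper.Set("log-level", "debug")

	log.InitLogging()

	log.Debugf("debug logging enabled for %s", "cost-model")
	log.Warnf("Warnf replaces the old Warningf helper")
}
```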
+ 34 - 18
pkg/prom/query.go

@@ -89,7 +89,19 @@ func (ctx *Context) ErrorCollection() error {
 func (ctx *Context) Query(query string) QueryResultsChan {
 	resCh := make(QueryResultsChan)
 
-	go runQuery(query, ctx, resCh, "")
+	go runQuery(query, ctx, resCh, time.Now(), "")
+
+	return resCh
+}
+
+// QueryAtTime returns a QueryResultsChan, then runs the given query at the
+// given time (see time parameter here: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
+// and sends the results on the provided channel. Receiver is responsible for
+// closing the channel, preferably using the Read method.
+func (ctx *Context) QueryAtTime(query string, t time.Time) QueryResultsChan {
+	resCh := make(QueryResultsChan)
+
+	go runQuery(query, ctx, resCh, t, "")
 
 	return resCh
 }
@@ -100,7 +112,7 @@ func (ctx *Context) Query(query string) QueryResultsChan {
 func (ctx *Context) ProfileQuery(query string, profileLabel string) QueryResultsChan {
 	resCh := make(QueryResultsChan)
 
-	go runQuery(query, ctx, resCh, profileLabel)
+	go runQuery(query, ctx, resCh, time.Now(), profileLabel)
 
 	return resCh
 }
@@ -134,7 +146,7 @@ func (ctx *Context) ProfileQueryAll(queries ...string) []QueryResultsChan {
 }
 
 func (ctx *Context) QuerySync(query string) ([]*QueryResult, prometheus.Warnings, error) {
-	raw, warnings, err := ctx.query(query)
+	raw, warnings, err := ctx.query(query, time.Now())
 	if err != nil {
 		return nil, warnings, err
 	}
@@ -154,11 +166,11 @@ func (ctx *Context) QueryURL() *url.URL {
 
 // runQuery executes the prometheus query asynchronously, collects results and
 // errors, and passes them through the results channel.
-func runQuery(query string, ctx *Context, resCh QueryResultsChan, profileLabel string) {
+func runQuery(query string, ctx *Context, resCh QueryResultsChan, t time.Time, profileLabel string) {
 	defer errors.HandlePanic()
 	startQuery := time.Now()
 
-	raw, warnings, requestError := ctx.query(query)
+	raw, warnings, requestError := ctx.query(query, t)
 	results := NewQueryResults(query, raw)
 
 	// report all warnings, request, and parse errors (nils will be ignored)
@@ -172,18 +184,22 @@ func runQuery(query string, ctx *Context, resCh QueryResultsChan, profileLabel s
 }
 
 // RawQuery is a direct query to the prometheus client and returns the body of the response
-func (ctx *Context) RawQuery(query string) ([]byte, error) {
+func (ctx *Context) RawQuery(query string, t time.Time) ([]byte, error) {
 	u := ctx.Client.URL(epQuery, nil)
 	q := u.Query()
 	q.Set("query", query)
 
-	// for non-range queries, we set the timestamp for the query to time-offset
-	// this is a special use case that's typically only used when our primary
-	// prom db has delayed insertion (thanos, cortex, etc...)
-	if promQueryOffset != 0 && ctx.name != AllocationContextName {
-		q.Set("time", time.Now().Add(-promQueryOffset).UTC().Format(time.RFC3339))
+	if !t.IsZero() {
+		q.Set("time", strconv.FormatInt(t.Unix(), 10))
 	} else {
-		q.Set("time", time.Now().UTC().Format(time.RFC3339))
+		// for non-range queries, we set the timestamp for the query to time-offset
+		// this is a special use case that's typically only used when our primary
+		// prom db has delayed insertion (thanos, cortex, etc...)
+		if promQueryOffset != 0 && ctx.name != AllocationContextName {
+			q.Set("time", time.Now().Add(-promQueryOffset).UTC().Format(time.RFC3339))
+		} else {
+			q.Set("time", time.Now().UTC().Format(time.RFC3339))
+		}
 	}
 
 	u.RawQuery = q.Encode()
@@ -221,8 +237,8 @@ func (ctx *Context) RawQuery(query string) ([]byte, error) {
 	return body, err
 }
 
-func (ctx *Context) query(query string) (interface{}, prometheus.Warnings, error) {
-	body, err := ctx.RawQuery(query)
+func (ctx *Context) query(query string, t time.Time) (interface{}, prometheus.Warnings, error) {
+	body, err := ctx.RawQuery(query, t)
 	if err != nil {
 		return nil, nil, err
 	}
@@ -230,7 +246,7 @@ func (ctx *Context) query(query string) (interface{}, prometheus.Warnings, error
 	var toReturn interface{}
 	err = json.Unmarshal(body, &toReturn)
 	if err != nil {
-		return nil, nil, fmt.Errorf("Unmarshal Error: %s\nQuery: %s", err, query)
+		return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
 	}
 
 	warnings := warningsFrom(toReturn)
@@ -242,7 +258,7 @@ func (ctx *Context) query(query string) (interface{}, prometheus.Warnings, error
 			return nil, warnings, CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
 		}
 
-		log.Warningf("fetching query '%s': %s", query, w)
+		log.Warnf("fetching query '%s': %s", query, w)
 	}
 
 	return toReturn, warnings, nil
@@ -354,7 +370,7 @@ func (ctx *Context) queryRange(query string, start, end time.Time, step time.Dur
 	var toReturn interface{}
 	err = json.Unmarshal(body, &toReturn)
 	if err != nil {
-		return nil, nil, fmt.Errorf("Unmarshal Error: %s\nQuery: %s", err, query)
+		return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
 	}
 
 	warnings := warningsFrom(toReturn)
@@ -366,7 +382,7 @@ func (ctx *Context) queryRange(query string, start, end time.Time, step time.Dur
 			return nil, warnings, CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
 		}
 
-		log.Warningf("fetching query '%s': %s", query, w)
+		log.Warnf("fetching query '%s': %s", query, w)
 	}
 
 	return toReturn, warnings, nil

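A minimal sketch of pinning an instant query to a fixed timestamp via the new time parameter on RawQuery, assuming a configured *prom.Context; the helper name, metric, and offset are illustrative:

```go
package example

import (
	"fmt"
	"time"

	"github.com/kubecost/cost-model/pkg/log"
	"github.com/kubecost/cost-model/pkg/prom"
)

// queryAt runs an instant query as of a fixed timestamp rather than
// time.Now(), e.g. to sidestep delayed insertion in Thanos or Cortex.
func queryAt(ctx *prom.Context) {
	asOf := time.Now().Add(-10 * time.Minute)

	// A zero time.Time falls back to the offset/now behavior shown above.
	body, err := ctx.RawQuery("node_cpu_hourly_cost", asOf)
	if err != nil {
		log.Errorf("query at %s failed: %s", asOf, err)
		return
	}

	fmt.Printf("raw response: %d bytes\n", len(body))
}
```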
+ 2 - 2
pkg/prom/result.go

@@ -264,7 +264,7 @@ func (qr *QueryResult) GetLabels() map[string]string {
 		label := strings.TrimPrefix(k, "label_")
 		value, ok := v.(string)
 		if !ok {
-			log.Warningf("Failed to parse label value for label: '%s'", label)
+			log.Warnf("Failed to parse label value for label: '%s'", label)
 			continue
 		}
 
@@ -287,7 +287,7 @@ func (qr *QueryResult) GetAnnotations() map[string]string {
 		annotations := strings.TrimPrefix(k, "annotation_")
 		value, ok := v.(string)
 		if !ok {
-			log.Warningf("Failed to parse label value for label: '%s'", annotations)
+			log.Warnf("Failed to parse label value for label: '%s'", annotations)
 			continue
 		}
 

+ 5 - 5
pkg/services/clusters/clustermanager.go

@@ -8,10 +8,10 @@ import (
 
 	"github.com/google/uuid"
 
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util/fileutil"
 	"github.com/kubecost/cost-model/pkg/util/json"
 
-	"k8s.io/klog"
 	"sigs.k8s.io/yaml"
 )
 
@@ -92,7 +92,7 @@ func NewConfiguredClusterManager(storage ClusterStorage, config string) *Cluster
 	exists, err := fileutil.FileExists(config)
 	if !exists {
 		if err != nil {
-			klog.V(1).Infof("[Error] Failed to load config file: %s. Error: %s", config, err.Error())
+			log.Errorf("Failed to load config file: %s. Error: %s", config, err.Error())
 		}
 		return clusterManager
 	}
@@ -117,7 +117,7 @@ func NewConfiguredClusterManager(storage ClusterStorage, config string) *Cluster
 		if entry.Auth != nil {
 			authData, err := getAuth(entry.Auth)
 			if err != nil {
-				klog.V(1).Infof("[Error]: %s", err)
+				log.Errorf("%s", err)
 			} else {
 				details[DetailsAuthKey] = authData
 			}
@@ -188,7 +188,7 @@ func (cm *ClusterManager) GetAll() []*ClusterDefinition {
 		var cd ClusterDefinition
 		err := json.Unmarshal(cluster, &cd)
 		if err != nil {
-			klog.V(1).Infof("[Error] Failed to unmarshal json cluster definition for key: %s", key)
+			log.Errorf("Failed to unmarshal json cluster definition for key: %s", key)
 			return nil
 		}
 
@@ -197,7 +197,7 @@ func (cm *ClusterManager) GetAll() []*ClusterDefinition {
 	})
 
 	if err != nil {
-		klog.Infof("[Error] Failed to load list of clusters: %s", err.Error())
+		log.Errorf("Failed to load list of clusters: %s", err.Error())
 	}
 
 	return clusters

+ 2 - 2
pkg/services/clusters/clustersendpoints.go

@@ -7,8 +7,8 @@ import (
 
 	"github.com/julienschmidt/httprouter"
 
+	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util/json"
-	"k8s.io/klog"
 )
 
 // DataEnvelope is a generic wrapper struct for http response data
@@ -94,7 +94,7 @@ func wrapData(data interface{}, err error) []byte {
 	var resp []byte
 
 	if err != nil {
-		klog.V(1).Infof("Error returned to client: %s", err.Error())
+		log.Infof("Error returned to client: %s", err.Error())
 		resp, _ = json.Marshal(&DataEnvelope{
 			Code:   http.StatusInternalServerError,
 			Status: "error",

+ 2 - 2
pkg/services/clusterservice.go

@@ -22,13 +22,13 @@ func newClusterManager() *clusters.ClusterManager {
 		path := env.GetConfigPath()
 		db, err := bolt.Open(path+"costmodel.db", 0600, nil)
 		if err != nil {
-			klog.V(1).Infof("[Error] Failed to create costmodel.db: %s", err.Error())
+			log.Errorf("Failed to create costmodel.db: %s", err.Error())
 			return cm.NewConfiguredClusterManager(cm.NewMapDBClusterStorage(), clustersConfigFile)
 		}
 
 		store, err := clusters.NewBoltDBClusterStorage("clusters", db)
 		if err != nil {
-			klog.V(1).Infof("[Error] Failed to Create Cluster Storage: %s", err.Error())
+			log.Errorf("Failed to create cluster storage: %s", err.Error())
 			return clusters.NewConfiguredClusterManager(clusters.NewMapDBClusterStorage(), clustersConfigFile)
 		}
 

+ 1 - 1
pkg/services/services.go

@@ -32,7 +32,7 @@ type defaultHTTPServices struct {
 // Add a HTTPService implementation for
 func (dhs *defaultHTTPServices) Add(service HTTPService) {
 	if service == nil {
-		log.Warningf("Attempting to Add nil HTTPService")
+		log.Warnf("Attempting to Add nil HTTPService")
 		return
 	}
 

+ 57 - 5
pkg/storage/s3storage.go

@@ -15,6 +15,9 @@ import (
 
 	"github.com/kubecost/cost-model/pkg/log"
 
+	aws "github.com/aws/aws-sdk-go-v2/aws"
+	awsconfig "github.com/aws/aws-sdk-go-v2/config"
+
 	"github.com/minio/minio-go/v7"
 	"github.com/minio/minio-go/v7/pkg/credentials"
 	"github.com/minio/minio-go/v7/pkg/encrypt"
@@ -57,7 +60,7 @@ var DefaultConfig = S3Config{
 		MaxIdleConnsPerHost:   100,
 		MaxConnsPerHost:       0,
 	},
-	PartSize: 1024 * 1024 * 64, // 64Ms3.
+	PartSize: 1024 * 1024 * 64, // 64MB.
 }
 
 // Config stores the configuration for s3 bucket.
@@ -65,6 +68,7 @@ type S3Config struct {
 	Bucket             string            `yaml:"bucket"`
 	Endpoint           string            `yaml:"endpoint"`
 	Region             string            `yaml:"region"`
+	AWSSDKAuth         bool              `yaml:"aws_sdk_auth"`
 	AccessKey          string            `yaml:"access_key"`
 	Insecure           bool              `yaml:"insecure"`
 	SignatureV2        bool              `yaml:"signature_version2"`
@@ -75,8 +79,9 @@ type S3Config struct {
 	ListObjectsVersion string            `yaml:"list_objects_version"`
 	// PartSize used for multipart upload. Only used if uploaded object size is known and larger than configured PartSize.
 	// NOTE we need to make sure this number does not produce more parts than 10 000.
-	PartSize  uint64    `yaml:"part_size"`
-	SSEConfig SSEConfig `yaml:"sse_config"`
+	PartSize    uint64    `yaml:"part_size"`
+	SSEConfig   SSEConfig `yaml:"sse_config"`
+	STSEndpoint string    `yaml:"sts_endpoint"`
 }
 
 // SSEConfig deals with the configuration of SSE for Minio. The following options are valid:
@@ -190,7 +195,12 @@ func NewS3StorageWith(config S3Config) (*S3Storage, error) {
 	if err := validate(config); err != nil {
 		return nil, err
 	}
-	if config.AccessKey != "" {
+
+	if config.AWSSDKAuth {
+		chain = []credentials.Provider{
+			wrapCredentialsProvider(&awsAuth{Region: config.Region}),
+		}
+	} else if config.AccessKey != "" {
 		chain = []credentials.Provider{wrapCredentialsProvider(&credentials.Static{
 			Value: credentials.Value{
 				AccessKeyID:     config.AccessKey,
@@ -206,6 +216,7 @@ func NewS3StorageWith(config S3Config) (*S3Storage, error) {
 				Client: &http.Client{
 					Transport: http.DefaultTransport,
 				},
+				Endpoint: config.STSEndpoint,
 			}),
 		}
 	}
@@ -233,6 +244,12 @@ func NewS3StorageWith(config S3Config) (*S3Storage, error) {
 	if config.SSEConfig.Type != "" {
 		switch config.SSEConfig.Type {
 		case SSEKMS:
+			// If the KMSEncryptionContext is a nil map the header that is
+			// constructed by the encrypt.ServerSide object will be base64
+			// encoded "nil" which is not accepted by AWS.
+			if config.SSEConfig.KMSEncryptionContext == nil {
+				config.SSEConfig.KMSEncryptionContext = make(map[string]string)
+			}
 			sse, err = encrypt.NewSSEKMS(config.SSEConfig.KMSKeyID, config.SSEConfig.KMSEncryptionContext)
 			if err != nil {
 				return nil, errors.Wrap(err, "initialize s3 client SSE-KMS")
@@ -283,7 +300,9 @@ func validate(conf S3Config) error {
 	if conf.Endpoint == "" {
 		return errors.New("no s3 endpoint in config file")
 	}
-
+	if conf.AWSSDKAuth && conf.AccessKey != "" {
+		return errors.New("aws_sdk_auth and access_key are mutually exclusive configurations")
+	}
 	if conf.AccessKey == "" && conf.SecretKey != "" {
 		return errors.New("no s3 access_key specified while secret_key is present in config file; either both should be present in config or envvars/IAM should be used.")
 	}
@@ -526,6 +545,39 @@ func (s3 *S3Storage) getRange(ctx context.Context, name string, off, length int6
 	return ioutil.ReadAll(r)
 }
 
+// awsAuth retrieves credentials via the aws-sdk-go-v2 default credential chain.
+type awsAuth struct {
+	Region string
+	creds  aws.Credentials
+}
+
+// Retrieve loads the AWS SDK default config for the region and returns the resolved credentials.
+func (a *awsAuth) Retrieve() (credentials.Value, error) {
+	cfg, err := awsconfig.LoadDefaultConfig(context.TODO(), awsconfig.WithRegion(a.Region))
+	if err != nil {
+		return credentials.Value{}, errors.Wrap(err, "load AWS SDK config")
+	}
+
+	creds, err := cfg.Credentials.Retrieve(context.TODO())
+	if err != nil {
+		return credentials.Value{}, errors.Wrap(err, "retrieve AWS SDK credentials")
+	}
+
+	a.creds = creds
+
+	return credentials.Value{
+		AccessKeyID:     creds.AccessKeyID,
+		SecretAccessKey: creds.SecretAccessKey,
+		SessionToken:    creds.SessionToken,
+		SignerType:      credentials.SignatureV4,
+	}, nil
+}
+
+// IsExpired reports whether the retrieved credentials have expired.
+func (a *awsAuth) IsExpired() bool {
+	return a.creds.Expired()
+}
+
 type overrideSignerType struct {
 	credentials.Provider
 	signerType credentials.SignatureType

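A minimal sketch of the new aws_sdk_auth path, which delegates credential discovery to the aws-sdk-go-v2 default chain (env vars, shared config, IRSA, instance profile) and, per validate() above, is mutually exclusive with a static access_key; bucket, endpoint, and region values are placeholders:

```go
package example

import (
	"github.com/kubecost/cost-model/pkg/storage"
)

// newSDKAuthStorage builds an S3Storage without static credentials.
func newSDKAuthStorage() (*storage.S3Storage, error) {
	cfg := storage.DefaultConfig
	cfg.Bucket = "kubecost-etl"                 // placeholder bucket
	cfg.Endpoint = "s3.us-east-2.amazonaws.com" // placeholder endpoint
	cfg.Region = "us-east-2"
	cfg.AWSSDKAuth = true // mutually exclusive with a static access_key

	return storage.NewS3StorageWith(cfg)
}
```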
+ 157 - 0
pkg/util/cache/cachegroup.go

@@ -0,0 +1,157 @@
+package cache
+
+import (
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/kubecost/cost-model/pkg/util/interval"
+	"golang.org/x/sync/singleflight"
+)
+
+// cacheEntry contains a T item and the time it was added to the cache
+type cacheEntry[T comparable] struct {
+	item T
+	ts   time.Time
+}
+
+// CacheGroup provides single flighting for grouping repeated calls for the same workload, as well
+// as a cache that extends the lifetime of the returned result by a specific duration.
+type CacheGroup[T comparable] struct {
+	lock             sync.Mutex
+	cache            map[string]*cacheEntry[T]
+	group            singleflight.Group
+	expirationLock   sync.Mutex
+	expirationRunner *interval.IntervalRunner
+	expiry           time.Duration
+	max              int
+}
+
+// NewCacheGroup creates a new cache group instance given the max number of keys to cache.
+// If a new cache entry is added that exceeds the maximum, the oldest entry is evicted.
+func NewCacheGroup[T comparable](max int) *CacheGroup[T] {
+	return &CacheGroup[T]{
+		cache: make(map[string]*cacheEntry[T]),
+		max:   max,
+	}
+}
+
+// Do accepts a group key and a factory function to execute a workload request. Any executions
+// of Do() using an identical key will wait on the originating request rather than executing a
+// new request, and the final result will be shared among any callers sharing the same key.
+// Additionally, once returned, the result for that key will remain cached. An expiration
+// policy can be added for this cache by calling the WithExpiration method.
+func (cg *CacheGroup[T]) Do(key string, factory func() (T, error)) (T, error) {
+	// Check cache for existing data using the group key
+	cg.lock.Lock()
+	if result, ok := cg.cache[key]; ok {
+		cg.lock.Unlock()
+		return result.item, nil
+	}
+	cg.lock.Unlock()
+
+	// single flight the group using the group key
+	item, err, _ := cg.group.Do(key, func() (any, error) {
+		i, err := factory()
+		if err != nil {
+			return nil, err
+		}
+
+		// assign cache once a result for the group key is returned
+		cg.lock.Lock()
+		cg.removeOldestBeyondCapacity()
+		cg.cache[key] = &cacheEntry[T]{
+			item: i,
+			ts:   time.Now(),
+		}
+		cg.lock.Unlock()
+		return i, nil
+	})
+
+	if err != nil {
+		return defaultValue[T](), err
+	}
+
+	tItem, ok := item.(T)
+	if !ok {
+		return defaultValue[T](), fmt.Errorf("failed to convert single-flight result")
+	}
+
+	return tItem, nil
+}
+
+// WithExpiration assigns an expiration to cached entries and starts an eviction process
+// that runs on the specified interval.
+func (cg *CacheGroup[T]) WithExpiration(expiry time.Duration, evictionInterval time.Duration) *CacheGroup[T] {
+	cg.expirationLock.Lock()
+	defer cg.expirationLock.Unlock()
+
+	if cg.expirationRunner == nil {
+		cg.expirationRunner = interval.NewIntervalRunner(func() {
+			cg.lock.Lock()
+			defer cg.lock.Unlock()
+
+			cg.removeExpired()
+		}, evictionInterval)
+	}
+
+	if cg.expirationRunner.Start() {
+		cg.expiry = expiry
+	}
+	return cg
+}
+
+// DisableExpiration shuts down the expiration process, allowing cache entries to remain
+// until 'max' is exceeded.
+func (cg *CacheGroup[T]) DisableExpiration() {
+	cg.expirationLock.Lock()
+	defer cg.expirationLock.Unlock()
+
+	if cg.expirationRunner != nil {
+		cg.expirationRunner.Stop()
+		cg.expirationRunner = nil
+	}
+}
+
+// locates the oldest entry and removes it from the map. caller should lock
+// prior to calling
+func (cg *CacheGroup[T]) removeOldestBeyondCapacity() {
+	// only remove the oldest entries if we're at max capacity
+	if len(cg.cache) < cg.max {
+		return
+	}
+
+	oldest := time.Now()
+	oldestKey := ""
+
+	for k, v := range cg.cache {
+		if v.ts.Before(oldest) {
+			oldest = v.ts
+			oldestKey = k
+		}
+	}
+
+	delete(cg.cache, oldestKey)
+}
+
+// removes any entries that have expired from the map. caller should lock prior
+// to calling
+func (cg *CacheGroup[T]) removeExpired() {
+	if len(cg.cache) == 0 {
+		return
+	}
+
+	now := time.Now()
+	for k, v := range cg.cache {
+		if now.Sub(v.ts) >= cg.expiry {
+			delete(cg.cache, k)
+		}
+	}
+}
+
+// defaultValue returns the initialized zero value for a T instance
+// (i.e. for value types, typically the 0 value; for pointer types, nil)
+func defaultValue[T any]() T {
+	var t T
+	return t
+}
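
To make the intended call pattern concrete, a hedged usage sketch (CostReport and queryCostReport are hypothetical; note that *CostReport satisfies the comparable constraint):

	group := cache.NewCacheGroup[*CostReport](64).WithExpiration(10*time.Minute, time.Minute)

	report, err := group.Do("cluster-a/2022-04-01", func() (*CostReport, error) {
		// Runs once per key: concurrent callers with the same key share this
		// result, which then stays cached until the expiration policy evicts it.
		return queryCostReport("cluster-a", "2022-04-01")
	})
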

+ 187 - 0
pkg/util/cache/cachegroup_test.go

@@ -0,0 +1,187 @@
+package cache
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+type Obj struct {
+	Value int
+}
+
+func TestGroupCacheSingleFlighting(t *testing.T) {
+	g := NewCacheGroup[*Obj](3)
+
+	factory := func() (*Obj, error) {
+		time.Sleep(2 * time.Second)
+		return &Obj{10}, nil
+	}
+
+	next := make(chan struct{})
+	done := make(chan struct{})
+
+	go func() {
+		now := time.Now()
+		o, _ := g.Do("a", func() (*Obj, error) {
+			next <- struct{}{}
+			return factory()
+		})
+		t.Logf("Took: %d ms, Obj Value: %d\n", time.Now().Sub(now).Milliseconds(), o.Value)
+	}()
+
+	go func() {
+		<-next
+
+		time.Sleep(1 * time.Second)
+
+		now := time.Now()
+		o, _ := g.Do("a", factory)
+		delta := time.Now().Sub(now)
+		t.Logf("Other Go Routine Took: %d ms, Obj Value: %d\n", delta.Milliseconds(), o.Value)
+
+		if delta > (time.Duration(1250 * time.Millisecond)) {
+			t.Errorf("Delta Time > 1250ms. Delta: %d, Expected 1000ms\n", delta)
+		}
+		done <- struct{}{}
+	}()
+
+	<-done
+}
+
+func TestGroupCacheAfterSingleFlighting(t *testing.T) {
+	g := NewCacheGroup[*Obj](3)
+
+	factory := func() (*Obj, error) {
+		time.Sleep(2 * time.Second)
+		return &Obj{10}, nil
+	}
+
+	next := make(chan struct{})
+	done := make(chan struct{})
+
+	go func() {
+		now := time.Now()
+		o, _ := g.Do("a", func() (*Obj, error) {
+			next <- struct{}{}
+			return factory()
+		})
+		t.Logf("Took: %d ms, Obj Value: %d\n", time.Now().Sub(now).Milliseconds(), o.Value)
+	}()
+
+	go func() {
+		<-next
+		// wait the full 2 seconds and then some, which will ensure we are no longer
+		// single flighting, and should reach into the cache
+		time.Sleep(2500 * time.Millisecond)
+
+		now := time.Now()
+		o, _ := g.Do("a", factory)
+		delta := time.Now().Sub(now)
+		t.Logf("Other Go Routine Took: %d ms, Obj Value: %d\n", delta.Milliseconds(), o.Value)
+
+		if delta > (time.Duration(1250 * time.Millisecond)) {
+			t.Errorf("Delta Time > 1250ms. Delta: %d, Expected 1000ms\n", delta)
+		}
+
+		done <- struct{}{}
+	}()
+
+	<-done
+}
+
+func TestGroupCacheMany(t *testing.T) {
+	// Apologies, this test can be difficult to follow (concurrent tests are hard).
+	// The idea here is that we test a "request" that takes 1 second to return an
+	// Obj{10} result (factory).
+	// * To test the single flight behavior, we make a series of requests that will
+	//   happen while the initial request is in flight.
+	// * The second half of requests will be made after the original request returns
+	//   to ensure that we pull from cache.
+	// * The failure case is if all of these actions take too long to execute, which
+	//   _should_ indicate a deadlock or problem with the API.
+	g := NewCacheGroup[*Obj](3).WithExpiration(10*time.Second, 5*time.Second)
+
+	factory := func() (*Obj, error) {
+		time.Sleep(1 * time.Second)
+		return &Obj{10}, nil
+	}
+
+	next := make(chan struct{})
+
+	go func() {
+		now := time.Now()
+		o, _ := g.Do("a", func() (*Obj, error) {
+			next <- struct{}{}
+			return factory()
+		})
+		t.Logf("Took: %d ms, Obj Value: %d\n", time.Now().Sub(now).Milliseconds(), o.Value)
+	}()
+
+	<-next
+	var wg sync.WaitGroup
+	wg.Add(10)
+	for i := 0; i < 10; i++ {
+		go func(ii int) {
+			t.Logf("Created Go Routine: %d\n", ii)
+			now := time.Now()
+			o, _ := g.Do("a", factory)
+			delta := time.Now().Sub(now)
+			t.Logf("Go Routine[%d] Took: %d ms, Obj Value: %d\n", ii, delta.Milliseconds(), o.Value)
+			wg.Done()
+		}(i)
+		time.Sleep(250 * time.Millisecond)
+	}
+
+	select {
+	case <-waitChannelFor(&wg):
+		t.Logf("Successfully returned values for all requests.")
+	case <-time.After(time.Second * 8):
+		t.Logf("Failed to complete after 8 second timeout")
+	}
+}
+
+func TestCacheGroupExpirationPolicy(t *testing.T) {
+	g := NewCacheGroup[*Obj](3).WithExpiration(2*time.Second, time.Second)
+	g.Do("a", func() (*Obj, error) {
+		return &Obj{10}, nil
+	})
+
+	time.Sleep(2100 * time.Millisecond)
+	if len(g.cache) > 0 {
+		t.Errorf("Expected cache to be empty (expired). Cache length was: %d\n", len(g.cache))
+	}
+}
+
+func TestCacheGroupMaxRollOff(t *testing.T) {
+	g := NewCacheGroup[*Obj](3)
+
+	g.Do("a", func() (*Obj, error) {
+		return &Obj{1}, nil
+	})
+
+	g.Do("b", func() (*Obj, error) {
+		return &Obj{1}, nil
+	})
+
+	g.Do("c", func() (*Obj, error) {
+		return &Obj{1}, nil
+	})
+
+	g.Do("d", func() (*Obj, error) {
+		return &Obj{1}, nil
+	})
+
+	if _, ok := g.cache["a"]; ok {
+		t.Errorf("Expected 'a' group cache to be evicted")
+	}
+}
+
+func waitChannelFor(wg *sync.WaitGroup) <-chan struct{} {
+	ch := make(chan struct{})
+	go func() {
+		wg.Wait()
+		ch <- struct{}{}
+	}()
+	return ch
+}
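
Because these tests are timing-sensitive and exercise concurrency, it may be worth running them with the race detector enabled:

	go test -race -v ./pkg/util/cache/
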

+ 72 - 0
pkg/util/interval/intervalrunner.go

@@ -0,0 +1,72 @@
+package interval
+
+import (
+	"time"
+
+	"github.com/kubecost/cost-model/pkg/util/atomic"
+)
+
+// IntervalRunner executes an action on a fixed interval until explicitly stopped,
+// using AtomicRunState to coordinate its start/stop lifecycle.
+type IntervalRunner struct {
+	runState atomic.AtomicRunState
+	action   func()
+	interval time.Duration
+}
+
+// NewIntervalRunner creates a new instance of an interval runner that executes the provided
+// function on a designated interval until explicitly stopped.
+func NewIntervalRunner(action func(), interval time.Duration) *IntervalRunner {
+	return &IntervalRunner{
+		action:   action,
+		interval: interval,
+	}
+}
+
+// Start begins the interval execution. It returns true if the interval execution successfully starts.
+// It will return false if the interval execution is already running.
+func (ir *IntervalRunner) Start() bool {
+	// Before we attempt to start, we must ensure we are not in a stopping state; this is a common
+	// pattern that should be used with the AtomicRunState.
+	ir.runState.WaitForReset()
+
+	// This will atomically check the current state to ensure we can run, then advances the state.
+	// If the state is already started, it will return false.
+	if !ir.runState.Start() {
+		return false
+	}
+
+	// our run state is advanced, let's execute our action on the interval
+	// spawn a new goroutine which will loop and wait the interval each iteration
+	go func() {
+		ticker := time.NewTicker(ir.interval)
+		for {
+			// use a select statement to receive whichever channel receives data first
+			select {
+			// if our stop channel receives data, it means we have explicitly called
+			// Stop(), and must reset our AtomicRunState to its initial idle state
+			case <-ir.runState.OnStop():
+				ticker.Stop()
+				ir.runState.Reset()
+				return // exit go routine
+
+			// After our interval elapses, fall through
+			case <-ticker.C:
+			}
+
+			// Execute the function
+			ir.action()
+
+			// Loop back to the select where we will wait for the interval to elapse
+			// or an explicit stop to be called
+		}
+	}()
+
+	return true
+}
+
+// Stop will explicitly stop the execution of the interval runner. If an action is already executing, it will wait
+// until completion before processing the stop. Any attempts to start during the stopping phase will block until
+// it's possible to Start() again.
+func (ir *IntervalRunner) Stop() bool {
+	return ir.runState.Stop()
+}
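
A small lifecycle sketch (the action body is hypothetical; log is this repo's pkg/log):

	runner := interval.NewIntervalRunner(func() {
		log.Infof("evicting expired cache entries")
	}, 30*time.Second)

	if runner.Start() { // returns false if already running
		defer runner.Stop() // waits for any in-flight action, then resets to idle
	}
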

+ 2 - 2
pkg/util/watcher/configwatchers.go

@@ -1,8 +1,8 @@
 package watcher
 
 import (
+	"github.com/kubecost/cost-model/pkg/log"
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog"
 )
 
 // ConfigMapWatcher represents a single configmap watcher
@@ -66,7 +66,7 @@ func (cmw *ConfigMapWatchers) ToWatchFunc() func(interface{}) {
 			for _, cw := range watchers {
 				err := cw.WatchFunc(name, data)
 				if err != nil {
-					klog.Infof("ERROR UPDATING %s CONFIG: %s", name, err.Error())
+					log.Infof("ERROR UPDATING %s CONFIG: %s", name, err.Error())
 				}
 			}
 		}

+ 79 - 28
ui/package-lock.json

@@ -1818,12 +1818,6 @@
       "integrity": "sha512-Aj3mbwVzj7Vve4I/v2JYOPFkCGM2YS7OqQTNSxmUR+LECRpokuPgAYghePgr6SALDo5bD5DlfbSaYjOzGJZOLQ==",
       "dev": true
     },
-    "async": {
-      "version": "0.9.2",
-      "resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
-      "integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0=",
-      "dev": true
-    },
     "available-typed-arrays": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz",
@@ -2760,12 +2754,81 @@
       "dev": true
     },
     "ejs": {
-      "version": "3.1.6",
-      "resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.6.tgz",
-      "integrity": "sha512-9lt9Zse4hPucPkoP7FHDF0LQAlGyF9JVpnClFLFH3aSSbxmyoqINRpp/9wePWJTUl4KOQwRL72Iw3InHPDkoGw==",
+      "version": "3.1.7",
+      "resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.7.tgz",
+      "integrity": "sha512-BIar7R6abbUxDA3bfXrO4DSgwo8I+fB5/1zgujl3HLLjwd6+9iOnrT+t3grn2qbk9vOgBubXOFwX2m9axoFaGw==",
       "dev": true,
       "requires": {
-        "jake": "^10.6.1"
+        "jake": "^10.8.5"
+      },
+      "dependencies": {
+        "ansi-styles": {
+          "version": "4.3.0",
+          "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
+          "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
+          "dev": true,
+          "requires": {
+            "color-convert": "^2.0.1"
+          }
+        },
+        "async": {
+          "version": "3.2.3",
+          "resolved": "https://registry.npmjs.org/async/-/async-3.2.3.tgz",
+          "integrity": "sha512-spZRyzKL5l5BZQrr/6m/SqFdBN0q3OCI0f9rjfBzCMBIP4p75P620rR3gTmaksNOhmzgdxcaxdNfMy6anrbM0g==",
+          "dev": true
+        },
+        "chalk": {
+          "version": "4.1.2",
+          "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz",
+          "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==",
+          "dev": true,
+          "requires": {
+            "ansi-styles": "^4.1.0",
+            "supports-color": "^7.1.0"
+          }
+        },
+        "color-convert": {
+          "version": "2.0.1",
+          "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
+          "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
+          "dev": true,
+          "requires": {
+            "color-name": "~1.1.4"
+          }
+        },
+        "color-name": {
+          "version": "1.1.4",
+          "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
+          "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==",
+          "dev": true
+        },
+        "has-flag": {
+          "version": "4.0.0",
+          "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
+          "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==",
+          "dev": true
+        },
+        "jake": {
+          "version": "10.8.5",
+          "resolved": "https://registry.npmjs.org/jake/-/jake-10.8.5.tgz",
+          "integrity": "sha512-sVpxYeuAhWt0OTWITwT98oyV0GsXyMlXCF+3L1SuafBVUIr/uILGRB+NqwkzhgXKvoJpDIpQvqkUALgdmQsQxw==",
+          "dev": true,
+          "requires": {
+            "async": "^3.2.3",
+            "chalk": "^4.0.2",
+            "filelist": "^1.0.1",
+            "minimatch": "^3.0.4"
+          }
+        },
+        "supports-color": {
+          "version": "7.2.0",
+          "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
+          "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==",
+          "dev": true,
+          "requires": {
+            "has-flag": "^4.0.0"
+          }
+        }
       }
     },
     "electron-to-chromium": {
@@ -3698,18 +3761,6 @@
       "integrity": "sha1-TkMekrEalzFjaqH5yNHMvP2reN8=",
       "dev": true
     },
-    "jake": {
-      "version": "10.8.2",
-      "resolved": "https://registry.npmjs.org/jake/-/jake-10.8.2.tgz",
-      "integrity": "sha512-eLpKyrfG3mzvGE2Du8VoPbeSkRry093+tyNjdYaBbJS9v17knImYGNXQCUV0gLxQtF82m3E8iRb/wdSQZLoq7A==",
-      "dev": true,
-      "requires": {
-        "async": "0.9.x",
-        "chalk": "^2.4.2",
-        "filelist": "^1.0.1",
-        "minimatch": "^3.0.4"
-      }
-    },
     "js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -4094,9 +4145,9 @@
       }
     },
     "minimist": {
-      "version": "1.2.5",
-      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz",
-      "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==",
+      "version": "1.2.6",
+      "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.6.tgz",
+      "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==",
       "dev": true
     },
     "mkdirp": {
@@ -4165,9 +4216,9 @@
       "dev": true
     },
     "node-forge": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.2.1.tgz",
-      "integrity": "sha512-Fcvtbb+zBcZXbTTVwqGA5W+MKBj56UjVRevvchv5XrcyXbmNdesfZL37nlcWOfpgHhgmxApw3tQbTr4CqNmX4w==",
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.3.0.tgz",
+      "integrity": "sha512-08ARB91bUi6zNKzVmaj3QO7cr397uiDT2nJ63cHjyNtCTWIgvS47j3eT0WfzUwS9+6Z5YshRaoasFkXCKrIYbA==",
       "dev": true
     },
     "node-gyp-build": {