cluster.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. package costmodel
  2. import (
  3. "fmt"
  4. "os"
  5. "sync"
  6. "time"
  7. "github.com/kubecost/cost-model/pkg/cloud"
  8. "github.com/kubecost/cost-model/pkg/errors"
  9. "github.com/kubecost/cost-model/pkg/prom"
  10. "github.com/kubecost/cost-model/pkg/util"
  11. prometheus "github.com/prometheus/client_golang/api"
  12. "k8s.io/klog"
  13. )
// PromQL templates filled in with fmt.Sprintf by ClusterCostsOverTime.
// Hourly costs are multiplied by 730, presumably the number of hours in a
// month (compare util.HoursPerMonth) — TODO confirm.
const (
	// queryClusterCores sums, per cluster, the per-node CPU cost (core capacity
	// times hourly CPU cost) plus the per-node hourly GPU cost.
	// Verbs: three (range, offset clause) pairs, in order.
	queryClusterCores = `sum(
avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s] %s)) by (node, cluster_id) * avg(avg_over_time(node_cpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730 +
avg(avg_over_time(node_gpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730
) by (cluster_id)`
	// queryClusterRAM sums, per cluster, the per-node memory capacity in GiB
	// (bytes / 1024^3) times the hourly RAM cost.
	// Verbs: two (range, offset clause) pairs, in order.
	queryClusterRAM = `sum(
avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s] %s)) by (node, cluster_id) / 1024 / 1024 / 1024 * avg(avg_over_time(node_ram_hourly_cost[%s] %s)) by (node, cluster_id) * 730
) by (cluster_id)`
	// queryStorage sums, per cluster, the per-volume hourly cost times the
	// volume capacity in GiB.
	// Verbs: two (range, offset clause) pairs, then a trailing fragment that the
	// caller fills with the provider's local storage query (possibly empty).
	queryStorage = `sum(
avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730
* avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
) by (cluster_id) %s`
	// queryTotal adds total node cost to persistent volume cost. Unlike the
	// templates above, the range is fixed at 1h and no offset is applied.
	// Verbs: one trailing fragment for the local storage query (possibly empty).
	queryTotal = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 +
sum(
avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730
* avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
) by (cluster_id) %s`
	// queryNodes is the node-only total, used by ClusterCostsOverTime as a
	// fallback when queryTotal yields no usable data.
	// Verbs: one trailing fragment for the local storage query (possibly empty).
	queryNodes = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 %s`
)
// PromQueryContext bundles what AsyncPromQuery needs to run a single query:
// the client to query, a collector for errors, and an optional wait group.
// TODO move this to a package-accessible helper
type PromQueryContext struct {
	// Client is the Prometheus API client the query is issued against.
	Client prometheus.Client
	// ErrorCollector receives the query error and the parse error (either may
	// be nil) reported by AsyncPromQuery.
	ErrorCollector *errors.ErrorCollector
	// WaitGroup, when non-nil, has Done called on it when AsyncPromQuery returns.
	WaitGroup *sync.WaitGroup
}
  39. // TODO move this to a package-accessible helper function once dependencies are able to
  40. // be extricated from costmodel package (PromQueryResult -> util.Vector). Otherwise, circular deps.
  41. func AsyncPromQuery(query string, resultCh chan []*PromQueryResult, ctx PromQueryContext) {
  42. if ctx.WaitGroup != nil {
  43. defer ctx.WaitGroup.Done()
  44. }
  45. defer errors.HandlePanic()
  46. raw, promErr := Query(ctx.Client, query)
  47. ctx.ErrorCollector.Report(promErr)
  48. results, parseErr := NewQueryResults(raw)
  49. ctx.ErrorCollector.Report(parseErr)
  50. resultCh <- results
  51. }
// ClusterCosts represents cumulative and monthly cluster costs over a given duration. Costs
// are broken down by cores (CPU), GPU, memory, and storage.
//
// For each resource, the Cumulative field is the cost accrued over the
// [Start, End] range and the Monthly field is that cost scaled to a monthly
// rate (see NewClusterCostsFromCumulative). The Breakdown pointers are only
// populated by ComputeClusterCosts and may be nil.
type ClusterCosts struct {
	// Start and End bound the time range the costs cover.
	Start *time.Time `json:"startTime"`
	End *time.Time `json:"endTime"`
	CPUCumulative float64 `json:"cpuCumulativeCost"`
	CPUMonthly float64 `json:"cpuMonthlyCost"`
	CPUBreakdown *ClusterCostsBreakdown `json:"cpuBreakdown"`
	GPUCumulative float64 `json:"gpuCumulativeCost"`
	GPUMonthly float64 `json:"gpuMonthlyCost"`
	RAMCumulative float64 `json:"ramCumulativeCost"`
	RAMMonthly float64 `json:"ramMonthlyCost"`
	RAMBreakdown *ClusterCostsBreakdown `json:"ramBreakdown"`
	StorageCumulative float64 `json:"storageCumulativeCost"`
	StorageMonthly float64 `json:"storageMonthlyCost"`
	StorageBreakdown *ClusterCostsBreakdown `json:"storageBreakdown"`
	// TotalCumulative and TotalMonthly are the sums of the per-resource values.
	TotalCumulative float64 `json:"totalCumulativeCost"`
	TotalMonthly float64 `json:"totalMonthlyCost"`
	// DataMinutes is the number of minutes of data the costs were computed
	// from, as set by ComputeClusterCosts. It carries no json tag, so
	// encoding/json emits it under the Go field name.
	DataMinutes float64
}
// ClusterCostsBreakdown provides percentage-based breakdown of a resource by
// categories: user for user-space (i.e. non-system) usage, system, idle, and
// other for anything not covered by the first three.
//
// NOTE(review): ComputeClusterCosts fills these as ratios (e.g. RAM idle is
// 1.0 minus the other categories), so values appear to be fractions in [0, 1]
// rather than 0-100 percentages — confirm with consumers.
type ClusterCostsBreakdown struct {
	Idle float64 `json:"idle"`
	Other float64 `json:"other"`
	System float64 `json:"system"`
	User float64 `json:"user"`
}
  80. // NewClusterCostsFromCumulative takes cumulative cost data over a given time range, computes
  81. // the associated monthly rate data, and returns the Costs.
  82. func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offset string, dataHours float64) (*ClusterCosts, error) {
  83. start, end, err := util.ParseTimeRange(window, offset)
  84. if err != nil {
  85. return nil, err
  86. }
  87. // If the number of hours is not given (i.e. is zero) compute one from the window and offset
  88. if dataHours == 0 {
  89. dataHours = end.Sub(*start).Hours()
  90. }
  91. // Do not allow zero-length windows to prevent divide-by-zero issues
  92. if dataHours == 0 {
  93. return nil, fmt.Errorf("illegal time range: window %s, offset %s", window, offset)
  94. }
  95. cc := &ClusterCosts{
  96. Start: start,
  97. End: end,
  98. CPUCumulative: cpu,
  99. GPUCumulative: gpu,
  100. RAMCumulative: ram,
  101. StorageCumulative: storage,
  102. TotalCumulative: cpu + gpu + ram + storage,
  103. CPUMonthly: cpu / dataHours * (util.HoursPerMonth),
  104. GPUMonthly: gpu / dataHours * (util.HoursPerMonth),
  105. RAMMonthly: ram / dataHours * (util.HoursPerMonth),
  106. StorageMonthly: storage / dataHours * (util.HoursPerMonth),
  107. }
  108. cc.TotalMonthly = cc.CPUMonthly + cc.GPUMonthly + cc.RAMMonthly + cc.StorageMonthly
  109. return cc, nil
  110. }
  111. // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
  112. func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
  113. // Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
  114. start, end, err := util.ParseTimeRange(window, offset)
  115. if err != nil {
  116. return nil, err
  117. }
  118. mins := end.Sub(*start).Minutes()
  119. // minsPerResolution determines accuracy and resource use for the following
  120. // queries. Smaller values (higher resolution) result in better accuracy,
  121. // but more expensive queries, and vice-a-versa.
  122. minsPerResolution := 5
  123. // hourlyToCumulative is a scaling factor that, when multiplied by an hourly
  124. // value, converts it to a cumulative value; i.e.
  125. // [$/hr] * [min/res]*[hr/min] = [$/res]
  126. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  127. const fmtQueryDataCount = `
  128. count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:%dm]%s) * %d
  129. `
  130. const fmtQueryTotalGPU = `
  131. sum(
  132. sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
  133. ) by (cluster_id)
  134. `
  135. const fmtQueryTotalCPU = `
  136. sum(
  137. sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:%dm]%s) *
  138. avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  139. ) by (cluster_id)
  140. `
  141. const fmtQueryTotalRAM = `
  142. sum(
  143. sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  144. avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  145. ) by (cluster_id)
  146. `
  147. const fmtQueryTotalStorage = `
  148. sum(
  149. sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  150. avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, cluster_id) * %f
  151. ) by (cluster_id)
  152. `
  153. const fmtQueryCPUModePct = `
  154. sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
  155. group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)
  156. `
  157. const fmtQueryRAMSystemPct = `
  158. sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (cluster_id)
  159. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  160. `
  161. const fmtQueryRAMUserPct = `
  162. sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (cluster_id)
  163. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  164. `
  165. // TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
  166. // const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
  167. // group_right(storageclass) sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim,namespace))`
  168. queryUsedLocalStorage := provider.GetLocalStorageQuery(window, offset, false, true)
  169. queryTotalLocalStorage := provider.GetLocalStorageQuery(window, offset, false, false)
  170. if queryTotalLocalStorage != "" {
  171. queryTotalLocalStorage = fmt.Sprintf(" + %s", queryTotalLocalStorage)
  172. }
  173. fmtOffset := ""
  174. if offset != "" {
  175. fmtOffset = fmt.Sprintf("offset %s", offset)
  176. }
  177. queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, minsPerResolution, fmtOffset, minsPerResolution)
  178. queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  179. queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  180. queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  181. queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  182. ctx := prom.NewContext(client)
  183. resChs := ctx.QueryAll(
  184. queryDataCount,
  185. queryTotalGPU,
  186. queryTotalCPU,
  187. queryTotalRAM,
  188. queryTotalStorage,
  189. queryTotalLocalStorage,
  190. )
  191. if withBreakdown {
  192. queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
  193. queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  194. queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  195. bdResChs := ctx.QueryAll(
  196. queryCPUModePct,
  197. queryRAMSystemPct,
  198. queryRAMUserPct,
  199. queryUsedLocalStorage,
  200. )
  201. resChs = append(resChs, bdResChs...)
  202. }
  203. defaultClusterID := os.Getenv(clusterIDKey)
  204. dataMinsByCluster := map[string]float64{}
  205. for _, result := range resChs[0].Await() {
  206. clusterID, _ := result.GetString("cluster_id")
  207. if clusterID == "" {
  208. clusterID = defaultClusterID
  209. }
  210. dataMins := mins
  211. if len(result.Values) > 0 {
  212. dataMins = result.Values[0].Value
  213. } else {
  214. klog.V(3).Infof("[Warning] cluster cost data count returned no results for cluster %s", clusterID)
  215. }
  216. dataMinsByCluster[clusterID] = dataMins
  217. }
  218. // Determine combined discount
  219. discount, customDiscount := 0.0, 0.0
  220. c, err := A.Cloud.GetConfig()
  221. if err == nil {
  222. discount, err = ParsePercentString(c.Discount)
  223. if err != nil {
  224. discount = 0.0
  225. }
  226. customDiscount, err = ParsePercentString(c.NegotiatedDiscount)
  227. if err != nil {
  228. customDiscount = 0.0
  229. }
  230. }
  231. // Intermediate structure storing mapping of [clusterID][type ∈ {cpu, ram, storage, total}]=cost
  232. costData := make(map[string]map[string]float64)
  233. // Helper function to iterate over Prom query results, parsing the raw values into
  234. // the intermediate costData structure.
  235. setCostsFromResults := func(costData map[string]map[string]float64, results []*prom.QueryResult, name string, discount float64, customDiscount float64) {
  236. for _, result := range results {
  237. clusterID, _ := result.GetString("cluster_id")
  238. if clusterID == "" {
  239. clusterID = defaultClusterID
  240. }
  241. if _, ok := costData[clusterID]; !ok {
  242. costData[clusterID] = map[string]float64{}
  243. }
  244. if len(result.Values) > 0 {
  245. costData[clusterID][name] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  246. costData[clusterID]["total"] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  247. }
  248. }
  249. }
  250. // Apply both sustained use and custom discounts to RAM and CPU
  251. setCostsFromResults(costData, resChs[2].Await(), "cpu", discount, customDiscount)
  252. setCostsFromResults(costData, resChs[3].Await(), "ram", discount, customDiscount)
  253. // Apply only custom discount to GPU and storage
  254. setCostsFromResults(costData, resChs[1].Await(), "gpu", 0.0, customDiscount)
  255. setCostsFromResults(costData, resChs[4].Await(), "storage", 0.0, customDiscount)
  256. setCostsFromResults(costData, resChs[5].Await(), "localstorage", 0.0, customDiscount)
  257. cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
  258. ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
  259. pvUsedCostMap := map[string]float64{}
  260. if withBreakdown {
  261. for _, result := range resChs[6].Await() {
  262. clusterID, _ := result.GetString("cluster_id")
  263. if clusterID == "" {
  264. clusterID = defaultClusterID
  265. }
  266. if _, ok := cpuBreakdownMap[clusterID]; !ok {
  267. cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  268. }
  269. cpuBD := cpuBreakdownMap[clusterID]
  270. mode, err := result.GetString("mode")
  271. if err != nil {
  272. klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
  273. mode = "other"
  274. }
  275. switch mode {
  276. case "idle":
  277. cpuBD.Idle += result.Values[0].Value
  278. case "system":
  279. cpuBD.System += result.Values[0].Value
  280. case "user":
  281. cpuBD.User += result.Values[0].Value
  282. default:
  283. cpuBD.Other += result.Values[0].Value
  284. }
  285. }
  286. for _, result := range resChs[7].Await() {
  287. clusterID, _ := result.GetString("cluster_id")
  288. if clusterID == "" {
  289. clusterID = defaultClusterID
  290. }
  291. if _, ok := ramBreakdownMap[clusterID]; !ok {
  292. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  293. }
  294. ramBD := ramBreakdownMap[clusterID]
  295. ramBD.System += result.Values[0].Value
  296. }
  297. for _, result := range resChs[8].Await() {
  298. clusterID, _ := result.GetString("cluster_id")
  299. if clusterID == "" {
  300. clusterID = defaultClusterID
  301. }
  302. if _, ok := ramBreakdownMap[clusterID]; !ok {
  303. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  304. }
  305. ramBD := ramBreakdownMap[clusterID]
  306. ramBD.User += result.Values[0].Value
  307. }
  308. for _, ramBD := range ramBreakdownMap {
  309. remaining := 1.0
  310. remaining -= ramBD.Other
  311. remaining -= ramBD.System
  312. remaining -= ramBD.User
  313. ramBD.Idle = remaining
  314. }
  315. for _, result := range resChs[9].Await() {
  316. clusterID, _ := result.GetString("cluster_id")
  317. if clusterID == "" {
  318. clusterID = defaultClusterID
  319. }
  320. pvUsedCostMap[clusterID] += result.Values[0].Value
  321. }
  322. }
  323. // Convert intermediate structure to Costs instances
  324. costsByCluster := map[string]*ClusterCosts{}
  325. for id, cd := range costData {
  326. dataMins, ok := dataMinsByCluster[id]
  327. if !ok {
  328. dataMins = mins
  329. klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
  330. }
  331. costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/util.MinsPerHour)
  332. if err != nil {
  333. klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
  334. return nil, err
  335. }
  336. if cpuBD, ok := cpuBreakdownMap[id]; ok {
  337. costs.CPUBreakdown = cpuBD
  338. }
  339. if ramBD, ok := ramBreakdownMap[id]; ok {
  340. costs.RAMBreakdown = ramBD
  341. }
  342. costs.StorageBreakdown = &ClusterCostsBreakdown{}
  343. if pvUC, ok := pvUsedCostMap[id]; ok {
  344. costs.StorageBreakdown.Idle = (costs.StorageCumulative - pvUC) / costs.StorageCumulative
  345. costs.StorageBreakdown.User = pvUC / costs.StorageCumulative
  346. }
  347. costs.DataMinutes = dataMins
  348. costsByCluster[id] = costs
  349. }
  350. return costsByCluster, nil
  351. }
// Totals holds the time series returned by ClusterCostsOverTime. Each field is
// a list of two-element rows as produced by resultToTotals: the sample
// timestamp and the sample value, both formatted as "%f" strings.
type Totals struct {
	// TotalCost is the overall cluster cost series.
	TotalCost [][]string `json:"totalcost"`
	// CPUCost is the series from the cluster cores query (CPU plus GPU cost).
	CPUCost [][]string `json:"cpucost"`
	// MemCost is the RAM cost series.
	MemCost [][]string `json:"memcost"`
	// StorageCost is the storage cost series; it may be nil or empty when no
	// storage data was available.
	StorageCost [][]string `json:"storageCost"`
}
  358. func resultToTotals(qr interface{}) ([][]string, error) {
  359. results, err := NewQueryResults(qr)
  360. if err != nil {
  361. return nil, err
  362. }
  363. if len(results) == 0 {
  364. return [][]string{}, fmt.Errorf("Not enough data available in the selected time range")
  365. }
  366. result := results[0]
  367. totals := [][]string{}
  368. for _, value := range result.Values {
  369. d0 := fmt.Sprintf("%f", value.Timestamp)
  370. d1 := fmt.Sprintf("%f", value.Value)
  371. toAppend := []string{
  372. d0,
  373. d1,
  374. }
  375. totals = append(totals, toAppend)
  376. }
  377. return totals, nil
  378. }
  379. // ClusterCostsOverTime gives the full cluster costs over time
  380. func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startString, endString, windowString, offset string) (*Totals, error) {
  381. localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true, false)
  382. if localStorageQuery != "" {
  383. localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
  384. }
  385. layout := "2006-01-02T15:04:05.000Z"
  386. start, err := time.Parse(layout, startString)
  387. if err != nil {
  388. klog.V(1).Infof("Error parsing time " + startString + ". Error: " + err.Error())
  389. return nil, err
  390. }
  391. end, err := time.Parse(layout, endString)
  392. if err != nil {
  393. klog.V(1).Infof("Error parsing time " + endString + ". Error: " + err.Error())
  394. return nil, err
  395. }
  396. window, err := time.ParseDuration(windowString)
  397. if err != nil {
  398. klog.V(1).Infof("Error parsing time " + windowString + ". Error: " + err.Error())
  399. return nil, err
  400. }
  401. // turn offsets of the format "[0-9+]h" into the format "offset [0-9+]h" for use in query templatess
  402. if offset != "" {
  403. offset = fmt.Sprintf("offset %s", offset)
  404. }
  405. qCores := fmt.Sprintf(queryClusterCores, windowString, offset, windowString, offset, windowString, offset)
  406. qRAM := fmt.Sprintf(queryClusterRAM, windowString, offset, windowString, offset)
  407. qStorage := fmt.Sprintf(queryStorage, windowString, offset, windowString, offset, localStorageQuery)
  408. qTotal := fmt.Sprintf(queryTotal, localStorageQuery)
  409. resultClusterCores, err := QueryRange(cli, qCores, start, end, window)
  410. if err != nil {
  411. return nil, err
  412. }
  413. resultClusterRAM, err := QueryRange(cli, qRAM, start, end, window)
  414. if err != nil {
  415. return nil, err
  416. }
  417. resultStorage, err := QueryRange(cli, qStorage, start, end, window)
  418. if err != nil {
  419. return nil, err
  420. }
  421. resultTotal, err := QueryRange(cli, qTotal, start, end, window)
  422. if err != nil {
  423. return nil, err
  424. }
  425. coreTotal, err := resultToTotals(resultClusterCores)
  426. if err != nil {
  427. klog.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
  428. return nil, err
  429. }
  430. ramTotal, err := resultToTotals(resultClusterRAM)
  431. if err != nil {
  432. klog.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
  433. return nil, err
  434. }
  435. storageTotal, err := resultToTotals(resultStorage)
  436. if err != nil {
  437. klog.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
  438. }
  439. clusterTotal, err := resultToTotals(resultTotal)
  440. if err != nil {
  441. // If clusterTotal query failed, it's likely because there are no PVs, which
  442. // causes the qTotal query to return no data. Instead, query only node costs.
  443. // If that fails, return an error because something is actually wrong.
  444. qNodes := fmt.Sprintf(queryNodes, localStorageQuery)
  445. resultNodes, err := QueryRange(cli, qNodes, start, end, window)
  446. if err != nil {
  447. return nil, err
  448. }
  449. clusterTotal, err = resultToTotals(resultNodes)
  450. if err != nil {
  451. klog.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
  452. return nil, err
  453. }
  454. }
  455. return &Totals{
  456. TotalCost: clusterTotal,
  457. CPUCost: coreTotal,
  458. MemCost: ramTotal,
  459. StorageCost: storageTotal,
  460. }, nil
  461. }