cluster.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. package costmodel
  2. import (
  3. "fmt"
  4. "os"
  5. "sync"
  6. "time"
  7. "github.com/kubecost/cost-model/pkg/cloud"
  8. "github.com/kubecost/cost-model/pkg/log"
  9. "github.com/kubecost/cost-model/pkg/prom"
  10. "github.com/kubecost/cost-model/pkg/util"
  11. prometheus "github.com/prometheus/client_golang/api"
  12. "k8s.io/klog"
  13. )
  // Prometheus query templates used by ClusterCostsOverTime. Every %s verb is
  // filled in with fmt.Sprintf before the query is issued. The 730 multiplier
  // presumably approximates the number of hours in a month, converting hourly
  // costs into monthly rates -- TODO confirm.
  14. const (
  // queryClusterCores sums, per cluster, node CPU capacity times hourly CPU
  // cost, plus hourly GPU cost. It takes three (range, offset) pairs.
  15. queryClusterCores = `sum(
  16. avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s] %s)) by (node, cluster_id) * avg(avg_over_time(node_cpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730 +
  17. avg(avg_over_time(node_gpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730
  18. ) by (cluster_id)`
  // queryClusterRAM sums, per cluster, node memory capacity (bytes divided
  // down to GiB) times hourly RAM cost. It takes two (range, offset) pairs.
  19. queryClusterRAM = `sum(
  20. avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s] %s)) by (node, cluster_id) / 1024 / 1024 / 1024 * avg(avg_over_time(node_ram_hourly_cost[%s] %s)) by (node, cluster_id) * 730
  21. ) by (cluster_id)`
  // queryStorage sums, per cluster, persistent volume capacity (GiB) times
  // hourly PV cost. It takes two (range, offset) pairs followed by a trailing
  // %s that receives the optional "+ <local storage query>" suffix.
  22. queryStorage = `sum(
  23. avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730
  24. * avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
  25. ) by (cluster_id) %s`
  // queryTotal adds total node cost to PV cost using a fixed 1h range. Its
  // single %s receives the optional local storage suffix.
  26. queryTotal = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 +
  27. sum(
  28. avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730
  29. * avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
  30. ) by (cluster_id) %s`
  // queryNodes is the node-only fallback ClusterCostsOverTime uses when
  // queryTotal yields no usable data. Its single %s receives the optional
  // local storage suffix.
  31. queryNodes = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 %s`
  32. )
  33. // TODO move this to a package-accessible helper
  //
  // PromQueryContext bundles the dependencies AsyncPromQuery needs to run a
  // single Prometheus query and report its outcome.
  34. type PromQueryContext struct {
  35. Client prometheus.Client // Prometheus client the query is executed against
  36. ErrorCollector *util.ErrorCollector // receives both the query error and the parse error from AsyncPromQuery
  37. WaitGroup *sync.WaitGroup // optional; when non-nil, AsyncPromQuery calls Done on it when it returns
  38. }
  39. // TODO move this to a package-accessible helper function once dependencies are able to
  40. // be extricated from costmodel package (PromQueryResult -> util.Vector). Otherwise, circular deps.
  41. func AsyncPromQuery(query string, resultCh chan []*PromQueryResult, ctx PromQueryContext) {
  42. if ctx.WaitGroup != nil {
  43. defer ctx.WaitGroup.Done()
  44. }
  45. raw, promErr := Query(ctx.Client, query)
  46. ctx.ErrorCollector.Report(promErr)
  47. results, parseErr := NewQueryResults(raw)
  48. ctx.ErrorCollector.Report(parseErr)
  49. resultCh <- results
  50. }
  51. // ClusterCosts represents cumulative and monthly cluster costs over a given duration. Costs
  52. // are broken down by cores (CPU), GPU, memory, and storage.
  53. type ClusterCosts struct {
  54. Start *time.Time `json:"startTime"` // start of the time range, as parsed from window and offset
  55. End *time.Time `json:"endTime"` // end of the time range, as parsed from window and offset
  56. CPUCumulative float64 `json:"cpuCumulativeCost"`
  57. CPUMonthly float64 `json:"cpuMonthlyCost"` // CPUCumulative scaled from the data hours to util.HoursPerMonth
  58. CPUBreakdown *ClusterCostsBreakdown `json:"cpuBreakdown"` // nil unless ComputeClusterCosts found breakdown data for the cluster
  59. GPUCumulative float64 `json:"gpuCumulativeCost"`
  60. GPUMonthly float64 `json:"gpuMonthlyCost"` // GPUCumulative scaled from the data hours to util.HoursPerMonth
  61. RAMCumulative float64 `json:"ramCumulativeCost"`
  62. RAMMonthly float64 `json:"ramMonthlyCost"` // RAMCumulative scaled from the data hours to util.HoursPerMonth
  63. RAMBreakdown *ClusterCostsBreakdown `json:"ramBreakdown"` // nil unless ComputeClusterCosts found breakdown data for the cluster
  64. StorageCumulative float64 `json:"storageCumulativeCost"`
  65. StorageMonthly float64 `json:"storageMonthlyCost"` // StorageCumulative scaled from the data hours to util.HoursPerMonth
  66. StorageBreakdown *ClusterCostsBreakdown `json:"storageBreakdown"` // always allocated by ComputeClusterCosts; only Idle and User are populated
  67. TotalCumulative float64 `json:"totalCumulativeCost"` // sum of the CPU, GPU, RAM and storage cumulative costs
  68. TotalMonthly float64 `json:"totalMonthlyCost"` // sum of the CPU, GPU, RAM and storage monthly costs
  69. }
  70. // ClusterCostsBreakdown provides percentage-based breakdown of a resource by
  71. // categories: user for user-space (i.e. non-system) usage, system, idle, and
  // other for anything that fits none of those. ComputeClusterCosts stores the
  // values as fractions of 1 (e.g. RAM idle is 1 minus the other categories).
  72. type ClusterCostsBreakdown struct {
  73. Idle float64 `json:"idle"`
  74. Other float64 `json:"other"` // for CPU, any mode other than idle, system or user
  75. System float64 `json:"system"`
  76. User float64 `json:"user"`
  77. }
  78. // NewClusterCostsFromCumulative takes cumulative cost data over a given time range, computes
  79. // the associated monthly rate data, and returns the Costs.
  80. func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offset string, dataHours float64) (*ClusterCosts, error) {
  81. start, end, err := util.ParseTimeRange(window, offset)
  82. if err != nil {
  83. return nil, err
  84. }
  85. // If the number of hours is not given (i.e. is zero) compute one from the window and offset
  86. if dataHours == 0 {
  87. dataHours = end.Sub(*start).Hours()
  88. }
  89. // Do not allow zero-length windows to prevent divide-by-zero issues
  90. if dataHours == 0 {
  91. return nil, fmt.Errorf("illegal time range: window %s, offset %s", window, offset)
  92. }
  93. cc := &ClusterCosts{
  94. Start: start,
  95. End: end,
  96. CPUCumulative: cpu,
  97. GPUCumulative: gpu,
  98. RAMCumulative: ram,
  99. StorageCumulative: storage,
  100. TotalCumulative: cpu + gpu + ram + storage,
  101. CPUMonthly: cpu / dataHours * (util.HoursPerMonth),
  102. GPUMonthly: gpu / dataHours * (util.HoursPerMonth),
  103. RAMMonthly: ram / dataHours * (util.HoursPerMonth),
  104. StorageMonthly: storage / dataHours * (util.HoursPerMonth),
  105. }
  106. cc.TotalMonthly = cc.CPUMonthly + cc.GPUMonthly + cc.RAMMonthly + cc.StorageMonthly
  107. return cc, nil
  108. }
  109. // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
  110. func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
  111. // Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
  112. start, end, err := util.ParseTimeRange(window, offset)
  113. if err != nil {
  114. return nil, err
  115. }
  116. mins := end.Sub(*start).Minutes()
  117. // minsPerResolution determines accuracy and resource use for the following
  118. // queries. Smaller values (higher resolution) result in better accuracy,
  119. // but more expensive queries, and vice-a-versa.
  120. minsPerResolution := 5
  121. // hourlyToCumulative is a scaling factor that, when multiplied by an hourly
  122. // value, converts it to a cumulative value; i.e.
  123. // [$/hr] * [min/res]*[hr/min] = [$/res]
  124. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  125. const fmtQueryDataCount = `
  126. count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:1m]%s)
  127. `
  128. const fmtQueryTotalGPU = `
  129. sum(
  130. sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
  131. ) by (cluster_id)
  132. `
  133. const fmtQueryTotalCPU = `
  134. sum(
  135. sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:%dm]%s) *
  136. avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  137. ) by (cluster_id)
  138. `
  139. const fmtQueryTotalRAM = `
  140. sum(
  141. sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  142. avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  143. ) by (cluster_id)
  144. `
  145. const fmtQueryTotalStorage = `
  146. sum(
  147. sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  148. avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, cluster_id) * %f
  149. ) by (cluster_id)
  150. `
  151. const fmtQueryCPUModePct = `
  152. sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
  153. group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)
  154. `
  155. const fmtQueryRAMSystemPct = `
  156. sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (cluster_id)
  157. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  158. `
  159. const fmtQueryRAMUserPct = `
  160. sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (cluster_id)
  161. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  162. `
  163. // TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
  164. // const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
  165. // group_right(storageclass) sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim,namespace))`
  166. queryUsedLocalStorage := provider.GetLocalStorageQuery(window, offset, false, true)
  167. queryTotalLocalStorage := provider.GetLocalStorageQuery(window, offset, false, false)
  168. if queryTotalLocalStorage != "" {
  169. queryTotalLocalStorage = fmt.Sprintf(" + %s", queryTotalLocalStorage)
  170. }
  171. fmtOffset := ""
  172. if offset != "" {
  173. fmtOffset = fmt.Sprintf("offset %s", offset)
  174. }
  175. queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, fmtOffset)
  176. queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  177. queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  178. queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  179. queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  180. log.Infof("ComputeClusterCosts: queryDataCount: %s", queryDataCount)
  181. log.Infof("ComputeClusterCosts: queryTotalGPU: %s", queryTotalGPU)
  182. log.Infof("ComputeClusterCosts: queryTotalCPU: %s", queryTotalCPU)
  183. log.Infof("ComputeClusterCosts: queryTotalRAM: %s", queryTotalRAM)
  184. log.Infof("ComputeClusterCosts: queryTotalStorage: %s", queryTotalStorage)
  185. ctx := prom.NewContext(client)
  186. resChs := ctx.QueryAll(
  187. queryDataCount,
  188. queryTotalGPU,
  189. queryTotalCPU,
  190. queryTotalRAM,
  191. queryTotalStorage,
  192. queryTotalLocalStorage,
  193. )
  194. if withBreakdown {
  195. queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
  196. queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  197. queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  198. log.Infof("ComputeClusterCosts: queryCPUModePct: %s", queryCPUModePct)
  199. log.Infof("ComputeClusterCosts: queryRAMSystemPct: %s", queryRAMSystemPct)
  200. log.Infof("ComputeClusterCosts: queryRAMUserPct: %s", queryRAMUserPct)
  201. bdResChs := ctx.QueryAll(
  202. queryCPUModePct,
  203. queryRAMSystemPct,
  204. queryRAMUserPct,
  205. queryUsedLocalStorage,
  206. )
  207. resChs = append(resChs, bdResChs...)
  208. }
  209. defaultClusterID := os.Getenv(clusterIDKey)
  210. dataMinsByCluster := map[string]float64{}
  211. for _, result := range resChs[0].Await() {
  212. clusterID, _ := result.GetString("cluster_id")
  213. if clusterID == "" {
  214. clusterID = defaultClusterID
  215. }
  216. dataMins := mins
  217. if len(result.Values) > 0 {
  218. dataMins = result.Values[0].Value
  219. } else {
  220. klog.V(3).Infof("[Warning] cluster cost data count returned no results for cluster %s", clusterID)
  221. }
  222. dataMinsByCluster[clusterID] = dataMins
  223. }
  224. // Determine combined discount
  225. discount, customDiscount := 0.0, 0.0
  226. c, err := A.Cloud.GetConfig()
  227. if err == nil {
  228. discount, err = ParsePercentString(c.Discount)
  229. if err != nil {
  230. discount = 0.0
  231. }
  232. customDiscount, err = ParsePercentString(c.NegotiatedDiscount)
  233. if err != nil {
  234. customDiscount = 0.0
  235. }
  236. }
  237. // Intermediate structure storing mapping of [clusterID][type ∈ {cpu, ram, storage, total}]=cost
  238. costData := make(map[string]map[string]float64)
  239. // Helper function to iterate over Prom query results, parsing the raw values into
  240. // the intermediate costData structure.
  241. setCostsFromResults := func(costData map[string]map[string]float64, results []*prom.QueryResult, name string, discount float64, customDiscount float64) {
  242. for _, result := range results {
  243. clusterID, _ := result.GetString("cluster_id")
  244. if clusterID == "" {
  245. clusterID = defaultClusterID
  246. }
  247. if _, ok := costData[clusterID]; !ok {
  248. costData[clusterID] = map[string]float64{}
  249. }
  250. if len(result.Values) > 0 {
  251. costData[clusterID][name] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  252. costData[clusterID]["total"] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  253. }
  254. }
  255. }
  256. // Apply both sustained use and custom discounts to RAM and CPU
  257. setCostsFromResults(costData, resChs[2].Await(), "cpu", discount, customDiscount)
  258. setCostsFromResults(costData, resChs[3].Await(), "ram", discount, customDiscount)
  259. // Apply only custom discount to GPU and storage
  260. setCostsFromResults(costData, resChs[1].Await(), "gpu", 0.0, customDiscount)
  261. setCostsFromResults(costData, resChs[4].Await(), "storage", 0.0, customDiscount)
  262. setCostsFromResults(costData, resChs[5].Await(), "localstorage", 0.0, customDiscount)
  263. cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
  264. ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
  265. pvUsedCostMap := map[string]float64{}
  266. if withBreakdown {
  267. for _, result := range resChs[6].Await() {
  268. clusterID, _ := result.GetString("cluster_id")
  269. if clusterID == "" {
  270. clusterID = defaultClusterID
  271. }
  272. if _, ok := cpuBreakdownMap[clusterID]; !ok {
  273. cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  274. }
  275. cpuBD := cpuBreakdownMap[clusterID]
  276. mode, err := result.GetString("mode")
  277. if err != nil {
  278. klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
  279. mode = "other"
  280. }
  281. switch mode {
  282. case "idle":
  283. cpuBD.Idle += result.Values[0].Value
  284. case "system":
  285. cpuBD.System += result.Values[0].Value
  286. case "user":
  287. cpuBD.User += result.Values[0].Value
  288. default:
  289. cpuBD.Other += result.Values[0].Value
  290. }
  291. }
  292. for _, result := range resChs[7].Await() {
  293. clusterID, _ := result.GetString("cluster_id")
  294. if clusterID == "" {
  295. clusterID = defaultClusterID
  296. }
  297. if _, ok := ramBreakdownMap[clusterID]; !ok {
  298. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  299. }
  300. ramBD := ramBreakdownMap[clusterID]
  301. ramBD.System += result.Values[0].Value
  302. }
  303. for _, result := range resChs[8].Await() {
  304. clusterID, _ := result.GetString("cluster_id")
  305. if clusterID == "" {
  306. clusterID = defaultClusterID
  307. }
  308. if _, ok := ramBreakdownMap[clusterID]; !ok {
  309. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  310. }
  311. ramBD := ramBreakdownMap[clusterID]
  312. ramBD.User += result.Values[0].Value
  313. }
  314. for _, ramBD := range ramBreakdownMap {
  315. remaining := 1.0
  316. remaining -= ramBD.Other
  317. remaining -= ramBD.System
  318. remaining -= ramBD.User
  319. ramBD.Idle = remaining
  320. }
  321. for _, result := range resChs[9].Await() {
  322. clusterID, _ := result.GetString("cluster_id")
  323. if clusterID == "" {
  324. clusterID = defaultClusterID
  325. }
  326. pvUsedCostMap[clusterID] += result.Values[0].Value
  327. }
  328. }
  329. // Convert intermediate structure to Costs instances
  330. costsByCluster := map[string]*ClusterCosts{}
  331. for id, cd := range costData {
  332. dataMins, ok := dataMinsByCluster[id]
  333. if !ok {
  334. dataMins = mins
  335. klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
  336. }
  337. costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/util.MinsPerHour)
  338. if err != nil {
  339. klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
  340. return nil, err
  341. }
  342. if cpuBD, ok := cpuBreakdownMap[id]; ok {
  343. costs.CPUBreakdown = cpuBD
  344. }
  345. if ramBD, ok := ramBreakdownMap[id]; ok {
  346. costs.RAMBreakdown = ramBD
  347. }
  348. costs.StorageBreakdown = &ClusterCostsBreakdown{}
  349. if pvUC, ok := pvUsedCostMap[id]; ok {
  350. costs.StorageBreakdown.Idle = (costs.StorageCumulative - pvUC) / costs.StorageCumulative
  351. costs.StorageBreakdown.User = pvUC / costs.StorageCumulative
  352. }
  353. costsByCluster[id] = costs
  354. }
  355. return costsByCluster, nil
  356. }
  // Totals holds the cost series returned by ClusterCostsOverTime. Each field
  // is a list of [timestamp, value] pairs, with both elements formatted as
  // strings using the %f verb.
  357. type Totals struct {
  358. TotalCost [][]string `json:"totalcost"` // total cluster cost; falls back to node-only cost when the total query yields no data
  359. CPUCost [][]string `json:"cpucost"` // result of the cluster cores query
  360. MemCost [][]string `json:"memcost"` // result of the cluster RAM query
  361. StorageCost [][]string `json:"storageCost"` // result of the storage query; may be empty or nil when no storage data is available
  362. }
  363. func resultToTotals(qr interface{}) ([][]string, error) {
  364. results, err := NewQueryResults(qr)
  365. if err != nil {
  366. return nil, err
  367. }
  368. if len(results) == 0 {
  369. return [][]string{}, fmt.Errorf("Not enough data available in the selected time range")
  370. }
  371. result := results[0]
  372. totals := [][]string{}
  373. for _, value := range result.Values {
  374. d0 := fmt.Sprintf("%f", value.Timestamp)
  375. d1 := fmt.Sprintf("%f", value.Value)
  376. toAppend := []string{
  377. d0,
  378. d1,
  379. }
  380. totals = append(totals, toAppend)
  381. }
  382. return totals, nil
  383. }
  384. // ClusterCostsOverTime gives the full cluster costs over time
  385. func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startString, endString, windowString, offset string) (*Totals, error) {
  386. localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true, false)
  387. if localStorageQuery != "" {
  388. localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
  389. }
  390. layout := "2006-01-02T15:04:05.000Z"
  391. start, err := time.Parse(layout, startString)
  392. if err != nil {
  393. klog.V(1).Infof("Error parsing time " + startString + ". Error: " + err.Error())
  394. return nil, err
  395. }
  396. end, err := time.Parse(layout, endString)
  397. if err != nil {
  398. klog.V(1).Infof("Error parsing time " + endString + ". Error: " + err.Error())
  399. return nil, err
  400. }
  401. window, err := time.ParseDuration(windowString)
  402. if err != nil {
  403. klog.V(1).Infof("Error parsing time " + windowString + ". Error: " + err.Error())
  404. return nil, err
  405. }
  406. // turn offsets of the format "[0-9+]h" into the format "offset [0-9+]h" for use in query templatess
  407. if offset != "" {
  408. offset = fmt.Sprintf("offset %s", offset)
  409. }
  410. qCores := fmt.Sprintf(queryClusterCores, windowString, offset, windowString, offset, windowString, offset)
  411. qRAM := fmt.Sprintf(queryClusterRAM, windowString, offset, windowString, offset)
  412. qStorage := fmt.Sprintf(queryStorage, windowString, offset, windowString, offset, localStorageQuery)
  413. qTotal := fmt.Sprintf(queryTotal, localStorageQuery)
  414. resultClusterCores, err := QueryRange(cli, qCores, start, end, window)
  415. if err != nil {
  416. return nil, err
  417. }
  418. resultClusterRAM, err := QueryRange(cli, qRAM, start, end, window)
  419. if err != nil {
  420. return nil, err
  421. }
  422. resultStorage, err := QueryRange(cli, qStorage, start, end, window)
  423. if err != nil {
  424. return nil, err
  425. }
  426. resultTotal, err := QueryRange(cli, qTotal, start, end, window)
  427. if err != nil {
  428. return nil, err
  429. }
  430. coreTotal, err := resultToTotals(resultClusterCores)
  431. if err != nil {
  432. klog.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
  433. return nil, err
  434. }
  435. ramTotal, err := resultToTotals(resultClusterRAM)
  436. if err != nil {
  437. klog.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
  438. return nil, err
  439. }
  440. storageTotal, err := resultToTotals(resultStorage)
  441. if err != nil {
  442. klog.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
  443. }
  444. clusterTotal, err := resultToTotals(resultTotal)
  445. if err != nil {
  446. // If clusterTotal query failed, it's likely because there are no PVs, which
  447. // causes the qTotal query to return no data. Instead, query only node costs.
  448. // If that fails, return an error because something is actually wrong.
  449. qNodes := fmt.Sprintf(queryNodes, localStorageQuery)
  450. resultNodes, err := QueryRange(cli, qNodes, start, end, window)
  451. if err != nil {
  452. return nil, err
  453. }
  454. clusterTotal, err = resultToTotals(resultNodes)
  455. if err != nil {
  456. klog.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
  457. return nil, err
  458. }
  459. }
  460. return &Totals{
  461. TotalCost: clusterTotal,
  462. CPUCost: coreTotal,
  463. MemCost: ramTotal,
  464. StorageCost: storageTotal,
  465. }, nil
  466. }