cluster.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534
  1. package costmodel
  2. import (
  3. "fmt"
  4. "os"
  5. "sync"
  6. "time"
  7. "github.com/kubecost/cost-model/pkg/cloud"
  8. "github.com/kubecost/cost-model/pkg/errors"
  9. "github.com/kubecost/cost-model/pkg/prom"
  10. "github.com/kubecost/cost-model/pkg/util"
  11. prometheus "github.com/prometheus/client_golang/api"
  12. "k8s.io/klog"
  13. )
// PromQL templates used by ClusterCostsOverTime. Inside each range selector
// the first %s of a pair is filled with the window and the second with an
// optional "offset ..." clause; the trailing %s on queryStorage, queryTotal,
// and queryNodes receives an optional local-storage sub-expression. Hourly
// costs are scaled by 730 in every template.
// NOTE(review): 730 is presumably hours per month — confirm it should track
// util.HoursPerMonth.
const (
	// queryClusterCores sums, per cluster_id, each node's average CPU core
	// capacity times its average hourly CPU cost, plus the node's average
	// hourly GPU cost, both scaled by 730.
	queryClusterCores = `sum(
avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s] %s)) by (node, cluster_id) * avg(avg_over_time(node_cpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730 +
avg(avg_over_time(node_gpu_hourly_cost[%s] %s)) by (node, cluster_id) * 730
) by (cluster_id)`

	// queryClusterRAM sums, per cluster_id, each node's average memory
	// capacity (bytes divided by 1024^3) times its average hourly RAM cost,
	// scaled by 730.
	queryClusterRAM = `sum(
avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s] %s)) by (node, cluster_id) / 1024 / 1024 / 1024 * avg(avg_over_time(node_ram_hourly_cost[%s] %s)) by (node, cluster_id) * 730
) by (cluster_id)`

	// queryStorage sums, per cluster_id, each persistent volume's average
	// hourly cost (scaled by 730) times its average capacity (bytes divided by
	// 1024^3). The final %s is appended verbatim after the aggregation.
	queryStorage = `sum(
avg(avg_over_time(pv_hourly_cost[%s] %s)) by (persistentvolume, cluster_id) * 730
* avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s] %s)) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
) by (cluster_id) %s`

	// queryTotal adds the summed node hourly cost (scaled by 730) to the
	// persistent volume cost computed over a fixed 1h range. Unlike the
	// templates above it takes no window or offset; its only %s is appended
	// verbatim at the end.
	queryTotal = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 +
sum(
avg(avg_over_time(pv_hourly_cost[1h])) by (persistentvolume, cluster_id) * 730
* avg(avg_over_time(kube_persistentvolume_capacity_bytes[1h])) by (persistentvolume, cluster_id) / 1024 / 1024 / 1024
) by (cluster_id) %s`

	// queryNodes is the node-only portion of queryTotal; ClusterCostsOverTime
	// falls back to it when queryTotal yields no usable data.
	queryNodes = `sum(avg(node_total_hourly_cost) by (node, cluster_id)) * 730 %s`
)
// PromQueryContext bundles the dependencies AsyncPromQuery needs to run a
// single Prometheus query and report its outcome.
//
// TODO move this to a package-accessible helper
type PromQueryContext struct {
	// Client is the Prometheus API client the query is issued against.
	Client prometheus.Client
	// ErrorCollector receives the query and parse errors reported by
	// AsyncPromQuery.
	ErrorCollector *errors.ErrorCollector
	// WaitGroup is optional; when non-nil, AsyncPromQuery calls Done on it as
	// it returns.
	WaitGroup *sync.WaitGroup
}
  39. // TODO move this to a package-accessible helper function once dependencies are able to
  40. // be extricated from costmodel package (PromQueryResult -> util.Vector). Otherwise, circular deps.
  41. func AsyncPromQuery(query string, resultCh chan []*PromQueryResult, ctx PromQueryContext) {
  42. if ctx.WaitGroup != nil {
  43. defer ctx.WaitGroup.Done()
  44. }
  45. defer errors.HandlePanic()
  46. raw, promErr := Query(ctx.Client, query)
  47. ctx.ErrorCollector.Report(promErr)
  48. results, parseErr := NewQueryResults(raw)
  49. ctx.ErrorCollector.Report(parseErr)
  50. resultCh <- results
  51. }
// ClusterCosts represents cumulative and monthly cluster costs over a given
// duration. Costs are broken down by CPU, GPU, RAM, and storage, with totals
// across all four. The *Cumulative fields hold cost accrued between Start and
// End; the *Monthly fields hold the same cost expressed as a monthly rate.
// The *Breakdown fields are optional and may be nil.
type ClusterCosts struct {
	// Start and End delimit the time range the costs were computed over.
	Start *time.Time `json:"startTime"`
	End   *time.Time `json:"endTime"`

	// CPU costs and, when available, how CPU time splits across categories.
	CPUCumulative float64                `json:"cpuCumulativeCost"`
	CPUMonthly    float64                `json:"cpuMonthlyCost"`
	CPUBreakdown  *ClusterCostsBreakdown `json:"cpuBreakdown"`

	// GPU costs; no breakdown is tracked for GPUs.
	GPUCumulative float64 `json:"gpuCumulativeCost"`
	GPUMonthly    float64 `json:"gpuMonthlyCost"`

	// RAM costs and, when available, how memory splits across categories.
	RAMCumulative float64                `json:"ramCumulativeCost"`
	RAMMonthly    float64                `json:"ramMonthlyCost"`
	RAMBreakdown  *ClusterCostsBreakdown `json:"ramBreakdown"`

	// Storage costs and, when available, the used/idle split.
	StorageCumulative float64                `json:"storageCumulativeCost"`
	StorageMonthly    float64                `json:"storageMonthlyCost"`
	StorageBreakdown  *ClusterCostsBreakdown `json:"storageBreakdown"`

	// Totals are the sums of the CPU, GPU, RAM, and storage figures.
	TotalCumulative float64 `json:"totalCumulativeCost"`
	TotalMonthly    float64 `json:"totalMonthlyCost"`
}
// ClusterCostsBreakdown provides a fractional breakdown of a resource by
// category: user for user-space (i.e. non-system) usage, system, idle, and
// other for anything that fits none of those. Values are fractions of the
// whole rather than 0-100 percentages; ComputeClusterCosts, for example,
// derives RAM idle as 1.0 minus the other three categories.
type ClusterCostsBreakdown struct {
	Idle   float64 `json:"idle"`
	Other  float64 `json:"other"`
	System float64 `json:"system"`
	User   float64 `json:"user"`
}
  79. // NewClusterCostsFromCumulative takes cumulative cost data over a given time range, computes
  80. // the associated monthly rate data, and returns the Costs.
  81. func NewClusterCostsFromCumulative(cpu, gpu, ram, storage float64, window, offset string, dataHours float64) (*ClusterCosts, error) {
  82. start, end, err := util.ParseTimeRange(window, offset)
  83. if err != nil {
  84. return nil, err
  85. }
  86. // If the number of hours is not given (i.e. is zero) compute one from the window and offset
  87. if dataHours == 0 {
  88. dataHours = end.Sub(*start).Hours()
  89. }
  90. // Do not allow zero-length windows to prevent divide-by-zero issues
  91. if dataHours == 0 {
  92. return nil, fmt.Errorf("illegal time range: window %s, offset %s", window, offset)
  93. }
  94. cc := &ClusterCosts{
  95. Start: start,
  96. End: end,
  97. CPUCumulative: cpu,
  98. GPUCumulative: gpu,
  99. RAMCumulative: ram,
  100. StorageCumulative: storage,
  101. TotalCumulative: cpu + gpu + ram + storage,
  102. CPUMonthly: cpu / dataHours * (util.HoursPerMonth),
  103. GPUMonthly: gpu / dataHours * (util.HoursPerMonth),
  104. RAMMonthly: ram / dataHours * (util.HoursPerMonth),
  105. StorageMonthly: storage / dataHours * (util.HoursPerMonth),
  106. }
  107. cc.TotalMonthly = cc.CPUMonthly + cc.GPUMonthly + cc.RAMMonthly + cc.StorageMonthly
  108. return cc, nil
  109. }
  110. // ComputeClusterCosts gives the cumulative and monthly-rate cluster costs over a window of time for all clusters.
  111. func ComputeClusterCosts(client prometheus.Client, provider cloud.Provider, window, offset string, withBreakdown bool) (map[string]*ClusterCosts, error) {
  112. // Compute number of minutes in the full interval, for use interpolating missed scrapes or scaling missing data
  113. start, end, err := util.ParseTimeRange(window, offset)
  114. if err != nil {
  115. return nil, err
  116. }
  117. mins := end.Sub(*start).Minutes()
  118. // minsPerResolution determines accuracy and resource use for the following
  119. // queries. Smaller values (higher resolution) result in better accuracy,
  120. // but more expensive queries, and vice-a-versa.
  121. minsPerResolution := 5
  122. // hourlyToCumulative is a scaling factor that, when multiplied by an hourly
  123. // value, converts it to a cumulative value; i.e.
  124. // [$/hr] * [min/res]*[hr/min] = [$/res]
  125. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  126. const fmtQueryDataCount = `
  127. count_over_time(sum(kube_node_status_capacity_cpu_cores) by (cluster_id)[%s:%dm]%s) * %d
  128. `
  129. const fmtQueryTotalGPU = `
  130. sum(
  131. sum_over_time(node_gpu_hourly_cost[%s:%dm]%s) * %f
  132. ) by (cluster_id)
  133. `
  134. const fmtQueryTotalCPU = `
  135. sum(
  136. sum_over_time(avg(kube_node_status_capacity_cpu_cores) by (node, cluster_id)[%s:%dm]%s) *
  137. avg(avg_over_time(node_cpu_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  138. ) by (cluster_id)
  139. `
  140. const fmtQueryTotalRAM = `
  141. sum(
  142. sum_over_time(avg(kube_node_status_capacity_memory_bytes) by (node, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  143. avg(avg_over_time(node_ram_hourly_cost[%s:%dm]%s)) by (node, cluster_id) * %f
  144. ) by (cluster_id)
  145. `
  146. const fmtQueryTotalStorage = `
  147. sum(
  148. sum_over_time(avg(kube_persistentvolume_capacity_bytes) by (persistentvolume, cluster_id)[%s:%dm]%s) / 1024 / 1024 / 1024 *
  149. avg(avg_over_time(pv_hourly_cost[%s:%dm]%s)) by (persistentvolume, cluster_id) * %f
  150. ) by (cluster_id)
  151. `
  152. const fmtQueryCPUModePct = `
  153. sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id, mode) / ignoring(mode)
  154. group_left sum(rate(node_cpu_seconds_total[%s]%s)) by (cluster_id)
  155. `
  156. const fmtQueryRAMSystemPct = `
  157. sum(sum_over_time(container_memory_usage_bytes{container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (cluster_id)
  158. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  159. `
  160. const fmtQueryRAMUserPct = `
  161. sum(sum_over_time(kubecost_cluster_memory_working_set_bytes[%s:%dm]%s)) by (cluster_id)
  162. / sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (cluster_id)
  163. `
  164. // TODO niko/clustercost metric "kubelet_volume_stats_used_bytes" was deprecated in 1.12, then seems to have come back in 1.17
  165. // const fmtQueryPVStorageUsePct = `(sum(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass,namespace) + on (persistentvolumeclaim,namespace)
  166. // group_right(storageclass) sum(kubelet_volume_stats_used_bytes) by (persistentvolumeclaim,namespace))`
  167. queryUsedLocalStorage := provider.GetLocalStorageQuery(window, offset, false, true)
  168. queryTotalLocalStorage := provider.GetLocalStorageQuery(window, offset, false, false)
  169. if queryTotalLocalStorage != "" {
  170. queryTotalLocalStorage = fmt.Sprintf(" + %s", queryTotalLocalStorage)
  171. }
  172. fmtOffset := ""
  173. if offset != "" {
  174. fmtOffset = fmt.Sprintf("offset %s", offset)
  175. }
  176. queryDataCount := fmt.Sprintf(fmtQueryDataCount, window, minsPerResolution, fmtOffset, minsPerResolution)
  177. queryTotalGPU := fmt.Sprintf(fmtQueryTotalGPU, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  178. queryTotalCPU := fmt.Sprintf(fmtQueryTotalCPU, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  179. queryTotalRAM := fmt.Sprintf(fmtQueryTotalRAM, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  180. queryTotalStorage := fmt.Sprintf(fmtQueryTotalStorage, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset, hourlyToCumulative)
  181. ctx := prom.NewContext(client)
  182. resChs := ctx.QueryAll(
  183. queryDataCount,
  184. queryTotalGPU,
  185. queryTotalCPU,
  186. queryTotalRAM,
  187. queryTotalStorage,
  188. queryTotalLocalStorage,
  189. )
  190. if withBreakdown {
  191. queryCPUModePct := fmt.Sprintf(fmtQueryCPUModePct, window, fmtOffset, window, fmtOffset)
  192. queryRAMSystemPct := fmt.Sprintf(fmtQueryRAMSystemPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  193. queryRAMUserPct := fmt.Sprintf(fmtQueryRAMUserPct, window, minsPerResolution, fmtOffset, window, minsPerResolution, fmtOffset)
  194. bdResChs := ctx.QueryAll(
  195. queryCPUModePct,
  196. queryRAMSystemPct,
  197. queryRAMUserPct,
  198. queryUsedLocalStorage,
  199. )
  200. resChs = append(resChs, bdResChs...)
  201. }
  202. defaultClusterID := os.Getenv(clusterIDKey)
  203. dataMinsByCluster := map[string]float64{}
  204. for _, result := range resChs[0].Await() {
  205. clusterID, _ := result.GetString("cluster_id")
  206. if clusterID == "" {
  207. clusterID = defaultClusterID
  208. }
  209. dataMins := mins
  210. if len(result.Values) > 0 {
  211. dataMins = result.Values[0].Value
  212. } else {
  213. klog.V(3).Infof("[Warning] cluster cost data count returned no results for cluster %s", clusterID)
  214. }
  215. dataMinsByCluster[clusterID] = dataMins
  216. }
  217. // Determine combined discount
  218. discount, customDiscount := 0.0, 0.0
  219. c, err := A.Cloud.GetConfig()
  220. if err == nil {
  221. discount, err = ParsePercentString(c.Discount)
  222. if err != nil {
  223. discount = 0.0
  224. }
  225. customDiscount, err = ParsePercentString(c.NegotiatedDiscount)
  226. if err != nil {
  227. customDiscount = 0.0
  228. }
  229. }
  230. // Intermediate structure storing mapping of [clusterID][type ∈ {cpu, ram, storage, total}]=cost
  231. costData := make(map[string]map[string]float64)
  232. // Helper function to iterate over Prom query results, parsing the raw values into
  233. // the intermediate costData structure.
  234. setCostsFromResults := func(costData map[string]map[string]float64, results []*prom.QueryResult, name string, discount float64, customDiscount float64) {
  235. for _, result := range results {
  236. clusterID, _ := result.GetString("cluster_id")
  237. if clusterID == "" {
  238. clusterID = defaultClusterID
  239. }
  240. if _, ok := costData[clusterID]; !ok {
  241. costData[clusterID] = map[string]float64{}
  242. }
  243. if len(result.Values) > 0 {
  244. costData[clusterID][name] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  245. costData[clusterID]["total"] += result.Values[0].Value * (1.0 - discount) * (1.0 - customDiscount)
  246. }
  247. }
  248. }
  249. // Apply both sustained use and custom discounts to RAM and CPU
  250. setCostsFromResults(costData, resChs[2].Await(), "cpu", discount, customDiscount)
  251. setCostsFromResults(costData, resChs[3].Await(), "ram", discount, customDiscount)
  252. // Apply only custom discount to GPU and storage
  253. setCostsFromResults(costData, resChs[1].Await(), "gpu", 0.0, customDiscount)
  254. setCostsFromResults(costData, resChs[4].Await(), "storage", 0.0, customDiscount)
  255. setCostsFromResults(costData, resChs[5].Await(), "localstorage", 0.0, customDiscount)
  256. cpuBreakdownMap := map[string]*ClusterCostsBreakdown{}
  257. ramBreakdownMap := map[string]*ClusterCostsBreakdown{}
  258. pvUsedCostMap := map[string]float64{}
  259. if withBreakdown {
  260. for _, result := range resChs[6].Await() {
  261. clusterID, _ := result.GetString("cluster_id")
  262. if clusterID == "" {
  263. clusterID = defaultClusterID
  264. }
  265. if _, ok := cpuBreakdownMap[clusterID]; !ok {
  266. cpuBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  267. }
  268. cpuBD := cpuBreakdownMap[clusterID]
  269. mode, err := result.GetString("mode")
  270. if err != nil {
  271. klog.V(3).Infof("[Warning] ComputeClusterCosts: unable to read CPU mode: %s", err)
  272. mode = "other"
  273. }
  274. switch mode {
  275. case "idle":
  276. cpuBD.Idle += result.Values[0].Value
  277. case "system":
  278. cpuBD.System += result.Values[0].Value
  279. case "user":
  280. cpuBD.User += result.Values[0].Value
  281. default:
  282. cpuBD.Other += result.Values[0].Value
  283. }
  284. }
  285. for _, result := range resChs[7].Await() {
  286. clusterID, _ := result.GetString("cluster_id")
  287. if clusterID == "" {
  288. clusterID = defaultClusterID
  289. }
  290. if _, ok := ramBreakdownMap[clusterID]; !ok {
  291. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  292. }
  293. ramBD := ramBreakdownMap[clusterID]
  294. ramBD.System += result.Values[0].Value
  295. }
  296. for _, result := range resChs[8].Await() {
  297. clusterID, _ := result.GetString("cluster_id")
  298. if clusterID == "" {
  299. clusterID = defaultClusterID
  300. }
  301. if _, ok := ramBreakdownMap[clusterID]; !ok {
  302. ramBreakdownMap[clusterID] = &ClusterCostsBreakdown{}
  303. }
  304. ramBD := ramBreakdownMap[clusterID]
  305. ramBD.User += result.Values[0].Value
  306. }
  307. for _, ramBD := range ramBreakdownMap {
  308. remaining := 1.0
  309. remaining -= ramBD.Other
  310. remaining -= ramBD.System
  311. remaining -= ramBD.User
  312. ramBD.Idle = remaining
  313. }
  314. for _, result := range resChs[9].Await() {
  315. clusterID, _ := result.GetString("cluster_id")
  316. if clusterID == "" {
  317. clusterID = defaultClusterID
  318. }
  319. pvUsedCostMap[clusterID] += result.Values[0].Value
  320. }
  321. }
  322. // Convert intermediate structure to Costs instances
  323. costsByCluster := map[string]*ClusterCosts{}
  324. for id, cd := range costData {
  325. dataMins, ok := dataMinsByCluster[id]
  326. if !ok {
  327. dataMins = mins
  328. klog.V(3).Infof("[Warning] cluster cost data count not found for cluster %s", id)
  329. }
  330. costs, err := NewClusterCostsFromCumulative(cd["cpu"], cd["gpu"], cd["ram"], cd["storage"]+cd["localstorage"], window, offset, dataMins/util.MinsPerHour)
  331. if err != nil {
  332. klog.V(3).Infof("[Warning] Failed to parse cluster costs on %s (%s) from cumulative data: %+v", window, offset, cd)
  333. return nil, err
  334. }
  335. if cpuBD, ok := cpuBreakdownMap[id]; ok {
  336. costs.CPUBreakdown = cpuBD
  337. }
  338. if ramBD, ok := ramBreakdownMap[id]; ok {
  339. costs.RAMBreakdown = ramBD
  340. }
  341. costs.StorageBreakdown = &ClusterCostsBreakdown{}
  342. if pvUC, ok := pvUsedCostMap[id]; ok {
  343. costs.StorageBreakdown.Idle = (costs.StorageCumulative - pvUC) / costs.StorageCumulative
  344. costs.StorageBreakdown.User = pvUC / costs.StorageCumulative
  345. }
  346. costsByCluster[id] = costs
  347. }
  348. return costsByCluster, nil
  349. }
// Totals holds the cost time series returned by ClusterCostsOverTime. Each
// field is a list of two-element rows as produced by resultToTotals: the
// sample timestamp followed by the sample value, both formatted with "%f".
type Totals struct {
	TotalCost   [][]string `json:"totalcost"`
	CPUCost     [][]string `json:"cpucost"`
	MemCost     [][]string `json:"memcost"`
	StorageCost [][]string `json:"storageCost"`
}
  356. func resultToTotals(qr interface{}) ([][]string, error) {
  357. results, err := NewQueryResults(qr)
  358. if err != nil {
  359. return nil, err
  360. }
  361. if len(results) == 0 {
  362. return [][]string{}, fmt.Errorf("Not enough data available in the selected time range")
  363. }
  364. result := results[0]
  365. totals := [][]string{}
  366. for _, value := range result.Values {
  367. d0 := fmt.Sprintf("%f", value.Timestamp)
  368. d1 := fmt.Sprintf("%f", value.Value)
  369. toAppend := []string{
  370. d0,
  371. d1,
  372. }
  373. totals = append(totals, toAppend)
  374. }
  375. return totals, nil
  376. }
  377. // ClusterCostsOverTime gives the full cluster costs over time
  378. func ClusterCostsOverTime(cli prometheus.Client, provider cloud.Provider, startString, endString, windowString, offset string) (*Totals, error) {
  379. localStorageQuery := provider.GetLocalStorageQuery(windowString, offset, true, false)
  380. if localStorageQuery != "" {
  381. localStorageQuery = fmt.Sprintf("+ %s", localStorageQuery)
  382. }
  383. layout := "2006-01-02T15:04:05.000Z"
  384. start, err := time.Parse(layout, startString)
  385. if err != nil {
  386. klog.V(1).Infof("Error parsing time " + startString + ". Error: " + err.Error())
  387. return nil, err
  388. }
  389. end, err := time.Parse(layout, endString)
  390. if err != nil {
  391. klog.V(1).Infof("Error parsing time " + endString + ". Error: " + err.Error())
  392. return nil, err
  393. }
  394. window, err := time.ParseDuration(windowString)
  395. if err != nil {
  396. klog.V(1).Infof("Error parsing time " + windowString + ". Error: " + err.Error())
  397. return nil, err
  398. }
  399. // turn offsets of the format "[0-9+]h" into the format "offset [0-9+]h" for use in query templatess
  400. if offset != "" {
  401. offset = fmt.Sprintf("offset %s", offset)
  402. }
  403. qCores := fmt.Sprintf(queryClusterCores, windowString, offset, windowString, offset, windowString, offset)
  404. qRAM := fmt.Sprintf(queryClusterRAM, windowString, offset, windowString, offset)
  405. qStorage := fmt.Sprintf(queryStorage, windowString, offset, windowString, offset, localStorageQuery)
  406. qTotal := fmt.Sprintf(queryTotal, localStorageQuery)
  407. resultClusterCores, err := QueryRange(cli, qCores, start, end, window)
  408. if err != nil {
  409. return nil, err
  410. }
  411. resultClusterRAM, err := QueryRange(cli, qRAM, start, end, window)
  412. if err != nil {
  413. return nil, err
  414. }
  415. resultStorage, err := QueryRange(cli, qStorage, start, end, window)
  416. if err != nil {
  417. return nil, err
  418. }
  419. resultTotal, err := QueryRange(cli, qTotal, start, end, window)
  420. if err != nil {
  421. return nil, err
  422. }
  423. coreTotal, err := resultToTotals(resultClusterCores)
  424. if err != nil {
  425. klog.Infof("[Warning] ClusterCostsOverTime: no cpu data: %s", err)
  426. return nil, err
  427. }
  428. ramTotal, err := resultToTotals(resultClusterRAM)
  429. if err != nil {
  430. klog.Infof("[Warning] ClusterCostsOverTime: no ram data: %s", err)
  431. return nil, err
  432. }
  433. storageTotal, err := resultToTotals(resultStorage)
  434. if err != nil {
  435. klog.Infof("[Warning] ClusterCostsOverTime: no storage data: %s", err)
  436. }
  437. clusterTotal, err := resultToTotals(resultTotal)
  438. if err != nil {
  439. // If clusterTotal query failed, it's likely because there are no PVs, which
  440. // causes the qTotal query to return no data. Instead, query only node costs.
  441. // If that fails, return an error because something is actually wrong.
  442. qNodes := fmt.Sprintf(queryNodes, localStorageQuery)
  443. resultNodes, err := QueryRange(cli, qNodes, start, end, window)
  444. if err != nil {
  445. return nil, err
  446. }
  447. clusterTotal, err = resultToTotals(resultNodes)
  448. if err != nil {
  449. klog.Infof("[Warning] ClusterCostsOverTime: no node data: %s", err)
  450. return nil, err
  451. }
  452. }
  453. return &Totals{
  454. TotalCost: clusterTotal,
  455. CPUCost: coreTotal,
  456. MemCost: ramTotal,
  457. StorageCost: storageTotal,
  458. }, nil
  459. }