2
0

aggregations.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. package costmodel
  2. import (
  3. "math"
  4. "sort"
  5. "strconv"
  6. "time"
  7. "github.com/kubecost/cost-model/cloud"
  8. prometheusClient "github.com/prometheus/client_golang/api"
  9. "k8s.io/klog"
  10. )
  11. type Aggregation struct {
  12. Aggregator string `json:"aggregation"`
  13. AggregatorSubField string `json:"aggregationSubfield"`
  14. Environment string `json:"environment"`
  15. Cluster string `json:"cluster"`
  16. CPUAllocation []*Vector `json:"-"`
  17. CPUCostVector []*Vector `json:"cpuCostVector,omitempty"`
  18. RAMAllocation []*Vector `json:"-"`
  19. RAMCostVector []*Vector `json:"ramCostVector,omitempty"`
  20. PVCostVector []*Vector `json:"pvCostVector,omitempty"`
  21. GPUAllocation []*Vector `json:"-"`
  22. GPUCostVector []*Vector `json:"gpuCostVector,omitempty"`
  23. CPUCost float64 `json:"cpuCost"`
  24. RAMCost float64 `json:"ramCost"`
  25. GPUCost float64 `json:"gpuCost"`
  26. PVCost float64 `json:"pvCost"`
  27. NetworkCost float64 `json:"networkCost"`
  28. SharedCost float64 `json:"sharedCost"`
  29. TotalCost float64 `json:"totalCost"`
  30. }
  31. type SharedResourceInfo struct {
  32. ShareResources bool
  33. SharedNamespace map[string]bool
  34. LabelSelectors map[string]string
  35. }
  36. func (s *SharedResourceInfo) IsSharedResource(costDatum *CostData) bool {
  37. if _, ok := s.SharedNamespace[costDatum.Namespace]; ok {
  38. return true
  39. }
  40. for labelName, labelValue := range s.LabelSelectors {
  41. if val, ok := costDatum.Labels[labelName]; ok {
  42. if val == labelValue {
  43. return true
  44. }
  45. }
  46. }
  47. return false
  48. }
  49. func NewSharedResourceInfo(shareResources bool, sharedNamespaces []string, labelnames []string, labelvalues []string) *SharedResourceInfo {
  50. sr := &SharedResourceInfo{
  51. ShareResources: shareResources,
  52. SharedNamespace: make(map[string]bool),
  53. LabelSelectors: make(map[string]string),
  54. }
  55. for _, ns := range sharedNamespaces {
  56. sr.SharedNamespace[ns] = true
  57. }
  58. sr.SharedNamespace["kube-system"] = true // kube-system should be split by default
  59. for i := range labelnames {
  60. sr.LabelSelectors[labelnames[i]] = labelvalues[i]
  61. }
  62. return sr
  63. }
  64. func ComputeIdleCoefficient(costData map[string]*CostData, cli prometheusClient.Client, cp cloud.Provider, discount float64, windowString, offset string) (float64, error) {
  65. windowDuration, err := time.ParseDuration(windowString)
  66. if err != nil {
  67. return 0.0, err
  68. }
  69. totals, err := ClusterCosts(cli, cp, windowString, offset)
  70. if err != nil {
  71. return 0.0, err
  72. }
  73. totalClusterCost, err := strconv.ParseFloat(totals.TotalCost[0][1], 64)
  74. if err != nil || totalClusterCost == 0.0 {
  75. return 0.0, err
  76. }
  77. totalClusterCostOverWindow := (totalClusterCost / 730) * windowDuration.Hours() * (1 - discount)
  78. totalContainerCost := 0.0
  79. for _, costDatum := range costData {
  80. cpuv, ramv, gpuv, pvvs := getPriceVectors(cp, costDatum, discount, 1)
  81. totalContainerCost += totalVector(cpuv)
  82. totalContainerCost += totalVector(ramv)
  83. totalContainerCost += totalVector(gpuv)
  84. for _, pv := range pvvs {
  85. totalContainerCost += totalVector(pv)
  86. }
  87. }
  88. return (totalContainerCost / totalClusterCostOverWindow), nil
  89. }
  90. // AggregateCostModel reduces the dimensions of raw cost data by field and, optionally, by time. The field parameter determines the field
  91. // by which to group data, with an optional subfield, e.g. for groupings like field="label" and subfield="app" for grouping by "label.app".
  92. func AggregateCostModel(cp cloud.Provider, costData map[string]*CostData, field string, subfield string, timeSeries bool, discount float64, idleCoefficient float64, sr *SharedResourceInfo) map[string]*Aggregation {
  93. // aggregations collects key-value pairs of resource group-to-aggregated data
  94. // e.g. namespace-to-data or label-value-to-data
  95. aggregations := make(map[string]*Aggregation)
  96. // sharedResourceCost is the running total cost of resources that should be reported
  97. // as shared across all other resources, rather than reported as a stand-alone category
  98. sharedResourceCost := 0.0
  99. for _, costDatum := range costData {
  100. if sr != nil && sr.ShareResources && sr.IsSharedResource(costDatum) {
  101. cpuv, ramv, gpuv, pvvs := getPriceVectors(cp, costDatum, discount, idleCoefficient)
  102. sharedResourceCost += totalVector(cpuv)
  103. sharedResourceCost += totalVector(ramv)
  104. sharedResourceCost += totalVector(gpuv)
  105. for _, pv := range pvvs {
  106. sharedResourceCost += totalVector(pv)
  107. }
  108. } else {
  109. if field == "cluster" {
  110. aggregateDatum(cp, aggregations, costDatum, field, subfield, costDatum.ClusterID, discount, idleCoefficient)
  111. } else if field == "namespace" {
  112. aggregateDatum(cp, aggregations, costDatum, field, subfield, costDatum.Namespace, discount, idleCoefficient)
  113. } else if field == "service" {
  114. if len(costDatum.Services) > 0 {
  115. aggregateDatum(cp, aggregations, costDatum, field, subfield, costDatum.Services[0], discount, idleCoefficient)
  116. }
  117. } else if field == "deployment" {
  118. if len(costDatum.Deployments) > 0 {
  119. aggregateDatum(cp, aggregations, costDatum, field, subfield, costDatum.Deployments[0], discount, idleCoefficient)
  120. }
  121. } else if field == "label" {
  122. if costDatum.Labels != nil {
  123. if subfieldName, ok := costDatum.Labels[subfield]; ok {
  124. aggregateDatum(cp, aggregations, costDatum, field, subfield, subfieldName, discount, idleCoefficient)
  125. }
  126. }
  127. }
  128. }
  129. }
  130. for _, agg := range aggregations {
  131. agg.CPUCost = totalVector(agg.CPUCostVector)
  132. agg.RAMCost = totalVector(agg.RAMCostVector)
  133. agg.GPUCost = totalVector(agg.GPUCostVector)
  134. agg.PVCost = totalVector(agg.PVCostVector)
  135. agg.SharedCost = sharedResourceCost / float64(len(aggregations))
  136. agg.TotalCost = agg.CPUCost + agg.RAMCost + agg.GPUCost + agg.PVCost + agg.SharedCost
  137. // remove time series data if it is not explicitly requested
  138. if !timeSeries {
  139. agg.CPUCostVector = nil
  140. agg.RAMCostVector = nil
  141. agg.PVCostVector = nil
  142. agg.GPUCostVector = nil
  143. }
  144. }
  145. return aggregations
  146. }
  147. func aggregateDatum(cp cloud.Provider, aggregations map[string]*Aggregation, costDatum *CostData, field string, subfield string, key string, discount float64, idleCoefficient float64) {
  148. // add new entry to aggregation results if a new
  149. if _, ok := aggregations[key]; !ok {
  150. agg := &Aggregation{}
  151. agg.Aggregator = field
  152. agg.AggregatorSubField = subfield
  153. agg.Environment = key
  154. agg.Cluster = costDatum.ClusterID
  155. aggregations[key] = agg
  156. }
  157. mergeVectors(cp, costDatum, aggregations[key], discount, idleCoefficient)
  158. }
  159. func mergeVectors(cp cloud.Provider, costDatum *CostData, aggregation *Aggregation, discount float64, idleCoefficient float64) {
  160. aggregation.CPUAllocation = addVectors(costDatum.CPUAllocation, aggregation.CPUAllocation)
  161. aggregation.RAMAllocation = addVectors(costDatum.RAMAllocation, aggregation.RAMAllocation)
  162. aggregation.GPUAllocation = addVectors(costDatum.GPUReq, aggregation.GPUAllocation)
  163. cpuv, ramv, gpuv, pvvs := getPriceVectors(cp, costDatum, discount, idleCoefficient)
  164. aggregation.CPUCostVector = addVectors(cpuv, aggregation.CPUCostVector)
  165. aggregation.RAMCostVector = addVectors(ramv, aggregation.RAMCostVector)
  166. aggregation.GPUCostVector = addVectors(gpuv, aggregation.GPUCostVector)
  167. for _, vectorList := range pvvs {
  168. aggregation.PVCostVector = addVectors(aggregation.PVCostVector, vectorList)
  169. }
  170. }
  171. func getPriceVectors(cp cloud.Provider, costDatum *CostData, discount float64, idleCoefficient float64) ([]*Vector, []*Vector, []*Vector, [][]*Vector) {
  172. cpuCostStr := costDatum.NodeData.VCPUCost
  173. ramCostStr := costDatum.NodeData.RAMCost
  174. gpuCostStr := costDatum.NodeData.GPUCost
  175. pvCostStr := costDatum.NodeData.StorageCost
  176. // If custom pricing is enabled and can be retrieved, replace
  177. // default cost values with custom values
  178. customPricing, err := cp.GetConfig()
  179. if err != nil {
  180. klog.Errorf("failed to load custom pricing: %s", err)
  181. }
  182. if cloud.CustomPricesEnabled(cp) && err == nil {
  183. if costDatum.NodeData.IsSpot() {
  184. cpuCostStr = customPricing.SpotCPU
  185. ramCostStr = customPricing.SpotRAM
  186. gpuCostStr = customPricing.SpotGPU
  187. } else {
  188. cpuCostStr = customPricing.CPU
  189. ramCostStr = customPricing.RAM
  190. gpuCostStr = customPricing.GPU
  191. }
  192. pvCostStr = customPricing.Storage
  193. }
  194. cpuCost, _ := strconv.ParseFloat(cpuCostStr, 64)
  195. ramCost, _ := strconv.ParseFloat(ramCostStr, 64)
  196. gpuCost, _ := strconv.ParseFloat(gpuCostStr, 64)
  197. pvCost, _ := strconv.ParseFloat(pvCostStr, 64)
  198. cpuv := make([]*Vector, 0, len(costDatum.CPUAllocation))
  199. for _, val := range costDatum.CPUAllocation {
  200. cpuv = append(cpuv, &Vector{
  201. Timestamp: math.Round(val.Timestamp/10) * 10,
  202. Value: val.Value * cpuCost * (1 - discount) * 1 / idleCoefficient,
  203. })
  204. }
  205. ramv := make([]*Vector, 0, len(costDatum.RAMAllocation))
  206. for _, val := range costDatum.RAMAllocation {
  207. ramv = append(ramv, &Vector{
  208. Timestamp: math.Round(val.Timestamp/10) * 10,
  209. Value: (val.Value / 1024 / 1024 / 1024) * ramCost * (1 - discount) * 1 / idleCoefficient,
  210. })
  211. }
  212. gpuv := make([]*Vector, 0, len(costDatum.GPUReq))
  213. for _, val := range costDatum.GPUReq {
  214. gpuv = append(gpuv, &Vector{
  215. Timestamp: math.Round(val.Timestamp/10) * 10,
  216. Value: val.Value * gpuCost * (1 - discount) * 1 / idleCoefficient,
  217. })
  218. }
  219. pvvs := make([][]*Vector, 0, len(costDatum.PVCData))
  220. for _, pvcData := range costDatum.PVCData {
  221. pvv := make([]*Vector, 0, len(pvcData.Values))
  222. if pvcData.Volume != nil {
  223. cost, _ := strconv.ParseFloat(pvcData.Volume.Cost, 64)
  224. // override with custom pricing if enabled
  225. if cloud.CustomPricesEnabled(cp) {
  226. cost = pvCost
  227. }
  228. for _, val := range pvcData.Values {
  229. pvv = append(pvv, &Vector{
  230. Timestamp: math.Round(val.Timestamp/10) * 10,
  231. Value: (val.Value / 1024 / 1024 / 1024) * cost * (1 - discount) * 1 / idleCoefficient,
  232. })
  233. }
  234. pvvs = append(pvvs, pvv)
  235. }
  236. }
  237. return cpuv, ramv, gpuv, pvvs
  238. }
  239. func totalVector(vectors []*Vector) float64 {
  240. total := 0.0
  241. for _, vector := range vectors {
  242. total += vector.Value
  243. }
  244. return total
  245. }
  246. func addVectors(req []*Vector, used []*Vector) []*Vector {
  247. if req == nil || len(req) == 0 {
  248. for _, usedV := range used {
  249. if usedV.Timestamp == 0 {
  250. continue
  251. }
  252. usedV.Timestamp = math.Round(usedV.Timestamp/10) * 10
  253. }
  254. return used
  255. }
  256. if used == nil || len(used) == 0 {
  257. for _, reqV := range req {
  258. if reqV.Timestamp == 0 {
  259. continue
  260. }
  261. reqV.Timestamp = math.Round(reqV.Timestamp/10) * 10
  262. }
  263. return req
  264. }
  265. var allocation []*Vector
  266. var timestamps []float64
  267. reqMap := make(map[float64]float64)
  268. for _, reqV := range req {
  269. if reqV.Timestamp == 0 {
  270. continue
  271. }
  272. reqV.Timestamp = math.Round(reqV.Timestamp/10) * 10
  273. reqMap[reqV.Timestamp] = reqV.Value
  274. timestamps = append(timestamps, reqV.Timestamp)
  275. }
  276. usedMap := make(map[float64]float64)
  277. for _, usedV := range used {
  278. if usedV.Timestamp == 0 {
  279. continue
  280. }
  281. usedV.Timestamp = math.Round(usedV.Timestamp/10) * 10
  282. usedMap[usedV.Timestamp] = usedV.Value
  283. if _, ok := reqMap[usedV.Timestamp]; !ok { // no need to double add, since we'll range over sorted timestamps and check.
  284. timestamps = append(timestamps, usedV.Timestamp)
  285. }
  286. }
  287. sort.Float64s(timestamps)
  288. for _, t := range timestamps {
  289. rv, okR := reqMap[t]
  290. uv, okU := usedMap[t]
  291. allocationVector := &Vector{
  292. Timestamp: t,
  293. }
  294. if okR && okU {
  295. allocationVector.Value = rv + uv
  296. } else if okR {
  297. allocationVector.Value = rv
  298. } else if okU {
  299. allocationVector.Value = uv
  300. }
  301. allocation = append(allocation, allocationVector)
  302. }
  303. return allocation
  304. }