totals.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524
  1. package kubecost
  2. import (
  3. "errors"
  4. "fmt"
  5. "strconv"
  6. "time"
  7. "github.com/kubecost/cost-model/pkg/log"
  8. "github.com/patrickmn/go-cache"
  9. )
  10. // AllocationTotals represents aggregate costs of all Allocations for
  11. // a given cluster or tuple of (cluster, node) between a given start and end
  12. // time, where the costs are aggregated per-resource. AllocationTotals
  13. // is designed to be used as a pre-computed intermediate data structure when
  14. // contextual knowledge is required to carry out a task, but computing totals
  15. // on-the-fly would be expensive; e.g. idle allocation; sharing coefficients
  16. // for idle or shared resources, etc.
  17. type AllocationTotals struct {
  18. Start time.Time `json:"start"`
  19. End time.Time `json:"end"`
  20. Cluster string `json:"cluster"`
  21. Node string `json:"node"`
  22. Count int `json:"count"`
  23. CPUCost float64 `json:"cpuCost"`
  24. CPUCostAdjustment float64 `json:"cpuCostAdjustment"`
  25. GPUCost float64 `json:"gpuCost"`
  26. GPUCostAdjustment float64 `json:"gpuCostAdjustment"`
  27. LoadBalancerCost float64 `json:"loadBalancerCost"`
  28. LoadBalancerCostAdjustment float64 `json:"loadBalancerCostAdjustment"`
  29. NetworkCost float64 `json:"networkCost"`
  30. NetworkCostAdjustment float64 `json:"networkCostAdjustment"`
  31. PersistentVolumeCost float64 `json:"persistentVolumeCost"`
  32. PersistentVolumeCostAdjustment float64 `json:"persistentVolumeCostAdjustment"`
  33. RAMCost float64 `json:"ramCost"`
  34. RAMCostAdjustment float64 `json:"ramCostAdjustment"`
  35. }
  36. // ClearAdjustments sets all adjustment fields to 0.0
  37. func (art *AllocationTotals) ClearAdjustments() {
  38. art.CPUCostAdjustment = 0.0
  39. art.GPUCostAdjustment = 0.0
  40. art.RAMCostAdjustment = 0.0
  41. }
  42. // TotalCPUCost returns CPU cost with adjustment.
  43. func (art *AllocationTotals) TotalCPUCost() float64 {
  44. return art.CPUCost + art.CPUCostAdjustment
  45. }
  46. // TotalGPUCost returns GPU cost with adjustment.
  47. func (art *AllocationTotals) TotalGPUCost() float64 {
  48. return art.GPUCost + art.GPUCostAdjustment
  49. }
  50. // TotalRAMCost returns RAM cost with adjustment.
  51. func (art *AllocationTotals) TotalRAMCost() float64 {
  52. return art.RAMCost + art.RAMCostAdjustment
  53. }
  54. // TotalCost returns the sum of all costs.
  55. func (art *AllocationTotals) TotalCost() float64 {
  56. return art.TotalCPUCost() + art.TotalGPUCost() + art.LoadBalancerCost +
  57. art.NetworkCost + art.PersistentVolumeCost + art.TotalRAMCost()
  58. }
  59. // ComputeAllocationTotals totals the resource costs of the given AllocationSet
  60. // using the given property, i.e. cluster or node, where "node" really means to
  61. // use the fully-qualified (cluster, node) tuple.
  62. func ComputeAllocationTotals(as *AllocationSet, prop string) map[string]*AllocationTotals {
  63. arts := map[string]*AllocationTotals{}
  64. as.Each(func(name string, alloc *Allocation) {
  65. // Do not count idle or unmounted allocations
  66. if alloc.IsIdle() || alloc.IsUnmounted() {
  67. return
  68. }
  69. // Default to computing totals by Cluster, but allow override to use Node.
  70. key := alloc.Properties.Cluster
  71. if prop == AllocationNodeProp {
  72. key = fmt.Sprintf("%s/%s", alloc.Properties.Cluster, alloc.Properties.Node)
  73. }
  74. if _, ok := arts[key]; !ok {
  75. arts[key] = &AllocationTotals{
  76. Start: alloc.Start,
  77. End: alloc.End,
  78. Cluster: alloc.Properties.Cluster,
  79. Node: alloc.Properties.Node,
  80. }
  81. }
  82. if arts[key].Start.After(alloc.Start) {
  83. arts[key].Start = alloc.Start
  84. }
  85. if arts[key].End.Before(alloc.End) {
  86. arts[key].End = alloc.End
  87. }
  88. if arts[key].Node != alloc.Properties.Node {
  89. arts[key].Node = ""
  90. }
  91. arts[key].Count++
  92. arts[key].CPUCost += alloc.CPUCost
  93. arts[key].CPUCostAdjustment += alloc.CPUCostAdjustment
  94. arts[key].GPUCost += alloc.GPUCost
  95. arts[key].GPUCostAdjustment += alloc.GPUCostAdjustment
  96. arts[key].LoadBalancerCost += alloc.LBTotalCost()
  97. arts[key].NetworkCost += alloc.NetworkTotalCost()
  98. arts[key].PersistentVolumeCost += alloc.PVCost()
  99. arts[key].RAMCost += alloc.RAMCost
  100. arts[key].RAMCostAdjustment += alloc.RAMCostAdjustment
  101. })
  102. return arts
  103. }
  104. // AssetTotals represents aggregate costs of all Assets for a given
  105. // cluster or tuple of (cluster, node) between a given start and end time,
  106. // where the costs are aggregated per-resource. AssetTotals is designed
  107. // to be used as a pre-computed intermediate data structure when contextual
  108. // knowledge is required to carry out a task, but computing totals on-the-fly
  109. // would be expensive; e.g. idle allocation, shared tenancy costs
  110. type AssetTotals struct {
  111. Start time.Time `json:"start"`
  112. End time.Time `json:"end"`
  113. Cluster string `json:"cluster"`
  114. Node string `json:"node"`
  115. Count int `json:"count"`
  116. AttachedVolumeCost float64 `json:"attachedVolumeCost"`
  117. ClusterManagementCost float64 `json:"clusterManagementCost"`
  118. CPUCost float64 `json:"cpuCost"`
  119. CPUCostAdjustment float64 `json:"cpuCostAdjustment"`
  120. GPUCost float64 `json:"gpuCost"`
  121. GPUCostAdjustment float64 `json:"gpuCostAdjustment"`
  122. PersistentVolumeCost float64 `json:"persistentVolumeCost"`
  123. RAMCost float64 `json:"ramCost"`
  124. RAMCostAdjustment float64 `json:"ramCostAdjustment"`
  125. }
  126. // ClearAdjustments sets all adjustment fields to 0.0
  127. func (art *AssetTotals) ClearAdjustments() {
  128. art.CPUCostAdjustment = 0.0
  129. art.GPUCostAdjustment = 0.0
  130. art.RAMCostAdjustment = 0.0
  131. }
  132. // TotalCPUCost returns CPU cost with adjustment.
  133. func (art *AssetTotals) TotalCPUCost() float64 {
  134. return art.CPUCost + art.CPUCostAdjustment
  135. }
  136. // TotalGPUCost returns GPU cost with adjustment.
  137. func (art *AssetTotals) TotalGPUCost() float64 {
  138. return art.GPUCost + art.GPUCostAdjustment
  139. }
  140. // TotalRAMCost returns RAM cost with adjustment.
  141. func (art *AssetTotals) TotalRAMCost() float64 {
  142. return art.RAMCost + art.RAMCostAdjustment
  143. }
  144. // TotalCost returns the sum of all costs
  145. func (art *AssetTotals) TotalCost() float64 {
  146. return art.AttachedVolumeCost + art.ClusterManagementCost + art.TotalCPUCost() +
  147. art.TotalGPUCost() + art.PersistentVolumeCost + art.TotalRAMCost()
  148. }
  149. // ComputeAssetTotals totals the resource costs of the given AssetSet,
  150. // using the given property, i.e. cluster or node, where "node" really means to
  151. // use the fully-qualified (cluster, node) tuple.
  152. // NOTE: we're not capturing LoadBalancers here yet, but only because we don't
  153. // yet need them. They could be added.
  154. func ComputeAssetTotals(as *AssetSet, prop AssetProperty) map[string]*AssetTotals {
  155. arts := map[string]*AssetTotals{}
  156. // Attached disks are tracked by matching their name with the name of the
  157. // node, as is standard for attached disks.
  158. nodeNames := map[string]bool{}
  159. disks := map[string]*Disk{}
  160. as.Each(func(name string, asset Asset) {
  161. if node, ok := asset.(*Node); ok {
  162. // Default to computing totals by Cluster, but allow override to use Node.
  163. key := node.Properties().Cluster
  164. if prop == AssetNodeProp {
  165. key = fmt.Sprintf("%s/%s", node.Properties().Cluster, node.Properties().Name)
  166. }
  167. // Add node name to list of node names, but only if aggregating
  168. // by node. (These are to be used later for attached volumes.)
  169. nodeNames[key] = true
  170. // adjustmentRate is used to scale resource costs proportionally
  171. // by the adjustment. This is necessary because we only get one
  172. // adjustment per Node, not one per-resource-per-Node.
  173. //
  174. // e.g. total cost = $90, adjustment = -$10 => 0.9
  175. // e.g. total cost = $150, adjustment = -$300 => 0.3333
  176. // e.g. total cost = $150, adjustment = $50 => 1.5
  177. adjustmentRate := 1.0
  178. if node.TotalCost()-node.Adjustment() == 0 {
  179. // If (totalCost - adjustment) is 0.0 then adjustment cancels
  180. // the entire node cost and we should make everything 0
  181. // without dividing by 0.
  182. adjustmentRate = 0.0
  183. log.DedupedWarningf(5, "ComputeTotals: node cost adjusted to $0.00 for %s", node.Properties().Name)
  184. } else if node.Adjustment() != 0.0 {
  185. // adjustmentRate is the ratio of cost-with-adjustment (i.e. TotalCost)
  186. // to cost-without-adjustment (i.e. TotalCost - Adjustment).
  187. adjustmentRate = node.TotalCost() / (node.TotalCost() - node.Adjustment())
  188. }
  189. totalCPUCost := node.CPUCost * (1.0 - node.Discount)
  190. cpuCost := totalCPUCost * adjustmentRate
  191. cpuCostAdjustment := totalCPUCost - cpuCost
  192. totalGPUCost := node.GPUCost * (1.0 - node.Discount)
  193. gpuCost := totalGPUCost * adjustmentRate
  194. gpuCostAdjustment := totalGPUCost - gpuCost
  195. totalRAMCost := node.RAMCost * (1.0 - node.Discount)
  196. ramCost := totalRAMCost * adjustmentRate
  197. ramCostAdjustment := totalRAMCost - ramCost
  198. if _, ok := arts[key]; !ok {
  199. arts[key] = &AssetTotals{
  200. Start: node.Start(),
  201. End: node.End(),
  202. Cluster: node.Properties().Cluster,
  203. Node: node.Properties().Name,
  204. }
  205. }
  206. if arts[key].Start.After(node.Start()) {
  207. arts[key].Start = node.Start()
  208. }
  209. if arts[key].End.Before(node.End()) {
  210. arts[key].End = node.End()
  211. }
  212. if arts[key].Node != node.Properties().Name {
  213. arts[key].Node = ""
  214. }
  215. arts[key].Count++
  216. arts[key].CPUCost += cpuCost
  217. arts[key].CPUCostAdjustment += cpuCostAdjustment
  218. arts[key].RAMCost += ramCost
  219. arts[key].RAMCostAdjustment += ramCostAdjustment
  220. arts[key].GPUCost += gpuCost
  221. arts[key].GPUCostAdjustment += gpuCostAdjustment
  222. } else if disk, ok := asset.(*Disk); ok {
  223. key := fmt.Sprintf("%s/%s", disk.Properties().Cluster, disk.Properties().Name)
  224. disks[key] = disk
  225. } else if cm, ok := asset.(*ClusterManagement); ok && prop == AssetClusterProp {
  226. // Only record cluster management when prop is Cluster because we
  227. // can't break down ClusterManagement by node.
  228. key := cm.Properties().Cluster
  229. if _, ok := arts[key]; !ok {
  230. arts[key] = &AssetTotals{
  231. Start: cm.Start(),
  232. End: cm.End(),
  233. Cluster: cm.Properties().Cluster,
  234. }
  235. }
  236. arts[key].Count++
  237. arts[key].ClusterManagementCost += cm.TotalCost()
  238. }
  239. })
  240. // Identify attached volumes as disks with names matching a node's name
  241. for name := range nodeNames {
  242. if disk, ok := disks[name]; ok {
  243. // By default, the key will be the name, which is the tuple of
  244. // cluster/node. But if we're aggregating by cluster only, then
  245. // reset the key to just the cluster.
  246. key := name
  247. if prop == AssetClusterProp {
  248. key = disk.Properties().Cluster
  249. }
  250. if _, ok := arts[key]; !ok {
  251. arts[key] = &AssetTotals{
  252. Start: disk.Start(),
  253. End: disk.End(),
  254. Cluster: disk.Properties().Cluster,
  255. }
  256. if prop == AssetNodeProp {
  257. arts[key].Node = disk.Properties().Name
  258. }
  259. }
  260. arts[key].Count++
  261. arts[key].AttachedVolumeCost += disk.TotalCost()
  262. }
  263. }
  264. return arts
  265. }
  266. // ComputeIdleCoefficients returns the idle coefficients for CPU, GPU, and RAM
  267. // (in that order) for the given resource costs and totals.
  268. func ComputeIdleCoefficients(shareSplit, key string, cpuCost, gpuCost, ramCost float64, allocationTotals map[string]*AllocationTotals) (float64, float64, float64) {
  269. if shareSplit == ShareNone {
  270. return 0.0, 0.0, 0.0
  271. }
  272. if shareSplit != ShareEven {
  273. shareSplit = ShareWeighted
  274. }
  275. var cpuCoeff, gpuCoeff, ramCoeff float64
  276. if _, ok := allocationTotals[key]; !ok {
  277. return 0.0, 0.0, 0.0
  278. }
  279. if shareSplit == ShareEven {
  280. coeff := 1.0 / float64(allocationTotals[key].Count)
  281. return coeff, coeff, coeff
  282. }
  283. if allocationTotals[key].CPUCost > 0 {
  284. cpuCoeff = cpuCost / allocationTotals[key].CPUCost
  285. }
  286. if allocationTotals[key].GPUCost > 0 {
  287. gpuCoeff = cpuCost / allocationTotals[key].GPUCost
  288. }
  289. if allocationTotals[key].RAMCost > 0 {
  290. ramCoeff = ramCost / allocationTotals[key].RAMCost
  291. }
  292. return cpuCoeff, gpuCoeff, ramCoeff
  293. }
// TotalsStore acts as both an AllocationTotalsStore and an
// AssetTotalsStore. It declares no methods of its own: it embeds the two
// interfaces, so an implementation must provide the methods of both.
type TotalsStore interface {
	AllocationTotalsStore
	AssetTotalsStore
}
  300. // AllocationTotalsStore allows for storing (i.e. setting and
  301. // getting) AllocationTotals by cluster and by node.
  302. type AllocationTotalsStore interface {
  303. GetAllocationTotalsByCluster(start, end time.Time) (map[string]*AllocationTotals, bool)
  304. GetAllocationTotalsByNode(start, end time.Time) (map[string]*AllocationTotals, bool)
  305. SetAllocationTotalsByCluster(start, end time.Time, rts map[string]*AllocationTotals)
  306. SetAllocationTotalsByNode(start, end time.Time, rts map[string]*AllocationTotals)
  307. }
  308. // UpdateAllocationTotalsStore updates an AllocationTotalsStore
  309. // by totaling the given AllocationSet and saving the totals.
  310. func UpdateAllocationTotalsStore(arts AllocationTotalsStore, as *AllocationSet) error {
  311. if arts == nil {
  312. return errors.New("cannot update nil AllocationTotalsStore")
  313. }
  314. if as == nil {
  315. return errors.New("cannot update AllocationTotalsStore from nil AllocationSet")
  316. }
  317. if as.Window.IsOpen() {
  318. return errors.New("cannot update AllocationTotalsStore from AllocationSet with open window")
  319. }
  320. start := *as.Window.Start()
  321. end := *as.Window.End()
  322. artsByCluster := ComputeAllocationTotals(as, AllocationClusterProp)
  323. arts.SetAllocationTotalsByCluster(start, end, artsByCluster)
  324. artsByNode := ComputeAllocationTotals(as, AllocationNodeProp)
  325. arts.SetAllocationTotalsByNode(start, end, artsByNode)
  326. log.Infof("ETL: Allocation: updated resource totals for %s", as.Window)
  327. return nil
  328. }
  329. // AssetTotalsStore allows for storing (i.e. setting and getting)
  330. // AssetTotals by cluster and by node.
  331. type AssetTotalsStore interface {
  332. GetAssetTotalsByCluster(start, end time.Time) (map[string]*AssetTotals, bool)
  333. GetAssetTotalsByNode(start, end time.Time) (map[string]*AssetTotals, bool)
  334. SetAssetTotalsByCluster(start, end time.Time, rts map[string]*AssetTotals)
  335. SetAssetTotalsByNode(start, end time.Time, rts map[string]*AssetTotals)
  336. }
  337. // UpdateAssetTotalsStore updates an AssetTotalsStore
  338. // by totaling the given AssetSet and saving the totals.
  339. func UpdateAssetTotalsStore(arts AssetTotalsStore, as *AssetSet) error {
  340. if arts == nil {
  341. return errors.New("cannot update nil AssetTotalsStore")
  342. }
  343. if as == nil {
  344. return errors.New("cannot update AssetTotalsStore from nil AssetSet")
  345. }
  346. if as.Window.IsOpen() {
  347. return errors.New("cannot update AssetTotalsStore from AssetSet with open window")
  348. }
  349. start := *as.Window.Start()
  350. end := *as.Window.End()
  351. artsByCluster := ComputeAssetTotals(as, AssetClusterProp)
  352. arts.SetAssetTotalsByCluster(start, end, artsByCluster)
  353. artsByNode := ComputeAssetTotals(as, AssetNodeProp)
  354. arts.SetAssetTotalsByNode(start, end, artsByNode)
  355. log.Infof("ETL: Asset: updated resource totals for %s", as.Window)
  356. return nil
  357. }
  358. // MemoryTotalsStore is an in-memory cache TotalsStore
  359. type MemoryTotalsStore struct {
  360. allocTotalsByCluster *cache.Cache
  361. allocTotalsByNode *cache.Cache
  362. assetTotalsByCluster *cache.Cache
  363. assetTotalsByNode *cache.Cache
  364. }
  365. // NewMemoryTotalsStore instantiates a new MemoryTotalsStore,
  366. // which is composed of four in-memory caches.
  367. func NewMemoryTotalsStore() *MemoryTotalsStore {
  368. return &MemoryTotalsStore{
  369. allocTotalsByCluster: cache.New(cache.NoExpiration, cache.NoExpiration),
  370. allocTotalsByNode: cache.New(cache.NoExpiration, cache.NoExpiration),
  371. assetTotalsByCluster: cache.New(cache.NoExpiration, cache.NoExpiration),
  372. assetTotalsByNode: cache.New(cache.NoExpiration, cache.NoExpiration),
  373. }
  374. }
  375. // GetAllocationTotalsByCluster retrieves the AllocationTotals
  376. // by cluster for the given start and end times.
  377. func (mts *MemoryTotalsStore) GetAllocationTotalsByCluster(start time.Time, end time.Time) (map[string]*AllocationTotals, bool) {
  378. k := storeKey(start, end)
  379. if raw, ok := mts.allocTotalsByCluster.Get(k); ok {
  380. return raw.(map[string]*AllocationTotals), true
  381. } else {
  382. return map[string]*AllocationTotals{}, false
  383. }
  384. }
  385. // GetAllocationTotalsByNode retrieves the AllocationTotals
  386. // by node for the given start and end times.
  387. func (mts *MemoryTotalsStore) GetAllocationTotalsByNode(start time.Time, end time.Time) (map[string]*AllocationTotals, bool) {
  388. k := storeKey(start, end)
  389. if raw, ok := mts.allocTotalsByNode.Get(k); ok {
  390. return raw.(map[string]*AllocationTotals), true
  391. } else {
  392. return map[string]*AllocationTotals{}, false
  393. }
  394. }
  395. // SetAllocationTotalsByCluster set the per-cluster AllocationTotals
  396. // to the given values for the given start and end times.
  397. func (mts *MemoryTotalsStore) SetAllocationTotalsByCluster(start time.Time, end time.Time, arts map[string]*AllocationTotals) {
  398. k := storeKey(start, end)
  399. mts.allocTotalsByCluster.Set(k, arts, cache.NoExpiration)
  400. }
  401. // SetAllocationTotalsByNode set the per-node AllocationTotals
  402. // to the given values for the given start and end times.
  403. func (mts *MemoryTotalsStore) SetAllocationTotalsByNode(start time.Time, end time.Time, arts map[string]*AllocationTotals) {
  404. k := storeKey(start, end)
  405. mts.allocTotalsByNode.Set(k, arts, cache.NoExpiration)
  406. }
  407. // GetAssetTotalsByCluster retrieves the AssetTotals
  408. // by cluster for the given start and end times.
  409. func (mts *MemoryTotalsStore) GetAssetTotalsByCluster(start time.Time, end time.Time) (map[string]*AssetTotals, bool) {
  410. k := storeKey(start, end)
  411. if raw, ok := mts.assetTotalsByCluster.Get(k); ok {
  412. return raw.(map[string]*AssetTotals), true
  413. } else {
  414. return map[string]*AssetTotals{}, false
  415. }
  416. }
  417. // GetAssetTotalsByNode retrieves the AssetTotals
  418. // by node for the given start and end times.
  419. func (mts *MemoryTotalsStore) GetAssetTotalsByNode(start time.Time, end time.Time) (map[string]*AssetTotals, bool) {
  420. k := storeKey(start, end)
  421. if raw, ok := mts.assetTotalsByNode.Get(k); ok {
  422. return raw.(map[string]*AssetTotals), true
  423. } else {
  424. return map[string]*AssetTotals{}, false
  425. }
  426. }
  427. // SetAssetTotalsByCluster set the per-cluster AssetTotals
  428. // to the given values for the given start and end times.
  429. func (mts *MemoryTotalsStore) SetAssetTotalsByCluster(start time.Time, end time.Time, arts map[string]*AssetTotals) {
  430. k := storeKey(start, end)
  431. mts.assetTotalsByCluster.Set(k, arts, cache.NoExpiration)
  432. }
  433. // SetAssetTotalsByNode set the per-node AssetTotals
  434. // to the given values for the given start and end times.
  435. func (mts *MemoryTotalsStore) SetAssetTotalsByNode(start time.Time, end time.Time, arts map[string]*AssetTotals) {
  436. k := storeKey(start, end)
  437. mts.assetTotalsByNode.Set(k, arts, cache.NoExpiration)
  438. }
  439. // storeKey creates a storage key based on start and end times
  440. func storeKey(start, end time.Time) string {
  441. startStr := strconv.FormatInt(start.Unix(), 10)
  442. endStr := strconv.FormatInt(end.Unix(), 10)
  443. return fmt.Sprintf("%s-%s", startStr, endStr)
  444. }