allocation.go 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858
  1. package kubecost
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "sort"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/kubecost/cost-model/pkg/log"
  11. )
  12. // TODO Clean-up use of IsEmpty; nil checks should be separated for safety.
  13. // TODO Consider making Allocation an interface, which is fulfilled by structs
  14. // like KubernetesAllocation, IdleAllocation, and ExternalAllocation.
  15. // ExternalSuffix indicates an external allocation
  16. const ExternalSuffix = "__external__"
  17. // IdleSuffix indicates an idle allocation property
  18. const IdleSuffix = "__idle__"
  19. // SharedSuffix indicates an shared allocation property
  20. const SharedSuffix = "__shared__"
  21. // UnallocatedSuffix indicates an unallocated allocation property
  22. const UnallocatedSuffix = "__unallocated__"
  23. // UnmountedSuffix indicated allocation to an unmounted PV
  24. const UnmountedSuffix = "__unmounted__"
  25. // ShareWeighted indicates that a shared resource should be shared as a
  26. // proportion of the cost of the remaining allocations.
  27. const ShareWeighted = "__weighted__"
  28. // ShareEven indicates that a shared resource should be shared evenly across
  29. // all remaining allocations.
  30. const ShareEven = "__even__"
  31. // ShareNone indicates that a shareable resource should not be shared
  32. const ShareNone = "__none__"
  33. // Allocation is a unit of resource allocation and cost for a given window
  34. // of time and for a given kubernetes construct with its associated set of
  35. // properties.
  36. // TODO niko/computeallocation compute efficiency on the fly?
  37. type Allocation struct {
  38. Name string `json:"name"`
  39. Properties Properties `json:"properties,omitempty"`
  40. Window Window `json:"window"`
  41. Start time.Time `json:"start"`
  42. End time.Time `json:"end"`
  43. CPUCoreHours float64 `json:"cpuCoreHours"`
  44. CPUCoreRequestAverage float64 `json:"cpuCoreRequestAverage"`
  45. CPUCoreUsageAverage float64 `json:"cpuCoreUsageAverage"`
  46. CPUCost float64 `json:"cpuCost"`
  47. GPUHours float64 `json:"gpuHours"`
  48. GPUCost float64 `json:"gpuCost"`
  49. NetworkCost float64 `json:"networkCost"`
  50. PVByteHours float64 `json:"pvByteHours"`
  51. PVCost float64 `json:"pvCost"`
  52. RAMByteHours float64 `json:"ramByteHours"`
  53. RAMBytesRequestAverage float64 `json:"ramBytesRequestAverage"`
  54. RAMBytesUsageAverage float64 `json:"ramBytesUsageAverage"`
  55. RAMCost float64 `json:"ramCost"`
  56. SharedCost float64 `json:"sharedCost"`
  57. ExternalCost float64 `json:"externalCost"`
  58. TotalCost float64 `json:"totalCost"`
  59. }
  60. // AllocationMatchFunc is a function that can be used to match Allocations by
  61. // returning true for any given Allocation if a condition is met.
  62. type AllocationMatchFunc func(*Allocation) bool
  63. // Add returns the result of summing the two given Allocations, which sums the
  64. // summary fields (e.g. costs, resources) and recomputes efficiency. Neither of
  65. // the two original Allocations are mutated in the process.
  66. func (a *Allocation) Add(that *Allocation) (*Allocation, error) {
  67. if a == nil {
  68. return that.Clone(), nil
  69. }
  70. if that == nil {
  71. return a.Clone(), nil
  72. }
  73. // Note: no need to clone "that", as add only mutates the receiver
  74. agg := a.Clone()
  75. agg.add(that)
  76. return agg, nil
  77. }
  78. // Clone returns a deep copy of the given Allocation
  79. func (a *Allocation) Clone() *Allocation {
  80. if a == nil {
  81. return nil
  82. }
  83. return &Allocation{
  84. Name: a.Name,
  85. Properties: a.Properties.Clone(),
  86. Window: a.Window.Clone(),
  87. Start: a.Start,
  88. End: a.End,
  89. CPUCoreHours: a.CPUCoreHours,
  90. CPUCoreRequestAverage: a.CPUCoreRequestAverage,
  91. CPUCoreUsageAverage: a.CPUCoreUsageAverage,
  92. CPUCost: a.CPUCost,
  93. GPUHours: a.GPUHours,
  94. GPUCost: a.GPUCost,
  95. NetworkCost: a.NetworkCost,
  96. PVByteHours: a.PVByteHours,
  97. PVCost: a.PVCost,
  98. RAMByteHours: a.RAMByteHours,
  99. RAMBytesRequestAverage: a.RAMBytesRequestAverage,
  100. RAMBytesUsageAverage: a.RAMBytesUsageAverage,
  101. RAMCost: a.RAMCost,
  102. SharedCost: a.SharedCost,
  103. ExternalCost: a.ExternalCost,
  104. TotalCost: a.TotalCost,
  105. }
  106. }
  107. // Equal returns true if the values held in the given Allocation precisely
  108. // match those of the receiving Allocation. nil does not match nil.
  109. func (a *Allocation) Equal(that *Allocation) bool {
  110. if a == nil || that == nil {
  111. return false
  112. }
  113. if a.Name != that.Name {
  114. return false
  115. }
  116. if !a.Properties.Equal(&that.Properties) {
  117. return false
  118. }
  119. if !a.Window.Equal(that.Window) {
  120. return false
  121. }
  122. if !a.Start.Equal(that.Start) {
  123. return false
  124. }
  125. if !a.End.Equal(that.End) {
  126. return false
  127. }
  128. if a.CPUCoreHours != that.CPUCoreHours {
  129. return false
  130. }
  131. if a.CPUCost != that.CPUCost {
  132. return false
  133. }
  134. if a.GPUHours != that.GPUHours {
  135. return false
  136. }
  137. if a.GPUCost != that.GPUCost {
  138. return false
  139. }
  140. if a.NetworkCost != that.NetworkCost {
  141. return false
  142. }
  143. if a.PVByteHours != that.PVByteHours {
  144. return false
  145. }
  146. if a.PVCost != that.PVCost {
  147. return false
  148. }
  149. if a.RAMByteHours != that.RAMByteHours {
  150. return false
  151. }
  152. if a.RAMCost != that.RAMCost {
  153. return false
  154. }
  155. if a.SharedCost != that.SharedCost {
  156. return false
  157. }
  158. if a.ExternalCost != that.ExternalCost {
  159. return false
  160. }
  161. if a.TotalCost != that.TotalCost {
  162. return false
  163. }
  164. return true
  165. }
  166. // CPUEfficiency is the ratio of usage to request. If there is no request and
  167. // no usage or cost, then efficiency is zero. If there is no request, but there
  168. // is usage or cost, then efficiency is 100%.
  169. func (a *Allocation) CPUEfficiency() float64 {
  170. if a.CPUCoreRequestAverage > 0 {
  171. return a.CPUCoreUsageAverage / a.CPUCoreRequestAverage
  172. }
  173. if a.CPUCoreUsageAverage == 0.0 || a.CPUCost == 0.0 {
  174. return 0.0
  175. }
  176. return 1.0
  177. }
  178. // RAMEfficiency is the ratio of usage to request. If there is no request and
  179. // no usage or cost, then efficiency is zero. If there is no request, but there
  180. // is usage or cost, then efficiency is 100%.
  181. func (a *Allocation) RAMEfficiency() float64 {
  182. if a.RAMBytesRequestAverage > 0 {
  183. return a.RAMBytesUsageAverage / a.RAMBytesRequestAverage
  184. }
  185. if a.RAMBytesUsageAverage == 0.0 || a.RAMCost == 0.0 {
  186. return 0.0
  187. }
  188. return 1.0
  189. }
  190. // TotalEfficiency is the cost-weighted average of CPU and RAM efficiency. If
  191. // there is no cost at all, then efficiency is zero.
  192. func (a *Allocation) TotalEfficiency() float64 {
  193. if a.CPUCost+a.RAMCost > 0 {
  194. ramCostEff := a.RAMEfficiency() * a.RAMCost
  195. cpuCostEff := a.CPUEfficiency() * a.CPUCost
  196. return (ramCostEff + cpuCostEff) / (a.CPUCost + a.RAMCost)
  197. }
  198. return 0.0
  199. }
  200. // CPUCores converts the Allocation's CPUCoreHours into average CPUCores
  201. func (a *Allocation) CPUCores() float64 {
  202. if a.Minutes() <= 0.0 {
  203. return 0.0
  204. }
  205. return a.CPUCoreHours / (a.Minutes() / 60.0)
  206. }
  207. // RAMBytes converts the Allocation's RAMByteHours into average RAMBytes
  208. func (a *Allocation) RAMBytes() float64 {
  209. if a.Minutes() <= 0.0 {
  210. return 0.0
  211. }
  212. return a.RAMByteHours / (a.Minutes() / 60.0)
  213. }
  214. // PVBytes converts the Allocation's PVByteHours into average PVBytes
  215. func (a *Allocation) PVBytes() float64 {
  216. if a.Minutes() <= 0.0 {
  217. return 0.0
  218. }
  219. return a.PVByteHours / (a.Minutes() / 60.0)
  220. }
  221. // MarshalJSON implements json.Marshal interface
  222. func (a *Allocation) MarshalJSON() ([]byte, error) {
  223. buffer := bytes.NewBufferString("{")
  224. jsonEncodeString(buffer, "name", a.Name, ",")
  225. jsonEncode(buffer, "properties", a.Properties, ",")
  226. jsonEncode(buffer, "window", a.Window, ",")
  227. jsonEncodeString(buffer, "start", a.Start.Format(timeFmt), ",")
  228. jsonEncodeString(buffer, "end", a.End.Format(timeFmt), ",")
  229. jsonEncodeFloat64(buffer, "minutes", a.Minutes(), ",")
  230. jsonEncodeFloat64(buffer, "cpuCores", a.CPUCores(), ",")
  231. jsonEncodeFloat64(buffer, "cpuCoreRequestAverage", a.CPUCoreRequestAverage, ",")
  232. jsonEncodeFloat64(buffer, "cpuCoreUsageAverage", a.CPUCoreUsageAverage, ",")
  233. jsonEncodeFloat64(buffer, "cpuCoreHours", a.CPUCoreHours, ",")
  234. jsonEncodeFloat64(buffer, "cpuCost", a.CPUCost, ",")
  235. jsonEncodeFloat64(buffer, "cpuEfficiency", a.CPUEfficiency(), ",")
  236. jsonEncodeFloat64(buffer, "gpuHours", a.GPUHours, ",")
  237. jsonEncodeFloat64(buffer, "gpuCost", a.GPUCost, ",")
  238. jsonEncodeFloat64(buffer, "networkCost", a.NetworkCost, ",")
  239. jsonEncodeFloat64(buffer, "pvBytes", a.PVBytes(), ",")
  240. jsonEncodeFloat64(buffer, "pvByteHours", a.PVByteHours, ",")
  241. jsonEncodeFloat64(buffer, "pvCost", a.PVCost, ",")
  242. jsonEncodeFloat64(buffer, "ramBytes", a.RAMBytes(), ",")
  243. jsonEncodeFloat64(buffer, "ramByteRequestAverage", a.RAMBytesRequestAverage, ",")
  244. jsonEncodeFloat64(buffer, "ramByteUsageAverage", a.RAMBytesUsageAverage, ",")
  245. jsonEncodeFloat64(buffer, "ramByteHours", a.RAMByteHours, ",")
  246. jsonEncodeFloat64(buffer, "ramCost", a.RAMCost, ",")
  247. jsonEncodeFloat64(buffer, "ramEfficiency", a.RAMEfficiency(), ",")
  248. jsonEncodeFloat64(buffer, "sharedCost", a.SharedCost, ",")
  249. jsonEncodeFloat64(buffer, "totalCost", a.TotalCost, ",")
  250. jsonEncodeFloat64(buffer, "totalEfficiency", a.TotalEfficiency(), "")
  251. buffer.WriteString("}")
  252. return buffer.Bytes(), nil
  253. }
  254. // TODO niko/computeallocation
  255. // func (a *Allocation)UnmarshalJSON()
  256. // Resolution returns the duration of time covered by the Allocation
  257. func (a *Allocation) Resolution() time.Duration {
  258. return a.End.Sub(a.Start)
  259. }
  260. // IsAggregated is true if the given Allocation has been aggregated, which we
  261. // define by a lack of Properties.
  262. func (a *Allocation) IsAggregated() bool {
  263. return a == nil || a.Properties == nil
  264. }
  265. // IsExternal is true if the given Allocation represents external costs.
  266. func (a *Allocation) IsExternal() bool {
  267. return strings.Contains(a.Name, ExternalSuffix)
  268. }
  269. // IsIdle is true if the given Allocation represents idle costs.
  270. func (a *Allocation) IsIdle() bool {
  271. return strings.Contains(a.Name, IdleSuffix)
  272. }
  273. // IsUnallocated is true if the given Allocation represents unallocated costs.
  274. func (a *Allocation) IsUnallocated() bool {
  275. return strings.Contains(a.Name, UnallocatedSuffix)
  276. }
  277. // Minutes returns the number of minutes the Allocation represents, as defined
  278. // by the difference between the end and start times.
  279. func (a *Allocation) Minutes() float64 {
  280. return a.End.Sub(a.Start).Minutes()
  281. }
  282. // Share works like Add, but converts the entire cost of the given Allocation
  283. // to SharedCost, rather than adding to the individual resource costs.
  284. // TODO niko/computeallocation unit test changes!!!
  285. func (a *Allocation) Share(that *Allocation) (*Allocation, error) {
  286. if that == nil {
  287. return a.Clone(), nil
  288. }
  289. // Convert all costs of shared Allocation to SharedCost, zero out all
  290. // non-shared costs, then add.
  291. share := that.Clone()
  292. share.SharedCost += share.TotalCost
  293. share.CPUCost = 0
  294. share.CPUCoreHours = 0
  295. share.RAMCost = 0
  296. share.RAMByteHours = 0
  297. share.GPUCost = 0
  298. share.GPUHours = 0
  299. share.PVCost = 0
  300. share.PVByteHours = 0
  301. share.NetworkCost = 0
  302. share.ExternalCost = 0
  303. if a == nil {
  304. return share, nil
  305. }
  306. agg := a.Clone()
  307. agg.add(that)
  308. return agg, nil
  309. }
  310. // String represents the given Allocation as a string
  311. func (a *Allocation) String() string {
  312. return fmt.Sprintf("%s%s=%.2f", a.Name, NewWindow(&a.Start, &a.End), a.TotalCost)
  313. }
  314. func (a *Allocation) add(that *Allocation) {
  315. if a == nil {
  316. log.Warningf("Allocation.AggregateBy: trying to add a nil receiver")
  317. return
  318. }
  319. aCluster, _ := a.Properties.GetCluster()
  320. thatCluster, _ := that.Properties.GetCluster()
  321. aNode, _ := a.Properties.GetNode()
  322. thatNode, _ := that.Properties.GetNode()
  323. // reset properties
  324. a.Properties = nil
  325. // ensure that we carry cluster ID and/or node over if they're the same
  326. // required for idle/shared cost allocation
  327. if aCluster == thatCluster {
  328. a.Properties = Properties{ClusterProp: aCluster}
  329. }
  330. if aNode == thatNode {
  331. if a.Properties == nil {
  332. a.Properties = Properties{NodeProp: aNode}
  333. } else {
  334. a.Properties.SetNode(aNode)
  335. }
  336. }
  337. // Expand Window, Start, and End to be the "max" of each between the two
  338. // given Allocations.
  339. a.Window = a.Window.Expand(that.Window)
  340. if that.Start.Before(a.Start) {
  341. a.Start = that.Start
  342. }
  343. if that.End.After(a.End) {
  344. a.End = that.End
  345. }
  346. // Sum all cumulative resource fields
  347. a.CPUCoreHours += that.CPUCoreHours
  348. a.CPUCoreRequestAverage += that.CPUCoreRequestAverage
  349. a.CPUCoreUsageAverage += that.CPUCoreUsageAverage
  350. a.GPUHours += that.GPUHours
  351. a.RAMByteHours += that.RAMByteHours
  352. a.RAMBytesRequestAverage += that.RAMBytesRequestAverage
  353. a.RAMBytesUsageAverage += that.RAMBytesUsageAverage
  354. a.PVByteHours += that.PVByteHours
  355. // Sum all cumulative cost fields
  356. a.CPUCost += that.CPUCost
  357. a.GPUCost += that.GPUCost
  358. a.RAMCost += that.RAMCost
  359. a.PVCost += that.PVCost
  360. a.NetworkCost += that.NetworkCost
  361. a.SharedCost += that.SharedCost
  362. a.ExternalCost += that.ExternalCost
  363. a.TotalCost += that.TotalCost
  364. }
  365. // AllocationSet stores a set of Allocations, each with a unique name, that share
  366. // a window. An AllocationSet is mutable, so treat it like a threadsafe map.
  367. type AllocationSet struct {
  368. sync.RWMutex
  369. allocations map[string]*Allocation
  370. externalKeys map[string]bool
  371. idleKeys map[string]bool
  372. Window Window
  373. Warnings []string
  374. Errors []string
  375. }
  376. // NewAllocationSet instantiates a new AllocationSet and, optionally, inserts
  377. // the given list of Allocations
  378. func NewAllocationSet(start, end time.Time, allocs ...*Allocation) *AllocationSet {
  379. as := &AllocationSet{
  380. allocations: map[string]*Allocation{},
  381. externalKeys: map[string]bool{},
  382. idleKeys: map[string]bool{},
  383. Window: NewWindow(&start, &end),
  384. }
  385. for _, a := range allocs {
  386. as.Insert(a)
  387. }
  388. return as
  389. }
  390. // AllocationAggregationOptions provide advanced functionality to AggregateBy, including
  391. // filtering results and sharing allocations. FilterFuncs are a list of match
  392. // functions such that, if any function fails, the allocation is ignored.
  393. // ShareFuncs are a list of match functions such that, if any function
  394. // succeeds, the allocation is marked as a shared resource. ShareIdle is a
  395. // simple flag for sharing idle resources.
  396. type AllocationAggregationOptions struct {
  397. FilterFuncs []AllocationMatchFunc
  398. SplitIdle bool
  399. MergeUnallocated bool
  400. ShareFuncs []AllocationMatchFunc
  401. ShareIdle string
  402. ShareSplit string
  403. SharedHourlyCosts map[string]float64
  404. }
  405. // AggregateBy aggregates the Allocations in the given AllocationSet by the given
  406. // Property. This will only be legal if the AllocationSet is divisible by the
  407. // given Property; e.g. Containers can be divided by Namespace, but not vice-a-versa.
  408. func (as *AllocationSet) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  409. // The order of operations for aggregating allocations is as follows:
  410. // 1. Partition external, idle, and shared allocations into separate sets
  411. // 2. Compute idle coefficients (if necessary)
  412. // a) if idle allocation is to be shared, compute idle coefficients
  413. // (do not compute shared coefficients here, see step 5)
  414. // b) if idle allocation is NOT shared, but filters are present, compute
  415. // idle filtration coefficients for the purpose of only returning the
  416. // portion of idle allocation that would have been shared with the
  417. // unfiltered results set. (See unit tests 5.a,b,c)
  418. // 3. Ignore allocation if it fails any of the FilterFuncs
  419. // 4. Distribute idle allocations among remaining non-idle, non-external
  420. // allocations
  421. // 5. Generate aggregation key and insert allocation into the output set
  422. // 6. Scale un-aggregated idle coefficients by filtration coefficient
  423. // 7. If there are shared allocations, compute sharing coefficients on
  424. // the aggregated set, then share allocation accordingly
  425. // 8. If there are external allocations that can be aggregated into
  426. // the output (i.e. they can be used to generate a valid key for
  427. // the given properties) then aggregate; otherwise... ignore them?
  428. // 9. If the merge idle option is enabled, merge any remaining idle
  429. // allocations into a single idle allocation
  430. // TODO niko/etl revisit (ShareIdle: ShareEven) case, which is probably wrong
  431. // (and, frankly, ill-defined; i.e. evenly across clusters? within clusters?)
  432. if options == nil {
  433. options = &AllocationAggregationOptions{}
  434. }
  435. if as.IsEmpty() {
  436. return nil
  437. }
  438. // aggSet will collect the aggregated allocations
  439. aggSet := &AllocationSet{
  440. Window: as.Window.Clone(),
  441. }
  442. // externalSet will collect external allocations
  443. externalSet := &AllocationSet{
  444. Window: as.Window.Clone(),
  445. }
  446. // idleSet will be shared among aggSet after initial aggregation
  447. // is complete
  448. idleSet := &AllocationSet{
  449. Window: as.Window.Clone(),
  450. }
  451. // shareSet will be shared among aggSet after initial aggregation
  452. // is complete
  453. shareSet := &AllocationSet{
  454. Window: as.Window.Clone(),
  455. }
  456. // Convert SharedHourlyCosts to Allocations in the shareSet
  457. for name, cost := range options.SharedHourlyCosts {
  458. if cost > 0.0 {
  459. hours := as.Resolution().Hours()
  460. // If set ends in the future, adjust hours accordingly
  461. diff := time.Now().Sub(as.End())
  462. if diff < 0.0 {
  463. hours += diff.Hours()
  464. }
  465. totalSharedCost := cost * hours
  466. shareSet.Insert(&Allocation{
  467. Name: fmt.Sprintf("%s/%s", name, SharedSuffix),
  468. Start: as.Start(),
  469. End: as.End(),
  470. SharedCost: totalSharedCost,
  471. TotalCost: totalSharedCost,
  472. })
  473. }
  474. }
  475. as.Lock()
  476. defer as.Unlock()
  477. // (1) Loop and find all of the external, idle, and shared allocations. Add
  478. // them to their respective sets, removing them from the set of allocations
  479. // to aggregate.
  480. for _, alloc := range as.allocations {
  481. // External allocations get aggregated post-hoc (see step 6) and do
  482. // not necessarily contain complete sets of properties, so they are
  483. // moved to a separate AllocationSet.
  484. if alloc.IsExternal() {
  485. delete(as.externalKeys, alloc.Name)
  486. delete(as.allocations, alloc.Name)
  487. externalSet.Insert(alloc)
  488. continue
  489. }
  490. cluster, err := alloc.Properties.GetCluster()
  491. if err != nil {
  492. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  493. return err
  494. }
  495. // Idle allocations should be separated into idleSet if they are to be
  496. // shared later on. If they are not to be shared, then aggregate them.
  497. if alloc.IsIdle() {
  498. delete(as.idleKeys, alloc.Name)
  499. delete(as.allocations, alloc.Name)
  500. if options.ShareIdle == ShareEven || options.ShareIdle == ShareWeighted {
  501. idleSet.Insert(alloc)
  502. } else {
  503. aggSet.Insert(alloc)
  504. }
  505. continue
  506. }
  507. // Shared allocations must be identified and separated prior to
  508. // aggregation and filtering. That is, if any of the ShareFuncs
  509. // return true, then move the allocation to shareSet.
  510. for _, sf := range options.ShareFuncs {
  511. if sf(alloc) {
  512. delete(as.idleKeys, alloc.Name)
  513. delete(as.allocations, alloc.Name)
  514. alloc.Name = fmt.Sprintf("%s/%s", cluster, SharedSuffix)
  515. shareSet.Insert(alloc)
  516. break
  517. }
  518. }
  519. }
  520. // It's possible that no more un-shared, non-idle, non-external allocations
  521. // remain at this point. This always results in an emptySet.
  522. if len(as.allocations) == 0 {
  523. log.Warningf("ETL: AggregateBy: no allocations to aggregate")
  524. emptySet := &AllocationSet{
  525. Window: as.Window.Clone(),
  526. }
  527. as.allocations = emptySet.allocations
  528. return nil
  529. }
  530. // (2) In order to correctly apply idle and shared resource coefficients
  531. // appropriately, we need to determine the coefficients for the full set
  532. // of data. The ensures that the ratios are maintained through filtering.
  533. // idleCoefficients are organized by [cluster][allocation][resource]=coeff
  534. var idleCoefficients map[string]map[string]map[string]float64
  535. // shareCoefficients are organized by [allocation][resource]=coeff (no cluster)
  536. var shareCoefficients map[string]float64
  537. var err error
  538. // (2a) If there are idle costs and we intend to share them, compute the
  539. // coefficients for sharing the cost among the non-idle, non-aggregated
  540. // allocations.
  541. if idleSet.Length() > 0 && options.ShareIdle != ShareNone {
  542. idleCoefficients, err = computeIdleCoeffs(properties, options, as)
  543. if err != nil {
  544. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  545. return fmt.Errorf("error computing idle coefficients: %s", err)
  546. }
  547. }
  548. // (2b) If we're not sharing idle and we're filtering, we need to track the
  549. // amount of each idle allocation to "delete" in order to maintain parity
  550. // with the idle-allocated results. That is, we want to return only the
  551. // idle cost that would have been shared with the unfiltered portion of
  552. // the results, not the full idle cost.
  553. var idleFiltrationCoefficients map[string]map[string]map[string]float64
  554. if len(options.FilterFuncs) > 0 && options.ShareIdle == ShareNone {
  555. idleFiltrationCoefficients, err = computeIdleCoeffs(properties, options, as)
  556. if err != nil {
  557. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  558. return fmt.Errorf("error computing idle filtration coefficients: %s", err)
  559. }
  560. }
  561. // (3-5) Filter, distribute idle cost, and aggregate (in that order)
  562. for _, alloc := range as.allocations {
  563. cluster, err := alloc.Properties.GetCluster()
  564. if err != nil {
  565. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  566. return err
  567. }
  568. skip := false
  569. // (3) If any of the filter funcs fail, immediately skip the allocation.
  570. for _, ff := range options.FilterFuncs {
  571. if !ff(alloc) {
  572. skip = true
  573. break
  574. }
  575. }
  576. if skip {
  577. // If we are tracking idle filtration coefficients, delete the
  578. // entry corresponding to the filtered allocation. (Deleting the
  579. // entry will result in that proportional amount being removed
  580. // from the idle allocation at the end of the process.)
  581. if idleFiltrationCoefficients != nil {
  582. if ifcc, ok := idleFiltrationCoefficients[cluster]; ok {
  583. delete(ifcc, alloc.Name)
  584. }
  585. }
  586. continue
  587. }
  588. // (4) Split idle allocations and distribute among remaining
  589. // un-aggregated allocations.
  590. // NOTE: if idle allocation is off (i.e. ShareIdle == ShareNone) then
  591. // all idle allocations will be in the aggSet at this point, so idleSet
  592. // will be empty and we won't enter this block.
  593. if idleSet.Length() > 0 {
  594. // Distribute idle allocations by coefficient per-cluster, per-allocation
  595. for _, idleAlloc := range idleSet.allocations {
  596. // Only share idle if the cluster matches; i.e. the allocation
  597. // is from the same cluster as the idle costs
  598. idleCluster, err := idleAlloc.Properties.GetCluster()
  599. if err != nil {
  600. return err
  601. }
  602. if idleCluster != cluster {
  603. continue
  604. }
  605. // Make sure idle coefficients exist
  606. if _, ok := idleCoefficients[cluster]; !ok {
  607. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficient [no cluster: '%s' in coefficients] for '%s'", cluster, alloc.Name)
  608. continue
  609. }
  610. if _, ok := idleCoefficients[cluster][alloc.Name]; !ok {
  611. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  612. continue
  613. }
  614. alloc.CPUCoreHours += idleAlloc.CPUCoreHours * idleCoefficients[cluster][alloc.Name]["cpu"]
  615. alloc.GPUHours += idleAlloc.GPUHours * idleCoefficients[cluster][alloc.Name]["gpu"]
  616. alloc.RAMByteHours += idleAlloc.RAMByteHours * idleCoefficients[cluster][alloc.Name]["ram"]
  617. idleCPUCost := idleAlloc.CPUCost * idleCoefficients[cluster][alloc.Name]["cpu"]
  618. idleGPUCost := idleAlloc.GPUCost * idleCoefficients[cluster][alloc.Name]["gpu"]
  619. idleRAMCost := idleAlloc.RAMCost * idleCoefficients[cluster][alloc.Name]["ram"]
  620. alloc.CPUCost += idleCPUCost
  621. alloc.GPUCost += idleGPUCost
  622. alloc.RAMCost += idleRAMCost
  623. alloc.TotalCost += idleCPUCost + idleGPUCost + idleRAMCost
  624. }
  625. }
  626. // (5) generate key to use for aggregation-by-key and allocation name
  627. key, err := alloc.generateKey(properties)
  628. if err != nil {
  629. return err
  630. }
  631. alloc.Name = key
  632. if options.MergeUnallocated && alloc.IsUnallocated() {
  633. alloc.Name = UnallocatedSuffix
  634. }
  635. // Inserting the allocation with the generated key for a name will
  636. // perform the actual basic aggregation step.
  637. aggSet.Insert(alloc)
  638. }
  639. // clusterIdleFiltrationCoeffs is used to track per-resource idle
  640. // coefficients on a cluster-by-cluster basis. It is, essentailly, an
  641. // aggregation of idleFiltrationCoefficients after they have been
  642. // filtered above (in step 3)
  643. var clusterIdleFiltrationCoeffs map[string]map[string]float64
  644. if idleFiltrationCoefficients != nil {
  645. clusterIdleFiltrationCoeffs = map[string]map[string]float64{}
  646. for cluster, m := range idleFiltrationCoefficients {
  647. if _, ok := clusterIdleFiltrationCoeffs[cluster]; !ok {
  648. clusterIdleFiltrationCoeffs[cluster] = map[string]float64{
  649. "cpu": 0.0,
  650. "gpu": 0.0,
  651. "ram": 0.0,
  652. }
  653. }
  654. for _, n := range m {
  655. for resource, val := range n {
  656. clusterIdleFiltrationCoeffs[cluster][resource] += val
  657. }
  658. }
  659. }
  660. }
  661. // (6) If we have both un-shared idle allocations and idle filtration
  662. // coefficients (i.e. we have computed coefficients for scaling idle
  663. // allocation costs by cluster) then use those coefficients to scale down
  664. // each idle allocation.
  665. if len(aggSet.idleKeys) > 0 && clusterIdleFiltrationCoeffs != nil {
  666. for idleKey := range aggSet.idleKeys {
  667. idleAlloc := aggSet.Get(idleKey)
  668. cluster, err := idleAlloc.Properties.GetCluster()
  669. if err != nil {
  670. log.Warningf("AggregateBy: idle allocation without cluster: %s", idleAlloc)
  671. }
  672. if resourceCoeffs, ok := clusterIdleFiltrationCoeffs[cluster]; ok {
  673. idleAlloc.CPUCost *= resourceCoeffs["cpu"]
  674. idleAlloc.CPUCoreHours *= resourceCoeffs["cpu"]
  675. idleAlloc.RAMCost *= resourceCoeffs["ram"]
  676. idleAlloc.RAMByteHours *= resourceCoeffs["ram"]
  677. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.RAMCost
  678. }
  679. }
  680. }
  681. // (7) Split shared allocations and distribute among aggregated allocations
  682. if shareSet.Length() > 0 {
  683. shareCoefficients, err = computeShareCoeffs(properties, options, aggSet)
  684. if err != nil {
  685. log.Warningf("AllocationSet.AggregateBy: compute shared coeff: missing cluster ID: %s", err)
  686. return err
  687. }
  688. for _, alloc := range aggSet.allocations {
  689. if alloc.IsIdle() {
  690. // Skip idle allocations (they do not receive shared allocation)
  691. continue
  692. }
  693. // Distribute shared allocations by coefficient per-allocation
  694. // NOTE: share coefficients do not partition by cluster, like
  695. // idle coefficients do.
  696. for _, sharedAlloc := range shareSet.allocations {
  697. if _, ok := shareCoefficients[alloc.Name]; !ok {
  698. log.Errorf("ETL: share allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  699. continue
  700. }
  701. alloc.SharedCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  702. alloc.TotalCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  703. }
  704. }
  705. }
  706. // (8) Aggregate external allocations into aggregated allocations. This may
  707. // not be possible for every external allocation, but attempt to find an
  708. // exact key match, given each external allocation's proerties, and
  709. // aggregate if an exact match is found.
  710. for _, alloc := range externalSet.allocations {
  711. key, err := alloc.generateKey(properties)
  712. if err != nil {
  713. continue
  714. }
  715. alloc.Name = key
  716. aggSet.Insert(alloc)
  717. }
  718. // (9) Combine all idle allocations into a single "__idle__" allocation
  719. if !options.SplitIdle {
  720. for _, idleAlloc := range aggSet.IdleAllocations() {
  721. aggSet.Delete(idleAlloc.Name)
  722. idleAlloc.Name = IdleSuffix
  723. aggSet.Insert(idleAlloc)
  724. }
  725. }
  726. as.allocations = aggSet.allocations
  727. return nil
  728. }
  729. // TODO niko/etl deprecate the use of a map of resources here, we only use totals
  730. func computeShareCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]float64, error) {
  731. // Compute coeffs by totalling per-allocation, then dividing by the total.
  732. coeffs := map[string]float64{}
  733. // Compute totals for all allocations
  734. total := 0.0
  735. // ShareEven counts each aggregation with even weight, whereas ShareWeighted
  736. // counts each aggregation proportionally to its respective costs
  737. shareType := options.ShareSplit
  738. // Record allocation values first, then normalize by totals to get percentages
  739. for name, alloc := range as.allocations {
  740. if alloc.IsIdle() {
  741. // Skip idle allocations in coefficient calculation
  742. continue
  743. }
  744. if shareType == ShareEven {
  745. // Not additive - set to 1.0 for even distribution
  746. coeffs[name] = 1.0
  747. // Total is always additive
  748. total += 1.0
  749. } else {
  750. // Both are additive for weighted distribution
  751. coeffs[name] += alloc.TotalCost
  752. total += alloc.TotalCost
  753. }
  754. }
  755. // Normalize coefficients by totals
  756. for a := range coeffs {
  757. if coeffs[a] > 0 && total > 0 {
  758. coeffs[a] /= total
  759. } else {
  760. log.Warningf("ETL: invalid values for shared coefficients: %d, %d", coeffs[a], total)
  761. coeffs[a] = 0.0
  762. }
  763. }
  764. return coeffs, nil
  765. }
  766. func computeIdleCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]map[string]map[string]float64, error) {
  767. types := []string{"cpu", "gpu", "ram"}
  768. // Compute idle coefficients, then save them in AllocationAggregationOptions
  769. coeffs := map[string]map[string]map[string]float64{}
  770. // Compute totals per resource for CPU, GPU, RAM, and PV
  771. totals := map[string]map[string]float64{}
  772. // ShareEven counts each allocation with even weight, whereas ShareWeighted
  773. // counts each allocation proportionally to its respective costs
  774. shareType := options.ShareIdle
  775. // Record allocation values first, then normalize by totals to get percentages
  776. for _, alloc := range as.allocations {
  777. if alloc.IsIdle() {
  778. // Skip idle allocations in coefficient calculation
  779. continue
  780. }
  781. // If any of the share funcs succeed, share the allocation. Do this
  782. // prior to filtering so that shared namespaces, etc do not get
  783. // filtered out before we have a chance to share them.
  784. skip := false
  785. for _, sf := range options.ShareFuncs {
  786. if sf(alloc) {
  787. skip = true
  788. break
  789. }
  790. }
  791. if skip {
  792. continue
  793. }
  794. // We need to key the allocations by cluster id
  795. clusterID, err := alloc.Properties.GetCluster()
  796. if err != nil {
  797. return nil, err
  798. }
  799. // get the name key for the allocation
  800. name := alloc.Name
  801. // Create cluster based tables if they don't exist
  802. if _, ok := coeffs[clusterID]; !ok {
  803. coeffs[clusterID] = map[string]map[string]float64{}
  804. }
  805. if _, ok := totals[clusterID]; !ok {
  806. totals[clusterID] = map[string]float64{}
  807. }
  808. if _, ok := coeffs[clusterID][name]; !ok {
  809. coeffs[clusterID][name] = map[string]float64{}
  810. }
  811. if shareType == ShareEven {
  812. for _, r := range types {
  813. // Not additive - hard set to 1.0
  814. coeffs[clusterID][name][r] = 1.0
  815. // totals are additive
  816. totals[clusterID][r] += 1.0
  817. }
  818. } else {
  819. coeffs[clusterID][name]["cpu"] += alloc.CPUCost
  820. coeffs[clusterID][name]["gpu"] += alloc.GPUCost
  821. coeffs[clusterID][name]["ram"] += alloc.RAMCost
  822. totals[clusterID]["cpu"] += alloc.CPUCost
  823. totals[clusterID]["gpu"] += alloc.GPUCost
  824. totals[clusterID]["ram"] += alloc.RAMCost
  825. }
  826. }
  827. // Normalize coefficients by totals
  828. for c := range coeffs {
  829. for a := range coeffs[c] {
  830. for _, r := range types {
  831. if coeffs[c][a][r] > 0 && totals[c][r] > 0 {
  832. coeffs[c][a][r] /= totals[c][r]
  833. }
  834. }
  835. }
  836. }
  837. return coeffs, nil
  838. }
  839. func (alloc *Allocation) generateKey(properties Properties) (string, error) {
  840. // Names will ultimately be joined into a single name, which uniquely
  841. // identifies allocations.
  842. names := []string{}
  843. if properties.HasCluster() {
  844. cluster, err := alloc.Properties.GetCluster()
  845. if err != nil {
  846. return "", err
  847. }
  848. names = append(names, cluster)
  849. }
  850. if properties.HasNode() {
  851. node, err := alloc.Properties.GetNode()
  852. if err != nil {
  853. return "", err
  854. }
  855. names = append(names, node)
  856. }
  857. if properties.HasNamespace() {
  858. namespace, err := alloc.Properties.GetNamespace()
  859. if err != nil {
  860. return "", err
  861. }
  862. names = append(names, namespace)
  863. }
  864. if properties.HasControllerKind() {
  865. controllerKind, err := alloc.Properties.GetControllerKind()
  866. if err != nil {
  867. // Indicate that allocation has no controller
  868. controllerKind = UnallocatedSuffix
  869. }
  870. if prop, _ := properties.GetControllerKind(); prop != "" && prop != controllerKind {
  871. // The allocation does not have the specified controller kind
  872. controllerKind = UnallocatedSuffix
  873. }
  874. names = append(names, controllerKind)
  875. }
  876. if properties.HasController() {
  877. if !properties.HasControllerKind() {
  878. controllerKind, err := alloc.Properties.GetControllerKind()
  879. if err == nil {
  880. names = append(names, controllerKind)
  881. }
  882. }
  883. controller, err := alloc.Properties.GetController()
  884. if err != nil {
  885. // Indicate that allocation has no controller
  886. controller = UnallocatedSuffix
  887. }
  888. names = append(names, controller)
  889. }
  890. if properties.HasPod() {
  891. pod, err := alloc.Properties.GetPod()
  892. if err != nil {
  893. return "", err
  894. }
  895. names = append(names, pod)
  896. }
  897. if properties.HasContainer() {
  898. container, err := alloc.Properties.GetContainer()
  899. if err != nil {
  900. return "", err
  901. }
  902. names = append(names, container)
  903. }
  904. if properties.HasService() {
  905. services, err := alloc.Properties.GetServices()
  906. if err != nil {
  907. // Indicate that allocation has no services
  908. names = append(names, UnallocatedSuffix)
  909. } else {
  910. // TODO niko/etl support multi-service aggregation
  911. if len(services) > 0 {
  912. for _, service := range services {
  913. names = append(names, service)
  914. break
  915. }
  916. } else {
  917. // Indicate that allocation has no services
  918. names = append(names, UnallocatedSuffix)
  919. }
  920. }
  921. }
  922. if properties.HasAnnotations() {
  923. annotations, err := alloc.Properties.GetAnnotations() // annotations that the individual allocation possesses
  924. if err != nil {
  925. // Indicate that allocation has no annotations
  926. names = append(names, UnallocatedSuffix)
  927. } else {
  928. annotationNames := []string{}
  929. aggAnnotations, err := properties.GetAnnotations() // potential annotations to aggregate on supplied by the API caller
  930. if err != nil {
  931. // We've already checked HasAnnotation, so this should never occur
  932. return "", err
  933. }
  934. // calvin - support multi-annotation aggregation
  935. for annotationName := range aggAnnotations {
  936. if val, ok := annotations[annotationName]; ok {
  937. annotationNames = append(annotationNames, fmt.Sprintf("%s=%s", annotationName, val))
  938. } else if indexOf(UnallocatedSuffix, annotationNames) == -1 { // if UnallocatedSuffix not already in names
  939. annotationNames = append(annotationNames, UnallocatedSuffix)
  940. }
  941. }
  942. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  943. if len(annotationNames) > 1 {
  944. sort.Strings(annotationNames)
  945. }
  946. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, annotationNames)
  947. // suffix should be at index 0 if it exists b/c of underscores
  948. if unallocatedSuffixIndex != -1 {
  949. annotationNames = append(annotationNames[:unallocatedSuffixIndex], annotationNames[unallocatedSuffixIndex+1:]...)
  950. annotationNames = append(annotationNames, UnallocatedSuffix) // append to end
  951. }
  952. names = append(names, annotationNames...)
  953. }
  954. }
  955. if properties.HasLabel() {
  956. labels, err := alloc.Properties.GetLabels() // labels that the individual allocation possesses
  957. if err != nil {
  958. // Indicate that allocation has no labels
  959. names = append(names, UnallocatedSuffix)
  960. } else {
  961. labelNames := []string{}
  962. aggLabels, err := properties.GetLabels() // potential labels to aggregate on supplied by the API caller
  963. if err != nil {
  964. // We've already checked HasLabel, so this should never occur
  965. return "", err
  966. }
  967. // calvin - support multi-label aggregation
  968. for labelName := range aggLabels {
  969. if val, ok := labels[labelName]; ok {
  970. labelNames = append(labelNames, fmt.Sprintf("%s=%s", labelName, val))
  971. } else if indexOf(UnallocatedSuffix, labelNames) == -1 { // if UnallocatedSuffix not already in names
  972. labelNames = append(labelNames, UnallocatedSuffix)
  973. }
  974. }
  975. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  976. if len(labelNames) > 1 {
  977. sort.Strings(labelNames)
  978. }
  979. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, labelNames)
  980. // suffix should be at index 0 if it exists b/c of underscores
  981. if unallocatedSuffixIndex != -1 {
  982. labelNames = append(labelNames[:unallocatedSuffixIndex], labelNames[unallocatedSuffixIndex+1:]...)
  983. labelNames = append(labelNames, UnallocatedSuffix) // append to end
  984. }
  985. names = append(names, labelNames...)
  986. }
  987. }
  988. return strings.Join(names, "/"), nil
  989. }
  990. // TODO clean up
  991. // Helper function to check for slice membership. Not sure if repeated elsewhere in our codebase.
  992. func indexOf(v string, arr []string) int {
  993. for i, s := range arr {
  994. // This is caseless equivalence
  995. if strings.EqualFold(v, s) {
  996. return i
  997. }
  998. }
  999. return -1
  1000. }
  1001. // Clone returns a new AllocationSet with a deep copy of the given
  1002. // AllocationSet's allocations.
  1003. func (as *AllocationSet) Clone() *AllocationSet {
  1004. if as == nil {
  1005. return nil
  1006. }
  1007. as.RLock()
  1008. defer as.RUnlock()
  1009. allocs := map[string]*Allocation{}
  1010. for k, v := range as.allocations {
  1011. allocs[k] = v.Clone()
  1012. }
  1013. externalKeys := map[string]bool{}
  1014. for k, v := range as.externalKeys {
  1015. externalKeys[k] = v
  1016. }
  1017. idleKeys := map[string]bool{}
  1018. for k, v := range as.idleKeys {
  1019. idleKeys[k] = v
  1020. }
  1021. return &AllocationSet{
  1022. allocations: allocs,
  1023. externalKeys: externalKeys,
  1024. idleKeys: idleKeys,
  1025. Window: as.Window.Clone(),
  1026. }
  1027. }
  1028. // ComputeIdleAllocations computes the idle allocations for the AllocationSet,
  1029. // given a set of Assets. Ideally, assetSet should contain only Nodes, but if
  1030. // it contains other Assets, they will be ignored; only CPU, GPU and RAM are
  1031. // considered for idle allocation. If the Nodes have adjustments, then apply
  1032. // the adjustments proportionally to each of the resources so that total
  1033. // allocation with idle reflects the adjusted node costs. One idle allocation
  1034. // per-cluster will be computed and returned, keyed by cluster_id.
  1035. func (as *AllocationSet) ComputeIdleAllocations(assetSet *AssetSet) (map[string]*Allocation, error) {
  1036. if as == nil {
  1037. return nil, fmt.Errorf("cannot compute idle allocation for nil AllocationSet")
  1038. }
  1039. if assetSet == nil {
  1040. return nil, fmt.Errorf("cannot compute idle allocation with nil AssetSet")
  1041. }
  1042. if !as.Window.Equal(assetSet.Window) {
  1043. return nil, fmt.Errorf("cannot compute idle allocation for sets with mismatched windows: %s != %s", as.Window, assetSet.Window)
  1044. }
  1045. window := as.Window
  1046. // Build a map of cumulative cluster asset costs, per resource; i.e.
  1047. // cluster-to-{cpu|gpu|ram}-to-cost.
  1048. assetClusterResourceCosts := map[string]map[string]float64{}
  1049. assetSet.Each(func(key string, a Asset) {
  1050. if node, ok := a.(*Node); ok {
  1051. if _, ok := assetClusterResourceCosts[node.Properties().Cluster]; !ok {
  1052. assetClusterResourceCosts[node.Properties().Cluster] = map[string]float64{}
  1053. }
  1054. // adjustmentRate is used to scale resource costs proportionally
  1055. // by the adjustment. This is necessary because we only get one
  1056. // adjustment per Node, not one per-resource-per-Node.
  1057. //
  1058. // e.g. total cost = $90, adjustment = -$10 => 0.9
  1059. // e.g. total cost = $150, adjustment = -$300 => 0.3333
  1060. // e.g. total cost = $150, adjustment = $50 => 1.5
  1061. adjustmentRate := 1.0
  1062. if node.TotalCost()-node.Adjustment() == 0 {
  1063. // If (totalCost - adjustment) is 0.0 then adjustment cancels
  1064. // the entire node cost and we should make everything 0
  1065. // without dividing by 0.
  1066. adjustmentRate = 0.0
  1067. } else if node.Adjustment() != 0.0 {
  1068. // adjustmentRate is the ratio of cost-with-adjustment (i.e. TotalCost)
  1069. // to cost-without-adjustment (i.e. TotalCost - Adjustment).
  1070. adjustmentRate = node.TotalCost() / (node.TotalCost() - node.Adjustment())
  1071. }
  1072. cpuCost := node.CPUCost * (1.0 - node.Discount) * adjustmentRate
  1073. gpuCost := node.GPUCost * (1.0 - node.Discount) * adjustmentRate
  1074. ramCost := node.RAMCost * (1.0 - node.Discount) * adjustmentRate
  1075. assetClusterResourceCosts[node.Properties().Cluster]["cpu"] += cpuCost
  1076. assetClusterResourceCosts[node.Properties().Cluster]["gpu"] += gpuCost
  1077. assetClusterResourceCosts[node.Properties().Cluster]["ram"] += ramCost
  1078. }
  1079. })
  1080. // Determine start, end on a per-cluster basis
  1081. clusterStarts := map[string]time.Time{}
  1082. clusterEnds := map[string]time.Time{}
  1083. // Subtract allocated costs from asset costs, leaving only the remaining
  1084. // idle costs.
  1085. as.Each(func(name string, a *Allocation) {
  1086. cluster, err := a.Properties.GetCluster()
  1087. if err != nil {
  1088. // Failed to find allocation's cluster
  1089. return
  1090. }
  1091. if _, ok := assetClusterResourceCosts[cluster]; !ok {
  1092. // Failed to find assets for allocation's cluster
  1093. return
  1094. }
  1095. // Set cluster (start, end) if they are either not currently set,
  1096. // or if the detected (start, end) of the current allocation falls
  1097. // before or after, respectively, the current values.
  1098. if s, ok := clusterStarts[cluster]; !ok || a.Start.Before(s) {
  1099. clusterStarts[cluster] = a.Start
  1100. }
  1101. if e, ok := clusterEnds[cluster]; !ok || a.End.After(e) {
  1102. clusterEnds[cluster] = a.End
  1103. }
  1104. assetClusterResourceCosts[cluster]["cpu"] -= a.CPUCost
  1105. assetClusterResourceCosts[cluster]["gpu"] -= a.GPUCost
  1106. assetClusterResourceCosts[cluster]["ram"] -= a.RAMCost
  1107. })
  1108. // Turn remaining un-allocated asset costs into idle allocations
  1109. idleAllocs := map[string]*Allocation{}
  1110. for cluster, resources := range assetClusterResourceCosts {
  1111. // Default start and end to the (start, end) of the given window, but
  1112. // use the actual, detected (start, end) pair if they are available.
  1113. start := *window.Start()
  1114. if s, ok := clusterStarts[cluster]; ok && window.Contains(s) {
  1115. start = s
  1116. }
  1117. end := *window.End()
  1118. if e, ok := clusterEnds[cluster]; ok && window.Contains(e) {
  1119. end = e
  1120. }
  1121. idleAlloc := &Allocation{
  1122. Name: fmt.Sprintf("%s/%s", cluster, IdleSuffix),
  1123. Window: window.Clone(),
  1124. Properties: Properties{ClusterProp: cluster},
  1125. Start: start,
  1126. End: end,
  1127. CPUCost: resources["cpu"],
  1128. GPUCost: resources["gpu"],
  1129. RAMCost: resources["ram"],
  1130. }
  1131. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.GPUCost + idleAlloc.RAMCost
  1132. // Do not continue if multiple idle allocations are computed for a
  1133. // single cluster.
  1134. if _, ok := idleAllocs[cluster]; ok {
  1135. return nil, fmt.Errorf("duplicate idle allocations for cluster %s", cluster)
  1136. }
  1137. idleAllocs[cluster] = idleAlloc
  1138. }
  1139. return idleAllocs, nil
  1140. }
  1141. // Delete removes the allocation with the given name from the set
  1142. func (as *AllocationSet) Delete(name string) {
  1143. if as == nil {
  1144. return
  1145. }
  1146. as.Lock()
  1147. defer as.Unlock()
  1148. delete(as.externalKeys, name)
  1149. delete(as.idleKeys, name)
  1150. delete(as.allocations, name)
  1151. }
  1152. // Each invokes the given function for each Allocation in the set
  1153. func (as *AllocationSet) Each(f func(string, *Allocation)) {
  1154. if as == nil {
  1155. return
  1156. }
  1157. for k, a := range as.allocations {
  1158. f(k, a)
  1159. }
  1160. }
  1161. // End returns the End time of the AllocationSet window
  1162. func (as *AllocationSet) End() time.Time {
  1163. if as == nil {
  1164. log.Warningf("Allocation ETL: calling End on nil AllocationSet")
  1165. return time.Unix(0, 0)
  1166. }
  1167. if as.Window.End() == nil {
  1168. log.Warningf("Allocation ETL: AllocationSet with illegal window: End is nil; len(as.allocations)=%d", len(as.allocations))
  1169. return time.Unix(0, 0)
  1170. }
  1171. return *as.Window.End()
  1172. }
  1173. // Get returns the Allocation at the given key in the AllocationSet
  1174. func (as *AllocationSet) Get(key string) *Allocation {
  1175. as.RLock()
  1176. defer as.RUnlock()
  1177. if alloc, ok := as.allocations[key]; ok {
  1178. return alloc
  1179. }
  1180. return nil
  1181. }
  1182. // ExternalAllocations returns a map of the external allocations in the set.
  1183. // Returns clones of the actual Allocations, so mutability is not a problem.
  1184. func (as *AllocationSet) ExternalAllocations() map[string]*Allocation {
  1185. externals := map[string]*Allocation{}
  1186. if as.IsEmpty() {
  1187. return externals
  1188. }
  1189. as.RLock()
  1190. defer as.RUnlock()
  1191. for key := range as.externalKeys {
  1192. if alloc, ok := as.allocations[key]; ok {
  1193. externals[key] = alloc.Clone()
  1194. }
  1195. }
  1196. return externals
  1197. }
  1198. // ExternalCost returns the total aggregated external costs of the set
  1199. func (as *AllocationSet) ExternalCost() float64 {
  1200. if as.IsEmpty() {
  1201. return 0.0
  1202. }
  1203. as.RLock()
  1204. defer as.RUnlock()
  1205. externalCost := 0.0
  1206. for _, alloc := range as.allocations {
  1207. externalCost += alloc.ExternalCost
  1208. }
  1209. return externalCost
  1210. }
  1211. // IdleAllocations returns a map of the idle allocations in the AllocationSet.
  1212. // Returns clones of the actual Allocations, so mutability is not a problem.
  1213. func (as *AllocationSet) IdleAllocations() map[string]*Allocation {
  1214. idles := map[string]*Allocation{}
  1215. if as.IsEmpty() {
  1216. return idles
  1217. }
  1218. as.RLock()
  1219. defer as.RUnlock()
  1220. for key := range as.idleKeys {
  1221. if alloc, ok := as.allocations[key]; ok {
  1222. idles[key] = alloc.Clone()
  1223. }
  1224. }
  1225. return idles
  1226. }
// Insert aggregates the given Allocation into the AllocationSet: if an entry
// already exists under the Allocation's name, the Allocation is added to it;
// otherwise a new entry is created. Nil error response indicates success.
//
// Insert is a thin exported wrapper that delegates to insert.
// NOTE(review): earlier documentation said the Allocation is only inserted if
// it matches the AllocationSet's window; insert does not perform such a check.
func (as *AllocationSet) Insert(that *Allocation) error {
	return as.insert(that)
}
  1233. func (as *AllocationSet) insert(that *Allocation) error {
  1234. if as == nil {
  1235. return fmt.Errorf("cannot insert into nil AllocationSet")
  1236. }
  1237. as.Lock()
  1238. defer as.Unlock()
  1239. if as.allocations == nil {
  1240. as.allocations = map[string]*Allocation{}
  1241. }
  1242. if as.externalKeys == nil {
  1243. as.externalKeys = map[string]bool{}
  1244. }
  1245. if as.idleKeys == nil {
  1246. as.idleKeys = map[string]bool{}
  1247. }
  1248. // Add the given Allocation to the existing entry, if there is one;
  1249. // otherwise just set directly into allocations
  1250. if _, ok := as.allocations[that.Name]; !ok {
  1251. as.allocations[that.Name] = that
  1252. } else {
  1253. as.allocations[that.Name].add(that)
  1254. }
  1255. // If the given Allocation is an external one, record that
  1256. if that.IsExternal() {
  1257. as.externalKeys[that.Name] = true
  1258. }
  1259. // If the given Allocation is an idle one, record that
  1260. if that.IsIdle() {
  1261. as.idleKeys[that.Name] = true
  1262. }
  1263. return nil
  1264. }
  1265. // IsEmpty returns true if the AllocationSet is nil, or if it contains
  1266. // zero allocations.
  1267. func (as *AllocationSet) IsEmpty() bool {
  1268. if as == nil || len(as.allocations) == 0 {
  1269. return true
  1270. }
  1271. as.RLock()
  1272. defer as.RUnlock()
  1273. return as.allocations == nil || len(as.allocations) == 0
  1274. }
  1275. // Length returns the number of Allocations in the set
  1276. func (as *AllocationSet) Length() int {
  1277. if as == nil {
  1278. return 0
  1279. }
  1280. as.RLock()
  1281. defer as.RUnlock()
  1282. return len(as.allocations)
  1283. }
  1284. // Map clones and returns a map of the AllocationSet's Allocations
  1285. func (as *AllocationSet) Map() map[string]*Allocation {
  1286. if as.IsEmpty() {
  1287. return map[string]*Allocation{}
  1288. }
  1289. return as.Clone().allocations
  1290. }
  1291. // MarshalJSON JSON-encodes the AllocationSet
  1292. func (as *AllocationSet) MarshalJSON() ([]byte, error) {
  1293. as.RLock()
  1294. defer as.RUnlock()
  1295. return json.Marshal(as.allocations)
  1296. }
// Resolution returns the AllocationSet's window duration, i.e. the result of
// calling Duration on the set's Window. No nil check is performed on the
// receiver and no lock is taken.
func (as *AllocationSet) Resolution() time.Duration {
	return as.Window.Duration()
}
  1301. // Set uses the given Allocation to overwrite the existing entry in the
  1302. // AllocationSet under the Allocation's name.
  1303. func (as *AllocationSet) Set(alloc *Allocation) error {
  1304. if as.IsEmpty() {
  1305. as.Lock()
  1306. as.allocations = map[string]*Allocation{}
  1307. as.externalKeys = map[string]bool{}
  1308. as.idleKeys = map[string]bool{}
  1309. as.Unlock()
  1310. }
  1311. as.Lock()
  1312. defer as.Unlock()
  1313. as.allocations[alloc.Name] = alloc
  1314. // If the given Allocation is an external one, record that
  1315. if alloc.IsExternal() {
  1316. as.externalKeys[alloc.Name] = true
  1317. }
  1318. // If the given Allocation is an idle one, record that
  1319. if alloc.IsIdle() {
  1320. as.idleKeys[alloc.Name] = true
  1321. }
  1322. return nil
  1323. }
  1324. // Start returns the Start time of the AllocationSet window
  1325. func (as *AllocationSet) Start() time.Time {
  1326. if as == nil {
  1327. log.Warningf("Allocation ETL: calling Start on nil AllocationSet")
  1328. return time.Unix(0, 0)
  1329. }
  1330. if as.Window.Start() == nil {
  1331. log.Warningf("Allocation ETL: AllocationSet with illegal window: Start is nil; len(as.allocations)=%d", len(as.allocations))
  1332. return time.Unix(0, 0)
  1333. }
  1334. return *as.Window.Start()
  1335. }
  1336. // String represents the given Allocation as a string
  1337. func (as *AllocationSet) String() string {
  1338. if as == nil {
  1339. return "<nil>"
  1340. }
  1341. return fmt.Sprintf("AllocationSet{length: %d; window: %s; totalCost: %.2f}",
  1342. as.Length(), as.Window, as.TotalCost())
  1343. }
  1344. // TotalCost returns the sum of all TotalCosts of the allocations contained
  1345. func (as *AllocationSet) TotalCost() float64 {
  1346. if as.IsEmpty() {
  1347. return 0.0
  1348. }
  1349. as.RLock()
  1350. defer as.RUnlock()
  1351. tc := 0.0
  1352. for _, a := range as.allocations {
  1353. tc += a.TotalCost
  1354. }
  1355. return tc
  1356. }
  1357. // UTCOffset returns the AllocationSet's configured UTCOffset.
  1358. func (as *AllocationSet) UTCOffset() time.Duration {
  1359. _, zone := as.Start().Zone()
  1360. return time.Duration(zone) * time.Second
  1361. }
  1362. func (as *AllocationSet) accumulate(that *AllocationSet) (*AllocationSet, error) {
  1363. if as.IsEmpty() {
  1364. return that, nil
  1365. }
  1366. if that.IsEmpty() {
  1367. return as, nil
  1368. }
  1369. // Set start, end to min(start), max(end)
  1370. start := as.Start()
  1371. end := as.End()
  1372. if that.Start().Before(start) {
  1373. start = that.Start()
  1374. }
  1375. if that.End().After(end) {
  1376. end = that.End()
  1377. }
  1378. acc := NewAllocationSet(start, end)
  1379. as.RLock()
  1380. defer as.RUnlock()
  1381. that.RLock()
  1382. defer that.RUnlock()
  1383. for _, alloc := range as.allocations {
  1384. err := acc.insert(alloc)
  1385. if err != nil {
  1386. return nil, err
  1387. }
  1388. }
  1389. for _, alloc := range that.allocations {
  1390. err := acc.insert(alloc)
  1391. if err != nil {
  1392. return nil, err
  1393. }
  1394. }
  1395. return acc, nil
  1396. }
// AllocationSetRange is a thread-safe slice of AllocationSets. It is meant to
// be used such that the AllocationSets held are consecutive and coherent with
// respect to using the same aggregation properties, UTC offset, and
// resolution. However these rules are not necessarily enforced, so use wisely.
type AllocationSetRange struct {
	// Embedded lock; methods on the range use it to guard access to allocations.
	sync.RWMutex
	// allocations holds the AllocationSets of the range, in order.
	allocations []*AllocationSet
}
  1405. // NewAllocationSetRange instantiates a new range composed of the given
  1406. // AllocationSets in the order provided.
  1407. func NewAllocationSetRange(allocs ...*AllocationSet) *AllocationSetRange {
  1408. return &AllocationSetRange{
  1409. allocations: allocs,
  1410. }
  1411. }
  1412. // Accumulate sums each AllocationSet in the given range, returning a single cumulative
  1413. // AllocationSet for the entire range.
  1414. func (asr *AllocationSetRange) Accumulate() (*AllocationSet, error) {
  1415. var allocSet *AllocationSet
  1416. var err error
  1417. asr.RLock()
  1418. defer asr.RUnlock()
  1419. for _, as := range asr.allocations {
  1420. allocSet, err = allocSet.accumulate(as)
  1421. if err != nil {
  1422. return nil, err
  1423. }
  1424. }
  1425. return allocSet, nil
  1426. }
  1427. // TODO niko/etl accumulate into lower-resolution chunks of the given resolution
  1428. // func (asr *AllocationSetRange) AccumulateBy(resolution time.Duration) *AllocationSetRange
  1429. // AggregateBy aggregates each AllocationSet in the range by the given
  1430. // properties and options.
  1431. func (asr *AllocationSetRange) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  1432. aggRange := &AllocationSetRange{allocations: []*AllocationSet{}}
  1433. asr.Lock()
  1434. defer asr.Unlock()
  1435. for _, as := range asr.allocations {
  1436. err := as.AggregateBy(properties, options)
  1437. if err != nil {
  1438. return err
  1439. }
  1440. aggRange.allocations = append(aggRange.allocations, as)
  1441. }
  1442. asr.allocations = aggRange.allocations
  1443. return nil
  1444. }
  1445. // Append appends the given AllocationSet to the end of the range. It does not
  1446. // validate whether or not that violates window continuity.
  1447. func (asr *AllocationSetRange) Append(that *AllocationSet) {
  1448. asr.Lock()
  1449. defer asr.Unlock()
  1450. asr.allocations = append(asr.allocations, that)
  1451. }
  1452. // Each invokes the given function for each AllocationSet in the range
  1453. func (asr *AllocationSetRange) Each(f func(int, *AllocationSet)) {
  1454. if asr == nil {
  1455. return
  1456. }
  1457. for i, as := range asr.allocations {
  1458. f(i, as)
  1459. }
  1460. }
  1461. // Get retrieves the AllocationSet at the given index of the range.
  1462. func (asr *AllocationSetRange) Get(i int) (*AllocationSet, error) {
  1463. if i < 0 || i >= len(asr.allocations) {
  1464. return nil, fmt.Errorf("AllocationSetRange: index out of range: %d", i)
  1465. }
  1466. asr.RLock()
  1467. defer asr.RUnlock()
  1468. return asr.allocations[i], nil
  1469. }
  1470. // InsertRange merges the given AllocationSetRange into the receiving one by
  1471. // lining up sets with matching windows, then inserting each allocation from
  1472. // the given ASR into the respective set in the receiving ASR. If the given
  1473. // ASR contains an AllocationSet from a window that does not exist in the
  1474. // receiving ASR, then an error is returned. However, the given ASR does not
  1475. // need to cover the full range of the receiver.
  1476. func (asr *AllocationSetRange) InsertRange(that *AllocationSetRange) error {
  1477. if asr == nil {
  1478. return fmt.Errorf("cannot insert range into nil AllocationSetRange")
  1479. }
  1480. // keys maps window to index in asr
  1481. keys := map[string]int{}
  1482. asr.Each(func(i int, as *AllocationSet) {
  1483. if as == nil {
  1484. return
  1485. }
  1486. keys[as.Window.String()] = i
  1487. })
  1488. // Nothing to merge, so simply return
  1489. if len(keys) == 0 {
  1490. return nil
  1491. }
  1492. var err error
  1493. that.Each(func(j int, thatAS *AllocationSet) {
  1494. if thatAS == nil || err != nil {
  1495. return
  1496. }
  1497. // Find matching AllocationSet in asr
  1498. i, ok := keys[thatAS.Window.String()]
  1499. if !ok {
  1500. err = fmt.Errorf("cannot merge AllocationSet into window that does not exist: %s", thatAS.Window.String())
  1501. return
  1502. }
  1503. as, err := asr.Get(i)
  1504. if err != nil {
  1505. err = fmt.Errorf("AllocationSetRange index does not exist: %d", i)
  1506. return
  1507. }
  1508. // Insert each Allocation from the given set
  1509. thatAS.Each(func(k string, alloc *Allocation) {
  1510. err = as.Insert(alloc)
  1511. if err != nil {
  1512. err = fmt.Errorf("error inserting allocation: %s", err)
  1513. return
  1514. }
  1515. })
  1516. })
  1517. // err might be nil
  1518. return err
  1519. }
  1520. // Length returns the length of the range, which is zero if nil
  1521. func (asr *AllocationSetRange) Length() int {
  1522. if asr == nil || asr.allocations == nil {
  1523. return 0
  1524. }
  1525. asr.RLock()
  1526. defer asr.RUnlock()
  1527. return len(asr.allocations)
  1528. }
  1529. // MarshalJSON JSON-encodes the range
  1530. func (asr *AllocationSetRange) MarshalJSON() ([]byte, error) {
  1531. asr.RLock()
  1532. asr.RUnlock()
  1533. return json.Marshal(asr.allocations)
  1534. }
  1535. // Slice copies the underlying slice of AllocationSets, maintaining order,
  1536. // and returns the copied slice.
  1537. func (asr *AllocationSetRange) Slice() []*AllocationSet {
  1538. if asr == nil || asr.allocations == nil {
  1539. return nil
  1540. }
  1541. asr.RLock()
  1542. defer asr.RUnlock()
  1543. copy := []*AllocationSet{}
  1544. for _, as := range asr.allocations {
  1545. copy = append(copy, as.Clone())
  1546. }
  1547. return copy
  1548. }
  1549. // String represents the given AllocationSetRange as a string
  1550. func (asr *AllocationSetRange) String() string {
  1551. if asr == nil {
  1552. return "<nil>"
  1553. }
  1554. return fmt.Sprintf("AllocationSetRange{length: %d}", asr.Length())
  1555. }
  1556. // UTCOffset returns the detected UTCOffset of the AllocationSets within the
  1557. // range. Defaults to 0 if the range is nil or empty. Does not warn if there
  1558. // are sets with conflicting UTCOffsets (just returns the first).
  1559. func (asr *AllocationSetRange) UTCOffset() time.Duration {
  1560. if asr.Length() == 0 {
  1561. return 0
  1562. }
  1563. as, err := asr.Get(0)
  1564. if err != nil {
  1565. return 0
  1566. }
  1567. return as.UTCOffset()
  1568. }
  1569. // Window returns the full window that the AllocationSetRange spans, from the
  1570. // start of the first AllocationSet to the end of the last one.
  1571. func (asr *AllocationSetRange) Window() Window {
  1572. if asr == nil || asr.Length() == 0 {
  1573. return NewWindow(nil, nil)
  1574. }
  1575. start := asr.allocations[0].Start()
  1576. end := asr.allocations[asr.Length()-1].End()
  1577. return NewWindow(&start, &end)
  1578. }