allocation.go 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799
  1. package kubecost
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "sort"
  6. "strings"
  7. "sync"
  8. "time"
  9. "github.com/kubecost/cost-model/pkg/log"
  10. )
  11. // TODO Clean-up use of IsEmpty; nil checks should be separated for safety.
  12. // TODO Consider making Allocation an interface, which is fulfilled by structs
  13. // like KubernetesAllocation, IdleAllocation, and ExternalAllocation.
  // Markers used to tag special kinds of allocations and allocation properties.
  const (
  	// ExternalSuffix indicates an external allocation.
  	ExternalSuffix = "__external__"

  	// IdleSuffix indicates an idle allocation property.
  	IdleSuffix = "__idle__"

  	// SharedSuffix indicates a shared allocation property.
  	SharedSuffix = "__shared__"

  	// UnallocatedSuffix indicates an unallocated allocation property.
  	UnallocatedSuffix = "__unallocated__"
  )

  // Modes describing how a shareable resource is distributed.
  const (
  	// ShareWeighted indicates that a shared resource should be shared as a
  	// proportion of the cost of the remaining allocations.
  	ShareWeighted = "__weighted__"

  	// ShareEven indicates that a shared resource should be shared evenly
  	// across all remaining allocations.
  	ShareEven = "__even__"

  	// ShareNone indicates that a shareable resource should not be shared.
  	ShareNone = "__none__"
  )
  30. // Allocation is a unit of resource allocation and cost for a given window
  31. // of time and for a given kubernetes construct with its associated set of
  32. // properties.
  33. type Allocation struct {
  34. Name string `json:"name"`
  35. Properties Properties `json:"properties,omitempty"`
  36. Window Window `json:"window"`
  37. Start time.Time `json:"start"`
  38. End time.Time `json:"end"`
  39. CPUCoreHours float64 `json:"cpuCoreHours"`
  40. CPUCost float64 `json:"cpuCost"`
  41. CPUEfficiency float64 `json:"cpuEfficiency"`
  42. CPURequestAvg float64 `json:"cpuRequestAverage"`
  43. CPUUsageAvg float64 `json:"cpuUsageAverage"`
  44. GPUHours float64 `json:"gpuHours"`
  45. GPUCost float64 `json:"gpuCost"`
  46. NetworkCost float64 `json:"networkCost"`
  47. PVByteHours float64 `json:"pvByteHours"`
  48. PVCost float64 `json:"pvCost"`
  49. RAMByteHours float64 `json:"ramByteHours"`
  50. RAMCost float64 `json:"ramCost"`
  51. RAMEfficiency float64 `json:"ramEfficiency"`
  52. RAMRequestAvg float64 `json:"ramRequestAverage"`
  53. RAMUsageAvg float64 `json:"ramUsageAverage"`
  54. SharedCost float64 `json:"sharedCost"`
  55. ExternalCost float64 `json:"externalCost"`
  56. TotalCost float64 `json:"totalCost"`
  57. TotalEfficiency float64 `json:"totalEfficiency"`
  58. }
  59. // AllocationMatchFunc is a function that can be used to match Allocations by
  60. // returning true for any given Allocation if a condition is met.
  61. type AllocationMatchFunc func(*Allocation) bool
  62. // Add returns the result of summing the two given Allocations, which sums the
  63. // summary fields (e.g. costs, resources) and recomputes efficiency. Neither of
  64. // the two original Allocations are mutated in the process.
  65. func (a *Allocation) Add(that *Allocation) (*Allocation, error) {
  66. if a == nil {
  67. return that.Clone(), nil
  68. }
  69. if that == nil {
  70. return a.Clone(), nil
  71. }
  72. // Note: no need to clone "that", as add only mutates the receiver
  73. agg := a.Clone()
  74. agg.add(that)
  75. return agg, nil
  76. }
  77. // Clone returns a deep copy of the given Allocation
  78. func (a *Allocation) Clone() *Allocation {
  79. if a == nil {
  80. return nil
  81. }
  82. return &Allocation{
  83. Name: a.Name,
  84. Properties: a.Properties.Clone(),
  85. Window: a.Window.Clone(),
  86. Start: a.Start,
  87. End: a.End,
  88. CPUCoreHours: a.CPUCoreHours,
  89. CPUCost: a.CPUCost,
  90. CPUEfficiency: a.CPUEfficiency,
  91. GPUHours: a.GPUHours,
  92. GPUCost: a.GPUCost,
  93. NetworkCost: a.NetworkCost,
  94. PVByteHours: a.PVByteHours,
  95. PVCost: a.PVCost,
  96. RAMByteHours: a.RAMByteHours,
  97. RAMCost: a.RAMCost,
  98. RAMEfficiency: a.RAMEfficiency,
  99. SharedCost: a.SharedCost,
  100. ExternalCost: a.ExternalCost,
  101. TotalCost: a.TotalCost,
  102. TotalEfficiency: a.TotalEfficiency,
  103. }
  104. }
  105. // Equal returns true if the values held in the given Allocation precisely
  106. // match those of the receiving Allocation. nil does not match nil.
  107. func (a *Allocation) Equal(that *Allocation) bool {
  108. if a == nil || that == nil {
  109. return false
  110. }
  111. if a.Name != that.Name {
  112. return false
  113. }
  114. if !a.Properties.Equal(&that.Properties) {
  115. return false
  116. }
  117. if !a.Window.Equal(that.Window) {
  118. return false
  119. }
  120. if !a.Start.Equal(that.Start) {
  121. return false
  122. }
  123. if !a.End.Equal(that.End) {
  124. return false
  125. }
  126. if a.CPUCoreHours != that.CPUCoreHours {
  127. return false
  128. }
  129. if a.CPUCost != that.CPUCost {
  130. return false
  131. }
  132. if a.CPUEfficiency != that.CPUEfficiency {
  133. return false
  134. }
  135. if a.GPUHours != that.GPUHours {
  136. return false
  137. }
  138. if a.GPUCost != that.GPUCost {
  139. return false
  140. }
  141. if a.NetworkCost != that.NetworkCost {
  142. return false
  143. }
  144. if a.PVByteHours != that.PVByteHours {
  145. return false
  146. }
  147. if a.PVCost != that.PVCost {
  148. return false
  149. }
  150. if a.RAMByteHours != that.RAMByteHours {
  151. return false
  152. }
  153. if a.RAMCost != that.RAMCost {
  154. return false
  155. }
  156. if a.RAMEfficiency != that.RAMEfficiency {
  157. return false
  158. }
  159. if a.SharedCost != that.SharedCost {
  160. return false
  161. }
  162. if a.ExternalCost != that.ExternalCost {
  163. return false
  164. }
  165. if a.TotalCost != that.TotalCost {
  166. return false
  167. }
  168. if a.TotalEfficiency != that.TotalEfficiency {
  169. return false
  170. }
  171. return true
  172. }
  173. // CPUCores converts the Allocation's CPUCoreHours into average CPUCores
  174. func (a *Allocation) CPUCores() float64 {
  175. if a.Minutes() <= 0.0 {
  176. return 0.0
  177. }
  178. return a.CPUCoreHours / (a.Minutes() / 60.0)
  179. }
  180. // RAMBytes converts the Allocation's RAMByteHours into average RAMBytes
  181. func (a *Allocation) RAMBytes() float64 {
  182. if a.Minutes() <= 0.0 {
  183. return 0.0
  184. }
  185. return a.RAMByteHours / (a.Minutes() / 60.0)
  186. }
  187. // PVBytes converts the Allocation's PVByteHours into average PVBytes
  188. func (a *Allocation) PVBytes() float64 {
  189. if a.Minutes() <= 0.0 {
  190. return 0.0
  191. }
  192. return a.PVByteHours / (a.Minutes() / 60.0)
  193. }
  194. // Resolution returns the duration of time covered by the Allocation
  195. func (a *Allocation) Resolution() time.Duration {
  196. return a.End.Sub(a.Start)
  197. }
  198. // IsAggregated is true if the given Allocation has been aggregated, which we
  199. // define by a lack of Properties.
  200. func (a *Allocation) IsAggregated() bool {
  201. return a == nil || a.Properties == nil
  202. }
  203. // IsExternal is true if the given Allocation represents external costs.
  204. func (a *Allocation) IsExternal() bool {
  205. return strings.Contains(a.Name, ExternalSuffix)
  206. }
  207. // IsIdle is true if the given Allocation represents idle costs.
  208. func (a *Allocation) IsIdle() bool {
  209. return strings.Contains(a.Name, IdleSuffix)
  210. }
  211. // IsUnallocated is true if the given Allocation represents unallocated costs.
  212. func (a *Allocation) IsUnallocated() bool {
  213. return strings.Contains(a.Name, UnallocatedSuffix)
  214. }
  215. // Minutes returns the number of minutes the Allocation represents, as defined
  216. // by the difference between the end and start times.
  217. func (a *Allocation) Minutes() float64 {
  218. return a.End.Sub(a.Start).Minutes()
  219. }
  220. // Share works like Add, but converts the entire cost of the given Allocation
  221. // to SharedCost, rather than adding to the individual resource costs.
  222. // TODO niko/cdmr unit test changes!!!
  223. func (a *Allocation) Share(that *Allocation) (*Allocation, error) {
  224. if that == nil {
  225. return a.Clone(), nil
  226. }
  227. // Convert all costs of shared Allocation to SharedCost, zero out all
  228. // non-shared costs, then add.
  229. share := that.Clone()
  230. share.SharedCost += share.TotalCost
  231. share.TotalEfficiency = 1.0
  232. share.CPUCost = 0
  233. share.CPUCoreHours = 0
  234. share.CPUEfficiency = 0
  235. share.RAMCost = 0
  236. share.RAMByteHours = 0
  237. share.RAMEfficiency = 0
  238. share.GPUCost = 0
  239. share.GPUHours = 0
  240. share.PVCost = 0
  241. share.PVByteHours = 0
  242. share.NetworkCost = 0
  243. share.ExternalCost = 0
  244. if a == nil {
  245. return share, nil
  246. }
  247. agg := a.Clone()
  248. agg.add(that)
  249. return agg, nil
  250. }
  251. // String represents the given Allocation as a string
  252. func (a *Allocation) String() string {
  253. return fmt.Sprintf("%s%s=%.2f", a.Name, NewWindow(&a.Start, &a.End), a.TotalCost)
  254. }
  255. func (a *Allocation) add(that *Allocation) {
  256. if a == nil {
  257. log.Warningf("Allocation.AggregateBy: trying to add a nil receiver")
  258. return
  259. }
  260. aCluster, _ := a.Properties.GetCluster()
  261. thatCluster, _ := that.Properties.GetCluster()
  262. aNode, _ := a.Properties.GetNode()
  263. thatNode, _ := that.Properties.GetNode()
  264. // reset properties
  265. a.Properties = nil
  266. // ensure that we carry cluster ID and/or node over if they're the same
  267. // required for idle/shared cost allocation
  268. if aCluster == thatCluster {
  269. a.Properties = Properties{ClusterProp: aCluster}
  270. }
  271. if aNode == thatNode {
  272. if a.Properties == nil {
  273. a.Properties = Properties{NodeProp: aNode}
  274. } else {
  275. a.Properties.SetNode(aNode)
  276. }
  277. }
  278. // Expand Window, Start, and End to be the "max" of each between the two
  279. // given Allocations.
  280. a.Window = a.Window.Expand(that.Window)
  281. if that.Start.Before(a.Start) {
  282. a.Start = that.Start
  283. }
  284. if that.End.Before(a.End) {
  285. a.End = that.End
  286. }
  287. // Note: efficiency numbers are computed the cost-weighted sum of each
  288. // Allocation's efficiency.
  289. // e.g. ($10 @ 25%) + ($10 @ 75%) = (2.5+7.5)/20 = 50%
  290. // e.g. ($90 @ 10%) + ($10 @ 100%) = (9.0+10.0)/100 = 19%
  291. // e.g. ($100 @ 0%) + ($100 @ 0%) = (0.0+0.0)/200 = 0%
  292. // e.g. ($10 @ 150%) + ($10 @ 50%) = (15.0+5.0)/20 = 100%
  293. // e.g. ($0 @ 100%) + ($0 @ 50%) = 0% (no div by 0)
  294. // Compute CPU efficiency (see note above for methodology)
  295. aggCPUCost := a.CPUCost + that.CPUCost
  296. if aggCPUCost > 0 {
  297. a.CPUEfficiency = (a.CPUEfficiency*a.CPUCost + that.CPUEfficiency*that.CPUCost) / aggCPUCost
  298. } else {
  299. a.CPUEfficiency = 0.0
  300. }
  301. // Compute RAM efficiency (see note above for methodology)
  302. aggRAMCost := a.RAMCost + that.RAMCost
  303. if aggRAMCost > 0 {
  304. a.RAMEfficiency = (a.RAMEfficiency*a.RAMCost + that.RAMEfficiency*that.RAMCost) / aggRAMCost
  305. } else {
  306. a.RAMEfficiency = 0.0
  307. }
  308. // Compute total efficiency (see note above for methodology)
  309. aggTotalCost := a.TotalCost + that.TotalCost
  310. if aggTotalCost > 0 {
  311. a.TotalEfficiency = (a.TotalEfficiency*a.TotalCost + that.TotalEfficiency*that.TotalCost) / aggTotalCost
  312. } else {
  313. aggTotalCost = 0.0
  314. }
  315. // Sum all cumulative resource fields
  316. a.CPUCoreHours += that.CPUCoreHours
  317. a.GPUHours += that.GPUHours
  318. a.RAMByteHours += that.RAMByteHours
  319. a.PVByteHours += that.PVByteHours
  320. // Sum all cumulative cost fields
  321. a.CPUCost += that.CPUCost
  322. a.GPUCost += that.GPUCost
  323. a.RAMCost += that.RAMCost
  324. a.PVCost += that.PVCost
  325. a.NetworkCost += that.NetworkCost
  326. a.SharedCost += that.SharedCost
  327. a.ExternalCost += that.ExternalCost
  328. a.TotalCost += that.TotalCost
  329. }
  330. // AllocationSet stores a set of Allocations, each with a unique name, that share
  331. // a window. An AllocationSet is mutable, so treat it like a threadsafe map.
  332. type AllocationSet struct {
  333. sync.RWMutex
  334. allocations map[string]*Allocation
  335. externalKeys map[string]bool
  336. idleKeys map[string]bool
  337. Window Window
  338. Warnings []string
  339. Errors []string
  340. }
  341. // NewAllocationSet instantiates a new AllocationSet and, optionally, inserts
  342. // the given list of Allocations
  343. func NewAllocationSet(start, end time.Time, allocs ...*Allocation) *AllocationSet {
  344. as := &AllocationSet{
  345. allocations: map[string]*Allocation{},
  346. externalKeys: map[string]bool{},
  347. idleKeys: map[string]bool{},
  348. Window: NewWindow(&start, &end),
  349. }
  350. for _, a := range allocs {
  351. as.Insert(a)
  352. }
  353. return as
  354. }
  355. // AllocationAggregationOptions provide advanced functionality to AggregateBy, including
  356. // filtering results and sharing allocations. FilterFuncs are a list of match
  357. // functions such that, if any function fails, the allocation is ignored.
  358. // ShareFuncs are a list of match functions such that, if any function
  359. // succeeds, the allocation is marked as a shared resource. ShareIdle is a
  360. // simple flag for sharing idle resources.
  361. type AllocationAggregationOptions struct {
  362. FilterFuncs []AllocationMatchFunc
  363. SplitIdle bool
  364. MergeUnallocated bool
  365. ShareFuncs []AllocationMatchFunc
  366. ShareIdle string
  367. ShareSplit string
  368. SharedHourlyCosts map[string]float64
  369. }
  370. // AggregateBy aggregates the Allocations in the given AllocationSet by the given
  371. // Property. This will only be legal if the AllocationSet is divisible by the
  372. // given Property; e.g. Containers can be divided by Namespace, but not vice-a-versa.
  373. func (as *AllocationSet) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  374. // The order of operations for aggregating allocations is as follows:
  375. // 1. Partition external, idle, and shared allocations into separate sets
  376. // 2. Compute idle coefficients (if necessary)
  377. // a) if idle allocation is to be shared, compute idle coefficients
  378. // (do not compute shared coefficients here, see step 5)
  379. // b) if idle allocation is NOT shared, but filters are present, compute
  380. // idle filtration coefficients for the purpose of only returning the
  381. // portion of idle allocation that would have been shared with the
  382. // unfiltered results set. (See unit tests 5.a,b,c)
  383. // 3. Ignore allocation if it fails any of the FilterFuncs
  384. // 4. Distribute idle allocations among remaining non-idle, non-external
  385. // allocations
  386. // 5. Generate aggregation key and insert allocation into the output set
  387. // 6. Scale un-aggregated idle coefficients by filtration coefficient
  388. // 7. If there are shared allocations, compute sharing coefficients on
  389. // the aggregated set, then share allocation accordingly
  390. // 8. If there are external allocations that can be aggregated into
  391. // the output (i.e. they can be used to generate a valid key for
  392. // the given properties) then aggregate; otherwise... ignore them?
  393. // 9. If the merge idle option is enabled, merge any remaining idle
  394. // allocations into a single idle allocation
  395. // TODO niko/etl revisit (ShareIdle: ShareEven) case, which is probably wrong
  396. // (and, frankly, ill-defined; i.e. evenly across clusters? within clusters?)
  397. if options == nil {
  398. options = &AllocationAggregationOptions{}
  399. }
  400. if as.IsEmpty() {
  401. return nil
  402. }
  403. // aggSet will collect the aggregated allocations
  404. aggSet := &AllocationSet{
  405. Window: as.Window.Clone(),
  406. }
  407. // externalSet will collect external allocations
  408. externalSet := &AllocationSet{
  409. Window: as.Window.Clone(),
  410. }
  411. // idleSet will be shared among aggSet after initial aggregation
  412. // is complete
  413. idleSet := &AllocationSet{
  414. Window: as.Window.Clone(),
  415. }
  416. // shareSet will be shared among aggSet after initial aggregation
  417. // is complete
  418. shareSet := &AllocationSet{
  419. Window: as.Window.Clone(),
  420. }
  421. // Convert SharedHourlyCosts to Allocations in the shareSet
  422. for name, cost := range options.SharedHourlyCosts {
  423. if cost > 0.0 {
  424. hours := as.Resolution().Hours()
  425. // If set ends in the future, adjust hours accordingly
  426. diff := time.Now().Sub(as.End())
  427. if diff < 0.0 {
  428. hours += diff.Hours()
  429. }
  430. totalSharedCost := cost * hours
  431. shareSet.Insert(&Allocation{
  432. Name: fmt.Sprintf("%s/%s", name, SharedSuffix),
  433. Start: as.Start(),
  434. End: as.End(),
  435. SharedCost: totalSharedCost,
  436. TotalCost: totalSharedCost,
  437. })
  438. }
  439. }
  440. as.Lock()
  441. defer as.Unlock()
  442. // (1) Loop and find all of the external, idle, and shared allocations. Add
  443. // them to their respective sets, removing them from the set of allocations
  444. // to aggregate.
  445. for _, alloc := range as.allocations {
  446. // External allocations get aggregated post-hoc (see step 6) and do
  447. // not necessarily contain complete sets of properties, so they are
  448. // moved to a separate AllocationSet.
  449. if alloc.IsExternal() {
  450. delete(as.externalKeys, alloc.Name)
  451. delete(as.allocations, alloc.Name)
  452. externalSet.Insert(alloc)
  453. continue
  454. }
  455. cluster, err := alloc.Properties.GetCluster()
  456. if err != nil {
  457. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  458. return err
  459. }
  460. // Idle allocations should be separated into idleSet if they are to be
  461. // shared later on. If they are not to be shared, then aggregate them.
  462. if alloc.IsIdle() {
  463. delete(as.idleKeys, alloc.Name)
  464. delete(as.allocations, alloc.Name)
  465. if options.ShareIdle == ShareEven || options.ShareIdle == ShareWeighted {
  466. idleSet.Insert(alloc)
  467. } else {
  468. aggSet.Insert(alloc)
  469. }
  470. continue
  471. }
  472. // Shared allocations must be identified and separated prior to
  473. // aggregation and filtering. That is, if any of the ShareFuncs
  474. // return true, then move the allocation to shareSet.
  475. for _, sf := range options.ShareFuncs {
  476. if sf(alloc) {
  477. delete(as.idleKeys, alloc.Name)
  478. delete(as.allocations, alloc.Name)
  479. alloc.Name = fmt.Sprintf("%s/%s", cluster, SharedSuffix)
  480. shareSet.Insert(alloc)
  481. break
  482. }
  483. }
  484. }
  485. // It's possible that no more un-shared, non-idle, non-external allocations
  486. // remain at this point. This always results in an emptySet.
  487. if len(as.allocations) == 0 {
  488. log.Warningf("ETL: AggregateBy: no allocations to aggregate")
  489. emptySet := &AllocationSet{
  490. Window: as.Window.Clone(),
  491. }
  492. as.allocations = emptySet.allocations
  493. return nil
  494. }
  495. // (2) In order to correctly apply idle and shared resource coefficients
  496. // appropriately, we need to determine the coefficients for the full set
  497. // of data. The ensures that the ratios are maintained through filtering.
  498. // idleCoefficients are organized by [cluster][allocation][resource]=coeff
  499. var idleCoefficients map[string]map[string]map[string]float64
  500. // shareCoefficients are organized by [allocation][resource]=coeff (no cluster)
  501. var shareCoefficients map[string]float64
  502. var err error
  503. // (2a) If there are idle costs and we intend to share them, compute the
  504. // coefficients for sharing the cost among the non-idle, non-aggregated
  505. // allocations.
  506. if idleSet.Length() > 0 && options.ShareIdle != ShareNone {
  507. idleCoefficients, err = computeIdleCoeffs(properties, options, as)
  508. if err != nil {
  509. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  510. return fmt.Errorf("error computing idle coefficients: %s", err)
  511. }
  512. }
  513. // (2b) If we're not sharing idle and we're filtering, we need to track the
  514. // amount of each idle allocation to "delete" in order to maintain parity
  515. // with the idle-allocated results. That is, we want to return only the
  516. // idle cost that would have been shared with the unfiltered portion of
  517. // the results, not the full idle cost.
  518. var idleFiltrationCoefficients map[string]map[string]map[string]float64
  519. if len(options.FilterFuncs) > 0 && options.ShareIdle == ShareNone {
  520. idleFiltrationCoefficients, err = computeIdleCoeffs(properties, options, as)
  521. if err != nil {
  522. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  523. return fmt.Errorf("error computing idle filtration coefficients: %s", err)
  524. }
  525. }
  526. // (3-5) Filter, distribute idle cost, and aggregate (in that order)
  527. for _, alloc := range as.allocations {
  528. cluster, err := alloc.Properties.GetCluster()
  529. if err != nil {
  530. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  531. return err
  532. }
  533. skip := false
  534. // (3) If any of the filter funcs fail, immediately skip the allocation.
  535. for _, ff := range options.FilterFuncs {
  536. if !ff(alloc) {
  537. skip = true
  538. break
  539. }
  540. }
  541. if skip {
  542. // If we are tracking idle filtration coefficients, delete the
  543. // entry corresponding to the filtered allocation. (Deleting the
  544. // entry will result in that proportional amount being removed
  545. // from the idle allocation at the end of the process.)
  546. if idleFiltrationCoefficients != nil {
  547. if ifcc, ok := idleFiltrationCoefficients[cluster]; ok {
  548. delete(ifcc, alloc.Name)
  549. }
  550. }
  551. continue
  552. }
  553. // (4) Split idle allocations and distribute among remaining
  554. // un-aggregated allocations.
  555. // NOTE: if idle allocation is off (i.e. ShareIdle == ShareNone) then
  556. // all idle allocations will be in the aggSet at this point, so idleSet
  557. // will be empty and we won't enter this block.
  558. if idleSet.Length() > 0 {
  559. // Distribute idle allocations by coefficient per-cluster, per-allocation
  560. for _, idleAlloc := range idleSet.allocations {
  561. // Only share idle if the cluster matches; i.e. the allocation
  562. // is from the same cluster as the idle costs
  563. idleCluster, err := idleAlloc.Properties.GetCluster()
  564. if err != nil {
  565. return err
  566. }
  567. if idleCluster != cluster {
  568. continue
  569. }
  570. // Make sure idle coefficients exist
  571. if _, ok := idleCoefficients[cluster]; !ok {
  572. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficient [no cluster: '%s' in coefficients] for '%s'", cluster, alloc.Name)
  573. continue
  574. }
  575. if _, ok := idleCoefficients[cluster][alloc.Name]; !ok {
  576. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  577. continue
  578. }
  579. alloc.CPUCoreHours += idleAlloc.CPUCoreHours * idleCoefficients[cluster][alloc.Name]["cpu"]
  580. alloc.GPUHours += idleAlloc.GPUHours * idleCoefficients[cluster][alloc.Name]["gpu"]
  581. alloc.RAMByteHours += idleAlloc.RAMByteHours * idleCoefficients[cluster][alloc.Name]["ram"]
  582. idleCPUCost := idleAlloc.CPUCost * idleCoefficients[cluster][alloc.Name]["cpu"]
  583. idleGPUCost := idleAlloc.GPUCost * idleCoefficients[cluster][alloc.Name]["gpu"]
  584. idleRAMCost := idleAlloc.RAMCost * idleCoefficients[cluster][alloc.Name]["ram"]
  585. alloc.CPUCost += idleCPUCost
  586. alloc.GPUCost += idleGPUCost
  587. alloc.RAMCost += idleRAMCost
  588. alloc.TotalCost += idleCPUCost + idleGPUCost + idleRAMCost
  589. }
  590. }
  591. // (5) generate key to use for aggregation-by-key and allocation name
  592. key, err := alloc.generateKey(properties)
  593. if err != nil {
  594. return err
  595. }
  596. alloc.Name = key
  597. if options.MergeUnallocated && alloc.IsUnallocated() {
  598. alloc.Name = UnallocatedSuffix
  599. }
  600. // Inserting the allocation with the generated key for a name will
  601. // perform the actual basic aggregation step.
  602. aggSet.Insert(alloc)
  603. }
  604. // clusterIdleFiltrationCoeffs is used to track per-resource idle
  605. // coefficients on a cluster-by-cluster basis. It is, essentailly, an
  606. // aggregation of idleFiltrationCoefficients after they have been
  607. // filtered above (in step 3)
  608. var clusterIdleFiltrationCoeffs map[string]map[string]float64
  609. if idleFiltrationCoefficients != nil {
  610. clusterIdleFiltrationCoeffs = map[string]map[string]float64{}
  611. for cluster, m := range idleFiltrationCoefficients {
  612. if _, ok := clusterIdleFiltrationCoeffs[cluster]; !ok {
  613. clusterIdleFiltrationCoeffs[cluster] = map[string]float64{
  614. "cpu": 0.0,
  615. "gpu": 0.0,
  616. "ram": 0.0,
  617. }
  618. }
  619. for _, n := range m {
  620. for resource, val := range n {
  621. clusterIdleFiltrationCoeffs[cluster][resource] += val
  622. }
  623. }
  624. }
  625. }
  626. // (6) If we have both un-shared idle allocations and idle filtration
  627. // coefficients (i.e. we have computed coefficients for scaling idle
  628. // allocation costs by cluster) then use those coefficients to scale down
  629. // each idle allocation.
  630. if len(aggSet.idleKeys) > 0 && clusterIdleFiltrationCoeffs != nil {
  631. for idleKey := range aggSet.idleKeys {
  632. idleAlloc := aggSet.Get(idleKey)
  633. cluster, err := idleAlloc.Properties.GetCluster()
  634. if err != nil {
  635. log.Warningf("AggregateBy: idle allocation without cluster: %s", idleAlloc)
  636. }
  637. if resourceCoeffs, ok := clusterIdleFiltrationCoeffs[cluster]; ok {
  638. idleAlloc.CPUCost *= resourceCoeffs["cpu"]
  639. idleAlloc.CPUCoreHours *= resourceCoeffs["cpu"]
  640. idleAlloc.RAMCost *= resourceCoeffs["ram"]
  641. idleAlloc.RAMByteHours *= resourceCoeffs["ram"]
  642. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.RAMCost
  643. }
  644. }
  645. }
  646. // (7) Split shared allocations and distribute among aggregated allocations
  647. if shareSet.Length() > 0 {
  648. shareCoefficients, err = computeShareCoeffs(properties, options, aggSet)
  649. if err != nil {
  650. log.Warningf("AllocationSet.AggregateBy: compute shared coeff: missing cluster ID: %s", err)
  651. return err
  652. }
  653. for _, alloc := range aggSet.allocations {
  654. if alloc.IsIdle() {
  655. // Skip idle allocations (they do not receive shared allocation)
  656. continue
  657. }
  658. // Distribute shared allocations by coefficient per-allocation
  659. // NOTE: share coefficients do not partition by cluster, like
  660. // idle coefficients do.
  661. for _, sharedAlloc := range shareSet.allocations {
  662. if _, ok := shareCoefficients[alloc.Name]; !ok {
  663. log.Errorf("ETL: share allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  664. continue
  665. }
  666. alloc.SharedCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  667. alloc.TotalCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  668. }
  669. }
  670. }
  671. // (8) Aggregate external allocations into aggregated allocations. This may
  672. // not be possible for every external allocation, but attempt to find an
  673. // exact key match, given each external allocation's proerties, and
  674. // aggregate if an exact match is found.
  675. for _, alloc := range externalSet.allocations {
  676. key, err := alloc.generateKey(properties)
  677. if err != nil {
  678. continue
  679. }
  680. alloc.Name = key
  681. aggSet.Insert(alloc)
  682. }
  683. // (9) Combine all idle allocations into a single "__idle__" allocation
  684. if !options.SplitIdle {
  685. for _, idleAlloc := range aggSet.IdleAllocations() {
  686. aggSet.Delete(idleAlloc.Name)
  687. idleAlloc.Name = IdleSuffix
  688. aggSet.Insert(idleAlloc)
  689. }
  690. }
  691. as.allocations = aggSet.allocations
  692. return nil
  693. }
  694. // TODO niko/etl deprecate the use of a map of resources here, we only use totals
  695. func computeShareCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]float64, error) {
  696. // Compute coeffs by totalling per-allocation, then dividing by the total.
  697. coeffs := map[string]float64{}
  698. // Compute totals for all allocations
  699. total := 0.0
  700. // ShareEven counts each aggregation with even weight, whereas ShareWeighted
  701. // counts each aggregation proportionally to its respective costs
  702. shareType := options.ShareSplit
  703. // Record allocation values first, then normalize by totals to get percentages
  704. for name, alloc := range as.allocations {
  705. if alloc.IsIdle() {
  706. // Skip idle allocations in coefficient calculation
  707. continue
  708. }
  709. if shareType == ShareEven {
  710. // Not additive - set to 1.0 for even distribution
  711. coeffs[name] = 1.0
  712. // Total is always additive
  713. total += 1.0
  714. } else {
  715. // Both are additive for weighted distribution
  716. coeffs[name] += alloc.TotalCost
  717. total += alloc.TotalCost
  718. }
  719. }
  720. // Normalize coefficients by totals
  721. for a := range coeffs {
  722. if coeffs[a] > 0 && total > 0 {
  723. coeffs[a] /= total
  724. } else {
  725. log.Warningf("ETL: invalid values for shared coefficients: %d, %d", coeffs[a], total)
  726. coeffs[a] = 0.0
  727. }
  728. }
  729. return coeffs, nil
  730. }
  731. func computeIdleCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]map[string]map[string]float64, error) {
  732. types := []string{"cpu", "gpu", "ram"}
  733. // Compute idle coefficients, then save them in AllocationAggregationOptions
  734. coeffs := map[string]map[string]map[string]float64{}
  735. // Compute totals per resource for CPU, GPU, RAM, and PV
  736. totals := map[string]map[string]float64{}
  737. // ShareEven counts each allocation with even weight, whereas ShareWeighted
  738. // counts each allocation proportionally to its respective costs
  739. shareType := options.ShareIdle
  740. // Record allocation values first, then normalize by totals to get percentages
  741. for _, alloc := range as.allocations {
  742. if alloc.IsIdle() {
  743. // Skip idle allocations in coefficient calculation
  744. continue
  745. }
  746. // If any of the share funcs succeed, share the allocation. Do this
  747. // prior to filtering so that shared namespaces, etc do not get
  748. // filtered out before we have a chance to share them.
  749. skip := false
  750. for _, sf := range options.ShareFuncs {
  751. if sf(alloc) {
  752. skip = true
  753. break
  754. }
  755. }
  756. if skip {
  757. continue
  758. }
  759. // We need to key the allocations by cluster id
  760. clusterID, err := alloc.Properties.GetCluster()
  761. if err != nil {
  762. return nil, err
  763. }
  764. // get the name key for the allocation
  765. name := alloc.Name
  766. // Create cluster based tables if they don't exist
  767. if _, ok := coeffs[clusterID]; !ok {
  768. coeffs[clusterID] = map[string]map[string]float64{}
  769. }
  770. if _, ok := totals[clusterID]; !ok {
  771. totals[clusterID] = map[string]float64{}
  772. }
  773. if _, ok := coeffs[clusterID][name]; !ok {
  774. coeffs[clusterID][name] = map[string]float64{}
  775. }
  776. if shareType == ShareEven {
  777. for _, r := range types {
  778. // Not additive - hard set to 1.0
  779. coeffs[clusterID][name][r] = 1.0
  780. // totals are additive
  781. totals[clusterID][r] += 1.0
  782. }
  783. } else {
  784. coeffs[clusterID][name]["cpu"] += alloc.CPUCost
  785. coeffs[clusterID][name]["gpu"] += alloc.GPUCost
  786. coeffs[clusterID][name]["ram"] += alloc.RAMCost
  787. totals[clusterID]["cpu"] += alloc.CPUCost
  788. totals[clusterID]["gpu"] += alloc.GPUCost
  789. totals[clusterID]["ram"] += alloc.RAMCost
  790. }
  791. }
  792. // Normalize coefficients by totals
  793. for c := range coeffs {
  794. for a := range coeffs[c] {
  795. for _, r := range types {
  796. if coeffs[c][a][r] > 0 && totals[c][r] > 0 {
  797. coeffs[c][a][r] /= totals[c][r]
  798. }
  799. }
  800. }
  801. }
  802. return coeffs, nil
  803. }
  804. func (alloc *Allocation) generateKey(properties Properties) (string, error) {
  805. // Names will ultimately be joined into a single name, which uniquely
  806. // identifies allocations.
  807. names := []string{}
  808. if properties.HasCluster() {
  809. cluster, err := alloc.Properties.GetCluster()
  810. if err != nil {
  811. return "", err
  812. }
  813. names = append(names, cluster)
  814. }
  815. if properties.HasNode() {
  816. node, err := alloc.Properties.GetNode()
  817. if err != nil {
  818. return "", err
  819. }
  820. names = append(names, node)
  821. }
  822. if properties.HasNamespace() {
  823. namespace, err := alloc.Properties.GetNamespace()
  824. if err != nil {
  825. return "", err
  826. }
  827. names = append(names, namespace)
  828. }
  829. if properties.HasControllerKind() {
  830. controllerKind, err := alloc.Properties.GetControllerKind()
  831. if err != nil {
  832. // Indicate that allocation has no controller
  833. controllerKind = UnallocatedSuffix
  834. }
  835. if prop, _ := properties.GetControllerKind(); prop != "" && prop != controllerKind {
  836. // The allocation does not have the specified controller kind
  837. controllerKind = UnallocatedSuffix
  838. }
  839. names = append(names, controllerKind)
  840. }
  841. if properties.HasController() {
  842. if !properties.HasControllerKind() {
  843. controllerKind, err := alloc.Properties.GetControllerKind()
  844. if err == nil {
  845. names = append(names, controllerKind)
  846. }
  847. }
  848. controller, err := alloc.Properties.GetController()
  849. if err != nil {
  850. // Indicate that allocation has no controller
  851. controller = UnallocatedSuffix
  852. }
  853. names = append(names, controller)
  854. }
  855. if properties.HasPod() {
  856. pod, err := alloc.Properties.GetPod()
  857. if err != nil {
  858. return "", err
  859. }
  860. names = append(names, pod)
  861. }
  862. if properties.HasContainer() {
  863. container, err := alloc.Properties.GetContainer()
  864. if err != nil {
  865. return "", err
  866. }
  867. names = append(names, container)
  868. }
  869. if properties.HasService() {
  870. services, err := alloc.Properties.GetServices()
  871. if err != nil {
  872. // Indicate that allocation has no services
  873. names = append(names, UnallocatedSuffix)
  874. } else {
  875. // TODO niko/etl support multi-service aggregation
  876. if len(services) > 0 {
  877. for _, service := range services {
  878. names = append(names, service)
  879. break
  880. }
  881. } else {
  882. // Indicate that allocation has no services
  883. names = append(names, UnallocatedSuffix)
  884. }
  885. }
  886. }
  887. if properties.HasAnnotations() {
  888. annotations, err := alloc.Properties.GetAnnotations() // annotations that the individual allocation possesses
  889. if err != nil {
  890. // Indicate that allocation has no annotations
  891. names = append(names, UnallocatedSuffix)
  892. } else {
  893. annotationNames := []string{}
  894. aggAnnotations, err := properties.GetAnnotations() // potential annotations to aggregate on supplied by the API caller
  895. if err != nil {
  896. // We've already checked HasAnnotation, so this should never occur
  897. return "", err
  898. }
  899. // calvin - support multi-annotation aggregation
  900. for annotationName := range aggAnnotations {
  901. if val, ok := annotations[annotationName]; ok {
  902. annotationNames = append(annotationNames, fmt.Sprintf("%s=%s", annotationName, val))
  903. } else if indexOf(UnallocatedSuffix, annotationNames) == -1 { // if UnallocatedSuffix not already in names
  904. annotationNames = append(annotationNames, UnallocatedSuffix)
  905. }
  906. }
  907. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  908. if len(annotationNames) > 1 {
  909. sort.Strings(annotationNames)
  910. }
  911. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, annotationNames)
  912. // suffix should be at index 0 if it exists b/c of underscores
  913. if unallocatedSuffixIndex != -1 {
  914. annotationNames = append(annotationNames[:unallocatedSuffixIndex], annotationNames[unallocatedSuffixIndex+1:]...)
  915. annotationNames = append(annotationNames, UnallocatedSuffix) // append to end
  916. }
  917. names = append(names, annotationNames...)
  918. }
  919. }
  920. if properties.HasLabel() {
  921. labels, err := alloc.Properties.GetLabels() // labels that the individual allocation possesses
  922. if err != nil {
  923. // Indicate that allocation has no labels
  924. names = append(names, UnallocatedSuffix)
  925. } else {
  926. labelNames := []string{}
  927. aggLabels, err := properties.GetLabels() // potential labels to aggregate on supplied by the API caller
  928. if err != nil {
  929. // We've already checked HasLabel, so this should never occur
  930. return "", err
  931. }
  932. // calvin - support multi-label aggregation
  933. for labelName := range aggLabels {
  934. if val, ok := labels[labelName]; ok {
  935. labelNames = append(labelNames, fmt.Sprintf("%s=%s", labelName, val))
  936. } else if indexOf(UnallocatedSuffix, labelNames) == -1 { // if UnallocatedSuffix not already in names
  937. labelNames = append(labelNames, UnallocatedSuffix)
  938. }
  939. }
  940. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  941. if len(labelNames) > 1 {
  942. sort.Strings(labelNames)
  943. }
  944. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, labelNames)
  945. // suffix should be at index 0 if it exists b/c of underscores
  946. if unallocatedSuffixIndex != -1 {
  947. labelNames = append(labelNames[:unallocatedSuffixIndex], labelNames[unallocatedSuffixIndex+1:]...)
  948. labelNames = append(labelNames, UnallocatedSuffix) // append to end
  949. }
  950. names = append(names, labelNames...)
  951. }
  952. }
  953. return strings.Join(names, "/"), nil
  954. }
  955. // TODO clean up
  956. // Helper function to check for slice membership. Not sure if repeated elsewhere in our codebase.
  957. func indexOf(v string, arr []string) int {
  958. for i, s := range arr {
  959. // This is caseless equivalence
  960. if strings.EqualFold(v, s) {
  961. return i
  962. }
  963. }
  964. return -1
  965. }
  966. // Clone returns a new AllocationSet with a deep copy of the given
  967. // AllocationSet's allocations.
  968. func (as *AllocationSet) Clone() *AllocationSet {
  969. if as == nil {
  970. return nil
  971. }
  972. as.RLock()
  973. defer as.RUnlock()
  974. allocs := map[string]*Allocation{}
  975. for k, v := range as.allocations {
  976. allocs[k] = v.Clone()
  977. }
  978. externalKeys := map[string]bool{}
  979. for k, v := range as.externalKeys {
  980. externalKeys[k] = v
  981. }
  982. idleKeys := map[string]bool{}
  983. for k, v := range as.idleKeys {
  984. idleKeys[k] = v
  985. }
  986. return &AllocationSet{
  987. allocations: allocs,
  988. externalKeys: externalKeys,
  989. idleKeys: idleKeys,
  990. Window: as.Window.Clone(),
  991. }
  992. }
  993. // ComputeIdleAllocations computes the idle allocations for the AllocationSet,
  994. // given a set of Assets. Ideally, assetSet should contain only Nodes, but if
  995. // it contains other Assets, they will be ignored; only CPU, GPU and RAM are
  996. // considered for idle allocation. One idle allocation per-cluster will be
  997. // computed and returned, keyed by cluster_id.
  998. func (as *AllocationSet) ComputeIdleAllocations(assetSet *AssetSet) (map[string]*Allocation, error) {
  999. if as == nil {
  1000. return nil, fmt.Errorf("cannot compute idle allocation for nil AllocationSet")
  1001. }
  1002. // TODO: external allocation: remove after testing and benchmarking
  1003. profStart := time.Now()
  1004. defer log.Profile(profStart, fmt.Sprintf("ComputeIdleAllocations: %s", as.Window))
  1005. if assetSet == nil {
  1006. return nil, fmt.Errorf("cannot compute idle allocation with nil AssetSet")
  1007. }
  1008. if !as.Window.Equal(assetSet.Window) {
  1009. return nil, fmt.Errorf("cannot compute idle allocation for sets with mismatched windows: %s != %s", as.Window, assetSet.Window)
  1010. }
  1011. window := as.Window
  1012. // Build a map of cumulative cluster asset costs, per resource; i.e.
  1013. // cluster-to-{cpu|gpu|ram}-to-cost.
  1014. assetClusterResourceCosts := map[string]map[string]float64{}
  1015. assetSet.Each(func(key string, a Asset) {
  1016. if node, ok := a.(*Node); ok {
  1017. if _, ok := assetClusterResourceCosts[node.Properties().Cluster]; !ok {
  1018. assetClusterResourceCosts[node.Properties().Cluster] = map[string]float64{}
  1019. }
  1020. assetClusterResourceCosts[node.Properties().Cluster]["cpu"] += node.CPUCost * (1.0 - node.Discount)
  1021. assetClusterResourceCosts[node.Properties().Cluster]["gpu"] += node.GPUCost * (1.0 - node.Discount)
  1022. assetClusterResourceCosts[node.Properties().Cluster]["ram"] += node.RAMCost * (1.0 - node.Discount)
  1023. }
  1024. })
  1025. // Determine start, end on a per-cluster basis
  1026. clusterStarts := map[string]time.Time{}
  1027. clusterEnds := map[string]time.Time{}
  1028. // Subtract allocated costs from asset costs, leaving only the remaining
  1029. // idle costs.
  1030. as.Each(func(name string, a *Allocation) {
  1031. cluster, err := a.Properties.GetCluster()
  1032. if err != nil {
  1033. // Failed to find allocation's cluster
  1034. return
  1035. }
  1036. if _, ok := assetClusterResourceCosts[cluster]; !ok {
  1037. // Failed to find assets for allocation's cluster
  1038. return
  1039. }
  1040. // Set cluster (start, end) if they are either not currently set,
  1041. // or if the detected (start, end) of the current allocation falls
  1042. // before or after, respectively, the current values.
  1043. if s, ok := clusterStarts[cluster]; !ok || a.Start.Before(s) {
  1044. clusterStarts[cluster] = a.Start
  1045. }
  1046. if e, ok := clusterEnds[cluster]; !ok || a.End.Before(e) {
  1047. clusterEnds[cluster] = a.End
  1048. }
  1049. assetClusterResourceCosts[cluster]["cpu"] -= a.CPUCost
  1050. assetClusterResourceCosts[cluster]["gpu"] -= a.GPUCost
  1051. assetClusterResourceCosts[cluster]["ram"] -= a.RAMCost
  1052. })
  1053. // Turn remaining un-allocated asset costs into idle allocations
  1054. idleAllocs := map[string]*Allocation{}
  1055. for cluster, resources := range assetClusterResourceCosts {
  1056. // Default start and end to the (start, end) of the given window, but
  1057. // use the actual, detected (start, end) pair if they are available.
  1058. start := *window.Start()
  1059. if s, ok := clusterStarts[cluster]; ok && window.Contains(s) {
  1060. start = s
  1061. }
  1062. end := *window.End()
  1063. if e, ok := clusterEnds[cluster]; ok && window.Contains(e) {
  1064. end = e
  1065. }
  1066. idleAlloc := &Allocation{
  1067. Name: fmt.Sprintf("%s/%s", cluster, IdleSuffix),
  1068. Window: window.Clone(),
  1069. Properties: Properties{ClusterProp: cluster},
  1070. Start: start,
  1071. End: end,
  1072. CPUCost: resources["cpu"],
  1073. GPUCost: resources["gpu"],
  1074. RAMCost: resources["ram"],
  1075. }
  1076. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.GPUCost + idleAlloc.RAMCost
  1077. // Do not continue if multiple idle allocations are computed for a
  1078. // single cluster.
  1079. if _, ok := idleAllocs[cluster]; ok {
  1080. return nil, fmt.Errorf("duplicate idle allocations for cluster %s", cluster)
  1081. }
  1082. idleAllocs[cluster] = idleAlloc
  1083. }
  1084. return idleAllocs, nil
  1085. }
  1086. // Delete removes the allocation with the given name from the set
  1087. func (as *AllocationSet) Delete(name string) {
  1088. if as == nil {
  1089. return
  1090. }
  1091. as.Lock()
  1092. defer as.Unlock()
  1093. delete(as.externalKeys, name)
  1094. delete(as.idleKeys, name)
  1095. delete(as.allocations, name)
  1096. }
  1097. // Each invokes the given function for each Allocation in the set
  1098. func (as *AllocationSet) Each(f func(string, *Allocation)) {
  1099. if as == nil {
  1100. return
  1101. }
  1102. for k, a := range as.allocations {
  1103. f(k, a)
  1104. }
  1105. }
  1106. // End returns the End time of the AllocationSet window
  1107. func (as *AllocationSet) End() time.Time {
  1108. if as == nil {
  1109. log.Warningf("Allocation ETL: calling End on nil AllocationSet")
  1110. return time.Unix(0, 0)
  1111. }
  1112. if as.Window.End() == nil {
  1113. log.Warningf("Allocation ETL: AllocationSet with illegal window: End is nil; len(as.allocations)=%d", len(as.allocations))
  1114. return time.Unix(0, 0)
  1115. }
  1116. return *as.Window.End()
  1117. }
  1118. // Get returns the Allocation at the given key in the AllocationSet
  1119. func (as *AllocationSet) Get(key string) *Allocation {
  1120. as.RLock()
  1121. defer as.RUnlock()
  1122. if alloc, ok := as.allocations[key]; ok {
  1123. return alloc
  1124. }
  1125. return nil
  1126. }
  1127. // ExternalAllocations returns a map of the external allocations in the set.
  1128. // Returns clones of the actual Allocations, so mutability is not a problem.
  1129. func (as *AllocationSet) ExternalAllocations() map[string]*Allocation {
  1130. externals := map[string]*Allocation{}
  1131. if as.IsEmpty() {
  1132. return externals
  1133. }
  1134. as.RLock()
  1135. defer as.RUnlock()
  1136. for key := range as.externalKeys {
  1137. if alloc, ok := as.allocations[key]; ok {
  1138. externals[key] = alloc.Clone()
  1139. }
  1140. }
  1141. return externals
  1142. }
  1143. // ExternalCost returns the total aggregated external costs of the set
  1144. func (as *AllocationSet) ExternalCost() float64 {
  1145. if as.IsEmpty() {
  1146. return 0.0
  1147. }
  1148. as.RLock()
  1149. defer as.RUnlock()
  1150. externalCost := 0.0
  1151. for _, alloc := range as.allocations {
  1152. externalCost += alloc.ExternalCost
  1153. }
  1154. return externalCost
  1155. }
  1156. // IdleAllocations returns a map of the idle allocations in the AllocationSet.
  1157. // Returns clones of the actual Allocations, so mutability is not a problem.
  1158. func (as *AllocationSet) IdleAllocations() map[string]*Allocation {
  1159. idles := map[string]*Allocation{}
  1160. if as.IsEmpty() {
  1161. return idles
  1162. }
  1163. as.RLock()
  1164. defer as.RUnlock()
  1165. for key := range as.idleKeys {
  1166. if alloc, ok := as.allocations[key]; ok {
  1167. idles[key] = alloc.Clone()
  1168. }
  1169. }
  1170. return idles
  1171. }
  1172. // Insert aggregates the current entry in the AllocationSet by the given Allocation,
  1173. // but only if the Allocation is valid, i.e. matches the AllocationSet's window. If
  1174. // there is no existing entry, one is created. Nil error response indicates success.
  1175. func (as *AllocationSet) Insert(that *Allocation) error {
  1176. return as.insert(that, false)
  1177. }
  1178. func (as *AllocationSet) insert(that *Allocation, accumulate bool) error {
  1179. if as == nil {
  1180. return fmt.Errorf("cannot insert into nil AllocationSet")
  1181. }
  1182. as.Lock()
  1183. defer as.Unlock()
  1184. if as.allocations == nil {
  1185. as.allocations = map[string]*Allocation{}
  1186. }
  1187. if as.externalKeys == nil {
  1188. as.externalKeys = map[string]bool{}
  1189. }
  1190. if as.idleKeys == nil {
  1191. as.idleKeys = map[string]bool{}
  1192. }
  1193. // Add the given Allocation to the existing entry, if there is one;
  1194. // otherwise just set directly into allocations
  1195. if _, ok := as.allocations[that.Name]; !ok {
  1196. as.allocations[that.Name] = that
  1197. } else {
  1198. as.allocations[that.Name].add(that)
  1199. }
  1200. // If the given Allocation is an external one, record that
  1201. if that.IsExternal() {
  1202. as.externalKeys[that.Name] = true
  1203. }
  1204. // If the given Allocation is an idle one, record that
  1205. if that.IsIdle() {
  1206. as.idleKeys[that.Name] = true
  1207. }
  1208. return nil
  1209. }
  1210. // IsEmpty returns true if the AllocationSet is nil, or if it contains
  1211. // zero allocations.
  1212. func (as *AllocationSet) IsEmpty() bool {
  1213. if as == nil || len(as.allocations) == 0 {
  1214. return true
  1215. }
  1216. as.RLock()
  1217. defer as.RUnlock()
  1218. return as.allocations == nil || len(as.allocations) == 0
  1219. }
  1220. // Length returns the number of Allocations in the set
  1221. func (as *AllocationSet) Length() int {
  1222. if as == nil {
  1223. return 0
  1224. }
  1225. as.RLock()
  1226. defer as.RUnlock()
  1227. return len(as.allocations)
  1228. }
  1229. // Map clones and returns a map of the AllocationSet's Allocations
  1230. func (as *AllocationSet) Map() map[string]*Allocation {
  1231. if as.IsEmpty() {
  1232. return map[string]*Allocation{}
  1233. }
  1234. return as.Clone().allocations
  1235. }
  1236. // MarshalJSON JSON-encodes the AllocationSet
  1237. func (as *AllocationSet) MarshalJSON() ([]byte, error) {
  1238. as.RLock()
  1239. defer as.RUnlock()
  1240. return json.Marshal(as.allocations)
  1241. }
  1242. // Resolution returns the AllocationSet's window duration
  1243. func (as *AllocationSet) Resolution() time.Duration {
  1244. return as.Window.Duration()
  1245. }
  1246. // Set uses the given Allocation to overwrite the existing entry in the
  1247. // AllocationSet under the Allocation's name.
  1248. func (as *AllocationSet) Set(alloc *Allocation) error {
  1249. if as.IsEmpty() {
  1250. as.Lock()
  1251. as.allocations = map[string]*Allocation{}
  1252. as.externalKeys = map[string]bool{}
  1253. as.idleKeys = map[string]bool{}
  1254. as.Unlock()
  1255. }
  1256. as.Lock()
  1257. defer as.Unlock()
  1258. as.allocations[alloc.Name] = alloc
  1259. // If the given Allocation is an external one, record that
  1260. if alloc.IsExternal() {
  1261. as.externalKeys[alloc.Name] = true
  1262. }
  1263. // If the given Allocation is an idle one, record that
  1264. if alloc.IsIdle() {
  1265. as.idleKeys[alloc.Name] = true
  1266. }
  1267. return nil
  1268. }
  1269. // Start returns the Start time of the AllocationSet window
  1270. func (as *AllocationSet) Start() time.Time {
  1271. if as == nil {
  1272. log.Warningf("Allocation ETL: calling Start on nil AllocationSet")
  1273. return time.Unix(0, 0)
  1274. }
  1275. if as.Window.Start() == nil {
  1276. log.Warningf("Allocation ETL: AllocationSet with illegal window: Start is nil; len(as.allocations)=%d", len(as.allocations))
  1277. return time.Unix(0, 0)
  1278. }
  1279. return *as.Window.Start()
  1280. }
  1281. // String represents the given Allocation as a string
  1282. func (as *AllocationSet) String() string {
  1283. if as == nil {
  1284. return "<nil>"
  1285. }
  1286. return fmt.Sprintf("AllocationSet{length: %d; window: %s; totalCost: %.2f}",
  1287. as.Length(), as.Window, as.TotalCost())
  1288. }
  1289. // TotalCost returns the sum of all TotalCosts of the allocations contained
  1290. func (as *AllocationSet) TotalCost() float64 {
  1291. if as.IsEmpty() {
  1292. return 0.0
  1293. }
  1294. as.RLock()
  1295. defer as.RUnlock()
  1296. tc := 0.0
  1297. for _, a := range as.allocations {
  1298. tc += a.TotalCost
  1299. }
  1300. return tc
  1301. }
  1302. // UTCOffset returns the AllocationSet's configured UTCOffset.
  1303. func (as *AllocationSet) UTCOffset() time.Duration {
  1304. _, zone := as.Start().Zone()
  1305. return time.Duration(zone) * time.Second
  1306. }
  1307. func (as *AllocationSet) accumulate(that *AllocationSet) (*AllocationSet, error) {
  1308. if as.IsEmpty() {
  1309. return that, nil
  1310. }
  1311. if that.IsEmpty() {
  1312. return as, nil
  1313. }
  1314. // TODO niko/cdmr implement first
  1315. // if that.Window.Overlaps(as.Window) {
  1316. // return nil, fmt.Errorf("AllocationSet.accumulate: overlapping windows: %s", that.Window, as.Window)
  1317. // }
  1318. // Set start, end to min(start), max(end)
  1319. start := as.Start()
  1320. end := as.End()
  1321. if that.Start().Before(start) {
  1322. start = that.Start()
  1323. }
  1324. if that.End().After(end) {
  1325. end = that.End()
  1326. }
  1327. acc := NewAllocationSet(start, end)
  1328. as.RLock()
  1329. defer as.RUnlock()
  1330. that.RLock()
  1331. defer that.RUnlock()
  1332. for _, alloc := range as.allocations {
  1333. err := acc.insert(alloc, true)
  1334. if err != nil {
  1335. return nil, err
  1336. }
  1337. }
  1338. for _, alloc := range that.allocations {
  1339. err := acc.insert(alloc, true)
  1340. if err != nil {
  1341. return nil, err
  1342. }
  1343. }
  1344. return acc, nil
  1345. }
  1346. // AllocationSetRange is a thread-safe slice of AllocationSets. It is meant to
  1347. // be used such that the AllocationSets held are consecutive and coherent with
  1348. // respect to using the same aggregation properties, UTC offset, and
  1349. // resolution. However these rules are not necessarily enforced, so use wisely.
  1350. type AllocationSetRange struct {
  1351. sync.RWMutex
  1352. allocations []*AllocationSet
  1353. }
  1354. // NewAllocationSetRange instantiates a new range composed of the given
  1355. // AllocationSets in the order provided.
  1356. func NewAllocationSetRange(allocs ...*AllocationSet) *AllocationSetRange {
  1357. return &AllocationSetRange{
  1358. allocations: allocs,
  1359. }
  1360. }
  1361. // Accumulate sums each AllocationSet in the given range, returning a single cumulative
  1362. // AllocationSet for the entire range.
  1363. func (asr *AllocationSetRange) Accumulate() (*AllocationSet, error) {
  1364. var allocSet *AllocationSet
  1365. var err error
  1366. asr.RLock()
  1367. defer asr.RUnlock()
  1368. for _, as := range asr.allocations {
  1369. allocSet, err = allocSet.accumulate(as)
  1370. if err != nil {
  1371. return nil, err
  1372. }
  1373. }
  1374. return allocSet, nil
  1375. }
  1376. // TODO niko/etl accumulate into lower-resolution chunks of the given resolution
  1377. // func (asr *AllocationSetRange) AccumulateBy(resolution time.Duration) *AllocationSetRange
  1378. // AggregateBy aggregates each AllocationSet in the range by the given
  1379. // properties and options.
  1380. func (asr *AllocationSetRange) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  1381. aggRange := &AllocationSetRange{allocations: []*AllocationSet{}}
  1382. asr.Lock()
  1383. defer asr.Unlock()
  1384. for _, as := range asr.allocations {
  1385. err := as.AggregateBy(properties, options)
  1386. if err != nil {
  1387. return err
  1388. }
  1389. aggRange.allocations = append(aggRange.allocations, as)
  1390. }
  1391. asr.allocations = aggRange.allocations
  1392. return nil
  1393. }
  1394. // Append appends the given AllocationSet to the end of the range. It does not
  1395. // validate whether or not that violates window continuity.
  1396. func (asr *AllocationSetRange) Append(that *AllocationSet) {
  1397. asr.Lock()
  1398. defer asr.Unlock()
  1399. asr.allocations = append(asr.allocations, that)
  1400. }
  1401. // Each invokes the given function for each AllocationSet in the range
  1402. func (asr *AllocationSetRange) Each(f func(int, *AllocationSet)) {
  1403. if asr == nil {
  1404. return
  1405. }
  1406. for i, as := range asr.allocations {
  1407. f(i, as)
  1408. }
  1409. }
  1410. // Get retrieves the AllocationSet at the given index of the range.
  1411. func (asr *AllocationSetRange) Get(i int) (*AllocationSet, error) {
  1412. if i < 0 || i >= len(asr.allocations) {
  1413. return nil, fmt.Errorf("AllocationSetRange: index out of range: %d", i)
  1414. }
  1415. asr.RLock()
  1416. defer asr.RUnlock()
  1417. return asr.allocations[i], nil
  1418. }
  1419. // InsertRange merges the given AllocationSetRange into the receiving one by
  1420. // lining up sets with matching windows, then inserting each allocation from
  1421. // the given ASR into the respective set in the receiving ASR. If the given
  1422. // ASR contains an AllocationSet from a window that does not exist in the
  1423. // receiving ASR, then an error is returned. However, the given ASR does not
  1424. // need to cover the full range of the receiver.
  1425. func (asr *AllocationSetRange) InsertRange(that *AllocationSetRange) error {
  1426. if asr == nil {
  1427. return fmt.Errorf("cannot insert range into nil AllocationSetRange")
  1428. }
  1429. // keys maps window to index in asr
  1430. keys := map[string]int{}
  1431. asr.Each(func(i int, as *AllocationSet) {
  1432. if as == nil {
  1433. return
  1434. }
  1435. keys[as.Window.String()] = i
  1436. })
  1437. // Nothing to merge, so simply return
  1438. if len(keys) == 0 {
  1439. return nil
  1440. }
  1441. var err error
  1442. that.Each(func(j int, thatAS *AllocationSet) {
  1443. if thatAS == nil || err != nil {
  1444. return
  1445. }
  1446. // Find matching AllocationSet in asr
  1447. i, ok := keys[thatAS.Window.String()]
  1448. if !ok {
  1449. err = fmt.Errorf("cannot merge AllocationSet into window that does not exist: %s", thatAS.Window.String())
  1450. return
  1451. }
  1452. as, err := asr.Get(i)
  1453. if err != nil {
  1454. err = fmt.Errorf("AllocationSetRange index does not exist: %d", i)
  1455. return
  1456. }
  1457. // Insert each Allocation from the given set
  1458. thatAS.Each(func(k string, alloc *Allocation) {
  1459. err = as.Insert(alloc)
  1460. if err != nil {
  1461. err = fmt.Errorf("error inserting allocation: %s", err)
  1462. return
  1463. }
  1464. })
  1465. })
  1466. // err might be nil
  1467. return err
  1468. }
  1469. // Length returns the length of the range, which is zero if nil
  1470. func (asr *AllocationSetRange) Length() int {
  1471. if asr == nil || asr.allocations == nil {
  1472. return 0
  1473. }
  1474. asr.RLock()
  1475. defer asr.RUnlock()
  1476. return len(asr.allocations)
  1477. }
  1478. // MarshalJSON JSON-encodes the range
  1479. func (asr *AllocationSetRange) MarshalJSON() ([]byte, error) {
  1480. asr.RLock()
  1481. asr.RUnlock()
  1482. return json.Marshal(asr.allocations)
  1483. }
  1484. // Slice copies the underlying slice of AllocationSets, maintaining order,
  1485. // and returns the copied slice.
  1486. func (asr *AllocationSetRange) Slice() []*AllocationSet {
  1487. if asr == nil || asr.allocations == nil {
  1488. return nil
  1489. }
  1490. asr.RLock()
  1491. defer asr.RUnlock()
  1492. copy := []*AllocationSet{}
  1493. for _, as := range asr.allocations {
  1494. copy = append(copy, as.Clone())
  1495. }
  1496. return copy
  1497. }
  1498. // String represents the given AllocationSetRange as a string
  1499. func (asr *AllocationSetRange) String() string {
  1500. if asr == nil {
  1501. return "<nil>"
  1502. }
  1503. return fmt.Sprintf("AllocationSetRange{length: %d}", asr.Length())
  1504. }
  1505. // UTCOffset returns the detected UTCOffset of the AllocationSets within the
  1506. // range. Defaults to 0 if the range is nil or empty. Does not warn if there
  1507. // are sets with conflicting UTCOffsets (just returns the first).
  1508. func (asr *AllocationSetRange) UTCOffset() time.Duration {
  1509. if asr.Length() == 0 {
  1510. return 0
  1511. }
  1512. as, err := asr.Get(0)
  1513. if err != nil {
  1514. return 0
  1515. }
  1516. return as.UTCOffset()
  1517. }
  1518. // Window returns the full window that the AllocationSetRange spans, from the
  1519. // start of the first AllocationSet to the end of the last one.
  1520. func (asr *AllocationSetRange) Window() Window {
  1521. if asr == nil || asr.Length() == 0 {
  1522. return NewWindow(nil, nil)
  1523. }
  1524. start := asr.allocations[0].Start()
  1525. end := asr.allocations[asr.Length()-1].End()
  1526. return NewWindow(&start, &end)
  1527. }