allocation.go 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842
  1. package kubecost
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "fmt"
  6. "sort"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/kubecost/cost-model/pkg/log"
  11. )
  12. // TODO Clean-up use of IsEmpty; nil checks should be separated for safety.
  13. // TODO Consider making Allocation an interface, which is fulfilled by structs
  14. // like KubernetesAllocation, IdleAllocation, and ExternalAllocation.
  15. // ExternalSuffix indicates an external allocation
  16. const ExternalSuffix = "__external__"
  17. // IdleSuffix indicates an idle allocation property
  18. const IdleSuffix = "__idle__"
  19. // SharedSuffix indicates an shared allocation property
  20. const SharedSuffix = "__shared__"
  21. // UnallocatedSuffix indicates an unallocated allocation property
  22. const UnallocatedSuffix = "__unallocated__"
  23. // UnmountedSuffix indicated allocation to an unmounted PV
  24. const UnmountedSuffix = "__unmounted__"
  25. // ShareWeighted indicates that a shared resource should be shared as a
  26. // proportion of the cost of the remaining allocations.
  27. const ShareWeighted = "__weighted__"
  28. // ShareEven indicates that a shared resource should be shared evenly across
  29. // all remaining allocations.
  30. const ShareEven = "__even__"
  31. // ShareNone indicates that a shareable resource should not be shared
  32. const ShareNone = "__none__"
  33. // Allocation is a unit of resource allocation and cost for a given window
  34. // of time and for a given kubernetes construct with its associated set of
  35. // properties.
  36. // TODO niko/computeallocation compute efficiency on the fly?
  37. type Allocation struct {
  38. Name string `json:"name"`
  39. Properties Properties `json:"properties,omitempty"`
  40. Window Window `json:"window"`
  41. Start time.Time `json:"start"`
  42. End time.Time `json:"end"`
  43. CPUCoreHours float64 `json:"cpuCoreHours"`
  44. CPUCoreRequestAverage float64 `json:"cpuCoreRequestAverage"`
  45. CPUCoreUsageAverage float64 `json:"cpuCoreUsageAverage"`
  46. CPUCost float64 `json:"cpuCost"`
  47. GPUHours float64 `json:"gpuHours"`
  48. GPUCost float64 `json:"gpuCost"`
  49. NetworkCost float64 `json:"networkCost"`
  50. PVByteHours float64 `json:"pvByteHours"`
  51. PVCost float64 `json:"pvCost"`
  52. RAMByteHours float64 `json:"ramByteHours"`
  53. RAMBytesRequestAverage float64 `json:"ramBytesRequestAverage"`
  54. RAMBytesUsageAverage float64 `json:"ramBytesUsageAverage"`
  55. RAMCost float64 `json:"ramCost"`
  56. SharedCost float64 `json:"sharedCost"`
  57. ExternalCost float64 `json:"externalCost"`
  58. TotalCost float64 `json:"totalCost"`
  59. }
// AllocationMatchFunc is a function that can be used to match Allocations by
// returning true for any given Allocation if a condition is met. It is the
// element type of the FilterFuncs and ShareFuncs lists in
// AllocationAggregationOptions.
type AllocationMatchFunc func(*Allocation) bool
  63. // Add returns the result of summing the two given Allocations, which sums the
  64. // summary fields (e.g. costs, resources) and recomputes efficiency. Neither of
  65. // the two original Allocations are mutated in the process.
  66. func (a *Allocation) Add(that *Allocation) (*Allocation, error) {
  67. if a == nil {
  68. return that.Clone(), nil
  69. }
  70. if that == nil {
  71. return a.Clone(), nil
  72. }
  73. // Note: no need to clone "that", as add only mutates the receiver
  74. agg := a.Clone()
  75. agg.add(that)
  76. return agg, nil
  77. }
  78. // Clone returns a deep copy of the given Allocation
  79. func (a *Allocation) Clone() *Allocation {
  80. if a == nil {
  81. return nil
  82. }
  83. return &Allocation{
  84. Name: a.Name,
  85. Properties: a.Properties.Clone(),
  86. Window: a.Window.Clone(),
  87. Start: a.Start,
  88. End: a.End,
  89. CPUCoreHours: a.CPUCoreHours,
  90. CPUCoreRequestAverage: a.CPUCoreRequestAverage,
  91. CPUCoreUsageAverage: a.CPUCoreUsageAverage,
  92. CPUCost: a.CPUCost,
  93. GPUHours: a.GPUHours,
  94. GPUCost: a.GPUCost,
  95. NetworkCost: a.NetworkCost,
  96. PVByteHours: a.PVByteHours,
  97. PVCost: a.PVCost,
  98. RAMByteHours: a.RAMByteHours,
  99. RAMBytesRequestAverage: a.RAMBytesRequestAverage,
  100. RAMBytesUsageAverage: a.RAMBytesUsageAverage,
  101. RAMCost: a.RAMCost,
  102. SharedCost: a.SharedCost,
  103. ExternalCost: a.ExternalCost,
  104. TotalCost: a.TotalCost,
  105. }
  106. }
  107. // Equal returns true if the values held in the given Allocation precisely
  108. // match those of the receiving Allocation. nil does not match nil.
  109. func (a *Allocation) Equal(that *Allocation) bool {
  110. if a == nil || that == nil {
  111. return false
  112. }
  113. if a.Name != that.Name {
  114. return false
  115. }
  116. if !a.Properties.Equal(&that.Properties) {
  117. return false
  118. }
  119. if !a.Window.Equal(that.Window) {
  120. return false
  121. }
  122. if !a.Start.Equal(that.Start) {
  123. return false
  124. }
  125. if !a.End.Equal(that.End) {
  126. return false
  127. }
  128. if a.CPUCoreHours != that.CPUCoreHours {
  129. return false
  130. }
  131. if a.CPUCost != that.CPUCost {
  132. return false
  133. }
  134. if a.GPUHours != that.GPUHours {
  135. return false
  136. }
  137. if a.GPUCost != that.GPUCost {
  138. return false
  139. }
  140. if a.NetworkCost != that.NetworkCost {
  141. return false
  142. }
  143. if a.PVByteHours != that.PVByteHours {
  144. return false
  145. }
  146. if a.PVCost != that.PVCost {
  147. return false
  148. }
  149. if a.RAMByteHours != that.RAMByteHours {
  150. return false
  151. }
  152. if a.RAMCost != that.RAMCost {
  153. return false
  154. }
  155. if a.SharedCost != that.SharedCost {
  156. return false
  157. }
  158. if a.ExternalCost != that.ExternalCost {
  159. return false
  160. }
  161. if a.TotalCost != that.TotalCost {
  162. return false
  163. }
  164. return true
  165. }
  166. func (a *Allocation) CPUEfficiency() float64 {
  167. if a.CPUCoreRequestAverage > 0 {
  168. return a.CPUCoreUsageAverage / a.CPUCoreRequestAverage
  169. }
  170. return 1.0
  171. }
  172. func (a *Allocation) RAMEfficiency() float64 {
  173. if a.RAMBytesRequestAverage > 0 {
  174. return a.RAMBytesUsageAverage / a.RAMBytesRequestAverage
  175. }
  176. return 1.0
  177. }
  178. func (a *Allocation) TotalEfficiency() float64 {
  179. if a.CPUCost+a.RAMCost > 0 {
  180. ramCostEff := a.RAMEfficiency() * a.RAMCost
  181. cpuCostEff := a.CPUEfficiency() * a.CPUCost
  182. return (ramCostEff + cpuCostEff) / (a.CPUCost + a.RAMCost)
  183. }
  184. return 0.0
  185. }
  186. // CPUCores converts the Allocation's CPUCoreHours into average CPUCores
  187. func (a *Allocation) CPUCores() float64 {
  188. if a.Minutes() <= 0.0 {
  189. return 0.0
  190. }
  191. return a.CPUCoreHours / (a.Minutes() / 60.0)
  192. }
  193. // RAMBytes converts the Allocation's RAMByteHours into average RAMBytes
  194. func (a *Allocation) RAMBytes() float64 {
  195. if a.Minutes() <= 0.0 {
  196. return 0.0
  197. }
  198. return a.RAMByteHours / (a.Minutes() / 60.0)
  199. }
  200. // PVBytes converts the Allocation's PVByteHours into average PVBytes
  201. func (a *Allocation) PVBytes() float64 {
  202. if a.Minutes() <= 0.0 {
  203. return 0.0
  204. }
  205. return a.PVByteHours / (a.Minutes() / 60.0)
  206. }
  207. // MarshalJSON implements json.Marshal interface
  208. func (a *Allocation) MarshalJSON() ([]byte, error) {
  209. buffer := bytes.NewBufferString("{")
  210. jsonEncodeString(buffer, "name", a.Name, ",")
  211. jsonEncode(buffer, "properties", a.Properties, ",")
  212. jsonEncode(buffer, "window", a.Window, ",")
  213. jsonEncodeString(buffer, "start", a.Start.Format(timeFmt), ",")
  214. jsonEncodeString(buffer, "end", a.End.Format(timeFmt), ",")
  215. jsonEncodeFloat64(buffer, "minutes", a.Minutes(), ",")
  216. jsonEncodeFloat64(buffer, "cpuCores", a.CPUCores(), ",")
  217. jsonEncodeFloat64(buffer, "cpuCoreRequestAverage", a.CPUCoreRequestAverage, ",")
  218. jsonEncodeFloat64(buffer, "cpuCoreUsageAverage", a.CPUCoreUsageAverage, ",")
  219. jsonEncodeFloat64(buffer, "cpuCoreHours", a.CPUCoreHours, ",")
  220. jsonEncodeFloat64(buffer, "cpuCost", a.CPUCost, ",")
  221. jsonEncodeFloat64(buffer, "cpuEfficiency", a.CPUEfficiency(), ",")
  222. jsonEncodeFloat64(buffer, "gpuHours", a.GPUHours, ",")
  223. jsonEncodeFloat64(buffer, "gpuCost", a.GPUCost, ",")
  224. jsonEncodeFloat64(buffer, "networkCost", a.NetworkCost, ",")
  225. jsonEncodeFloat64(buffer, "pvBytes", a.PVBytes(), ",")
  226. jsonEncodeFloat64(buffer, "pvByteHours", a.PVByteHours, ",")
  227. jsonEncodeFloat64(buffer, "pvCost", a.PVCost, ",")
  228. jsonEncodeFloat64(buffer, "ramBytes", a.RAMBytes(), ",")
  229. jsonEncodeFloat64(buffer, "ramByteRequestAverage", a.RAMBytesRequestAverage, ",")
  230. jsonEncodeFloat64(buffer, "ramByteUsageAverage", a.RAMBytesUsageAverage, ",")
  231. jsonEncodeFloat64(buffer, "ramByteHours", a.RAMByteHours, ",")
  232. jsonEncodeFloat64(buffer, "ramCost", a.RAMCost, ",")
  233. jsonEncodeFloat64(buffer, "ramEfficiency", a.RAMEfficiency(), ",")
  234. jsonEncodeFloat64(buffer, "sharedCost", a.SharedCost, ",")
  235. jsonEncodeFloat64(buffer, "totalCost", a.TotalCost, ",")
  236. jsonEncodeFloat64(buffer, "totalEfficiency", a.TotalEfficiency(), "")
  237. buffer.WriteString("}")
  238. return buffer.Bytes(), nil
  239. }
  240. // TODO niko/computeallocation
  241. // func (a *Allocation)UnmarshalJSON()
  242. // Resolution returns the duration of time covered by the Allocation
  243. func (a *Allocation) Resolution() time.Duration {
  244. return a.End.Sub(a.Start)
  245. }
  246. // IsAggregated is true if the given Allocation has been aggregated, which we
  247. // define by a lack of Properties.
  248. func (a *Allocation) IsAggregated() bool {
  249. return a == nil || a.Properties == nil
  250. }
  251. // IsExternal is true if the given Allocation represents external costs.
  252. func (a *Allocation) IsExternal() bool {
  253. return strings.Contains(a.Name, ExternalSuffix)
  254. }
  255. // IsIdle is true if the given Allocation represents idle costs.
  256. func (a *Allocation) IsIdle() bool {
  257. return strings.Contains(a.Name, IdleSuffix)
  258. }
  259. // IsUnallocated is true if the given Allocation represents unallocated costs.
  260. func (a *Allocation) IsUnallocated() bool {
  261. return strings.Contains(a.Name, UnallocatedSuffix)
  262. }
  263. // Minutes returns the number of minutes the Allocation represents, as defined
  264. // by the difference between the end and start times.
  265. func (a *Allocation) Minutes() float64 {
  266. return a.End.Sub(a.Start).Minutes()
  267. }
  268. // Share works like Add, but converts the entire cost of the given Allocation
  269. // to SharedCost, rather than adding to the individual resource costs.
  270. // TODO niko/computeallocation unit test changes!!!
  271. func (a *Allocation) Share(that *Allocation) (*Allocation, error) {
  272. if that == nil {
  273. return a.Clone(), nil
  274. }
  275. // Convert all costs of shared Allocation to SharedCost, zero out all
  276. // non-shared costs, then add.
  277. share := that.Clone()
  278. share.SharedCost += share.TotalCost
  279. share.CPUCost = 0
  280. share.CPUCoreHours = 0
  281. share.RAMCost = 0
  282. share.RAMByteHours = 0
  283. share.GPUCost = 0
  284. share.GPUHours = 0
  285. share.PVCost = 0
  286. share.PVByteHours = 0
  287. share.NetworkCost = 0
  288. share.ExternalCost = 0
  289. if a == nil {
  290. return share, nil
  291. }
  292. agg := a.Clone()
  293. agg.add(that)
  294. return agg, nil
  295. }
  296. // String represents the given Allocation as a string
  297. func (a *Allocation) String() string {
  298. return fmt.Sprintf("%s%s=%.2f", a.Name, NewWindow(&a.Start, &a.End), a.TotalCost)
  299. }
  300. func (a *Allocation) add(that *Allocation) {
  301. if a == nil {
  302. log.Warningf("Allocation.AggregateBy: trying to add a nil receiver")
  303. return
  304. }
  305. aCluster, _ := a.Properties.GetCluster()
  306. thatCluster, _ := that.Properties.GetCluster()
  307. aNode, _ := a.Properties.GetNode()
  308. thatNode, _ := that.Properties.GetNode()
  309. // reset properties
  310. a.Properties = nil
  311. // ensure that we carry cluster ID and/or node over if they're the same
  312. // required for idle/shared cost allocation
  313. if aCluster == thatCluster {
  314. a.Properties = Properties{ClusterProp: aCluster}
  315. }
  316. if aNode == thatNode {
  317. if a.Properties == nil {
  318. a.Properties = Properties{NodeProp: aNode}
  319. } else {
  320. a.Properties.SetNode(aNode)
  321. }
  322. }
  323. // Expand Window, Start, and End to be the "max" of each between the two
  324. // given Allocations.
  325. a.Window = a.Window.Expand(that.Window)
  326. if that.Start.Before(a.Start) {
  327. a.Start = that.Start
  328. }
  329. if that.End.After(a.End) {
  330. a.End = that.End
  331. }
  332. // Sum all cumulative resource fields
  333. a.CPUCoreHours += that.CPUCoreHours
  334. a.CPUCoreRequestAverage += that.CPUCoreRequestAverage
  335. a.CPUCoreUsageAverage += that.CPUCoreUsageAverage
  336. a.GPUHours += that.GPUHours
  337. a.RAMByteHours += that.RAMByteHours
  338. a.RAMBytesRequestAverage += that.RAMBytesRequestAverage
  339. a.RAMBytesUsageAverage += that.RAMBytesUsageAverage
  340. a.PVByteHours += that.PVByteHours
  341. // Sum all cumulative cost fields
  342. a.CPUCost += that.CPUCost
  343. a.GPUCost += that.GPUCost
  344. a.RAMCost += that.RAMCost
  345. a.PVCost += that.PVCost
  346. a.NetworkCost += that.NetworkCost
  347. a.SharedCost += that.SharedCost
  348. a.ExternalCost += that.ExternalCost
  349. a.TotalCost += that.TotalCost
  350. }
  351. // AllocationSet stores a set of Allocations, each with a unique name, that share
  352. // a window. An AllocationSet is mutable, so treat it like a threadsafe map.
  353. type AllocationSet struct {
  354. sync.RWMutex
  355. allocations map[string]*Allocation
  356. externalKeys map[string]bool
  357. idleKeys map[string]bool
  358. Window Window
  359. Warnings []string
  360. Errors []string
  361. }
  362. // NewAllocationSet instantiates a new AllocationSet and, optionally, inserts
  363. // the given list of Allocations
  364. func NewAllocationSet(start, end time.Time, allocs ...*Allocation) *AllocationSet {
  365. as := &AllocationSet{
  366. allocations: map[string]*Allocation{},
  367. externalKeys: map[string]bool{},
  368. idleKeys: map[string]bool{},
  369. Window: NewWindow(&start, &end),
  370. }
  371. for _, a := range allocs {
  372. as.Insert(a)
  373. }
  374. return as
  375. }
  376. // AllocationAggregationOptions provide advanced functionality to AggregateBy, including
  377. // filtering results and sharing allocations. FilterFuncs are a list of match
  378. // functions such that, if any function fails, the allocation is ignored.
  379. // ShareFuncs are a list of match functions such that, if any function
  380. // succeeds, the allocation is marked as a shared resource. ShareIdle is a
  381. // simple flag for sharing idle resources.
  382. type AllocationAggregationOptions struct {
  383. FilterFuncs []AllocationMatchFunc
  384. SplitIdle bool
  385. MergeUnallocated bool
  386. ShareFuncs []AllocationMatchFunc
  387. ShareIdle string
  388. ShareSplit string
  389. SharedHourlyCosts map[string]float64
  390. }
  391. // AggregateBy aggregates the Allocations in the given AllocationSet by the given
  392. // Property. This will only be legal if the AllocationSet is divisible by the
  393. // given Property; e.g. Containers can be divided by Namespace, but not vice-a-versa.
  394. func (as *AllocationSet) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  395. // The order of operations for aggregating allocations is as follows:
  396. // 1. Partition external, idle, and shared allocations into separate sets
  397. // 2. Compute idle coefficients (if necessary)
  398. // a) if idle allocation is to be shared, compute idle coefficients
  399. // (do not compute shared coefficients here, see step 5)
  400. // b) if idle allocation is NOT shared, but filters are present, compute
  401. // idle filtration coefficients for the purpose of only returning the
  402. // portion of idle allocation that would have been shared with the
  403. // unfiltered results set. (See unit tests 5.a,b,c)
  404. // 3. Ignore allocation if it fails any of the FilterFuncs
  405. // 4. Distribute idle allocations among remaining non-idle, non-external
  406. // allocations
  407. // 5. Generate aggregation key and insert allocation into the output set
  408. // 6. Scale un-aggregated idle coefficients by filtration coefficient
  409. // 7. If there are shared allocations, compute sharing coefficients on
  410. // the aggregated set, then share allocation accordingly
  411. // 8. If there are external allocations that can be aggregated into
  412. // the output (i.e. they can be used to generate a valid key for
  413. // the given properties) then aggregate; otherwise... ignore them?
  414. // 9. If the merge idle option is enabled, merge any remaining idle
  415. // allocations into a single idle allocation
  416. // TODO niko/etl revisit (ShareIdle: ShareEven) case, which is probably wrong
  417. // (and, frankly, ill-defined; i.e. evenly across clusters? within clusters?)
  418. if options == nil {
  419. options = &AllocationAggregationOptions{}
  420. }
  421. if as.IsEmpty() {
  422. return nil
  423. }
  424. // aggSet will collect the aggregated allocations
  425. aggSet := &AllocationSet{
  426. Window: as.Window.Clone(),
  427. }
  428. // externalSet will collect external allocations
  429. externalSet := &AllocationSet{
  430. Window: as.Window.Clone(),
  431. }
  432. // idleSet will be shared among aggSet after initial aggregation
  433. // is complete
  434. idleSet := &AllocationSet{
  435. Window: as.Window.Clone(),
  436. }
  437. // shareSet will be shared among aggSet after initial aggregation
  438. // is complete
  439. shareSet := &AllocationSet{
  440. Window: as.Window.Clone(),
  441. }
  442. // Convert SharedHourlyCosts to Allocations in the shareSet
  443. for name, cost := range options.SharedHourlyCosts {
  444. if cost > 0.0 {
  445. hours := as.Resolution().Hours()
  446. // If set ends in the future, adjust hours accordingly
  447. diff := time.Now().Sub(as.End())
  448. if diff < 0.0 {
  449. hours += diff.Hours()
  450. }
  451. totalSharedCost := cost * hours
  452. shareSet.Insert(&Allocation{
  453. Name: fmt.Sprintf("%s/%s", name, SharedSuffix),
  454. Start: as.Start(),
  455. End: as.End(),
  456. SharedCost: totalSharedCost,
  457. TotalCost: totalSharedCost,
  458. })
  459. }
  460. }
  461. as.Lock()
  462. defer as.Unlock()
  463. // (1) Loop and find all of the external, idle, and shared allocations. Add
  464. // them to their respective sets, removing them from the set of allocations
  465. // to aggregate.
  466. for _, alloc := range as.allocations {
  467. // External allocations get aggregated post-hoc (see step 6) and do
  468. // not necessarily contain complete sets of properties, so they are
  469. // moved to a separate AllocationSet.
  470. if alloc.IsExternal() {
  471. delete(as.externalKeys, alloc.Name)
  472. delete(as.allocations, alloc.Name)
  473. externalSet.Insert(alloc)
  474. continue
  475. }
  476. cluster, err := alloc.Properties.GetCluster()
  477. if err != nil {
  478. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  479. return err
  480. }
  481. // Idle allocations should be separated into idleSet if they are to be
  482. // shared later on. If they are not to be shared, then aggregate them.
  483. if alloc.IsIdle() {
  484. delete(as.idleKeys, alloc.Name)
  485. delete(as.allocations, alloc.Name)
  486. if options.ShareIdle == ShareEven || options.ShareIdle == ShareWeighted {
  487. idleSet.Insert(alloc)
  488. } else {
  489. aggSet.Insert(alloc)
  490. }
  491. continue
  492. }
  493. // Shared allocations must be identified and separated prior to
  494. // aggregation and filtering. That is, if any of the ShareFuncs
  495. // return true, then move the allocation to shareSet.
  496. for _, sf := range options.ShareFuncs {
  497. if sf(alloc) {
  498. delete(as.idleKeys, alloc.Name)
  499. delete(as.allocations, alloc.Name)
  500. alloc.Name = fmt.Sprintf("%s/%s", cluster, SharedSuffix)
  501. shareSet.Insert(alloc)
  502. break
  503. }
  504. }
  505. }
  506. // It's possible that no more un-shared, non-idle, non-external allocations
  507. // remain at this point. This always results in an emptySet.
  508. if len(as.allocations) == 0 {
  509. log.Warningf("ETL: AggregateBy: no allocations to aggregate")
  510. emptySet := &AllocationSet{
  511. Window: as.Window.Clone(),
  512. }
  513. as.allocations = emptySet.allocations
  514. return nil
  515. }
  516. // (2) In order to correctly apply idle and shared resource coefficients
  517. // appropriately, we need to determine the coefficients for the full set
  518. // of data. The ensures that the ratios are maintained through filtering.
  519. // idleCoefficients are organized by [cluster][allocation][resource]=coeff
  520. var idleCoefficients map[string]map[string]map[string]float64
  521. // shareCoefficients are organized by [allocation][resource]=coeff (no cluster)
  522. var shareCoefficients map[string]float64
  523. var err error
  524. // (2a) If there are idle costs and we intend to share them, compute the
  525. // coefficients for sharing the cost among the non-idle, non-aggregated
  526. // allocations.
  527. if idleSet.Length() > 0 && options.ShareIdle != ShareNone {
  528. idleCoefficients, err = computeIdleCoeffs(properties, options, as)
  529. if err != nil {
  530. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  531. return fmt.Errorf("error computing idle coefficients: %s", err)
  532. }
  533. }
  534. // (2b) If we're not sharing idle and we're filtering, we need to track the
  535. // amount of each idle allocation to "delete" in order to maintain parity
  536. // with the idle-allocated results. That is, we want to return only the
  537. // idle cost that would have been shared with the unfiltered portion of
  538. // the results, not the full idle cost.
  539. var idleFiltrationCoefficients map[string]map[string]map[string]float64
  540. if len(options.FilterFuncs) > 0 && options.ShareIdle == ShareNone {
  541. idleFiltrationCoefficients, err = computeIdleCoeffs(properties, options, as)
  542. if err != nil {
  543. log.Warningf("AllocationSet.AggregateBy: compute idle coeff: %s", err)
  544. return fmt.Errorf("error computing idle filtration coefficients: %s", err)
  545. }
  546. }
  547. // (3-5) Filter, distribute idle cost, and aggregate (in that order)
  548. for _, alloc := range as.allocations {
  549. cluster, err := alloc.Properties.GetCluster()
  550. if err != nil {
  551. log.Warningf("AllocationSet.AggregateBy: missing cluster for allocation: %s", alloc.Name)
  552. return err
  553. }
  554. skip := false
  555. // (3) If any of the filter funcs fail, immediately skip the allocation.
  556. for _, ff := range options.FilterFuncs {
  557. if !ff(alloc) {
  558. skip = true
  559. break
  560. }
  561. }
  562. if skip {
  563. // If we are tracking idle filtration coefficients, delete the
  564. // entry corresponding to the filtered allocation. (Deleting the
  565. // entry will result in that proportional amount being removed
  566. // from the idle allocation at the end of the process.)
  567. if idleFiltrationCoefficients != nil {
  568. if ifcc, ok := idleFiltrationCoefficients[cluster]; ok {
  569. delete(ifcc, alloc.Name)
  570. }
  571. }
  572. continue
  573. }
  574. // (4) Split idle allocations and distribute among remaining
  575. // un-aggregated allocations.
  576. // NOTE: if idle allocation is off (i.e. ShareIdle == ShareNone) then
  577. // all idle allocations will be in the aggSet at this point, so idleSet
  578. // will be empty and we won't enter this block.
  579. if idleSet.Length() > 0 {
  580. // Distribute idle allocations by coefficient per-cluster, per-allocation
  581. for _, idleAlloc := range idleSet.allocations {
  582. // Only share idle if the cluster matches; i.e. the allocation
  583. // is from the same cluster as the idle costs
  584. idleCluster, err := idleAlloc.Properties.GetCluster()
  585. if err != nil {
  586. return err
  587. }
  588. if idleCluster != cluster {
  589. continue
  590. }
  591. // Make sure idle coefficients exist
  592. if _, ok := idleCoefficients[cluster]; !ok {
  593. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficient [no cluster: '%s' in coefficients] for '%s'", cluster, alloc.Name)
  594. continue
  595. }
  596. if _, ok := idleCoefficients[cluster][alloc.Name]; !ok {
  597. log.Errorf("ETL: share (idle) allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  598. continue
  599. }
  600. alloc.CPUCoreHours += idleAlloc.CPUCoreHours * idleCoefficients[cluster][alloc.Name]["cpu"]
  601. alloc.GPUHours += idleAlloc.GPUHours * idleCoefficients[cluster][alloc.Name]["gpu"]
  602. alloc.RAMByteHours += idleAlloc.RAMByteHours * idleCoefficients[cluster][alloc.Name]["ram"]
  603. idleCPUCost := idleAlloc.CPUCost * idleCoefficients[cluster][alloc.Name]["cpu"]
  604. idleGPUCost := idleAlloc.GPUCost * idleCoefficients[cluster][alloc.Name]["gpu"]
  605. idleRAMCost := idleAlloc.RAMCost * idleCoefficients[cluster][alloc.Name]["ram"]
  606. alloc.CPUCost += idleCPUCost
  607. alloc.GPUCost += idleGPUCost
  608. alloc.RAMCost += idleRAMCost
  609. alloc.TotalCost += idleCPUCost + idleGPUCost + idleRAMCost
  610. }
  611. }
  612. // (5) generate key to use for aggregation-by-key and allocation name
  613. key, err := alloc.generateKey(properties)
  614. if err != nil {
  615. return err
  616. }
  617. alloc.Name = key
  618. if options.MergeUnallocated && alloc.IsUnallocated() {
  619. alloc.Name = UnallocatedSuffix
  620. }
  621. // Inserting the allocation with the generated key for a name will
  622. // perform the actual basic aggregation step.
  623. aggSet.Insert(alloc)
  624. }
  625. // clusterIdleFiltrationCoeffs is used to track per-resource idle
  626. // coefficients on a cluster-by-cluster basis. It is, essentailly, an
  627. // aggregation of idleFiltrationCoefficients after they have been
  628. // filtered above (in step 3)
  629. var clusterIdleFiltrationCoeffs map[string]map[string]float64
  630. if idleFiltrationCoefficients != nil {
  631. clusterIdleFiltrationCoeffs = map[string]map[string]float64{}
  632. for cluster, m := range idleFiltrationCoefficients {
  633. if _, ok := clusterIdleFiltrationCoeffs[cluster]; !ok {
  634. clusterIdleFiltrationCoeffs[cluster] = map[string]float64{
  635. "cpu": 0.0,
  636. "gpu": 0.0,
  637. "ram": 0.0,
  638. }
  639. }
  640. for _, n := range m {
  641. for resource, val := range n {
  642. clusterIdleFiltrationCoeffs[cluster][resource] += val
  643. }
  644. }
  645. }
  646. }
  647. // (6) If we have both un-shared idle allocations and idle filtration
  648. // coefficients (i.e. we have computed coefficients for scaling idle
  649. // allocation costs by cluster) then use those coefficients to scale down
  650. // each idle allocation.
  651. if len(aggSet.idleKeys) > 0 && clusterIdleFiltrationCoeffs != nil {
  652. for idleKey := range aggSet.idleKeys {
  653. idleAlloc := aggSet.Get(idleKey)
  654. cluster, err := idleAlloc.Properties.GetCluster()
  655. if err != nil {
  656. log.Warningf("AggregateBy: idle allocation without cluster: %s", idleAlloc)
  657. }
  658. if resourceCoeffs, ok := clusterIdleFiltrationCoeffs[cluster]; ok {
  659. idleAlloc.CPUCost *= resourceCoeffs["cpu"]
  660. idleAlloc.CPUCoreHours *= resourceCoeffs["cpu"]
  661. idleAlloc.RAMCost *= resourceCoeffs["ram"]
  662. idleAlloc.RAMByteHours *= resourceCoeffs["ram"]
  663. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.RAMCost
  664. }
  665. }
  666. }
  667. // (7) Split shared allocations and distribute among aggregated allocations
  668. if shareSet.Length() > 0 {
  669. shareCoefficients, err = computeShareCoeffs(properties, options, aggSet)
  670. if err != nil {
  671. log.Warningf("AllocationSet.AggregateBy: compute shared coeff: missing cluster ID: %s", err)
  672. return err
  673. }
  674. for _, alloc := range aggSet.allocations {
  675. if alloc.IsIdle() {
  676. // Skip idle allocations (they do not receive shared allocation)
  677. continue
  678. }
  679. // Distribute shared allocations by coefficient per-allocation
  680. // NOTE: share coefficients do not partition by cluster, like
  681. // idle coefficients do.
  682. for _, sharedAlloc := range shareSet.allocations {
  683. if _, ok := shareCoefficients[alloc.Name]; !ok {
  684. log.Errorf("ETL: share allocation: error getting allocation coefficienct for '%s'", alloc.Name)
  685. continue
  686. }
  687. alloc.SharedCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  688. alloc.TotalCost += sharedAlloc.TotalCost * shareCoefficients[alloc.Name]
  689. }
  690. }
  691. }
  692. // (8) Aggregate external allocations into aggregated allocations. This may
  693. // not be possible for every external allocation, but attempt to find an
  694. // exact key match, given each external allocation's proerties, and
  695. // aggregate if an exact match is found.
  696. for _, alloc := range externalSet.allocations {
  697. key, err := alloc.generateKey(properties)
  698. if err != nil {
  699. continue
  700. }
  701. alloc.Name = key
  702. aggSet.Insert(alloc)
  703. }
  704. // (9) Combine all idle allocations into a single "__idle__" allocation
  705. if !options.SplitIdle {
  706. for _, idleAlloc := range aggSet.IdleAllocations() {
  707. aggSet.Delete(idleAlloc.Name)
  708. idleAlloc.Name = IdleSuffix
  709. aggSet.Insert(idleAlloc)
  710. }
  711. }
  712. as.allocations = aggSet.allocations
  713. return nil
  714. }
  715. // TODO niko/etl deprecate the use of a map of resources here, we only use totals
  716. func computeShareCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]float64, error) {
  717. // Compute coeffs by totalling per-allocation, then dividing by the total.
  718. coeffs := map[string]float64{}
  719. // Compute totals for all allocations
  720. total := 0.0
  721. // ShareEven counts each aggregation with even weight, whereas ShareWeighted
  722. // counts each aggregation proportionally to its respective costs
  723. shareType := options.ShareSplit
  724. // Record allocation values first, then normalize by totals to get percentages
  725. for name, alloc := range as.allocations {
  726. if alloc.IsIdle() {
  727. // Skip idle allocations in coefficient calculation
  728. continue
  729. }
  730. if shareType == ShareEven {
  731. // Not additive - set to 1.0 for even distribution
  732. coeffs[name] = 1.0
  733. // Total is always additive
  734. total += 1.0
  735. } else {
  736. // Both are additive for weighted distribution
  737. coeffs[name] += alloc.TotalCost
  738. total += alloc.TotalCost
  739. }
  740. }
  741. // Normalize coefficients by totals
  742. for a := range coeffs {
  743. if coeffs[a] > 0 && total > 0 {
  744. coeffs[a] /= total
  745. } else {
  746. log.Warningf("ETL: invalid values for shared coefficients: %d, %d", coeffs[a], total)
  747. coeffs[a] = 0.0
  748. }
  749. }
  750. return coeffs, nil
  751. }
  752. func computeIdleCoeffs(properties Properties, options *AllocationAggregationOptions, as *AllocationSet) (map[string]map[string]map[string]float64, error) {
  753. types := []string{"cpu", "gpu", "ram"}
  754. // Compute idle coefficients, then save them in AllocationAggregationOptions
  755. coeffs := map[string]map[string]map[string]float64{}
  756. // Compute totals per resource for CPU, GPU, RAM, and PV
  757. totals := map[string]map[string]float64{}
  758. // ShareEven counts each allocation with even weight, whereas ShareWeighted
  759. // counts each allocation proportionally to its respective costs
  760. shareType := options.ShareIdle
  761. // Record allocation values first, then normalize by totals to get percentages
  762. for _, alloc := range as.allocations {
  763. if alloc.IsIdle() {
  764. // Skip idle allocations in coefficient calculation
  765. continue
  766. }
  767. // If any of the share funcs succeed, share the allocation. Do this
  768. // prior to filtering so that shared namespaces, etc do not get
  769. // filtered out before we have a chance to share them.
  770. skip := false
  771. for _, sf := range options.ShareFuncs {
  772. if sf(alloc) {
  773. skip = true
  774. break
  775. }
  776. }
  777. if skip {
  778. continue
  779. }
  780. // We need to key the allocations by cluster id
  781. clusterID, err := alloc.Properties.GetCluster()
  782. if err != nil {
  783. return nil, err
  784. }
  785. // get the name key for the allocation
  786. name := alloc.Name
  787. // Create cluster based tables if they don't exist
  788. if _, ok := coeffs[clusterID]; !ok {
  789. coeffs[clusterID] = map[string]map[string]float64{}
  790. }
  791. if _, ok := totals[clusterID]; !ok {
  792. totals[clusterID] = map[string]float64{}
  793. }
  794. if _, ok := coeffs[clusterID][name]; !ok {
  795. coeffs[clusterID][name] = map[string]float64{}
  796. }
  797. if shareType == ShareEven {
  798. for _, r := range types {
  799. // Not additive - hard set to 1.0
  800. coeffs[clusterID][name][r] = 1.0
  801. // totals are additive
  802. totals[clusterID][r] += 1.0
  803. }
  804. } else {
  805. coeffs[clusterID][name]["cpu"] += alloc.CPUCost
  806. coeffs[clusterID][name]["gpu"] += alloc.GPUCost
  807. coeffs[clusterID][name]["ram"] += alloc.RAMCost
  808. totals[clusterID]["cpu"] += alloc.CPUCost
  809. totals[clusterID]["gpu"] += alloc.GPUCost
  810. totals[clusterID]["ram"] += alloc.RAMCost
  811. }
  812. }
  813. // Normalize coefficients by totals
  814. for c := range coeffs {
  815. for a := range coeffs[c] {
  816. for _, r := range types {
  817. if coeffs[c][a][r] > 0 && totals[c][r] > 0 {
  818. coeffs[c][a][r] /= totals[c][r]
  819. }
  820. }
  821. }
  822. }
  823. return coeffs, nil
  824. }
  825. func (alloc *Allocation) generateKey(properties Properties) (string, error) {
  826. // Names will ultimately be joined into a single name, which uniquely
  827. // identifies allocations.
  828. names := []string{}
  829. if properties.HasCluster() {
  830. cluster, err := alloc.Properties.GetCluster()
  831. if err != nil {
  832. return "", err
  833. }
  834. names = append(names, cluster)
  835. }
  836. if properties.HasNode() {
  837. node, err := alloc.Properties.GetNode()
  838. if err != nil {
  839. return "", err
  840. }
  841. names = append(names, node)
  842. }
  843. if properties.HasNamespace() {
  844. namespace, err := alloc.Properties.GetNamespace()
  845. if err != nil {
  846. return "", err
  847. }
  848. names = append(names, namespace)
  849. }
  850. if properties.HasControllerKind() {
  851. controllerKind, err := alloc.Properties.GetControllerKind()
  852. if err != nil {
  853. // Indicate that allocation has no controller
  854. controllerKind = UnallocatedSuffix
  855. }
  856. if prop, _ := properties.GetControllerKind(); prop != "" && prop != controllerKind {
  857. // The allocation does not have the specified controller kind
  858. controllerKind = UnallocatedSuffix
  859. }
  860. names = append(names, controllerKind)
  861. }
  862. if properties.HasController() {
  863. if !properties.HasControllerKind() {
  864. controllerKind, err := alloc.Properties.GetControllerKind()
  865. if err == nil {
  866. names = append(names, controllerKind)
  867. }
  868. }
  869. controller, err := alloc.Properties.GetController()
  870. if err != nil {
  871. // Indicate that allocation has no controller
  872. controller = UnallocatedSuffix
  873. }
  874. names = append(names, controller)
  875. }
  876. if properties.HasPod() {
  877. pod, err := alloc.Properties.GetPod()
  878. if err != nil {
  879. return "", err
  880. }
  881. names = append(names, pod)
  882. }
  883. if properties.HasContainer() {
  884. container, err := alloc.Properties.GetContainer()
  885. if err != nil {
  886. return "", err
  887. }
  888. names = append(names, container)
  889. }
  890. if properties.HasService() {
  891. services, err := alloc.Properties.GetServices()
  892. if err != nil {
  893. // Indicate that allocation has no services
  894. names = append(names, UnallocatedSuffix)
  895. } else {
  896. // TODO niko/etl support multi-service aggregation
  897. if len(services) > 0 {
  898. for _, service := range services {
  899. names = append(names, service)
  900. break
  901. }
  902. } else {
  903. // Indicate that allocation has no services
  904. names = append(names, UnallocatedSuffix)
  905. }
  906. }
  907. }
  908. if properties.HasAnnotations() {
  909. annotations, err := alloc.Properties.GetAnnotations() // annotations that the individual allocation possesses
  910. if err != nil {
  911. // Indicate that allocation has no annotations
  912. names = append(names, UnallocatedSuffix)
  913. } else {
  914. annotationNames := []string{}
  915. aggAnnotations, err := properties.GetAnnotations() // potential annotations to aggregate on supplied by the API caller
  916. if err != nil {
  917. // We've already checked HasAnnotation, so this should never occur
  918. return "", err
  919. }
  920. // calvin - support multi-annotation aggregation
  921. for annotationName := range aggAnnotations {
  922. if val, ok := annotations[annotationName]; ok {
  923. annotationNames = append(annotationNames, fmt.Sprintf("%s=%s", annotationName, val))
  924. } else if indexOf(UnallocatedSuffix, annotationNames) == -1 { // if UnallocatedSuffix not already in names
  925. annotationNames = append(annotationNames, UnallocatedSuffix)
  926. }
  927. }
  928. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  929. if len(annotationNames) > 1 {
  930. sort.Strings(annotationNames)
  931. }
  932. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, annotationNames)
  933. // suffix should be at index 0 if it exists b/c of underscores
  934. if unallocatedSuffixIndex != -1 {
  935. annotationNames = append(annotationNames[:unallocatedSuffixIndex], annotationNames[unallocatedSuffixIndex+1:]...)
  936. annotationNames = append(annotationNames, UnallocatedSuffix) // append to end
  937. }
  938. names = append(names, annotationNames...)
  939. }
  940. }
  941. if properties.HasLabel() {
  942. labels, err := alloc.Properties.GetLabels() // labels that the individual allocation possesses
  943. if err != nil {
  944. // Indicate that allocation has no labels
  945. names = append(names, UnallocatedSuffix)
  946. } else {
  947. labelNames := []string{}
  948. aggLabels, err := properties.GetLabels() // potential labels to aggregate on supplied by the API caller
  949. if err != nil {
  950. // We've already checked HasLabel, so this should never occur
  951. return "", err
  952. }
  953. // calvin - support multi-label aggregation
  954. for labelName := range aggLabels {
  955. if val, ok := labels[labelName]; ok {
  956. labelNames = append(labelNames, fmt.Sprintf("%s=%s", labelName, val))
  957. } else if indexOf(UnallocatedSuffix, labelNames) == -1 { // if UnallocatedSuffix not already in names
  958. labelNames = append(labelNames, UnallocatedSuffix)
  959. }
  960. }
  961. // resolve arbitrary ordering. e.g., app=app0/env=env0 is the same agg as env=env0/app=app0
  962. if len(labelNames) > 1 {
  963. sort.Strings(labelNames)
  964. }
  965. unallocatedSuffixIndex := indexOf(UnallocatedSuffix, labelNames)
  966. // suffix should be at index 0 if it exists b/c of underscores
  967. if unallocatedSuffixIndex != -1 {
  968. labelNames = append(labelNames[:unallocatedSuffixIndex], labelNames[unallocatedSuffixIndex+1:]...)
  969. labelNames = append(labelNames, UnallocatedSuffix) // append to end
  970. }
  971. names = append(names, labelNames...)
  972. }
  973. }
  974. return strings.Join(names, "/"), nil
  975. }
  // indexOf reports the position of the first element of arr that equals v
  // under case-insensitive comparison, or -1 when there is no such element.
  //
  // TODO clean up: possibly duplicated elsewhere in the codebase.
  func indexOf(v string, arr []string) int {
  	for i := 0; i < len(arr); i++ {
  		// EqualFold makes the match caseless.
  		if strings.EqualFold(arr[i], v) {
  			return i
  		}
  	}
  	return -1
  }
  987. // Clone returns a new AllocationSet with a deep copy of the given
  988. // AllocationSet's allocations.
  989. func (as *AllocationSet) Clone() *AllocationSet {
  990. if as == nil {
  991. return nil
  992. }
  993. as.RLock()
  994. defer as.RUnlock()
  995. allocs := map[string]*Allocation{}
  996. for k, v := range as.allocations {
  997. allocs[k] = v.Clone()
  998. }
  999. externalKeys := map[string]bool{}
  1000. for k, v := range as.externalKeys {
  1001. externalKeys[k] = v
  1002. }
  1003. idleKeys := map[string]bool{}
  1004. for k, v := range as.idleKeys {
  1005. idleKeys[k] = v
  1006. }
  1007. return &AllocationSet{
  1008. allocations: allocs,
  1009. externalKeys: externalKeys,
  1010. idleKeys: idleKeys,
  1011. Window: as.Window.Clone(),
  1012. }
  1013. }
  1014. // ComputeIdleAllocations computes the idle allocations for the AllocationSet,
  1015. // given a set of Assets. Ideally, assetSet should contain only Nodes, but if
  1016. // it contains other Assets, they will be ignored; only CPU, GPU and RAM are
  1017. // considered for idle allocation. If the Nodes have adjustments, then apply
  1018. // the adjustments proportionally to each of the resources so that total
  1019. // allocation with idle reflects the adjusted node costs. One idle allocation
  1020. // per-cluster will be computed and returned, keyed by cluster_id.
  1021. func (as *AllocationSet) ComputeIdleAllocations(assetSet *AssetSet) (map[string]*Allocation, error) {
  1022. if as == nil {
  1023. return nil, fmt.Errorf("cannot compute idle allocation for nil AllocationSet")
  1024. }
  1025. if assetSet == nil {
  1026. return nil, fmt.Errorf("cannot compute idle allocation with nil AssetSet")
  1027. }
  1028. if !as.Window.Equal(assetSet.Window) {
  1029. return nil, fmt.Errorf("cannot compute idle allocation for sets with mismatched windows: %s != %s", as.Window, assetSet.Window)
  1030. }
  1031. window := as.Window
  1032. // Build a map of cumulative cluster asset costs, per resource; i.e.
  1033. // cluster-to-{cpu|gpu|ram}-to-cost.
  1034. assetClusterResourceCosts := map[string]map[string]float64{}
  1035. assetSet.Each(func(key string, a Asset) {
  1036. if node, ok := a.(*Node); ok {
  1037. if _, ok := assetClusterResourceCosts[node.Properties().Cluster]; !ok {
  1038. assetClusterResourceCosts[node.Properties().Cluster] = map[string]float64{}
  1039. }
  1040. // adjustmentRate is used to scale resource costs proportionally
  1041. // by the adjustment. This is necessary because we only get one
  1042. // adjustment per Node, not one per-resource-per-Node.
  1043. //
  1044. // e.g. total cost = $90, adjustment = -$10 => 0.9
  1045. // e.g. total cost = $150, adjustment = -$300 => 0.3333
  1046. // e.g. total cost = $150, adjustment = $50 => 1.5
  1047. adjustmentRate := 1.0
  1048. if node.TotalCost()-node.Adjustment() == 0 {
  1049. // If (totalCost - adjustment) is 0.0 then adjustment cancels
  1050. // the entire node cost and we should make everything 0
  1051. // without dividing by 0.
  1052. adjustmentRate = 0.0
  1053. } else if node.Adjustment() != 0.0 {
  1054. // adjustmentRate is the ratio of cost-with-adjustment (i.e. TotalCost)
  1055. // to cost-without-adjustment (i.e. TotalCost - Adjustment).
  1056. adjustmentRate = node.TotalCost() / (node.TotalCost() - node.Adjustment())
  1057. }
  1058. cpuCost := node.CPUCost * (1.0 - node.Discount) * adjustmentRate
  1059. gpuCost := node.GPUCost * (1.0 - node.Discount) * adjustmentRate
  1060. ramCost := node.RAMCost * (1.0 - node.Discount) * adjustmentRate
  1061. assetClusterResourceCosts[node.Properties().Cluster]["cpu"] += cpuCost
  1062. assetClusterResourceCosts[node.Properties().Cluster]["gpu"] += gpuCost
  1063. assetClusterResourceCosts[node.Properties().Cluster]["ram"] += ramCost
  1064. }
  1065. })
  1066. // Determine start, end on a per-cluster basis
  1067. clusterStarts := map[string]time.Time{}
  1068. clusterEnds := map[string]time.Time{}
  1069. // Subtract allocated costs from asset costs, leaving only the remaining
  1070. // idle costs.
  1071. as.Each(func(name string, a *Allocation) {
  1072. cluster, err := a.Properties.GetCluster()
  1073. if err != nil {
  1074. // Failed to find allocation's cluster
  1075. return
  1076. }
  1077. if _, ok := assetClusterResourceCosts[cluster]; !ok {
  1078. // Failed to find assets for allocation's cluster
  1079. return
  1080. }
  1081. // Set cluster (start, end) if they are either not currently set,
  1082. // or if the detected (start, end) of the current allocation falls
  1083. // before or after, respectively, the current values.
  1084. if s, ok := clusterStarts[cluster]; !ok || a.Start.Before(s) {
  1085. clusterStarts[cluster] = a.Start
  1086. }
  1087. if e, ok := clusterEnds[cluster]; !ok || a.End.After(e) {
  1088. clusterEnds[cluster] = a.End
  1089. }
  1090. assetClusterResourceCosts[cluster]["cpu"] -= a.CPUCost
  1091. assetClusterResourceCosts[cluster]["gpu"] -= a.GPUCost
  1092. assetClusterResourceCosts[cluster]["ram"] -= a.RAMCost
  1093. })
  1094. // Turn remaining un-allocated asset costs into idle allocations
  1095. idleAllocs := map[string]*Allocation{}
  1096. for cluster, resources := range assetClusterResourceCosts {
  1097. // Default start and end to the (start, end) of the given window, but
  1098. // use the actual, detected (start, end) pair if they are available.
  1099. start := *window.Start()
  1100. if s, ok := clusterStarts[cluster]; ok && window.Contains(s) {
  1101. start = s
  1102. }
  1103. end := *window.End()
  1104. if e, ok := clusterEnds[cluster]; ok && window.Contains(e) {
  1105. end = e
  1106. }
  1107. idleAlloc := &Allocation{
  1108. Name: fmt.Sprintf("%s/%s", cluster, IdleSuffix),
  1109. Window: window.Clone(),
  1110. Properties: Properties{ClusterProp: cluster},
  1111. Start: start,
  1112. End: end,
  1113. CPUCost: resources["cpu"],
  1114. GPUCost: resources["gpu"],
  1115. RAMCost: resources["ram"],
  1116. }
  1117. idleAlloc.TotalCost = idleAlloc.CPUCost + idleAlloc.GPUCost + idleAlloc.RAMCost
  1118. // Do not continue if multiple idle allocations are computed for a
  1119. // single cluster.
  1120. if _, ok := idleAllocs[cluster]; ok {
  1121. return nil, fmt.Errorf("duplicate idle allocations for cluster %s", cluster)
  1122. }
  1123. idleAllocs[cluster] = idleAlloc
  1124. }
  1125. return idleAllocs, nil
  1126. }
  1127. // Delete removes the allocation with the given name from the set
  1128. func (as *AllocationSet) Delete(name string) {
  1129. if as == nil {
  1130. return
  1131. }
  1132. as.Lock()
  1133. defer as.Unlock()
  1134. delete(as.externalKeys, name)
  1135. delete(as.idleKeys, name)
  1136. delete(as.allocations, name)
  1137. }
  1138. // Each invokes the given function for each Allocation in the set
  1139. func (as *AllocationSet) Each(f func(string, *Allocation)) {
  1140. if as == nil {
  1141. return
  1142. }
  1143. for k, a := range as.allocations {
  1144. f(k, a)
  1145. }
  1146. }
  1147. // End returns the End time of the AllocationSet window
  1148. func (as *AllocationSet) End() time.Time {
  1149. if as == nil {
  1150. log.Warningf("Allocation ETL: calling End on nil AllocationSet")
  1151. return time.Unix(0, 0)
  1152. }
  1153. if as.Window.End() == nil {
  1154. log.Warningf("Allocation ETL: AllocationSet with illegal window: End is nil; len(as.allocations)=%d", len(as.allocations))
  1155. return time.Unix(0, 0)
  1156. }
  1157. return *as.Window.End()
  1158. }
  1159. // Get returns the Allocation at the given key in the AllocationSet
  1160. func (as *AllocationSet) Get(key string) *Allocation {
  1161. as.RLock()
  1162. defer as.RUnlock()
  1163. if alloc, ok := as.allocations[key]; ok {
  1164. return alloc
  1165. }
  1166. return nil
  1167. }
  1168. // ExternalAllocations returns a map of the external allocations in the set.
  1169. // Returns clones of the actual Allocations, so mutability is not a problem.
  1170. func (as *AllocationSet) ExternalAllocations() map[string]*Allocation {
  1171. externals := map[string]*Allocation{}
  1172. if as.IsEmpty() {
  1173. return externals
  1174. }
  1175. as.RLock()
  1176. defer as.RUnlock()
  1177. for key := range as.externalKeys {
  1178. if alloc, ok := as.allocations[key]; ok {
  1179. externals[key] = alloc.Clone()
  1180. }
  1181. }
  1182. return externals
  1183. }
  1184. // ExternalCost returns the total aggregated external costs of the set
  1185. func (as *AllocationSet) ExternalCost() float64 {
  1186. if as.IsEmpty() {
  1187. return 0.0
  1188. }
  1189. as.RLock()
  1190. defer as.RUnlock()
  1191. externalCost := 0.0
  1192. for _, alloc := range as.allocations {
  1193. externalCost += alloc.ExternalCost
  1194. }
  1195. return externalCost
  1196. }
  1197. // IdleAllocations returns a map of the idle allocations in the AllocationSet.
  1198. // Returns clones of the actual Allocations, so mutability is not a problem.
  1199. func (as *AllocationSet) IdleAllocations() map[string]*Allocation {
  1200. idles := map[string]*Allocation{}
  1201. if as.IsEmpty() {
  1202. return idles
  1203. }
  1204. as.RLock()
  1205. defer as.RUnlock()
  1206. for key := range as.idleKeys {
  1207. if alloc, ok := as.allocations[key]; ok {
  1208. idles[key] = alloc.Clone()
  1209. }
  1210. }
  1211. return idles
  1212. }
  1213. // Insert aggregates the current entry in the AllocationSet by the given Allocation,
  1214. // but only if the Allocation is valid, i.e. matches the AllocationSet's window. If
  1215. // there is no existing entry, one is created. Nil error response indicates success.
  1216. func (as *AllocationSet) Insert(that *Allocation) error {
  1217. return as.insert(that)
  1218. }
  1219. func (as *AllocationSet) insert(that *Allocation) error {
  1220. if as == nil {
  1221. return fmt.Errorf("cannot insert into nil AllocationSet")
  1222. }
  1223. as.Lock()
  1224. defer as.Unlock()
  1225. if as.allocations == nil {
  1226. as.allocations = map[string]*Allocation{}
  1227. }
  1228. if as.externalKeys == nil {
  1229. as.externalKeys = map[string]bool{}
  1230. }
  1231. if as.idleKeys == nil {
  1232. as.idleKeys = map[string]bool{}
  1233. }
  1234. // Add the given Allocation to the existing entry, if there is one;
  1235. // otherwise just set directly into allocations
  1236. if _, ok := as.allocations[that.Name]; !ok {
  1237. as.allocations[that.Name] = that
  1238. } else {
  1239. as.allocations[that.Name].add(that)
  1240. }
  1241. // If the given Allocation is an external one, record that
  1242. if that.IsExternal() {
  1243. as.externalKeys[that.Name] = true
  1244. }
  1245. // If the given Allocation is an idle one, record that
  1246. if that.IsIdle() {
  1247. as.idleKeys[that.Name] = true
  1248. }
  1249. return nil
  1250. }
  1251. // IsEmpty returns true if the AllocationSet is nil, or if it contains
  1252. // zero allocations.
  1253. func (as *AllocationSet) IsEmpty() bool {
  1254. if as == nil || len(as.allocations) == 0 {
  1255. return true
  1256. }
  1257. as.RLock()
  1258. defer as.RUnlock()
  1259. return as.allocations == nil || len(as.allocations) == 0
  1260. }
  1261. // Length returns the number of Allocations in the set
  1262. func (as *AllocationSet) Length() int {
  1263. if as == nil {
  1264. return 0
  1265. }
  1266. as.RLock()
  1267. defer as.RUnlock()
  1268. return len(as.allocations)
  1269. }
  1270. // Map clones and returns a map of the AllocationSet's Allocations
  1271. func (as *AllocationSet) Map() map[string]*Allocation {
  1272. if as.IsEmpty() {
  1273. return map[string]*Allocation{}
  1274. }
  1275. return as.Clone().allocations
  1276. }
  1277. // MarshalJSON JSON-encodes the AllocationSet
  1278. func (as *AllocationSet) MarshalJSON() ([]byte, error) {
  1279. as.RLock()
  1280. defer as.RUnlock()
  1281. return json.Marshal(as.allocations)
  1282. }
  1283. // Resolution returns the AllocationSet's window duration
  1284. func (as *AllocationSet) Resolution() time.Duration {
  1285. return as.Window.Duration()
  1286. }
  1287. // Set uses the given Allocation to overwrite the existing entry in the
  1288. // AllocationSet under the Allocation's name.
  1289. func (as *AllocationSet) Set(alloc *Allocation) error {
  1290. if as.IsEmpty() {
  1291. as.Lock()
  1292. as.allocations = map[string]*Allocation{}
  1293. as.externalKeys = map[string]bool{}
  1294. as.idleKeys = map[string]bool{}
  1295. as.Unlock()
  1296. }
  1297. as.Lock()
  1298. defer as.Unlock()
  1299. as.allocations[alloc.Name] = alloc
  1300. // If the given Allocation is an external one, record that
  1301. if alloc.IsExternal() {
  1302. as.externalKeys[alloc.Name] = true
  1303. }
  1304. // If the given Allocation is an idle one, record that
  1305. if alloc.IsIdle() {
  1306. as.idleKeys[alloc.Name] = true
  1307. }
  1308. return nil
  1309. }
  1310. // Start returns the Start time of the AllocationSet window
  1311. func (as *AllocationSet) Start() time.Time {
  1312. if as == nil {
  1313. log.Warningf("Allocation ETL: calling Start on nil AllocationSet")
  1314. return time.Unix(0, 0)
  1315. }
  1316. if as.Window.Start() == nil {
  1317. log.Warningf("Allocation ETL: AllocationSet with illegal window: Start is nil; len(as.allocations)=%d", len(as.allocations))
  1318. return time.Unix(0, 0)
  1319. }
  1320. return *as.Window.Start()
  1321. }
  1322. // String represents the given Allocation as a string
  1323. func (as *AllocationSet) String() string {
  1324. if as == nil {
  1325. return "<nil>"
  1326. }
  1327. return fmt.Sprintf("AllocationSet{length: %d; window: %s; totalCost: %.2f}",
  1328. as.Length(), as.Window, as.TotalCost())
  1329. }
  1330. // TotalCost returns the sum of all TotalCosts of the allocations contained
  1331. func (as *AllocationSet) TotalCost() float64 {
  1332. if as.IsEmpty() {
  1333. return 0.0
  1334. }
  1335. as.RLock()
  1336. defer as.RUnlock()
  1337. tc := 0.0
  1338. for _, a := range as.allocations {
  1339. tc += a.TotalCost
  1340. }
  1341. return tc
  1342. }
  1343. // UTCOffset returns the AllocationSet's configured UTCOffset.
  1344. func (as *AllocationSet) UTCOffset() time.Duration {
  1345. _, zone := as.Start().Zone()
  1346. return time.Duration(zone) * time.Second
  1347. }
  1348. func (as *AllocationSet) accumulate(that *AllocationSet) (*AllocationSet, error) {
  1349. if as.IsEmpty() {
  1350. return that, nil
  1351. }
  1352. if that.IsEmpty() {
  1353. return as, nil
  1354. }
  1355. // Set start, end to min(start), max(end)
  1356. start := as.Start()
  1357. end := as.End()
  1358. if that.Start().Before(start) {
  1359. start = that.Start()
  1360. }
  1361. if that.End().After(end) {
  1362. end = that.End()
  1363. }
  1364. acc := NewAllocationSet(start, end)
  1365. as.RLock()
  1366. defer as.RUnlock()
  1367. that.RLock()
  1368. defer that.RUnlock()
  1369. for _, alloc := range as.allocations {
  1370. err := acc.insert(alloc)
  1371. if err != nil {
  1372. return nil, err
  1373. }
  1374. }
  1375. for _, alloc := range that.allocations {
  1376. err := acc.insert(alloc)
  1377. if err != nil {
  1378. return nil, err
  1379. }
  1380. }
  1381. return acc, nil
  1382. }
  1383. // AllocationSetRange is a thread-safe slice of AllocationSets. It is meant to
  1384. // be used such that the AllocationSets held are consecutive and coherent with
  1385. // respect to using the same aggregation properties, UTC offset, and
  1386. // resolution. However these rules are not necessarily enforced, so use wisely.
  1387. type AllocationSetRange struct {
  1388. sync.RWMutex
  1389. allocations []*AllocationSet
  1390. }
  1391. // NewAllocationSetRange instantiates a new range composed of the given
  1392. // AllocationSets in the order provided.
  1393. func NewAllocationSetRange(allocs ...*AllocationSet) *AllocationSetRange {
  1394. return &AllocationSetRange{
  1395. allocations: allocs,
  1396. }
  1397. }
  1398. // Accumulate sums each AllocationSet in the given range, returning a single cumulative
  1399. // AllocationSet for the entire range.
  1400. func (asr *AllocationSetRange) Accumulate() (*AllocationSet, error) {
  1401. var allocSet *AllocationSet
  1402. var err error
  1403. asr.RLock()
  1404. defer asr.RUnlock()
  1405. for _, as := range asr.allocations {
  1406. allocSet, err = allocSet.accumulate(as)
  1407. if err != nil {
  1408. return nil, err
  1409. }
  1410. }
  1411. return allocSet, nil
  1412. }
  1413. // TODO niko/etl accumulate into lower-resolution chunks of the given resolution
  1414. // func (asr *AllocationSetRange) AccumulateBy(resolution time.Duration) *AllocationSetRange
  1415. // AggregateBy aggregates each AllocationSet in the range by the given
  1416. // properties and options.
  1417. func (asr *AllocationSetRange) AggregateBy(properties Properties, options *AllocationAggregationOptions) error {
  1418. aggRange := &AllocationSetRange{allocations: []*AllocationSet{}}
  1419. asr.Lock()
  1420. defer asr.Unlock()
  1421. for _, as := range asr.allocations {
  1422. err := as.AggregateBy(properties, options)
  1423. if err != nil {
  1424. return err
  1425. }
  1426. aggRange.allocations = append(aggRange.allocations, as)
  1427. }
  1428. asr.allocations = aggRange.allocations
  1429. return nil
  1430. }
  1431. // Append appends the given AllocationSet to the end of the range. It does not
  1432. // validate whether or not that violates window continuity.
  1433. func (asr *AllocationSetRange) Append(that *AllocationSet) {
  1434. asr.Lock()
  1435. defer asr.Unlock()
  1436. asr.allocations = append(asr.allocations, that)
  1437. }
  1438. // Each invokes the given function for each AllocationSet in the range
  1439. func (asr *AllocationSetRange) Each(f func(int, *AllocationSet)) {
  1440. if asr == nil {
  1441. return
  1442. }
  1443. for i, as := range asr.allocations {
  1444. f(i, as)
  1445. }
  1446. }
  1447. // Get retrieves the AllocationSet at the given index of the range.
  1448. func (asr *AllocationSetRange) Get(i int) (*AllocationSet, error) {
  1449. if i < 0 || i >= len(asr.allocations) {
  1450. return nil, fmt.Errorf("AllocationSetRange: index out of range: %d", i)
  1451. }
  1452. asr.RLock()
  1453. defer asr.RUnlock()
  1454. return asr.allocations[i], nil
  1455. }
  1456. // InsertRange merges the given AllocationSetRange into the receiving one by
  1457. // lining up sets with matching windows, then inserting each allocation from
  1458. // the given ASR into the respective set in the receiving ASR. If the given
  1459. // ASR contains an AllocationSet from a window that does not exist in the
  1460. // receiving ASR, then an error is returned. However, the given ASR does not
  1461. // need to cover the full range of the receiver.
  1462. func (asr *AllocationSetRange) InsertRange(that *AllocationSetRange) error {
  1463. if asr == nil {
  1464. return fmt.Errorf("cannot insert range into nil AllocationSetRange")
  1465. }
  1466. // keys maps window to index in asr
  1467. keys := map[string]int{}
  1468. asr.Each(func(i int, as *AllocationSet) {
  1469. if as == nil {
  1470. return
  1471. }
  1472. keys[as.Window.String()] = i
  1473. })
  1474. // Nothing to merge, so simply return
  1475. if len(keys) == 0 {
  1476. return nil
  1477. }
  1478. var err error
  1479. that.Each(func(j int, thatAS *AllocationSet) {
  1480. if thatAS == nil || err != nil {
  1481. return
  1482. }
  1483. // Find matching AllocationSet in asr
  1484. i, ok := keys[thatAS.Window.String()]
  1485. if !ok {
  1486. err = fmt.Errorf("cannot merge AllocationSet into window that does not exist: %s", thatAS.Window.String())
  1487. return
  1488. }
  1489. as, err := asr.Get(i)
  1490. if err != nil {
  1491. err = fmt.Errorf("AllocationSetRange index does not exist: %d", i)
  1492. return
  1493. }
  1494. // Insert each Allocation from the given set
  1495. thatAS.Each(func(k string, alloc *Allocation) {
  1496. err = as.Insert(alloc)
  1497. if err != nil {
  1498. err = fmt.Errorf("error inserting allocation: %s", err)
  1499. return
  1500. }
  1501. })
  1502. })
  1503. // err might be nil
  1504. return err
  1505. }
  1506. // Length returns the length of the range, which is zero if nil
  1507. func (asr *AllocationSetRange) Length() int {
  1508. if asr == nil || asr.allocations == nil {
  1509. return 0
  1510. }
  1511. asr.RLock()
  1512. defer asr.RUnlock()
  1513. return len(asr.allocations)
  1514. }
  1515. // MarshalJSON JSON-encodes the range
  1516. func (asr *AllocationSetRange) MarshalJSON() ([]byte, error) {
  1517. asr.RLock()
  1518. asr.RUnlock()
  1519. return json.Marshal(asr.allocations)
  1520. }
  1521. // Slice copies the underlying slice of AllocationSets, maintaining order,
  1522. // and returns the copied slice.
  1523. func (asr *AllocationSetRange) Slice() []*AllocationSet {
  1524. if asr == nil || asr.allocations == nil {
  1525. return nil
  1526. }
  1527. asr.RLock()
  1528. defer asr.RUnlock()
  1529. copy := []*AllocationSet{}
  1530. for _, as := range asr.allocations {
  1531. copy = append(copy, as.Clone())
  1532. }
  1533. return copy
  1534. }
  1535. // String represents the given AllocationSetRange as a string
  1536. func (asr *AllocationSetRange) String() string {
  1537. if asr == nil {
  1538. return "<nil>"
  1539. }
  1540. return fmt.Sprintf("AllocationSetRange{length: %d}", asr.Length())
  1541. }
  1542. // UTCOffset returns the detected UTCOffset of the AllocationSets within the
  1543. // range. Defaults to 0 if the range is nil or empty. Does not warn if there
  1544. // are sets with conflicting UTCOffsets (just returns the first).
  1545. func (asr *AllocationSetRange) UTCOffset() time.Duration {
  1546. if asr.Length() == 0 {
  1547. return 0
  1548. }
  1549. as, err := asr.Get(0)
  1550. if err != nil {
  1551. return 0
  1552. }
  1553. return as.UTCOffset()
  1554. }
  1555. // Window returns the full window that the AllocationSetRange spans, from the
  1556. // start of the first AllocationSet to the end of the last one.
  1557. func (asr *AllocationSetRange) Window() Window {
  1558. if asr == nil || asr.Length() == 0 {
  1559. return NewWindow(nil, nil)
  1560. }
  1561. start := asr.allocations[0].Start()
  1562. end := asr.allocations[asr.Length()-1].End()
  1563. return NewWindow(&start, &end)
  1564. }