allocation.go 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485
  1. package costmodel
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/kubecost/cost-model/pkg/env"
  6. "github.com/kubecost/cost-model/pkg/kubecost"
  7. "github.com/kubecost/cost-model/pkg/log"
  8. "github.com/kubecost/cost-model/pkg/prom"
  9. "github.com/kubecost/cost-model/pkg/thanos"
  10. "k8s.io/apimachinery/pkg/labels"
  11. )
  // PromQL templates used by ComputeAllocation. Each template is filled in with
  // fmt.Sprintf: most take a range duration and an offset clause (in that
  // order); the subquery-style templates (queryFmtMinutes, queryFmtPVCInfo)
  // take a duration, a resolution, and an offset clause.
  const (
  	// Per-container running status, sampled as a subquery.
  	queryFmtMinutes = `avg(kube_pod_container_status_running{}) by (container, pod, namespace, cluster_id)[%s:%s]%s`

  	// RAM: allocation, requests, and measured usage.
  	queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtRAMRequests       = `avg(avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtRAMUsage          = `avg(avg_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`

  	// CPU: allocation, requests, and measured usage.
  	queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtCPURequests       = `avg(avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtCPUUsage          = `avg(rate(container_cpu_usage_seconds_total{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`

  	// GPU requests.
  	queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`

  	// Node pricing and spot status.
  	queryFmtNodeCostPerCPUHr    = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeCostPerGPUHr    = `avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeIsSpot          = `avg_over_time(kubecost_node_is_spot[%s]%s)`

  	// Persistent volumes and persistent volume claims.
  	queryFmtPVCInfo           = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[%s:%s]%s`
  	queryFmtPVBytes           = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, cluster_id)`
  	queryFmtPodPVCAllocation  = `avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)`
  	queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, cluster_id)`
  	queryFmtPVCostPerGiBHour  = `avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, cluster_id)`

  	// Network egress volume (GiB) and price per GiB, by traffic class.
  	queryFmtNetZoneGiB            = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetZoneCostPerGiB     = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (cluster_id)`
  	queryFmtNetRegionGiB          = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetRegionCostPerGiB   = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (cluster_id)`
  	queryFmtNetInternetGiB        = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (cluster_id)`

  	// Labels and annotations.
  	queryFmtNamespaceLabels      = `avg_over_time(kube_namespace_labels[%s]%s)`
  	queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations[%s]%s)`
  	queryFmtPodLabels            = `avg_over_time(kube_pod_labels[%s]%s)`
  	queryFmtPodAnnotations       = `avg_over_time(kube_pod_annotations[%s]%s)`

  	// Service and controller relationships.
  	queryFmtServiceLabels     = `avg_over_time(service_selector_labels[%s]%s)`
  	queryFmtDeploymentLabels  = `avg_over_time(deployment_match_labels[%s]%s)`
  	queryFmtStatefulSetLabels = `avg_over_time(statefulSet_match_labels[%s]%s)`
  	queryFmtDaemonSetLabels   = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, cluster_id)`
  	queryFmtJobLabels         = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,cluster_id)`
  )
  46. // TODO niko/computeallocation idle minutes = 1?
  47. // ComputeAllocation uses the CostModel instance to compute an AllocationSet
  48. // for the window defined by the given start and end times. The Allocations
  49. // returned are unaggregated (i.e. down to the container level).
  50. func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.AllocationSet, error) {
  51. // Create a window spanning the requested query
  52. s, e := start, end
  53. window := kubecost.NewWindow(&s, &e)
  54. // Create an empty AllocationSet. For safety, in the case of an error, we
  55. // should prefer to return this empty set with the error. (In the case of
  56. // no error, of course we populate the set and return it.)
  57. allocSet := kubecost.NewAllocationSet(start, end)
  58. // Convert window (start, end) to (duration, offset) for querying Prometheus
  59. timesToDurations := func(s, e time.Time) (dur, off time.Duration) {
  60. now := time.Now()
  61. off = now.Sub(e)
  62. dur = e.Sub(s)
  63. return dur, off
  64. }
  65. duration, offset := timesToDurations(start, end)
  66. // If using Thanos, increase offset to 3 hours, reducing the duration by
  67. // equal measure to maintain the same starting point.
  68. thanosDur := thanos.OffsetDuration()
  69. if offset < thanosDur && env.IsThanosEnabled() {
  70. diff := thanosDur - offset
  71. offset += diff
  72. duration -= diff
  73. }
  74. // If duration < 0, return an empty set
  75. if duration < 0 {
  76. return allocSet, nil
  77. }
  78. // Negative offset means that the end time is in the future. Prometheus
  79. // fails for non-positive offset values, so shrink the duration and
  80. // remove the offset altogether.
  81. if offset < 0 {
  82. duration = duration + offset
  83. offset = 0
  84. }
  85. durStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
  86. offStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
  87. if offset < time.Minute {
  88. offStr = ""
  89. }
  90. // TODO niko/computeallocation dynamic resolution? add to ComputeAllocation() in allocation.Source?
  91. resStr := "1m"
  92. // resPerHr := 60
  93. startQuerying := time.Now()
  94. ctx := prom.NewContext(cm.PrometheusClient)
  95. // TODO niko/computeallocation retries? (That should probably go into the Store.)
  96. // TODO niko/computeallocation split into required and optional queries?
  97. queryMinutes := fmt.Sprintf(queryFmtMinutes, durStr, resStr, offStr)
  98. resChMinutes := ctx.Query(queryMinutes)
  99. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr)
  100. resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
  101. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, offStr)
  102. resChRAMRequests := ctx.Query(queryRAMRequests)
  103. queryRAMUsage := fmt.Sprintf(queryFmtRAMUsage, durStr, offStr)
  104. resChRAMUsage := ctx.Query(queryRAMUsage)
  105. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, offStr)
  106. resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
  107. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, offStr)
  108. resChCPURequests := ctx.Query(queryCPURequests)
  109. queryCPUUsage := fmt.Sprintf(queryFmtCPUUsage, durStr, offStr)
  110. resChCPUUsage := ctx.Query(queryCPUUsage)
  111. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, offStr)
  112. resChGPUsRequested := ctx.Query(queryGPUsRequested)
  113. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, offStr)
  114. resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
  115. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, offStr)
  116. resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
  117. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, offStr)
  118. resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
  119. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr, offStr)
  120. resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
  121. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, durStr, resStr, offStr)
  122. resChPVCInfo := ctx.Query(queryPVCInfo)
  123. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, offStr)
  124. resChPVBytes := ctx.Query(queryPVBytes)
  125. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, offStr)
  126. resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
  127. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, offStr)
  128. resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
  129. queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, offStr)
  130. resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
  131. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, offStr)
  132. resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
  133. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, offStr)
  134. resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
  135. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, offStr)
  136. resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
  137. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, offStr)
  138. resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
  139. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, offStr)
  140. resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
  141. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, offStr)
  142. resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
  143. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr, offStr)
  144. resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
  145. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr, offStr)
  146. resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
  147. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr, offStr)
  148. resChPodLabels := ctx.Query(queryPodLabels)
  149. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr, offStr)
  150. resChPodAnnotations := ctx.Query(queryPodAnnotations)
  151. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr, offStr)
  152. resChServiceLabels := ctx.Query(queryServiceLabels)
  153. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr, offStr)
  154. resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
  155. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr, offStr)
  156. resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
  157. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, offStr)
  158. resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
  159. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr)
  160. resChJobLabels := ctx.Query(queryJobLabels)
  161. resMinutes, _ := resChMinutes.Await()
  162. resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
  163. resCPURequests, _ := resChCPURequests.Await()
  164. resCPUUsage, _ := resChCPUUsage.Await()
  165. resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
  166. resRAMRequests, _ := resChRAMRequests.Await()
  167. resRAMUsage, _ := resChRAMUsage.Await()
  168. resGPUsRequested, _ := resChGPUsRequested.Await()
  169. resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
  170. resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
  171. resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
  172. resNodeIsSpot, _ := resChNodeIsSpot.Await()
  173. resPVBytes, _ := resChPVBytes.Await()
  174. resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
  175. resPVCInfo, _ := resChPVCInfo.Await()
  176. resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
  177. resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
  178. resNetZoneGiB, _ := resChNetZoneGiB.Await()
  179. resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
  180. resNetRegionGiB, _ := resChNetRegionGiB.Await()
  181. resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
  182. resNetInternetGiB, _ := resChNetInternetGiB.Await()
  183. resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()
  184. resNamespaceLabels, _ := resChNamespaceLabels.Await()
  185. resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
  186. resPodLabels, _ := resChPodLabels.Await()
  187. resPodAnnotations, _ := resChPodAnnotations.Await()
  188. resServiceLabels, _ := resChServiceLabels.Await()
  189. resDeploymentLabels, _ := resChDeploymentLabels.Await()
  190. resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
  191. resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
  192. resJobLabels, _ := resChJobLabels.Await()
  193. log.Profile(startQuerying, "CostModel.ComputeAllocation: queries complete")
  194. defer log.Profile(time.Now(), "CostModel.ComputeAllocation: processing complete")
  195. // Build out a map of Allocations, starting with (start, end) so that we
  196. // begin with minutes, from which we compute resource allocation and cost
  197. // totals from measured rate data.
  198. allocationMap := map[containerKey]*kubecost.Allocation{}
  199. // Keep track of the allocations per pod, for the sake of splitting PVC and
  200. // Network allocation into per-Allocation from per-Pod.
  201. podAllocation := map[podKey][]*kubecost.Allocation{}
  202. // clusterStarts and clusterEnds record the earliest start and latest end
  203. // times, respectively, on a cluster-basis. These are used for unmounted
  204. // PVs and other "virtual" Allocations so that minutes are maximally
  205. // accurate during start-up or spin-down of a cluster
  206. clusterStart := map[string]time.Time{}
  207. clusterEnd := map[string]time.Time{}
  208. buildAllocationMap(window, allocationMap, podAllocation, clusterStart, clusterEnd, resMinutes)
  209. applyCPUCoresAllocated(allocationMap, resCPUCoresAllocated)
  210. applyCPUCoresRequested(allocationMap, resCPURequests)
  211. applyCPUCoresUsed(allocationMap, resCPUUsage)
  212. applyRAMBytesAllocated(allocationMap, resRAMBytesAllocated)
  213. applyRAMBytesRequested(allocationMap, resRAMRequests)
  214. applyRAMBytesUsed(allocationMap, resRAMUsage)
  215. applyGPUsRequested(allocationMap, resGPUsRequested)
  216. applyNetworkAllocation(allocationMap, podAllocation, resNetZoneGiB, resNetZoneCostPerGiB)
  217. applyNetworkAllocation(allocationMap, podAllocation, resNetRegionGiB, resNetRegionCostPerGiB)
  218. applyNetworkAllocation(allocationMap, podAllocation, resNetInternetGiB, resNetInternetCostPerGiB)
  219. // TODO niko/computeallocation pruneDuplicateData? (see costmodel.go)
  220. namespaceLabels := resToNamespaceLabels(resNamespaceLabels)
  221. podLabels := resToPodLabels(resPodLabels)
  222. namespaceAnnotations := resToNamespaceAnnotations(resNamespaceAnnotations)
  223. podAnnotations := resToPodAnnotations(resPodAnnotations)
  224. applyLabels(allocationMap, namespaceLabels, podLabels)
  225. applyAnnotations(allocationMap, namespaceAnnotations, podAnnotations)
  226. serviceLabels := getServiceLabels(resServiceLabels)
  227. applyServicesToPods(allocationMap, podLabels, serviceLabels)
  228. podDeploymentMap := labelsToPodControllerMap(podLabels, resToDeploymentLabels(resDeploymentLabels))
  229. podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
  230. podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels)
  231. podJobMap := resToPodJobMap(resJobLabels)
  232. applyControllersToPods(allocationMap, podDeploymentMap)
  233. applyControllersToPods(allocationMap, podStatefulSetMap)
  234. applyControllersToPods(allocationMap, podDaemonSetMap)
  235. applyControllersToPods(allocationMap, podJobMap)
  236. // TODO breakdown network costs?
  237. // Build out a map of Nodes with resource costs, discounts, and node types
  238. // for converting resource allocation data to cumulative costs.
  239. nodeMap := map[nodeKey]*Node{}
  240. applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
  241. applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
  242. applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
  243. applyNodeSpot(nodeMap, resNodeIsSpot)
  244. applyNodeDiscount(nodeMap, cm)
  245. // Build out the map of all PVs with class, size and cost-per-hour.
  246. // Note: this does not record time running, which we may want to
  247. // include later for increased PV precision. (As long as the PV has
  248. // a PVC, we get time running there, so this is only inaccurate
  249. // for short-lived, unmounted PVs.)
  250. pvMap := map[pvKey]*PV{}
  251. buildPVMap(pvMap, resPVCostPerGiBHour)
  252. applyPVBytes(pvMap, resPVBytes)
  253. // Build out the map of all PVCs with time running, bytes requested,
  254. // and connect to the correct PV from pvMap. (If no PV exists, that
  255. // is noted, but does not result in any allocation/cost.)
  256. pvcMap := map[pvcKey]*PVC{}
  257. buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
  258. applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
  259. // Build out the relationships of pods to their PVCs. This step
  260. // populates the PVC.Count field so that PVC allocation can be
  261. // split appropriately among each pod's container allocation.
  262. podPVCMap := map[podKey][]*PVC{}
  263. buildPodPVCMap(podPVCMap, pvMap, pvcMap, podAllocation, resPodPVCAllocation)
  264. // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
  265. // cluster representing each cluster's unmounted PVs (if necessary).
  266. applyUnmountedPVs(window, allocationMap, pvMap, pvcMap)
  267. for _, alloc := range allocationMap {
  268. cluster, _ := alloc.Properties.GetCluster()
  269. node, _ := alloc.Properties.GetNode()
  270. namespace, _ := alloc.Properties.GetNamespace()
  271. pod, _ := alloc.Properties.GetPod()
  272. container, _ := alloc.Properties.GetContainer()
  273. podKey := newPodKey(cluster, namespace, pod)
  274. nodeKey := newNodeKey(cluster, node)
  275. if n, ok := nodeMap[nodeKey]; !ok {
  276. if pod != "unmounted-pvs" {
  277. log.Warningf("CostModel.ComputeAllocation: failed to find node %s for %s", nodeKey, alloc.Name)
  278. }
  279. } else {
  280. alloc.CPUCost = alloc.CPUCoreHours * n.CostPerCPUHr
  281. alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * n.CostPerRAMGiBHr
  282. alloc.GPUCost = alloc.GPUHours * n.CostPerGPUHr
  283. }
  284. if pvcs, ok := podPVCMap[podKey]; ok {
  285. for _, pvc := range pvcs {
  286. // Determine the (start, end) of the relationship between the
  287. // given PVC and the associated Allocation so that a precise
  288. // number of hours can be used to compute cumulative cost.
  289. s, e := alloc.Start, alloc.End
  290. if pvc.Start.After(alloc.Start) {
  291. s = pvc.Start
  292. }
  293. if pvc.End.Before(alloc.End) {
  294. e = pvc.End
  295. }
  296. minutes := e.Sub(s).Minutes()
  297. hrs := minutes / 60.0
  298. gib := pvc.Bytes / 1024 / 1024 / 1024
  299. alloc.PVByteHours += pvc.Bytes * hrs
  300. count := float64(pvc.Count)
  301. if pvc.Count < 1 {
  302. // TODO niko/computeallocation remove log (why would this happen?)
  303. log.Warningf("CostModel.ComputeAllocation: PVC.Count=%d for %s", pvc.Count, alloc.Name)
  304. count = 1
  305. }
  306. alloc.PVCost += pvc.Volume.CostPerGiBHour * gib * hrs / count
  307. }
  308. }
  309. alloc.TotalCost = 0.0
  310. alloc.TotalCost += alloc.CPUCost
  311. alloc.TotalCost += alloc.RAMCost
  312. alloc.TotalCost += alloc.GPUCost
  313. alloc.TotalCost += alloc.PVCost
  314. alloc.TotalCost += alloc.NetworkCost
  315. alloc.TotalCost += alloc.SharedCost
  316. alloc.TotalCost += alloc.ExternalCost
  317. if alloc.RAMBytesRequestAverage > 0 {
  318. alloc.RAMEfficiency = alloc.RAMBytesUsageAverage / alloc.RAMBytesRequestAverage
  319. }
  320. if alloc.CPUCoreRequestAverage > 0 {
  321. alloc.CPUEfficiency = alloc.CPUCoreUsageAverage / alloc.CPUCoreRequestAverage
  322. }
  323. if alloc.CPUCost+alloc.RAMCost > 0 {
  324. ramCostEff := alloc.RAMEfficiency * alloc.RAMCost
  325. cpuCostEff := alloc.CPUEfficiency * alloc.CPUCost
  326. alloc.TotalEfficiency = (ramCostEff + cpuCostEff) / (alloc.CPUCost + alloc.RAMCost)
  327. }
  328. // Make sure that the name is correct (node may not be present at this
  329. // point due to it missing from queryMinutes) then insert.
  330. alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container)
  331. allocSet.Set(alloc)
  332. }
  333. return allocSet, nil
  334. }
  335. func buildAllocationMap(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, clusterStart, clusterEnd map[string]time.Time, resMinutes []*prom.QueryResult) {
  336. for _, res := range resMinutes {
  337. if len(res.Values) == 0 {
  338. log.Warningf("CostModel.ComputeAllocation: empty minutes result")
  339. continue
  340. }
  341. cluster, err := res.GetString("cluster_id")
  342. if err != nil {
  343. cluster = env.GetClusterID()
  344. }
  345. labels, err := res.GetStrings("namespace", "pod", "container")
  346. if err != nil {
  347. log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
  348. continue
  349. }
  350. namespace := labels["namespace"]
  351. pod := labels["pod"]
  352. container := labels["container"]
  353. containerKey := newContainerKey(cluster, namespace, pod, container)
  354. podKey := newPodKey(cluster, namespace, pod)
  355. // allocStart and allocEnd are the timestamps of the first and last
  356. // minutes the allocation was running, respectively. We subtract 1m
  357. // from allocStart because this point will actually represent the end
  358. // of the first minute. We don't subtract from allocEnd because it
  359. // already represents the end of the last minute.
  360. var allocStart, allocEnd time.Time
  361. for _, datum := range res.Values {
  362. t := time.Unix(int64(datum.Timestamp), 0)
  363. if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  364. allocStart = t
  365. }
  366. if datum.Value > 0 && window.Contains(t) {
  367. allocEnd = t
  368. }
  369. }
  370. if allocStart.IsZero() || allocEnd.IsZero() {
  371. continue
  372. }
  373. allocStart = allocStart.Add(-time.Minute)
  374. // Set start if unset or this datum's start time is earlier than the
  375. // current earliest time.
  376. if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
  377. clusterStart[cluster] = allocStart
  378. }
  379. // Set end if unset or this datum's end time is later than the
  380. // current latest time.
  381. if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
  382. clusterEnd[cluster] = allocEnd
  383. }
  384. name := fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container)
  385. alloc := &kubecost.Allocation{
  386. Name: name,
  387. Properties: kubecost.Properties{},
  388. Window: window.Clone(),
  389. Start: allocStart,
  390. End: allocEnd,
  391. }
  392. alloc.Properties.SetContainer(container)
  393. alloc.Properties.SetPod(pod)
  394. alloc.Properties.SetNamespace(namespace)
  395. alloc.Properties.SetCluster(cluster)
  396. allocationMap[containerKey] = alloc
  397. if _, ok := podAllocation[podKey]; !ok {
  398. podAllocation[podKey] = []*kubecost.Allocation{}
  399. }
  400. podAllocation[podKey] = append(podAllocation[podKey], alloc)
  401. }
  402. }
  403. func applyCPUCoresAllocated(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresAllocated []*prom.QueryResult) {
  404. for _, res := range resCPUCoresAllocated {
  405. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  406. if err != nil {
  407. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing field: %s", err)
  408. continue
  409. }
  410. _, ok := allocationMap[key]
  411. if !ok {
  412. log.Warningf("CostModel.ComputeAllocation: unidentified CPU allocation query result: %s", key)
  413. continue
  414. }
  415. cpuCores := res.Values[0].Value
  416. hours := allocationMap[key].Minutes() / 60.0
  417. allocationMap[key].CPUCoreHours = cpuCores * hours
  418. node, err := res.GetString("node")
  419. if err != nil {
  420. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
  421. continue
  422. }
  423. allocationMap[key].Properties.SetNode(node)
  424. }
  425. }
  426. func applyCPUCoresRequested(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresRequested []*prom.QueryResult) {
  427. for _, res := range resCPUCoresRequested {
  428. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  429. if err != nil {
  430. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing field: %s", err)
  431. continue
  432. }
  433. _, ok := allocationMap[key]
  434. if !ok {
  435. continue
  436. }
  437. allocationMap[key].CPUCoreRequestAverage = res.Values[0].Value
  438. // If CPU allocation is less than requests, set CPUCoreHours to
  439. // request level.
  440. if allocationMap[key].CPUCores() < res.Values[0].Value {
  441. allocationMap[key].CPUCoreHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  442. }
  443. node, err := res.GetString("node")
  444. if err != nil {
  445. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
  446. continue
  447. }
  448. allocationMap[key].Properties.SetNode(node)
  449. }
  450. }
  451. func applyCPUCoresUsed(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresUsed []*prom.QueryResult) {
  452. for _, res := range resCPUCoresUsed {
  453. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  454. if err != nil {
  455. log.Warningf("CostModel.ComputeAllocation: CPU usage query result missing field: %s", err)
  456. continue
  457. }
  458. _, ok := allocationMap[key]
  459. if !ok {
  460. log.Warningf("CostModel.ComputeAllocation: unidentified CPU usage query result: %s", key)
  461. continue
  462. }
  463. allocationMap[key].CPUCoreUsageAverage = res.Values[0].Value
  464. }
  465. }
  466. func applyRAMBytesAllocated(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesAllocated []*prom.QueryResult) {
  467. for _, res := range resRAMBytesAllocated {
  468. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  469. if err != nil {
  470. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing field: %s", err)
  471. continue
  472. }
  473. _, ok := allocationMap[key]
  474. if !ok {
  475. log.Warningf("CostModel.ComputeAllocation: unidentified RAM allocation query result: %s", key)
  476. continue
  477. }
  478. ramBytes := res.Values[0].Value
  479. hours := allocationMap[key].Minutes() / 60.0
  480. allocationMap[key].RAMByteHours = ramBytes * hours
  481. node, err := res.GetString("node")
  482. if err != nil {
  483. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
  484. continue
  485. }
  486. allocationMap[key].Properties.SetNode(node)
  487. }
  488. }
  489. func applyRAMBytesRequested(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesRequested []*prom.QueryResult) {
  490. for _, res := range resRAMBytesRequested {
  491. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  492. if err != nil {
  493. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing field: %s", err)
  494. continue
  495. }
  496. _, ok := allocationMap[key]
  497. if !ok {
  498. continue
  499. }
  500. allocationMap[key].RAMBytesRequestAverage = res.Values[0].Value
  501. // If RAM allocation is less than requests, set RAMByteHours to
  502. // request level.
  503. if allocationMap[key].RAMBytes() < res.Values[0].Value {
  504. allocationMap[key].RAMByteHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  505. }
  506. node, err := res.GetString("node")
  507. if err != nil {
  508. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
  509. continue
  510. }
  511. allocationMap[key].Properties.SetNode(node)
  512. }
  513. }
  514. func applyRAMBytesUsed(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesUsed []*prom.QueryResult) {
  515. for _, res := range resRAMBytesUsed {
  516. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  517. if err != nil {
  518. log.Warningf("CostModel.ComputeAllocation: RAM usage query result missing field: %s", err)
  519. continue
  520. }
  521. _, ok := allocationMap[key]
  522. if !ok {
  523. log.Warningf("CostModel.ComputeAllocation: unidentified RAM usage query result: %s", key)
  524. continue
  525. }
  526. allocationMap[key].RAMBytesUsageAverage = res.Values[0].Value
  527. }
  528. }
  529. func applyGPUsRequested(allocationMap map[containerKey]*kubecost.Allocation, resGPUsRequested []*prom.QueryResult) {
  530. for _, res := range resGPUsRequested {
  531. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  532. if err != nil {
  533. log.Warningf("CostModel.ComputeAllocation: GPU allocation query result missing field: %s", err)
  534. continue
  535. }
  536. _, ok := allocationMap[key]
  537. if !ok {
  538. log.Warningf("CostModel.ComputeAllocation: unidentified GPU allocation query result: %s", key)
  539. continue
  540. }
  541. // TODO niko/computeallocation remove log
  542. log.Infof("CostModel.ComputeAllocation: GPU results: %s=%f", key, res.Values[0].Value)
  543. hrs := allocationMap[key].Minutes() / 60.0
  544. allocationMap[key].GPUHours = res.Values[0].Value * hrs
  545. }
  546. }
  547. func applyNetworkAllocation(allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, resNetworkGiB []*prom.QueryResult, resNetworkCostPerGiB []*prom.QueryResult) {
  548. costPerGiBByCluster := map[string]float64{}
  549. for _, res := range resNetworkCostPerGiB {
  550. cluster, err := res.GetString("cluster_id")
  551. if err != nil {
  552. cluster = env.GetClusterID()
  553. }
  554. costPerGiBByCluster[cluster] = res.Values[0].Value
  555. }
  556. for _, res := range resNetworkGiB {
  557. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod_name")
  558. if err != nil {
  559. log.Warningf("CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
  560. continue
  561. }
  562. allocs, ok := podAllocation[podKey]
  563. if !ok {
  564. log.Warningf("CostModel.ComputeAllocation: Network allocation query result for unidentified pod allocations: %s", podKey)
  565. continue
  566. }
  567. for _, alloc := range allocs {
  568. gib := res.Values[0].Value
  569. costPerGiB := costPerGiBByCluster[podKey.Cluster]
  570. alloc.NetworkCost = gib * costPerGiB
  571. }
  572. }
  573. }
  574. func resToNamespaceLabels(resNamespaceLabels []*prom.QueryResult) map[string]map[string]string {
  575. namespaceLabels := map[string]map[string]string{}
  576. for _, res := range resNamespaceLabels {
  577. namespace, err := res.GetString("namespace")
  578. if err != nil {
  579. continue
  580. }
  581. if _, ok := namespaceLabels[namespace]; !ok {
  582. namespaceLabels[namespace] = map[string]string{}
  583. }
  584. for k, l := range res.GetLabels() {
  585. namespaceLabels[namespace][k] = l
  586. }
  587. }
  588. return namespaceLabels
  589. }
  590. func resToPodLabels(resPodLabels []*prom.QueryResult) map[podKey]map[string]string {
  591. podLabels := map[podKey]map[string]string{}
  592. for _, res := range resPodLabels {
  593. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod")
  594. if err != nil {
  595. continue
  596. }
  597. if _, ok := podLabels[podKey]; !ok {
  598. podLabels[podKey] = map[string]string{}
  599. }
  600. for k, l := range res.GetLabels() {
  601. podLabels[podKey][k] = l
  602. }
  603. }
  604. return podLabels
  605. }
  606. func resToNamespaceAnnotations(resNamespaceAnnotations []*prom.QueryResult) map[string]map[string]string {
  607. namespaceAnnotations := map[string]map[string]string{}
  608. for _, res := range resNamespaceAnnotations {
  609. namespace, err := res.GetString("namespace")
  610. if err != nil {
  611. continue
  612. }
  613. if _, ok := namespaceAnnotations[namespace]; !ok {
  614. namespaceAnnotations[namespace] = map[string]string{}
  615. }
  616. for k, l := range res.GetAnnotations() {
  617. namespaceAnnotations[namespace][k] = l
  618. }
  619. }
  620. return namespaceAnnotations
  621. }
  622. func resToPodAnnotations(resPodAnnotations []*prom.QueryResult) map[podKey]map[string]string {
  623. podAnnotations := map[podKey]map[string]string{}
  624. for _, res := range resPodAnnotations {
  625. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod")
  626. if err != nil {
  627. continue
  628. }
  629. if _, ok := podAnnotations[podKey]; !ok {
  630. podAnnotations[podKey] = map[string]string{}
  631. }
  632. for k, l := range res.GetAnnotations() {
  633. podAnnotations[podKey][k] = l
  634. }
  635. }
  636. return podAnnotations
  637. }
  638. func applyLabels(allocationMap map[containerKey]*kubecost.Allocation, namespaceLabels map[string]map[string]string, podLabels map[podKey]map[string]string) {
  639. for key, alloc := range allocationMap {
  640. allocLabels, err := alloc.Properties.GetLabels()
  641. if err != nil {
  642. allocLabels = map[string]string{}
  643. }
  644. // Apply namespace labels first, then pod labels so that pod labels
  645. // overwrite namespace labels.
  646. if labels, ok := namespaceLabels[key.Namespace]; ok {
  647. for k, v := range labels {
  648. allocLabels[k] = v
  649. }
  650. }
  651. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  652. if labels, ok := podLabels[podKey]; ok {
  653. for k, v := range labels {
  654. allocLabels[k] = v
  655. }
  656. }
  657. alloc.Properties.SetLabels(allocLabels)
  658. }
  659. }
  660. func applyAnnotations(allocationMap map[containerKey]*kubecost.Allocation, namespaceAnnotations map[string]map[string]string, podAnnotations map[podKey]map[string]string) {
  661. for key, alloc := range allocationMap {
  662. allocAnnotations, err := alloc.Properties.GetAnnotations()
  663. if err != nil {
  664. allocAnnotations = map[string]string{}
  665. }
  666. // Apply namespace annotations first, then pod annotations so that
  667. // pod labels overwrite namespace labels.
  668. if labels, ok := namespaceAnnotations[key.Namespace]; ok {
  669. for k, v := range labels {
  670. allocAnnotations[k] = v
  671. }
  672. }
  673. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  674. if labels, ok := podAnnotations[podKey]; ok {
  675. for k, v := range labels {
  676. allocAnnotations[k] = v
  677. }
  678. }
  679. alloc.Properties.SetAnnotations(allocAnnotations)
  680. }
  681. }
  682. func getServiceLabels(resServiceLabels []*prom.QueryResult) map[serviceKey]map[string]string {
  683. serviceLabels := map[serviceKey]map[string]string{}
  684. for _, res := range resServiceLabels {
  685. serviceKey, err := resultServiceKey(res, "cluster_id", "namespace", "service")
  686. if err != nil {
  687. continue
  688. }
  689. if _, ok := serviceLabels[serviceKey]; !ok {
  690. serviceLabels[serviceKey] = map[string]string{}
  691. }
  692. for k, l := range res.GetLabels() {
  693. serviceLabels[serviceKey][k] = l
  694. }
  695. }
  696. return serviceLabels
  697. }
  698. func resToDeploymentLabels(resDeploymentLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  699. deploymentLabels := map[controllerKey]map[string]string{}
  700. for _, res := range resDeploymentLabels {
  701. controllerKey, err := resultDeploymentKey(res, "cluster_id", "namespace", "deployment")
  702. if err != nil {
  703. continue
  704. }
  705. if _, ok := deploymentLabels[controllerKey]; !ok {
  706. deploymentLabels[controllerKey] = map[string]string{}
  707. }
  708. for k, l := range res.GetLabels() {
  709. deploymentLabels[controllerKey][k] = l
  710. }
  711. }
  712. return deploymentLabels
  713. }
  714. func resToStatefulSetLabels(resStatefulSetLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  715. statefulSetLabels := map[controllerKey]map[string]string{}
  716. for _, res := range resStatefulSetLabels {
  717. controllerKey, err := resultStatefulSetKey(res, "cluster_id", "namespace", "statefulSet")
  718. if err != nil {
  719. continue
  720. }
  721. if _, ok := statefulSetLabels[controllerKey]; !ok {
  722. statefulSetLabels[controllerKey] = map[string]string{}
  723. }
  724. for k, l := range res.GetLabels() {
  725. statefulSetLabels[controllerKey][k] = l
  726. }
  727. }
  728. return statefulSetLabels
  729. }
  730. func labelsToPodControllerMap(podLabels map[podKey]map[string]string, controllerLabels map[controllerKey]map[string]string) map[podKey]controllerKey {
  731. podControllerMap := map[podKey]controllerKey{}
  732. // For each controller, turn the labels into a selector and attempt to
  733. // match it with each set of pod labels. A match indicates that the pod
  734. // belongs to the controller.
  735. for cKey, cLabels := range controllerLabels {
  736. selector := labels.Set(cLabels).AsSelectorPreValidated()
  737. for pKey, pLabels := range podLabels {
  738. // If the pod is in a different cluster or namespace, there is
  739. // no need to compare the labels.
  740. if cKey.Cluster != pKey.Cluster || cKey.Namespace != pKey.Namespace {
  741. continue
  742. }
  743. podLabelSet := labels.Set(pLabels)
  744. if selector.Matches(podLabelSet) {
  745. if _, ok := podControllerMap[pKey]; ok {
  746. log.Warningf("CostModel.ComputeAllocation: PodControllerMap match already exists: %s matches %s and %s", pKey, podControllerMap[pKey], cKey)
  747. }
  748. podControllerMap[pKey] = cKey
  749. }
  750. }
  751. }
  752. return podControllerMap
  753. }
  754. func resToPodDaemonSetMap(resDaemonSetLabels []*prom.QueryResult) map[podKey]controllerKey {
  755. daemonSetLabels := map[podKey]controllerKey{}
  756. for _, res := range resDaemonSetLabels {
  757. controllerKey, err := resultDaemonSetKey(res, "cluster_id", "namespace", "owner_name")
  758. if err != nil {
  759. continue
  760. }
  761. pod, err := res.GetString("pod")
  762. if err != nil {
  763. log.Warningf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
  764. }
  765. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  766. daemonSetLabels[podKey] = controllerKey
  767. }
  768. return daemonSetLabels
  769. }
  770. func resToPodJobMap(resJobLabels []*prom.QueryResult) map[podKey]controllerKey {
  771. jobLabels := map[podKey]controllerKey{}
  772. for _, res := range resJobLabels {
  773. controllerKey, err := resultJobKey(res, "cluster_id", "namespace", "owner_name")
  774. if err != nil {
  775. continue
  776. }
  777. pod, err := res.GetString("pod")
  778. if err != nil {
  779. log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
  780. }
  781. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  782. jobLabels[podKey] = controllerKey
  783. }
  784. return jobLabels
  785. }
  786. func applyServicesToPods(allocationMap map[containerKey]*kubecost.Allocation, podLabels map[podKey]map[string]string, serviceLabels map[serviceKey]map[string]string) {
  787. podServicesMap := map[podKey][]serviceKey{}
  788. // For each service, turn the labels into a selector and attempt to
  789. // match it with each set of pod labels. A match indicates that the pod
  790. // belongs to the service.
  791. for sKey, sLabels := range serviceLabels {
  792. selector := labels.Set(sLabels).AsSelectorPreValidated()
  793. for pKey, pLabels := range podLabels {
  794. // If the pod is in a different cluster or namespace, there is
  795. // no need to compare the labels.
  796. if sKey.Cluster != pKey.Cluster || sKey.Namespace != pKey.Namespace {
  797. continue
  798. }
  799. podLabelSet := labels.Set(pLabels)
  800. if selector.Matches(podLabelSet) {
  801. if _, ok := podServicesMap[pKey]; !ok {
  802. podServicesMap[pKey] = []serviceKey{}
  803. }
  804. podServicesMap[pKey] = append(podServicesMap[pKey], sKey)
  805. }
  806. }
  807. }
  808. // For each allocation, attempt to find and apply the list of services
  809. // associated with the allocation's pod.
  810. for key, alloc := range allocationMap {
  811. pKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  812. if sKeys, ok := podServicesMap[pKey]; ok {
  813. services := []string{}
  814. for _, sKey := range sKeys {
  815. services = append(services, sKey.Service)
  816. }
  817. alloc.Properties.SetServices(services)
  818. }
  819. }
  820. }
  821. func applyControllersToPods(allocationMap map[containerKey]*kubecost.Allocation, podControllerMap map[podKey]controllerKey) {
  822. for key, alloc := range allocationMap {
  823. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  824. if controllerKey, ok := podControllerMap[podKey]; ok {
  825. alloc.Properties.SetControllerKind(controllerKey.ControllerKind)
  826. alloc.Properties.SetController(controllerKey.Controller)
  827. }
  828. }
  829. }
  830. func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*prom.QueryResult) {
  831. for _, res := range resNodeCostPerCPUHr {
  832. cluster, err := res.GetString("cluster_id")
  833. if err != nil {
  834. cluster = env.GetClusterID()
  835. }
  836. node, err := res.GetString("node")
  837. if err != nil {
  838. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  839. continue
  840. }
  841. instanceType, err := res.GetString("instance_type")
  842. if err != nil {
  843. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  844. continue
  845. }
  846. key := newNodeKey(cluster, node)
  847. if _, ok := nodeMap[key]; !ok {
  848. nodeMap[key] = &Node{
  849. Name: node,
  850. NodeType: instanceType,
  851. }
  852. }
  853. nodeMap[key].CostPerCPUHr = res.Values[0].Value
  854. }
  855. }
  856. func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
  857. for _, res := range resNodeCostPerRAMGiBHr {
  858. cluster, err := res.GetString("cluster_id")
  859. if err != nil {
  860. cluster = env.GetClusterID()
  861. }
  862. node, err := res.GetString("node")
  863. if err != nil {
  864. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  865. continue
  866. }
  867. instanceType, err := res.GetString("instance_type")
  868. if err != nil {
  869. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  870. continue
  871. }
  872. key := newNodeKey(cluster, node)
  873. if _, ok := nodeMap[key]; !ok {
  874. nodeMap[key] = &Node{
  875. Name: node,
  876. NodeType: instanceType,
  877. }
  878. }
  879. nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
  880. }
  881. }
  882. func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*prom.QueryResult) {
  883. for _, res := range resNodeCostPerGPUHr {
  884. cluster, err := res.GetString("cluster_id")
  885. if err != nil {
  886. cluster = env.GetClusterID()
  887. }
  888. node, err := res.GetString("node")
  889. if err != nil {
  890. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  891. continue
  892. }
  893. instanceType, err := res.GetString("instance_type")
  894. if err != nil {
  895. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  896. continue
  897. }
  898. key := newNodeKey(cluster, node)
  899. if _, ok := nodeMap[key]; !ok {
  900. nodeMap[key] = &Node{
  901. Name: node,
  902. NodeType: instanceType,
  903. }
  904. }
  905. nodeMap[key].CostPerGPUHr = res.Values[0].Value
  906. }
  907. }
  908. func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult) {
  909. for _, res := range resNodeIsSpot {
  910. cluster, err := res.GetString("cluster_id")
  911. if err != nil {
  912. cluster = env.GetClusterID()
  913. }
  914. node, err := res.GetString("node")
  915. if err != nil {
  916. log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
  917. continue
  918. }
  919. key := newNodeKey(cluster, node)
  920. if _, ok := nodeMap[key]; !ok {
  921. log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
  922. continue
  923. }
  924. nodeMap[key].Preemptible = res.Values[0].Value > 0
  925. }
  926. }
  927. func applyNodeDiscount(nodeMap map[nodeKey]*Node, cm *CostModel) {
  928. if cm == nil {
  929. return
  930. }
  931. c, err := cm.Provider.GetConfig()
  932. if err != nil {
  933. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  934. return
  935. }
  936. discount, err := ParsePercentString(c.Discount)
  937. if err != nil {
  938. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  939. return
  940. }
  941. negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
  942. if err != nil {
  943. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  944. return
  945. }
  946. for _, node := range nodeMap {
  947. // TODO niko/computeallocation GKE Reserved Instances into account
  948. node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
  949. node.CostPerCPUHr *= (1.0 - node.Discount)
  950. node.CostPerRAMGiBHr *= (1.0 - node.Discount)
  951. }
  952. }
  953. func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
  954. for _, res := range resPVCostPerGiBHour {
  955. cluster, err := res.GetString("cluster_id")
  956. if err != nil {
  957. cluster = env.GetClusterID()
  958. }
  959. name, err := res.GetString("volumename")
  960. if err != nil {
  961. log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
  962. continue
  963. }
  964. key := newPVKey(cluster, name)
  965. pvMap[key] = &PV{
  966. Cluster: cluster,
  967. Name: name,
  968. CostPerGiBHour: res.Values[0].Value,
  969. }
  970. }
  971. }
  972. func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
  973. for _, res := range resPVBytes {
  974. key, err := resultPVKey(res, "cluster_id", "persistentvolume")
  975. if err != nil {
  976. log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
  977. continue
  978. }
  979. if _, ok := pvMap[key]; !ok {
  980. log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
  981. continue
  982. }
  983. pvMap[key].Bytes = res.Values[0].Value
  984. }
  985. }
  986. func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
  987. for _, res := range resPVCInfo {
  988. cluster, err := res.GetString("cluster_id")
  989. if err != nil {
  990. cluster = env.GetClusterID()
  991. }
  992. values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
  993. if err != nil {
  994. log.Warningf("CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
  995. continue
  996. }
  997. namespace := values["namespace"]
  998. name := values["persistentvolumeclaim"]
  999. volume := values["volumename"]
  1000. storageClass := values["storageclass"]
  1001. pvKey := newPVKey(cluster, volume)
  1002. pvcKey := newPVCKey(cluster, namespace, name)
  1003. // pvcStart and pvcEnd are the timestamps of the first and last minutes
  1004. // the PVC was running, respectively. We subtract 1m from pvcStart
  1005. // because this point will actually represent the end of the first
  1006. // minute. We don't subtract from pvcEnd because it already represents
  1007. // the end of the last minute.
  1008. var pvcStart, pvcEnd time.Time
  1009. for _, datum := range res.Values {
  1010. t := time.Unix(int64(datum.Timestamp), 0)
  1011. if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  1012. pvcStart = t
  1013. }
  1014. if datum.Value > 0 && window.Contains(t) {
  1015. pvcEnd = t
  1016. }
  1017. }
  1018. if pvcStart.IsZero() || pvcEnd.IsZero() {
  1019. log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
  1020. }
  1021. pvcStart = pvcStart.Add(-time.Minute)
  1022. if _, ok := pvMap[pvKey]; !ok {
  1023. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC info query result: %s", pvKey)
  1024. continue
  1025. }
  1026. pvMap[pvKey].StorageClass = storageClass
  1027. if _, ok := pvcMap[pvcKey]; !ok {
  1028. pvcMap[pvcKey] = &PVC{}
  1029. }
  1030. pvcMap[pvcKey].Name = name
  1031. pvcMap[pvcKey].Namespace = namespace
  1032. pvcMap[pvcKey].Volume = pvMap[pvKey]
  1033. pvcMap[pvcKey].Start = pvcStart
  1034. pvcMap[pvcKey].End = pvcEnd
  1035. }
  1036. }
  1037. func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
  1038. for _, res := range resPVCBytesRequested {
  1039. key, err := resultPVCKey(res, "cluster_id", "namespace", "persistentvolumeclaim")
  1040. if err != nil {
  1041. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested query result missing field: %s", err)
  1042. continue
  1043. }
  1044. if _, ok := pvcMap[key]; !ok {
  1045. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested result for missing PVC: %s", key)
  1046. continue
  1047. }
  1048. pvcMap[key].Bytes = res.Values[0].Value
  1049. }
  1050. }
  1051. func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podAllocation map[podKey][]*kubecost.Allocation, resPodPVCAllocation []*prom.QueryResult) {
  1052. for _, res := range resPodPVCAllocation {
  1053. cluster, err := res.GetString("cluster_id")
  1054. if err != nil {
  1055. cluster = env.GetClusterID()
  1056. }
  1057. values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
  1058. if err != nil {
  1059. log.Warningf("CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
  1060. continue
  1061. }
  1062. namespace := values["namespace"]
  1063. pod := values["pod"]
  1064. name := values["persistentvolumeclaim"]
  1065. volume := values["persistentvolume"]
  1066. podKey := newPodKey(cluster, namespace, pod)
  1067. pvKey := newPVKey(cluster, volume)
  1068. pvcKey := newPVCKey(cluster, namespace, name)
  1069. if _, ok := pvMap[pvKey]; !ok {
  1070. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
  1071. continue
  1072. }
  1073. if _, ok := podPVCMap[podKey]; !ok {
  1074. podPVCMap[podKey] = []*PVC{}
  1075. }
  1076. pvc, ok := pvcMap[pvcKey]
  1077. if !ok {
  1078. log.Warningf("CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
  1079. continue
  1080. }
  1081. pvc.Count = len(podAllocation[podKey])
  1082. pvc.Mounted = true
  1083. podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
  1084. }
  1085. }
  1086. func applyUnmountedPVs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
  1087. unmountedPVBytes := map[string]float64{}
  1088. unmountedPVCost := map[string]float64{}
  1089. for _, pv := range pvMap {
  1090. mounted := false
  1091. for _, pvc := range pvcMap {
  1092. if pvc.Volume == nil {
  1093. continue
  1094. }
  1095. if pvc.Volume == pv {
  1096. mounted = true
  1097. break
  1098. }
  1099. }
  1100. if !mounted {
  1101. gib := pv.Bytes / 1024 / 1024 / 1024
  1102. hrs := window.Minutes() / 60.0 // TODO niko/computeallocation PV hours, not window hours?
  1103. cost := pv.CostPerGiBHour * gib * hrs
  1104. unmountedPVCost[pv.Cluster] += cost
  1105. unmountedPVBytes[pv.Cluster] += pv.Bytes
  1106. }
  1107. }
  1108. for cluster, amount := range unmountedPVCost {
  1109. container := "unmounted-pvs"
  1110. pod := "unmounted-pvs"
  1111. namespace := ""
  1112. node := ""
  1113. containerKey := newContainerKey(cluster, namespace, pod, container)
  1114. allocationMap[containerKey] = &kubecost.Allocation{
  1115. Name: fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container),
  1116. Properties: kubecost.Properties{
  1117. kubecost.ClusterProp: cluster,
  1118. kubecost.NodeProp: node,
  1119. kubecost.NamespaceProp: namespace,
  1120. kubecost.PodProp: pod,
  1121. kubecost.ContainerProp: container,
  1122. },
  1123. Window: window.Clone(),
  1124. Start: *window.Start(),
  1125. End: *window.End(),
  1126. PVByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
  1127. PVCost: amount,
  1128. TotalCost: amount,
  1129. }
  1130. }
  1131. }
  1132. func applyUnmountedPVCs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvcMap map[pvcKey]*PVC) {
  1133. unmountedPVCBytes := map[namespaceKey]float64{}
  1134. unmountedPVCCost := map[namespaceKey]float64{}
  1135. for _, pvc := range pvcMap {
  1136. if !pvc.Mounted && pvc.Volume != nil {
  1137. key := newNamespaceKey(pvc.Cluster, pvc.Namespace)
  1138. gib := pvc.Volume.Bytes / 1024 / 1024 / 1024
  1139. hrs := pvc.Minutes() / 60.0
  1140. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  1141. unmountedPVCCost[key] += cost
  1142. unmountedPVCBytes[key] += pvc.Volume.Bytes
  1143. }
  1144. }
  1145. for key, amount := range unmountedPVCCost {
  1146. container := "unmounted-pvs"
  1147. pod := "unmounted-pvs"
  1148. namespace := key.Namespace
  1149. node := ""
  1150. cluster := key.Cluster
  1151. containerKey := newContainerKey(cluster, namespace, pod, container)
  1152. allocationMap[containerKey] = &kubecost.Allocation{
  1153. Name: fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container),
  1154. Properties: kubecost.Properties{
  1155. kubecost.ClusterProp: cluster,
  1156. kubecost.NodeProp: node,
  1157. kubecost.NamespaceProp: namespace,
  1158. kubecost.PodProp: pod,
  1159. kubecost.ContainerProp: container,
  1160. },
  1161. Window: window.Clone(),
  1162. Start: *window.Start(),
  1163. End: *window.End(),
  1164. PVByteHours: unmountedPVCBytes[key] * window.Minutes() / 60.0,
  1165. PVCost: amount,
  1166. TotalCost: amount,
  1167. }
  1168. }
  1169. }
  1170. // PVC describes a PersistentVolumeClaim
  1171. // TODO move to pkg/kubecost? [TODO:CLEANUP]
  1172. // TODO add PersistentVolumeClaims field to type Allocation? [TODO:CLEANUP]
  1173. type PVC struct {
  1174. Bytes float64 `json:"bytes"`
  1175. Count int `json:"count"`
  1176. Name string `json:"name"`
  1177. Cluster string `json:"cluster"`
  1178. Namespace string `json:"namespace"`
  1179. Volume *PV `json:"persistentVolume"`
  1180. Mounted bool `json:"mounted"`
  1181. Start time.Time `json:"start"`
  1182. End time.Time `json:"end"`
  1183. }
  1184. // Cost computes the cumulative cost of the PVC
  1185. func (pvc *PVC) Cost() float64 {
  1186. if pvc == nil || pvc.Volume == nil {
  1187. return 0.0
  1188. }
  1189. gib := pvc.Bytes / 1024 / 1024 / 1024
  1190. hrs := pvc.Minutes() / 60.0
  1191. return pvc.Volume.CostPerGiBHour * gib * hrs
  1192. }
  1193. // Minutes computes the number of minutes over which the PVC is defined
  1194. func (pvc *PVC) Minutes() float64 {
  1195. if pvc == nil {
  1196. return 0.0
  1197. }
  1198. return pvc.End.Sub(pvc.Start).Minutes()
  1199. }
  1200. // String returns a string representation of the PVC
  1201. func (pvc *PVC) String() string {
  1202. if pvc == nil {
  1203. return "<nil>"
  1204. }
  1205. return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
  1206. }
  1207. // PV describes a PersistentVolume
  1208. // TODO move to pkg/kubecost? [TODO:CLEANUP]
  1209. type PV struct {
  1210. Bytes float64 `json:"bytes"`
  1211. CostPerGiBHour float64 `json:"costPerGiBHour"` // TODO niko/computeallocation GiB or GB?
  1212. Cluster string `json:"cluster"`
  1213. Name string `json:"name"`
  1214. StorageClass string `json:"storageClass"`
  1215. }
  1216. // String returns a string representation of the PV
  1217. func (pv *PV) String() string {
  1218. if pv == nil {
  1219. return "<nil>"
  1220. }
  1221. return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
  1222. }