allocation.go 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483
  1. package costmodel
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/kubecost/cost-model/pkg/env"
  6. "github.com/kubecost/cost-model/pkg/kubecost"
  7. "github.com/kubecost/cost-model/pkg/log"
  8. "github.com/kubecost/cost-model/pkg/prom"
  9. "github.com/kubecost/cost-model/pkg/thanos"
  10. "k8s.io/apimachinery/pkg/labels"
  11. )
  // PromQL query templates used by ComputeAllocation. Every template takes a
  // range duration followed by an offset clause (which may be empty). The two
  // subquery templates, queryFmtMinutes and queryFmtPVCInfo, additionally take
  // a resolution between the duration and the offset.
  const (
  	// Container run time and CPU, RAM, and GPU allocation, requests, and usage.
  	queryFmtMinutes           = `avg(kube_pod_container_status_running{}) by (container, pod, namespace, cluster_id)[%s:%s]%s`
  	queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtRAMRequests       = `avg(avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtRAMUsage          = `avg(avg_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`
  	queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtCPURequests       = `avg(avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
  	queryFmtCPUUsage          = `avg(rate(container_cpu_usage_seconds_total{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`
  	queryFmtGPUsRequested     = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`

  	// Node hourly resource prices and spot status.
  	queryFmtNodeCostPerCPUHr    = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeCostPerGPUHr    = `avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`
  	queryFmtNodeIsSpot          = `avg_over_time(kubecost_node_is_spot[%s]%s)`

  	// Persistent volumes, persistent volume claims, and per-pod PVC allocation.
  	queryFmtPVCInfo           = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[%s:%s]%s`
  	queryFmtPVBytes           = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, cluster_id)`
  	queryFmtPodPVCAllocation  = `avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)`
  	queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, cluster_id)`
  	queryFmtPVCostPerGiBHour  = `avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, cluster_id)`

  	// Network egress volume (in GiB) and per-GiB price for cross-zone,
  	// cross-region, and internet traffic.
  	queryFmtNetZoneGiB            = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetZoneCostPerGiB     = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (cluster_id)`
  	queryFmtNetRegionGiB          = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetRegionCostPerGiB   = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (cluster_id)`
  	queryFmtNetInternetGiB        = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`
  	queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (cluster_id)`

  	// Labels and annotations, plus the selectors and ownership records used
  	// to associate pods with services and controllers.
  	queryFmtNamespaceLabels      = `avg_over_time(kube_namespace_labels[%s]%s)`
  	queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations[%s]%s)`
  	queryFmtPodLabels            = `avg_over_time(kube_pod_labels[%s]%s)`
  	queryFmtPodAnnotations       = `avg_over_time(kube_pod_annotations[%s]%s)`
  	queryFmtServiceLabels        = `avg_over_time(service_selector_labels[%s]%s)`
  	queryFmtDeploymentLabels     = `avg_over_time(deployment_match_labels[%s]%s)`
  	queryFmtStatefulSetLabels    = `avg_over_time(statefulSet_match_labels[%s]%s)`
  	queryFmtDaemonSetLabels      = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, cluster_id)`
  	queryFmtJobLabels            = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,cluster_id)`
  )
  46. // ComputeAllocation uses the CostModel instance to compute an AllocationSet
  47. // for the window defined by the given start and end times. The Allocations
  48. // returned are unaggregated (i.e. down to the container level).
  49. func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.AllocationSet, error) {
  50. // Create a window spanning the requested query
  51. s, e := start, end
  52. window := kubecost.NewWindow(&s, &e)
  53. // Create an empty AllocationSet. For safety, in the case of an error, we
  54. // should prefer to return this empty set with the error. (In the case of
  55. // no error, of course we populate the set and return it.)
  56. allocSet := kubecost.NewAllocationSet(start, end)
  57. // Convert window (start, end) to (duration, offset) for querying Prometheus
  58. timesToDurations := func(s, e time.Time) (dur, off time.Duration) {
  59. now := time.Now()
  60. off = now.Sub(e)
  61. dur = e.Sub(s)
  62. return dur, off
  63. }
  64. duration, offset := timesToDurations(start, end)
  65. // If using Thanos, increase offset to 3 hours, reducing the duration by
  66. // equal measure to maintain the same starting point.
  67. thanosDur := thanos.OffsetDuration()
  68. if offset < thanosDur && env.IsThanosEnabled() {
  69. diff := thanosDur - offset
  70. offset += diff
  71. duration -= diff
  72. }
  73. // If duration < 0, return an empty set
  74. if duration < 0 {
  75. return allocSet, nil
  76. }
  77. // Negative offset means that the end time is in the future. Prometheus
  78. // fails for non-positive offset values, so shrink the duration and
  79. // remove the offset altogether.
  80. if offset < 0 {
  81. duration = duration + offset
  82. offset = 0
  83. }
  84. durStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
  85. offStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
  86. if offset < time.Minute {
  87. offStr = ""
  88. }
  89. // TODO niko/computeallocation dynamic resolution? add to ComputeAllocation() in allocation.Source?
  90. resStr := "1m"
  91. // resPerHr := 60
  92. startQuerying := time.Now()
  93. ctx := prom.NewContext(cm.PrometheusClient)
  94. // TODO niko/computeallocation retries? (That should probably go into the Store.)
  95. // TODO niko/computeallocation split into required and optional queries?
  96. queryMinutes := fmt.Sprintf(queryFmtMinutes, durStr, resStr, offStr)
  97. resChMinutes := ctx.Query(queryMinutes)
  98. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr)
  99. resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
  100. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, offStr)
  101. resChRAMRequests := ctx.Query(queryRAMRequests)
  102. queryRAMUsage := fmt.Sprintf(queryFmtRAMUsage, durStr, offStr)
  103. resChRAMUsage := ctx.Query(queryRAMUsage)
  104. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, offStr)
  105. resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
  106. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, offStr)
  107. resChCPURequests := ctx.Query(queryCPURequests)
  108. queryCPUUsage := fmt.Sprintf(queryFmtCPUUsage, durStr, offStr)
  109. resChCPUUsage := ctx.Query(queryCPUUsage)
  110. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, offStr)
  111. resChGPUsRequested := ctx.Query(queryGPUsRequested)
  112. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, offStr)
  113. resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
  114. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, offStr)
  115. resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
  116. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, offStr)
  117. resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
  118. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr, offStr)
  119. resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
  120. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, durStr, resStr, offStr)
  121. resChPVCInfo := ctx.Query(queryPVCInfo)
  122. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, offStr)
  123. resChPVBytes := ctx.Query(queryPVBytes)
  124. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, offStr)
  125. resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
  126. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, offStr)
  127. resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
  128. queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, offStr)
  129. resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
  130. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, offStr)
  131. resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
  132. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, offStr)
  133. resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
  134. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, offStr)
  135. resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
  136. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, offStr)
  137. resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
  138. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, offStr)
  139. resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
  140. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, offStr)
  141. resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
  142. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr, offStr)
  143. resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
  144. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr, offStr)
  145. resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
  146. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr, offStr)
  147. resChPodLabels := ctx.Query(queryPodLabels)
  148. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr, offStr)
  149. resChPodAnnotations := ctx.Query(queryPodAnnotations)
  150. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr, offStr)
  151. resChServiceLabels := ctx.Query(queryServiceLabels)
  152. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr, offStr)
  153. resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
  154. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr, offStr)
  155. resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
  156. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, offStr)
  157. resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
  158. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr)
  159. resChJobLabels := ctx.Query(queryJobLabels)
  160. resMinutes, _ := resChMinutes.Await()
  161. resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
  162. resCPURequests, _ := resChCPURequests.Await()
  163. resCPUUsage, _ := resChCPUUsage.Await()
  164. resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
  165. resRAMRequests, _ := resChRAMRequests.Await()
  166. resRAMUsage, _ := resChRAMUsage.Await()
  167. resGPUsRequested, _ := resChGPUsRequested.Await()
  168. resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
  169. resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
  170. resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
  171. resNodeIsSpot, _ := resChNodeIsSpot.Await()
  172. resPVBytes, _ := resChPVBytes.Await()
  173. resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
  174. resPVCInfo, _ := resChPVCInfo.Await()
  175. resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
  176. resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
  177. resNetZoneGiB, _ := resChNetZoneGiB.Await()
  178. resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
  179. resNetRegionGiB, _ := resChNetRegionGiB.Await()
  180. resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
  181. resNetInternetGiB, _ := resChNetInternetGiB.Await()
  182. resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()
  183. resNamespaceLabels, _ := resChNamespaceLabels.Await()
  184. resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
  185. resPodLabels, _ := resChPodLabels.Await()
  186. resPodAnnotations, _ := resChPodAnnotations.Await()
  187. resServiceLabels, _ := resChServiceLabels.Await()
  188. resDeploymentLabels, _ := resChDeploymentLabels.Await()
  189. resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
  190. resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
  191. resJobLabels, _ := resChJobLabels.Await()
  192. log.Profile(startQuerying, "CostModel.ComputeAllocation: queries complete")
  193. defer log.Profile(time.Now(), "CostModel.ComputeAllocation: processing complete")
  194. // Build out a map of Allocations, starting with (start, end) so that we
  195. // begin with minutes, from which we compute resource allocation and cost
  196. // totals from measured rate data.
  197. allocationMap := map[containerKey]*kubecost.Allocation{}
  198. // Keep track of the allocations per pod, for the sake of splitting PVC and
  199. // Network allocation into per-Allocation from per-Pod.
  200. podAllocation := map[podKey][]*kubecost.Allocation{}
  201. // clusterStarts and clusterEnds record the earliest start and latest end
  202. // times, respectively, on a cluster-basis. These are used for unmounted
  203. // PVs and other "virtual" Allocations so that minutes are maximally
  204. // accurate during start-up or spin-down of a cluster
  205. clusterStart := map[string]time.Time{}
  206. clusterEnd := map[string]time.Time{}
  207. buildAllocationMap(window, allocationMap, podAllocation, clusterStart, clusterEnd, resMinutes)
  208. applyCPUCoresAllocated(allocationMap, resCPUCoresAllocated)
  209. applyCPUCoresRequested(allocationMap, resCPURequests)
  210. applyCPUCoresUsed(allocationMap, resCPUUsage)
  211. applyRAMBytesAllocated(allocationMap, resRAMBytesAllocated)
  212. applyRAMBytesRequested(allocationMap, resRAMRequests)
  213. applyRAMBytesUsed(allocationMap, resRAMUsage)
  214. applyGPUsRequested(allocationMap, resGPUsRequested)
  215. applyNetworkAllocation(allocationMap, podAllocation, resNetZoneGiB, resNetZoneCostPerGiB)
  216. applyNetworkAllocation(allocationMap, podAllocation, resNetRegionGiB, resNetRegionCostPerGiB)
  217. applyNetworkAllocation(allocationMap, podAllocation, resNetInternetGiB, resNetInternetCostPerGiB)
  218. // TODO niko/computeallocation pruneDuplicateData? (see costmodel.go)
  219. namespaceLabels := resToNamespaceLabels(resNamespaceLabels)
  220. podLabels := resToPodLabels(resPodLabels)
  221. namespaceAnnotations := resToNamespaceAnnotations(resNamespaceAnnotations)
  222. podAnnotations := resToPodAnnotations(resPodAnnotations)
  223. applyLabels(allocationMap, namespaceLabels, podLabels)
  224. applyAnnotations(allocationMap, namespaceAnnotations, podAnnotations)
  225. serviceLabels := getServiceLabels(resServiceLabels)
  226. applyServicesToPods(allocationMap, podLabels, serviceLabels)
  227. podDeploymentMap := labelsToPodControllerMap(podLabels, resToDeploymentLabels(resDeploymentLabels))
  228. podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
  229. podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels)
  230. podJobMap := resToPodJobMap(resJobLabels)
  231. applyControllersToPods(allocationMap, podDeploymentMap)
  232. applyControllersToPods(allocationMap, podStatefulSetMap)
  233. applyControllersToPods(allocationMap, podDaemonSetMap)
  234. applyControllersToPods(allocationMap, podJobMap)
  235. // TODO breakdown network costs?
  236. // Build out a map of Nodes with resource costs, discounts, and node types
  237. // for converting resource allocation data to cumulative costs.
  238. nodeMap := map[nodeKey]*Node{}
  239. applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
  240. applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
  241. applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
  242. applyNodeSpot(nodeMap, resNodeIsSpot)
  243. applyNodeDiscount(nodeMap, cm)
  244. // Build out the map of all PVs with class, size and cost-per-hour.
  245. // Note: this does not record time running, which we may want to
  246. // include later for increased PV precision. (As long as the PV has
  247. // a PVC, we get time running there, so this is only inaccurate
  248. // for short-lived, unmounted PVs.)
  249. pvMap := map[pvKey]*PV{}
  250. buildPVMap(pvMap, resPVCostPerGiBHour)
  251. applyPVBytes(pvMap, resPVBytes)
  252. // Build out the map of all PVCs with time running, bytes requested,
  253. // and connect to the correct PV from pvMap. (If no PV exists, that
  254. // is noted, but does not result in any allocation/cost.)
  255. pvcMap := map[pvcKey]*PVC{}
  256. buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
  257. applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
  258. // Build out the relationships of pods to their PVCs. This step
  259. // populates the PVC.Count field so that PVC allocation can be
  260. // split appropriately among each pod's container allocation.
  261. podPVCMap := map[podKey][]*PVC{}
  262. buildPodPVCMap(podPVCMap, pvMap, pvcMap, podAllocation, resPodPVCAllocation)
  263. // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
  264. // cluster representing each cluster's unmounted PVs (if necessary).
  265. applyUnmountedPVs(window, allocationMap, pvMap, pvcMap)
  266. for _, alloc := range allocationMap {
  267. cluster, _ := alloc.Properties.GetCluster()
  268. node, _ := alloc.Properties.GetNode()
  269. namespace, _ := alloc.Properties.GetNamespace()
  270. pod, _ := alloc.Properties.GetPod()
  271. container, _ := alloc.Properties.GetContainer()
  272. podKey := newPodKey(cluster, namespace, pod)
  273. nodeKey := newNodeKey(cluster, node)
  274. if n, ok := nodeMap[nodeKey]; !ok {
  275. if pod != "unmounted-pvs" {
  276. log.Warningf("CostModel.ComputeAllocation: failed to find node %s for %s", nodeKey, alloc.Name)
  277. }
  278. } else {
  279. alloc.CPUCost = alloc.CPUCoreHours * n.CostPerCPUHr
  280. alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * n.CostPerRAMGiBHr
  281. alloc.GPUCost = alloc.GPUHours * n.CostPerGPUHr
  282. }
  283. if pvcs, ok := podPVCMap[podKey]; ok {
  284. for _, pvc := range pvcs {
  285. // Determine the (start, end) of the relationship between the
  286. // given PVC and the associated Allocation so that a precise
  287. // number of hours can be used to compute cumulative cost.
  288. s, e := alloc.Start, alloc.End
  289. if pvc.Start.After(alloc.Start) {
  290. s = pvc.Start
  291. }
  292. if pvc.End.Before(alloc.End) {
  293. e = pvc.End
  294. }
  295. minutes := e.Sub(s).Minutes()
  296. hrs := minutes / 60.0
  297. gib := pvc.Bytes / 1024 / 1024 / 1024
  298. alloc.PVByteHours += pvc.Bytes * hrs
  299. count := float64(pvc.Count)
  300. if pvc.Count < 1 {
  301. // TODO niko/computeallocation remove log (why would this happen?)
  302. log.Warningf("CostModel.ComputeAllocation: PVC.Count=%d for %s", pvc.Count, alloc.Name)
  303. count = 1
  304. }
  305. alloc.PVCost += pvc.Volume.CostPerGiBHour * gib * hrs / count
  306. }
  307. }
  308. alloc.TotalCost = 0.0
  309. alloc.TotalCost += alloc.CPUCost
  310. alloc.TotalCost += alloc.RAMCost
  311. alloc.TotalCost += alloc.GPUCost
  312. alloc.TotalCost += alloc.PVCost
  313. alloc.TotalCost += alloc.NetworkCost
  314. alloc.TotalCost += alloc.SharedCost
  315. alloc.TotalCost += alloc.ExternalCost
  316. if alloc.RAMBytesRequestAverage > 0 {
  317. alloc.RAMEfficiency = alloc.RAMBytesUsageAverage / alloc.RAMBytesRequestAverage
  318. }
  319. if alloc.CPUCoreRequestAverage > 0 {
  320. alloc.CPUEfficiency = alloc.CPUCoreUsageAverage / alloc.CPUCoreRequestAverage
  321. }
  322. if alloc.CPUCost+alloc.RAMCost > 0 {
  323. ramCostEff := alloc.RAMEfficiency * alloc.RAMCost
  324. cpuCostEff := alloc.CPUEfficiency * alloc.CPUCost
  325. alloc.TotalEfficiency = (ramCostEff + cpuCostEff) / (alloc.CPUCost + alloc.RAMCost)
  326. }
  327. // Make sure that the name is correct (node may not be present at this
  328. // point due to it missing from queryMinutes) then insert.
  329. alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container)
  330. allocSet.Set(alloc)
  331. }
  332. return allocSet, nil
  333. }
  334. func buildAllocationMap(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, clusterStart, clusterEnd map[string]time.Time, resMinutes []*prom.QueryResult) {
  335. for _, res := range resMinutes {
  336. if len(res.Values) == 0 {
  337. log.Warningf("CostModel.ComputeAllocation: empty minutes result")
  338. continue
  339. }
  340. cluster, err := res.GetString("cluster_id")
  341. if err != nil {
  342. cluster = env.GetClusterID()
  343. }
  344. labels, err := res.GetStrings("namespace", "pod", "container")
  345. if err != nil {
  346. log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
  347. continue
  348. }
  349. namespace := labels["namespace"]
  350. pod := labels["pod"]
  351. container := labels["container"]
  352. containerKey := newContainerKey(cluster, namespace, pod, container)
  353. podKey := newPodKey(cluster, namespace, pod)
  354. // allocStart and allocEnd are the timestamps of the first and last
  355. // minutes the allocation was running, respectively. We subtract 1m
  356. // from allocStart because this point will actually represent the end
  357. // of the first minute. We don't subtract from allocEnd because it
  358. // already represents the end of the last minute.
  359. var allocStart, allocEnd time.Time
  360. for _, datum := range res.Values {
  361. t := time.Unix(int64(datum.Timestamp), 0)
  362. if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  363. allocStart = t
  364. }
  365. if datum.Value > 0 && window.Contains(t) {
  366. allocEnd = t
  367. }
  368. }
  369. if allocStart.IsZero() || allocEnd.IsZero() {
  370. continue
  371. }
  372. allocStart = allocStart.Add(-time.Minute)
  373. // Set start if unset or this datum's start time is earlier than the
  374. // current earliest time.
  375. if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
  376. clusterStart[cluster] = allocStart
  377. }
  378. // Set end if unset or this datum's end time is later than the
  379. // current latest time.
  380. if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
  381. clusterEnd[cluster] = allocEnd
  382. }
  383. name := fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container)
  384. alloc := &kubecost.Allocation{
  385. Name: name,
  386. Properties: kubecost.Properties{},
  387. Window: window.Clone(),
  388. Start: allocStart,
  389. End: allocEnd,
  390. }
  391. alloc.Properties.SetContainer(container)
  392. alloc.Properties.SetPod(pod)
  393. alloc.Properties.SetNamespace(namespace)
  394. alloc.Properties.SetCluster(cluster)
  395. allocationMap[containerKey] = alloc
  396. if _, ok := podAllocation[podKey]; !ok {
  397. podAllocation[podKey] = []*kubecost.Allocation{}
  398. }
  399. podAllocation[podKey] = append(podAllocation[podKey], alloc)
  400. }
  401. }
  402. func applyCPUCoresAllocated(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresAllocated []*prom.QueryResult) {
  403. for _, res := range resCPUCoresAllocated {
  404. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  405. if err != nil {
  406. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing field: %s", err)
  407. continue
  408. }
  409. _, ok := allocationMap[key]
  410. if !ok {
  411. log.Warningf("CostModel.ComputeAllocation: unidentified CPU allocation query result: %s", key)
  412. continue
  413. }
  414. cpuCores := res.Values[0].Value
  415. hours := allocationMap[key].Minutes() / 60.0
  416. allocationMap[key].CPUCoreHours = cpuCores * hours
  417. node, err := res.GetString("node")
  418. if err != nil {
  419. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
  420. continue
  421. }
  422. allocationMap[key].Properties.SetNode(node)
  423. }
  424. }
  425. func applyCPUCoresRequested(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresRequested []*prom.QueryResult) {
  426. for _, res := range resCPUCoresRequested {
  427. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  428. if err != nil {
  429. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing field: %s", err)
  430. continue
  431. }
  432. _, ok := allocationMap[key]
  433. if !ok {
  434. continue
  435. }
  436. allocationMap[key].CPUCoreRequestAverage = res.Values[0].Value
  437. // If CPU allocation is less than requests, set CPUCoreHours to
  438. // request level.
  439. if allocationMap[key].CPUCores() < res.Values[0].Value {
  440. allocationMap[key].CPUCoreHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  441. }
  442. node, err := res.GetString("node")
  443. if err != nil {
  444. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
  445. continue
  446. }
  447. allocationMap[key].Properties.SetNode(node)
  448. }
  449. }
  450. func applyCPUCoresUsed(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresUsed []*prom.QueryResult) {
  451. for _, res := range resCPUCoresUsed {
  452. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  453. if err != nil {
  454. log.Warningf("CostModel.ComputeAllocation: CPU usage query result missing field: %s", err)
  455. continue
  456. }
  457. _, ok := allocationMap[key]
  458. if !ok {
  459. log.Warningf("CostModel.ComputeAllocation: unidentified CPU usage query result: %s", key)
  460. continue
  461. }
  462. allocationMap[key].CPUCoreUsageAverage = res.Values[0].Value
  463. }
  464. }
  465. func applyRAMBytesAllocated(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesAllocated []*prom.QueryResult) {
  466. for _, res := range resRAMBytesAllocated {
  467. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  468. if err != nil {
  469. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing field: %s", err)
  470. continue
  471. }
  472. _, ok := allocationMap[key]
  473. if !ok {
  474. log.Warningf("CostModel.ComputeAllocation: unidentified RAM allocation query result: %s", key)
  475. continue
  476. }
  477. ramBytes := res.Values[0].Value
  478. hours := allocationMap[key].Minutes() / 60.0
  479. allocationMap[key].RAMByteHours = ramBytes * hours
  480. node, err := res.GetString("node")
  481. if err != nil {
  482. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
  483. continue
  484. }
  485. allocationMap[key].Properties.SetNode(node)
  486. }
  487. }
  488. func applyRAMBytesRequested(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesRequested []*prom.QueryResult) {
  489. for _, res := range resRAMBytesRequested {
  490. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  491. if err != nil {
  492. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing field: %s", err)
  493. continue
  494. }
  495. _, ok := allocationMap[key]
  496. if !ok {
  497. continue
  498. }
  499. allocationMap[key].RAMBytesRequestAverage = res.Values[0].Value
  500. // If RAM allocation is less than requests, set RAMByteHours to
  501. // request level.
  502. if allocationMap[key].RAMBytes() < res.Values[0].Value {
  503. allocationMap[key].RAMByteHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  504. }
  505. node, err := res.GetString("node")
  506. if err != nil {
  507. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
  508. continue
  509. }
  510. allocationMap[key].Properties.SetNode(node)
  511. }
  512. }
  513. func applyRAMBytesUsed(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesUsed []*prom.QueryResult) {
  514. for _, res := range resRAMBytesUsed {
  515. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  516. if err != nil {
  517. log.Warningf("CostModel.ComputeAllocation: RAM usage query result missing field: %s", err)
  518. continue
  519. }
  520. _, ok := allocationMap[key]
  521. if !ok {
  522. log.Warningf("CostModel.ComputeAllocation: unidentified RAM usage query result: %s", key)
  523. continue
  524. }
  525. allocationMap[key].RAMBytesUsageAverage = res.Values[0].Value
  526. }
  527. }
  528. func applyGPUsRequested(allocationMap map[containerKey]*kubecost.Allocation, resGPUsRequested []*prom.QueryResult) {
  529. for _, res := range resGPUsRequested {
  530. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  531. if err != nil {
  532. log.Warningf("CostModel.ComputeAllocation: GPU allocation query result missing field: %s", err)
  533. continue
  534. }
  535. _, ok := allocationMap[key]
  536. if !ok {
  537. log.Warningf("CostModel.ComputeAllocation: unidentified GPU allocation query result: %s", key)
  538. continue
  539. }
  540. // TODO niko/computeallocation remove log
  541. log.Infof("CostModel.ComputeAllocation: GPU results: %s=%f", key, res.Values[0].Value)
  542. hrs := allocationMap[key].Minutes() / 60.0
  543. allocationMap[key].GPUHours = res.Values[0].Value * hrs
  544. }
  545. }
  546. func applyNetworkAllocation(allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, resNetworkGiB []*prom.QueryResult, resNetworkCostPerGiB []*prom.QueryResult) {
  547. costPerGiBByCluster := map[string]float64{}
  548. for _, res := range resNetworkCostPerGiB {
  549. cluster, err := res.GetString("cluster_id")
  550. if err != nil {
  551. cluster = env.GetClusterID()
  552. }
  553. costPerGiBByCluster[cluster] = res.Values[0].Value
  554. }
  555. for _, res := range resNetworkGiB {
  556. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod_name")
  557. if err != nil {
  558. log.Warningf("CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
  559. continue
  560. }
  561. allocs, ok := podAllocation[podKey]
  562. if !ok {
  563. log.Warningf("CostModel.ComputeAllocation: Network allocation query result for unidentified pod allocations: %s", podKey)
  564. continue
  565. }
  566. for _, alloc := range allocs {
  567. gib := res.Values[0].Value
  568. costPerGiB := costPerGiBByCluster[podKey.Cluster]
  569. alloc.NetworkCost = gib * costPerGiB
  570. }
  571. }
  572. }
  573. func resToNamespaceLabels(resNamespaceLabels []*prom.QueryResult) map[string]map[string]string {
  574. namespaceLabels := map[string]map[string]string{}
  575. for _, res := range resNamespaceLabels {
  576. namespace, err := res.GetString("namespace")
  577. if err != nil {
  578. continue
  579. }
  580. if _, ok := namespaceLabels[namespace]; !ok {
  581. namespaceLabels[namespace] = map[string]string{}
  582. }
  583. for k, l := range res.GetLabels() {
  584. namespaceLabels[namespace][k] = l
  585. }
  586. }
  587. return namespaceLabels
  588. }
  589. func resToPodLabels(resPodLabels []*prom.QueryResult) map[podKey]map[string]string {
  590. podLabels := map[podKey]map[string]string{}
  591. for _, res := range resPodLabels {
  592. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod")
  593. if err != nil {
  594. continue
  595. }
  596. if _, ok := podLabels[podKey]; !ok {
  597. podLabels[podKey] = map[string]string{}
  598. }
  599. for k, l := range res.GetLabels() {
  600. podLabels[podKey][k] = l
  601. }
  602. }
  603. return podLabels
  604. }
  605. func resToNamespaceAnnotations(resNamespaceAnnotations []*prom.QueryResult) map[string]map[string]string {
  606. namespaceAnnotations := map[string]map[string]string{}
  607. for _, res := range resNamespaceAnnotations {
  608. namespace, err := res.GetString("namespace")
  609. if err != nil {
  610. continue
  611. }
  612. if _, ok := namespaceAnnotations[namespace]; !ok {
  613. namespaceAnnotations[namespace] = map[string]string{}
  614. }
  615. for k, l := range res.GetAnnotations() {
  616. namespaceAnnotations[namespace][k] = l
  617. }
  618. }
  619. return namespaceAnnotations
  620. }
  621. func resToPodAnnotations(resPodAnnotations []*prom.QueryResult) map[podKey]map[string]string {
  622. podAnnotations := map[podKey]map[string]string{}
  623. for _, res := range resPodAnnotations {
  624. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod")
  625. if err != nil {
  626. continue
  627. }
  628. if _, ok := podAnnotations[podKey]; !ok {
  629. podAnnotations[podKey] = map[string]string{}
  630. }
  631. for k, l := range res.GetAnnotations() {
  632. podAnnotations[podKey][k] = l
  633. }
  634. }
  635. return podAnnotations
  636. }
  637. func applyLabels(allocationMap map[containerKey]*kubecost.Allocation, namespaceLabels map[string]map[string]string, podLabels map[podKey]map[string]string) {
  638. for key, alloc := range allocationMap {
  639. allocLabels, err := alloc.Properties.GetLabels()
  640. if err != nil {
  641. allocLabels = map[string]string{}
  642. }
  643. // Apply namespace labels first, then pod labels so that pod labels
  644. // overwrite namespace labels.
  645. if labels, ok := namespaceLabels[key.Namespace]; ok {
  646. for k, v := range labels {
  647. allocLabels[k] = v
  648. }
  649. }
  650. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  651. if labels, ok := podLabels[podKey]; ok {
  652. for k, v := range labels {
  653. allocLabels[k] = v
  654. }
  655. }
  656. alloc.Properties.SetLabels(allocLabels)
  657. }
  658. }
  659. func applyAnnotations(allocationMap map[containerKey]*kubecost.Allocation, namespaceAnnotations map[string]map[string]string, podAnnotations map[podKey]map[string]string) {
  660. for key, alloc := range allocationMap {
  661. allocAnnotations, err := alloc.Properties.GetAnnotations()
  662. if err != nil {
  663. allocAnnotations = map[string]string{}
  664. }
  665. // Apply namespace annotations first, then pod annotations so that
  666. // pod labels overwrite namespace labels.
  667. if labels, ok := namespaceAnnotations[key.Namespace]; ok {
  668. for k, v := range labels {
  669. allocAnnotations[k] = v
  670. }
  671. }
  672. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  673. if labels, ok := podAnnotations[podKey]; ok {
  674. for k, v := range labels {
  675. allocAnnotations[k] = v
  676. }
  677. }
  678. alloc.Properties.SetAnnotations(allocAnnotations)
  679. }
  680. }
  681. func getServiceLabels(resServiceLabels []*prom.QueryResult) map[serviceKey]map[string]string {
  682. serviceLabels := map[serviceKey]map[string]string{}
  683. for _, res := range resServiceLabels {
  684. serviceKey, err := resultServiceKey(res, "cluster_id", "namespace", "service")
  685. if err != nil {
  686. continue
  687. }
  688. if _, ok := serviceLabels[serviceKey]; !ok {
  689. serviceLabels[serviceKey] = map[string]string{}
  690. }
  691. for k, l := range res.GetLabels() {
  692. serviceLabels[serviceKey][k] = l
  693. }
  694. }
  695. return serviceLabels
  696. }
  697. func resToDeploymentLabels(resDeploymentLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  698. deploymentLabels := map[controllerKey]map[string]string{}
  699. for _, res := range resDeploymentLabels {
  700. controllerKey, err := resultDeploymentKey(res, "cluster_id", "namespace", "deployment")
  701. if err != nil {
  702. continue
  703. }
  704. if _, ok := deploymentLabels[controllerKey]; !ok {
  705. deploymentLabels[controllerKey] = map[string]string{}
  706. }
  707. for k, l := range res.GetLabels() {
  708. deploymentLabels[controllerKey][k] = l
  709. }
  710. }
  711. return deploymentLabels
  712. }
  713. func resToStatefulSetLabels(resStatefulSetLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  714. statefulSetLabels := map[controllerKey]map[string]string{}
  715. for _, res := range resStatefulSetLabels {
  716. controllerKey, err := resultStatefulSetKey(res, "cluster_id", "namespace", "statefulSet")
  717. if err != nil {
  718. continue
  719. }
  720. if _, ok := statefulSetLabels[controllerKey]; !ok {
  721. statefulSetLabels[controllerKey] = map[string]string{}
  722. }
  723. for k, l := range res.GetLabels() {
  724. statefulSetLabels[controllerKey][k] = l
  725. }
  726. }
  727. return statefulSetLabels
  728. }
  729. func labelsToPodControllerMap(podLabels map[podKey]map[string]string, controllerLabels map[controllerKey]map[string]string) map[podKey]controllerKey {
  730. podControllerMap := map[podKey]controllerKey{}
  731. // For each controller, turn the labels into a selector and attempt to
  732. // match it with each set of pod labels. A match indicates that the pod
  733. // belongs to the controller.
  734. for cKey, cLabels := range controllerLabels {
  735. selector := labels.Set(cLabels).AsSelectorPreValidated()
  736. for pKey, pLabels := range podLabels {
  737. // If the pod is in a different cluster or namespace, there is
  738. // no need to compare the labels.
  739. if cKey.Cluster != pKey.Cluster || cKey.Namespace != pKey.Namespace {
  740. continue
  741. }
  742. podLabelSet := labels.Set(pLabels)
  743. if selector.Matches(podLabelSet) {
  744. if _, ok := podControllerMap[pKey]; ok {
  745. log.Warningf("CostModel.ComputeAllocation: PodControllerMap match already exists: %s matches %s and %s", pKey, podControllerMap[pKey], cKey)
  746. }
  747. podControllerMap[pKey] = cKey
  748. }
  749. }
  750. }
  751. return podControllerMap
  752. }
  753. func resToPodDaemonSetMap(resDaemonSetLabels []*prom.QueryResult) map[podKey]controllerKey {
  754. daemonSetLabels := map[podKey]controllerKey{}
  755. for _, res := range resDaemonSetLabels {
  756. controllerKey, err := resultDaemonSetKey(res, "cluster_id", "namespace", "owner_name")
  757. if err != nil {
  758. continue
  759. }
  760. pod, err := res.GetString("pod")
  761. if err != nil {
  762. log.Warningf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
  763. }
  764. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  765. daemonSetLabels[podKey] = controllerKey
  766. }
  767. return daemonSetLabels
  768. }
  769. func resToPodJobMap(resJobLabels []*prom.QueryResult) map[podKey]controllerKey {
  770. jobLabels := map[podKey]controllerKey{}
  771. for _, res := range resJobLabels {
  772. controllerKey, err := resultJobKey(res, "cluster_id", "namespace", "owner_name")
  773. if err != nil {
  774. continue
  775. }
  776. pod, err := res.GetString("pod")
  777. if err != nil {
  778. log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
  779. }
  780. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  781. jobLabels[podKey] = controllerKey
  782. }
  783. return jobLabels
  784. }
  785. func applyServicesToPods(allocationMap map[containerKey]*kubecost.Allocation, podLabels map[podKey]map[string]string, serviceLabels map[serviceKey]map[string]string) {
  786. podServicesMap := map[podKey][]serviceKey{}
  787. // For each service, turn the labels into a selector and attempt to
  788. // match it with each set of pod labels. A match indicates that the pod
  789. // belongs to the service.
  790. for sKey, sLabels := range serviceLabels {
  791. selector := labels.Set(sLabels).AsSelectorPreValidated()
  792. for pKey, pLabels := range podLabels {
  793. // If the pod is in a different cluster or namespace, there is
  794. // no need to compare the labels.
  795. if sKey.Cluster != pKey.Cluster || sKey.Namespace != pKey.Namespace {
  796. continue
  797. }
  798. podLabelSet := labels.Set(pLabels)
  799. if selector.Matches(podLabelSet) {
  800. if _, ok := podServicesMap[pKey]; !ok {
  801. podServicesMap[pKey] = []serviceKey{}
  802. }
  803. podServicesMap[pKey] = append(podServicesMap[pKey], sKey)
  804. }
  805. }
  806. }
  807. // For each allocation, attempt to find and apply the list of services
  808. // associated with the allocation's pod.
  809. for key, alloc := range allocationMap {
  810. pKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  811. if sKeys, ok := podServicesMap[pKey]; ok {
  812. services := []string{}
  813. for _, sKey := range sKeys {
  814. services = append(services, sKey.Service)
  815. }
  816. alloc.Properties.SetServices(services)
  817. }
  818. }
  819. }
  820. func applyControllersToPods(allocationMap map[containerKey]*kubecost.Allocation, podControllerMap map[podKey]controllerKey) {
  821. for key, alloc := range allocationMap {
  822. podKey := newPodKey(key.Cluster, key.Namespace, key.Pod)
  823. if controllerKey, ok := podControllerMap[podKey]; ok {
  824. alloc.Properties.SetControllerKind(controllerKey.ControllerKind)
  825. alloc.Properties.SetController(controllerKey.Controller)
  826. }
  827. }
  828. }
  829. func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*prom.QueryResult) {
  830. for _, res := range resNodeCostPerCPUHr {
  831. cluster, err := res.GetString("cluster_id")
  832. if err != nil {
  833. cluster = env.GetClusterID()
  834. }
  835. node, err := res.GetString("node")
  836. if err != nil {
  837. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  838. continue
  839. }
  840. instanceType, err := res.GetString("instance_type")
  841. if err != nil {
  842. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  843. continue
  844. }
  845. key := newNodeKey(cluster, node)
  846. if _, ok := nodeMap[key]; !ok {
  847. nodeMap[key] = &Node{
  848. Name: node,
  849. NodeType: instanceType,
  850. }
  851. }
  852. nodeMap[key].CostPerCPUHr = res.Values[0].Value
  853. }
  854. }
  855. func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
  856. for _, res := range resNodeCostPerRAMGiBHr {
  857. cluster, err := res.GetString("cluster_id")
  858. if err != nil {
  859. cluster = env.GetClusterID()
  860. }
  861. node, err := res.GetString("node")
  862. if err != nil {
  863. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  864. continue
  865. }
  866. instanceType, err := res.GetString("instance_type")
  867. if err != nil {
  868. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  869. continue
  870. }
  871. key := newNodeKey(cluster, node)
  872. if _, ok := nodeMap[key]; !ok {
  873. nodeMap[key] = &Node{
  874. Name: node,
  875. NodeType: instanceType,
  876. }
  877. }
  878. nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
  879. }
  880. }
  881. func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*prom.QueryResult) {
  882. for _, res := range resNodeCostPerGPUHr {
  883. cluster, err := res.GetString("cluster_id")
  884. if err != nil {
  885. cluster = env.GetClusterID()
  886. }
  887. node, err := res.GetString("node")
  888. if err != nil {
  889. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  890. continue
  891. }
  892. instanceType, err := res.GetString("instance_type")
  893. if err != nil {
  894. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  895. continue
  896. }
  897. key := newNodeKey(cluster, node)
  898. if _, ok := nodeMap[key]; !ok {
  899. nodeMap[key] = &Node{
  900. Name: node,
  901. NodeType: instanceType,
  902. }
  903. }
  904. nodeMap[key].CostPerGPUHr = res.Values[0].Value
  905. }
  906. }
  907. func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult) {
  908. for _, res := range resNodeIsSpot {
  909. cluster, err := res.GetString("cluster_id")
  910. if err != nil {
  911. cluster = env.GetClusterID()
  912. }
  913. node, err := res.GetString("node")
  914. if err != nil {
  915. log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
  916. continue
  917. }
  918. key := newNodeKey(cluster, node)
  919. if _, ok := nodeMap[key]; !ok {
  920. log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
  921. continue
  922. }
  923. nodeMap[key].Preemptible = res.Values[0].Value > 0
  924. }
  925. }
  926. func applyNodeDiscount(nodeMap map[nodeKey]*Node, cm *CostModel) {
  927. if cm == nil {
  928. return
  929. }
  930. c, err := cm.Provider.GetConfig()
  931. if err != nil {
  932. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  933. return
  934. }
  935. discount, err := ParsePercentString(c.Discount)
  936. if err != nil {
  937. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  938. return
  939. }
  940. negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
  941. if err != nil {
  942. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  943. return
  944. }
  945. for _, node := range nodeMap {
  946. // TODO niko/computeallocation GKE Reserved Instances into account
  947. node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
  948. node.CostPerCPUHr *= (1.0 - node.Discount)
  949. node.CostPerRAMGiBHr *= (1.0 - node.Discount)
  950. }
  951. }
  952. func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
  953. for _, res := range resPVCostPerGiBHour {
  954. cluster, err := res.GetString("cluster_id")
  955. if err != nil {
  956. cluster = env.GetClusterID()
  957. }
  958. name, err := res.GetString("volumename")
  959. if err != nil {
  960. log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
  961. continue
  962. }
  963. key := newPVKey(cluster, name)
  964. pvMap[key] = &PV{
  965. Cluster: cluster,
  966. Name: name,
  967. CostPerGiBHour: res.Values[0].Value,
  968. }
  969. }
  970. }
  971. func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
  972. for _, res := range resPVBytes {
  973. key, err := resultPVKey(res, "cluster_id", "persistentvolume")
  974. if err != nil {
  975. log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
  976. continue
  977. }
  978. if _, ok := pvMap[key]; !ok {
  979. log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
  980. continue
  981. }
  982. pvMap[key].Bytes = res.Values[0].Value
  983. }
  984. }
  985. func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
  986. for _, res := range resPVCInfo {
  987. cluster, err := res.GetString("cluster_id")
  988. if err != nil {
  989. cluster = env.GetClusterID()
  990. }
  991. values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
  992. if err != nil {
  993. log.Warningf("CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
  994. continue
  995. }
  996. namespace := values["namespace"]
  997. name := values["persistentvolumeclaim"]
  998. volume := values["volumename"]
  999. storageClass := values["storageclass"]
  1000. pvKey := newPVKey(cluster, volume)
  1001. pvcKey := newPVCKey(cluster, namespace, name)
  1002. // pvcStart and pvcEnd are the timestamps of the first and last minutes
  1003. // the PVC was running, respectively. We subtract 1m from pvcStart
  1004. // because this point will actually represent the end of the first
  1005. // minute. We don't subtract from pvcEnd because it already represents
  1006. // the end of the last minute.
  1007. var pvcStart, pvcEnd time.Time
  1008. for _, datum := range res.Values {
  1009. t := time.Unix(int64(datum.Timestamp), 0)
  1010. if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  1011. pvcStart = t
  1012. }
  1013. if datum.Value > 0 && window.Contains(t) {
  1014. pvcEnd = t
  1015. }
  1016. }
  1017. if pvcStart.IsZero() || pvcEnd.IsZero() {
  1018. log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
  1019. }
  1020. pvcStart = pvcStart.Add(-time.Minute)
  1021. if _, ok := pvMap[pvKey]; !ok {
  1022. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC info query result: %s", pvKey)
  1023. continue
  1024. }
  1025. pvMap[pvKey].StorageClass = storageClass
  1026. if _, ok := pvcMap[pvcKey]; !ok {
  1027. pvcMap[pvcKey] = &PVC{}
  1028. }
  1029. pvcMap[pvcKey].Name = name
  1030. pvcMap[pvcKey].Namespace = namespace
  1031. pvcMap[pvcKey].Volume = pvMap[pvKey]
  1032. pvcMap[pvcKey].Start = pvcStart
  1033. pvcMap[pvcKey].End = pvcEnd
  1034. }
  1035. }
  1036. func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
  1037. for _, res := range resPVCBytesRequested {
  1038. key, err := resultPVCKey(res, "cluster_id", "namespace", "persistentvolumeclaim")
  1039. if err != nil {
  1040. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested query result missing field: %s", err)
  1041. continue
  1042. }
  1043. if _, ok := pvcMap[key]; !ok {
  1044. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested result for missing PVC: %s", key)
  1045. continue
  1046. }
  1047. pvcMap[key].Bytes = res.Values[0].Value
  1048. }
  1049. }
  1050. func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podAllocation map[podKey][]*kubecost.Allocation, resPodPVCAllocation []*prom.QueryResult) {
  1051. for _, res := range resPodPVCAllocation {
  1052. cluster, err := res.GetString("cluster_id")
  1053. if err != nil {
  1054. cluster = env.GetClusterID()
  1055. }
  1056. values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
  1057. if err != nil {
  1058. log.Warningf("CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
  1059. continue
  1060. }
  1061. namespace := values["namespace"]
  1062. pod := values["pod"]
  1063. name := values["persistentvolumeclaim"]
  1064. volume := values["persistentvolume"]
  1065. podKey := newPodKey(cluster, namespace, pod)
  1066. pvKey := newPVKey(cluster, volume)
  1067. pvcKey := newPVCKey(cluster, namespace, name)
  1068. if _, ok := pvMap[pvKey]; !ok {
  1069. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
  1070. continue
  1071. }
  1072. if _, ok := podPVCMap[podKey]; !ok {
  1073. podPVCMap[podKey] = []*PVC{}
  1074. }
  1075. pvc, ok := pvcMap[pvcKey]
  1076. if !ok {
  1077. log.Warningf("CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
  1078. continue
  1079. }
  1080. pvc.Count = len(podAllocation[podKey])
  1081. pvc.Mounted = true
  1082. podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
  1083. }
  1084. }
  1085. func applyUnmountedPVs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
  1086. unmountedPVBytes := map[string]float64{}
  1087. unmountedPVCost := map[string]float64{}
  1088. for _, pv := range pvMap {
  1089. mounted := false
  1090. for _, pvc := range pvcMap {
  1091. if pvc.Volume == nil {
  1092. continue
  1093. }
  1094. if pvc.Volume == pv {
  1095. mounted = true
  1096. break
  1097. }
  1098. }
  1099. if !mounted {
  1100. gib := pv.Bytes / 1024 / 1024 / 1024
  1101. hrs := window.Minutes() / 60.0 // TODO niko/computeallocation PV hours, not window hours?
  1102. cost := pv.CostPerGiBHour * gib * hrs
  1103. unmountedPVCost[pv.Cluster] += cost
  1104. unmountedPVBytes[pv.Cluster] += pv.Bytes
  1105. }
  1106. }
  1107. for cluster, amount := range unmountedPVCost {
  1108. container := "unmounted-pvs"
  1109. pod := "unmounted-pvs"
  1110. namespace := ""
  1111. node := ""
  1112. containerKey := newContainerKey(cluster, namespace, pod, container)
  1113. allocationMap[containerKey] = &kubecost.Allocation{
  1114. Name: fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container),
  1115. Properties: kubecost.Properties{
  1116. kubecost.ClusterProp: cluster,
  1117. kubecost.NodeProp: node,
  1118. kubecost.NamespaceProp: namespace,
  1119. kubecost.PodProp: pod,
  1120. kubecost.ContainerProp: container,
  1121. },
  1122. Window: window.Clone(),
  1123. Start: *window.Start(),
  1124. End: *window.End(),
  1125. PVByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
  1126. PVCost: amount,
  1127. TotalCost: amount,
  1128. }
  1129. }
  1130. }
  1131. func applyUnmountedPVCs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvcMap map[pvcKey]*PVC) {
  1132. unmountedPVCBytes := map[namespaceKey]float64{}
  1133. unmountedPVCCost := map[namespaceKey]float64{}
  1134. for _, pvc := range pvcMap {
  1135. if !pvc.Mounted && pvc.Volume != nil {
  1136. key := newNamespaceKey(pvc.Cluster, pvc.Namespace)
  1137. gib := pvc.Volume.Bytes / 1024 / 1024 / 1024
  1138. hrs := pvc.Minutes() / 60.0
  1139. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  1140. unmountedPVCCost[key] += cost
  1141. unmountedPVCBytes[key] += pvc.Volume.Bytes
  1142. }
  1143. }
  1144. for key, amount := range unmountedPVCCost {
  1145. container := "unmounted-pvs"
  1146. pod := "unmounted-pvs"
  1147. namespace := key.Namespace
  1148. node := ""
  1149. cluster := key.Cluster
  1150. containerKey := newContainerKey(cluster, namespace, pod, container)
  1151. allocationMap[containerKey] = &kubecost.Allocation{
  1152. Name: fmt.Sprintf("%s/%s/%s/%s/%s", cluster, node, namespace, pod, container),
  1153. Properties: kubecost.Properties{
  1154. kubecost.ClusterProp: cluster,
  1155. kubecost.NodeProp: node,
  1156. kubecost.NamespaceProp: namespace,
  1157. kubecost.PodProp: pod,
  1158. kubecost.ContainerProp: container,
  1159. },
  1160. Window: window.Clone(),
  1161. Start: *window.Start(),
  1162. End: *window.End(),
  1163. PVByteHours: unmountedPVCBytes[key] * window.Minutes() / 60.0,
  1164. PVCost: amount,
  1165. TotalCost: amount,
  1166. }
  1167. }
  1168. }
  1169. // PVC describes a PersistentVolumeClaim
  1170. // TODO move to pkg/kubecost? [TODO:CLEANUP]
  1171. // TODO add PersistentVolumeClaims field to type Allocation? [TODO:CLEANUP]
  1172. type PVC struct {
  1173. Bytes float64 `json:"bytes"`
  1174. Count int `json:"count"`
  1175. Name string `json:"name"`
  1176. Cluster string `json:"cluster"`
  1177. Namespace string `json:"namespace"`
  1178. Volume *PV `json:"persistentVolume"`
  1179. Mounted bool `json:"mounted"`
  1180. Start time.Time `json:"start"`
  1181. End time.Time `json:"end"`
  1182. }
  1183. // Cost computes the cumulative cost of the PVC
  1184. func (pvc *PVC) Cost() float64 {
  1185. if pvc == nil || pvc.Volume == nil {
  1186. return 0.0
  1187. }
  1188. gib := pvc.Bytes / 1024 / 1024 / 1024
  1189. hrs := pvc.Minutes() / 60.0
  1190. return pvc.Volume.CostPerGiBHour * gib * hrs
  1191. }
  1192. // Minutes computes the number of minutes over which the PVC is defined
  1193. func (pvc *PVC) Minutes() float64 {
  1194. if pvc == nil {
  1195. return 0.0
  1196. }
  1197. return pvc.End.Sub(pvc.Start).Minutes()
  1198. }
  1199. // String returns a string representation of the PVC
  1200. func (pvc *PVC) String() string {
  1201. if pvc == nil {
  1202. return "<nil>"
  1203. }
  1204. return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
  1205. }
  1206. // PV describes a PersistentVolume
  1207. // TODO move to pkg/kubecost? [TODO:CLEANUP]
  1208. type PV struct {
  1209. Bytes float64 `json:"bytes"`
  1210. CostPerGiBHour float64 `json:"costPerGiBHour"` // TODO niko/computeallocation GiB or GB?
  1211. Cluster string `json:"cluster"`
  1212. Name string `json:"name"`
  1213. StorageClass string `json:"storageClass"`
  1214. }
  1215. // String returns a string representation of the PV
  1216. func (pv *PV) String() string {
  1217. if pv == nil {
  1218. return "<nil>"
  1219. }
  1220. return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
  1221. }