// allocation.go
  1. package costmodel
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/kubecost/cost-model/pkg/env"
  6. "github.com/kubecost/cost-model/pkg/kubecost"
  7. "github.com/kubecost/cost-model/pkg/log"
  8. "github.com/kubecost/cost-model/pkg/prom"
  9. "github.com/kubecost/cost-model/pkg/thanos"
  10. )
  11. // TODO niko/cdmr move to pkg/kubecost
  12. // TODO niko/cdmr add PersistenVolumeClaims to type Allocation?
  13. type PVC struct {
  14. Bytes float64 `json:"bytes"`
  15. Count int `json:"count"`
  16. Name string `json:"name"`
  17. Cluster string `json:"cluster"`
  18. Namespace string `json:"namespace"`
  19. Volume *PV `json:"persistentVolume"`
  20. Start time.Time `json:"start"`
  21. End time.Time `json:"end"`
  22. }
  23. func (pvc *PVC) Cost() float64 {
  24. if pvc == nil || pvc.Volume == nil {
  25. return 0.0
  26. }
  27. gib := pvc.Bytes / 1024 / 1024 / 1024
  28. hrs := pvc.Minutes() / 60.0
  29. return pvc.Volume.CostPerGiBHour * gib * hrs
  30. }
  31. func (pvc *PVC) Minutes() float64 {
  32. if pvc == nil {
  33. return 0.0
  34. }
  35. return pvc.End.Sub(pvc.Start).Minutes()
  36. }
  37. func (pvc *PVC) String() string {
  38. if pvc == nil {
  39. return "<nil>"
  40. }
  41. return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
  42. }
  43. // TODO niko/cdmr move to pkg/kubecost
  44. type PV struct {
  45. Bytes float64 `json:"bytes"`
  46. CostPerGiBHour float64 `json:"costPerGiBHour"` // TODO niko/cdmr GiB or GB?
  47. Cluster string `json:"cluster"`
  48. Name string `json:"name"`
  49. StorageClass string `json:"storageClass"`
  50. }
  51. func (pv *PV) String() string {
  52. if pv == nil {
  53. return "<nil>"
  54. }
  55. return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
  56. }
  57. // ComputeAllocation uses the CostModel instance to compute an AllocationSet
  58. // for the window defined by the given start and end times. The Allocations
  59. // returned are unaggregated (i.e. down to the container level).
  60. func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.AllocationSet, error) {
  61. // Create a window spanning the requested query
  62. s, e := start, end
  63. window := kubecost.NewWindow(&s, &e)
  64. // Create an empty AllocationSet. For safety, in the case of an error, we
  65. // should prefer to return this empty set with the error. (In the case of
  66. // no error, of course we populate the set and return it.)
  67. allocSet := kubecost.NewAllocationSet(start, end)
  68. // Convert window (start, end) to (duration, offset) for querying Prometheus
  69. timesToDurations := func(s, e time.Time) (dur, off time.Duration) {
  70. now := time.Now()
  71. off = now.Sub(e)
  72. dur = e.Sub(s)
  73. return dur, off
  74. }
  75. duration, offset := timesToDurations(start, end)
  76. // If using Thanos, increase offset to 3 hours, reducing the duration by
  77. // equal measure to maintain the same starting point.
  78. thanosDur := thanos.OffsetDuration()
  79. // TODO niko/cdmr confirm that this flag works interchangeably with ThanosClient != nil
  80. if offset < thanosDur && env.IsThanosEnabled() {
  81. diff := thanosDur - offset
  82. offset += diff
  83. duration -= diff
  84. }
  85. // If duration < 0, return an empty set
  86. if duration < 0 {
  87. return allocSet, nil
  88. }
  89. // Negative offset means that the end time is in the future. Prometheus
  90. // fails for non-positive offset values, so shrink the duration and
  91. // remove the offset altogether.
  92. if offset < 0 {
  93. duration = duration + offset
  94. offset = 0
  95. }
  96. durStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
  97. offStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
  98. if offset < time.Minute {
  99. offStr = ""
  100. }
  101. // TODO niko/cdmr dynamic resolution? add to ComputeAllocation() in allocation.Source?
  102. resStr := "1m"
  103. // resPerHr := 60
  104. // TODO niko/cdmr remove after testing
  105. startQuerying := time.Now()
  106. ctx := prom.NewContext(cm.PrometheusClient)
  107. // TODO niko/cdmr retries? (That should probably go into the Store.)
  108. // TODO niko/cmdr check: will multiple Prometheus jobs multiply the totals?
  109. // TODO niko/cdmr should we try doing this without resolution? Could yield
  110. // more accurate results, but might also be more challenging in some
  111. // respects; e.g. "correcting" the start point by what amount?
  112. queryMinutes := fmt.Sprintf(`avg(kube_pod_container_status_running{}) by (container, pod, namespace, kubernetes_node, cluster_id)[%s:%s]%s`, durStr, resStr, offStr)
  113. resChMinutes := ctx.Query(queryMinutes)
  114. queryRAMBytesAllocated := fmt.Sprintf(`avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
  115. resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
  116. queryRAMRequests := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
  117. resChRAMRequests := ctx.Query(queryRAMRequests)
  118. queryRAMUsage := fmt.Sprintf(`avg(avg_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`, durStr, offStr)
  119. resChRAMUsage := ctx.Query(queryRAMUsage)
  120. queryCPUCoresAllocated := fmt.Sprintf(`avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
  121. resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
  122. queryCPURequests := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
  123. resChCPURequests := ctx.Query(queryCPURequests)
  124. queryCPUUsage := fmt.Sprintf(`avg(rate(container_cpu_usage_seconds_total{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`, durStr, offStr)
  125. resChCPUUsage := ctx.Query(queryCPUUsage)
  126. // TODO niko/cdmr find an env with GPUs to test this (generate one?)
  127. queryGPUsRequested := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
  128. resChGPUsRequested := ctx.Query(queryGPUsRequested)
  129. queryNodeCostPerCPUHr := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
  130. resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
  131. queryNodeCostPerRAMGiBHr := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
  132. resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
  133. queryNodeCostPerGPUHr := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
  134. resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
  135. queryNodeIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s]%s)`, durStr, offStr)
  136. resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
  137. queryPVCInfo := fmt.Sprintf(`avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[%s:%s]%s`, durStr, resStr, offStr)
  138. resChPVCInfo := ctx.Query(queryPVCInfo)
  139. queryPVBytes := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, cluster_id)`, durStr, offStr)
  140. resChPVBytes := ctx.Query(queryPVBytes)
  141. queryPodPVCAllocation := fmt.Sprintf(`avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)`, durStr, offStr)
  142. resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
  143. queryPVCBytesRequested := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, cluster_id)`, durStr, offStr)
  144. resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
  145. queryPVCostPerGiBHour := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, cluster_id)`, durStr, offStr)
  146. resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
  147. queryNetZoneGiB := fmt.Sprintf(`sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`, durStr, offStr)
  148. resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
  149. queryNetZoneCostPerGiB := fmt.Sprintf(`avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (cluster_id)`, durStr, offStr)
  150. resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
  151. queryNetRegionGiB := fmt.Sprintf(`sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`, durStr, offStr)
  152. resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
  153. queryNetRegionCostPerGiB := fmt.Sprintf(`avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (cluster_id)`, durStr, offStr)
  154. resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
  155. queryNetInternetGiB := fmt.Sprintf(`sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, cluster_id) / 1024 / 1024 / 1024`, durStr, offStr)
  156. resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
  157. queryNetInternetCostPerGiB := fmt.Sprintf(`avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (cluster_id)`, durStr, offStr)
  158. resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
  159. queryNamespaceLabels := fmt.Sprintf(`avg_over_time(kube_namespace_labels[%s]%s)`, durStr, offStr)
  160. resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
  161. queryNamespaceAnnotations := fmt.Sprintf(`avg_over_time(kube_namespace_annotations[%s]%s)`, durStr, offStr)
  162. resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
  163. queryPodLabels := fmt.Sprintf(`avg_over_time(kube_pod_labels[%s]%s)`, durStr, offStr)
  164. resChPodLabels := ctx.Query(queryPodLabels)
  165. queryPodAnnotations := fmt.Sprintf(`avg_over_time(kube_pod_annotations[%s]%s)`, durStr, offStr)
  166. resChPodAnnotations := ctx.Query(queryPodAnnotations)
  167. queryServiceLabels := fmt.Sprintf(`avg_over_time(service_selector_labels[%s]%s)`, durStr, offStr)
  168. resChServiceLabels := ctx.Query(queryServiceLabels)
  169. queryDeploymentLabels := fmt.Sprintf(`avg_over_time(deployment_match_labels[%s]%s)`, durStr, offStr)
  170. resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
  171. queryStatefulSetLabels := fmt.Sprintf(`avg_over_time(statefulSet_match_labels[%s]%s)`, durStr, offStr)
  172. resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
  173. queryDaemonSetLabels := fmt.Sprintf(`sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, cluster_id)`, durStr, offStr)
  174. resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
  175. queryJobLabels := fmt.Sprintf(`sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,cluster_id)`, durStr, offStr)
  176. resChJobLabels := ctx.Query(queryJobLabels)
  177. resMinutes, _ := resChMinutes.Await()
  178. resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
  179. resCPURequests, _ := resChCPURequests.Await()
  180. resCPUUsage, _ := resChCPUUsage.Await()
  181. resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
  182. resRAMRequests, _ := resChRAMRequests.Await()
  183. resRAMUsage, _ := resChRAMUsage.Await()
  184. resGPUsRequested, _ := resChGPUsRequested.Await()
  185. resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
  186. resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
  187. resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
  188. resNodeIsSpot, _ := resChNodeIsSpot.Await()
  189. resPVBytes, _ := resChPVBytes.Await()
  190. resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
  191. resPVCInfo, _ := resChPVCInfo.Await()
  192. resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
  193. resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
  194. resNetZoneGiB, _ := resChNetZoneGiB.Await()
  195. resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
  196. resNetRegionGiB, _ := resChNetRegionGiB.Await()
  197. resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
  198. resNetInternetGiB, _ := resChNetInternetGiB.Await()
  199. resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()
  200. resNamespaceLabels, _ := resChNamespaceLabels.Await()
  201. resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
  202. resPodLabels, _ := resChPodLabels.Await()
  203. resPodAnnotations, _ := resChPodAnnotations.Await()
  204. resServiceLabels, _ := resChServiceLabels.Await()
  205. resDeploymentLabels, _ := resChDeploymentLabels.Await()
  206. resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
  207. resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
  208. resJobLabels, _ := resChJobLabels.Await()
  209. // ----------------------------------------------------------------------//
  210. // TODO niko/cdmr remove all logs after testing
  211. log.Infof("CostModel.ComputeAllocation: minutes : %s", queryMinutes)
  212. log.Infof("CostModel.ComputeAllocation: CPU cores: %s", queryCPUCoresAllocated)
  213. log.Infof("CostModel.ComputeAllocation: CPU req : %s", queryCPURequests)
  214. log.Infof("CostModel.ComputeAllocation: CPU use : %s", queryCPUUsage)
  215. log.Infof("CostModel.ComputeAllocation: $/CPU*Hr : %s", queryNodeCostPerCPUHr)
  216. log.Infof("CostModel.ComputeAllocation: RAM bytes: %s", queryRAMBytesAllocated)
  217. log.Infof("CostModel.ComputeAllocation: RAM req : %s", queryRAMRequests)
  218. log.Infof("CostModel.ComputeAllocation: RAM use : %s", queryRAMUsage)
  219. log.Infof("CostModel.ComputeAllocation: $/GiB*Hr : %s", queryNodeCostPerRAMGiBHr)
  220. log.Infof("CostModel.ComputeAllocation: PV $/gbhr: %s", queryPVCostPerGiBHour)
  221. log.Infof("CostModel.ComputeAllocation: PV bytes : %s", queryPVBytes)
  222. log.Infof("CostModel.ComputeAllocation: PVC alloc: %s", queryPodPVCAllocation)
  223. log.Infof("CostModel.ComputeAllocation: PVC bytes: %s", queryPVCBytesRequested)
  224. log.Infof("CostModel.ComputeAllocation: PVC info : %s", queryPVCInfo)
  225. log.Infof("CostModel.ComputeAllocation: Net Z GiB: %s", queryNetZoneGiB)
  226. log.Infof("CostModel.ComputeAllocation: Net Z $ : %s", queryNetZoneCostPerGiB)
  227. log.Infof("CostModel.ComputeAllocation: Net R GiB: %s", queryNetRegionGiB)
  228. log.Infof("CostModel.ComputeAllocation: Net R $ : %s", queryNetRegionCostPerGiB)
  229. log.Infof("CostModel.ComputeAllocation: Net I GiB: %s", queryNetInternetGiB)
  230. log.Infof("CostModel.ComputeAllocation: Net I $ : %s", queryNetInternetCostPerGiB)
  231. log.Infof("CostModel.ComputeAllocation: NamespaceLabels: %s", queryNamespaceLabels)
  232. log.Infof("CostModel.ComputeAllocation: NamespaceAnnotations: %s", queryNamespaceAnnotations)
  233. log.Infof("CostModel.ComputeAllocation: PodLabels: %s", queryPodLabels)
  234. log.Infof("CostModel.ComputeAllocation: PodAnnotations: %s", queryPodAnnotations)
  235. log.Infof("CostModel.ComputeAllocation: ServiceLabels: %s", queryServiceLabels)
  236. log.Infof("CostModel.ComputeAllocation: DeploymentLabels: %s", queryDeploymentLabels)
  237. log.Infof("CostModel.ComputeAllocation: StatefulSetLabels: %s", queryStatefulSetLabels)
  238. log.Infof("CostModel.ComputeAllocation: DaemonSetLabels: %s", queryDaemonSetLabels)
  239. log.Infof("CostModel.ComputeAllocation: JobLabels: %s", queryJobLabels)
  240. log.Profile(startQuerying, "CostModel.ComputeAllocation: queries complete")
  241. defer log.Profile(time.Now(), "CostModel.ComputeAllocation: processing complete")
  242. // ----------------------------------------------------------------------//
  243. // Build out a map of Allocations, starting with (start, end) so that we
  244. // begin with minutes, from which we compute resource allocation and cost
  245. // totals from measured rate data.
  246. // TODO niko/cdmr can we start with a reasonable guess at map size?
  247. allocationMap := map[containerKey]*kubecost.Allocation{}
  248. // Keep track of the allocations per pod, for the sake of splitting PVC and
  249. // Network allocation into per-Allocation from per-Pod.
  250. podAllocation := map[podKey][]*kubecost.Allocation{}
  251. // clusterStarts and clusterEnds record the earliest start and latest end
  252. // times, respectively, on a cluster-basis. These are used for unmounted
  253. // PVs and other "virtual" Allocations so that minutes are maximally
  254. // accurate during start-up or spin-down of a cluster
  255. clusterStart := map[string]time.Time{}
  256. clusterEnd := map[string]time.Time{}
  257. buildAllocationMap(window, allocationMap, podAllocation, clusterStart, clusterEnd, resMinutes)
  258. applyCPUCoresAllocated(allocationMap, resCPUCoresAllocated)
  259. applyCPUCoresRequested(allocationMap, resCPURequests)
  260. applyCPUCoresUsed(allocationMap, resCPUUsage)
  261. applyRAMBytesAllocated(allocationMap, resRAMBytesAllocated)
  262. applyRAMBytesRequested(allocationMap, resRAMRequests)
  263. applyRAMBytesUsed(allocationMap, resRAMUsage)
  264. applyGPUsRequested(allocationMap, resGPUsRequested)
  265. applyNetworkAllocation(allocationMap, podAllocation, resNetZoneGiB, resNetZoneCostPerGiB)
  266. applyNetworkAllocation(allocationMap, podAllocation, resNetRegionGiB, resNetRegionCostPerGiB)
  267. applyNetworkAllocation(allocationMap, podAllocation, resNetInternetGiB, resNetInternetCostPerGiB)
  268. applyLabels := func(name string, res []*prom.QueryResult) {
  269. log.Infof("CostModel.ComputeAllocation: %s: %d results", name, len(res))
  270. }
  271. applyLabels("NamespaceLabels", resNamespaceLabels)
  272. applyLabels("NamespaceAnnotations", resNamespaceAnnotations)
  273. applyLabels("PodLabels", resPodLabels)
  274. applyLabels("PodAnnotations", resPodAnnotations)
  275. applyLabels("ServiceLabels", resServiceLabels)
  276. applyLabels("DeploymentLabels", resDeploymentLabels)
  277. applyLabels("StatefulSetLabels", resStatefulSetLabels)
  278. applyLabels("DaemonSetLabels", resDaemonSetLabels)
  279. applyLabels("JobLabels", resJobLabels)
  280. // TODO niko/cdmr breakdown network costs?
  281. // Build out a map of Nodes with resource costs, discounts, and node types
  282. // for converting resource allocation data to cumulative costs.
  283. nodeMap := map[nodeKey]*Node{}
  284. applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
  285. applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
  286. applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
  287. applyNodeSpot(nodeMap, resNodeIsSpot)
  288. applyNodeDiscount(nodeMap, cm)
  289. // TODO niko/cdmr comment
  290. pvMap := map[pvKey]*PV{}
  291. buildPVMap(pvMap, resPVCostPerGiBHour)
  292. applyPVBytes(pvMap, resPVBytes)
  293. // TODO niko/cdmr apply PV bytes?
  294. // TODO niko/cdmr comment
  295. pvcMap := map[pvcKey]*PVC{}
  296. buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
  297. applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
  298. // TODO niko/cdmr comment
  299. podPVCMap := map[podKey][]*PVC{}
  300. buildPodPVCMap(podPVCMap, pvMap, pvcMap, podAllocation, resPodPVCAllocation)
  301. // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
  302. // cluster representing each cluster's unmounted PVs (if necessary).
  303. applyUnmountedPVs(window, allocationMap, pvMap, pvcMap)
  304. // TODO niko/cdmr remove logs
  305. log.Infof("CostModel.ComputeAllocation: %d allocations", len(allocationMap))
  306. log.Infof("CostModel.ComputeAllocation: %d nodes", len(nodeMap))
  307. log.Infof("CostModel.ComputeAllocation: %d PVs", len(pvMap))
  308. log.Infof("CostModel.ComputeAllocation: %d PVCs", len(pvcMap))
  309. log.Infof("CostModel.ComputeAllocation: %d pods with PVCs", len(podPVCMap))
  310. for _, node := range nodeMap {
  311. log.Infof("CostModel.ComputeAllocation: Node: %s: %f/CPUHr; %f/RAMHr; %f/GPUHr; %f discount", node.Name, node.CostPerCPUHr, node.CostPerRAMGiBHr, node.CostPerGPUHr, node.Discount)
  312. }
  313. for _, pv := range pvMap {
  314. log.Infof("CostModel.ComputeAllocation: PV: %s", pv)
  315. }
  316. for pod, pvcs := range podPVCMap {
  317. for _, pvc := range pvcs {
  318. log.Infof("CostModel.ComputeAllocation: Pod %s: PVC: %s", pod, pvc)
  319. }
  320. }
  321. for _, alloc := range allocationMap {
  322. cluster, _ := alloc.Properties.GetCluster()
  323. node, _ := alloc.Properties.GetNode()
  324. namespace, _ := alloc.Properties.GetNamespace()
  325. pod, _ := alloc.Properties.GetPod()
  326. podKey := newPodKey(cluster, namespace, pod)
  327. nodeKey := newNodeKey(cluster, node)
  328. if n, ok := nodeMap[nodeKey]; !ok {
  329. if pod != "unmounted-pvs" {
  330. log.Warningf("CostModel.ComputeAllocation: failed to find node %s for %s", nodeKey, alloc.Name)
  331. }
  332. } else {
  333. alloc.CPUCost = alloc.CPUCoreHours * n.CostPerCPUHr
  334. alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * n.CostPerRAMGiBHr
  335. alloc.GPUCost = alloc.GPUHours * n.CostPerGPUHr
  336. }
  337. if pvcs, ok := podPVCMap[podKey]; ok {
  338. for _, pvc := range pvcs {
  339. // Determine the (start, end) of the relationship between the
  340. // given PVC and the associated Allocation so that a precise
  341. // number of hours can be used to compute cumulative cost.
  342. s, e := alloc.Start, alloc.End
  343. if pvc.Start.After(alloc.Start) {
  344. s = pvc.Start
  345. }
  346. if pvc.End.Before(alloc.End) {
  347. e = pvc.End
  348. }
  349. minutes := e.Sub(s).Minutes()
  350. hrs := minutes / 60.0
  351. gib := pvc.Bytes / 1024 / 1024 / 1024
  352. alloc.PVByteHours += pvc.Bytes * hrs
  353. alloc.PVCost += pvc.Volume.CostPerGiBHour * gib * hrs / float64(pvc.Count)
  354. }
  355. }
  356. alloc.TotalCost = 0.0
  357. alloc.TotalCost += alloc.CPUCost
  358. alloc.TotalCost += alloc.RAMCost
  359. alloc.TotalCost += alloc.GPUCost
  360. alloc.TotalCost += alloc.PVCost
  361. alloc.TotalCost += alloc.NetworkCost
  362. alloc.TotalCost += alloc.SharedCost
  363. alloc.TotalCost += alloc.ExternalCost
  364. allocSet.Set(alloc)
  365. }
  366. return allocSet, nil
  367. }
  368. func buildAllocationMap(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, clusterStart, clusterEnd map[string]time.Time, resMinutes []*prom.QueryResult) {
  369. for _, res := range resMinutes {
  370. if len(res.Values) == 0 {
  371. log.Warningf("CostModel.ComputeAllocation: empty minutes result")
  372. continue
  373. }
  374. cluster, err := res.GetString("cluster_id")
  375. if err != nil {
  376. cluster = env.GetClusterID()
  377. }
  378. labels, err := res.GetStrings("kubernetes_node", "namespace", "pod", "container")
  379. if err != nil {
  380. log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
  381. continue
  382. }
  383. node := labels["kubernetes_node"]
  384. namespace := labels["namespace"]
  385. pod := labels["pod"]
  386. container := labels["container"]
  387. containerKey := newContainerKey(cluster, namespace, pod, container)
  388. podKey := newPodKey(cluster, namespace, pod)
  389. // allocStart and allocEnd are the timestamps of the first and last
  390. // minutes the allocation was running, respectively. We subtract 1m
  391. // from allocStart because this point will actually represent the end
  392. // of the first minute. We don't subtract from allocEnd because it
  393. // already represents the end of the last minute.
  394. var allocStart, allocEnd time.Time
  395. for _, datum := range res.Values {
  396. t := time.Unix(int64(datum.Timestamp), 0)
  397. if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  398. allocStart = t
  399. }
  400. if datum.Value > 0 && window.Contains(t) {
  401. allocEnd = t
  402. }
  403. }
  404. if allocStart.IsZero() || allocEnd.IsZero() {
  405. // TODO niko/cdmr remove log?
  406. // log.Warningf("CostModel.ComputeAllocation: allocation %s has no running time, skipping", containerKey)
  407. continue
  408. }
  409. allocStart = allocStart.Add(-time.Minute)
  410. // Set start if unset or this datum's start time is earlier than the
  411. // current earliest time.
  412. if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
  413. clusterStart[cluster] = allocStart
  414. }
  415. // Set end if unset or this datum's end time is later than the
  416. // current latest time.
  417. if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
  418. clusterEnd[cluster] = allocEnd
  419. }
  420. name := fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container)
  421. alloc := &kubecost.Allocation{
  422. Name: name,
  423. Properties: kubecost.Properties{},
  424. Window: window.Clone(),
  425. Start: allocStart,
  426. End: allocEnd,
  427. }
  428. alloc.Properties.SetContainer(container)
  429. alloc.Properties.SetPod(pod)
  430. alloc.Properties.SetNamespace(namespace)
  431. alloc.Properties.SetNode(node)
  432. alloc.Properties.SetCluster(cluster)
  433. allocationMap[containerKey] = alloc
  434. if _, ok := podAllocation[podKey]; !ok {
  435. podAllocation[podKey] = []*kubecost.Allocation{}
  436. }
  437. podAllocation[podKey] = append(podAllocation[podKey], alloc)
  438. }
  439. }
  440. func applyCPUCoresAllocated(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresAllocated []*prom.QueryResult) {
  441. for _, res := range resCPUCoresAllocated {
  442. // TODO niko/cdmr do we need node here?
  443. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  444. if err != nil {
  445. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing field: %s", err)
  446. continue
  447. }
  448. _, ok := allocationMap[key]
  449. if !ok {
  450. log.Warningf("CostModel.ComputeAllocation: unidentified CPU allocation query result: %s", key)
  451. continue
  452. }
  453. cpuCores := res.Values[0].Value
  454. hours := allocationMap[key].Minutes() / 60.0
  455. allocationMap[key].CPUCoreHours = cpuCores * hours
  456. }
  457. }
  458. func applyCPUCoresRequested(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresRequested []*prom.QueryResult) {
  459. for _, res := range resCPUCoresRequested {
  460. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  461. if err != nil {
  462. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing field: %s", err)
  463. continue
  464. }
  465. _, ok := allocationMap[key]
  466. if !ok {
  467. // TODO niko/cdmr remove log?
  468. // log.Warningf("CostModel.ComputeAllocation: unidentified CPU request query result: %s", key)
  469. continue
  470. }
  471. allocationMap[key].CPUCoreRequestAverage = res.Values[0].Value
  472. // CPU allocation is less than requests, so set CPUCoreHours to
  473. // request level.
  474. // TODO niko/cdmr why is this happening?
  475. if allocationMap[key].CPUCores() < res.Values[0].Value {
  476. allocationMap[key].CPUCoreHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  477. }
  478. }
  479. }
  480. func applyCPUCoresUsed(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresUsed []*prom.QueryResult) {
  481. for _, res := range resCPUCoresUsed {
  482. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  483. if err != nil {
  484. log.Warningf("CostModel.ComputeAllocation: CPU usage query result missing field: %s", err)
  485. continue
  486. }
  487. _, ok := allocationMap[key]
  488. if !ok {
  489. log.Warningf("CostModel.ComputeAllocation: unidentified CPU usage query result: %s", key)
  490. continue
  491. }
  492. allocationMap[key].CPUCoreUsageAverage = res.Values[0].Value
  493. }
  494. }
  495. func applyRAMBytesRequested(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesRequested []*prom.QueryResult) {
  496. for _, res := range resRAMBytesRequested {
  497. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  498. if err != nil {
  499. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing field: %s", err)
  500. continue
  501. }
  502. _, ok := allocationMap[key]
  503. if !ok {
  504. // TODO niko/cdmr remove log?
  505. // log.Warningf("CostModel.ComputeAllocation: unidentified RAM request query result: %s", key)
  506. continue
  507. }
  508. allocationMap[key].RAMBytesRequestAverage = res.Values[0].Value
  509. // RAM allocation is less than requests, so set RAMByteHours to
  510. // request level.
  511. // TODO niko/cdmr why is this happening?
  512. if allocationMap[key].RAMBytes() < res.Values[0].Value {
  513. allocationMap[key].RAMByteHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
  514. }
  515. }
  516. }
  517. func applyRAMBytesUsed(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesUsed []*prom.QueryResult) {
  518. for _, res := range resRAMBytesUsed {
  519. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
  520. if err != nil {
  521. log.Warningf("CostModel.ComputeAllocation: RAM usage query result missing field: %s", err)
  522. continue
  523. }
  524. _, ok := allocationMap[key]
  525. if !ok {
  526. log.Warningf("CostModel.ComputeAllocation: unidentified RAM usage query result: %s", key)
  527. continue
  528. }
  529. allocationMap[key].RAMBytesUsageAverage = res.Values[0].Value
  530. }
  531. }
  532. func applyRAMBytesAllocated(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesAllocated []*prom.QueryResult) {
  533. for _, res := range resRAMBytesAllocated {
  534. // TODO niko/cdmr do we need node here?
  535. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  536. if err != nil {
  537. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing field: %s", err)
  538. continue
  539. }
  540. _, ok := allocationMap[key]
  541. if !ok {
  542. log.Warningf("CostModel.ComputeAllocation: unidentified RAM allocation query result: %s", key)
  543. continue
  544. }
  545. ramBytes := res.Values[0].Value
  546. hours := allocationMap[key].Minutes() / 60.0
  547. allocationMap[key].RAMByteHours = ramBytes * hours
  548. }
  549. }
  550. func applyGPUsRequested(allocationMap map[containerKey]*kubecost.Allocation, resGPUsRequested []*prom.QueryResult) {
  551. for _, res := range resGPUsRequested {
  552. // TODO niko/cdmr do we need node here?
  553. key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
  554. if err != nil {
  555. log.Warningf("CostModel.ComputeAllocation: GPU allocation query result missing field: %s", err)
  556. continue
  557. }
  558. _, ok := allocationMap[key]
  559. if !ok {
  560. log.Warningf("CostModel.ComputeAllocation: unidentified GPU allocation query result: %s", key)
  561. continue
  562. }
  563. // TODO niko/cdmr complete
  564. log.Infof("CostModel.ComputeAllocation: GPU results: %s=%f", key, res.Values[0].Value)
  565. }
  566. }
  567. func applyNetworkAllocation(allocationMap map[containerKey]*kubecost.Allocation, podAllocation map[podKey][]*kubecost.Allocation, resNetworkGiB []*prom.QueryResult, resNetworkCostPerGiB []*prom.QueryResult) {
  568. costPerGiBByCluster := map[string]float64{}
  569. for _, res := range resNetworkCostPerGiB {
  570. cluster, err := res.GetString("cluster_id")
  571. if err != nil {
  572. cluster = env.GetClusterID()
  573. }
  574. costPerGiBByCluster[cluster] = res.Values[0].Value
  575. }
  576. for _, res := range resNetworkGiB {
  577. podKey, err := resultPodKey(res, "cluster_id", "namespace", "pod_name")
  578. if err != nil {
  579. log.Warningf("CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
  580. continue
  581. }
  582. allocs, ok := podAllocation[podKey]
  583. if !ok {
  584. log.Warningf("CostModel.ComputeAllocation: Network allocation query result for unidentified pod allocations: %s", podKey)
  585. continue
  586. }
  587. for _, alloc := range allocs {
  588. gib := res.Values[0].Value
  589. costPerGiB := costPerGiBByCluster[podKey.Cluster]
  590. alloc.NetworkCost = gib * costPerGiB
  591. }
  592. }
  593. }
  594. func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*prom.QueryResult) {
  595. for _, res := range resNodeCostPerCPUHr {
  596. cluster, err := res.GetString("cluster_id")
  597. if err != nil {
  598. cluster = env.GetClusterID()
  599. }
  600. node, err := res.GetString("node")
  601. if err != nil {
  602. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  603. continue
  604. }
  605. instanceType, err := res.GetString("instance_type")
  606. if err != nil {
  607. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  608. continue
  609. }
  610. key := newNodeKey(cluster, node)
  611. if _, ok := nodeMap[key]; !ok {
  612. nodeMap[key] = &Node{
  613. Name: node,
  614. NodeType: instanceType,
  615. }
  616. }
  617. nodeMap[key].CostPerCPUHr = res.Values[0].Value
  618. }
  619. }
  620. func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
  621. for _, res := range resNodeCostPerRAMGiBHr {
  622. cluster, err := res.GetString("cluster_id")
  623. if err != nil {
  624. cluster = env.GetClusterID()
  625. }
  626. node, err := res.GetString("node")
  627. if err != nil {
  628. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  629. continue
  630. }
  631. instanceType, err := res.GetString("instance_type")
  632. if err != nil {
  633. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  634. continue
  635. }
  636. key := newNodeKey(cluster, node)
  637. if _, ok := nodeMap[key]; !ok {
  638. nodeMap[key] = &Node{
  639. Name: node,
  640. NodeType: instanceType,
  641. }
  642. }
  643. nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
  644. }
  645. }
  646. func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*prom.QueryResult) {
  647. for _, res := range resNodeCostPerGPUHr {
  648. cluster, err := res.GetString("cluster_id")
  649. if err != nil {
  650. cluster = env.GetClusterID()
  651. }
  652. node, err := res.GetString("node")
  653. if err != nil {
  654. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  655. continue
  656. }
  657. instanceType, err := res.GetString("instance_type")
  658. if err != nil {
  659. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  660. continue
  661. }
  662. key := newNodeKey(cluster, node)
  663. if _, ok := nodeMap[key]; !ok {
  664. nodeMap[key] = &Node{
  665. Name: node,
  666. NodeType: instanceType,
  667. }
  668. }
  669. nodeMap[key].CostPerGPUHr = res.Values[0].Value
  670. }
  671. }
  672. func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult) {
  673. for _, res := range resNodeIsSpot {
  674. cluster, err := res.GetString("cluster_id")
  675. if err != nil {
  676. cluster = env.GetClusterID()
  677. }
  678. node, err := res.GetString("node")
  679. if err != nil {
  680. log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
  681. continue
  682. }
  683. key := newNodeKey(cluster, node)
  684. if _, ok := nodeMap[key]; !ok {
  685. log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
  686. continue
  687. }
  688. nodeMap[key].Preemptible = res.Values[0].Value > 0
  689. }
  690. }
  691. func applyNodeDiscount(nodeMap map[nodeKey]*Node, cm *CostModel) {
  692. if cm == nil {
  693. return
  694. }
  695. c, err := cm.Provider.GetConfig()
  696. if err != nil {
  697. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  698. return
  699. }
  700. discount, err := ParsePercentString(c.Discount)
  701. if err != nil {
  702. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  703. return
  704. }
  705. negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
  706. if err != nil {
  707. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  708. return
  709. }
  710. for _, node := range nodeMap {
  711. // TODO niko/cdmr take RI into account?
  712. node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
  713. node.CostPerCPUHr *= (1.0 - node.Discount)
  714. node.CostPerRAMGiBHr *= (1.0 - node.Discount)
  715. }
  716. }
  717. func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
  718. for _, res := range resPVCostPerGiBHour {
  719. cluster, err := res.GetString("cluster_id")
  720. if err != nil {
  721. cluster = env.GetClusterID()
  722. }
  723. name, err := res.GetString("volumename")
  724. if err != nil {
  725. log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
  726. continue
  727. }
  728. key := newPVKey(cluster, name)
  729. pvMap[key] = &PV{
  730. Cluster: cluster,
  731. Name: name,
  732. CostPerGiBHour: res.Values[0].Value,
  733. }
  734. }
  735. }
  736. func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
  737. for _, res := range resPVBytes {
  738. key, err := resultPVKey(res, "cluster_id", "persistentvolume")
  739. if err != nil {
  740. log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
  741. continue
  742. }
  743. if _, ok := pvMap[key]; !ok {
  744. log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
  745. continue
  746. }
  747. pvMap[key].Bytes = res.Values[0].Value
  748. }
  749. }
  750. func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
  751. for _, res := range resPVCInfo {
  752. cluster, err := res.GetString("cluster_id")
  753. if err != nil {
  754. cluster = env.GetClusterID()
  755. }
  756. values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
  757. if err != nil {
  758. log.Warningf("CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
  759. continue
  760. }
  761. // TODO niko/cdmr ?
  762. namespace := values["namespace"]
  763. name := values["persistentvolumeclaim"]
  764. volume := values["volumename"]
  765. storageClass := values["storageclass"]
  766. pvKey := newPVKey(cluster, volume)
  767. pvcKey := newPVCKey(cluster, namespace, name)
  768. // pvcStart and pvcEnd are the timestamps of the first and last minutes
  769. // the PVC was running, respectively. We subtract 1m from pvcStart
  770. // because this point will actually represent the end of the first
  771. // minute. We don't subtract from pvcEnd because it already represents
  772. // the end of the last minute.
  773. var pvcStart, pvcEnd time.Time
  774. for _, datum := range res.Values {
  775. t := time.Unix(int64(datum.Timestamp), 0)
  776. if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  777. pvcStart = t
  778. }
  779. if datum.Value > 0 && window.Contains(t) {
  780. pvcEnd = t
  781. }
  782. }
  783. if pvcStart.IsZero() || pvcEnd.IsZero() {
  784. log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
  785. }
  786. pvcStart = pvcStart.Add(-time.Minute)
  787. if _, ok := pvMap[pvKey]; !ok {
  788. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC info query result: %s", pvKey)
  789. continue
  790. }
  791. pvMap[pvKey].StorageClass = storageClass
  792. if _, ok := pvcMap[pvcKey]; !ok {
  793. pvcMap[pvcKey] = &PVC{}
  794. }
  795. pvcMap[pvcKey].Name = name
  796. pvcMap[pvcKey].Namespace = namespace
  797. pvcMap[pvcKey].Volume = pvMap[pvKey]
  798. pvcMap[pvcKey].Start = pvcStart
  799. pvcMap[pvcKey].End = pvcEnd
  800. }
  801. }
  802. func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
  803. for _, res := range resPVCBytesRequested {
  804. key, err := resultPVCKey(res, "cluster_id", "namespace", "persistentvolumeclaim")
  805. if err != nil {
  806. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested query result missing field: %s", err)
  807. continue
  808. }
  809. if _, ok := pvcMap[key]; !ok {
  810. log.Warningf("CostModel.ComputeAllocation: PVC bytes requested result for missing PVC: %s", err)
  811. continue
  812. }
  813. pvcMap[key].Bytes = res.Values[0].Value
  814. }
  815. }
  816. func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podAllocation map[podKey][]*kubecost.Allocation, resPodPVCAllocation []*prom.QueryResult) {
  817. for _, res := range resPodPVCAllocation {
  818. cluster, err := res.GetString("cluster_id")
  819. if err != nil {
  820. cluster = env.GetClusterID()
  821. }
  822. values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
  823. if err != nil {
  824. log.Warningf("CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
  825. continue
  826. }
  827. namespace := values["namespace"]
  828. pod := values["pod"]
  829. name := values["persistentvolumeclaim"]
  830. volume := values["persistentvolume"]
  831. podKey := newPodKey(cluster, namespace, pod)
  832. pvKey := newPVKey(cluster, volume)
  833. pvcKey := newPVCKey(cluster, namespace, name)
  834. if _, ok := pvMap[pvKey]; !ok {
  835. log.Warningf("CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
  836. continue
  837. }
  838. if _, ok := podPVCMap[podKey]; !ok {
  839. podPVCMap[podKey] = []*PVC{}
  840. }
  841. pvc, ok := pvcMap[pvcKey]
  842. if !ok {
  843. log.Warningf("CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
  844. continue
  845. }
  846. pvc.Count = len(podAllocation[podKey])
  847. podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
  848. }
  849. }
  850. func applyUnmountedPVs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
  851. unmountedPVBytes := map[string]float64{}
  852. unmountedPVCost := map[string]float64{}
  853. for _, pv := range pvMap {
  854. mounted := false
  855. for _, pvc := range pvcMap {
  856. if pvc.Volume == nil {
  857. continue
  858. }
  859. if pvc.Volume == pv {
  860. mounted = true
  861. break
  862. }
  863. }
  864. log.Infof("CostModel.ComputeAllocation: PV %s is mounted? %t", pv.Name, mounted)
  865. if !mounted {
  866. gib := pv.Bytes / 1024 / 1024 / 1024
  867. hrs := window.Minutes() / 60.0
  868. cost := pv.CostPerGiBHour * gib * hrs
  869. unmountedPVCost[pv.Cluster] += cost
  870. unmountedPVBytes[pv.Cluster] += pv.Bytes
  871. }
  872. }
  873. for cluster, amount := range unmountedPVCost {
  874. container := "unmounted-pvs"
  875. pod := "unmounted-pvs"
  876. namespace := "" // TODO niko/cdmr what about this?
  877. containerKey := newContainerKey(cluster, namespace, pod, container)
  878. allocationMap[containerKey] = &kubecost.Allocation{
  879. Name: fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container),
  880. Properties: kubecost.Properties{
  881. kubecost.ClusterProp: cluster,
  882. kubecost.NamespaceProp: namespace,
  883. kubecost.PodProp: pod,
  884. kubecost.ContainerProp: container,
  885. },
  886. Window: window.Clone(),
  887. Start: *window.Start(),
  888. End: *window.End(),
  889. PVByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
  890. PVCost: amount,
  891. TotalCost: amount,
  892. }
  893. }
  894. }
  // containerKey identifies a single container by cluster, namespace, pod and
  // container name. It is comparable, so it can be used as a map key.
  type containerKey struct {
  	Cluster, Namespace, Pod, Container string
  }

  // String renders the key as "cluster/namespace/pod/container".
  func (k containerKey) String() string {
  	return fmt.Sprintf("%s/%s/%s/%s", k.Cluster, k.Namespace, k.Pod, k.Container)
  }

  // newContainerKey builds a containerKey from its four components.
  func newContainerKey(cluster, namespace, pod, container string) containerKey {
  	var key containerKey
  	key.Cluster = cluster
  	key.Namespace = namespace
  	key.Pod = pod
  	key.Container = container
  	return key
  }
  912. func resultContainerKey(res *prom.QueryResult, clusterLabel, namespaceLabel, podLabel, containerLabel string) (containerKey, error) {
  913. key := containerKey{}
  914. cluster, err := res.GetString(clusterLabel)
  915. if err != nil {
  916. cluster = env.GetClusterID()
  917. }
  918. key.Cluster = cluster
  919. namespace, err := res.GetString(namespaceLabel)
  920. if err != nil {
  921. return key, err
  922. }
  923. key.Namespace = namespace
  924. pod, err := res.GetString(podLabel)
  925. if err != nil {
  926. return key, err
  927. }
  928. key.Pod = pod
  929. container, err := res.GetString(containerLabel)
  930. if err != nil {
  931. return key, err
  932. }
  933. key.Container = container
  934. return key, nil
  935. }
  // podKey identifies a pod by cluster, namespace and pod name. It is
  // comparable, so it can be used as a map key.
  type podKey struct {
  	Cluster, Namespace, Pod string
  }

  // String renders the key as "cluster/namespace/pod".
  func (k podKey) String() string {
  	return fmt.Sprintf("%s/%s/%s", k.Cluster, k.Namespace, k.Pod)
  }

  // newPodKey builds a podKey from its three components.
  func newPodKey(cluster, namespace, pod string) podKey {
  	var key podKey
  	key.Cluster = cluster
  	key.Namespace = namespace
  	key.Pod = pod
  	return key
  }
  951. func resultPodKey(res *prom.QueryResult, clusterLabel, namespaceLabel, podLabel string) (podKey, error) {
  952. key := podKey{}
  953. cluster, err := res.GetString(clusterLabel)
  954. if err != nil {
  955. cluster = env.GetClusterID()
  956. }
  957. key.Cluster = cluster
  958. namespace, err := res.GetString(namespaceLabel)
  959. if err != nil {
  960. return key, err
  961. }
  962. key.Namespace = namespace
  963. pod, err := res.GetString(podLabel)
  964. if err != nil {
  965. return key, err
  966. }
  967. key.Pod = pod
  968. return key, nil
  969. }
  // nodeKey identifies a node by cluster and node name. It is comparable, so
  // it can be used as a map key.
  type nodeKey struct {
  	Cluster, Node string
  }

  // String renders the key as "cluster/node".
  func (k nodeKey) String() string {
  	return fmt.Sprintf("%s/%s", k.Cluster, k.Node)
  }

  // newNodeKey builds a nodeKey from its two components.
  func newNodeKey(cluster, node string) nodeKey {
  	var key nodeKey
  	key.Cluster = cluster
  	key.Node = node
  	return key
  }
  983. func resultNodeKey(res *prom.QueryResult, clusterLabel, nodeLabel string) (nodeKey, error) {
  984. key := nodeKey{}
  985. cluster, err := res.GetString(clusterLabel)
  986. if err != nil {
  987. cluster = env.GetClusterID()
  988. }
  989. key.Cluster = cluster
  990. node, err := res.GetString(nodeLabel)
  991. if err != nil {
  992. return key, err
  993. }
  994. key.Node = node
  995. return key, nil
  996. }
  // pvcKey identifies a persistent volume claim by cluster, namespace and
  // claim name. It is comparable, so it can be used as a map key.
  type pvcKey struct {
  	Cluster, Namespace, PersistentVolumeClaim string
  }

  // String renders the key as "cluster/namespace/persistentVolumeClaim".
  func (k pvcKey) String() string {
  	return fmt.Sprintf("%s/%s/%s", k.Cluster, k.Namespace, k.PersistentVolumeClaim)
  }

  // newPVCKey builds a pvcKey from its three components.
  func newPVCKey(cluster, namespace, persistentVolumeClaim string) pvcKey {
  	var key pvcKey
  	key.Cluster = cluster
  	key.Namespace = namespace
  	key.PersistentVolumeClaim = persistentVolumeClaim
  	return key
  }
  1012. func resultPVCKey(res *prom.QueryResult, clusterLabel, namespaceLabel, pvcLabel string) (pvcKey, error) {
  1013. key := pvcKey{}
  1014. cluster, err := res.GetString(clusterLabel)
  1015. if err != nil {
  1016. cluster = env.GetClusterID()
  1017. }
  1018. key.Cluster = cluster
  1019. namespace, err := res.GetString(namespaceLabel)
  1020. if err != nil {
  1021. return key, err
  1022. }
  1023. key.Namespace = namespace
  1024. pvc, err := res.GetString(pvcLabel)
  1025. if err != nil {
  1026. return key, err
  1027. }
  1028. key.PersistentVolumeClaim = pvc
  1029. return key, nil
  1030. }
  // pvKey identifies a persistent volume by cluster and volume name. It is
  // comparable, so it can be used as a map key.
  type pvKey struct {
  	Cluster, PersistentVolume string
  }

  // String renders the key as "cluster/persistentVolume".
  func (k pvKey) String() string {
  	return fmt.Sprintf("%s/%s", k.Cluster, k.PersistentVolume)
  }

  // newPVKey builds a pvKey from its two components.
  func newPVKey(cluster, persistentVolume string) pvKey {
  	var key pvKey
  	key.Cluster = cluster
  	key.PersistentVolume = persistentVolume
  	return key
  }
  1044. func resultPVKey(res *prom.QueryResult, clusterLabel, persistentVolumeLabel string) (pvKey, error) {
  1045. key := pvKey{}
  1046. cluster, err := res.GetString(clusterLabel)
  1047. if err != nil {
  1048. cluster = env.GetClusterID()
  1049. }
  1050. key.Cluster = cluster
  1051. persistentVolume, err := res.GetString(persistentVolumeLabel)
  1052. if err != nil {
  1053. return key, err
  1054. }
  1055. key.PersistentVolume = persistentVolume
  1056. return key, nil
  1057. }