allocation.go 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128
  1. package costmodel
  2. import (
  3. "fmt"
  4. "math"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "github.com/kubecost/cost-model/pkg/util/timeutil"
  9. "github.com/kubecost/cost-model/pkg/cloud"
  10. "github.com/kubecost/cost-model/pkg/env"
  11. "github.com/kubecost/cost-model/pkg/kubecost"
  12. "github.com/kubecost/cost-model/pkg/log"
  13. "github.com/kubecost/cost-model/pkg/prom"
  14. "k8s.io/apimachinery/pkg/labels"
  15. "k8s.io/klog"
  16. )
// Prometheus (PromQL) query templates. Each template is completed with
// fmt.Sprintf before being issued.
//
// Placeholder order, as used by ComputeAllocation:
//   - templates of the form `...[%s]%s)) by (..., %s ...)` take
//     (duration, offset, cluster label);
//   - templates of the form `avg_over_time(metric[%s]%s)` take
//     (duration, offset);
//   - subquery templates of the form `... by (..., %s)[%s:%s]%s` take
//     (cluster label, duration, resolution, offset). queryFmtPods has the
//     same shape; its call site is presumably buildPodMap -- confirm there.
const (
	// Pod running status, evaluated as a subquery.
	queryFmtPods = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]%s`

	// RAM: allocation, requests, and average/maximum working-set usage.
	queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s, provider_id)`
	queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
	queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
	queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`

	// CPU: allocation, requests, and average usage.
	queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
	queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
	queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`

	// This query could be written without the recording rule
	// "kubecost_savings_container_cpu_usage_seconds", but we should
	// only do that when we're ready to incur the performance tradeoffs
	// with subqueries which would probably be in the world of hourly
	// ETL.
	//
	// See PromQL subquery documentation for a rate example:
	// https://prometheus.io/blog/2019/01/28/subquery-support/#examples
	queryFmtCPUUsageMax = `max(max_over_time(kubecost_savings_container_cpu_usage_seconds[%s]%s)) by (container_name, pod_name, namespace, instance, %s)`

	// GPU: requests and allocation.
	queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
	queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`

	// Node pricing: hourly cost per CPU, per GiB of RAM, per GPU, and spot status.
	queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
	queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
	queryFmtNodeCostPerGPUHr = `avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
	queryFmtNodeIsSpot = `avg_over_time(kubecost_node_is_spot[%s]%s)`

	// Persistent volumes and persistent volume claims.
	queryFmtPVCInfo = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]%s`
	queryFmtPVBytes = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, %s)`
	queryFmtPodPVCAllocation = `avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
	queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, %s)`
	queryFmtPVCostPerGiBHour = `avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, %s)`

	// Network: egress volume (divided down to GiB) and cost per GiB for
	// cross-zone, cross-region and internet traffic, plus raw byte totals.
	queryFmtNetZoneGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
	queryFmtNetZoneCostPerGiB = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (%s)`
	queryFmtNetRegionGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
	queryFmtNetRegionCostPerGiB = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (%s)`
	queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
	queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (%s)`
	queryFmtNetReceiveBytes = `sum(increase(container_network_receive_bytes_total{pod_name!=""}[%s]%s)) by (pod_name, namespace, %s)`
	queryFmtNetTransferBytes = `sum(increase(container_network_transmit_bytes_total{pod_name!=""}[%s]%s)) by (pod_name, namespace, %s)`

	// Labels and annotations for namespaces, pods, services and controllers.
	queryFmtNamespaceLabels = `avg_over_time(kube_namespace_labels[%s]%s)`
	queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations[%s]%s)`
	queryFmtPodLabels = `avg_over_time(kube_pod_labels[%s]%s)`
	queryFmtPodAnnotations = `avg_over_time(kube_pod_annotations[%s]%s)`
	queryFmtServiceLabels = `avg_over_time(service_selector_labels[%s]%s)`
	queryFmtDeploymentLabels = `avg_over_time(deployment_match_labels[%s]%s)`
	queryFmtStatefulSetLabels = `avg_over_time(statefulSet_match_labels[%s]%s)`

	// Pod ownership by DaemonSets and Jobs (taken from kube_pod_owner, despite
	// the "Labels" suffix in the constant names).
	queryFmtDaemonSetLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, %s)`
	queryFmtJobLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,%s)`

	// Load balancers: hourly cost, and presence evaluated as a subquery.
	queryFmtLBCostPerHr = `avg(avg_over_time(kubecost_load_balancer_cost[%s]%s)) by (namespace, service_name, %s)`
	queryFmtLBActiveMins = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
)
// MAX_CPU_CAP is a bit of a hack to work around garbage data from cAdvisor.
// The maximum CPU count on AWS is 96. Standard_M192idms_v2 on Azure allows 192. GCP offers 224 in n2ds. Many improvements here are possible to get lower bounds, but 96 seems reasonable for now.
// Ideally you cap each pod to the max CPU on its node, but that involves a bit more complexity, as it would need to be done when allocations join with asset data.
const MAX_CPU_CAP = 96
  70. // CanCompute should return true if CostModel can act as a valid source for the
  71. // given time range. In the case of CostModel we want to attempt to compute as
  72. // long as the range starts in the past. If the CostModel ends up not having
  73. // data to match, that's okay, and should be communicated with an error
  74. // response from ComputeAllocation.
  75. func (cm *CostModel) CanCompute(start, end time.Time) bool {
  76. return start.Before(time.Now())
  77. }
  78. // Name returns the name of the Source
  79. func (cm *CostModel) Name() string {
  80. return "CostModel"
  81. }
  82. // ComputeAllocation uses the CostModel instance to compute an AllocationSet
  83. // for the window defined by the given start and end times. The Allocations
  84. // returned are unaggregated (i.e. down to the container level).
  85. func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
  86. // 1. Build out Pod map from resolution-tuned, batched Pod start/end query
  87. // 2. Run and apply the results of the remaining queries to
  88. // 3. Build out AllocationSet from completed Pod map
  89. // Create a window spanning the requested query
  90. window := kubecost.NewWindow(&start, &end)
  91. // Create an empty AllocationSet. For safety, in the case of an error, we
  92. // should prefer to return this empty set with the error. (In the case of
  93. // no error, of course we populate the set and return it.)
  94. allocSet := kubecost.NewAllocationSet(start, end)
  95. // (1) Build out Pod map
  96. // Build out a map of Allocations as a mapping from pod-to-container-to-
  97. // underlying-Allocation instance, starting with (start, end) so that we
  98. // begin with minutes, from which we compute resource allocation and cost
  99. // totals from measured rate data.
  100. podMap := map[podKey]*Pod{}
  101. // clusterStarts and clusterEnds record the earliest start and latest end
  102. // times, respectively, on a cluster-basis. These are used for unmounted
  103. // PVs and other "virtual" Allocations so that minutes are maximally
  104. // accurate during start-up or spin-down of a cluster
  105. clusterStart := map[string]time.Time{}
  106. clusterEnd := map[string]time.Time{}
  107. cm.buildPodMap(window, resolution, env.GetETLMaxBatchDuration(), podMap, clusterStart, clusterEnd)
  108. // (2) Run and apply remaining queries
  109. // Convert window (start, end) to (duration, offset) for querying Prometheus,
  110. // including handling Thanos offset
  111. durStr, offStr, err := window.DurationOffsetForPrometheus()
  112. if err != nil {
  113. // Negative duration, so return empty set
  114. return allocSet, nil
  115. }
  116. // Convert resolution duration to a query-ready string
  117. resStr := timeutil.DurationString(resolution)
  118. ctx := prom.NewContext(cm.PrometheusClient)
  119. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr, env.GetPromClusterLabel())
  120. resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
  121. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, offStr, env.GetPromClusterLabel())
  122. resChRAMRequests := ctx.Query(queryRAMRequests)
  123. queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, offStr, env.GetPromClusterLabel())
  124. resChRAMUsageAvg := ctx.Query(queryRAMUsageAvg)
  125. queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, offStr, env.GetPromClusterLabel())
  126. resChRAMUsageMax := ctx.Query(queryRAMUsageMax)
  127. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, offStr, env.GetPromClusterLabel())
  128. resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
  129. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, offStr, env.GetPromClusterLabel())
  130. resChCPURequests := ctx.Query(queryCPURequests)
  131. queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, offStr, env.GetPromClusterLabel())
  132. resChCPUUsageAvg := ctx.Query(queryCPUUsageAvg)
  133. queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, offStr, env.GetPromClusterLabel())
  134. resChCPUUsageMax := ctx.Query(queryCPUUsageMax)
  135. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, offStr, env.GetPromClusterLabel())
  136. resChGPUsRequested := ctx.Query(queryGPUsRequested)
  137. queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, offStr, env.GetPromClusterLabel())
  138. resChGPUsAllocated := ctx.Query(queryGPUsAllocated)
  139. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, offStr, env.GetPromClusterLabel())
  140. resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
  141. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, offStr, env.GetPromClusterLabel())
  142. resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
  143. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, offStr, env.GetPromClusterLabel())
  144. resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
  145. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr, offStr)
  146. resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
  147. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr, offStr)
  148. resChPVCInfo := ctx.Query(queryPVCInfo)
  149. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, offStr, env.GetPromClusterLabel())
  150. resChPVBytes := ctx.Query(queryPVBytes)
  151. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, offStr, env.GetPromClusterLabel())
  152. resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
  153. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, offStr, env.GetPromClusterLabel())
  154. resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
  155. queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, offStr, env.GetPromClusterLabel())
  156. resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
  157. queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, offStr, env.GetPromClusterLabel())
  158. resChNetTransferBytes := ctx.Query(queryNetTransferBytes)
  159. queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, offStr, env.GetPromClusterLabel())
  160. resChNetReceiveBytes := ctx.Query(queryNetReceiveBytes)
  161. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, offStr, env.GetPromClusterLabel())
  162. resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
  163. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  164. resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
  165. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, offStr, env.GetPromClusterLabel())
  166. resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
  167. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  168. resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
  169. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, offStr, env.GetPromClusterLabel())
  170. resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
  171. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  172. resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
  173. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr, offStr)
  174. resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
  175. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr, offStr)
  176. resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
  177. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr, offStr)
  178. resChPodLabels := ctx.Query(queryPodLabels)
  179. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr, offStr)
  180. resChPodAnnotations := ctx.Query(queryPodAnnotations)
  181. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr, offStr)
  182. resChServiceLabels := ctx.Query(queryServiceLabels)
  183. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr, offStr)
  184. resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
  185. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr, offStr)
  186. resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
  187. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, offStr, env.GetPromClusterLabel())
  188. resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
  189. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr, env.GetPromClusterLabel())
  190. resChJobLabels := ctx.Query(queryJobLabels)
  191. queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, offStr, env.GetPromClusterLabel())
  192. resChLBCostPerHr := ctx.Query(queryLBCostPerHr)
  193. queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr, offStr)
  194. resChLBActiveMins := ctx.Query(queryLBActiveMins)
  195. resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
  196. resCPURequests, _ := resChCPURequests.Await()
  197. resCPUUsageAvg, _ := resChCPUUsageAvg.Await()
  198. resCPUUsageMax, _ := resChCPUUsageMax.Await()
  199. resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
  200. resRAMRequests, _ := resChRAMRequests.Await()
  201. resRAMUsageAvg, _ := resChRAMUsageAvg.Await()
  202. resRAMUsageMax, _ := resChRAMUsageMax.Await()
  203. resGPUsRequested, _ := resChGPUsRequested.Await()
  204. resGPUsAllocated, _ := resChGPUsAllocated.Await()
  205. resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
  206. resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
  207. resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
  208. resNodeIsSpot, _ := resChNodeIsSpot.Await()
  209. resPVBytes, _ := resChPVBytes.Await()
  210. resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
  211. resPVCInfo, _ := resChPVCInfo.Await()
  212. resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
  213. resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
  214. resNetTransferBytes, _ := resChNetTransferBytes.Await()
  215. resNetReceiveBytes, _ := resChNetReceiveBytes.Await()
  216. resNetZoneGiB, _ := resChNetZoneGiB.Await()
  217. resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
  218. resNetRegionGiB, _ := resChNetRegionGiB.Await()
  219. resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
  220. resNetInternetGiB, _ := resChNetInternetGiB.Await()
  221. resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()
  222. resNamespaceLabels, _ := resChNamespaceLabels.Await()
  223. resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
  224. resPodLabels, _ := resChPodLabels.Await()
  225. resPodAnnotations, _ := resChPodAnnotations.Await()
  226. resServiceLabels, _ := resChServiceLabels.Await()
  227. resDeploymentLabels, _ := resChDeploymentLabels.Await()
  228. resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
  229. resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
  230. resJobLabels, _ := resChJobLabels.Await()
  231. resLBCostPerHr, _ := resChLBCostPerHr.Await()
  232. resLBActiveMins, _ := resChLBActiveMins.Await()
  233. if ctx.HasErrors() {
  234. for _, err := range ctx.Errors() {
  235. log.Errorf("CostModel.ComputeAllocation: %s", err)
  236. }
  237. return allocSet, ctx.ErrorCollection()
  238. }
  239. // We choose to apply allocation before requests in the cases of RAM and
  240. // CPU so that we can assert that allocation should always be greater than
  241. // or equal to request.
  242. applyCPUCoresAllocated(podMap, resCPUCoresAllocated)
  243. applyCPUCoresRequested(podMap, resCPURequests)
  244. applyCPUCoresUsedAvg(podMap, resCPUUsageAvg)
  245. applyCPUCoresUsedMax(podMap, resCPUUsageMax)
  246. applyRAMBytesAllocated(podMap, resRAMBytesAllocated)
  247. applyRAMBytesRequested(podMap, resRAMRequests)
  248. applyRAMBytesUsedAvg(podMap, resRAMUsageAvg)
  249. applyRAMBytesUsedMax(podMap, resRAMUsageMax)
  250. applyGPUsAllocated(podMap, resGPUsRequested, resGPUsAllocated)
  251. applyNetworkTotals(podMap, resNetTransferBytes, resNetReceiveBytes)
  252. applyNetworkAllocation(podMap, resNetZoneGiB, resNetZoneCostPerGiB)
  253. applyNetworkAllocation(podMap, resNetRegionGiB, resNetRegionCostPerGiB)
  254. applyNetworkAllocation(podMap, resNetInternetGiB, resNetInternetCostPerGiB)
  255. namespaceLabels := resToNamespaceLabels(resNamespaceLabels)
  256. podLabels := resToPodLabels(resPodLabels)
  257. namespaceAnnotations := resToNamespaceAnnotations(resNamespaceAnnotations)
  258. podAnnotations := resToPodAnnotations(resPodAnnotations)
  259. applyLabels(podMap, namespaceLabels, podLabels)
  260. applyAnnotations(podMap, namespaceAnnotations, podAnnotations)
  261. serviceLabels := getServiceLabels(resServiceLabels)
  262. allocsByService := map[serviceKey][]*kubecost.Allocation{}
  263. applyServicesToPods(podMap, podLabels, allocsByService, serviceLabels)
  264. podDeploymentMap := labelsToPodControllerMap(podLabels, resToDeploymentLabels(resDeploymentLabels))
  265. podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
  266. podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels)
  267. podJobMap := resToPodJobMap(resJobLabels)
  268. applyControllersToPods(podMap, podDeploymentMap)
  269. applyControllersToPods(podMap, podStatefulSetMap)
  270. applyControllersToPods(podMap, podDaemonSetMap)
  271. applyControllersToPods(podMap, podJobMap)
  272. // TODO breakdown network costs?
  273. // Build out a map of Nodes with resource costs, discounts, and node types
  274. // for converting resource allocation data to cumulative costs.
  275. nodeMap := map[nodeKey]*NodePricing{}
  276. applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
  277. applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
  278. applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
  279. applyNodeSpot(nodeMap, resNodeIsSpot)
  280. applyNodeDiscount(nodeMap, cm)
  281. // Build out the map of all PVs with class, size and cost-per-hour.
  282. // Note: this does not record time running, which we may want to
  283. // include later for increased PV precision. (As long as the PV has
  284. // a PVC, we get time running there, so this is only inaccurate
  285. // for short-lived, unmounted PVs.)
  286. pvMap := map[pvKey]*PV{}
  287. buildPVMap(pvMap, resPVCostPerGiBHour)
  288. applyPVBytes(pvMap, resPVBytes)
  289. // Build out the map of all PVCs with time running, bytes requested,
  290. // and connect to the correct PV from pvMap. (If no PV exists, that
  291. // is noted, but does not result in any allocation/cost.)
  292. pvcMap := map[pvcKey]*PVC{}
  293. buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
  294. applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
  295. // Build out the relationships of pods to their PVCs. This step
  296. // populates the PVC.Count field so that PVC allocation can be
  297. // split appropriately among each pod's container allocation.
  298. podPVCMap := map[podKey][]*PVC{}
  299. buildPodPVCMap(podPVCMap, pvMap, pvcMap, podMap, resPodPVCAllocation)
  300. // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
  301. // cluster representing each cluster's unmounted PVs (if necessary).
  302. applyUnmountedPVs(window, podMap, pvMap, pvcMap)
  303. lbMap := getLoadBalancerCosts(resLBCostPerHr, resLBActiveMins, resolution)
  304. applyLoadBalancersToPods(lbMap, allocsByService)
  305. // (3) Build out AllocationSet from Pod map
  306. for _, pod := range podMap {
  307. for _, alloc := range pod.Allocations {
  308. cluster := alloc.Properties.Cluster
  309. nodeName := alloc.Properties.Node
  310. namespace := alloc.Properties.Namespace
  311. pod := alloc.Properties.Pod
  312. container := alloc.Properties.Container
  313. podKey := newPodKey(cluster, namespace, pod)
  314. nodeKey := newNodeKey(cluster, nodeName)
  315. node := cm.getNodePricing(nodeMap, nodeKey)
  316. alloc.Properties.ProviderID = node.ProviderID
  317. alloc.CPUCost = alloc.CPUCoreHours * node.CostPerCPUHr
  318. alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * node.CostPerRAMGiBHr
  319. alloc.GPUCost = alloc.GPUHours * node.CostPerGPUHr
  320. if pvcs, ok := podPVCMap[podKey]; ok {
  321. for _, pvc := range pvcs {
  322. // Determine the (start, end) of the relationship between the
  323. // given PVC and the associated Allocation so that a precise
  324. // number of hours can be used to compute cumulative cost.
  325. s, e := alloc.Start, alloc.End
  326. if pvc.Start.After(alloc.Start) {
  327. s = pvc.Start
  328. }
  329. if pvc.End.Before(alloc.End) {
  330. e = pvc.End
  331. }
  332. minutes := e.Sub(s).Minutes()
  333. hrs := minutes / 60.0
  334. count := float64(pvc.Count)
  335. if pvc.Count < 1 {
  336. count = 1
  337. }
  338. gib := pvc.Bytes / 1024 / 1024 / 1024
  339. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  340. // Apply the size and cost of the PV to the allocation, each
  341. // weighted by count (i.e. the number of containers in the pod)
  342. // record the amount of total PVBytes Hours attributable to a given PV
  343. if alloc.PVs == nil {
  344. alloc.PVs = kubecost.PVAllocations{}
  345. }
  346. pvKey := kubecost.PVKey{
  347. Cluster: pvc.Cluster,
  348. Name: pvc.Volume.Name,
  349. }
  350. alloc.PVs[pvKey] = &kubecost.PVAllocation{
  351. ByteHours: pvc.Bytes * hrs / count,
  352. Cost: cost / count,
  353. }
  354. }
  355. }
  356. // Make sure that the name is correct (node may not be present at this
  357. // point due to it missing from queryMinutes) then insert.
  358. alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, nodeName, namespace, pod, container)
  359. allocSet.Set(alloc)
  360. }
  361. }
  362. return allocSet, nil
  363. }
  364. func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSize time.Duration, podMap map[podKey]*Pod, clusterStart, clusterEnd map[string]time.Time) error {
  365. // Assumes that window is positive and closed
  366. start, end := *window.Start(), *window.End()
  367. // Convert resolution duration to a query-ready string
  368. resStr := timeutil.DurationString(resolution)
  369. ctx := prom.NewContext(cm.PrometheusClient)
  370. // Query for (start, end) by (pod, namespace, cluster) over the given
  371. // window, using the given resolution, and if necessary in batches no
  372. // larger than the given maximum batch size. If working in batches, track
  373. // overall progress by starting with (window.start, window.start) and
  374. // querying in batches no larger than maxBatchSize from start-to-end,
  375. // folding each result set into podMap as the results come back.
  376. coverage := kubecost.NewWindow(&start, &start)
  377. numQuery := 1
  378. for coverage.End().Before(end) {
  379. // Determine the (start, end) of the current batch
  380. batchStart := *coverage.End()
  381. batchEnd := coverage.End().Add(maxBatchSize)
  382. if batchEnd.After(end) {
  383. batchEnd = end
  384. }
  385. batchWindow := kubecost.NewWindow(&batchStart, &batchEnd)
  386. var resPods []*prom.QueryResult
  387. var err error
  388. maxTries := 3
  389. numTries := 0
  390. for resPods == nil && numTries < maxTries {
  391. numTries++
  392. // Convert window (start, end) to (duration, offset) for querying Prometheus,
  393. // including handling Thanos offset
  394. durStr, offStr, err := batchWindow.DurationOffsetForPrometheus()
  395. if err != nil || durStr == "" {
  396. // Negative duration, so set empty results and don't query
  397. resPods = []*prom.QueryResult{}
  398. err = nil
  399. break
  400. }
  401. // Submit and profile query
  402. queryPods := fmt.Sprintf(queryFmtPods, env.GetPromClusterLabel(), durStr, resStr, offStr)
  403. queryProfile := time.Now()
  404. resPods, err = ctx.Query(queryPods).Await()
  405. if err != nil {
  406. log.Profile(queryProfile, fmt.Sprintf("CostModel.ComputeAllocation: pod query %d try %d failed: %s", numQuery, numTries, queryPods))
  407. resPods = nil
  408. }
  409. }
  410. if err != nil {
  411. return err
  412. }
  413. applyPodResults(window, resolution, podMap, clusterStart, clusterEnd, resPods)
  414. coverage = coverage.ExpandEnd(batchEnd)
  415. numQuery++
  416. }
  417. return nil
  418. }
  419. func applyPodResults(window kubecost.Window, resolution time.Duration, podMap map[podKey]*Pod, clusterStart, clusterEnd map[string]time.Time, resPods []*prom.QueryResult) {
  420. for _, res := range resPods {
  421. if len(res.Values) == 0 {
  422. log.Warningf("CostModel.ComputeAllocation: empty minutes result")
  423. continue
  424. }
  425. cluster, err := res.GetString(env.GetPromClusterLabel())
  426. if err != nil {
  427. cluster = env.GetClusterID()
  428. }
  429. labels, err := res.GetStrings("namespace", "pod")
  430. if err != nil {
  431. log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
  432. continue
  433. }
  434. namespace := labels["namespace"]
  435. pod := labels["pod"]
  436. key := newPodKey(cluster, namespace, pod)
  437. // allocStart and allocEnd are the timestamps of the first and last
  438. // minutes the pod was running, respectively. We subtract one resolution
  439. // from allocStart because this point will actually represent the end
  440. // of the first minute. We don't subtract from allocEnd because it
  441. // already represents the end of the last minute.
  442. var allocStart, allocEnd time.Time
  443. startAdjustmentCoeff, endAdjustmentCoeff := 1.0, 1.0
  444. for _, datum := range res.Values {
  445. t := time.Unix(int64(datum.Timestamp), 0)
  446. if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  447. // Set the start timestamp to the earliest non-zero timestamp
  448. allocStart = t
  449. // Record adjustment coefficient, i.e. the portion of the start
  450. // timestamp to "ignore". That is, sometimes the value will be
  451. // 0.5, meaning that we should discount the time running by
  452. // half of the resolution the timestamp stands for.
  453. startAdjustmentCoeff = (1.0 - datum.Value)
  454. }
  455. if datum.Value > 0 && window.Contains(t) {
  456. // Set the end timestamp to the latest non-zero timestamp
  457. allocEnd = t
  458. // Record adjustment coefficient, i.e. the portion of the end
  459. // timestamp to "ignore". (See explanation above for start.)
  460. endAdjustmentCoeff = (1.0 - datum.Value)
  461. }
  462. }
  463. if allocStart.IsZero() || allocEnd.IsZero() {
  464. continue
  465. }
  466. // Adjust timestamps according to the resolution and the adjustment
  467. // coefficients, as described above. That is, count the start timestamp
  468. // from the beginning of the resolution, not the end. Then "reduce" the
  469. // start and end by the correct amount, in the case that the "running"
  470. // value of the first or last timestamp was not a full 1.0.
  471. allocStart = allocStart.Add(-resolution)
  472. // Note: the *100 and /100 are necessary because Duration is an int, so
  473. // 0.5, for instance, will be truncated, resulting in no adjustment.
  474. allocStart = allocStart.Add(time.Duration(startAdjustmentCoeff*100) * resolution / time.Duration(100))
  475. allocEnd = allocEnd.Add(-time.Duration(endAdjustmentCoeff*100) * resolution / time.Duration(100))
  476. // If there is only one point with a value <= 0.5 that the start and
  477. // end timestamps both share, then we will enter this case because at
  478. // least half of a resolution will be subtracted from both the start
  479. // and the end. If that is the case, then add back half of each side
  480. // so that the pod is said to run for half a resolution total.
  481. // e.g. For resolution 1m and a value of 0.5 at one timestamp, we'll
  482. // end up with allocEnd == allocStart and each coeff == 0.5. In
  483. // that case, add 0.25m to each side, resulting in 0.5m duration.
  484. if !allocEnd.After(allocStart) {
  485. allocStart = allocStart.Add(-time.Duration(50*startAdjustmentCoeff) * resolution / time.Duration(100))
  486. allocEnd = allocEnd.Add(time.Duration(50*endAdjustmentCoeff) * resolution / time.Duration(100))
  487. }
  488. // Set start if unset or this datum's start time is earlier than the
  489. // current earliest time.
  490. if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
  491. clusterStart[cluster] = allocStart
  492. }
  493. // Set end if unset or this datum's end time is later than the
  494. // current latest time.
  495. if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
  496. clusterEnd[cluster] = allocEnd
  497. }
  498. if pod, ok := podMap[key]; ok {
  499. // Pod has already been recorded, so update it accordingly
  500. if allocStart.Before(pod.Start) {
  501. pod.Start = allocStart
  502. }
  503. if allocEnd.After(pod.End) {
  504. pod.End = allocEnd
  505. }
  506. } else {
  507. // Pod has not been recorded yet, so insert it
  508. podMap[key] = &Pod{
  509. Window: window.Clone(),
  510. Start: allocStart,
  511. End: allocEnd,
  512. Key: key,
  513. Allocations: map[string]*kubecost.Allocation{},
  514. }
  515. }
  516. }
  517. }
  518. func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom.QueryResult) {
  519. for _, res := range resCPUCoresAllocated {
  520. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  521. if err != nil {
  522. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation result missing field: %s", err)
  523. continue
  524. }
  525. pod, ok := podMap[key]
  526. if !ok {
  527. continue
  528. }
  529. container, err := res.GetString("container")
  530. if err != nil {
  531. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation query result missing 'container': %s", key)
  532. continue
  533. }
  534. if _, ok := pod.Allocations[container]; !ok {
  535. pod.AppendContainer(container)
  536. }
  537. cpuCores := res.Values[0].Value
  538. if cpuCores > MAX_CPU_CAP {
  539. klog.Infof("[WARNING] Very large cpu allocation, clamping to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
  540. cpuCores = 0.0
  541. }
  542. hours := pod.Allocations[container].Minutes() / 60.0
  543. pod.Allocations[container].CPUCoreHours = cpuCores * hours
  544. node, err := res.GetString("node")
  545. if err != nil {
  546. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
  547. continue
  548. }
  549. pod.Allocations[container].Properties.Node = node
  550. }
  551. }
  552. func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom.QueryResult) {
  553. for _, res := range resCPUCoresRequested {
  554. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  555. if err != nil {
  556. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request result missing field: %s", err)
  557. continue
  558. }
  559. pod, ok := podMap[key]
  560. if !ok {
  561. continue
  562. }
  563. container, err := res.GetString("container")
  564. if err != nil {
  565. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request query result missing 'container': %s", key)
  566. continue
  567. }
  568. if _, ok := pod.Allocations[container]; !ok {
  569. pod.AppendContainer(container)
  570. }
  571. pod.Allocations[container].CPUCoreRequestAverage = res.Values[0].Value
  572. // If CPU allocation is less than requests, set CPUCoreHours to
  573. // request level.
  574. if pod.Allocations[container].CPUCores() < res.Values[0].Value {
  575. pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  576. }
  577. if pod.Allocations[container].CPUCores() > MAX_CPU_CAP {
  578. klog.Infof("[WARNING] Very large cpu allocation, clamping! to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
  579. pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  580. }
  581. node, err := res.GetString("node")
  582. if err != nil {
  583. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
  584. continue
  585. }
  586. pod.Allocations[container].Properties.Node = node
  587. }
  588. }
  589. func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.QueryResult) {
  590. for _, res := range resCPUCoresUsedAvg {
  591. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  592. if err != nil {
  593. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg result missing field: %s", err)
  594. continue
  595. }
  596. pod, ok := podMap[key]
  597. if !ok {
  598. continue
  599. }
  600. container, err := res.GetString("container")
  601. if container == "" || err != nil {
  602. container, err = res.GetString("container_name")
  603. if err != nil {
  604. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg query result missing 'container': %s", key)
  605. continue
  606. }
  607. }
  608. if _, ok := pod.Allocations[container]; !ok {
  609. pod.AppendContainer(container)
  610. }
  611. pod.Allocations[container].CPUCoreUsageAverage = res.Values[0].Value
  612. if res.Values[0].Value > MAX_CPU_CAP {
  613. klog.Infof("[WARNING] Very large cpu USAGE, dropping outlier")
  614. pod.Allocations[container].CPUCoreUsageAverage = 0.0
  615. }
  616. }
  617. }
  618. func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.QueryResult) {
  619. for _, res := range resCPUCoresUsedMax {
  620. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  621. if err != nil {
  622. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max result missing field: %s", err)
  623. continue
  624. }
  625. pod, ok := podMap[key]
  626. if !ok {
  627. continue
  628. }
  629. container, err := res.GetString("container_name")
  630. if err != nil {
  631. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max query result missing 'container': %s", key)
  632. continue
  633. }
  634. if _, ok := pod.Allocations[container]; !ok {
  635. pod.AppendContainer(container)
  636. }
  637. if pod.Allocations[container].RawAllocationOnly == nil {
  638. pod.Allocations[container].RawAllocationOnly = &kubecost.RawAllocationOnlyData{
  639. CPUCoreUsageMax: res.Values[0].Value,
  640. }
  641. } else {
  642. pod.Allocations[container].RawAllocationOnly.CPUCoreUsageMax = res.Values[0].Value
  643. }
  644. }
  645. }
  646. func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom.QueryResult) {
  647. for _, res := range resRAMBytesAllocated {
  648. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  649. if err != nil {
  650. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation result missing field: %s", err)
  651. continue
  652. }
  653. pod, ok := podMap[key]
  654. if !ok {
  655. continue
  656. }
  657. container, err := res.GetString("container")
  658. if err != nil {
  659. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation query result missing 'container': %s", key)
  660. continue
  661. }
  662. if _, ok := pod.Allocations[container]; !ok {
  663. pod.AppendContainer(container)
  664. }
  665. ramBytes := res.Values[0].Value
  666. hours := pod.Allocations[container].Minutes() / 60.0
  667. pod.Allocations[container].RAMByteHours = ramBytes * hours
  668. node, err := res.GetString("node")
  669. if err != nil {
  670. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
  671. continue
  672. }
  673. pod.Allocations[container].Properties.Node = node
  674. }
  675. }
  676. func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom.QueryResult) {
  677. for _, res := range resRAMBytesRequested {
  678. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  679. if err != nil {
  680. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request result missing field: %s", err)
  681. continue
  682. }
  683. pod, ok := podMap[key]
  684. if !ok {
  685. continue
  686. }
  687. container, err := res.GetString("container")
  688. if err != nil {
  689. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request query result missing 'container': %s", key)
  690. continue
  691. }
  692. if _, ok := pod.Allocations[container]; !ok {
  693. pod.AppendContainer(container)
  694. }
  695. pod.Allocations[container].RAMBytesRequestAverage = res.Values[0].Value
  696. // If RAM allocation is less than requests, set RAMByteHours to
  697. // request level.
  698. if pod.Allocations[container].RAMBytes() < res.Values[0].Value {
  699. pod.Allocations[container].RAMByteHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  700. }
  701. node, err := res.GetString("node")
  702. if err != nil {
  703. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
  704. continue
  705. }
  706. pod.Allocations[container].Properties.Node = node
  707. }
  708. }
  709. func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.QueryResult) {
  710. for _, res := range resRAMBytesUsedAvg {
  711. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  712. if err != nil {
  713. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM avg usage result missing field: %s", err)
  714. continue
  715. }
  716. pod, ok := podMap[key]
  717. if !ok {
  718. continue
  719. }
  720. container, err := res.GetString("container")
  721. if container == "" || err != nil {
  722. container, err = res.GetString("container_name")
  723. if err != nil {
  724. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg query result missing 'container': %s", key)
  725. continue
  726. }
  727. }
  728. if _, ok := pod.Allocations[container]; !ok {
  729. pod.AppendContainer(container)
  730. }
  731. pod.Allocations[container].RAMBytesUsageAverage = res.Values[0].Value
  732. }
  733. }
  734. func applyRAMBytesUsedMax(podMap map[podKey]*Pod, resRAMBytesUsedMax []*prom.QueryResult) {
  735. for _, res := range resRAMBytesUsedMax {
  736. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  737. if err != nil {
  738. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max result missing field: %s", err)
  739. continue
  740. }
  741. pod, ok := podMap[key]
  742. if !ok {
  743. continue
  744. }
  745. container, err := res.GetString("container_name")
  746. if err != nil {
  747. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max query result missing 'container': %s", key)
  748. continue
  749. }
  750. if _, ok := pod.Allocations[container]; !ok {
  751. pod.AppendContainer(container)
  752. }
  753. if pod.Allocations[container].RawAllocationOnly == nil {
  754. pod.Allocations[container].RawAllocationOnly = &kubecost.RawAllocationOnlyData{
  755. RAMBytesUsageMax: res.Values[0].Value,
  756. }
  757. } else {
  758. pod.Allocations[container].RawAllocationOnly.RAMBytesUsageMax = res.Values[0].Value
  759. }
  760. }
  761. }
  762. func applyGPUsAllocated(podMap map[podKey]*Pod, resGPUsRequested []*prom.QueryResult, resGPUsAllocated []*prom.QueryResult) {
  763. if len(resGPUsAllocated) > 0 { // Use the new query, when it's become available in a window
  764. resGPUsRequested = resGPUsAllocated
  765. }
  766. for _, res := range resGPUsRequested {
  767. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  768. if err != nil {
  769. log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request result missing field: %s", err)
  770. continue
  771. }
  772. pod, ok := podMap[key]
  773. if !ok {
  774. continue
  775. }
  776. container, err := res.GetString("container")
  777. if err != nil {
  778. log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request query result missing 'container': %s", key)
  779. continue
  780. }
  781. if _, ok := pod.Allocations[container]; !ok {
  782. pod.AppendContainer(container)
  783. }
  784. hrs := pod.Allocations[container].Minutes() / 60.0
  785. pod.Allocations[container].GPUHours = res.Values[0].Value * hrs
  786. }
  787. }
  788. func applyNetworkTotals(podMap map[podKey]*Pod, resNetworkTransferBytes []*prom.QueryResult, resNetworkReceiveBytes []*prom.QueryResult) {
  789. for _, res := range resNetworkTransferBytes {
  790. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  791. if err != nil {
  792. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Transfer Bytes query result missing field: %s", err)
  793. continue
  794. }
  795. pod, ok := podMap[podKey]
  796. if !ok {
  797. continue
  798. }
  799. for _, alloc := range pod.Allocations {
  800. alloc.NetworkTransferBytes = res.Values[0].Value / float64(len(pod.Allocations))
  801. }
  802. }
  803. for _, res := range resNetworkReceiveBytes {
  804. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  805. if err != nil {
  806. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Receive Bytes query result missing field: %s", err)
  807. continue
  808. }
  809. pod, ok := podMap[podKey]
  810. if !ok {
  811. continue
  812. }
  813. for _, alloc := range pod.Allocations {
  814. alloc.NetworkReceiveBytes = res.Values[0].Value / float64(len(pod.Allocations))
  815. }
  816. }
  817. }
  818. func applyNetworkAllocation(podMap map[podKey]*Pod, resNetworkGiB []*prom.QueryResult, resNetworkCostPerGiB []*prom.QueryResult) {
  819. costPerGiBByCluster := map[string]float64{}
  820. for _, res := range resNetworkCostPerGiB {
  821. cluster, err := res.GetString(env.GetPromClusterLabel())
  822. if err != nil {
  823. cluster = env.GetClusterID()
  824. }
  825. costPerGiBByCluster[cluster] = res.Values[0].Value
  826. }
  827. for _, res := range resNetworkGiB {
  828. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  829. if err != nil {
  830. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
  831. continue
  832. }
  833. pod, ok := podMap[podKey]
  834. if !ok {
  835. continue
  836. }
  837. for _, alloc := range pod.Allocations {
  838. gib := res.Values[0].Value / float64(len(pod.Allocations))
  839. costPerGiB := costPerGiBByCluster[podKey.Cluster]
  840. alloc.NetworkCost = gib * costPerGiB
  841. }
  842. }
  843. }
  844. func resToNamespaceLabels(resNamespaceLabels []*prom.QueryResult) map[namespaceKey]map[string]string {
  845. namespaceLabels := map[namespaceKey]map[string]string{}
  846. for _, res := range resNamespaceLabels {
  847. nsKey, err := resultNamespaceKey(res, env.GetPromClusterLabel(), "namespace")
  848. if err != nil {
  849. continue
  850. }
  851. if _, ok := namespaceLabels[nsKey]; !ok {
  852. namespaceLabels[nsKey] = map[string]string{}
  853. }
  854. for k, l := range res.GetLabels() {
  855. namespaceLabels[nsKey][k] = l
  856. }
  857. }
  858. return namespaceLabels
  859. }
  860. func resToPodLabels(resPodLabels []*prom.QueryResult) map[podKey]map[string]string {
  861. podLabels := map[podKey]map[string]string{}
  862. for _, res := range resPodLabels {
  863. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  864. if err != nil {
  865. continue
  866. }
  867. if _, ok := podLabels[podKey]; !ok {
  868. podLabels[podKey] = map[string]string{}
  869. }
  870. for k, l := range res.GetLabels() {
  871. podLabels[podKey][k] = l
  872. }
  873. }
  874. return podLabels
  875. }
  876. func resToNamespaceAnnotations(resNamespaceAnnotations []*prom.QueryResult) map[string]map[string]string {
  877. namespaceAnnotations := map[string]map[string]string{}
  878. for _, res := range resNamespaceAnnotations {
  879. namespace, err := res.GetString("namespace")
  880. if err != nil {
  881. continue
  882. }
  883. if _, ok := namespaceAnnotations[namespace]; !ok {
  884. namespaceAnnotations[namespace] = map[string]string{}
  885. }
  886. for k, l := range res.GetAnnotations() {
  887. namespaceAnnotations[namespace][k] = l
  888. }
  889. }
  890. return namespaceAnnotations
  891. }
  892. func resToPodAnnotations(resPodAnnotations []*prom.QueryResult) map[podKey]map[string]string {
  893. podAnnotations := map[podKey]map[string]string{}
  894. for _, res := range resPodAnnotations {
  895. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  896. if err != nil {
  897. continue
  898. }
  899. if _, ok := podAnnotations[podKey]; !ok {
  900. podAnnotations[podKey] = map[string]string{}
  901. }
  902. for k, l := range res.GetAnnotations() {
  903. podAnnotations[podKey][k] = l
  904. }
  905. }
  906. return podAnnotations
  907. }
  908. func applyLabels(podMap map[podKey]*Pod, namespaceLabels map[namespaceKey]map[string]string, podLabels map[podKey]map[string]string) {
  909. for podKey, pod := range podMap {
  910. for _, alloc := range pod.Allocations {
  911. allocLabels := alloc.Properties.Labels
  912. if allocLabels == nil {
  913. allocLabels = make(map[string]string)
  914. }
  915. // Apply namespace labels first, then pod labels so that pod labels
  916. // overwrite namespace labels.
  917. nsKey := newNamespaceKey(podKey.Cluster, podKey.Namespace)
  918. if labels, ok := namespaceLabels[nsKey]; ok {
  919. for k, v := range labels {
  920. allocLabels[k] = v
  921. }
  922. }
  923. if labels, ok := podLabels[podKey]; ok {
  924. for k, v := range labels {
  925. allocLabels[k] = v
  926. }
  927. }
  928. alloc.Properties.Labels = allocLabels
  929. }
  930. }
  931. }
  932. func applyAnnotations(podMap map[podKey]*Pod, namespaceAnnotations map[string]map[string]string, podAnnotations map[podKey]map[string]string) {
  933. for key, pod := range podMap {
  934. for _, alloc := range pod.Allocations {
  935. allocAnnotations := alloc.Properties.Annotations
  936. if allocAnnotations == nil {
  937. allocAnnotations = make(map[string]string)
  938. }
  939. // Apply namespace annotations first, then pod annotations so that
  940. // pod labels overwrite namespace labels.
  941. if labels, ok := namespaceAnnotations[key.Namespace]; ok {
  942. for k, v := range labels {
  943. allocAnnotations[k] = v
  944. }
  945. }
  946. if labels, ok := podAnnotations[key]; ok {
  947. for k, v := range labels {
  948. allocAnnotations[k] = v
  949. }
  950. }
  951. alloc.Properties.Annotations = allocAnnotations
  952. }
  953. }
  954. }
  955. func getServiceLabels(resServiceLabels []*prom.QueryResult) map[serviceKey]map[string]string {
  956. serviceLabels := map[serviceKey]map[string]string{}
  957. for _, res := range resServiceLabels {
  958. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service")
  959. if err != nil {
  960. continue
  961. }
  962. if _, ok := serviceLabels[serviceKey]; !ok {
  963. serviceLabels[serviceKey] = map[string]string{}
  964. }
  965. for k, l := range res.GetLabels() {
  966. serviceLabels[serviceKey][k] = l
  967. }
  968. }
  969. // Prune duplicate services. That is, if the same service exists with
  970. // hyphens instead of underscores, keep the one that uses hyphens.
  971. for key := range serviceLabels {
  972. if strings.Contains(key.Service, "_") {
  973. duplicateService := strings.Replace(key.Service, "_", "-", -1)
  974. duplicateKey := newServiceKey(key.Cluster, key.Namespace, duplicateService)
  975. if _, ok := serviceLabels[duplicateKey]; ok {
  976. delete(serviceLabels, key)
  977. }
  978. }
  979. }
  980. return serviceLabels
  981. }
  982. func resToDeploymentLabels(resDeploymentLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  983. deploymentLabels := map[controllerKey]map[string]string{}
  984. for _, res := range resDeploymentLabels {
  985. controllerKey, err := resultDeploymentKey(res, env.GetPromClusterLabel(), "namespace", "deployment")
  986. if err != nil {
  987. continue
  988. }
  989. if _, ok := deploymentLabels[controllerKey]; !ok {
  990. deploymentLabels[controllerKey] = map[string]string{}
  991. }
  992. for k, l := range res.GetLabels() {
  993. deploymentLabels[controllerKey][k] = l
  994. }
  995. }
  996. // Prune duplicate deployments. That is, if the same deployment exists with
  997. // hyphens instead of underscores, keep the one that uses hyphens.
  998. for key := range deploymentLabels {
  999. if strings.Contains(key.Controller, "_") {
  1000. duplicateController := strings.Replace(key.Controller, "_", "-", -1)
  1001. duplicateKey := newControllerKey(key.Cluster, key.Namespace, key.ControllerKind, duplicateController)
  1002. if _, ok := deploymentLabels[duplicateKey]; ok {
  1003. delete(deploymentLabels, key)
  1004. }
  1005. }
  1006. }
  1007. return deploymentLabels
  1008. }
  1009. func resToStatefulSetLabels(resStatefulSetLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  1010. statefulSetLabels := map[controllerKey]map[string]string{}
  1011. for _, res := range resStatefulSetLabels {
  1012. controllerKey, err := resultStatefulSetKey(res, env.GetPromClusterLabel(), "namespace", "statefulSet")
  1013. if err != nil {
  1014. continue
  1015. }
  1016. if _, ok := statefulSetLabels[controllerKey]; !ok {
  1017. statefulSetLabels[controllerKey] = map[string]string{}
  1018. }
  1019. for k, l := range res.GetLabels() {
  1020. statefulSetLabels[controllerKey][k] = l
  1021. }
  1022. }
  1023. // Prune duplicate stateful sets. That is, if the same stateful set exists
  1024. // with hyphens instead of underscores, keep the one that uses hyphens.
  1025. for key := range statefulSetLabels {
  1026. if strings.Contains(key.Controller, "_") {
  1027. duplicateController := strings.Replace(key.Controller, "_", "-", -1)
  1028. duplicateKey := newControllerKey(key.Cluster, key.Namespace, key.ControllerKind, duplicateController)
  1029. if _, ok := statefulSetLabels[duplicateKey]; ok {
  1030. delete(statefulSetLabels, key)
  1031. }
  1032. }
  1033. }
  1034. return statefulSetLabels
  1035. }
  1036. func labelsToPodControllerMap(podLabels map[podKey]map[string]string, controllerLabels map[controllerKey]map[string]string) map[podKey]controllerKey {
  1037. podControllerMap := map[podKey]controllerKey{}
  1038. // For each controller, turn the labels into a selector and attempt to
  1039. // match it with each set of pod labels. A match indicates that the pod
  1040. // belongs to the controller.
  1041. for cKey, cLabels := range controllerLabels {
  1042. selector := labels.Set(cLabels).AsSelectorPreValidated()
  1043. for pKey, pLabels := range podLabels {
  1044. // If the pod is in a different cluster or namespace, there is
  1045. // no need to compare the labels.
  1046. if cKey.Cluster != pKey.Cluster || cKey.Namespace != pKey.Namespace {
  1047. continue
  1048. }
  1049. podLabelSet := labels.Set(pLabels)
  1050. if selector.Matches(podLabelSet) {
  1051. if _, ok := podControllerMap[pKey]; ok {
  1052. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PodControllerMap match already exists: %s matches %s and %s", pKey, podControllerMap[pKey], cKey)
  1053. }
  1054. podControllerMap[pKey] = cKey
  1055. }
  1056. }
  1057. }
  1058. return podControllerMap
  1059. }
  1060. func resToPodDaemonSetMap(resDaemonSetLabels []*prom.QueryResult) map[podKey]controllerKey {
  1061. daemonSetLabels := map[podKey]controllerKey{}
  1062. for _, res := range resDaemonSetLabels {
  1063. controllerKey, err := resultDaemonSetKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
  1064. if err != nil {
  1065. continue
  1066. }
  1067. pod, err := res.GetString("pod")
  1068. if err != nil {
  1069. log.Warningf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
  1070. }
  1071. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  1072. daemonSetLabels[podKey] = controllerKey
  1073. }
  1074. return daemonSetLabels
  1075. }
  1076. func resToPodJobMap(resJobLabels []*prom.QueryResult) map[podKey]controllerKey {
  1077. jobLabels := map[podKey]controllerKey{}
  1078. for _, res := range resJobLabels {
  1079. controllerKey, err := resultJobKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
  1080. if err != nil {
  1081. continue
  1082. }
  1083. // Convert the name of Jobs generated by CronJobs to the name of the
  1084. // CronJob by stripping the timestamp off the end.
  1085. match := isCron.FindStringSubmatch(controllerKey.Controller)
  1086. if match != nil {
  1087. controllerKey.Controller = match[1]
  1088. }
  1089. pod, err := res.GetString("pod")
  1090. if err != nil {
  1091. log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
  1092. }
  1093. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  1094. jobLabels[podKey] = controllerKey
  1095. }
  1096. return jobLabels
  1097. }
  1098. func applyServicesToPods(podMap map[podKey]*Pod, podLabels map[podKey]map[string]string, allocsByService map[serviceKey][]*kubecost.Allocation, serviceLabels map[serviceKey]map[string]string) {
  1099. podServicesMap := map[podKey][]serviceKey{}
  1100. // For each service, turn the labels into a selector and attempt to
  1101. // match it with each set of pod labels. A match indicates that the pod
  1102. // belongs to the service.
  1103. for sKey, sLabels := range serviceLabels {
  1104. selector := labels.Set(sLabels).AsSelectorPreValidated()
  1105. for pKey, pLabels := range podLabels {
  1106. // If the pod is in a different cluster or namespace, there is
  1107. // no need to compare the labels.
  1108. if sKey.Cluster != pKey.Cluster || sKey.Namespace != pKey.Namespace {
  1109. continue
  1110. }
  1111. podLabelSet := labels.Set(pLabels)
  1112. if selector.Matches(podLabelSet) {
  1113. if _, ok := podServicesMap[pKey]; !ok {
  1114. podServicesMap[pKey] = []serviceKey{}
  1115. }
  1116. podServicesMap[pKey] = append(podServicesMap[pKey], sKey)
  1117. }
  1118. }
  1119. }
  1120. // For each allocation in each pod, attempt to find and apply the list of
  1121. // services associated with the allocation's pod.
  1122. for key, pod := range podMap {
  1123. for _, alloc := range pod.Allocations {
  1124. if sKeys, ok := podServicesMap[key]; ok {
  1125. services := []string{}
  1126. for _, sKey := range sKeys {
  1127. services = append(services, sKey.Service)
  1128. allocsByService[sKey] = append(allocsByService[sKey], alloc)
  1129. }
  1130. alloc.Properties.Services = services
  1131. }
  1132. }
  1133. }
  1134. }
  1135. func applyControllersToPods(podMap map[podKey]*Pod, podControllerMap map[podKey]controllerKey) {
  1136. for key, pod := range podMap {
  1137. for _, alloc := range pod.Allocations {
  1138. if controllerKey, ok := podControllerMap[key]; ok {
  1139. alloc.Properties.ControllerKind = controllerKey.ControllerKind
  1140. alloc.Properties.Controller = controllerKey.Controller
  1141. }
  1142. }
  1143. }
  1144. }
  1145. func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerCPUHr []*prom.QueryResult) {
  1146. for _, res := range resNodeCostPerCPUHr {
  1147. cluster, err := res.GetString(env.GetPromClusterLabel())
  1148. if err != nil {
  1149. cluster = env.GetClusterID()
  1150. }
  1151. node, err := res.GetString("node")
  1152. if err != nil {
  1153. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1154. continue
  1155. }
  1156. instanceType, err := res.GetString("instance_type")
  1157. if err != nil {
  1158. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1159. continue
  1160. }
  1161. providerID, err := res.GetString("provider_id")
  1162. if err != nil {
  1163. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1164. continue
  1165. }
  1166. key := newNodeKey(cluster, node)
  1167. if _, ok := nodeMap[key]; !ok {
  1168. nodeMap[key] = &NodePricing{
  1169. Name: node,
  1170. NodeType: instanceType,
  1171. ProviderID: cloud.ParseID(providerID),
  1172. }
  1173. }
  1174. nodeMap[key].CostPerCPUHr = res.Values[0].Value
  1175. }
  1176. }
  1177. func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
  1178. for _, res := range resNodeCostPerRAMGiBHr {
  1179. cluster, err := res.GetString(env.GetPromClusterLabel())
  1180. if err != nil {
  1181. cluster = env.GetClusterID()
  1182. }
  1183. node, err := res.GetString("node")
  1184. if err != nil {
  1185. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1186. continue
  1187. }
  1188. instanceType, err := res.GetString("instance_type")
  1189. if err != nil {
  1190. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1191. continue
  1192. }
  1193. providerID, err := res.GetString("provider_id")
  1194. if err != nil {
  1195. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1196. continue
  1197. }
  1198. key := newNodeKey(cluster, node)
  1199. if _, ok := nodeMap[key]; !ok {
  1200. nodeMap[key] = &NodePricing{
  1201. Name: node,
  1202. NodeType: instanceType,
  1203. ProviderID: cloud.ParseID(providerID),
  1204. }
  1205. }
  1206. nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
  1207. }
  1208. }
  1209. func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerGPUHr []*prom.QueryResult) {
  1210. for _, res := range resNodeCostPerGPUHr {
  1211. cluster, err := res.GetString(env.GetPromClusterLabel())
  1212. if err != nil {
  1213. cluster = env.GetClusterID()
  1214. }
  1215. node, err := res.GetString("node")
  1216. if err != nil {
  1217. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1218. continue
  1219. }
  1220. instanceType, err := res.GetString("instance_type")
  1221. if err != nil {
  1222. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1223. continue
  1224. }
  1225. providerID, err := res.GetString("provider_id")
  1226. if err != nil {
  1227. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1228. continue
  1229. }
  1230. key := newNodeKey(cluster, node)
  1231. if _, ok := nodeMap[key]; !ok {
  1232. nodeMap[key] = &NodePricing{
  1233. Name: node,
  1234. NodeType: instanceType,
  1235. ProviderID: cloud.ParseID(providerID),
  1236. }
  1237. }
  1238. nodeMap[key].CostPerGPUHr = res.Values[0].Value
  1239. }
  1240. }
  1241. func applyNodeSpot(nodeMap map[nodeKey]*NodePricing, resNodeIsSpot []*prom.QueryResult) {
  1242. for _, res := range resNodeIsSpot {
  1243. cluster, err := res.GetString(env.GetPromClusterLabel())
  1244. if err != nil {
  1245. cluster = env.GetClusterID()
  1246. }
  1247. node, err := res.GetString("node")
  1248. if err != nil {
  1249. log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
  1250. continue
  1251. }
  1252. key := newNodeKey(cluster, node)
  1253. if _, ok := nodeMap[key]; !ok {
  1254. log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
  1255. continue
  1256. }
  1257. nodeMap[key].Preemptible = res.Values[0].Value > 0
  1258. }
  1259. }
  1260. func applyNodeDiscount(nodeMap map[nodeKey]*NodePricing, cm *CostModel) {
  1261. if cm == nil {
  1262. return
  1263. }
  1264. c, err := cm.Provider.GetConfig()
  1265. if err != nil {
  1266. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1267. return
  1268. }
  1269. discount, err := ParsePercentString(c.Discount)
  1270. if err != nil {
  1271. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1272. return
  1273. }
  1274. negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
  1275. if err != nil {
  1276. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1277. return
  1278. }
  1279. for _, node := range nodeMap {
  1280. // TODO GKE Reserved Instances into account
  1281. node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
  1282. node.CostPerCPUHr *= (1.0 - node.Discount)
  1283. node.CostPerRAMGiBHr *= (1.0 - node.Discount)
  1284. }
  1285. }
  1286. func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
  1287. for _, res := range resPVCostPerGiBHour {
  1288. cluster, err := res.GetString(env.GetPromClusterLabel())
  1289. if err != nil {
  1290. cluster = env.GetClusterID()
  1291. }
  1292. name, err := res.GetString("volumename")
  1293. if err != nil {
  1294. log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
  1295. continue
  1296. }
  1297. key := newPVKey(cluster, name)
  1298. pvMap[key] = &PV{
  1299. Cluster: cluster,
  1300. Name: name,
  1301. CostPerGiBHour: res.Values[0].Value,
  1302. }
  1303. }
  1304. }
  1305. func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
  1306. for _, res := range resPVBytes {
  1307. key, err := resultPVKey(res, env.GetPromClusterLabel(), "persistentvolume")
  1308. if err != nil {
  1309. log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
  1310. continue
  1311. }
  1312. if _, ok := pvMap[key]; !ok {
  1313. log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
  1314. continue
  1315. }
  1316. pvMap[key].Bytes = res.Values[0].Value
  1317. }
  1318. }
  1319. func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
  1320. for _, res := range resPVCInfo {
  1321. cluster, err := res.GetString(env.GetPromClusterLabel())
  1322. if err != nil {
  1323. cluster = env.GetClusterID()
  1324. }
  1325. values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
  1326. if err != nil {
  1327. log.DedupedWarningf(10, "CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
  1328. continue
  1329. }
  1330. namespace := values["namespace"]
  1331. name := values["persistentvolumeclaim"]
  1332. volume := values["volumename"]
  1333. storageClass := values["storageclass"]
  1334. pvKey := newPVKey(cluster, volume)
  1335. pvcKey := newPVCKey(cluster, namespace, name)
  1336. // pvcStart and pvcEnd are the timestamps of the first and last minutes
  1337. // the PVC was running, respectively. We subtract 1m from pvcStart
  1338. // because this point will actually represent the end of the first
  1339. // minute. We don't subtract from pvcEnd because it already represents
  1340. // the end of the last minute.
  1341. var pvcStart, pvcEnd time.Time
  1342. for _, datum := range res.Values {
  1343. t := time.Unix(int64(datum.Timestamp), 0)
  1344. if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  1345. pvcStart = t
  1346. }
  1347. if datum.Value > 0 && window.Contains(t) {
  1348. pvcEnd = t
  1349. }
  1350. }
  1351. if pvcStart.IsZero() || pvcEnd.IsZero() {
  1352. log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
  1353. }
  1354. pvcStart = pvcStart.Add(-time.Minute)
  1355. if _, ok := pvMap[pvKey]; !ok {
  1356. continue
  1357. }
  1358. pvMap[pvKey].StorageClass = storageClass
  1359. if _, ok := pvcMap[pvcKey]; !ok {
  1360. pvcMap[pvcKey] = &PVC{}
  1361. }
  1362. pvcMap[pvcKey].Name = name
  1363. pvcMap[pvcKey].Namespace = namespace
  1364. pvcMap[pvcKey].Volume = pvMap[pvKey]
  1365. pvcMap[pvcKey].Start = pvcStart
  1366. pvcMap[pvcKey].End = pvcEnd
  1367. }
  1368. }
  1369. func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
  1370. for _, res := range resPVCBytesRequested {
  1371. key, err := resultPVCKey(res, env.GetPromClusterLabel(), "namespace", "persistentvolumeclaim")
  1372. if err != nil {
  1373. continue
  1374. }
  1375. if _, ok := pvcMap[key]; !ok {
  1376. continue
  1377. }
  1378. pvcMap[key].Bytes = res.Values[0].Value
  1379. }
  1380. }
  1381. func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podMap map[podKey]*Pod, resPodPVCAllocation []*prom.QueryResult) {
  1382. for _, res := range resPodPVCAllocation {
  1383. cluster, err := res.GetString(env.GetPromClusterLabel())
  1384. if err != nil {
  1385. cluster = env.GetClusterID()
  1386. }
  1387. values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
  1388. if err != nil {
  1389. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
  1390. continue
  1391. }
  1392. namespace := values["namespace"]
  1393. pod := values["pod"]
  1394. name := values["persistentvolumeclaim"]
  1395. volume := values["persistentvolume"]
  1396. podKey := newPodKey(cluster, namespace, pod)
  1397. pvKey := newPVKey(cluster, volume)
  1398. pvcKey := newPVCKey(cluster, namespace, name)
  1399. if _, ok := pvMap[pvKey]; !ok {
  1400. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
  1401. continue
  1402. }
  1403. if _, ok := podPVCMap[podKey]; !ok {
  1404. podPVCMap[podKey] = []*PVC{}
  1405. }
  1406. pvc, ok := pvcMap[pvcKey]
  1407. if !ok {
  1408. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
  1409. continue
  1410. }
  1411. count := 1
  1412. if pod, ok := podMap[podKey]; ok && len(pod.Allocations) > 0 {
  1413. count = len(pod.Allocations)
  1414. } else {
  1415. log.DedupedWarningf(10, "CostModel.ComputeAllocation: PVC %s for missing pod %s", pvcKey, podKey)
  1416. }
  1417. pvc.Count = count
  1418. pvc.Mounted = true
  1419. podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
  1420. }
  1421. }
  1422. func applyUnmountedPVs(window kubecost.Window, podMap map[podKey]*Pod, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
  1423. unmountedPVBytes := map[string]float64{}
  1424. unmountedPVCost := map[string]float64{}
  1425. for _, pv := range pvMap {
  1426. mounted := false
  1427. for _, pvc := range pvcMap {
  1428. if pvc.Volume == nil {
  1429. continue
  1430. }
  1431. if pvc.Volume == pv {
  1432. mounted = true
  1433. break
  1434. }
  1435. }
  1436. if !mounted {
  1437. gib := pv.Bytes / 1024 / 1024 / 1024
  1438. hrs := window.Minutes() / 60.0 // TODO improve with PV hours, not window hours
  1439. cost := pv.CostPerGiBHour * gib * hrs
  1440. unmountedPVCost[pv.Cluster] += cost
  1441. unmountedPVBytes[pv.Cluster] += pv.Bytes
  1442. }
  1443. }
  1444. for cluster, amount := range unmountedPVCost {
  1445. container := kubecost.UnmountedSuffix
  1446. pod := kubecost.UnmountedSuffix
  1447. namespace := kubecost.UnmountedSuffix
  1448. node := ""
  1449. key := newPodKey(cluster, namespace, pod)
  1450. podMap[key] = &Pod{
  1451. Window: window.Clone(),
  1452. Start: *window.Start(),
  1453. End: *window.End(),
  1454. Key: key,
  1455. Allocations: map[string]*kubecost.Allocation{},
  1456. }
  1457. podMap[key].AppendContainer(container)
  1458. podMap[key].Allocations[container].Properties.Cluster = cluster
  1459. podMap[key].Allocations[container].Properties.Node = node
  1460. podMap[key].Allocations[container].Properties.Namespace = namespace
  1461. podMap[key].Allocations[container].Properties.Pod = pod
  1462. podMap[key].Allocations[container].Properties.Container = container
  1463. pvKey := kubecost.PVKey{
  1464. Cluster: cluster,
  1465. Name: kubecost.UnmountedSuffix,
  1466. }
  1467. unmountedPVs := kubecost.PVAllocations{
  1468. pvKey: {
  1469. ByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
  1470. Cost: amount,
  1471. },
  1472. }
  1473. podMap[key].Allocations[container].PVs = podMap[key].Allocations[container].PVs.Add(unmountedPVs)
  1474. }
  1475. }
  1476. func applyUnmountedPVCs(window kubecost.Window, podMap map[podKey]*Pod, pvcMap map[pvcKey]*PVC) {
  1477. unmountedPVCBytes := map[namespaceKey]float64{}
  1478. unmountedPVCCost := map[namespaceKey]float64{}
  1479. for _, pvc := range pvcMap {
  1480. if !pvc.Mounted && pvc.Volume != nil {
  1481. key := newNamespaceKey(pvc.Cluster, pvc.Namespace)
  1482. gib := pvc.Volume.Bytes / 1024 / 1024 / 1024
  1483. hrs := pvc.Minutes() / 60.0
  1484. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  1485. unmountedPVCCost[key] += cost
  1486. unmountedPVCBytes[key] += pvc.Volume.Bytes
  1487. }
  1488. }
  1489. for key, amount := range unmountedPVCCost {
  1490. container := kubecost.UnmountedSuffix
  1491. pod := kubecost.UnmountedSuffix
  1492. namespace := key.Namespace
  1493. node := ""
  1494. cluster := key.Cluster
  1495. podKey := newPodKey(cluster, namespace, pod)
  1496. podMap[podKey] = &Pod{
  1497. Window: window.Clone(),
  1498. Start: *window.Start(),
  1499. End: *window.End(),
  1500. Key: podKey,
  1501. Allocations: map[string]*kubecost.Allocation{},
  1502. }
  1503. podMap[podKey].AppendContainer(container)
  1504. podMap[podKey].Allocations[container].Properties.Cluster = cluster
  1505. podMap[podKey].Allocations[container].Properties.Node = node
  1506. podMap[podKey].Allocations[container].Properties.Namespace = namespace
  1507. podMap[podKey].Allocations[container].Properties.Pod = pod
  1508. podMap[podKey].Allocations[container].Properties.Container = container
  1509. pvKey := kubecost.PVKey{
  1510. Cluster: cluster,
  1511. Name: kubecost.UnmountedSuffix,
  1512. }
  1513. unmountedPVs := kubecost.PVAllocations{
  1514. pvKey: {
  1515. ByteHours: unmountedPVCBytes[key] * window.Minutes() / 60.0,
  1516. Cost: amount,
  1517. },
  1518. }
  1519. podMap[podKey].Allocations[container].PVs = podMap[podKey].Allocations[container].PVs.Add(unmountedPVs)
  1520. }
  1521. }
  1522. // LB describes the start and end time of a Load Balancer along with cost
  1523. type LB struct {
  1524. TotalCost float64
  1525. Start time.Time
  1526. End time.Time
  1527. }
  1528. func getLoadBalancerCosts(resLBCost, resLBActiveMins []*prom.QueryResult, resolution time.Duration) map[serviceKey]*LB {
  1529. lbMap := make(map[serviceKey]*LB)
  1530. lbHourlyCosts := make(map[serviceKey]float64)
  1531. for _, res := range resLBCost {
  1532. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service_name")
  1533. if err != nil {
  1534. continue
  1535. }
  1536. lbHourlyCosts[serviceKey] = res.Values[0].Value
  1537. }
  1538. for _, res := range resLBActiveMins {
  1539. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service_name")
  1540. if err != nil || len(res.Values) == 0 {
  1541. continue
  1542. }
  1543. if _, ok := lbHourlyCosts[serviceKey]; !ok {
  1544. log.Warningf("CostModel: failed to find hourly cost for Load Balancer: %v", serviceKey)
  1545. continue
  1546. }
  1547. s := time.Unix(int64(res.Values[0].Timestamp), 0)
  1548. // subtract resolution from start time to cover full time period
  1549. s = s.Add(-resolution)
  1550. e := time.Unix(int64(res.Values[len(res.Values)-1].Timestamp), 0)
  1551. hours := e.Sub(s).Hours()
  1552. lbMap[serviceKey] = &LB{
  1553. TotalCost: lbHourlyCosts[serviceKey] * hours,
  1554. Start: s,
  1555. End: e,
  1556. }
  1557. }
  1558. return lbMap
  1559. }
  1560. func applyLoadBalancersToPods(lbMap map[serviceKey]*LB, allocsByService map[serviceKey][]*kubecost.Allocation) {
  1561. for sKey, lb := range lbMap {
  1562. totalHours := 0.0
  1563. allocHours := make(map[*kubecost.Allocation]float64)
  1564. // Add portion of load balancing cost to each allocation
  1565. // proportional to the total number of hours allocations used the load balancer
  1566. for _, alloc := range allocsByService[sKey] {
  1567. // Determine the (start, end) of the relationship between the
  1568. // given LB and the associated Allocation so that a precise
  1569. // number of hours can be used to compute cumulative cost.
  1570. s, e := alloc.Start, alloc.End
  1571. if lb.Start.After(alloc.Start) {
  1572. s = lb.Start
  1573. }
  1574. if lb.End.Before(alloc.End) {
  1575. e = lb.End
  1576. }
  1577. hours := e.Sub(s).Hours()
  1578. // A negative number of hours signifies no overlap between the windows
  1579. if hours > 0 {
  1580. totalHours += hours
  1581. allocHours[alloc] = hours
  1582. }
  1583. }
  1584. // Distribute cost of service once total hours is calculated
  1585. for alloc, hours := range allocHours {
  1586. alloc.LoadBalancerCost += lb.TotalCost * hours / totalHours
  1587. }
  1588. }
  1589. }
  1590. // getNodePricing determines node pricing, given a key and a mapping from keys
  1591. // to their NodePricing instances, as well as the custom pricing configuration
  1592. // inherent to the CostModel instance. If custom pricing is set, use that. If
  1593. // not, use the pricing defined by the given key. If that doesn't exist, fall
  1594. // back on custom pricing as a default.
  1595. func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*NodePricing, nodeKey nodeKey) *NodePricing {
  1596. // Find the relevant NodePricing, if it exists. If not, substitute the
  1597. // custom NodePricing as a default.
  1598. node, ok := nodeMap[nodeKey]
  1599. if !ok || node == nil {
  1600. if nodeKey.Node != "" {
  1601. log.DedupedWarningf(5, "CostModel: failed to find node for %s", nodeKey)
  1602. }
  1603. return cm.getCustomNodePricing(false)
  1604. }
  1605. // If custom pricing is enabled and can be retrieved, override detected
  1606. // node pricing with the custom values.
  1607. customPricingConfig, err := cm.Provider.GetConfig()
  1608. if err != nil {
  1609. log.Warningf("CostModel: failed to load custom pricing: %s", err)
  1610. }
  1611. if cloud.CustomPricesEnabled(cm.Provider) && customPricingConfig != nil {
  1612. return cm.getCustomNodePricing(node.Preemptible)
  1613. }
  1614. node.Source = "prometheus"
  1615. // If any of the values are NaN or zero, replace them with the custom
  1616. // values as default.
  1617. // TODO:CLEANUP can't we parse these custom prices once? why do we store
  1618. // them as strings like this?
  1619. if node.CostPerCPUHr == 0 || math.IsNaN(node.CostPerCPUHr) {
  1620. log.Warningf("CostModel: node pricing has illegal CostPerCPUHr; replacing with custom pricing: %s", nodeKey)
  1621. cpuCostStr := customPricingConfig.CPU
  1622. if node.Preemptible {
  1623. cpuCostStr = customPricingConfig.SpotCPU
  1624. }
  1625. costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
  1626. if err != nil {
  1627. log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
  1628. }
  1629. node.CostPerCPUHr = costPerCPUHr
  1630. node.Source += "/customCPU"
  1631. }
  1632. if math.IsNaN(node.CostPerGPUHr) {
  1633. log.Warningf("CostModel: node pricing has illegal CostPerGPUHr; replacing with custom pricing: %s", nodeKey)
  1634. gpuCostStr := customPricingConfig.GPU
  1635. if node.Preemptible {
  1636. gpuCostStr = customPricingConfig.SpotGPU
  1637. }
  1638. costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
  1639. if err != nil {
  1640. log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
  1641. }
  1642. node.CostPerGPUHr = costPerGPUHr
  1643. node.Source += "/customGPU"
  1644. }
  1645. if node.CostPerRAMGiBHr == 0 || math.IsNaN(node.CostPerRAMGiBHr) {
  1646. log.Warningf("CostModel: node pricing has illegal CostPerRAMHr; replacing with custom pricing: %s", nodeKey)
  1647. ramCostStr := customPricingConfig.RAM
  1648. if node.Preemptible {
  1649. ramCostStr = customPricingConfig.SpotRAM
  1650. }
  1651. costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
  1652. if err != nil {
  1653. log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
  1654. }
  1655. node.CostPerRAMGiBHr = costPerRAMHr
  1656. node.Source += "/customRAM"
  1657. }
  1658. return node
  1659. }
  1660. // getCustomNodePricing converts the CostModel's configured custom pricing
  1661. // values into a NodePricing instance.
  1662. func (cm *CostModel) getCustomNodePricing(spot bool) *NodePricing {
  1663. customPricingConfig, err := cm.Provider.GetConfig()
  1664. if err != nil {
  1665. return nil
  1666. }
  1667. cpuCostStr := customPricingConfig.CPU
  1668. gpuCostStr := customPricingConfig.GPU
  1669. ramCostStr := customPricingConfig.RAM
  1670. if spot {
  1671. cpuCostStr = customPricingConfig.SpotCPU
  1672. gpuCostStr = customPricingConfig.SpotGPU
  1673. ramCostStr = customPricingConfig.SpotRAM
  1674. }
  1675. node := &NodePricing{Source: "custom"}
  1676. costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
  1677. if err != nil {
  1678. log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
  1679. }
  1680. node.CostPerCPUHr = costPerCPUHr
  1681. costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
  1682. if err != nil {
  1683. log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
  1684. }
  1685. node.CostPerGPUHr = costPerGPUHr
  1686. costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
  1687. if err != nil {
  1688. log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
  1689. }
  1690. node.CostPerRAMGiBHr = costPerRAMHr
  1691. return node
  1692. }
  1693. // NodePricing describes the resource costs associated with a given node, as
  1694. // well as the source of the information (e.g. prometheus, custom)
  1695. type NodePricing struct {
  1696. Name string
  1697. NodeType string
  1698. ProviderID string
  1699. Preemptible bool
  1700. CostPerCPUHr float64
  1701. CostPerRAMGiBHr float64
  1702. CostPerGPUHr float64
  1703. Discount float64
  1704. Source string
  1705. }
  1706. // Pod describes a running pod's start and end time within a Window and
  1707. // all the Allocations (i.e. containers) contained within it.
  1708. type Pod struct {
  1709. Window kubecost.Window
  1710. Start time.Time
  1711. End time.Time
  1712. Key podKey
  1713. Allocations map[string]*kubecost.Allocation
  1714. }
  1715. // AppendContainer adds an entry for the given container name to the Pod.
  1716. func (p Pod) AppendContainer(container string) {
  1717. name := fmt.Sprintf("%s/%s/%s/%s", p.Key.Cluster, p.Key.Namespace, p.Key.Pod, container)
  1718. alloc := &kubecost.Allocation{
  1719. Name: name,
  1720. Properties: &kubecost.AllocationProperties{},
  1721. Window: p.Window.Clone(),
  1722. Start: p.Start,
  1723. End: p.End,
  1724. }
  1725. alloc.Properties.Container = container
  1726. alloc.Properties.Pod = p.Key.Pod
  1727. alloc.Properties.Namespace = p.Key.Namespace
  1728. alloc.Properties.Cluster = p.Key.Cluster
  1729. p.Allocations[container] = alloc
  1730. }
  1731. // PVC describes a PersistentVolumeClaim
  1732. // TODO:CLEANUP move to pkg/kubecost?
  1733. // TODO:CLEANUP add PersistentVolumeClaims field to type Allocation?
  1734. type PVC struct {
  1735. Bytes float64 `json:"bytes"`
  1736. Count int `json:"count"`
  1737. Name string `json:"name"`
  1738. Cluster string `json:"cluster"`
  1739. Namespace string `json:"namespace"`
  1740. Volume *PV `json:"persistentVolume"`
  1741. Mounted bool `json:"mounted"`
  1742. Start time.Time `json:"start"`
  1743. End time.Time `json:"end"`
  1744. }
  1745. // Cost computes the cumulative cost of the PVC
  1746. func (pvc *PVC) Cost() float64 {
  1747. if pvc == nil || pvc.Volume == nil {
  1748. return 0.0
  1749. }
  1750. gib := pvc.Bytes / 1024 / 1024 / 1024
  1751. hrs := pvc.Minutes() / 60.0
  1752. return pvc.Volume.CostPerGiBHour * gib * hrs
  1753. }
  1754. // Minutes computes the number of minutes over which the PVC is defined
  1755. func (pvc *PVC) Minutes() float64 {
  1756. if pvc == nil {
  1757. return 0.0
  1758. }
  1759. return pvc.End.Sub(pvc.Start).Minutes()
  1760. }
  1761. // String returns a string representation of the PVC
  1762. func (pvc *PVC) String() string {
  1763. if pvc == nil {
  1764. return "<nil>"
  1765. }
  1766. return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
  1767. }
  1768. // PV describes a PersistentVolume
  1769. // TODO:CLEANUP move to pkg/kubecost?
  1770. type PV struct {
  1771. Bytes float64 `json:"bytes"`
  1772. CostPerGiBHour float64 `json:"costPerGiBHour"`
  1773. Cluster string `json:"cluster"`
  1774. Name string `json:"name"`
  1775. StorageClass string `json:"storageClass"`
  1776. }
  1777. // String returns a string representation of the PV
  1778. func (pv *PV) String() string {
  1779. if pv == nil {
  1780. return "<nil>"
  1781. }
  1782. return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
  1783. }