allocation.go 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124
  1. package costmodel
  2. import (
  3. "fmt"
  4. "math"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "github.com/kubecost/cost-model/pkg/util/timeutil"
  9. "github.com/kubecost/cost-model/pkg/cloud"
  10. "github.com/kubecost/cost-model/pkg/env"
  11. "github.com/kubecost/cost-model/pkg/kubecost"
  12. "github.com/kubecost/cost-model/pkg/log"
  13. "github.com/kubecost/cost-model/pkg/prom"
  14. "k8s.io/apimachinery/pkg/labels"
  15. "k8s.io/klog"
  16. )
  // Prometheus query templates used to build allocation data. Each template is
  // expanded with fmt.Sprintf before being submitted.
  //
  // Verb order for the subquery-style templates (queryFmtPods, queryFmtPVCInfo,
  // queryFmtLBActiveMins): cluster label, duration, resolution, offset.
  //
  // Verb order for the label/annotation/spot templates that carry only two
  // verbs: duration, offset.
  //
  // Verb order for every other template: duration, offset, cluster label.
  const (
  	// Pod running status, sampled over a subquery.
  	queryFmtPods = `avg(kube_pod_container_status_running{}) by (pod, namespace, %s)[%s:%s]%s`

  	// RAM allocation, requests and usage per container.
  	queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s, provider_id)`
  	queryFmtRAMRequests       = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
  	queryFmtRAMUsageAvg       = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
  	queryFmtRAMUsageMax       = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`

  	// CPU allocation, requests and usage per container.
  	queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
  	queryFmtCPURequests       = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
  	queryFmtCPUUsageAvg       = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`
  	queryFmtCPUUsageMax       = `max(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD"}[%s]%s)) by (container_name, container, pod_name, pod, namespace, instance, %s)`

  	// GPU requests and allocation per container.
  	queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`
  	queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, %s)`

  	// Node hourly resource costs and spot status.
  	queryFmtNodeCostPerCPUHr    = `avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
  	queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
  	queryFmtNodeCostPerGPUHr    = `avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, %s, instance_type, provider_id)`
  	queryFmtNodeIsSpot          = `avg_over_time(kubecost_node_is_spot[%s]%s)`

  	// Persistent volumes and persistent volume claims.
  	queryFmtPVCInfo           = `avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, %s)[%s:%s]%s`
  	queryFmtPVBytes           = `avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, %s)`
  	queryFmtPodPVCAllocation  = `avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, %s)`
  	queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, %s)`
  	queryFmtPVCostPerGiBHour  = `avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, %s)`

  	// Network egress volumes, egress prices and raw byte counters.
  	queryFmtNetZoneGiB            = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
  	queryFmtNetZoneCostPerGiB     = `avg(avg_over_time(kubecost_network_zone_egress_cost{}[%s]%s)) by (%s)`
  	queryFmtNetRegionGiB          = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
  	queryFmtNetRegionCostPerGiB   = `avg(avg_over_time(kubecost_network_region_egress_cost{}[%s]%s)) by (%s)`
  	queryFmtNetInternetGiB        = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s]%s)) by (pod_name, namespace, %s) / 1024 / 1024 / 1024`
  	queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{}[%s]%s)) by (%s)`
  	queryFmtNetReceiveBytes       = `sum(increase(container_network_receive_bytes_total{pod!="", container="POD"}[%s]%s)) by (pod_name, pod, namespace, %s)`
  	queryFmtNetTransferBytes      = `sum(increase(container_network_transmit_bytes_total{pod!="", container="POD"}[%s]%s)) by (pod_name, pod, namespace, %s)`

  	// Labels, annotations and controller ownership.
  	queryFmtNamespaceLabels      = `avg_over_time(kube_namespace_labels[%s]%s)`
  	queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations[%s]%s)`
  	queryFmtPodLabels            = `avg_over_time(kube_pod_labels[%s]%s)`
  	queryFmtPodAnnotations       = `avg_over_time(kube_pod_annotations[%s]%s)`
  	queryFmtServiceLabels        = `avg_over_time(service_selector_labels[%s]%s)`
  	queryFmtDeploymentLabels     = `avg_over_time(deployment_match_labels[%s]%s)`
  	queryFmtStatefulSetLabels    = `avg_over_time(statefulSet_match_labels[%s]%s)`
  	queryFmtDaemonSetLabels      = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet"}[%s]%s)) by (pod, owner_name, namespace, %s)`
  	queryFmtJobLabels            = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,%s)`

  	// Load balancer hourly cost and active time.
  	queryFmtLBCostPerHr  = `avg(avg_over_time(kubecost_load_balancer_cost[%s]%s)) by (namespace, service_name, %s)`
  	queryFmtLBActiveMins = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]%s`
  )
  // MAX_CPU_CAP is a bit of a hack to work around garbage data from cAdvisor.
  // Ideally each pod would be capped to the max CPU on its node, but that
  // involves a bit more complexity, as it would need to be done when
  // allocations are joined with asset data.
  const MAX_CPU_CAP = 512
  61. // CanCompute should return true if CostModel can act as a valid source for the
  62. // given time range. In the case of CostModel we want to attempt to compute as
  63. // long as the range starts in the past. If the CostModel ends up not having
  64. // data to match, that's okay, and should be communicated with an error
  65. // response from ComputeAllocation.
  66. func (cm *CostModel) CanCompute(start, end time.Time) bool {
  67. return start.Before(time.Now())
  68. }
  69. // Name returns the name of the Source
  70. func (cm *CostModel) Name() string {
  71. return "CostModel"
  72. }
  73. // ComputeAllocation uses the CostModel instance to compute an AllocationSet
  74. // for the window defined by the given start and end times. The Allocations
  75. // returned are unaggregated (i.e. down to the container level).
  76. func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
  77. // 1. Build out Pod map from resolution-tuned, batched Pod start/end query
  78. // 2. Run and apply the results of the remaining queries to
  79. // 3. Build out AllocationSet from completed Pod map
  80. // Create a window spanning the requested query
  81. window := kubecost.NewWindow(&start, &end)
  82. // Create an empty AllocationSet. For safety, in the case of an error, we
  83. // should prefer to return this empty set with the error. (In the case of
  84. // no error, of course we populate the set and return it.)
  85. allocSet := kubecost.NewAllocationSet(start, end)
  86. // (1) Build out Pod map
  87. // Build out a map of Allocations as a mapping from pod-to-container-to-
  88. // underlying-Allocation instance, starting with (start, end) so that we
  89. // begin with minutes, from which we compute resource allocation and cost
  90. // totals from measured rate data.
  91. podMap := map[podKey]*Pod{}
  92. // clusterStarts and clusterEnds record the earliest start and latest end
  93. // times, respectively, on a cluster-basis. These are used for unmounted
  94. // PVs and other "virtual" Allocations so that minutes are maximally
  95. // accurate during start-up or spin-down of a cluster
  96. clusterStart := map[string]time.Time{}
  97. clusterEnd := map[string]time.Time{}
  98. cm.buildPodMap(window, resolution, env.GetETLMaxBatchDuration(), podMap, clusterStart, clusterEnd)
  99. // (2) Run and apply remaining queries
  100. // Convert window (start, end) to (duration, offset) for querying Prometheus,
  101. // including handling Thanos offset
  102. durStr, offStr, err := window.DurationOffsetForPrometheus()
  103. if err != nil {
  104. // Negative duration, so return empty set
  105. return allocSet, nil
  106. }
  107. // Convert resolution duration to a query-ready string
  108. resStr := timeutil.DurationString(resolution)
  109. ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)
  110. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr, env.GetPromClusterLabel())
  111. resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
  112. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, offStr, env.GetPromClusterLabel())
  113. resChRAMRequests := ctx.Query(queryRAMRequests)
  114. queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, offStr, env.GetPromClusterLabel())
  115. resChRAMUsageAvg := ctx.Query(queryRAMUsageAvg)
  116. queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, offStr, env.GetPromClusterLabel())
  117. resChRAMUsageMax := ctx.Query(queryRAMUsageMax)
  118. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, offStr, env.GetPromClusterLabel())
  119. resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
  120. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, offStr, env.GetPromClusterLabel())
  121. resChCPURequests := ctx.Query(queryCPURequests)
  122. queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, offStr, env.GetPromClusterLabel())
  123. resChCPUUsageAvg := ctx.Query(queryCPUUsageAvg)
  124. queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, offStr, env.GetPromClusterLabel())
  125. resChCPUUsageMax := ctx.Query(queryCPUUsageMax)
  126. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, offStr, env.GetPromClusterLabel())
  127. resChGPUsRequested := ctx.Query(queryGPUsRequested)
  128. queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, offStr, env.GetPromClusterLabel())
  129. resChGPUsAllocated := ctx.Query(queryGPUsAllocated)
  130. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, offStr, env.GetPromClusterLabel())
  131. resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
  132. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, offStr, env.GetPromClusterLabel())
  133. resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
  134. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, offStr, env.GetPromClusterLabel())
  135. resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
  136. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr, offStr)
  137. resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
  138. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr, offStr)
  139. resChPVCInfo := ctx.Query(queryPVCInfo)
  140. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, offStr, env.GetPromClusterLabel())
  141. resChPVBytes := ctx.Query(queryPVBytes)
  142. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, offStr, env.GetPromClusterLabel())
  143. resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
  144. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, offStr, env.GetPromClusterLabel())
  145. resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
  146. queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, offStr, env.GetPromClusterLabel())
  147. resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
  148. queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, offStr, env.GetPromClusterLabel())
  149. resChNetTransferBytes := ctx.Query(queryNetTransferBytes)
  150. queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, offStr, env.GetPromClusterLabel())
  151. resChNetReceiveBytes := ctx.Query(queryNetReceiveBytes)
  152. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, offStr, env.GetPromClusterLabel())
  153. resChNetZoneGiB := ctx.Query(queryNetZoneGiB)
  154. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  155. resChNetZoneCostPerGiB := ctx.Query(queryNetZoneCostPerGiB)
  156. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, offStr, env.GetPromClusterLabel())
  157. resChNetRegionGiB := ctx.Query(queryNetRegionGiB)
  158. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  159. resChNetRegionCostPerGiB := ctx.Query(queryNetRegionCostPerGiB)
  160. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, offStr, env.GetPromClusterLabel())
  161. resChNetInternetGiB := ctx.Query(queryNetInternetGiB)
  162. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, offStr, env.GetPromClusterLabel())
  163. resChNetInternetCostPerGiB := ctx.Query(queryNetInternetCostPerGiB)
  164. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr, offStr)
  165. resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
  166. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr, offStr)
  167. resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
  168. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr, offStr)
  169. resChPodLabels := ctx.Query(queryPodLabels)
  170. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr, offStr)
  171. resChPodAnnotations := ctx.Query(queryPodAnnotations)
  172. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr, offStr)
  173. resChServiceLabels := ctx.Query(queryServiceLabels)
  174. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr, offStr)
  175. resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
  176. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr, offStr)
  177. resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
  178. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, offStr, env.GetPromClusterLabel())
  179. resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
  180. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr, env.GetPromClusterLabel())
  181. resChJobLabels := ctx.Query(queryJobLabels)
  182. queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, offStr, env.GetPromClusterLabel())
  183. resChLBCostPerHr := ctx.Query(queryLBCostPerHr)
  184. queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr, offStr)
  185. resChLBActiveMins := ctx.Query(queryLBActiveMins)
  186. resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
  187. resCPURequests, _ := resChCPURequests.Await()
  188. resCPUUsageAvg, _ := resChCPUUsageAvg.Await()
  189. resCPUUsageMax, _ := resChCPUUsageMax.Await()
  190. resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
  191. resRAMRequests, _ := resChRAMRequests.Await()
  192. resRAMUsageAvg, _ := resChRAMUsageAvg.Await()
  193. resRAMUsageMax, _ := resChRAMUsageMax.Await()
  194. resGPUsRequested, _ := resChGPUsRequested.Await()
  195. resGPUsAllocated, _ := resChGPUsAllocated.Await()
  196. resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
  197. resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
  198. resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
  199. resNodeIsSpot, _ := resChNodeIsSpot.Await()
  200. resPVBytes, _ := resChPVBytes.Await()
  201. resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
  202. resPVCInfo, _ := resChPVCInfo.Await()
  203. resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
  204. resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
  205. resNetTransferBytes, _ := resChNetTransferBytes.Await()
  206. resNetReceiveBytes, _ := resChNetReceiveBytes.Await()
  207. resNetZoneGiB, _ := resChNetZoneGiB.Await()
  208. resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
  209. resNetRegionGiB, _ := resChNetRegionGiB.Await()
  210. resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
  211. resNetInternetGiB, _ := resChNetInternetGiB.Await()
  212. resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()
  213. resNamespaceLabels, _ := resChNamespaceLabels.Await()
  214. resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
  215. resPodLabels, _ := resChPodLabels.Await()
  216. resPodAnnotations, _ := resChPodAnnotations.Await()
  217. resServiceLabels, _ := resChServiceLabels.Await()
  218. resDeploymentLabels, _ := resChDeploymentLabels.Await()
  219. resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
  220. resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
  221. resJobLabels, _ := resChJobLabels.Await()
  222. resLBCostPerHr, _ := resChLBCostPerHr.Await()
  223. resLBActiveMins, _ := resChLBActiveMins.Await()
  224. if ctx.HasErrors() {
  225. for _, err := range ctx.Errors() {
  226. log.Errorf("CostModel.ComputeAllocation: %s", err)
  227. }
  228. return allocSet, ctx.ErrorCollection()
  229. }
  230. // We choose to apply allocation before requests in the cases of RAM and
  231. // CPU so that we can assert that allocation should always be greater than
  232. // or equal to request.
  233. applyCPUCoresAllocated(podMap, resCPUCoresAllocated)
  234. applyCPUCoresRequested(podMap, resCPURequests)
  235. applyCPUCoresUsedAvg(podMap, resCPUUsageAvg)
  236. applyCPUCoresUsedMax(podMap, resCPUUsageMax)
  237. applyRAMBytesAllocated(podMap, resRAMBytesAllocated)
  238. applyRAMBytesRequested(podMap, resRAMRequests)
  239. applyRAMBytesUsedAvg(podMap, resRAMUsageAvg)
  240. applyRAMBytesUsedMax(podMap, resRAMUsageMax)
  241. applyGPUsAllocated(podMap, resGPUsRequested, resGPUsAllocated)
  242. applyNetworkTotals(podMap, resNetTransferBytes, resNetReceiveBytes)
  243. applyNetworkAllocation(podMap, resNetZoneGiB, resNetZoneCostPerGiB)
  244. applyNetworkAllocation(podMap, resNetRegionGiB, resNetRegionCostPerGiB)
  245. applyNetworkAllocation(podMap, resNetInternetGiB, resNetInternetCostPerGiB)
  246. namespaceLabels := resToNamespaceLabels(resNamespaceLabels)
  247. podLabels := resToPodLabels(resPodLabels)
  248. namespaceAnnotations := resToNamespaceAnnotations(resNamespaceAnnotations)
  249. podAnnotations := resToPodAnnotations(resPodAnnotations)
  250. applyLabels(podMap, namespaceLabels, podLabels)
  251. applyAnnotations(podMap, namespaceAnnotations, podAnnotations)
  252. serviceLabels := getServiceLabels(resServiceLabels)
  253. allocsByService := map[serviceKey][]*kubecost.Allocation{}
  254. applyServicesToPods(podMap, podLabels, allocsByService, serviceLabels)
  255. podDeploymentMap := labelsToPodControllerMap(podLabels, resToDeploymentLabels(resDeploymentLabels))
  256. podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
  257. podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels)
  258. podJobMap := resToPodJobMap(resJobLabels)
  259. applyControllersToPods(podMap, podDeploymentMap)
  260. applyControllersToPods(podMap, podStatefulSetMap)
  261. applyControllersToPods(podMap, podDaemonSetMap)
  262. applyControllersToPods(podMap, podJobMap)
  263. // TODO breakdown network costs?
  264. // Build out a map of Nodes with resource costs, discounts, and node types
  265. // for converting resource allocation data to cumulative costs.
  266. nodeMap := map[nodeKey]*NodePricing{}
  267. applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
  268. applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
  269. applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
  270. applyNodeSpot(nodeMap, resNodeIsSpot)
  271. applyNodeDiscount(nodeMap, cm)
  272. // Build out the map of all PVs with class, size and cost-per-hour.
  273. // Note: this does not record time running, which we may want to
  274. // include later for increased PV precision. (As long as the PV has
  275. // a PVC, we get time running there, so this is only inaccurate
  276. // for short-lived, unmounted PVs.)
  277. pvMap := map[pvKey]*PV{}
  278. buildPVMap(pvMap, resPVCostPerGiBHour)
  279. applyPVBytes(pvMap, resPVBytes)
  280. // Build out the map of all PVCs with time running, bytes requested,
  281. // and connect to the correct PV from pvMap. (If no PV exists, that
  282. // is noted, but does not result in any allocation/cost.)
  283. pvcMap := map[pvcKey]*PVC{}
  284. buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
  285. applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
  286. // Build out the relationships of pods to their PVCs. This step
  287. // populates the PVC.Count field so that PVC allocation can be
  288. // split appropriately among each pod's container allocation.
  289. podPVCMap := map[podKey][]*PVC{}
  290. buildPodPVCMap(podPVCMap, pvMap, pvcMap, podMap, resPodPVCAllocation)
  291. // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
  292. // cluster representing each cluster's unmounted PVs (if necessary).
  293. applyUnmountedPVs(window, podMap, pvMap, pvcMap)
  294. lbMap := getLoadBalancerCosts(resLBCostPerHr, resLBActiveMins, resolution)
  295. applyLoadBalancersToPods(lbMap, allocsByService)
  296. // (3) Build out AllocationSet from Pod map
  297. for _, pod := range podMap {
  298. for _, alloc := range pod.Allocations {
  299. cluster := alloc.Properties.Cluster
  300. nodeName := alloc.Properties.Node
  301. namespace := alloc.Properties.Namespace
  302. pod := alloc.Properties.Pod
  303. container := alloc.Properties.Container
  304. podKey := newPodKey(cluster, namespace, pod)
  305. nodeKey := newNodeKey(cluster, nodeName)
  306. node := cm.getNodePricing(nodeMap, nodeKey)
  307. alloc.Properties.ProviderID = node.ProviderID
  308. alloc.CPUCost = alloc.CPUCoreHours * node.CostPerCPUHr
  309. alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * node.CostPerRAMGiBHr
  310. alloc.GPUCost = alloc.GPUHours * node.CostPerGPUHr
  311. if pvcs, ok := podPVCMap[podKey]; ok {
  312. for _, pvc := range pvcs {
  313. // Determine the (start, end) of the relationship between the
  314. // given PVC and the associated Allocation so that a precise
  315. // number of hours can be used to compute cumulative cost.
  316. s, e := alloc.Start, alloc.End
  317. if pvc.Start.After(alloc.Start) {
  318. s = pvc.Start
  319. }
  320. if pvc.End.Before(alloc.End) {
  321. e = pvc.End
  322. }
  323. minutes := e.Sub(s).Minutes()
  324. hrs := minutes / 60.0
  325. count := float64(pvc.Count)
  326. if pvc.Count < 1 {
  327. count = 1
  328. }
  329. gib := pvc.Bytes / 1024 / 1024 / 1024
  330. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  331. // Apply the size and cost of the PV to the allocation, each
  332. // weighted by count (i.e. the number of containers in the pod)
  333. // record the amount of total PVBytes Hours attributable to a given PV
  334. if alloc.PVs == nil {
  335. alloc.PVs = kubecost.PVAllocations{}
  336. }
  337. pvKey := kubecost.PVKey{
  338. Cluster: pvc.Cluster,
  339. Name: pvc.Volume.Name,
  340. }
  341. alloc.PVs[pvKey] = &kubecost.PVAllocation{
  342. ByteHours: pvc.Bytes * hrs / count,
  343. Cost: cost / count,
  344. }
  345. }
  346. }
  347. // Make sure that the name is correct (node may not be present at this
  348. // point due to it missing from queryMinutes) then insert.
  349. alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, nodeName, namespace, pod, container)
  350. allocSet.Set(alloc)
  351. }
  352. }
  353. return allocSet, nil
  354. }
  355. func (cm *CostModel) buildPodMap(window kubecost.Window, resolution, maxBatchSize time.Duration, podMap map[podKey]*Pod, clusterStart, clusterEnd map[string]time.Time) error {
  356. // Assumes that window is positive and closed
  357. start, end := *window.Start(), *window.End()
  358. // Convert resolution duration to a query-ready string
  359. resStr := timeutil.DurationString(resolution)
  360. ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)
  361. // Query for (start, end) by (pod, namespace, cluster) over the given
  362. // window, using the given resolution, and if necessary in batches no
  363. // larger than the given maximum batch size. If working in batches, track
  364. // overall progress by starting with (window.start, window.start) and
  365. // querying in batches no larger than maxBatchSize from start-to-end,
  366. // folding each result set into podMap as the results come back.
  367. coverage := kubecost.NewWindow(&start, &start)
  368. numQuery := 1
  369. for coverage.End().Before(end) {
  370. // Determine the (start, end) of the current batch
  371. batchStart := *coverage.End()
  372. batchEnd := coverage.End().Add(maxBatchSize)
  373. if batchEnd.After(end) {
  374. batchEnd = end
  375. }
  376. batchWindow := kubecost.NewWindow(&batchStart, &batchEnd)
  377. var resPods []*prom.QueryResult
  378. var err error
  379. maxTries := 3
  380. numTries := 0
  381. for resPods == nil && numTries < maxTries {
  382. numTries++
  383. // Convert window (start, end) to (duration, offset) for querying Prometheus,
  384. // including handling Thanos offset
  385. durStr, offStr, err := batchWindow.DurationOffsetForPrometheus()
  386. if err != nil || durStr == "" {
  387. // Negative duration, so set empty results and don't query
  388. resPods = []*prom.QueryResult{}
  389. err = nil
  390. break
  391. }
  392. // Submit and profile query
  393. queryPods := fmt.Sprintf(queryFmtPods, env.GetPromClusterLabel(), durStr, resStr, offStr)
  394. queryProfile := time.Now()
  395. resPods, err = ctx.Query(queryPods).Await()
  396. if err != nil {
  397. log.Profile(queryProfile, fmt.Sprintf("CostModel.ComputeAllocation: pod query %d try %d failed: %s", numQuery, numTries, queryPods))
  398. resPods = nil
  399. }
  400. }
  401. if err != nil {
  402. return err
  403. }
  404. applyPodResults(window, resolution, podMap, clusterStart, clusterEnd, resPods)
  405. coverage = coverage.ExpandEnd(batchEnd)
  406. numQuery++
  407. }
  408. return nil
  409. }
  410. func applyPodResults(window kubecost.Window, resolution time.Duration, podMap map[podKey]*Pod, clusterStart, clusterEnd map[string]time.Time, resPods []*prom.QueryResult) {
  411. for _, res := range resPods {
  412. if len(res.Values) == 0 {
  413. log.Warningf("CostModel.ComputeAllocation: empty minutes result")
  414. continue
  415. }
  416. cluster, err := res.GetString(env.GetPromClusterLabel())
  417. if err != nil {
  418. cluster = env.GetClusterID()
  419. }
  420. labels, err := res.GetStrings("namespace", "pod")
  421. if err != nil {
  422. log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
  423. continue
  424. }
  425. namespace := labels["namespace"]
  426. pod := labels["pod"]
  427. key := newPodKey(cluster, namespace, pod)
  428. // allocStart and allocEnd are the timestamps of the first and last
  429. // minutes the pod was running, respectively. We subtract one resolution
  430. // from allocStart because this point will actually represent the end
  431. // of the first minute. We don't subtract from allocEnd because it
  432. // already represents the end of the last minute.
  433. var allocStart, allocEnd time.Time
  434. startAdjustmentCoeff, endAdjustmentCoeff := 1.0, 1.0
  435. for _, datum := range res.Values {
  436. t := time.Unix(int64(datum.Timestamp), 0)
  437. if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  438. // Set the start timestamp to the earliest non-zero timestamp
  439. allocStart = t
  440. // Record adjustment coefficient, i.e. the portion of the start
  441. // timestamp to "ignore". That is, sometimes the value will be
  442. // 0.5, meaning that we should discount the time running by
  443. // half of the resolution the timestamp stands for.
  444. startAdjustmentCoeff = (1.0 - datum.Value)
  445. }
  446. if datum.Value > 0 && window.Contains(t) {
  447. // Set the end timestamp to the latest non-zero timestamp
  448. allocEnd = t
  449. // Record adjustment coefficient, i.e. the portion of the end
  450. // timestamp to "ignore". (See explanation above for start.)
  451. endAdjustmentCoeff = (1.0 - datum.Value)
  452. }
  453. }
  454. if allocStart.IsZero() || allocEnd.IsZero() {
  455. continue
  456. }
  457. // Adjust timestamps according to the resolution and the adjustment
  458. // coefficients, as described above. That is, count the start timestamp
  459. // from the beginning of the resolution, not the end. Then "reduce" the
  460. // start and end by the correct amount, in the case that the "running"
  461. // value of the first or last timestamp was not a full 1.0.
  462. allocStart = allocStart.Add(-resolution)
  463. // Note: the *100 and /100 are necessary because Duration is an int, so
  464. // 0.5, for instance, will be truncated, resulting in no adjustment.
  465. allocStart = allocStart.Add(time.Duration(startAdjustmentCoeff*100) * resolution / time.Duration(100))
  466. allocEnd = allocEnd.Add(-time.Duration(endAdjustmentCoeff*100) * resolution / time.Duration(100))
  467. // If there is only one point with a value <= 0.5 that the start and
  468. // end timestamps both share, then we will enter this case because at
  469. // least half of a resolution will be subtracted from both the start
  470. // and the end. If that is the case, then add back half of each side
  471. // so that the pod is said to run for half a resolution total.
  472. // e.g. For resolution 1m and a value of 0.5 at one timestamp, we'll
  473. // end up with allocEnd == allocStart and each coeff == 0.5. In
  474. // that case, add 0.25m to each side, resulting in 0.5m duration.
  475. if !allocEnd.After(allocStart) {
  476. allocStart = allocStart.Add(-time.Duration(50*startAdjustmentCoeff) * resolution / time.Duration(100))
  477. allocEnd = allocEnd.Add(time.Duration(50*endAdjustmentCoeff) * resolution / time.Duration(100))
  478. }
  479. // Set start if unset or this datum's start time is earlier than the
  480. // current earliest time.
  481. if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
  482. clusterStart[cluster] = allocStart
  483. }
  484. // Set end if unset or this datum's end time is later than the
  485. // current latest time.
  486. if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
  487. clusterEnd[cluster] = allocEnd
  488. }
  489. if pod, ok := podMap[key]; ok {
  490. // Pod has already been recorded, so update it accordingly
  491. if allocStart.Before(pod.Start) {
  492. pod.Start = allocStart
  493. }
  494. if allocEnd.After(pod.End) {
  495. pod.End = allocEnd
  496. }
  497. } else {
  498. // Pod has not been recorded yet, so insert it
  499. podMap[key] = &Pod{
  500. Window: window.Clone(),
  501. Start: allocStart,
  502. End: allocEnd,
  503. Key: key,
  504. Allocations: map[string]*kubecost.Allocation{},
  505. }
  506. }
  507. }
  508. }
  509. func applyCPUCoresAllocated(podMap map[podKey]*Pod, resCPUCoresAllocated []*prom.QueryResult) {
  510. for _, res := range resCPUCoresAllocated {
  511. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  512. if err != nil {
  513. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation result missing field: %s", err)
  514. continue
  515. }
  516. pod, ok := podMap[key]
  517. if !ok {
  518. continue
  519. }
  520. container, err := res.GetString("container")
  521. if err != nil {
  522. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU allocation query result missing 'container': %s", key)
  523. continue
  524. }
  525. if _, ok := pod.Allocations[container]; !ok {
  526. pod.AppendContainer(container)
  527. }
  528. cpuCores := res.Values[0].Value
  529. if cpuCores > MAX_CPU_CAP {
  530. klog.Infof("[WARNING] Very large cpu allocation, clamping to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
  531. cpuCores = 0.0
  532. }
  533. hours := pod.Allocations[container].Minutes() / 60.0
  534. pod.Allocations[container].CPUCoreHours = cpuCores * hours
  535. node, err := res.GetString("node")
  536. if err != nil {
  537. log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing 'node': %s", key)
  538. continue
  539. }
  540. pod.Allocations[container].Properties.Node = node
  541. }
  542. }
  543. func applyCPUCoresRequested(podMap map[podKey]*Pod, resCPUCoresRequested []*prom.QueryResult) {
  544. for _, res := range resCPUCoresRequested {
  545. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  546. if err != nil {
  547. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request result missing field: %s", err)
  548. continue
  549. }
  550. pod, ok := podMap[key]
  551. if !ok {
  552. continue
  553. }
  554. container, err := res.GetString("container")
  555. if err != nil {
  556. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request query result missing 'container': %s", key)
  557. continue
  558. }
  559. if _, ok := pod.Allocations[container]; !ok {
  560. pod.AppendContainer(container)
  561. }
  562. pod.Allocations[container].CPUCoreRequestAverage = res.Values[0].Value
  563. // If CPU allocation is less than requests, set CPUCoreHours to
  564. // request level.
  565. if pod.Allocations[container].CPUCores() < res.Values[0].Value {
  566. pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  567. }
  568. if pod.Allocations[container].CPUCores() > MAX_CPU_CAP {
  569. klog.Infof("[WARNING] Very large cpu allocation, clamping! to %f", res.Values[0].Value*(pod.Allocations[container].Minutes()/60.0))
  570. pod.Allocations[container].CPUCoreHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  571. }
  572. node, err := res.GetString("node")
  573. if err != nil {
  574. log.Warningf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)
  575. continue
  576. }
  577. pod.Allocations[container].Properties.Node = node
  578. }
  579. }
  580. func applyCPUCoresUsedAvg(podMap map[podKey]*Pod, resCPUCoresUsedAvg []*prom.QueryResult) {
  581. for _, res := range resCPUCoresUsedAvg {
  582. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  583. if err != nil {
  584. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg result missing field: %s", err)
  585. continue
  586. }
  587. pod, ok := podMap[key]
  588. if !ok {
  589. continue
  590. }
  591. container, err := res.GetString("container")
  592. if container == "" || err != nil {
  593. container, err = res.GetString("container_name")
  594. if err != nil {
  595. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage avg query result missing 'container': %s", key)
  596. continue
  597. }
  598. }
  599. if _, ok := pod.Allocations[container]; !ok {
  600. pod.AppendContainer(container)
  601. }
  602. pod.Allocations[container].CPUCoreUsageAverage = res.Values[0].Value
  603. if res.Values[0].Value > MAX_CPU_CAP {
  604. klog.Infof("[WARNING] Very large cpu USAGE, dropping outlier")
  605. pod.Allocations[container].CPUCoreUsageAverage = 0.0
  606. }
  607. }
  608. }
  609. func applyCPUCoresUsedMax(podMap map[podKey]*Pod, resCPUCoresUsedMax []*prom.QueryResult) {
  610. for _, res := range resCPUCoresUsedMax {
  611. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  612. if err != nil {
  613. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max result missing field: %s", err)
  614. continue
  615. }
  616. pod, ok := podMap[key]
  617. if !ok {
  618. continue
  619. }
  620. container, err := res.GetString("container")
  621. if container == "" || err != nil {
  622. container, err = res.GetString("container_name")
  623. if err != nil {
  624. log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU usage max query result missing 'container': %s", key)
  625. continue
  626. }
  627. }
  628. if _, ok := pod.Allocations[container]; !ok {
  629. pod.AppendContainer(container)
  630. }
  631. if pod.Allocations[container].RawAllocationOnly == nil {
  632. pod.Allocations[container].RawAllocationOnly = &kubecost.RawAllocationOnlyData{
  633. CPUCoreUsageMax: res.Values[0].Value,
  634. }
  635. } else {
  636. pod.Allocations[container].RawAllocationOnly.CPUCoreUsageMax = res.Values[0].Value
  637. }
  638. }
  639. }
  640. func applyRAMBytesAllocated(podMap map[podKey]*Pod, resRAMBytesAllocated []*prom.QueryResult) {
  641. for _, res := range resRAMBytesAllocated {
  642. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  643. if err != nil {
  644. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation result missing field: %s", err)
  645. continue
  646. }
  647. pod, ok := podMap[key]
  648. if !ok {
  649. continue
  650. }
  651. container, err := res.GetString("container")
  652. if err != nil {
  653. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM allocation query result missing 'container': %s", key)
  654. continue
  655. }
  656. if _, ok := pod.Allocations[container]; !ok {
  657. pod.AppendContainer(container)
  658. }
  659. ramBytes := res.Values[0].Value
  660. hours := pod.Allocations[container].Minutes() / 60.0
  661. pod.Allocations[container].RAMByteHours = ramBytes * hours
  662. node, err := res.GetString("node")
  663. if err != nil {
  664. log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing 'node': %s", key)
  665. continue
  666. }
  667. pod.Allocations[container].Properties.Node = node
  668. }
  669. }
  670. func applyRAMBytesRequested(podMap map[podKey]*Pod, resRAMBytesRequested []*prom.QueryResult) {
  671. for _, res := range resRAMBytesRequested {
  672. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  673. if err != nil {
  674. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request result missing field: %s", err)
  675. continue
  676. }
  677. pod, ok := podMap[key]
  678. if !ok {
  679. continue
  680. }
  681. container, err := res.GetString("container")
  682. if err != nil {
  683. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM request query result missing 'container': %s", key)
  684. continue
  685. }
  686. if _, ok := pod.Allocations[container]; !ok {
  687. pod.AppendContainer(container)
  688. }
  689. pod.Allocations[container].RAMBytesRequestAverage = res.Values[0].Value
  690. // If RAM allocation is less than requests, set RAMByteHours to
  691. // request level.
  692. if pod.Allocations[container].RAMBytes() < res.Values[0].Value {
  693. pod.Allocations[container].RAMByteHours = res.Values[0].Value * (pod.Allocations[container].Minutes() / 60.0)
  694. }
  695. node, err := res.GetString("node")
  696. if err != nil {
  697. log.Warningf("CostModel.ComputeAllocation: RAM request query result missing 'node': %s", key)
  698. continue
  699. }
  700. pod.Allocations[container].Properties.Node = node
  701. }
  702. }
  703. func applyRAMBytesUsedAvg(podMap map[podKey]*Pod, resRAMBytesUsedAvg []*prom.QueryResult) {
  704. for _, res := range resRAMBytesUsedAvg {
  705. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  706. if err != nil {
  707. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM avg usage result missing field: %s", err)
  708. continue
  709. }
  710. pod, ok := podMap[key]
  711. if !ok {
  712. continue
  713. }
  714. container, err := res.GetString("container")
  715. if container == "" || err != nil {
  716. container, err = res.GetString("container_name")
  717. if err != nil {
  718. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage avg query result missing 'container': %s", key)
  719. continue
  720. }
  721. }
  722. if _, ok := pod.Allocations[container]; !ok {
  723. pod.AppendContainer(container)
  724. }
  725. pod.Allocations[container].RAMBytesUsageAverage = res.Values[0].Value
  726. }
  727. }
  728. func applyRAMBytesUsedMax(podMap map[podKey]*Pod, resRAMBytesUsedMax []*prom.QueryResult) {
  729. for _, res := range resRAMBytesUsedMax {
  730. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  731. if err != nil {
  732. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max result missing field: %s", err)
  733. continue
  734. }
  735. pod, ok := podMap[key]
  736. if !ok {
  737. continue
  738. }
  739. container, err := res.GetString("container")
  740. if container == "" || err != nil {
  741. container, err = res.GetString("container_name")
  742. if err != nil {
  743. log.DedupedWarningf(10, "CostModel.ComputeAllocation: RAM usage max query result missing 'container': %s", key)
  744. continue
  745. }
  746. }
  747. if _, ok := pod.Allocations[container]; !ok {
  748. pod.AppendContainer(container)
  749. }
  750. if pod.Allocations[container].RawAllocationOnly == nil {
  751. pod.Allocations[container].RawAllocationOnly = &kubecost.RawAllocationOnlyData{
  752. RAMBytesUsageMax: res.Values[0].Value,
  753. }
  754. } else {
  755. pod.Allocations[container].RawAllocationOnly.RAMBytesUsageMax = res.Values[0].Value
  756. }
  757. }
  758. }
  759. func applyGPUsAllocated(podMap map[podKey]*Pod, resGPUsRequested []*prom.QueryResult, resGPUsAllocated []*prom.QueryResult) {
  760. if len(resGPUsAllocated) > 0 { // Use the new query, when it's become available in a window
  761. resGPUsRequested = resGPUsAllocated
  762. }
  763. for _, res := range resGPUsRequested {
  764. key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  765. if err != nil {
  766. log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request result missing field: %s", err)
  767. continue
  768. }
  769. pod, ok := podMap[key]
  770. if !ok {
  771. continue
  772. }
  773. container, err := res.GetString("container")
  774. if err != nil {
  775. log.DedupedWarningf(10, "CostModel.ComputeAllocation: GPU request query result missing 'container': %s", key)
  776. continue
  777. }
  778. if _, ok := pod.Allocations[container]; !ok {
  779. pod.AppendContainer(container)
  780. }
  781. hrs := pod.Allocations[container].Minutes() / 60.0
  782. pod.Allocations[container].GPUHours = res.Values[0].Value * hrs
  783. }
  784. }
  785. func applyNetworkTotals(podMap map[podKey]*Pod, resNetworkTransferBytes []*prom.QueryResult, resNetworkReceiveBytes []*prom.QueryResult) {
  786. for _, res := range resNetworkTransferBytes {
  787. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  788. if err != nil {
  789. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Transfer Bytes query result missing field: %s", err)
  790. continue
  791. }
  792. pod, ok := podMap[podKey]
  793. if !ok {
  794. continue
  795. }
  796. for _, alloc := range pod.Allocations {
  797. alloc.NetworkTransferBytes = res.Values[0].Value / float64(len(pod.Allocations))
  798. }
  799. }
  800. for _, res := range resNetworkReceiveBytes {
  801. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  802. if err != nil {
  803. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network Receive Bytes query result missing field: %s", err)
  804. continue
  805. }
  806. pod, ok := podMap[podKey]
  807. if !ok {
  808. continue
  809. }
  810. for _, alloc := range pod.Allocations {
  811. alloc.NetworkReceiveBytes = res.Values[0].Value / float64(len(pod.Allocations))
  812. }
  813. }
  814. }
  815. func applyNetworkAllocation(podMap map[podKey]*Pod, resNetworkGiB []*prom.QueryResult, resNetworkCostPerGiB []*prom.QueryResult) {
  816. costPerGiBByCluster := map[string]float64{}
  817. for _, res := range resNetworkCostPerGiB {
  818. cluster, err := res.GetString(env.GetPromClusterLabel())
  819. if err != nil {
  820. cluster = env.GetClusterID()
  821. }
  822. costPerGiBByCluster[cluster] = res.Values[0].Value
  823. }
  824. for _, res := range resNetworkGiB {
  825. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  826. if err != nil {
  827. log.DedupedWarningf(10, "CostModel.ComputeAllocation: Network allocation query result missing field: %s", err)
  828. continue
  829. }
  830. pod, ok := podMap[podKey]
  831. if !ok {
  832. continue
  833. }
  834. for _, alloc := range pod.Allocations {
  835. gib := res.Values[0].Value / float64(len(pod.Allocations))
  836. costPerGiB := costPerGiBByCluster[podKey.Cluster]
  837. alloc.NetworkCost = gib * costPerGiB
  838. }
  839. }
  840. }
  841. func resToNamespaceLabels(resNamespaceLabels []*prom.QueryResult) map[namespaceKey]map[string]string {
  842. namespaceLabels := map[namespaceKey]map[string]string{}
  843. for _, res := range resNamespaceLabels {
  844. nsKey, err := resultNamespaceKey(res, env.GetPromClusterLabel(), "namespace")
  845. if err != nil {
  846. continue
  847. }
  848. if _, ok := namespaceLabels[nsKey]; !ok {
  849. namespaceLabels[nsKey] = map[string]string{}
  850. }
  851. for k, l := range res.GetLabels() {
  852. namespaceLabels[nsKey][k] = l
  853. }
  854. }
  855. return namespaceLabels
  856. }
  857. func resToPodLabels(resPodLabels []*prom.QueryResult) map[podKey]map[string]string {
  858. podLabels := map[podKey]map[string]string{}
  859. for _, res := range resPodLabels {
  860. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  861. if err != nil {
  862. continue
  863. }
  864. if _, ok := podLabels[podKey]; !ok {
  865. podLabels[podKey] = map[string]string{}
  866. }
  867. for k, l := range res.GetLabels() {
  868. podLabels[podKey][k] = l
  869. }
  870. }
  871. return podLabels
  872. }
  873. func resToNamespaceAnnotations(resNamespaceAnnotations []*prom.QueryResult) map[string]map[string]string {
  874. namespaceAnnotations := map[string]map[string]string{}
  875. for _, res := range resNamespaceAnnotations {
  876. namespace, err := res.GetString("namespace")
  877. if err != nil {
  878. continue
  879. }
  880. if _, ok := namespaceAnnotations[namespace]; !ok {
  881. namespaceAnnotations[namespace] = map[string]string{}
  882. }
  883. for k, l := range res.GetAnnotations() {
  884. namespaceAnnotations[namespace][k] = l
  885. }
  886. }
  887. return namespaceAnnotations
  888. }
  889. func resToPodAnnotations(resPodAnnotations []*prom.QueryResult) map[podKey]map[string]string {
  890. podAnnotations := map[podKey]map[string]string{}
  891. for _, res := range resPodAnnotations {
  892. podKey, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")
  893. if err != nil {
  894. continue
  895. }
  896. if _, ok := podAnnotations[podKey]; !ok {
  897. podAnnotations[podKey] = map[string]string{}
  898. }
  899. for k, l := range res.GetAnnotations() {
  900. podAnnotations[podKey][k] = l
  901. }
  902. }
  903. return podAnnotations
  904. }
  905. func applyLabels(podMap map[podKey]*Pod, namespaceLabels map[namespaceKey]map[string]string, podLabels map[podKey]map[string]string) {
  906. for podKey, pod := range podMap {
  907. for _, alloc := range pod.Allocations {
  908. allocLabels := alloc.Properties.Labels
  909. if allocLabels == nil {
  910. allocLabels = make(map[string]string)
  911. }
  912. // Apply namespace labels first, then pod labels so that pod labels
  913. // overwrite namespace labels.
  914. nsKey := newNamespaceKey(podKey.Cluster, podKey.Namespace)
  915. if labels, ok := namespaceLabels[nsKey]; ok {
  916. for k, v := range labels {
  917. allocLabels[k] = v
  918. }
  919. }
  920. if labels, ok := podLabels[podKey]; ok {
  921. for k, v := range labels {
  922. allocLabels[k] = v
  923. }
  924. }
  925. alloc.Properties.Labels = allocLabels
  926. }
  927. }
  928. }
  929. func applyAnnotations(podMap map[podKey]*Pod, namespaceAnnotations map[string]map[string]string, podAnnotations map[podKey]map[string]string) {
  930. for key, pod := range podMap {
  931. for _, alloc := range pod.Allocations {
  932. allocAnnotations := alloc.Properties.Annotations
  933. if allocAnnotations == nil {
  934. allocAnnotations = make(map[string]string)
  935. }
  936. // Apply namespace annotations first, then pod annotations so that
  937. // pod labels overwrite namespace labels.
  938. if labels, ok := namespaceAnnotations[key.Namespace]; ok {
  939. for k, v := range labels {
  940. allocAnnotations[k] = v
  941. }
  942. }
  943. if labels, ok := podAnnotations[key]; ok {
  944. for k, v := range labels {
  945. allocAnnotations[k] = v
  946. }
  947. }
  948. alloc.Properties.Annotations = allocAnnotations
  949. }
  950. }
  951. }
  952. func getServiceLabels(resServiceLabels []*prom.QueryResult) map[serviceKey]map[string]string {
  953. serviceLabels := map[serviceKey]map[string]string{}
  954. for _, res := range resServiceLabels {
  955. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service")
  956. if err != nil {
  957. continue
  958. }
  959. if _, ok := serviceLabels[serviceKey]; !ok {
  960. serviceLabels[serviceKey] = map[string]string{}
  961. }
  962. for k, l := range res.GetLabels() {
  963. serviceLabels[serviceKey][k] = l
  964. }
  965. }
  966. // Prune duplicate services. That is, if the same service exists with
  967. // hyphens instead of underscores, keep the one that uses hyphens.
  968. for key := range serviceLabels {
  969. if strings.Contains(key.Service, "_") {
  970. duplicateService := strings.Replace(key.Service, "_", "-", -1)
  971. duplicateKey := newServiceKey(key.Cluster, key.Namespace, duplicateService)
  972. if _, ok := serviceLabels[duplicateKey]; ok {
  973. delete(serviceLabels, key)
  974. }
  975. }
  976. }
  977. return serviceLabels
  978. }
  979. func resToDeploymentLabels(resDeploymentLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  980. deploymentLabels := map[controllerKey]map[string]string{}
  981. for _, res := range resDeploymentLabels {
  982. controllerKey, err := resultDeploymentKey(res, env.GetPromClusterLabel(), "namespace", "deployment")
  983. if err != nil {
  984. continue
  985. }
  986. if _, ok := deploymentLabels[controllerKey]; !ok {
  987. deploymentLabels[controllerKey] = map[string]string{}
  988. }
  989. for k, l := range res.GetLabels() {
  990. deploymentLabels[controllerKey][k] = l
  991. }
  992. }
  993. // Prune duplicate deployments. That is, if the same deployment exists with
  994. // hyphens instead of underscores, keep the one that uses hyphens.
  995. for key := range deploymentLabels {
  996. if strings.Contains(key.Controller, "_") {
  997. duplicateController := strings.Replace(key.Controller, "_", "-", -1)
  998. duplicateKey := newControllerKey(key.Cluster, key.Namespace, key.ControllerKind, duplicateController)
  999. if _, ok := deploymentLabels[duplicateKey]; ok {
  1000. delete(deploymentLabels, key)
  1001. }
  1002. }
  1003. }
  1004. return deploymentLabels
  1005. }
  1006. func resToStatefulSetLabels(resStatefulSetLabels []*prom.QueryResult) map[controllerKey]map[string]string {
  1007. statefulSetLabels := map[controllerKey]map[string]string{}
  1008. for _, res := range resStatefulSetLabels {
  1009. controllerKey, err := resultStatefulSetKey(res, env.GetPromClusterLabel(), "namespace", "statefulSet")
  1010. if err != nil {
  1011. continue
  1012. }
  1013. if _, ok := statefulSetLabels[controllerKey]; !ok {
  1014. statefulSetLabels[controllerKey] = map[string]string{}
  1015. }
  1016. for k, l := range res.GetLabels() {
  1017. statefulSetLabels[controllerKey][k] = l
  1018. }
  1019. }
  1020. // Prune duplicate stateful sets. That is, if the same stateful set exists
  1021. // with hyphens instead of underscores, keep the one that uses hyphens.
  1022. for key := range statefulSetLabels {
  1023. if strings.Contains(key.Controller, "_") {
  1024. duplicateController := strings.Replace(key.Controller, "_", "-", -1)
  1025. duplicateKey := newControllerKey(key.Cluster, key.Namespace, key.ControllerKind, duplicateController)
  1026. if _, ok := statefulSetLabels[duplicateKey]; ok {
  1027. delete(statefulSetLabels, key)
  1028. }
  1029. }
  1030. }
  1031. return statefulSetLabels
  1032. }
  1033. func labelsToPodControllerMap(podLabels map[podKey]map[string]string, controllerLabels map[controllerKey]map[string]string) map[podKey]controllerKey {
  1034. podControllerMap := map[podKey]controllerKey{}
  1035. // For each controller, turn the labels into a selector and attempt to
  1036. // match it with each set of pod labels. A match indicates that the pod
  1037. // belongs to the controller.
  1038. for cKey, cLabels := range controllerLabels {
  1039. selector := labels.Set(cLabels).AsSelectorPreValidated()
  1040. for pKey, pLabels := range podLabels {
  1041. // If the pod is in a different cluster or namespace, there is
  1042. // no need to compare the labels.
  1043. if cKey.Cluster != pKey.Cluster || cKey.Namespace != pKey.Namespace {
  1044. continue
  1045. }
  1046. podLabelSet := labels.Set(pLabels)
  1047. if selector.Matches(podLabelSet) {
  1048. if _, ok := podControllerMap[pKey]; ok {
  1049. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PodControllerMap match already exists: %s matches %s and %s", pKey, podControllerMap[pKey], cKey)
  1050. }
  1051. podControllerMap[pKey] = cKey
  1052. }
  1053. }
  1054. }
  1055. return podControllerMap
  1056. }
  1057. func resToPodDaemonSetMap(resDaemonSetLabels []*prom.QueryResult) map[podKey]controllerKey {
  1058. daemonSetLabels := map[podKey]controllerKey{}
  1059. for _, res := range resDaemonSetLabels {
  1060. controllerKey, err := resultDaemonSetKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
  1061. if err != nil {
  1062. continue
  1063. }
  1064. pod, err := res.GetString("pod")
  1065. if err != nil {
  1066. log.Warningf("CostModel.ComputeAllocation: DaemonSetLabel result without pod: %s", controllerKey)
  1067. }
  1068. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  1069. daemonSetLabels[podKey] = controllerKey
  1070. }
  1071. return daemonSetLabels
  1072. }
  1073. func resToPodJobMap(resJobLabels []*prom.QueryResult) map[podKey]controllerKey {
  1074. jobLabels := map[podKey]controllerKey{}
  1075. for _, res := range resJobLabels {
  1076. controllerKey, err := resultJobKey(res, env.GetPromClusterLabel(), "namespace", "owner_name")
  1077. if err != nil {
  1078. continue
  1079. }
  1080. // Convert the name of Jobs generated by CronJobs to the name of the
  1081. // CronJob by stripping the timestamp off the end.
  1082. match := isCron.FindStringSubmatch(controllerKey.Controller)
  1083. if match != nil {
  1084. controllerKey.Controller = match[1]
  1085. }
  1086. pod, err := res.GetString("pod")
  1087. if err != nil {
  1088. log.Warningf("CostModel.ComputeAllocation: JobLabel result without pod: %s", controllerKey)
  1089. }
  1090. podKey := newPodKey(controllerKey.Cluster, controllerKey.Namespace, pod)
  1091. jobLabels[podKey] = controllerKey
  1092. }
  1093. return jobLabels
  1094. }
  1095. func applyServicesToPods(podMap map[podKey]*Pod, podLabels map[podKey]map[string]string, allocsByService map[serviceKey][]*kubecost.Allocation, serviceLabels map[serviceKey]map[string]string) {
  1096. podServicesMap := map[podKey][]serviceKey{}
  1097. // For each service, turn the labels into a selector and attempt to
  1098. // match it with each set of pod labels. A match indicates that the pod
  1099. // belongs to the service.
  1100. for sKey, sLabels := range serviceLabels {
  1101. selector := labels.Set(sLabels).AsSelectorPreValidated()
  1102. for pKey, pLabels := range podLabels {
  1103. // If the pod is in a different cluster or namespace, there is
  1104. // no need to compare the labels.
  1105. if sKey.Cluster != pKey.Cluster || sKey.Namespace != pKey.Namespace {
  1106. continue
  1107. }
  1108. podLabelSet := labels.Set(pLabels)
  1109. if selector.Matches(podLabelSet) {
  1110. if _, ok := podServicesMap[pKey]; !ok {
  1111. podServicesMap[pKey] = []serviceKey{}
  1112. }
  1113. podServicesMap[pKey] = append(podServicesMap[pKey], sKey)
  1114. }
  1115. }
  1116. }
  1117. // For each allocation in each pod, attempt to find and apply the list of
  1118. // services associated with the allocation's pod.
  1119. for key, pod := range podMap {
  1120. for _, alloc := range pod.Allocations {
  1121. if sKeys, ok := podServicesMap[key]; ok {
  1122. services := []string{}
  1123. for _, sKey := range sKeys {
  1124. services = append(services, sKey.Service)
  1125. allocsByService[sKey] = append(allocsByService[sKey], alloc)
  1126. }
  1127. alloc.Properties.Services = services
  1128. }
  1129. }
  1130. }
  1131. }
  1132. func applyControllersToPods(podMap map[podKey]*Pod, podControllerMap map[podKey]controllerKey) {
  1133. for key, pod := range podMap {
  1134. for _, alloc := range pod.Allocations {
  1135. if controllerKey, ok := podControllerMap[key]; ok {
  1136. alloc.Properties.ControllerKind = controllerKey.ControllerKind
  1137. alloc.Properties.Controller = controllerKey.Controller
  1138. }
  1139. }
  1140. }
  1141. }
  1142. func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerCPUHr []*prom.QueryResult) {
  1143. for _, res := range resNodeCostPerCPUHr {
  1144. cluster, err := res.GetString(env.GetPromClusterLabel())
  1145. if err != nil {
  1146. cluster = env.GetClusterID()
  1147. }
  1148. node, err := res.GetString("node")
  1149. if err != nil {
  1150. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1151. continue
  1152. }
  1153. instanceType, err := res.GetString("instance_type")
  1154. if err != nil {
  1155. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1156. continue
  1157. }
  1158. providerID, err := res.GetString("provider_id")
  1159. if err != nil {
  1160. log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
  1161. continue
  1162. }
  1163. key := newNodeKey(cluster, node)
  1164. if _, ok := nodeMap[key]; !ok {
  1165. nodeMap[key] = &NodePricing{
  1166. Name: node,
  1167. NodeType: instanceType,
  1168. ProviderID: cloud.ParseID(providerID),
  1169. }
  1170. }
  1171. nodeMap[key].CostPerCPUHr = res.Values[0].Value
  1172. }
  1173. }
  1174. func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
  1175. for _, res := range resNodeCostPerRAMGiBHr {
  1176. cluster, err := res.GetString(env.GetPromClusterLabel())
  1177. if err != nil {
  1178. cluster = env.GetClusterID()
  1179. }
  1180. node, err := res.GetString("node")
  1181. if err != nil {
  1182. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1183. continue
  1184. }
  1185. instanceType, err := res.GetString("instance_type")
  1186. if err != nil {
  1187. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1188. continue
  1189. }
  1190. providerID, err := res.GetString("provider_id")
  1191. if err != nil {
  1192. log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
  1193. continue
  1194. }
  1195. key := newNodeKey(cluster, node)
  1196. if _, ok := nodeMap[key]; !ok {
  1197. nodeMap[key] = &NodePricing{
  1198. Name: node,
  1199. NodeType: instanceType,
  1200. ProviderID: cloud.ParseID(providerID),
  1201. }
  1202. }
  1203. nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
  1204. }
  1205. }
  1206. func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*NodePricing, resNodeCostPerGPUHr []*prom.QueryResult) {
  1207. for _, res := range resNodeCostPerGPUHr {
  1208. cluster, err := res.GetString(env.GetPromClusterLabel())
  1209. if err != nil {
  1210. cluster = env.GetClusterID()
  1211. }
  1212. node, err := res.GetString("node")
  1213. if err != nil {
  1214. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1215. continue
  1216. }
  1217. instanceType, err := res.GetString("instance_type")
  1218. if err != nil {
  1219. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1220. continue
  1221. }
  1222. providerID, err := res.GetString("provider_id")
  1223. if err != nil {
  1224. log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
  1225. continue
  1226. }
  1227. key := newNodeKey(cluster, node)
  1228. if _, ok := nodeMap[key]; !ok {
  1229. nodeMap[key] = &NodePricing{
  1230. Name: node,
  1231. NodeType: instanceType,
  1232. ProviderID: cloud.ParseID(providerID),
  1233. }
  1234. }
  1235. nodeMap[key].CostPerGPUHr = res.Values[0].Value
  1236. }
  1237. }
  1238. func applyNodeSpot(nodeMap map[nodeKey]*NodePricing, resNodeIsSpot []*prom.QueryResult) {
  1239. for _, res := range resNodeIsSpot {
  1240. cluster, err := res.GetString(env.GetPromClusterLabel())
  1241. if err != nil {
  1242. cluster = env.GetClusterID()
  1243. }
  1244. node, err := res.GetString("node")
  1245. if err != nil {
  1246. log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
  1247. continue
  1248. }
  1249. key := newNodeKey(cluster, node)
  1250. if _, ok := nodeMap[key]; !ok {
  1251. log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
  1252. continue
  1253. }
  1254. nodeMap[key].Preemptible = res.Values[0].Value > 0
  1255. }
  1256. }
  1257. func applyNodeDiscount(nodeMap map[nodeKey]*NodePricing, cm *CostModel) {
  1258. if cm == nil {
  1259. return
  1260. }
  1261. c, err := cm.Provider.GetConfig()
  1262. if err != nil {
  1263. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1264. return
  1265. }
  1266. discount, err := ParsePercentString(c.Discount)
  1267. if err != nil {
  1268. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1269. return
  1270. }
  1271. negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
  1272. if err != nil {
  1273. log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
  1274. return
  1275. }
  1276. for _, node := range nodeMap {
  1277. // TODO GKE Reserved Instances into account
  1278. node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
  1279. node.CostPerCPUHr *= (1.0 - node.Discount)
  1280. node.CostPerRAMGiBHr *= (1.0 - node.Discount)
  1281. }
  1282. }
  1283. func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
  1284. for _, res := range resPVCostPerGiBHour {
  1285. cluster, err := res.GetString(env.GetPromClusterLabel())
  1286. if err != nil {
  1287. cluster = env.GetClusterID()
  1288. }
  1289. name, err := res.GetString("volumename")
  1290. if err != nil {
  1291. log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
  1292. continue
  1293. }
  1294. key := newPVKey(cluster, name)
  1295. pvMap[key] = &PV{
  1296. Cluster: cluster,
  1297. Name: name,
  1298. CostPerGiBHour: res.Values[0].Value,
  1299. }
  1300. }
  1301. }
  1302. func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
  1303. for _, res := range resPVBytes {
  1304. key, err := resultPVKey(res, env.GetPromClusterLabel(), "persistentvolume")
  1305. if err != nil {
  1306. log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
  1307. continue
  1308. }
  1309. if _, ok := pvMap[key]; !ok {
  1310. log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
  1311. continue
  1312. }
  1313. pvMap[key].Bytes = res.Values[0].Value
  1314. }
  1315. }
  1316. func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
  1317. for _, res := range resPVCInfo {
  1318. cluster, err := res.GetString(env.GetPromClusterLabel())
  1319. if err != nil {
  1320. cluster = env.GetClusterID()
  1321. }
  1322. values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
  1323. if err != nil {
  1324. log.DedupedWarningf(10, "CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
  1325. continue
  1326. }
  1327. namespace := values["namespace"]
  1328. name := values["persistentvolumeclaim"]
  1329. volume := values["volumename"]
  1330. storageClass := values["storageclass"]
  1331. pvKey := newPVKey(cluster, volume)
  1332. pvcKey := newPVCKey(cluster, namespace, name)
  1333. // pvcStart and pvcEnd are the timestamps of the first and last minutes
  1334. // the PVC was running, respectively. We subtract 1m from pvcStart
  1335. // because this point will actually represent the end of the first
  1336. // minute. We don't subtract from pvcEnd because it already represents
  1337. // the end of the last minute.
  1338. var pvcStart, pvcEnd time.Time
  1339. for _, datum := range res.Values {
  1340. t := time.Unix(int64(datum.Timestamp), 0)
  1341. if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
  1342. pvcStart = t
  1343. }
  1344. if datum.Value > 0 && window.Contains(t) {
  1345. pvcEnd = t
  1346. }
  1347. }
  1348. if pvcStart.IsZero() || pvcEnd.IsZero() {
  1349. log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
  1350. }
  1351. pvcStart = pvcStart.Add(-time.Minute)
  1352. if _, ok := pvMap[pvKey]; !ok {
  1353. continue
  1354. }
  1355. pvMap[pvKey].StorageClass = storageClass
  1356. if _, ok := pvcMap[pvcKey]; !ok {
  1357. pvcMap[pvcKey] = &PVC{}
  1358. }
  1359. pvcMap[pvcKey].Name = name
  1360. pvcMap[pvcKey].Namespace = namespace
  1361. pvcMap[pvcKey].Volume = pvMap[pvKey]
  1362. pvcMap[pvcKey].Start = pvcStart
  1363. pvcMap[pvcKey].End = pvcEnd
  1364. }
  1365. }
  1366. func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
  1367. for _, res := range resPVCBytesRequested {
  1368. key, err := resultPVCKey(res, env.GetPromClusterLabel(), "namespace", "persistentvolumeclaim")
  1369. if err != nil {
  1370. continue
  1371. }
  1372. if _, ok := pvcMap[key]; !ok {
  1373. continue
  1374. }
  1375. pvcMap[key].Bytes = res.Values[0].Value
  1376. }
  1377. }
  1378. func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podMap map[podKey]*Pod, resPodPVCAllocation []*prom.QueryResult) {
  1379. for _, res := range resPodPVCAllocation {
  1380. cluster, err := res.GetString(env.GetPromClusterLabel())
  1381. if err != nil {
  1382. cluster = env.GetClusterID()
  1383. }
  1384. values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
  1385. if err != nil {
  1386. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
  1387. continue
  1388. }
  1389. namespace := values["namespace"]
  1390. pod := values["pod"]
  1391. name := values["persistentvolumeclaim"]
  1392. volume := values["persistentvolume"]
  1393. podKey := newPodKey(cluster, namespace, pod)
  1394. pvKey := newPVKey(cluster, volume)
  1395. pvcKey := newPVCKey(cluster, namespace, name)
  1396. if _, ok := pvMap[pvKey]; !ok {
  1397. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
  1398. continue
  1399. }
  1400. if _, ok := podPVCMap[podKey]; !ok {
  1401. podPVCMap[podKey] = []*PVC{}
  1402. }
  1403. pvc, ok := pvcMap[pvcKey]
  1404. if !ok {
  1405. log.DedupedWarningf(5, "CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
  1406. continue
  1407. }
  1408. count := 1
  1409. if pod, ok := podMap[podKey]; ok && len(pod.Allocations) > 0 {
  1410. count = len(pod.Allocations)
  1411. } else {
  1412. log.DedupedWarningf(10, "CostModel.ComputeAllocation: PVC %s for missing pod %s", pvcKey, podKey)
  1413. }
  1414. pvc.Count = count
  1415. pvc.Mounted = true
  1416. podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
  1417. }
  1418. }
  1419. func applyUnmountedPVs(window kubecost.Window, podMap map[podKey]*Pod, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
  1420. unmountedPVBytes := map[string]float64{}
  1421. unmountedPVCost := map[string]float64{}
  1422. for _, pv := range pvMap {
  1423. mounted := false
  1424. for _, pvc := range pvcMap {
  1425. if pvc.Volume == nil {
  1426. continue
  1427. }
  1428. if pvc.Volume == pv {
  1429. mounted = true
  1430. break
  1431. }
  1432. }
  1433. if !mounted {
  1434. gib := pv.Bytes / 1024 / 1024 / 1024
  1435. hrs := window.Minutes() / 60.0 // TODO improve with PV hours, not window hours
  1436. cost := pv.CostPerGiBHour * gib * hrs
  1437. unmountedPVCost[pv.Cluster] += cost
  1438. unmountedPVBytes[pv.Cluster] += pv.Bytes
  1439. }
  1440. }
  1441. for cluster, amount := range unmountedPVCost {
  1442. container := kubecost.UnmountedSuffix
  1443. pod := kubecost.UnmountedSuffix
  1444. namespace := kubecost.UnmountedSuffix
  1445. node := ""
  1446. key := newPodKey(cluster, namespace, pod)
  1447. podMap[key] = &Pod{
  1448. Window: window.Clone(),
  1449. Start: *window.Start(),
  1450. End: *window.End(),
  1451. Key: key,
  1452. Allocations: map[string]*kubecost.Allocation{},
  1453. }
  1454. podMap[key].AppendContainer(container)
  1455. podMap[key].Allocations[container].Properties.Cluster = cluster
  1456. podMap[key].Allocations[container].Properties.Node = node
  1457. podMap[key].Allocations[container].Properties.Namespace = namespace
  1458. podMap[key].Allocations[container].Properties.Pod = pod
  1459. podMap[key].Allocations[container].Properties.Container = container
  1460. pvKey := kubecost.PVKey{
  1461. Cluster: cluster,
  1462. Name: kubecost.UnmountedSuffix,
  1463. }
  1464. unmountedPVs := kubecost.PVAllocations{
  1465. pvKey: {
  1466. ByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
  1467. Cost: amount,
  1468. },
  1469. }
  1470. podMap[key].Allocations[container].PVs = podMap[key].Allocations[container].PVs.Add(unmountedPVs)
  1471. }
  1472. }
  1473. func applyUnmountedPVCs(window kubecost.Window, podMap map[podKey]*Pod, pvcMap map[pvcKey]*PVC) {
  1474. unmountedPVCBytes := map[namespaceKey]float64{}
  1475. unmountedPVCCost := map[namespaceKey]float64{}
  1476. for _, pvc := range pvcMap {
  1477. if !pvc.Mounted && pvc.Volume != nil {
  1478. key := newNamespaceKey(pvc.Cluster, pvc.Namespace)
  1479. gib := pvc.Volume.Bytes / 1024 / 1024 / 1024
  1480. hrs := pvc.Minutes() / 60.0
  1481. cost := pvc.Volume.CostPerGiBHour * gib * hrs
  1482. unmountedPVCCost[key] += cost
  1483. unmountedPVCBytes[key] += pvc.Volume.Bytes
  1484. }
  1485. }
  1486. for key, amount := range unmountedPVCCost {
  1487. container := kubecost.UnmountedSuffix
  1488. pod := kubecost.UnmountedSuffix
  1489. namespace := key.Namespace
  1490. node := ""
  1491. cluster := key.Cluster
  1492. podKey := newPodKey(cluster, namespace, pod)
  1493. podMap[podKey] = &Pod{
  1494. Window: window.Clone(),
  1495. Start: *window.Start(),
  1496. End: *window.End(),
  1497. Key: podKey,
  1498. Allocations: map[string]*kubecost.Allocation{},
  1499. }
  1500. podMap[podKey].AppendContainer(container)
  1501. podMap[podKey].Allocations[container].Properties.Cluster = cluster
  1502. podMap[podKey].Allocations[container].Properties.Node = node
  1503. podMap[podKey].Allocations[container].Properties.Namespace = namespace
  1504. podMap[podKey].Allocations[container].Properties.Pod = pod
  1505. podMap[podKey].Allocations[container].Properties.Container = container
  1506. pvKey := kubecost.PVKey{
  1507. Cluster: cluster,
  1508. Name: kubecost.UnmountedSuffix,
  1509. }
  1510. unmountedPVs := kubecost.PVAllocations{
  1511. pvKey: {
  1512. ByteHours: unmountedPVCBytes[key] * window.Minutes() / 60.0,
  1513. Cost: amount,
  1514. },
  1515. }
  1516. podMap[podKey].Allocations[container].PVs = podMap[podKey].Allocations[container].PVs.Add(unmountedPVs)
  1517. }
  1518. }
  // LB holds the total cost of a Load Balancer together with the start and end
  // of the period that cost covers.
  type LB struct {
  	// TotalCost is the cumulative cost over [Start, End].
  	TotalCost float64
  	// Start and End bound the period during which the Load Balancer was
  	// active.
  	Start, End time.Time
  }
  1525. func getLoadBalancerCosts(resLBCost, resLBActiveMins []*prom.QueryResult, resolution time.Duration) map[serviceKey]*LB {
  1526. lbMap := make(map[serviceKey]*LB)
  1527. lbHourlyCosts := make(map[serviceKey]float64)
  1528. for _, res := range resLBCost {
  1529. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service_name")
  1530. if err != nil {
  1531. continue
  1532. }
  1533. lbHourlyCosts[serviceKey] = res.Values[0].Value
  1534. }
  1535. for _, res := range resLBActiveMins {
  1536. serviceKey, err := resultServiceKey(res, env.GetPromClusterLabel(), "namespace", "service_name")
  1537. if err != nil || len(res.Values) == 0 {
  1538. continue
  1539. }
  1540. if _, ok := lbHourlyCosts[serviceKey]; !ok {
  1541. log.Warningf("CostModel: failed to find hourly cost for Load Balancer: %v", serviceKey)
  1542. continue
  1543. }
  1544. s := time.Unix(int64(res.Values[0].Timestamp), 0)
  1545. // subtract resolution from start time to cover full time period
  1546. s = s.Add(-resolution)
  1547. e := time.Unix(int64(res.Values[len(res.Values)-1].Timestamp), 0)
  1548. hours := e.Sub(s).Hours()
  1549. lbMap[serviceKey] = &LB{
  1550. TotalCost: lbHourlyCosts[serviceKey] * hours,
  1551. Start: s,
  1552. End: e,
  1553. }
  1554. }
  1555. return lbMap
  1556. }
  1557. func applyLoadBalancersToPods(lbMap map[serviceKey]*LB, allocsByService map[serviceKey][]*kubecost.Allocation) {
  1558. for sKey, lb := range lbMap {
  1559. totalHours := 0.0
  1560. allocHours := make(map[*kubecost.Allocation]float64)
  1561. // Add portion of load balancing cost to each allocation
  1562. // proportional to the total number of hours allocations used the load balancer
  1563. for _, alloc := range allocsByService[sKey] {
  1564. // Determine the (start, end) of the relationship between the
  1565. // given LB and the associated Allocation so that a precise
  1566. // number of hours can be used to compute cumulative cost.
  1567. s, e := alloc.Start, alloc.End
  1568. if lb.Start.After(alloc.Start) {
  1569. s = lb.Start
  1570. }
  1571. if lb.End.Before(alloc.End) {
  1572. e = lb.End
  1573. }
  1574. hours := e.Sub(s).Hours()
  1575. // A negative number of hours signifies no overlap between the windows
  1576. if hours > 0 {
  1577. totalHours += hours
  1578. allocHours[alloc] = hours
  1579. }
  1580. }
  1581. // Distribute cost of service once total hours is calculated
  1582. for alloc, hours := range allocHours {
  1583. alloc.LoadBalancerCost += lb.TotalCost * hours / totalHours
  1584. }
  1585. }
  1586. }
  1587. // getNodePricing determines node pricing, given a key and a mapping from keys
  1588. // to their NodePricing instances, as well as the custom pricing configuration
  1589. // inherent to the CostModel instance. If custom pricing is set, use that. If
  1590. // not, use the pricing defined by the given key. If that doesn't exist, fall
  1591. // back on custom pricing as a default.
  1592. func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*NodePricing, nodeKey nodeKey) *NodePricing {
  1593. // Find the relevant NodePricing, if it exists. If not, substitute the
  1594. // custom NodePricing as a default.
  1595. node, ok := nodeMap[nodeKey]
  1596. if !ok || node == nil {
  1597. if nodeKey.Node != "" {
  1598. log.DedupedWarningf(5, "CostModel: failed to find node for %s", nodeKey)
  1599. }
  1600. return cm.getCustomNodePricing(false)
  1601. }
  1602. // If custom pricing is enabled and can be retrieved, override detected
  1603. // node pricing with the custom values.
  1604. customPricingConfig, err := cm.Provider.GetConfig()
  1605. if err != nil {
  1606. log.Warningf("CostModel: failed to load custom pricing: %s", err)
  1607. }
  1608. if cloud.CustomPricesEnabled(cm.Provider) && customPricingConfig != nil {
  1609. return cm.getCustomNodePricing(node.Preemptible)
  1610. }
  1611. node.Source = "prometheus"
  1612. // If any of the values are NaN or zero, replace them with the custom
  1613. // values as default.
  1614. // TODO:CLEANUP can't we parse these custom prices once? why do we store
  1615. // them as strings like this?
  1616. if node.CostPerCPUHr == 0 || math.IsNaN(node.CostPerCPUHr) {
  1617. log.Warningf("CostModel: node pricing has illegal CostPerCPUHr; replacing with custom pricing: %s", nodeKey)
  1618. cpuCostStr := customPricingConfig.CPU
  1619. if node.Preemptible {
  1620. cpuCostStr = customPricingConfig.SpotCPU
  1621. }
  1622. costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
  1623. if err != nil {
  1624. log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
  1625. }
  1626. node.CostPerCPUHr = costPerCPUHr
  1627. node.Source += "/customCPU"
  1628. }
  1629. if math.IsNaN(node.CostPerGPUHr) {
  1630. log.Warningf("CostModel: node pricing has illegal CostPerGPUHr; replacing with custom pricing: %s", nodeKey)
  1631. gpuCostStr := customPricingConfig.GPU
  1632. if node.Preemptible {
  1633. gpuCostStr = customPricingConfig.SpotGPU
  1634. }
  1635. costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
  1636. if err != nil {
  1637. log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
  1638. }
  1639. node.CostPerGPUHr = costPerGPUHr
  1640. node.Source += "/customGPU"
  1641. }
  1642. if node.CostPerRAMGiBHr == 0 || math.IsNaN(node.CostPerRAMGiBHr) {
  1643. log.Warningf("CostModel: node pricing has illegal CostPerRAMHr; replacing with custom pricing: %s", nodeKey)
  1644. ramCostStr := customPricingConfig.RAM
  1645. if node.Preemptible {
  1646. ramCostStr = customPricingConfig.SpotRAM
  1647. }
  1648. costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
  1649. if err != nil {
  1650. log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
  1651. }
  1652. node.CostPerRAMGiBHr = costPerRAMHr
  1653. node.Source += "/customRAM"
  1654. }
  1655. return node
  1656. }
  1657. // getCustomNodePricing converts the CostModel's configured custom pricing
  1658. // values into a NodePricing instance.
  1659. func (cm *CostModel) getCustomNodePricing(spot bool) *NodePricing {
  1660. customPricingConfig, err := cm.Provider.GetConfig()
  1661. if err != nil {
  1662. return nil
  1663. }
  1664. cpuCostStr := customPricingConfig.CPU
  1665. gpuCostStr := customPricingConfig.GPU
  1666. ramCostStr := customPricingConfig.RAM
  1667. if spot {
  1668. cpuCostStr = customPricingConfig.SpotCPU
  1669. gpuCostStr = customPricingConfig.SpotGPU
  1670. ramCostStr = customPricingConfig.SpotRAM
  1671. }
  1672. node := &NodePricing{Source: "custom"}
  1673. costPerCPUHr, err := strconv.ParseFloat(cpuCostStr, 64)
  1674. if err != nil {
  1675. log.Warningf("CostModel: custom pricing has illegal CPU cost: %s", cpuCostStr)
  1676. }
  1677. node.CostPerCPUHr = costPerCPUHr
  1678. costPerGPUHr, err := strconv.ParseFloat(gpuCostStr, 64)
  1679. if err != nil {
  1680. log.Warningf("CostModel: custom pricing has illegal GPU cost: %s", gpuCostStr)
  1681. }
  1682. node.CostPerGPUHr = costPerGPUHr
  1683. costPerRAMHr, err := strconv.ParseFloat(ramCostStr, 64)
  1684. if err != nil {
  1685. log.Warningf("CostModel: custom pricing has illegal RAM cost: %s", ramCostStr)
  1686. }
  1687. node.CostPerRAMGiBHr = costPerRAMHr
  1688. return node
  1689. }
  // NodePricing holds the per-resource hourly costs of a single node together
  // with the source the figures came from (e.g. prometheus, custom).
  type NodePricing struct {
  	// Name, NodeType and ProviderID identify the node.
  	Name, NodeType, ProviderID string
  	// Preemptible reports whether the node is a spot/preemptible instance.
  	Preemptible bool
  	// Hourly costs per CPU, per GiB of RAM and per GPU.
  	CostPerCPUHr, CostPerRAMGiBHr, CostPerGPUHr float64
  	// Discount is the combined discount computed for the node.
  	Discount float64
  	// Source names where the pricing came from.
  	Source string
  }
  1703. // Pod describes a running pod's start and end time within a Window and
  1704. // all the Allocations (i.e. containers) contained within it.
  1705. type Pod struct {
  1706. Window kubecost.Window
  1707. Start time.Time
  1708. End time.Time
  1709. Key podKey
  1710. Allocations map[string]*kubecost.Allocation
  1711. }
  1712. // AppendContainer adds an entry for the given container name to the Pod.
  1713. func (p Pod) AppendContainer(container string) {
  1714. name := fmt.Sprintf("%s/%s/%s/%s", p.Key.Cluster, p.Key.Namespace, p.Key.Pod, container)
  1715. alloc := &kubecost.Allocation{
  1716. Name: name,
  1717. Properties: &kubecost.AllocationProperties{},
  1718. Window: p.Window.Clone(),
  1719. Start: p.Start,
  1720. End: p.End,
  1721. }
  1722. alloc.Properties.Container = container
  1723. alloc.Properties.Pod = p.Key.Pod
  1724. alloc.Properties.Namespace = p.Key.Namespace
  1725. alloc.Properties.Cluster = p.Key.Cluster
  1726. p.Allocations[container] = alloc
  1727. }
  1728. // PVC describes a PersistentVolumeClaim
  1729. // TODO:CLEANUP move to pkg/kubecost?
  1730. // TODO:CLEANUP add PersistentVolumeClaims field to type Allocation?
  1731. type PVC struct {
  1732. Bytes float64 `json:"bytes"`
  1733. Count int `json:"count"`
  1734. Name string `json:"name"`
  1735. Cluster string `json:"cluster"`
  1736. Namespace string `json:"namespace"`
  1737. Volume *PV `json:"persistentVolume"`
  1738. Mounted bool `json:"mounted"`
  1739. Start time.Time `json:"start"`
  1740. End time.Time `json:"end"`
  1741. }
  1742. // Cost computes the cumulative cost of the PVC
  1743. func (pvc *PVC) Cost() float64 {
  1744. if pvc == nil || pvc.Volume == nil {
  1745. return 0.0
  1746. }
  1747. gib := pvc.Bytes / 1024 / 1024 / 1024
  1748. hrs := pvc.Minutes() / 60.0
  1749. return pvc.Volume.CostPerGiBHour * gib * hrs
  1750. }
  1751. // Minutes computes the number of minutes over which the PVC is defined
  1752. func (pvc *PVC) Minutes() float64 {
  1753. if pvc == nil {
  1754. return 0.0
  1755. }
  1756. return pvc.End.Sub(pvc.Start).Minutes()
  1757. }
  1758. // String returns a string representation of the PVC
  1759. func (pvc *PVC) String() string {
  1760. if pvc == nil {
  1761. return "<nil>"
  1762. }
  1763. return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
  1764. }
  // PV describes a PersistentVolume: its identity, size, storage class and
  // hourly cost per GiB.
  // TODO:CLEANUP move to pkg/kubecost?
  type PV struct {
  	// Bytes is the size of the volume.
  	Bytes float64 `json:"bytes"`
  	// CostPerGiBHour is the hourly cost of one GiB of this volume.
  	CostPerGiBHour float64 `json:"costPerGiBHour"`
  	// Cluster and Name identify the volume.
  	Cluster string `json:"cluster"`
  	Name    string `json:"name"`
  	// StorageClass is the storage class reported for the volume.
  	StorageClass string `json:"storageClass"`
  }
  1774. // String returns a string representation of the PV
  1775. func (pv *PV) String() string {
  1776. if pv == nil {
  1777. return "<nil>"
  1778. }
  1779. return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
  1780. }