metricsquerier.go 91 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859
  1. package prom
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/opencost/opencost/core/pkg/log"
  6. "github.com/opencost/opencost/core/pkg/source"
  7. "github.com/opencost/opencost/core/pkg/util/timeutil"
  8. prometheus "github.com/prometheus/client_golang/api"
  9. )
//--------------------------------------------------------------------------
// PrometheusMetricsQuerier
//--------------------------------------------------------------------------

// PrometheusMetricsQueryLogFormat is the log format used to log metric queries
// before they are sent to the Prometheus instance. The verbs are, in order:
// the query method name (%s), the query's end timestamp in Unix seconds (%d),
// and the rendered PromQL query string (%s).
const PrometheusMetricsQueryLogFormat = `[PrometheusMetricsQuerier][%s][At Time: %d]: %s`
// PrometheusMetricsQuerier is the implementation of the data source's MetricsQuerier interface for Prometheus.
type PrometheusMetricsQuerier struct {
	promConfig   *OpenCostPrometheusConfig // OpenCost Prometheus settings (cluster filter/label, data resolution)
	promClient   prometheus.Client         // underlying Prometheus API client
	promContexts *ContextFactory           // factory for named query contexts used to issue queries
}
  22. func newPrometheusMetricsQuerier(
  23. promConfig *OpenCostPrometheusConfig,
  24. promClient prometheus.Client,
  25. promContexts *ContextFactory,
  26. ) *PrometheusMetricsQuerier {
  27. return &PrometheusMetricsQuerier{
  28. promConfig: promConfig,
  29. promClient: promClient,
  30. promContexts: promContexts,
  31. }
  32. }
  33. func (pds *PrometheusMetricsQuerier) QueryPVPricePerGiBHour(start, end time.Time) *source.Future[source.PVPricePerGiBHourResult] {
  34. const queryName = "QueryPVPricePerGiBHour"
  35. const pvCostQuery = `avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (%s, persistentvolume, volumename, uid, provider_id)`
  36. cfg := pds.promConfig
  37. durStr := timeutil.DurationString(end.Sub(start))
  38. if durStr == "" {
  39. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  40. }
  41. queryPVCost := fmt.Sprintf(pvCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  42. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCost)
  43. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  44. return source.NewFuture(source.DecodePVPricePerGiBHourResult, ctx.QueryAtTime(queryPVCost, end))
  45. }
  46. func (pds *PrometheusMetricsQuerier) QueryPVUsedAverage(start, end time.Time) *source.Future[source.PVUsedAvgResult] {
  47. const queryName = "QueryPVUsedAverage"
  48. const pvUsedAverageQuery = `avg(avg_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  49. cfg := pds.promConfig
  50. durStr := timeutil.DurationString(end.Sub(start))
  51. if durStr == "" {
  52. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  53. }
  54. queryPVUsedAvg := fmt.Sprintf(pvUsedAverageQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  55. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedAvg)
  56. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  57. return source.NewFuture(source.DecodePVUsedAvgResult, ctx.QueryAtTime(queryPVUsedAvg, end))
  58. }
  59. func (pds *PrometheusMetricsQuerier) QueryPVUsedMax(start, end time.Time) *source.Future[source.PVUsedMaxResult] {
  60. const queryName = "QueryPVUsedMax"
  61. const pvUsedMaxQuery = `max(max_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  62. cfg := pds.promConfig
  63. durStr := timeutil.DurationString(end.Sub(start))
  64. if durStr == "" {
  65. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  66. }
  67. queryPVUsedMax := fmt.Sprintf(pvUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  68. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedMax)
  69. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  70. return source.NewFuture(source.DecodePVUsedMaxResult, ctx.QueryAtTime(queryPVUsedMax, end))
  71. }
  72. func (pds *PrometheusMetricsQuerier) QueryPVCInfo(start, end time.Time) *source.Future[source.PVCInfoResult] {
  73. const queryName = "QueryPVCInfo"
  74. const queryFmtPVCInfo = `avg(kube_persistentvolumeclaim_info{volumename != "", %s}) by (persistentvolumeclaim, storageclass, volumename, namespace, uid, %s)[%s:%dm]`
  75. cfg := pds.promConfig
  76. minsPerResolution := cfg.DataResolutionMinutes
  77. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  78. if durStr == "" {
  79. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  80. }
  81. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  82. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCInfo)
  83. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  84. return source.NewFuture(source.DecodePVCInfoResult, ctx.QueryAtTime(queryPVCInfo, end))
  85. }
  86. func (pds *PrometheusMetricsQuerier) QueryPVActiveMinutes(start, end time.Time) *source.Future[source.PVActiveMinutesResult] {
  87. const queryName = "QueryPVActiveMinutes"
  88. const pvActiveMinsQuery = `avg(kube_persistentvolume_capacity_bytes{%s}) by (%s, persistentvolume, uid)[%s:%dm]`
  89. cfg := pds.promConfig
  90. minsPerResolution := cfg.DataResolutionMinutes
  91. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  92. if durStr == "" {
  93. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  94. }
  95. queryPVActiveMins := fmt.Sprintf(pvActiveMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  96. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVActiveMins)
  97. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  98. return source.NewFuture(source.DecodePVActiveMinutesResult, ctx.QueryAtTime(queryPVActiveMins, end))
  99. }
  100. func (pds *PrometheusMetricsQuerier) QueryLocalStorageCost(start, end time.Time) *source.Future[source.LocalStorageCostResult] {
  101. const queryName = "QueryLocalStorageCost"
  102. const localStorageCostQuery = `sum_over_time(sum(container_fs_limit_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`
  103. cfg := pds.promConfig
  104. minsPerResolution := cfg.DataResolutionMinutes
  105. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  106. if durStr == "" {
  107. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  108. }
  109. // hourlyToCumulative is a scaling factor that, when multiplied by an
  110. // hourly value, converts it to a cumulative value; i.e. [$/hr] *
  111. // [min/res]*[hr/min] = [$/res]
  112. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  113. costPerGBHr := 0.04 / 730.0
  114. queryLocalStorageCost := fmt.Sprintf(localStorageCostQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
  115. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageCost)
  116. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  117. return source.NewFuture(source.DecodeLocalStorageCostResult, ctx.QueryAtTime(queryLocalStorageCost, end))
  118. }
  119. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedCost(start, end time.Time) *source.Future[source.LocalStorageUsedCostResult] {
  120. const queryName = "QueryLocalStorageUsedCost"
  121. const localStorageUsedCostQuery = `sum_over_time(sum(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`
  122. cfg := pds.promConfig
  123. minsPerResolution := cfg.DataResolutionMinutes
  124. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  125. if durStr == "" {
  126. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  127. }
  128. // hourlyToCumulative is a scaling factor that, when multiplied by an
  129. // hourly value, converts it to a cumulative value; i.e. [$/hr] *
  130. // [min/res]*[hr/min] = [$/res]
  131. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  132. costPerGBHr := 0.04 / 730.0
  133. queryLocalStorageUsedCost := fmt.Sprintf(localStorageUsedCostQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
  134. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedCost)
  135. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  136. return source.NewFuture(source.DecodeLocalStorageUsedCostResult, ctx.QueryAtTime(queryLocalStorageUsedCost, end))
  137. }
  138. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedAvg(start, end time.Time) *source.Future[source.LocalStorageUsedAvgResult] {
  139. const queryName = "QueryLocalStorageUsedAvg"
  140. const localStorageUsedAvgQuery = `avg(sum(avg_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  141. cfg := pds.promConfig
  142. durStr := timeutil.DurationString(end.Sub(start))
  143. if durStr == "" {
  144. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  145. }
  146. queryLocalStorageUsedAvg := fmt.Sprintf(localStorageUsedAvgQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  147. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedAvg)
  148. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  149. return source.NewFuture(source.DecodeLocalStorageUsedAvgResult, ctx.QueryAtTime(queryLocalStorageUsedAvg, end))
  150. }
  151. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedMax(start, end time.Time) *source.Future[source.LocalStorageUsedMaxResult] {
  152. const queryName = "QueryLocalStorageUsedMax"
  153. const localStorageUsedMaxQuery = `max(sum(max_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  154. cfg := pds.promConfig
  155. durStr := timeutil.DurationString(end.Sub(start))
  156. if durStr == "" {
  157. panic("failed to parse duration string passed to QueryLocalStorageUsedMax")
  158. }
  159. queryLocalStorageUsedMax := fmt.Sprintf(localStorageUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  160. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedMax)
  161. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  162. return source.NewFuture(source.DecodeLocalStorageUsedMaxResult, ctx.QueryAtTime(queryLocalStorageUsedMax, end))
  163. }
  164. func (pds *PrometheusMetricsQuerier) QueryLocalStorageBytes(start, end time.Time) *source.Future[source.LocalStorageBytesResult] {
  165. const queryName = "QueryLocalStorageBytes"
  166. const localStorageBytesQuery = `avg_over_time(sum(container_fs_limit_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm])`
  167. cfg := pds.promConfig
  168. minsPerResolution := cfg.DataResolutionMinutes
  169. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  170. if durStr == "" {
  171. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  172. }
  173. queryLocalStorageBytes := fmt.Sprintf(localStorageBytesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  174. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageBytes)
  175. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  176. return source.NewFuture(source.DecodeLocalStorageBytesResult, ctx.QueryAtTime(queryLocalStorageBytes, end))
  177. }
  178. func (pds *PrometheusMetricsQuerier) QueryLocalStorageActiveMinutes(start, end time.Time) *source.Future[source.LocalStorageActiveMinutesResult] {
  179. const queryName = "QueryLocalStorageActiveMinutes"
  180. const localStorageActiveMinutesQuery = `count(node_total_hourly_cost{%s}) by (%s, node, uid, instance, provider_id)[%s:%dm]`
  181. cfg := pds.promConfig
  182. minsPerResolution := cfg.DataResolutionMinutes
  183. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  184. if durStr == "" {
  185. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  186. }
  187. queryLocalStorageActiveMins := fmt.Sprintf(localStorageActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  188. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageActiveMins)
  189. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  190. return source.NewFuture(source.DecodeLocalStorageActiveMinutesResult, ctx.QueryAtTime(queryLocalStorageActiveMins, end))
  191. }
  192. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresCapacity(start, end time.Time) *source.Future[source.NodeCPUCoresCapacityResult] {
  193. const queryName = "QueryNodeCPUCoresCapacity"
  194. const nodeCPUCoresCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_cpu_cores{%s}[%s])) by (%s, node, uid)`
  195. cfg := pds.promConfig
  196. durStr := timeutil.DurationString(end.Sub(start))
  197. if durStr == "" {
  198. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  199. }
  200. queryNodeCPUCoresCapacity := fmt.Sprintf(nodeCPUCoresCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  201. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresCapacity)
  202. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  203. return source.NewFuture(source.DecodeNodeCPUCoresCapacityResult, ctx.QueryAtTime(queryNodeCPUCoresCapacity, end))
  204. }
  205. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresAllocatable(start, end time.Time) *source.Future[source.NodeCPUCoresAllocatableResult] {
  206. const queryName = "QueryNodeCPUCoresAllocatable"
  207. const nodeCPUCoresAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_cpu_cores{%s}[%s])) by (%s, node, uid)`
  208. // `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  209. cfg := pds.promConfig
  210. durStr := timeutil.DurationString(end.Sub(start))
  211. if durStr == "" {
  212. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  213. }
  214. queryNodeCPUCoresAllocatable := fmt.Sprintf(nodeCPUCoresAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  215. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresAllocatable)
  216. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  217. return source.NewFuture(source.DecodeNodeCPUCoresAllocatableResult, ctx.QueryAtTime(queryNodeCPUCoresAllocatable, end))
  218. }
  219. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesCapacity(start, end time.Time) *source.Future[source.NodeRAMBytesCapacityResult] {
  220. const queryName = "QueryNodeRAMBytesCapacity"
  221. const nodeRAMBytesCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_memory_bytes{%s}[%s])) by (%s, node, uid)`
  222. cfg := pds.promConfig
  223. durStr := timeutil.DurationString(end.Sub(start))
  224. if durStr == "" {
  225. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  226. }
  227. queryNodeRAMBytesCapacity := fmt.Sprintf(nodeRAMBytesCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  228. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesCapacity)
  229. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  230. return source.NewFuture(source.DecodeNodeRAMBytesCapacityResult, ctx.QueryAtTime(queryNodeRAMBytesCapacity, end))
  231. }
  232. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesAllocatable(start, end time.Time) *source.Future[source.NodeRAMBytesAllocatableResult] {
  233. const queryName = "QueryNodeRAMBytesAllocatable"
  234. const nodeRAMBytesAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_memory_bytes{%s}[%s])) by (%s, node, uid)`
  235. cfg := pds.promConfig
  236. durStr := timeutil.DurationString(end.Sub(start))
  237. if durStr == "" {
  238. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  239. }
  240. queryNodeRAMBytesAllocatable := fmt.Sprintf(nodeRAMBytesAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  241. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesAllocatable)
  242. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  243. return source.NewFuture(source.DecodeNodeRAMBytesAllocatableResult, ctx.QueryAtTime(queryNodeRAMBytesAllocatable, end))
  244. }
  245. func (pds *PrometheusMetricsQuerier) QueryNodeGPUCount(start, end time.Time) *source.Future[source.NodeGPUCountResult] {
  246. const queryName = "QueryNodeGPUCount"
  247. const nodeGPUCountQuery = `avg(avg_over_time(node_gpu_count{%s}[%s])) by (%s, node, uid, provider_id)`
  248. cfg := pds.promConfig
  249. durStr := timeutil.DurationString(end.Sub(start))
  250. if durStr == "" {
  251. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  252. }
  253. queryNodeGPUCount := fmt.Sprintf(nodeGPUCountQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  254. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeGPUCount)
  255. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  256. return source.NewFuture(source.DecodeNodeGPUCountResult, ctx.QueryAtTime(queryNodeGPUCount, end))
  257. }
  258. func (pds *PrometheusMetricsQuerier) QueryNodeLabels(start, end time.Time) *source.Future[source.NodeLabelsResult] {
  259. const queryName = "QueryNodeLabels"
  260. const labelsQuery = `avg_over_time(kube_node_labels{%s}[%s])`
  261. cfg := pds.promConfig
  262. durStr := timeutil.DurationString(end.Sub(start))
  263. if durStr == "" {
  264. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  265. }
  266. queryLabels := fmt.Sprintf(labelsQuery, cfg.ClusterFilter, durStr)
  267. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLabels)
  268. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  269. return source.NewFuture(source.DecodeNodeLabelsResult, ctx.QueryAtTime(queryLabels, end))
  270. }
  271. func (pds *PrometheusMetricsQuerier) QueryNodeActiveMinutes(start, end time.Time) *source.Future[source.NodeActiveMinutesResult] {
  272. const queryName = "QueryNodeActiveMinutes"
  273. const activeMinsQuery = `avg(node_total_hourly_cost{%s}) by (node, uid, %s, provider_id)[%s:%dm]`
  274. cfg := pds.promConfig
  275. minsPerResolution := cfg.DataResolutionMinutes
  276. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  277. if durStr == "" {
  278. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  279. }
  280. queryActiveMins := fmt.Sprintf(activeMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  281. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryActiveMins)
  282. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  283. return source.NewFuture(source.DecodeNodeActiveMinutesResult, ctx.QueryAtTime(queryActiveMins, end))
  284. }
  285. func (pds *PrometheusMetricsQuerier) QueryNodeCPUModeTotal(start, end time.Time) *source.Future[source.NodeCPUModeTotalResult] {
  286. const queryName = "QueryNodeCPUModeTotal"
  287. const nodeCPUModeTotalQuery = `sum(rate(node_cpu_seconds_total{%s}[%s:%dm])) by (kubernetes_node, uid, %s, mode)`
  288. cfg := pds.promConfig
  289. minsPerResolution := cfg.DataResolutionMinutes
  290. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  291. if durStr == "" {
  292. panic("failed to parse duration string passed to QueryNodeCPUModeTotal")
  293. }
  294. queryCPUModeTotal := fmt.Sprintf(nodeCPUModeTotalQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  295. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUModeTotal)
  296. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  297. return source.NewFuture(source.DecodeNodeCPUModeTotalResult, ctx.QueryAtTime(queryCPUModeTotal, end))
  298. }
  299. func (pds *PrometheusMetricsQuerier) QueryNodeRAMSystemPercent(start, end time.Time) *source.Future[source.NodeRAMSystemPercentResult] {
  300. const queryName = "QueryNodeRAMSystemPercent"
  301. const nodeRAMSystemPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  302. cfg := pds.promConfig
  303. minsPerResolution := cfg.DataResolutionMinutes
  304. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  305. if durStr == "" {
  306. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  307. }
  308. queryRAMSystemPct := fmt.Sprintf(nodeRAMSystemPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  309. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMSystemPct)
  310. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  311. return source.NewFuture(source.DecodeNodeRAMSystemPercentResult, ctx.QueryAtTime(queryRAMSystemPct, end))
  312. }
  313. func (pds *PrometheusMetricsQuerier) QueryNodeRAMUserPercent(start, end time.Time) *source.Future[source.NodeRAMUserPercentResult] {
  314. const queryName = "QueryNodeRAMUserPercent"
  315. const nodeRAMUserPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  316. cfg := pds.promConfig
  317. minsPerResolution := cfg.DataResolutionMinutes
  318. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  319. if durStr == "" {
  320. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  321. }
  322. queryRAMUserPct := fmt.Sprintf(nodeRAMUserPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  323. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUserPct)
  324. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  325. return source.NewFuture(source.DecodeNodeRAMUserPercentResult, ctx.QueryAtTime(queryRAMUserPct, end))
  326. }
  327. func (pds *PrometheusMetricsQuerier) QueryLBPricePerHr(start, end time.Time) *source.Future[source.LBPricePerHrResult] {
  328. const queryName = "QueryLBPricePerHr"
  329. const queryFmtLBCostPerHr = `avg(avg_over_time(kubecost_load_balancer_cost{%s}[%s])) by (namespace, service_name, ingress_ip, uid, %s)`
  330. cfg := pds.promConfig
  331. durStr := timeutil.DurationString(end.Sub(start))
  332. if durStr == "" {
  333. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  334. }
  335. queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  336. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBCostPerHr)
  337. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  338. return source.NewFuture(source.DecodeLBPricePerHrResult, ctx.QueryAtTime(queryLBCostPerHr, end))
  339. }
  340. func (pds *PrometheusMetricsQuerier) QueryLBActiveMinutes(start, end time.Time) *source.Future[source.LBActiveMinutesResult] {
  341. const queryName = "QueryLBActiveMinutes"
  342. const lbActiveMinutesQuery = `avg(kubecost_load_balancer_cost{%s}) by (namespace, service_name, uid, %s, ingress_ip)[%s:%dm]`
  343. cfg := pds.promConfig
  344. minsPerResolution := cfg.DataResolutionMinutes
  345. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  346. if durStr == "" {
  347. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  348. }
  349. queryLBActiveMins := fmt.Sprintf(lbActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  350. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBActiveMins)
  351. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  352. return source.NewFuture(source.DecodeLBActiveMinutesResult, ctx.QueryAtTime(queryLBActiveMins, end))
  353. }
  354. func (pds *PrometheusMetricsQuerier) QueryClusterManagementDuration(start, end time.Time) *source.Future[source.ClusterManagementDurationResult] {
  355. const queryName = "QueryClusterManagementDuration"
  356. const clusterManagementDurationQuery = `avg(kubecost_cluster_management_cost{%s}) by (%s, provisioner_name)[%s:%dm]`
  357. cfg := pds.promConfig
  358. minsPerResolution := cfg.DataResolutionMinutes
  359. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  360. if durStr == "" {
  361. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  362. }
  363. queryClusterManagementDuration := fmt.Sprintf(clusterManagementDurationQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  364. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementDuration)
  365. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  366. return source.NewFuture(source.DecodeClusterManagementDurationResult, ctx.QueryAtTime(queryClusterManagementDuration, end))
  367. }
  368. func (pds *PrometheusMetricsQuerier) QueryClusterManagementPricePerHr(start, end time.Time) *source.Future[source.ClusterManagementPricePerHrResult] {
  369. const queryName = "QueryClusterManagementPricePerHr"
  370. const clusterManagementCostQuery = `avg(avg_over_time(kubecost_cluster_management_cost{%s}[%s])) by (%s, provisioner_name)`
  371. cfg := pds.promConfig
  372. durStr := timeutil.DurationString(end.Sub(start))
  373. if durStr == "" {
  374. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  375. }
  376. queryClusterManagementCost := fmt.Sprintf(clusterManagementCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  377. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementCost)
  378. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  379. return source.NewFuture(source.DecodeClusterManagementPricePerHrResult, ctx.QueryAtTime(queryClusterManagementCost, end))
  380. }
// AllocationMetricQuerier implementation: queries for per-container allocation metrics.
  382. func (pds *PrometheusMetricsQuerier) QueryPods(start, end time.Time) *source.Future[source.PodsResult] {
  383. const queryName = "QueryPods"
  384. const queryFmtPods = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  385. cfg := pds.promConfig
  386. minsPerResolution := cfg.DataResolutionMinutes
  387. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  388. if durStr == "" {
  389. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  390. }
  391. queryPods := fmt.Sprintf(queryFmtPods, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  392. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPods)
  393. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  394. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPods, end))
  395. }
  396. func (pds *PrometheusMetricsQuerier) QueryPodsUID(start, end time.Time) *source.Future[source.PodsResult] {
  397. const queryName = "QueryPodsUID"
  398. const queryFmtPodsUID = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  399. cfg := pds.promConfig
  400. minsPerResolution := cfg.DataResolutionMinutes
  401. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  402. if durStr == "" {
  403. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  404. }
  405. queryPodsUID := fmt.Sprintf(queryFmtPodsUID, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  406. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsUID)
  407. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  408. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPodsUID, end))
  409. }
  410. func (pds *PrometheusMetricsQuerier) QueryRAMBytesAllocated(start, end time.Time) *source.Future[source.RAMBytesAllocatedResult] {
  411. const queryName = "QueryRAMBytesAllocated"
  412. const queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s, provider_id)`
  413. cfg := pds.promConfig
  414. durStr := timeutil.DurationString(end.Sub(start))
  415. if durStr == "" {
  416. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  417. }
  418. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  419. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMBytesAllocated)
  420. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  421. return source.NewFuture(source.DecodeRAMBytesAllocatedResult, ctx.QueryAtTime(queryRAMBytesAllocated, end))
  422. }
  423. func (pds *PrometheusMetricsQuerier) QueryRAMRequests(start, end time.Time) *source.Future[source.RAMRequestsResult] {
  424. const queryName = "QueryRAMRequests"
  425. const queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  426. cfg := pds.promConfig
  427. durStr := timeutil.DurationString(end.Sub(start))
  428. if durStr == "" {
  429. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  430. }
  431. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  432. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMRequests)
  433. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  434. return source.NewFuture(source.DecodeRAMRequestsResult, ctx.QueryAtTime(queryRAMRequests, end))
  435. }
  436. func (pds *PrometheusMetricsQuerier) QueryRAMLimits(start, end time.Time) *source.Future[source.RAMLimitsResult] {
  437. const queryName = "QueryRAMLimits"
  438. const queryFmtRAMLimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  439. cfg := pds.promConfig
  440. durStr := timeutil.DurationString(end.Sub(start))
  441. if durStr == "" {
  442. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  443. }
  444. queryRAMLimits := fmt.Sprintf(queryFmtRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  445. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMLimits)
  446. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  447. return source.NewFuture(source.DecodeRAMLimitsResult, ctx.QueryAtTime(queryRAMLimits, end))
  448. }
  449. func (pds *PrometheusMetricsQuerier) QueryRAMUsageAvg(start, end time.Time) *source.Future[source.RAMUsageAvgResult] {
  450. const queryName = "QueryRAMUsageAvg"
  451. const queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  452. cfg := pds.promConfig
  453. durStr := timeutil.DurationString(end.Sub(start))
  454. if durStr == "" {
  455. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  456. }
  457. queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  458. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageAvg)
  459. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  460. return source.NewFuture(source.DecodeRAMUsageAvgResult, ctx.QueryAtTime(queryRAMUsageAvg, end))
  461. }
  462. func (pds *PrometheusMetricsQuerier) QueryRAMUsageMax(start, end time.Time) *source.Future[source.RAMUsageMaxResult] {
  463. const queryName = "QueryRAMUsageMax"
  464. const queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  465. cfg := pds.promConfig
  466. durStr := timeutil.DurationString(end.Sub(start))
  467. if durStr == "" {
  468. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  469. }
  470. queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  471. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageMax)
  472. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  473. return source.NewFuture(source.DecodeRAMUsageMaxResult, ctx.QueryAtTime(queryRAMUsageMax, end))
  474. }
  475. func (pds *PrometheusMetricsQuerier) QueryCPUCoresAllocated(start, end time.Time) *source.Future[source.CPUCoresAllocatedResult] {
  476. const queryName = "QueryCPUCoresAllocated"
  477. const queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  478. cfg := pds.promConfig
  479. durStr := timeutil.DurationString(end.Sub(start))
  480. if durStr == "" {
  481. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  482. }
  483. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  484. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUCoresAllocated)
  485. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  486. return source.NewFuture(source.DecodeCPUCoresAllocatedResult, ctx.QueryAtTime(queryCPUCoresAllocated, end))
  487. }
  488. func (pds *PrometheusMetricsQuerier) QueryCPURequests(start, end time.Time) *source.Future[source.CPURequestsResult] {
  489. const queryName = "QueryCPURequests"
  490. const queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  491. cfg := pds.promConfig
  492. durStr := timeutil.DurationString(end.Sub(start))
  493. if durStr == "" {
  494. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  495. }
  496. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  497. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPURequests)
  498. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  499. return source.NewFuture(source.DecodeCPURequestsResult, ctx.QueryAtTime(queryCPURequests, end))
  500. }
  501. func (pds *PrometheusMetricsQuerier) QueryCPULimits(start, end time.Time) *source.Future[source.CPULimitsResult] {
  502. const queryName = "QueryCPULimits"
  503. const queryFmtCPULimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  504. cfg := pds.promConfig
  505. durStr := timeutil.DurationString(end.Sub(start))
  506. if durStr == "" {
  507. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  508. }
  509. queryCPULimits := fmt.Sprintf(queryFmtCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  510. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPULimits)
  511. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  512. return source.NewFuture(source.DecodeCPULimitsResult, ctx.QueryAtTime(queryCPULimits, end))
  513. }
  514. func (pds *PrometheusMetricsQuerier) QueryCPUUsageAvg(start, end time.Time) *source.Future[source.CPUUsageAvgResult] {
  515. const queryName = "QueryCPUUsageAvg"
  516. const queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  517. cfg := pds.promConfig
  518. durStr := timeutil.DurationString(end.Sub(start))
  519. if durStr == "" {
  520. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  521. }
  522. queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  523. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageAvg)
  524. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  525. return source.NewFuture(source.DecodeCPUUsageAvgResult, ctx.QueryAtTime(queryCPUUsageAvg, end))
  526. }
  527. func (pds *PrometheusMetricsQuerier) QueryCPUUsageMax(start, end time.Time) *source.Future[source.CPUUsageMaxResult] {
  528. const queryName = "QueryCPUUsageMax"
  529. // Because we use container_cpu_usage_seconds_total to calculate CPU usage
  530. // at any given "instant" of time, we need to use an irate or rate. To then
  531. // calculate a max (or any aggregation) we have to perform an aggregation
  532. // query on top of an instant-by-instant maximum. Prometheus supports this
  533. // type of query with a "subquery" [1], however it is reportedly expensive
  534. // to make such a query. By default, Kubecost's Prometheus config includes
  535. // a recording rule that keeps track of the instant-by-instant irate for CPU
  536. // usage. The metric in this query is created by that recording rule.
  537. //
  538. // [1] https://prometheus.io/blog/2019/01/28/subquery-support/
  539. //
  540. // If changing the name of the recording rule, make sure to update the
  541. // corresponding diagnostic query to avoid confusion.
  542. const queryFmtCPUUsageMaxRecordingRule = `max(max_over_time(kubecost_container_cpu_usage_irate{%s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  543. // This is the subquery equivalent of the above recording rule query. It is
  544. // more expensive, but does not require the recording rule. It should be
  545. // used as a fallback query if the recording rule data does not exist.
  546. //
  547. // The parameter after the colon [:<thisone>] in the subquery affects the
  548. // resolution of the subquery.
  549. // The parameter after the metric ...{}[<thisone>] should be set to 2x
  550. // the resolution, to make sure the irate always has two points to query
  551. // in case the Prom scrape duration has been reduced to be equal to the
  552. // query resolution.
  553. const queryFmtCPUUsageMaxSubquery = `max(max_over_time(irate(container_cpu_usage_seconds_total{container!="POD", container!="", %s}[%dm])[%s:%dm])) by (container, pod_name, pod, namespace, node, instance, uid, %s)`
  554. cfg := pds.promConfig
  555. durStr := timeutil.DurationString(end.Sub(start))
  556. if durStr == "" {
  557. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  558. }
  559. queryCPUUsageMaxRecordingRule := fmt.Sprintf(queryFmtCPUUsageMaxRecordingRule, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  560. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxRecordingRule)
  561. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  562. resCPUUsageMaxRR := ctx.QueryAtTime(queryCPUUsageMaxRecordingRule, end)
  563. resCPUUsageMax, _ := resCPUUsageMaxRR.Await()
  564. if len(resCPUUsageMax) > 0 {
  565. return source.NewFutureFrom(source.DecodeAll(resCPUUsageMax, source.DecodeCPUUsageMaxResult))
  566. }
  567. minsPerResolution := cfg.DataResolutionMinutes
  568. durStr = pds.durationStringFor(start, end, minsPerResolution, false)
  569. if durStr == "" {
  570. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  571. }
  572. queryCPUUsageMaxSubquery := fmt.Sprintf(queryFmtCPUUsageMaxSubquery, cfg.ClusterFilter, 2*minsPerResolution, durStr, minsPerResolution, cfg.ClusterLabel)
  573. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxSubquery)
  574. return source.NewFuture(source.DecodeCPUUsageMaxResult, ctx.QueryAtTime(queryCPUUsageMaxSubquery, end))
  575. }
  576. func (pds *PrometheusMetricsQuerier) QueryGPUsRequested(start, end time.Time) *source.Future[source.GPUsRequestedResult] {
  577. const queryName = "QueryGPUsRequested"
  578. const queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  579. cfg := pds.promConfig
  580. durStr := timeutil.DurationString(end.Sub(start))
  581. if durStr == "" {
  582. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  583. }
  584. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  585. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsRequested)
  586. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  587. return source.NewFuture(source.DecodeGPUsRequestedResult, ctx.QueryAtTime(queryGPUsRequested, end))
  588. }
  589. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageAvg(start, end time.Time) *source.Future[source.GPUsUsageAvgResult] {
  590. const queryName = "QueryGPUsUsageAvg"
  591. const queryFmtGPUsUsageAvg = `avg(avg_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  592. cfg := pds.promConfig
  593. durStr := timeutil.DurationString(end.Sub(start))
  594. if durStr == "" {
  595. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  596. }
  597. queryGPUsUsageAvg := fmt.Sprintf(queryFmtGPUsUsageAvg, durStr, cfg.ClusterLabel)
  598. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageAvg)
  599. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  600. return source.NewFuture(source.DecodeGPUsUsageAvgResult, ctx.QueryAtTime(queryGPUsUsageAvg, end))
  601. }
  602. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageMax(start, end time.Time) *source.Future[source.GPUsUsageMaxResult] {
  603. const queryName = "QueryGPUsUsageMax"
  604. const queryFmtGPUsUsageMax = `max(max_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  605. cfg := pds.promConfig
  606. durStr := timeutil.DurationString(end.Sub(start))
  607. if durStr == "" {
  608. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  609. }
  610. queryGPUsUsageMax := fmt.Sprintf(queryFmtGPUsUsageMax, durStr, cfg.ClusterLabel)
  611. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageMax)
  612. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  613. return source.NewFuture(source.DecodeGPUsUsageMaxResult, ctx.QueryAtTime(queryGPUsUsageMax, end))
  614. }
  615. func (pds *PrometheusMetricsQuerier) QueryGPUsAllocated(start, end time.Time) *source.Future[source.GPUsAllocatedResult] {
  616. const queryName = "QueryGPUsAllocated"
  617. const queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  618. cfg := pds.promConfig
  619. durStr := timeutil.DurationString(end.Sub(start))
  620. if durStr == "" {
  621. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  622. }
  623. queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  624. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsAllocated)
  625. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  626. return source.NewFuture(source.DecodeGPUsAllocatedResult, ctx.QueryAtTime(queryGPUsAllocated, end))
  627. }
  628. func (pds *PrometheusMetricsQuerier) QueryIsGPUShared(start, end time.Time) *source.Future[source.IsGPUSharedResult] {
  629. const queryName = "QueryIsGPUShared"
  630. const queryFmtIsGPUShared = `avg(avg_over_time(kube_pod_container_resource_requests{container!="", node != "", pod != "", container!= "", unit = "integer", %s}[%s])) by (container, pod, namespace, node, resource, uid, %s)`
  631. cfg := pds.promConfig
  632. durStr := timeutil.DurationString(end.Sub(start))
  633. if durStr == "" {
  634. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  635. }
  636. queryIsGPUShared := fmt.Sprintf(queryFmtIsGPUShared, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  637. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryIsGPUShared)
  638. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  639. return source.NewFuture(source.DecodeIsGPUSharedResult, ctx.QueryAtTime(queryIsGPUShared, end))
  640. }
  641. func (pds *PrometheusMetricsQuerier) QueryGPUInfo(start, end time.Time) *source.Future[source.GPUInfoResult] {
  642. const queryName = "QueryGPUInfo"
  643. const queryFmtGetGPUInfo = `avg(avg_over_time(DCGM_FI_DEV_DEC_UTIL{container!="",%s}[%s])) by (container, pod, namespace, device, modelName, UUID, uid, %s)`
  644. cfg := pds.promConfig
  645. durStr := timeutil.DurationString(end.Sub(start))
  646. if durStr == "" {
  647. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  648. }
  649. queryGetGPUInfo := fmt.Sprintf(queryFmtGetGPUInfo, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  650. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGetGPUInfo)
  651. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  652. return source.NewFuture(source.DecodeGPUInfoResult, ctx.QueryAtTime(queryGetGPUInfo, end))
  653. }
  654. func (pds *PrometheusMetricsQuerier) QueryNodeCPUPricePerHr(start, end time.Time) *source.Future[source.NodeCPUPricePerHrResult] {
  655. const queryName = "QueryNodeCPUPricePerHr"
  656. const queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  657. cfg := pds.promConfig
  658. durStr := timeutil.DurationString(end.Sub(start))
  659. if durStr == "" {
  660. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  661. }
  662. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  663. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerCPUHr)
  664. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  665. return source.NewFuture(source.DecodeNodeCPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerCPUHr, end))
  666. }
  667. func (pds *PrometheusMetricsQuerier) QueryNodeRAMPricePerGiBHr(start, end time.Time) *source.Future[source.NodeRAMPricePerGiBHrResult] {
  668. const queryName = "QueryNodeRAMPricePerGiBHr"
  669. const queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  670. cfg := pds.promConfig
  671. durStr := timeutil.DurationString(end.Sub(start))
  672. if durStr == "" {
  673. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  674. }
  675. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  676. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerRAMGiBHr)
  677. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  678. return source.NewFuture(source.DecodeNodeRAMPricePerGiBHrResult, ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end))
  679. }
  680. func (pds *PrometheusMetricsQuerier) QueryNodeGPUPricePerHr(start, end time.Time) *source.Future[source.NodeGPUPricePerHrResult] {
  681. const queryName = "QueryNodeGPUPricePerHr"
  682. const queryFmtNodeCostPerGPUHr = `avg(avg_over_time(node_gpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  683. cfg := pds.promConfig
  684. durStr := timeutil.DurationString(end.Sub(start))
  685. if durStr == "" {
  686. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  687. }
  688. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  689. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerGPUHr)
  690. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  691. return source.NewFuture(source.DecodeNodeGPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerGPUHr, end))
  692. }
  693. func (pds *PrometheusMetricsQuerier) QueryNodeIsSpot(start, end time.Time) *source.Future[source.NodeIsSpotResult] {
  694. const queryName = "QueryNodeIsSpot"
  695. const queryFmtNodeIsSpot = `avg_over_time(kubecost_node_is_spot{%s}[%s])`
  696. cfg := pds.promConfig
  697. durStr := timeutil.DurationString(end.Sub(start))
  698. if durStr == "" {
  699. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  700. }
  701. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, cfg.ClusterFilter, durStr)
  702. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeIsSpot)
  703. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  704. return source.NewFuture(source.DecodeNodeIsSpotResult, ctx.QueryAtTime(queryNodeIsSpot, end))
  705. }
  706. func (pds *PrometheusMetricsQuerier) QueryPodPVCAllocation(start, end time.Time) *source.Future[source.PodPVCAllocationResult] {
  707. const queryName = "QueryPodPVCAllocation"
  708. const queryFmtPodPVCAllocation = `avg(avg_over_time(pod_pvc_allocation{%s}[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, uid, %s)`
  709. cfg := pds.promConfig
  710. durStr := timeutil.DurationString(end.Sub(start))
  711. if durStr == "" {
  712. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  713. }
  714. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  715. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodPVCAllocation)
  716. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  717. return source.NewFuture(source.DecodePodPVCAllocationResult, ctx.QueryAtTime(queryPodPVCAllocation, end))
  718. }
  719. func (pds *PrometheusMetricsQuerier) QueryPVCBytesRequested(start, end time.Time) *source.Future[source.PVCBytesRequestedResult] {
  720. const queryName = "QueryPVCBytesRequested"
  721. const queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{%s}[%s])) by (persistentvolumeclaim, namespace, uid, %s)`
  722. cfg := pds.promConfig
  723. durStr := timeutil.DurationString(end.Sub(start))
  724. if durStr == "" {
  725. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  726. }
  727. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  728. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCBytesRequested)
  729. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  730. return source.NewFuture(source.DecodePVCBytesRequestedResult, ctx.QueryAtTime(queryPVCBytesRequested, end))
  731. }
  732. func (pds *PrometheusMetricsQuerier) QueryPVBytes(start, end time.Time) *source.Future[source.PVBytesResult] {
  733. const queryName = "QueryPVBytes"
  734. const queryFmtPVBytes = `avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s])) by (persistentvolume, uid, %s)`
  735. cfg := pds.promConfig
  736. durStr := timeutil.DurationString(end.Sub(start))
  737. if durStr == "" {
  738. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  739. }
  740. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  741. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVBytes)
  742. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  743. return source.NewFuture(source.DecodePVBytesResult, ctx.QueryAtTime(queryPVBytes, end))
  744. }
  745. func (pds *PrometheusMetricsQuerier) QueryPVInfo(start, end time.Time) *source.Future[source.PVInfoResult] {
  746. const queryName = "QueryPVInfo"
  747. const queryFmtPVMeta = `avg(avg_over_time(kubecost_pv_info{%s}[%s])) by (%s, storageclass, persistentvolume, uid, provider_id)`
  748. cfg := pds.promConfig
  749. durStr := timeutil.DurationString(end.Sub(start))
  750. if durStr == "" {
  751. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  752. }
  753. queryPVMeta := fmt.Sprintf(queryFmtPVMeta, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  754. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVMeta)
  755. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  756. return source.NewFuture(source.DecodePVInfoResult, ctx.QueryAtTime(queryPVMeta, end))
  757. }
  758. func (pds *PrometheusMetricsQuerier) QueryNetZoneGiB(start, end time.Time) *source.Future[source.NetZoneGiBResult] {
  759. const queryName = "QueryNetZoneGiB"
  760. const queryFmtNetZoneGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  761. cfg := pds.promConfig
  762. minsPerResolution := cfg.DataResolutionMinutes
  763. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  764. if durStr == "" {
  765. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  766. }
  767. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  768. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneGiB)
  769. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  770. return source.NewFuture(source.DecodeNetZoneGiBResult, ctx.QueryAtTime(queryNetZoneGiB, end))
  771. }
  772. func (pds *PrometheusMetricsQuerier) QueryNetZonePricePerGiB(start, end time.Time) *source.Future[source.NetZonePricePerGiBResult] {
  773. const queryName = "QueryNetZonePricePerGiB"
  774. const queryFmtNetZoneCostPerGiB = `avg(avg_over_time(kubecost_network_zone_egress_cost{%s}[%s])) by (%s)`
  775. cfg := pds.promConfig
  776. durStr := timeutil.DurationString(end.Sub(start))
  777. if durStr == "" {
  778. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  779. }
  780. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  781. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  782. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  783. return source.NewFuture(source.DecodeNetZonePricePerGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  784. }
  785. func (pds *PrometheusMetricsQuerier) QueryNetRegionGiB(start, end time.Time) *source.Future[source.NetRegionGiBResult] {
  786. const queryName = "QueryNetRegionGiB"
  787. const queryFmtNetRegionGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  788. cfg := pds.promConfig
  789. minsPerResolution := cfg.DataResolutionMinutes
  790. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  791. if durStr == "" {
  792. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  793. }
  794. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  795. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionGiB)
  796. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  797. return source.NewFuture(source.DecodeNetRegionGiBResult, ctx.QueryAtTime(queryNetRegionGiB, end))
  798. }
  799. func (pds *PrometheusMetricsQuerier) QueryNetRegionPricePerGiB(start, end time.Time) *source.Future[source.NetRegionPricePerGiBResult] {
  800. const queryName = "QueryNetRegionPricePerGiB"
  801. const queryFmtNetRegionCostPerGiB = `avg(avg_over_time(kubecost_network_region_egress_cost{%s}[%s])) by (%s)`
  802. cfg := pds.promConfig
  803. durStr := timeutil.DurationString(end.Sub(start))
  804. if durStr == "" {
  805. panic("failed to parse duration string passed to QueryNetRegionPricePerGiB")
  806. }
  807. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  808. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionCostPerGiB)
  809. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  810. return source.NewFuture(source.DecodeNetRegionPricePerGiBResult, ctx.QueryAtTime(queryNetRegionCostPerGiB, end))
  811. }
  812. func (pds *PrometheusMetricsQuerier) QueryNetInternetGiB(start, end time.Time) *source.Future[source.NetInternetGiBResult] {
  813. const queryName = "QueryNetInternetGiB"
  814. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  815. cfg := pds.promConfig
  816. minsPerResolution := cfg.DataResolutionMinutes
  817. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  818. if durStr == "" {
  819. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  820. }
  821. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  822. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  823. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  824. return source.NewFuture(source.DecodeNetInternetGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  825. }
  826. func (pds *PrometheusMetricsQuerier) QueryNetInternetPricePerGiB(start, end time.Time) *source.Future[source.NetInternetPricePerGiBResult] {
  827. const queryName = "QueryNetInternetPricePerGiB"
  828. const queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)`
  829. cfg := pds.promConfig
  830. durStr := timeutil.DurationString(end.Sub(start))
  831. if durStr == "" {
  832. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  833. }
  834. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  835. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetCostPerGiB)
  836. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  837. return source.NewFuture(source.DecodeNetInternetPricePerGiBResult, ctx.QueryAtTime(queryNetInternetCostPerGiB, end))
  838. }
  839. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceGiB(start, end time.Time) *source.Future[source.NetInternetServiceGiBResult] {
  840. const queryName = "QueryNetInternetServiceGiB"
  841. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  842. cfg := pds.promConfig
  843. minsPerResolution := cfg.DataResolutionMinutes
  844. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  845. if durStr == "" {
  846. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  847. }
  848. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  849. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  850. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  851. return source.NewFuture(source.DecodeNetInternetServiceGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  852. }
  853. func (pds *PrometheusMetricsQuerier) QueryNetTransferBytes(start, end time.Time) *source.Future[source.NetTransferBytesResult] {
  854. const queryName = "QueryNetTransferBytes"
  855. const queryFmtNetTransferBytes = `sum(increase(container_network_transmit_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  856. cfg := pds.promConfig
  857. minsPerResolution := cfg.DataResolutionMinutes
  858. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  859. if durStr == "" {
  860. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  861. }
  862. queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  863. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetTransferBytes)
  864. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  865. return source.NewFuture(source.DecodeNetTransferBytesResult, ctx.QueryAtTime(queryNetTransferBytes, end))
  866. }
  867. func (pds *PrometheusMetricsQuerier) QueryNetZoneIngressGiB(start, end time.Time) *source.Future[source.NetZoneIngressGiBResult] {
  868. const queryName = "QueryNetZoneIngressGiB"
  869. const queryFmtIngNetZoneGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  870. cfg := pds.promConfig
  871. minsPerResolution := cfg.DataResolutionMinutes
  872. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  873. if durStr == "" {
  874. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  875. }
  876. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtIngNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  877. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  878. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  879. return source.NewFuture(source.DecodeNetZoneIngressGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  880. }
  881. func (pds *PrometheusMetricsQuerier) QueryNetRegionIngressGiB(start, end time.Time) *source.Future[source.NetRegionIngressGiBResult] {
  882. const queryName = "QueryNetRegionIngressGiB"
  883. const queryFmtIngNetRegionGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  884. cfg := pds.promConfig
  885. minsPerResolution := cfg.DataResolutionMinutes
  886. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  887. if durStr == "" {
  888. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  889. }
  890. queryNetRegionIngGiB := fmt.Sprintf(queryFmtIngNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  891. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionIngGiB)
  892. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  893. return source.NewFuture(source.DecodeNetRegionIngressGiBResult, ctx.QueryAtTime(queryNetRegionIngGiB, end))
  894. }
  895. func (pds *PrometheusMetricsQuerier) QueryNetInternetIngressGiB(start, end time.Time) *source.Future[source.NetInternetIngressGiBResult] {
  896. const queryName = "QueryNetInternetIngressGiB"
  897. const queryFmtNetIngInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  898. cfg := pds.promConfig
  899. minsPerResolution := cfg.DataResolutionMinutes
  900. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  901. if durStr == "" {
  902. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  903. }
  904. queryNetIngInternetGiB := fmt.Sprintf(queryFmtNetIngInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  905. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  906. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  907. return source.NewFuture(source.DecodeNetInternetIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  908. }
  909. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceIngressGiB(start, end time.Time) *source.Future[source.NetInternetServiceIngressGiBResult] {
  910. const queryName = "QueryNetInternetServiceIngressGiB"
  911. const queryFmtIngNetInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  912. cfg := pds.promConfig
  913. minsPerResolution := cfg.DataResolutionMinutes
  914. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  915. if durStr == "" {
  916. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  917. }
  918. queryNetIngInternetGiB := fmt.Sprintf(queryFmtIngNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  919. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  920. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  921. return source.NewFuture(source.DecodeNetInternetServiceIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  922. }
  923. func (pds *PrometheusMetricsQuerier) QueryNetReceiveBytes(start, end time.Time) *source.Future[source.NetReceiveBytesResult] {
  924. const queryName = "QueryNetReceiveBytes"
  925. const queryFmtNetReceiveBytes = `sum(increase(container_network_receive_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  926. cfg := pds.promConfig
  927. minsPerResolution := cfg.DataResolutionMinutes
  928. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  929. if durStr == "" {
  930. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  931. }
  932. queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  933. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetReceiveBytes)
  934. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  935. return source.NewFuture(source.DecodeNetReceiveBytesResult, ctx.QueryAtTime(queryNetReceiveBytes, end))
  936. }
  937. func (pds *PrometheusMetricsQuerier) QueryNamespaceLabels(start, end time.Time) *source.Future[source.NamespaceLabelsResult] {
  938. const queryName = "QueryNamespaceLabels"
  939. const queryFmtNamespaceLabels = `avg_over_time(kube_namespace_labels{%s}[%s])`
  940. cfg := pds.promConfig
  941. durStr := timeutil.DurationString(end.Sub(start))
  942. if durStr == "" {
  943. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  944. }
  945. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, cfg.ClusterFilter, durStr)
  946. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceLabels)
  947. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  948. return source.NewFuture(source.DecodeNamespaceLabelsResult, ctx.QueryAtTime(queryNamespaceLabels, end))
  949. }
  950. func (pds *PrometheusMetricsQuerier) QueryNamespaceAnnotations(start, end time.Time) *source.Future[source.NamespaceAnnotationsResult] {
  951. const queryName = "QueryNamespaceAnnotations"
  952. const queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations{%s}[%s])`
  953. cfg := pds.promConfig
  954. durStr := timeutil.DurationString(end.Sub(start))
  955. if durStr == "" {
  956. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  957. }
  958. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, cfg.ClusterFilter, durStr)
  959. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceAnnotations)
  960. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  961. return source.NewFuture(source.DecodeNamespaceAnnotationsResult, ctx.QueryAtTime(queryNamespaceAnnotations, end))
  962. }
  963. func (pds *PrometheusMetricsQuerier) QueryPodLabels(start, end time.Time) *source.Future[source.PodLabelsResult] {
  964. const queryName = "QueryPodLabels"
  965. const queryFmtPodLabels = `avg_over_time(kube_pod_labels{%s}[%s])`
  966. cfg := pds.promConfig
  967. durStr := timeutil.DurationString(end.Sub(start))
  968. if durStr == "" {
  969. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  970. }
  971. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, cfg.ClusterFilter, durStr)
  972. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodLabels)
  973. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  974. return source.NewFuture(source.DecodePodLabelsResult, ctx.QueryAtTime(queryPodLabels, end))
  975. }
  976. func (pds *PrometheusMetricsQuerier) QueryPodAnnotations(start, end time.Time) *source.Future[source.PodAnnotationsResult] {
  977. const queryName = "QueryPodAnnotations"
  978. const queryFmtPodAnnotations = `avg_over_time(kube_pod_annotations{%s}[%s])`
  979. cfg := pds.promConfig
  980. durStr := timeutil.DurationString(end.Sub(start))
  981. if durStr == "" {
  982. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  983. }
  984. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, cfg.ClusterFilter, durStr)
  985. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodAnnotations)
  986. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  987. return source.NewFuture(source.DecodePodAnnotationsResult, ctx.QueryAtTime(queryPodAnnotations, end))
  988. }
  989. func (pds *PrometheusMetricsQuerier) QueryServiceLabels(start, end time.Time) *source.Future[source.ServiceLabelsResult] {
  990. const queryName = "QueryServiceLabels"
  991. const queryFmtServiceLabels = `avg_over_time(service_selector_labels{%s}[%s])`
  992. cfg := pds.promConfig
  993. durStr := timeutil.DurationString(end.Sub(start))
  994. if durStr == "" {
  995. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  996. }
  997. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, cfg.ClusterFilter, durStr)
  998. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryServiceLabels)
  999. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1000. return source.NewFuture(source.DecodeServiceLabelsResult, ctx.QueryAtTime(queryServiceLabels, end))
  1001. }
  1002. func (pds *PrometheusMetricsQuerier) QueryDeploymentLabels(start, end time.Time) *source.Future[source.DeploymentLabelsResult] {
  1003. const queryName = "QueryDeploymentLabels"
  1004. const queryFmtDeploymentLabels = `avg_over_time(deployment_match_labels{%s}[%s])`
  1005. cfg := pds.promConfig
  1006. durStr := timeutil.DurationString(end.Sub(start))
  1007. if durStr == "" {
  1008. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1009. }
  1010. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, cfg.ClusterFilter, durStr)
  1011. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDeploymentLabels)
  1012. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1013. return source.NewFuture(source.DecodeDeploymentLabelsResult, ctx.QueryAtTime(queryDeploymentLabels, end))
  1014. }
  1015. func (pds *PrometheusMetricsQuerier) QueryStatefulSetLabels(start, end time.Time) *source.Future[source.StatefulSetLabelsResult] {
  1016. const queryName = "QueryStatefulSetLabels"
  1017. const queryFmtStatefulSetLabels = `avg_over_time(statefulSet_match_labels{%s}[%s])`
  1018. cfg := pds.promConfig
  1019. durStr := timeutil.DurationString(end.Sub(start))
  1020. if durStr == "" {
  1021. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1022. }
  1023. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, cfg.ClusterFilter, durStr)
  1024. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryStatefulSetLabels)
  1025. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1026. return source.NewFuture(source.DecodeStatefulSetLabelsResult, ctx.QueryAtTime(queryStatefulSetLabels, end))
  1027. }
  1028. func (pds *PrometheusMetricsQuerier) QueryDaemonSetLabels(start, end time.Time) *source.Future[source.DaemonSetLabelsResult] {
  1029. const queryName = "QueryDaemonSetLabels"
  1030. const queryFmtDaemonSetLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1031. cfg := pds.promConfig
  1032. durStr := timeutil.DurationString(end.Sub(start))
  1033. if durStr == "" {
  1034. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1035. }
  1036. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1037. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDaemonSetLabels)
  1038. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1039. return source.NewFuture(source.DecodeDaemonSetLabelsResult, ctx.QueryAtTime(queryDaemonSetLabels, end))
  1040. }
  1041. func (pds *PrometheusMetricsQuerier) QueryJobLabels(start, end time.Time) *source.Future[source.JobLabelsResult] {
  1042. const queryName = "QueryJobLabels"
  1043. const queryFmtJobLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="Job", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1044. cfg := pds.promConfig
  1045. durStr := timeutil.DurationString(end.Sub(start))
  1046. if durStr == "" {
  1047. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1048. }
  1049. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1050. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryJobLabels)
  1051. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1052. return source.NewFuture(source.DecodeJobLabelsResult, ctx.QueryAtTime(queryJobLabels, end))
  1053. }
  1054. func (pds *PrometheusMetricsQuerier) QueryPodsWithReplicaSetOwner(start, end time.Time) *source.Future[source.PodsWithReplicaSetOwnerResult] {
  1055. const queryName = "QueryPodsWithReplicaSetOwner"
  1056. const queryFmtPodsWithReplicaSetOwner = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1057. cfg := pds.promConfig
  1058. durStr := timeutil.DurationString(end.Sub(start))
  1059. if durStr == "" {
  1060. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1061. }
  1062. queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1063. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsWithReplicaSetOwner)
  1064. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1065. return source.NewFuture(source.DecodePodsWithReplicaSetOwnerResult, ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end))
  1066. }
  1067. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithoutOwners(start, end time.Time) *source.Future[source.ReplicaSetsWithoutOwnersResult] {
  1068. const queryName = "QueryReplicaSetsWithoutOwners"
  1069. const queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>", %s}[%s])) by (replicaset, namespace, uid, %s)`
  1070. cfg := pds.promConfig
  1071. durStr := timeutil.DurationString(end.Sub(start))
  1072. if durStr == "" {
  1073. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1074. }
  1075. queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1076. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithoutOwners)
  1077. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1078. return source.NewFuture(source.DecodeReplicaSetsWithoutOwnersResult, ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end))
  1079. }
  1080. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithRollout(start, end time.Time) *source.Future[source.ReplicaSetsWithRolloutResult] {
  1081. const queryName = "QueryReplicaSetsWithRollout"
  1082. const queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout", %s}[%s])) by (replicaset, namespace, owner_kind, owner_name, uid, %s)`
  1083. cfg := pds.promConfig
  1084. durStr := timeutil.DurationString(end.Sub(start))
  1085. if durStr == "" {
  1086. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1087. }
  1088. queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1089. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithRolloutOwner)
  1090. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1091. return source.NewFuture(source.DecodeReplicaSetsWithRolloutResult, ctx.QueryAtTime(queryReplicaSetsWithRolloutOwner, end))
  1092. }
  1093. // Note: The ResourceQuota metrics are _not_ emitted at the moment. Leaving the query implementations here in case we add metric emission later on.
  1094. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestAvgResult] {
  1095. const queryName = "QueryResourceQuotaSpecCPURequestAverage"
  1096. const queryFmtResourceQuotaSpecCPURequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1097. cfg := pds.promConfig
  1098. durStr := timeutil.DurationString(end.Sub(start))
  1099. if durStr == "" {
  1100. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1101. }
  1102. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1103. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1104. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1105. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1106. }
  1107. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestMaxResult] {
  1108. const queryName = "QueryResourceQuotaSpecCPURequestMax"
  1109. const queryFmtResourceQuotaSpecCPURequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1110. cfg := pds.promConfig
  1111. durStr := timeutil.DurationString(end.Sub(start))
  1112. if durStr == "" {
  1113. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1114. }
  1115. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1116. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1117. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1118. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1119. }
  1120. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestAvgResult] {
  1121. const queryName = "QueryResourceQuotaSpecRAMRequestAverage"
  1122. const queryFmtResourceQuotaSpecRAMRequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1123. cfg := pds.promConfig
  1124. durStr := timeutil.DurationString(end.Sub(start))
  1125. if durStr == "" {
  1126. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1127. }
  1128. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1129. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1130. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1131. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1132. }
  1133. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestMaxResult] {
  1134. const queryName = "QueryResourceQuotaSpecRAMRequestMax"
  1135. const queryFmtResourceQuotaSpecRAMRequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1136. cfg := pds.promConfig
  1137. durStr := timeutil.DurationString(end.Sub(start))
  1138. if durStr == "" {
  1139. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1140. }
  1141. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1142. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1143. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1144. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1145. }
  1146. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitAvgResult] {
  1147. const queryName = "QueryResourceQuotaSpecCPULimitAverage"
  1148. const queryFmtResourceQuotaSpecCPULimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1149. cfg := pds.promConfig
  1150. durStr := timeutil.DurationString(end.Sub(start))
  1151. if durStr == "" {
  1152. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1153. }
  1154. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1155. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1156. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1157. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1158. }
  1159. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitMaxResult] {
  1160. const queryName = "QueryResourceQuotaSpecCPULimitMax"
  1161. const queryFmtResourceQuotaSpecCPULimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1162. cfg := pds.promConfig
  1163. durStr := timeutil.DurationString(end.Sub(start))
  1164. if durStr == "" {
  1165. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1166. }
  1167. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1168. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1169. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1170. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1171. }
  1172. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitAvgResult] {
  1173. const queryName = "QueryResourceQuotaSpecRAMLimitAverage"
  1174. const queryFmtResourceQuotaSpecRAMLimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1175. cfg := pds.promConfig
  1176. durStr := timeutil.DurationString(end.Sub(start))
  1177. if durStr == "" {
  1178. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1179. }
  1180. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1181. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1182. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1183. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1184. }
  1185. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitMaxResult] {
  1186. const queryName = "QueryResourceQuotaSpecRAMLimitMax"
  1187. const queryFmtResourceQuotaSpecRAMLimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1188. cfg := pds.promConfig
  1189. durStr := timeutil.DurationString(end.Sub(start))
  1190. if durStr == "" {
  1191. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1192. }
  1193. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1194. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1195. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1196. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1197. }
  1198. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestAvgResult] {
  1199. const queryName = "QueryResourceQuotaStatusUsedCPURequestAverage"
  1200. const queryFmtResourceQuotaStatusUsedCPURequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1201. cfg := pds.promConfig
  1202. durStr := timeutil.DurationString(end.Sub(start))
  1203. if durStr == "" {
  1204. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1205. }
  1206. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1207. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1208. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1209. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1210. }
  1211. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestMaxResult] {
  1212. const queryName = "QueryResourceQuotaStatusUsedCPURequestMax"
  1213. const queryFmtResourceQuotaStatusUsedCPURequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1214. cfg := pds.promConfig
  1215. durStr := timeutil.DurationString(end.Sub(start))
  1216. if durStr == "" {
  1217. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1218. }
  1219. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1220. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1221. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1222. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1223. }
  1224. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestAvgResult] {
  1225. const queryName = "QueryResourceQuotaStatusUsedRAMRequestAverage"
  1226. const queryFmtResourceQuotaStatusUsedRAMRequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1227. cfg := pds.promConfig
  1228. durStr := timeutil.DurationString(end.Sub(start))
  1229. if durStr == "" {
  1230. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1231. }
  1232. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1233. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1234. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1235. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1236. }
  1237. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestMaxResult] {
  1238. const queryName = "QueryResourceQuotaStatusUsedRAMRequestMax"
  1239. const queryFmtResourceQuotaStatusUsedRAMRequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1240. cfg := pds.promConfig
  1241. durStr := timeutil.DurationString(end.Sub(start))
  1242. if durStr == "" {
  1243. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1244. }
  1245. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1246. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1247. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1248. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1249. }
  1250. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitAvgResult] {
  1251. const queryName = "QueryResourceQuotaStatusUsedCPULimitAverage"
  1252. const queryFmtResourceQuotaStatusUsedCPULimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1253. cfg := pds.promConfig
  1254. durStr := timeutil.DurationString(end.Sub(start))
  1255. if durStr == "" {
  1256. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1257. }
  1258. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1259. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1260. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1261. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1262. }
  1263. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitMaxResult] {
  1264. const queryName = "QueryResourceQuotaStatusUsedCPULimitMax"
  1265. const queryFmtResourceQuotaStatusUsedCPULimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1266. cfg := pds.promConfig
  1267. durStr := timeutil.DurationString(end.Sub(start))
  1268. if durStr == "" {
  1269. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1270. }
  1271. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1272. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1273. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1274. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1275. }
  1276. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitAvgResult] {
  1277. const queryName = "QueryResourceQuotaStatusUsedRAMLimitAverage"
  1278. const queryFmtResourceQuotaStatusUsedRAMLimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1279. cfg := pds.promConfig
  1280. durStr := timeutil.DurationString(end.Sub(start))
  1281. if durStr == "" {
  1282. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1283. }
  1284. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1285. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1286. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1287. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1288. }
  1289. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitMaxResult] {
  1290. const queryName = "QueryResourceQuotaStatusUsedRAMLimitMax"
  1291. const queryFmtResourceQuotaStatusUsedRAMLimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1292. cfg := pds.promConfig
  1293. durStr := timeutil.DurationString(end.Sub(start))
  1294. if durStr == "" {
  1295. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1296. }
  1297. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1298. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1299. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1300. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1301. }
  1302. func (pds *PrometheusMetricsQuerier) QueryDataCoverage(limitDays int) (time.Time, time.Time, error) {
  1303. const (
  1304. queryName = "QueryDataCoverage"
  1305. queryFmtOldestSample = `min_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
  1306. queryFmtNewestSample = `max_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
  1307. )
  1308. cfg := pds.promConfig
  1309. minutesPerDuration := 60
  1310. dur := time.Duration(limitDays) * timeutil.Day
  1311. end := time.Now().UTC().Truncate(timeutil.Day).Add(timeutil.Day)
  1312. start := end.Add(-dur)
  1313. durStr := pds.durationStringFor(start, end, minutesPerDuration, false)
  1314. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1315. queryOldest := fmt.Sprintf(queryFmtOldestSample, cfg.ClusterFilter, durStr, "1h")
  1316. log.Debugf("[Prometheus][%s[Oldest]][At Time: %d]: %s", queryName, end.Unix(), queryOldest)
  1317. resOldestFut := ctx.QueryAtTime(queryOldest, end)
  1318. resOldest, err := resOldestFut.Await()
  1319. if err != nil {
  1320. return time.Time{}, time.Time{}, fmt.Errorf("querying oldest sample: %w", err)
  1321. }
  1322. if len(resOldest) == 0 || len(resOldest[0].Values) == 0 {
  1323. // If node_cpu_hourly_cost metric is not available, fallback to a reasonable time range
  1324. // This prevents CSV export from failing when the metric doesn't exist yet
  1325. log.Warnf("QueryDataCoverage: node_cpu_hourly_cost metric not available, using fallback time range")
  1326. // Use a reasonable fallback: start from 1 day ago to account for metric collection delay
  1327. fallbackEnd := time.Now().UTC().Truncate(timeutil.Day)
  1328. fallbackStart := fallbackEnd.AddDate(0, 0, -1) // 1 day ago
  1329. return fallbackStart, fallbackEnd, nil
  1330. }
  1331. oldest := time.Unix(int64(resOldest[0].Values[0].Value), 0)
  1332. queryNewest := fmt.Sprintf(queryFmtNewestSample, cfg.ClusterFilter, durStr, "1h")
  1333. log.Debugf("[Prometheus][%s[Newest]][At Time: %d]: %s", queryName, end.Unix(), queryNewest)
  1334. resNewestFut := ctx.QueryAtTime(queryNewest, end)
  1335. resNewest, err := resNewestFut.Await()
  1336. if err != nil {
  1337. return time.Time{}, time.Time{}, fmt.Errorf("querying newest sample: %w", err)
  1338. }
  1339. if len(resNewest) == 0 || len(resNewest[0].Values) == 0 {
  1340. // If newest query fails but oldest succeeded, use oldest as both start and end
  1341. // This allows CSV export to proceed with at least some time range
  1342. log.Warnf("QueryDataCoverage: newest sample query returned no results, using oldest timestamp")
  1343. return oldest, oldest, nil
  1344. }
  1345. newest := time.Unix(int64(resNewest[0].Values[0].Value), 0)
  1346. return oldest, newest, nil
  1347. }
  1348. // durationStringFor simplifies the determination of query duration based on the version of prom and if the function
  1349. // in the query needs all data points in the vector it is provided or if it will extrapolate its own. Functions
  1350. // that extrapolate will add on another resolution if given a duration that is one resolution longer than the intended
  1351. // duration.
  1352. func (pds *PrometheusMetricsQuerier) durationStringFor(start, end time.Time, minsPerResolution int, extrapolated bool) string {
  1353. dur := end.Sub(start)
  1354. // If using a version of Prometheus where the resolution needs duration offset,
  1355. // we need to apply that here.
  1356. //
  1357. // E.g. avg(node_total_hourly_cost{}) by (node, provider_id)[60m:5m] with
  1358. // time=01:00:00 will return, for a node running the entire time, 12
  1359. // timestamps where the first is 00:05:00 and the last is 01:00:00.
  1360. // However, OpenCost expects for there to be 13 timestamps where the first
  1361. // begins at 00:00:00. To achieve this, we must modify our query to
  1362. // avg(node_total_hourly_cost{}) by (node, provider_id)[65m:5m]
  1363. if pds.promConfig.IsOffsetResolution && !extrapolated {
  1364. // increase the query time by the resolution
  1365. dur = dur + (time.Duration(minsPerResolution) * time.Minute)
  1366. }
  1367. return timeutil.DurationString(dur)
  1368. }