metricsquerier.go 97 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995
  1. package prom
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/opencost/opencost/core/pkg/log"
  6. "github.com/opencost/opencost/core/pkg/source"
  7. "github.com/opencost/opencost/core/pkg/util/timeutil"
  8. prometheus "github.com/prometheus/client_golang/api"
  9. )
//--------------------------------------------------------------------------
// PrometheusMetricsQuerier
//--------------------------------------------------------------------------

// PrometheusMetricsQueryLogFormat is the log format used to log metric queries before being sent to the prometheus
// instance. Verbs, in order: the query name, the query's end time (unix
// seconds), and the fully-formatted PromQL query string.
const PrometheusMetricsQueryLogFormat = `[PrometheusMetricsQuerier][%s][At Time: %d]: %s`
// PrometheusMetricsQuerier is the implementation of the data source's MetricsQuerier interface for Prometheus.
type PrometheusMetricsQuerier struct {
	// promConfig holds OpenCost-specific prometheus settings (cluster filter,
	// cluster label, data resolution) used to build each query.
	promConfig *OpenCostPrometheusConfig
	// promClient is the prometheus API client. NOTE(review): not referenced
	// by the query methods visible in this section — presumably used elsewhere.
	promClient prometheus.Client
	// promContexts creates the named query contexts through which queries are issued.
	promContexts *ContextFactory
}
  22. func newPrometheusMetricsQuerier(
  23. promConfig *OpenCostPrometheusConfig,
  24. promClient prometheus.Client,
  25. promContexts *ContextFactory,
  26. ) *PrometheusMetricsQuerier {
  27. return &PrometheusMetricsQuerier{
  28. promConfig: promConfig,
  29. promClient: promClient,
  30. promContexts: promContexts,
  31. }
  32. }
  33. func (pds *PrometheusMetricsQuerier) QueryPVPricePerGiBHour(start, end time.Time) *source.Future[source.PVPricePerGiBHourResult] {
  34. const queryName = "QueryPVPricePerGiBHour"
  35. const pvCostQuery = `avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (%s, persistentvolume, volumename, uid, provider_id)`
  36. cfg := pds.promConfig
  37. durStr := timeutil.DurationString(end.Sub(start))
  38. if durStr == "" {
  39. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  40. }
  41. queryPVCost := fmt.Sprintf(pvCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  42. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCost)
  43. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  44. return source.NewFuture(source.DecodePVPricePerGiBHourResult, ctx.QueryAtTime(queryPVCost, end))
  45. }
  46. func (pds *PrometheusMetricsQuerier) QueryPVUsedAverage(start, end time.Time) *source.Future[source.PVUsedAvgResult] {
  47. const queryName = "QueryPVUsedAverage"
  48. const pvUsedAverageQuery = `avg(avg_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  49. cfg := pds.promConfig
  50. durStr := timeutil.DurationString(end.Sub(start))
  51. if durStr == "" {
  52. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  53. }
  54. queryPVUsedAvg := fmt.Sprintf(pvUsedAverageQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  55. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedAvg)
  56. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  57. return source.NewFuture(source.DecodePVUsedAvgResult, ctx.QueryAtTime(queryPVUsedAvg, end))
  58. }
  59. func (pds *PrometheusMetricsQuerier) QueryPVUsedMax(start, end time.Time) *source.Future[source.PVUsedMaxResult] {
  60. const queryName = "QueryPVUsedMax"
  61. const pvUsedMaxQuery = `max(max_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  62. cfg := pds.promConfig
  63. durStr := timeutil.DurationString(end.Sub(start))
  64. if durStr == "" {
  65. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  66. }
  67. queryPVUsedMax := fmt.Sprintf(pvUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  68. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedMax)
  69. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  70. return source.NewFuture(source.DecodePVUsedMaxResult, ctx.QueryAtTime(queryPVUsedMax, end))
  71. }
  72. func (pds *PrometheusMetricsQuerier) QueryPVCInfo(start, end time.Time) *source.Future[source.PVCInfoResult] {
  73. const queryName = "QueryPVCInfo"
  74. const queryFmtPVCInfo = `avg(kube_persistentvolumeclaim_info{volumename != "", %s}) by (persistentvolumeclaim, storageclass, volumename, namespace, uid, %s)[%s:%dm]`
  75. cfg := pds.promConfig
  76. minsPerResolution := cfg.DataResolutionMinutes
  77. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  78. if durStr == "" {
  79. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  80. }
  81. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  82. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCInfo)
  83. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  84. return source.NewFuture(source.DecodePVCInfoResult, ctx.QueryAtTime(queryPVCInfo, end))
  85. }
  86. func (pds *PrometheusMetricsQuerier) QueryPVActiveMinutes(start, end time.Time) *source.Future[source.PVActiveMinutesResult] {
  87. const queryName = "QueryPVActiveMinutes"
  88. const pvActiveMinsQuery = `avg(kube_persistentvolume_capacity_bytes{%s}) by (%s, persistentvolume, uid)[%s:%dm]`
  89. cfg := pds.promConfig
  90. minsPerResolution := cfg.DataResolutionMinutes
  91. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  92. if durStr == "" {
  93. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  94. }
  95. queryPVActiveMins := fmt.Sprintf(pvActiveMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  96. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVActiveMins)
  97. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  98. return source.NewFuture(source.DecodePVActiveMinutesResult, ctx.QueryAtTime(queryPVActiveMins, end))
  99. }
  100. func (pds *PrometheusMetricsQuerier) QueryLocalStorageCost(start, end time.Time) *source.Future[source.LocalStorageCostResult] {
  101. const queryName = "QueryLocalStorageCost"
  102. const localStorageCostQuery = `sum_over_time(sum(container_fs_limit_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`
  103. cfg := pds.promConfig
  104. minsPerResolution := cfg.DataResolutionMinutes
  105. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  106. if durStr == "" {
  107. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  108. }
  109. // hourlyToCumulative is a scaling factor that, when multiplied by an
  110. // hourly value, converts it to a cumulative value; i.e. [$/hr] *
  111. // [min/res]*[hr/min] = [$/res]
  112. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  113. costPerGBHr := 0.04 / 730.0
  114. queryLocalStorageCost := fmt.Sprintf(localStorageCostQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
  115. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageCost)
  116. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  117. return source.NewFuture(source.DecodeLocalStorageCostResult, ctx.QueryAtTime(queryLocalStorageCost, end))
  118. }
  119. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedCost(start, end time.Time) *source.Future[source.LocalStorageUsedCostResult] {
  120. const queryName = "QueryLocalStorageUsedCost"
  121. const localStorageUsedCostQuery = `sum_over_time(sum(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm]) / 1024 / 1024 / 1024 * %f * %f`
  122. cfg := pds.promConfig
  123. minsPerResolution := cfg.DataResolutionMinutes
  124. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  125. if durStr == "" {
  126. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  127. }
  128. // hourlyToCumulative is a scaling factor that, when multiplied by an
  129. // hourly value, converts it to a cumulative value; i.e. [$/hr] *
  130. // [min/res]*[hr/min] = [$/res]
  131. hourlyToCumulative := float64(minsPerResolution) * (1.0 / 60.0)
  132. costPerGBHr := 0.04 / 730.0
  133. queryLocalStorageUsedCost := fmt.Sprintf(localStorageUsedCostQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution, hourlyToCumulative, costPerGBHr)
  134. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedCost)
  135. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  136. return source.NewFuture(source.DecodeLocalStorageUsedCostResult, ctx.QueryAtTime(queryLocalStorageUsedCost, end))
  137. }
  138. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedAvg(start, end time.Time) *source.Future[source.LocalStorageUsedAvgResult] {
  139. const queryName = "QueryLocalStorageUsedAvg"
  140. const localStorageUsedAvgQuery = `avg(sum(avg_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  141. cfg := pds.promConfig
  142. durStr := timeutil.DurationString(end.Sub(start))
  143. if durStr == "" {
  144. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  145. }
  146. queryLocalStorageUsedAvg := fmt.Sprintf(localStorageUsedAvgQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  147. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedAvg)
  148. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  149. return source.NewFuture(source.DecodeLocalStorageUsedAvgResult, ctx.QueryAtTime(queryLocalStorageUsedAvg, end))
  150. }
  151. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedMax(start, end time.Time) *source.Future[source.LocalStorageUsedMaxResult] {
  152. const queryName = "QueryLocalStorageUsedMax"
  153. const localStorageUsedMaxQuery = `max(sum(max_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  154. cfg := pds.promConfig
  155. durStr := timeutil.DurationString(end.Sub(start))
  156. if durStr == "" {
  157. panic("failed to parse duration string passed to QueryLocalStorageUsedMax")
  158. }
  159. queryLocalStorageUsedMax := fmt.Sprintf(localStorageUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  160. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedMax)
  161. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  162. return source.NewFuture(source.DecodeLocalStorageUsedMaxResult, ctx.QueryAtTime(queryLocalStorageUsedMax, end))
  163. }
  164. func (pds *PrometheusMetricsQuerier) QueryLocalStorageBytes(start, end time.Time) *source.Future[source.LocalStorageBytesResult] {
  165. const queryName = "QueryLocalStorageBytes"
  166. const localStorageBytesQuery = `avg_over_time(sum(container_fs_limit_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm])`
  167. cfg := pds.promConfig
  168. minsPerResolution := cfg.DataResolutionMinutes
  169. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  170. if durStr == "" {
  171. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  172. }
  173. queryLocalStorageBytes := fmt.Sprintf(localStorageBytesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  174. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageBytes)
  175. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  176. return source.NewFuture(source.DecodeLocalStorageBytesResult, ctx.QueryAtTime(queryLocalStorageBytes, end))
  177. }
  178. func (pds *PrometheusMetricsQuerier) QueryLocalStorageActiveMinutes(start, end time.Time) *source.Future[source.LocalStorageActiveMinutesResult] {
  179. const queryName = "QueryLocalStorageActiveMinutes"
  180. const localStorageActiveMinutesQuery = `count(node_total_hourly_cost{%s}) by (%s, node, uid, instance, provider_id)[%s:%dm]`
  181. cfg := pds.promConfig
  182. minsPerResolution := cfg.DataResolutionMinutes
  183. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  184. if durStr == "" {
  185. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  186. }
  187. queryLocalStorageActiveMins := fmt.Sprintf(localStorageActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  188. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageActiveMins)
  189. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  190. return source.NewFuture(source.DecodeLocalStorageActiveMinutesResult, ctx.QueryAtTime(queryLocalStorageActiveMins, end))
  191. }
  192. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresCapacity(start, end time.Time) *source.Future[source.NodeCPUCoresCapacityResult] {
  193. const queryName = "QueryNodeCPUCoresCapacity"
  194. const nodeCPUCoresCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_cpu_cores{%s}[%s])) by (%s, node, uid)`
  195. cfg := pds.promConfig
  196. durStr := timeutil.DurationString(end.Sub(start))
  197. if durStr == "" {
  198. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  199. }
  200. queryNodeCPUCoresCapacity := fmt.Sprintf(nodeCPUCoresCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  201. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresCapacity)
  202. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  203. return source.NewFuture(source.DecodeNodeCPUCoresCapacityResult, ctx.QueryAtTime(queryNodeCPUCoresCapacity, end))
  204. }
  205. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresAllocatable(start, end time.Time) *source.Future[source.NodeCPUCoresAllocatableResult] {
  206. const queryName = "QueryNodeCPUCoresAllocatable"
  207. const nodeCPUCoresAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_cpu_cores{%s}[%s])) by (%s, node, uid)`
  208. // `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  209. cfg := pds.promConfig
  210. durStr := timeutil.DurationString(end.Sub(start))
  211. if durStr == "" {
  212. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  213. }
  214. queryNodeCPUCoresAllocatable := fmt.Sprintf(nodeCPUCoresAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  215. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresAllocatable)
  216. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  217. return source.NewFuture(source.DecodeNodeCPUCoresAllocatableResult, ctx.QueryAtTime(queryNodeCPUCoresAllocatable, end))
  218. }
  219. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesCapacity(start, end time.Time) *source.Future[source.NodeRAMBytesCapacityResult] {
  220. const queryName = "QueryNodeRAMBytesCapacity"
  221. const nodeRAMBytesCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_memory_bytes{%s}[%s])) by (%s, node, uid)`
  222. cfg := pds.promConfig
  223. durStr := timeutil.DurationString(end.Sub(start))
  224. if durStr == "" {
  225. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  226. }
  227. queryNodeRAMBytesCapacity := fmt.Sprintf(nodeRAMBytesCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  228. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesCapacity)
  229. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  230. return source.NewFuture(source.DecodeNodeRAMBytesCapacityResult, ctx.QueryAtTime(queryNodeRAMBytesCapacity, end))
  231. }
  232. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesAllocatable(start, end time.Time) *source.Future[source.NodeRAMBytesAllocatableResult] {
  233. const queryName = "QueryNodeRAMBytesAllocatable"
  234. const nodeRAMBytesAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_memory_bytes{%s}[%s])) by (%s, node, uid)`
  235. cfg := pds.promConfig
  236. durStr := timeutil.DurationString(end.Sub(start))
  237. if durStr == "" {
  238. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  239. }
  240. queryNodeRAMBytesAllocatable := fmt.Sprintf(nodeRAMBytesAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  241. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesAllocatable)
  242. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  243. return source.NewFuture(source.DecodeNodeRAMBytesAllocatableResult, ctx.QueryAtTime(queryNodeRAMBytesAllocatable, end))
  244. }
  245. func (pds *PrometheusMetricsQuerier) QueryNodeGPUCount(start, end time.Time) *source.Future[source.NodeGPUCountResult] {
  246. const queryName = "QueryNodeGPUCount"
  247. const nodeGPUCountQuery = `avg(avg_over_time(node_gpu_count{%s}[%s])) by (%s, node, uid, provider_id)`
  248. cfg := pds.promConfig
  249. durStr := timeutil.DurationString(end.Sub(start))
  250. if durStr == "" {
  251. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  252. }
  253. queryNodeGPUCount := fmt.Sprintf(nodeGPUCountQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  254. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeGPUCount)
  255. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  256. return source.NewFuture(source.DecodeNodeGPUCountResult, ctx.QueryAtTime(queryNodeGPUCount, end))
  257. }
  258. func (pds *PrometheusMetricsQuerier) QueryNodeLabels(start, end time.Time) *source.Future[source.NodeLabelsResult] {
  259. const queryName = "QueryNodeLabels"
  260. const labelsQuery = `avg_over_time(kube_node_labels{%s}[%s])`
  261. cfg := pds.promConfig
  262. durStr := timeutil.DurationString(end.Sub(start))
  263. if durStr == "" {
  264. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  265. }
  266. queryLabels := fmt.Sprintf(labelsQuery, cfg.ClusterFilter, durStr)
  267. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLabels)
  268. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  269. return source.NewFuture(source.DecodeNodeLabelsResult, ctx.QueryAtTime(queryLabels, end))
  270. }
  271. func (pds *PrometheusMetricsQuerier) QueryNodeActiveMinutes(start, end time.Time) *source.Future[source.NodeActiveMinutesResult] {
  272. const queryName = "QueryNodeActiveMinutes"
  273. const activeMinsQuery = `avg(node_total_hourly_cost{%s}) by (node, uid, %s, provider_id)[%s:%dm]`
  274. cfg := pds.promConfig
  275. minsPerResolution := cfg.DataResolutionMinutes
  276. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  277. if durStr == "" {
  278. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  279. }
  280. queryActiveMins := fmt.Sprintf(activeMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  281. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryActiveMins)
  282. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  283. return source.NewFuture(source.DecodeNodeActiveMinutesResult, ctx.QueryAtTime(queryActiveMins, end))
  284. }
  285. func (pds *PrometheusMetricsQuerier) QueryNodeCPUModeTotal(start, end time.Time) *source.Future[source.NodeCPUModeTotalResult] {
  286. const queryName = "QueryNodeCPUModeTotal"
  287. const nodeCPUModeTotalQuery = `sum(rate(node_cpu_seconds_total{%s}[%s:%dm])) by (kubernetes_node, uid, %s, mode)`
  288. cfg := pds.promConfig
  289. minsPerResolution := cfg.DataResolutionMinutes
  290. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  291. if durStr == "" {
  292. panic("failed to parse duration string passed to QueryNodeCPUModeTotal")
  293. }
  294. queryCPUModeTotal := fmt.Sprintf(nodeCPUModeTotalQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  295. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUModeTotal)
  296. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  297. return source.NewFuture(source.DecodeNodeCPUModeTotalResult, ctx.QueryAtTime(queryCPUModeTotal, end))
  298. }
  299. func (pds *PrometheusMetricsQuerier) QueryNodeRAMSystemPercent(start, end time.Time) *source.Future[source.NodeRAMSystemPercentResult] {
  300. const queryName = "QueryNodeRAMSystemPercent"
  301. const nodeRAMSystemPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  302. cfg := pds.promConfig
  303. minsPerResolution := cfg.DataResolutionMinutes
  304. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  305. if durStr == "" {
  306. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  307. }
  308. queryRAMSystemPct := fmt.Sprintf(nodeRAMSystemPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  309. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMSystemPct)
  310. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  311. return source.NewFuture(source.DecodeNodeRAMSystemPercentResult, ctx.QueryAtTime(queryRAMSystemPct, end))
  312. }
  313. func (pds *PrometheusMetricsQuerier) QueryNodeRAMUserPercent(start, end time.Time) *source.Future[source.NodeRAMUserPercentResult] {
  314. const queryName = "QueryNodeRAMUserPercent"
  315. const nodeRAMUserPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  316. cfg := pds.promConfig
  317. minsPerResolution := cfg.DataResolutionMinutes
  318. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  319. if durStr == "" {
  320. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  321. }
  322. queryRAMUserPct := fmt.Sprintf(nodeRAMUserPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  323. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUserPct)
  324. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  325. return source.NewFuture(source.DecodeNodeRAMUserPercentResult, ctx.QueryAtTime(queryRAMUserPct, end))
  326. }
  327. func (pds *PrometheusMetricsQuerier) QueryLBPricePerHr(start, end time.Time) *source.Future[source.LBPricePerHrResult] {
  328. const queryName = "QueryLBPricePerHr"
  329. const queryFmtLBCostPerHr = `avg(avg_over_time(kubecost_load_balancer_cost{%s}[%s])) by (namespace, service_name, ingress_ip, uid, %s)`
  330. cfg := pds.promConfig
  331. durStr := timeutil.DurationString(end.Sub(start))
  332. if durStr == "" {
  333. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  334. }
  335. queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  336. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBCostPerHr)
  337. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  338. return source.NewFuture(source.DecodeLBPricePerHrResult, ctx.QueryAtTime(queryLBCostPerHr, end))
  339. }
  340. func (pds *PrometheusMetricsQuerier) QueryLBActiveMinutes(start, end time.Time) *source.Future[source.LBActiveMinutesResult] {
  341. const queryName = "QueryLBActiveMinutes"
  342. const lbActiveMinutesQuery = `avg(kubecost_load_balancer_cost{%s}) by (namespace, service_name, uid, %s, ingress_ip)[%s:%dm]`
  343. cfg := pds.promConfig
  344. minsPerResolution := cfg.DataResolutionMinutes
  345. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  346. if durStr == "" {
  347. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  348. }
  349. queryLBActiveMins := fmt.Sprintf(lbActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  350. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBActiveMins)
  351. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  352. return source.NewFuture(source.DecodeLBActiveMinutesResult, ctx.QueryAtTime(queryLBActiveMins, end))
  353. }
  354. // Note: cluster_info is not currently emitted
  355. func (pds *PrometheusMetricsQuerier) QueryClusterUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  356. const queryName = "QueryClusterUptime"
  357. const queryFmtClusterUptime = `avg(cluster_info{%s}) by (%s, uid)[%s:%dm]`
  358. cfg := pds.promConfig
  359. minsPerResolution := cfg.DataResolutionMinutes
  360. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  361. if durStr == "" {
  362. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  363. }
  364. queryClusterUptime := fmt.Sprintf(queryFmtClusterUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  365. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtClusterUptime)
  366. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  367. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryClusterUptime, end))
  368. }
  369. func (pds *PrometheusMetricsQuerier) QueryClusterManagementDuration(start, end time.Time) *source.Future[source.ClusterManagementDurationResult] {
  370. const queryName = "QueryClusterManagementDuration"
  371. const clusterManagementDurationQuery = `avg(kubecost_cluster_management_cost{%s}) by (%s, provisioner_name)[%s:%dm]`
  372. cfg := pds.promConfig
  373. minsPerResolution := cfg.DataResolutionMinutes
  374. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  375. if durStr == "" {
  376. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  377. }
  378. queryClusterManagementDuration := fmt.Sprintf(clusterManagementDurationQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  379. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementDuration)
  380. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  381. return source.NewFuture(source.DecodeClusterManagementDurationResult, ctx.QueryAtTime(queryClusterManagementDuration, end))
  382. }
  383. func (pds *PrometheusMetricsQuerier) QueryClusterManagementPricePerHr(start, end time.Time) *source.Future[source.ClusterManagementPricePerHrResult] {
  384. const queryName = "QueryClusterManagementPricePerHr"
  385. const clusterManagementCostQuery = `avg(avg_over_time(kubecost_cluster_management_cost{%s}[%s])) by (%s, provisioner_name)`
  386. cfg := pds.promConfig
  387. durStr := timeutil.DurationString(end.Sub(start))
  388. if durStr == "" {
  389. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  390. }
  391. queryClusterManagementCost := fmt.Sprintf(clusterManagementCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  392. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementCost)
  393. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  394. return source.NewFuture(source.DecodeClusterManagementPricePerHrResult, ctx.QueryAtTime(queryClusterManagementCost, end))
  395. }
  396. // AllocationMetricQuerier
  397. func (pds *PrometheusMetricsQuerier) QueryPods(start, end time.Time) *source.Future[source.PodsResult] {
  398. const queryName = "QueryPods"
  399. const queryFmtPods = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  400. cfg := pds.promConfig
  401. minsPerResolution := cfg.DataResolutionMinutes
  402. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  403. if durStr == "" {
  404. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  405. }
  406. queryPods := fmt.Sprintf(queryFmtPods, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  407. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPods)
  408. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  409. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPods, end))
  410. }
  411. func (pds *PrometheusMetricsQuerier) QueryPodsUID(start, end time.Time) *source.Future[source.PodsResult] {
  412. const queryName = "QueryPodsUID"
  413. const queryFmtPodsUID = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  414. cfg := pds.promConfig
  415. minsPerResolution := cfg.DataResolutionMinutes
  416. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  417. if durStr == "" {
  418. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  419. }
  420. queryPodsUID := fmt.Sprintf(queryFmtPodsUID, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  421. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsUID)
  422. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  423. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPodsUID, end))
  424. }
  425. func (pds *PrometheusMetricsQuerier) QueryRAMBytesAllocated(start, end time.Time) *source.Future[source.RAMBytesAllocatedResult] {
  426. const queryName = "QueryRAMBytesAllocated"
  427. const queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s, provider_id)`
  428. cfg := pds.promConfig
  429. durStr := timeutil.DurationString(end.Sub(start))
  430. if durStr == "" {
  431. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  432. }
  433. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  434. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMBytesAllocated)
  435. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  436. return source.NewFuture(source.DecodeRAMBytesAllocatedResult, ctx.QueryAtTime(queryRAMBytesAllocated, end))
  437. }
  438. func (pds *PrometheusMetricsQuerier) QueryRAMRequests(start, end time.Time) *source.Future[source.RAMRequestsResult] {
  439. const queryName = "QueryRAMRequests"
  440. const queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  441. cfg := pds.promConfig
  442. durStr := timeutil.DurationString(end.Sub(start))
  443. if durStr == "" {
  444. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  445. }
  446. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  447. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMRequests)
  448. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  449. return source.NewFuture(source.DecodeRAMRequestsResult, ctx.QueryAtTime(queryRAMRequests, end))
  450. }
  451. func (pds *PrometheusMetricsQuerier) QueryRAMLimits(start, end time.Time) *source.Future[source.RAMLimitsResult] {
  452. const queryName = "QueryRAMLimits"
  453. const queryFmtRAMLimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  454. cfg := pds.promConfig
  455. durStr := timeutil.DurationString(end.Sub(start))
  456. if durStr == "" {
  457. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  458. }
  459. queryRAMLimits := fmt.Sprintf(queryFmtRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  460. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMLimits)
  461. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  462. return source.NewFuture(source.DecodeRAMLimitsResult, ctx.QueryAtTime(queryRAMLimits, end))
  463. }
  464. func (pds *PrometheusMetricsQuerier) QueryRAMUsageAvg(start, end time.Time) *source.Future[source.RAMUsageAvgResult] {
  465. const queryName = "QueryRAMUsageAvg"
  466. const queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  467. cfg := pds.promConfig
  468. durStr := timeutil.DurationString(end.Sub(start))
  469. if durStr == "" {
  470. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  471. }
  472. queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  473. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageAvg)
  474. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  475. return source.NewFuture(source.DecodeRAMUsageAvgResult, ctx.QueryAtTime(queryRAMUsageAvg, end))
  476. }
  477. func (pds *PrometheusMetricsQuerier) QueryRAMUsageMax(start, end time.Time) *source.Future[source.RAMUsageMaxResult] {
  478. const queryName = "QueryRAMUsageMax"
  479. const queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  480. cfg := pds.promConfig
  481. durStr := timeutil.DurationString(end.Sub(start))
  482. if durStr == "" {
  483. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  484. }
  485. queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  486. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageMax)
  487. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  488. return source.NewFuture(source.DecodeRAMUsageMaxResult, ctx.QueryAtTime(queryRAMUsageMax, end))
  489. }
  490. func (pds *PrometheusMetricsQuerier) QueryCPUCoresAllocated(start, end time.Time) *source.Future[source.CPUCoresAllocatedResult] {
  491. const queryName = "QueryCPUCoresAllocated"
  492. const queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  493. cfg := pds.promConfig
  494. durStr := timeutil.DurationString(end.Sub(start))
  495. if durStr == "" {
  496. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  497. }
  498. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  499. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUCoresAllocated)
  500. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  501. return source.NewFuture(source.DecodeCPUCoresAllocatedResult, ctx.QueryAtTime(queryCPUCoresAllocated, end))
  502. }
  503. func (pds *PrometheusMetricsQuerier) QueryCPURequests(start, end time.Time) *source.Future[source.CPURequestsResult] {
  504. const queryName = "QueryCPURequests"
  505. const queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  506. cfg := pds.promConfig
  507. durStr := timeutil.DurationString(end.Sub(start))
  508. if durStr == "" {
  509. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  510. }
  511. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  512. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPURequests)
  513. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  514. return source.NewFuture(source.DecodeCPURequestsResult, ctx.QueryAtTime(queryCPURequests, end))
  515. }
  516. func (pds *PrometheusMetricsQuerier) QueryCPULimits(start, end time.Time) *source.Future[source.CPULimitsResult] {
  517. const queryName = "QueryCPULimits"
  518. const queryFmtCPULimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  519. cfg := pds.promConfig
  520. durStr := timeutil.DurationString(end.Sub(start))
  521. if durStr == "" {
  522. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  523. }
  524. queryCPULimits := fmt.Sprintf(queryFmtCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  525. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPULimits)
  526. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  527. return source.NewFuture(source.DecodeCPULimitsResult, ctx.QueryAtTime(queryCPULimits, end))
  528. }
  529. func (pds *PrometheusMetricsQuerier) QueryCPUUsageAvg(start, end time.Time) *source.Future[source.CPUUsageAvgResult] {
  530. const queryName = "QueryCPUUsageAvg"
  531. const queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  532. cfg := pds.promConfig
  533. durStr := timeutil.DurationString(end.Sub(start))
  534. if durStr == "" {
  535. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  536. }
  537. queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  538. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageAvg)
  539. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  540. return source.NewFuture(source.DecodeCPUUsageAvgResult, ctx.QueryAtTime(queryCPUUsageAvg, end))
  541. }
// QueryCPUUsageMax returns the per-container maximum CPU usage over the
// [start, end) window. It first tries the cheap recording-rule metric and,
// only if that returns no rows, falls back to an expensive subquery that
// computes the same irate inline.
func (pds *PrometheusMetricsQuerier) QueryCPUUsageMax(start, end time.Time) *source.Future[source.CPUUsageMaxResult] {
	const queryName = "QueryCPUUsageMax"
	// Because we use container_cpu_usage_seconds_total to calculate CPU usage
	// at any given "instant" of time, we need to use an irate or rate. To then
	// calculate a max (or any aggregation) we have to perform an aggregation
	// query on top of an instant-by-instant maximum. Prometheus supports this
	// type of query with a "subquery" [1], however it is reportedly expensive
	// to make such a query. By default, Kubecost's Prometheus config includes
	// a recording rule that keeps track of the instant-by-instant irate for CPU
	// usage. The metric in this query is created by that recording rule.
	//
	// [1] https://prometheus.io/blog/2019/01/28/subquery-support/
	//
	// If changing the name of the recording rule, make sure to update the
	// corresponding diagnostic query to avoid confusion.
	const queryFmtCPUUsageMaxRecordingRule = `max(max_over_time(kubecost_container_cpu_usage_irate{%s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
	// This is the subquery equivalent of the above recording rule query. It is
	// more expensive, but does not require the recording rule. It should be
	// used as a fallback query if the recording rule data does not exist.
	//
	// The parameter after the colon [:<thisone>] in the subquery affects the
	// resolution of the subquery.
	// The parameter after the metric ...{}[<thisone>] should be set to 2x
	// the resolution, to make sure the irate always has two points to query
	// in case the Prom scrape duration has been reduced to be equal to the
	// query resolution.
	const queryFmtCPUUsageMaxSubquery = `max(max_over_time(irate(container_cpu_usage_seconds_total{container!="POD", container!="", %s}[%dm])[%s:%dm])) by (container, pod_name, pod, namespace, node, instance, uid, %s)`
	cfg := pds.promConfig
	// Whole-window duration string for the recording-rule query.
	durStr := timeutil.DurationString(end.Sub(start))
	if durStr == "" {
		panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
	}
	queryCPUUsageMaxRecordingRule := fmt.Sprintf(queryFmtCPUUsageMaxRecordingRule, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
	log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxRecordingRule)
	ctx := pds.promContexts.NewNamedContext(AllocationContextName)
	// Unlike the other queriers, this one blocks on the first query so it can
	// decide whether the fallback is needed.
	resCPUUsageMaxRR := ctx.QueryAtTime(queryCPUUsageMaxRecordingRule, end)
	// NOTE(review): the Await error is discarded, so a failed recording-rule
	// query silently falls through to the subquery — confirm this is the
	// intended best-effort behavior rather than an oversight.
	resCPUUsageMax, _ := resCPUUsageMaxRR.Await()
	if len(resCPUUsageMax) > 0 {
		// Recording-rule data exists; decode it directly without re-querying.
		return source.NewFutureFrom(source.DecodeAll(resCPUUsageMax, source.DecodeCPUUsageMaxResult))
	}
	// Fallback: rebuild the duration as a resolution-aligned subquery window.
	minsPerResolution := cfg.DataResolutionMinutes
	durStr = pds.durationStringFor(start, end, minsPerResolution, false)
	if durStr == "" {
		panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
	}
	// 2*minsPerResolution gives irate at least two samples per step (see above).
	queryCPUUsageMaxSubquery := fmt.Sprintf(queryFmtCPUUsageMaxSubquery, cfg.ClusterFilter, 2*minsPerResolution, durStr, minsPerResolution, cfg.ClusterLabel)
	log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxSubquery)
	return source.NewFuture(source.DecodeCPUUsageMaxResult, ctx.QueryAtTime(queryCPUUsageMaxSubquery, end))
}
  591. func (pds *PrometheusMetricsQuerier) QueryGPUsRequested(start, end time.Time) *source.Future[source.GPUsRequestedResult] {
  592. const queryName = "QueryGPUsRequested"
  593. const queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  594. cfg := pds.promConfig
  595. durStr := timeutil.DurationString(end.Sub(start))
  596. if durStr == "" {
  597. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  598. }
  599. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  600. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsRequested)
  601. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  602. return source.NewFuture(source.DecodeGPUsRequestedResult, ctx.QueryAtTime(queryGPUsRequested, end))
  603. }
  604. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageAvg(start, end time.Time) *source.Future[source.GPUsUsageAvgResult] {
  605. const queryName = "QueryGPUsUsageAvg"
  606. const queryFmtGPUsUsageAvg = `avg(avg_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  607. cfg := pds.promConfig
  608. durStr := timeutil.DurationString(end.Sub(start))
  609. if durStr == "" {
  610. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  611. }
  612. queryGPUsUsageAvg := fmt.Sprintf(queryFmtGPUsUsageAvg, durStr, cfg.ClusterLabel)
  613. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageAvg)
  614. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  615. return source.NewFuture(source.DecodeGPUsUsageAvgResult, ctx.QueryAtTime(queryGPUsUsageAvg, end))
  616. }
  617. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageMax(start, end time.Time) *source.Future[source.GPUsUsageMaxResult] {
  618. const queryName = "QueryGPUsUsageMax"
  619. const queryFmtGPUsUsageMax = `max(max_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  620. cfg := pds.promConfig
  621. durStr := timeutil.DurationString(end.Sub(start))
  622. if durStr == "" {
  623. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  624. }
  625. queryGPUsUsageMax := fmt.Sprintf(queryFmtGPUsUsageMax, durStr, cfg.ClusterLabel)
  626. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageMax)
  627. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  628. return source.NewFuture(source.DecodeGPUsUsageMaxResult, ctx.QueryAtTime(queryGPUsUsageMax, end))
  629. }
  630. func (pds *PrometheusMetricsQuerier) QueryGPUsAllocated(start, end time.Time) *source.Future[source.GPUsAllocatedResult] {
  631. const queryName = "QueryGPUsAllocated"
  632. const queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  633. cfg := pds.promConfig
  634. durStr := timeutil.DurationString(end.Sub(start))
  635. if durStr == "" {
  636. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  637. }
  638. queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  639. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsAllocated)
  640. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  641. return source.NewFuture(source.DecodeGPUsAllocatedResult, ctx.QueryAtTime(queryGPUsAllocated, end))
  642. }
  643. func (pds *PrometheusMetricsQuerier) QueryIsGPUShared(start, end time.Time) *source.Future[source.IsGPUSharedResult] {
  644. const queryName = "QueryIsGPUShared"
  645. const queryFmtIsGPUShared = `avg(avg_over_time(kube_pod_container_resource_requests{container!="", node != "", pod != "", container!= "", unit = "integer", %s}[%s])) by (container, pod, namespace, node, resource, uid, %s)`
  646. cfg := pds.promConfig
  647. durStr := timeutil.DurationString(end.Sub(start))
  648. if durStr == "" {
  649. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  650. }
  651. queryIsGPUShared := fmt.Sprintf(queryFmtIsGPUShared, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  652. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryIsGPUShared)
  653. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  654. return source.NewFuture(source.DecodeIsGPUSharedResult, ctx.QueryAtTime(queryIsGPUShared, end))
  655. }
  656. func (pds *PrometheusMetricsQuerier) QueryGPUInfo(start, end time.Time) *source.Future[source.GPUInfoResult] {
  657. const queryName = "QueryGPUInfo"
  658. const queryFmtGetGPUInfo = `avg(avg_over_time(DCGM_FI_DEV_DEC_UTIL{container!="",%s}[%s])) by (container, pod, namespace, device, modelName, UUID, uid, %s)`
  659. cfg := pds.promConfig
  660. durStr := timeutil.DurationString(end.Sub(start))
  661. if durStr == "" {
  662. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  663. }
  664. queryGetGPUInfo := fmt.Sprintf(queryFmtGetGPUInfo, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  665. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGetGPUInfo)
  666. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  667. return source.NewFuture(source.DecodeGPUInfoResult, ctx.QueryAtTime(queryGetGPUInfo, end))
  668. }
  669. func (pds *PrometheusMetricsQuerier) QueryNodeCPUPricePerHr(start, end time.Time) *source.Future[source.NodeCPUPricePerHrResult] {
  670. const queryName = "QueryNodeCPUPricePerHr"
  671. const queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  672. cfg := pds.promConfig
  673. durStr := timeutil.DurationString(end.Sub(start))
  674. if durStr == "" {
  675. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  676. }
  677. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  678. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerCPUHr)
  679. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  680. return source.NewFuture(source.DecodeNodeCPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerCPUHr, end))
  681. }
  682. func (pds *PrometheusMetricsQuerier) QueryNodeRAMPricePerGiBHr(start, end time.Time) *source.Future[source.NodeRAMPricePerGiBHrResult] {
  683. const queryName = "QueryNodeRAMPricePerGiBHr"
  684. const queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  685. cfg := pds.promConfig
  686. durStr := timeutil.DurationString(end.Sub(start))
  687. if durStr == "" {
  688. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  689. }
  690. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  691. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerRAMGiBHr)
  692. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  693. return source.NewFuture(source.DecodeNodeRAMPricePerGiBHrResult, ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end))
  694. }
  695. func (pds *PrometheusMetricsQuerier) QueryNodeGPUPricePerHr(start, end time.Time) *source.Future[source.NodeGPUPricePerHrResult] {
  696. const queryName = "QueryNodeGPUPricePerHr"
  697. const queryFmtNodeCostPerGPUHr = `avg(avg_over_time(node_gpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  698. cfg := pds.promConfig
  699. durStr := timeutil.DurationString(end.Sub(start))
  700. if durStr == "" {
  701. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  702. }
  703. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  704. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerGPUHr)
  705. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  706. return source.NewFuture(source.DecodeNodeGPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerGPUHr, end))
  707. }
  708. func (pds *PrometheusMetricsQuerier) QueryNodeIsSpot(start, end time.Time) *source.Future[source.NodeIsSpotResult] {
  709. const queryName = "QueryNodeIsSpot"
  710. const queryFmtNodeIsSpot = `avg_over_time(kubecost_node_is_spot{%s}[%s])`
  711. cfg := pds.promConfig
  712. durStr := timeutil.DurationString(end.Sub(start))
  713. if durStr == "" {
  714. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  715. }
  716. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, cfg.ClusterFilter, durStr)
  717. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeIsSpot)
  718. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  719. return source.NewFuture(source.DecodeNodeIsSpotResult, ctx.QueryAtTime(queryNodeIsSpot, end))
  720. }
  721. func (pds *PrometheusMetricsQuerier) QueryPodPVCAllocation(start, end time.Time) *source.Future[source.PodPVCAllocationResult] {
  722. const queryName = "QueryPodPVCAllocation"
  723. const queryFmtPodPVCAllocation = `avg(avg_over_time(pod_pvc_allocation{%s}[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, uid, %s)`
  724. cfg := pds.promConfig
  725. durStr := timeutil.DurationString(end.Sub(start))
  726. if durStr == "" {
  727. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  728. }
  729. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  730. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodPVCAllocation)
  731. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  732. return source.NewFuture(source.DecodePodPVCAllocationResult, ctx.QueryAtTime(queryPodPVCAllocation, end))
  733. }
  734. func (pds *PrometheusMetricsQuerier) QueryPVCBytesRequested(start, end time.Time) *source.Future[source.PVCBytesRequestedResult] {
  735. const queryName = "QueryPVCBytesRequested"
  736. const queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{%s}[%s])) by (persistentvolumeclaim, namespace, uid, %s)`
  737. cfg := pds.promConfig
  738. durStr := timeutil.DurationString(end.Sub(start))
  739. if durStr == "" {
  740. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  741. }
  742. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  743. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCBytesRequested)
  744. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  745. return source.NewFuture(source.DecodePVCBytesRequestedResult, ctx.QueryAtTime(queryPVCBytesRequested, end))
  746. }
  747. func (pds *PrometheusMetricsQuerier) QueryPVBytes(start, end time.Time) *source.Future[source.PVBytesResult] {
  748. const queryName = "QueryPVBytes"
  749. const queryFmtPVBytes = `avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s])) by (persistentvolume, uid, %s)`
  750. cfg := pds.promConfig
  751. durStr := timeutil.DurationString(end.Sub(start))
  752. if durStr == "" {
  753. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  754. }
  755. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  756. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVBytes)
  757. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  758. return source.NewFuture(source.DecodePVBytesResult, ctx.QueryAtTime(queryPVBytes, end))
  759. }
  760. func (pds *PrometheusMetricsQuerier) QueryPVInfo(start, end time.Time) *source.Future[source.PVInfoResult] {
  761. const queryName = "QueryPVInfo"
  762. const queryFmtPVMeta = `avg(avg_over_time(kubecost_pv_info{%s}[%s])) by (%s, storageclass, persistentvolume, uid, provider_id)`
  763. cfg := pds.promConfig
  764. durStr := timeutil.DurationString(end.Sub(start))
  765. if durStr == "" {
  766. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  767. }
  768. queryPVMeta := fmt.Sprintf(queryFmtPVMeta, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  769. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVMeta)
  770. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  771. return source.NewFuture(source.DecodePVInfoResult, ctx.QueryAtTime(queryPVMeta, end))
  772. }
  773. func (pds *PrometheusMetricsQuerier) QueryNetZoneGiB(start, end time.Time) *source.Future[source.NetZoneGiBResult] {
  774. const queryName = "QueryNetZoneGiB"
  775. const queryFmtNetZoneGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  776. cfg := pds.promConfig
  777. minsPerResolution := cfg.DataResolutionMinutes
  778. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  779. if durStr == "" {
  780. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  781. }
  782. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  783. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneGiB)
  784. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  785. return source.NewFuture(source.DecodeNetZoneGiBResult, ctx.QueryAtTime(queryNetZoneGiB, end))
  786. }
  787. func (pds *PrometheusMetricsQuerier) QueryNetZonePricePerGiB(start, end time.Time) *source.Future[source.NetZonePricePerGiBResult] {
  788. const queryName = "QueryNetZonePricePerGiB"
  789. const queryFmtNetZoneCostPerGiB = `avg(avg_over_time(kubecost_network_zone_egress_cost{%s}[%s])) by (%s)`
  790. cfg := pds.promConfig
  791. durStr := timeutil.DurationString(end.Sub(start))
  792. if durStr == "" {
  793. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  794. }
  795. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  796. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  797. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  798. return source.NewFuture(source.DecodeNetZonePricePerGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  799. }
  800. func (pds *PrometheusMetricsQuerier) QueryNetRegionGiB(start, end time.Time) *source.Future[source.NetRegionGiBResult] {
  801. const queryName = "QueryNetRegionGiB"
  802. const queryFmtNetRegionGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  803. cfg := pds.promConfig
  804. minsPerResolution := cfg.DataResolutionMinutes
  805. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  806. if durStr == "" {
  807. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  808. }
  809. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  810. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionGiB)
  811. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  812. return source.NewFuture(source.DecodeNetRegionGiBResult, ctx.QueryAtTime(queryNetRegionGiB, end))
  813. }
  814. func (pds *PrometheusMetricsQuerier) QueryNetRegionPricePerGiB(start, end time.Time) *source.Future[source.NetRegionPricePerGiBResult] {
  815. const queryName = "QueryNetRegionPricePerGiB"
  816. const queryFmtNetRegionCostPerGiB = `avg(avg_over_time(kubecost_network_region_egress_cost{%s}[%s])) by (%s)`
  817. cfg := pds.promConfig
  818. durStr := timeutil.DurationString(end.Sub(start))
  819. if durStr == "" {
  820. panic("failed to parse duration string passed to QueryNetRegionPricePerGiB")
  821. }
  822. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  823. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionCostPerGiB)
  824. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  825. return source.NewFuture(source.DecodeNetRegionPricePerGiBResult, ctx.QueryAtTime(queryNetRegionCostPerGiB, end))
  826. }
  827. func (pds *PrometheusMetricsQuerier) QueryNetInternetGiB(start, end time.Time) *source.Future[source.NetInternetGiBResult] {
  828. const queryName = "QueryNetInternetGiB"
  829. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  830. cfg := pds.promConfig
  831. minsPerResolution := cfg.DataResolutionMinutes
  832. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  833. if durStr == "" {
  834. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  835. }
  836. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  837. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  838. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  839. return source.NewFuture(source.DecodeNetInternetGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  840. }
  841. func (pds *PrometheusMetricsQuerier) QueryNetInternetPricePerGiB(start, end time.Time) *source.Future[source.NetInternetPricePerGiBResult] {
  842. const queryName = "QueryNetInternetPricePerGiB"
  843. const queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)`
  844. cfg := pds.promConfig
  845. durStr := timeutil.DurationString(end.Sub(start))
  846. if durStr == "" {
  847. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  848. }
  849. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  850. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetCostPerGiB)
  851. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  852. return source.NewFuture(source.DecodeNetInternetPricePerGiBResult, ctx.QueryAtTime(queryNetInternetCostPerGiB, end))
  853. }
  854. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceGiB(start, end time.Time) *source.Future[source.NetInternetServiceGiBResult] {
  855. const queryName = "QueryNetInternetServiceGiB"
  856. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  857. cfg := pds.promConfig
  858. minsPerResolution := cfg.DataResolutionMinutes
  859. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  860. if durStr == "" {
  861. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  862. }
  863. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  864. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  865. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  866. return source.NewFuture(source.DecodeNetInternetServiceGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  867. }
  868. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayPricePerGiB(start, end time.Time) *source.Future[source.NetNatGatewayPricePerGiBResult] {
  869. const queryName = "QueryNetNatGatewayPricePerGiB"
  870. const queryFmtNetNatGatewayPricePerGiB = `avg(avg_over_time(kubecost_network_nat_gateway_egress_cost{%s}[%s])) by (%s)`
  871. cfg := pds.promConfig
  872. durStr := timeutil.DurationString(end.Sub(start))
  873. if durStr == "" {
  874. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  875. }
  876. queryNetNatGatewayPricePerGiB := fmt.Sprintf(queryFmtNetNatGatewayPricePerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  877. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayPricePerGiB)
  878. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  879. return source.NewFuture(source.DecodeNetNatGatewayPricePerGiBResult, ctx.QueryAtTime(queryNetNatGatewayPricePerGiB, end))
  880. }
  881. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayGiB(start, end time.Time) *source.Future[source.NetNatGatewayGiBResult] {
  882. const queryName = "QueryNetNatGatewayGiB"
  883. const queryFmtNetNatGatewayGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{nat_gateway="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  884. cfg := pds.promConfig
  885. minsPerResolution := cfg.DataResolutionMinutes
  886. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  887. if durStr == "" {
  888. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  889. }
  890. queryNetNatGatewayGiB := fmt.Sprintf(queryFmtNetNatGatewayGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  891. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayGiB)
  892. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  893. return source.NewFuture(source.DecodeNetNatGatewayGiBResult, ctx.QueryAtTime(queryNetNatGatewayGiB, end))
  894. }
  895. func (pds *PrometheusMetricsQuerier) QueryNetTransferBytes(start, end time.Time) *source.Future[source.NetTransferBytesResult] {
  896. const queryName = "QueryNetTransferBytes"
  897. const queryFmtNetTransferBytes = `sum(increase(container_network_transmit_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  898. cfg := pds.promConfig
  899. minsPerResolution := cfg.DataResolutionMinutes
  900. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  901. if durStr == "" {
  902. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  903. }
  904. queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  905. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetTransferBytes)
  906. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  907. return source.NewFuture(source.DecodeNetTransferBytesResult, ctx.QueryAtTime(queryNetTransferBytes, end))
  908. }
  909. func (pds *PrometheusMetricsQuerier) QueryNetZoneIngressGiB(start, end time.Time) *source.Future[source.NetZoneIngressGiBResult] {
  910. const queryName = "QueryNetZoneIngressGiB"
  911. const queryFmtIngNetZoneGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  912. cfg := pds.promConfig
  913. minsPerResolution := cfg.DataResolutionMinutes
  914. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  915. if durStr == "" {
  916. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  917. }
  918. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtIngNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  919. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  920. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  921. return source.NewFuture(source.DecodeNetZoneIngressGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  922. }
  923. func (pds *PrometheusMetricsQuerier) QueryNetRegionIngressGiB(start, end time.Time) *source.Future[source.NetRegionIngressGiBResult] {
  924. const queryName = "QueryNetRegionIngressGiB"
  925. const queryFmtIngNetRegionGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  926. cfg := pds.promConfig
  927. minsPerResolution := cfg.DataResolutionMinutes
  928. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  929. if durStr == "" {
  930. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  931. }
  932. queryNetRegionIngGiB := fmt.Sprintf(queryFmtIngNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  933. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionIngGiB)
  934. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  935. return source.NewFuture(source.DecodeNetRegionIngressGiBResult, ctx.QueryAtTime(queryNetRegionIngGiB, end))
  936. }
  937. func (pds *PrometheusMetricsQuerier) QueryNetInternetIngressGiB(start, end time.Time) *source.Future[source.NetInternetIngressGiBResult] {
  938. const queryName = "QueryNetInternetIngressGiB"
  939. const queryFmtNetIngInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  940. cfg := pds.promConfig
  941. minsPerResolution := cfg.DataResolutionMinutes
  942. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  943. if durStr == "" {
  944. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  945. }
  946. queryNetIngInternetGiB := fmt.Sprintf(queryFmtNetIngInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  947. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  948. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  949. return source.NewFuture(source.DecodeNetInternetIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  950. }
  951. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceIngressGiB(start, end time.Time) *source.Future[source.NetInternetServiceIngressGiBResult] {
  952. const queryName = "QueryNetInternetServiceIngressGiB"
  953. const queryFmtIngNetInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  954. cfg := pds.promConfig
  955. minsPerResolution := cfg.DataResolutionMinutes
  956. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  957. if durStr == "" {
  958. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  959. }
  960. queryNetIngInternetGiB := fmt.Sprintf(queryFmtIngNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  961. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  962. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  963. return source.NewFuture(source.DecodeNetInternetServiceIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  964. }
  965. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayIngressPricePerGiB(start, end time.Time) *source.Future[source.NetNatGatewayPricePerGiBResult] {
  966. const queryName = "QueryNetNatGatewayIngressPricePerGiB"
  967. const queryFmtNetNatGatewayIngressPricePerGiB = `avg(avg_over_time(kubecost_network_nat_gateway_ingress_cost{%s}[%s])) by (%s)`
  968. cfg := pds.promConfig
  969. durStr := timeutil.DurationString(end.Sub(start))
  970. if durStr == "" {
  971. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  972. }
  973. queryNetNatGatewayIngressPricePerGiB := fmt.Sprintf(queryFmtNetNatGatewayIngressPricePerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  974. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayIngressPricePerGiB)
  975. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  976. return source.NewFuture(source.DecodeNetNatGatewayPricePerGiBResult, ctx.QueryAtTime(queryNetNatGatewayIngressPricePerGiB, end))
  977. }
  978. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayIngressGiB(start, end time.Time) *source.Future[source.NetNatGatewayIngressGiBResult] {
  979. const queryName = "QueryNetNatGatewayIngressGiB"
  980. const queryFmtNetNatGatewayIngressGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{nat_gateway="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  981. cfg := pds.promConfig
  982. minsPerResolution := cfg.DataResolutionMinutes
  983. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  984. if durStr == "" {
  985. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  986. }
  987. queryNetNatGatewayIngressGiB := fmt.Sprintf(queryFmtNetNatGatewayIngressGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  988. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayIngressGiB)
  989. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  990. return source.NewFuture(source.DecodeNetNatGatewayIngressGiBResult, ctx.QueryAtTime(queryNetNatGatewayIngressGiB, end))
  991. }
  992. func (pds *PrometheusMetricsQuerier) QueryNetReceiveBytes(start, end time.Time) *source.Future[source.NetReceiveBytesResult] {
  993. const queryName = "QueryNetReceiveBytes"
  994. const queryFmtNetReceiveBytes = `sum(increase(container_network_receive_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  995. cfg := pds.promConfig
  996. minsPerResolution := cfg.DataResolutionMinutes
  997. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  998. if durStr == "" {
  999. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1000. }
  1001. queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  1002. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetReceiveBytes)
  1003. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1004. return source.NewFuture(source.DecodeNetReceiveBytesResult, ctx.QueryAtTime(queryNetReceiveBytes, end))
  1005. }
  1006. // Note: namespace_info is not currently emitted
  1007. func (pds *PrometheusMetricsQuerier) QueryNamespaceUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  1008. const queryName = "QueryNamespaceUptime"
  1009. const queryFmtNamespaceUptime = `avg(namespace_info{%s}) by (%s, uid)[%s:%dm]`
  1010. cfg := pds.promConfig
  1011. minsPerResolution := cfg.DataResolutionMinutes
  1012. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  1013. if durStr == "" {
  1014. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1015. }
  1016. queryNamespaceUptime := fmt.Sprintf(queryFmtNamespaceUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  1017. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtNamespaceUptime)
  1018. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1019. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryNamespaceUptime, end))
  1020. }
  1021. func (pds *PrometheusMetricsQuerier) QueryNamespaceLabels(start, end time.Time) *source.Future[source.NamespaceLabelsResult] {
  1022. const queryName = "QueryNamespaceLabels"
  1023. const queryFmtNamespaceLabels = `avg_over_time(kube_namespace_labels{%s}[%s])`
  1024. cfg := pds.promConfig
  1025. durStr := timeutil.DurationString(end.Sub(start))
  1026. if durStr == "" {
  1027. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1028. }
  1029. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, cfg.ClusterFilter, durStr)
  1030. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceLabels)
  1031. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1032. return source.NewFuture(source.DecodeNamespaceLabelsResult, ctx.QueryAtTime(queryNamespaceLabels, end))
  1033. }
  1034. func (pds *PrometheusMetricsQuerier) QueryNamespaceAnnotations(start, end time.Time) *source.Future[source.NamespaceAnnotationsResult] {
  1035. const queryName = "QueryNamespaceAnnotations"
  1036. const queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations{%s}[%s])`
  1037. cfg := pds.promConfig
  1038. durStr := timeutil.DurationString(end.Sub(start))
  1039. if durStr == "" {
  1040. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1041. }
  1042. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, cfg.ClusterFilter, durStr)
  1043. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceAnnotations)
  1044. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1045. return source.NewFuture(source.DecodeNamespaceAnnotationsResult, ctx.QueryAtTime(queryNamespaceAnnotations, end))
  1046. }
  1047. func (pds *PrometheusMetricsQuerier) QueryPodLabels(start, end time.Time) *source.Future[source.PodLabelsResult] {
  1048. const queryName = "QueryPodLabels"
  1049. const queryFmtPodLabels = `avg_over_time(kube_pod_labels{%s}[%s])`
  1050. cfg := pds.promConfig
  1051. durStr := timeutil.DurationString(end.Sub(start))
  1052. if durStr == "" {
  1053. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1054. }
  1055. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, cfg.ClusterFilter, durStr)
  1056. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodLabels)
  1057. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1058. return source.NewFuture(source.DecodePodLabelsResult, ctx.QueryAtTime(queryPodLabels, end))
  1059. }
  1060. func (pds *PrometheusMetricsQuerier) QueryPodAnnotations(start, end time.Time) *source.Future[source.PodAnnotationsResult] {
  1061. const queryName = "QueryPodAnnotations"
  1062. const queryFmtPodAnnotations = `avg_over_time(kube_pod_annotations{%s}[%s])`
  1063. cfg := pds.promConfig
  1064. durStr := timeutil.DurationString(end.Sub(start))
  1065. if durStr == "" {
  1066. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1067. }
  1068. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, cfg.ClusterFilter, durStr)
  1069. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodAnnotations)
  1070. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1071. return source.NewFuture(source.DecodePodAnnotationsResult, ctx.QueryAtTime(queryPodAnnotations, end))
  1072. }
  1073. func (pds *PrometheusMetricsQuerier) QueryServiceLabels(start, end time.Time) *source.Future[source.ServiceLabelsResult] {
  1074. const queryName = "QueryServiceLabels"
  1075. const queryFmtServiceLabels = `avg_over_time(service_selector_labels{%s}[%s])`
  1076. cfg := pds.promConfig
  1077. durStr := timeutil.DurationString(end.Sub(start))
  1078. if durStr == "" {
  1079. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1080. }
  1081. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, cfg.ClusterFilter, durStr)
  1082. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryServiceLabels)
  1083. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1084. return source.NewFuture(source.DecodeServiceLabelsResult, ctx.QueryAtTime(queryServiceLabels, end))
  1085. }
  1086. func (pds *PrometheusMetricsQuerier) QueryDeploymentLabels(start, end time.Time) *source.Future[source.DeploymentLabelsResult] {
  1087. const queryName = "QueryDeploymentLabels"
  1088. const queryFmtDeploymentLabels = `avg_over_time(deployment_match_labels{%s}[%s])`
  1089. cfg := pds.promConfig
  1090. durStr := timeutil.DurationString(end.Sub(start))
  1091. if durStr == "" {
  1092. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1093. }
  1094. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, cfg.ClusterFilter, durStr)
  1095. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDeploymentLabels)
  1096. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1097. return source.NewFuture(source.DecodeDeploymentLabelsResult, ctx.QueryAtTime(queryDeploymentLabels, end))
  1098. }
  1099. func (pds *PrometheusMetricsQuerier) QueryStatefulSetLabels(start, end time.Time) *source.Future[source.StatefulSetLabelsResult] {
  1100. const queryName = "QueryStatefulSetLabels"
  1101. const queryFmtStatefulSetLabels = `avg_over_time(statefulSet_match_labels{%s}[%s])`
  1102. cfg := pds.promConfig
  1103. durStr := timeutil.DurationString(end.Sub(start))
  1104. if durStr == "" {
  1105. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1106. }
  1107. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, cfg.ClusterFilter, durStr)
  1108. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryStatefulSetLabels)
  1109. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1110. return source.NewFuture(source.DecodeStatefulSetLabelsResult, ctx.QueryAtTime(queryStatefulSetLabels, end))
  1111. }
  1112. func (pds *PrometheusMetricsQuerier) QueryDaemonSetLabels(start, end time.Time) *source.Future[source.DaemonSetLabelsResult] {
  1113. const queryName = "QueryDaemonSetLabels"
  1114. const queryFmtDaemonSetLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1115. cfg := pds.promConfig
  1116. durStr := timeutil.DurationString(end.Sub(start))
  1117. if durStr == "" {
  1118. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1119. }
  1120. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1121. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDaemonSetLabels)
  1122. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1123. return source.NewFuture(source.DecodeDaemonSetLabelsResult, ctx.QueryAtTime(queryDaemonSetLabels, end))
  1124. }
  1125. func (pds *PrometheusMetricsQuerier) QueryJobLabels(start, end time.Time) *source.Future[source.JobLabelsResult] {
  1126. const queryName = "QueryJobLabels"
  1127. const queryFmtJobLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="Job", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1128. cfg := pds.promConfig
  1129. durStr := timeutil.DurationString(end.Sub(start))
  1130. if durStr == "" {
  1131. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1132. }
  1133. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1134. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryJobLabels)
  1135. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1136. return source.NewFuture(source.DecodeJobLabelsResult, ctx.QueryAtTime(queryJobLabels, end))
  1137. }
  1138. func (pds *PrometheusMetricsQuerier) QueryPodsWithReplicaSetOwner(start, end time.Time) *source.Future[source.PodsWithReplicaSetOwnerResult] {
  1139. const queryName = "QueryPodsWithReplicaSetOwner"
  1140. const queryFmtPodsWithReplicaSetOwner = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1141. cfg := pds.promConfig
  1142. durStr := timeutil.DurationString(end.Sub(start))
  1143. if durStr == "" {
  1144. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1145. }
  1146. queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1147. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsWithReplicaSetOwner)
  1148. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1149. return source.NewFuture(source.DecodePodsWithReplicaSetOwnerResult, ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end))
  1150. }
  1151. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithoutOwners(start, end time.Time) *source.Future[source.ReplicaSetsWithoutOwnersResult] {
  1152. const queryName = "QueryReplicaSetsWithoutOwners"
  1153. const queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>", %s}[%s])) by (replicaset, namespace, uid, %s)`
  1154. cfg := pds.promConfig
  1155. durStr := timeutil.DurationString(end.Sub(start))
  1156. if durStr == "" {
  1157. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1158. }
  1159. queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1160. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithoutOwners)
  1161. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1162. return source.NewFuture(source.DecodeReplicaSetsWithoutOwnersResult, ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end))
  1163. }
  1164. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithRollout(start, end time.Time) *source.Future[source.ReplicaSetsWithRolloutResult] {
  1165. const queryName = "QueryReplicaSetsWithRollout"
  1166. const queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout", %s}[%s])) by (replicaset, namespace, owner_kind, owner_name, uid, %s)`
  1167. cfg := pds.promConfig
  1168. durStr := timeutil.DurationString(end.Sub(start))
  1169. if durStr == "" {
  1170. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1171. }
  1172. queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1173. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithRolloutOwner)
  1174. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1175. return source.NewFuture(source.DecodeReplicaSetsWithRolloutResult, ctx.QueryAtTime(queryReplicaSetsWithRolloutOwner, end))
  1176. }
  1177. // Note: The ResourceQuota metrics are _not_ emitted at the moment. Leaving the query implementations here in case we add metric emission later on.
  1178. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  1179. const queryName = "QueryResourceQuotaUptime"
  1180. const queryFmtResourceQuotaUptime = `avg(resourcequota_info{%s}) by (%s, uid)[%s:%dm]`
  1181. cfg := pds.promConfig
  1182. minsPerResolution := cfg.DataResolutionMinutes
  1183. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  1184. if durStr == "" {
  1185. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1186. }
  1187. queryResourceQuotaUptime := fmt.Sprintf(queryFmtResourceQuotaUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  1188. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtResourceQuotaUptime)
  1189. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1190. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryResourceQuotaUptime, end))
  1191. }
  1192. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestAvgResult] {
  1193. const queryName = "QueryResourceQuotaSpecCPURequestAverage"
  1194. const queryFmtResourceQuotaSpecCPURequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1195. cfg := pds.promConfig
  1196. durStr := timeutil.DurationString(end.Sub(start))
  1197. if durStr == "" {
  1198. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1199. }
  1200. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1201. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1202. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1203. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1204. }
  1205. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestMaxResult] {
  1206. const queryName = "QueryResourceQuotaSpecCPURequestMax"
  1207. const queryFmtResourceQuotaSpecCPURequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1208. cfg := pds.promConfig
  1209. durStr := timeutil.DurationString(end.Sub(start))
  1210. if durStr == "" {
  1211. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1212. }
  1213. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1214. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1215. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1216. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1217. }
  1218. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestAvgResult] {
  1219. const queryName = "QueryResourceQuotaSpecRAMRequestAverage"
  1220. const queryFmtResourceQuotaSpecRAMRequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1221. cfg := pds.promConfig
  1222. durStr := timeutil.DurationString(end.Sub(start))
  1223. if durStr == "" {
  1224. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1225. }
  1226. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1227. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1228. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1229. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1230. }
  1231. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestMaxResult] {
  1232. const queryName = "QueryResourceQuotaSpecRAMRequestMax"
  1233. const queryFmtResourceQuotaSpecRAMRequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1234. cfg := pds.promConfig
  1235. durStr := timeutil.DurationString(end.Sub(start))
  1236. if durStr == "" {
  1237. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1238. }
  1239. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1240. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1241. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1242. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1243. }
  1244. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitAvgResult] {
  1245. const queryName = "QueryResourceQuotaSpecCPULimitAverage"
  1246. const queryFmtResourceQuotaSpecCPULimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1247. cfg := pds.promConfig
  1248. durStr := timeutil.DurationString(end.Sub(start))
  1249. if durStr == "" {
  1250. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1251. }
  1252. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1253. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1254. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1255. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1256. }
  1257. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitMaxResult] {
  1258. const queryName = "QueryResourceQuotaSpecCPULimitMax"
  1259. const queryFmtResourceQuotaSpecCPULimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1260. cfg := pds.promConfig
  1261. durStr := timeutil.DurationString(end.Sub(start))
  1262. if durStr == "" {
  1263. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1264. }
  1265. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1266. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1267. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1268. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1269. }
  1270. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitAvgResult] {
  1271. const queryName = "QueryResourceQuotaSpecRAMLimitAverage"
  1272. const queryFmtResourceQuotaSpecRAMLimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1273. cfg := pds.promConfig
  1274. durStr := timeutil.DurationString(end.Sub(start))
  1275. if durStr == "" {
  1276. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1277. }
  1278. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1279. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1280. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1281. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1282. }
  1283. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitMaxResult] {
  1284. const queryName = "QueryResourceQuotaSpecRAMLimitMax"
  1285. const queryFmtResourceQuotaSpecRAMLimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1286. cfg := pds.promConfig
  1287. durStr := timeutil.DurationString(end.Sub(start))
  1288. if durStr == "" {
  1289. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1290. }
  1291. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1292. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1293. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1294. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1295. }
  1296. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestAvgResult] {
  1297. const queryName = "QueryResourceQuotaStatusUsedCPURequestAverage"
  1298. const queryFmtResourceQuotaStatusUsedCPURequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1299. cfg := pds.promConfig
  1300. durStr := timeutil.DurationString(end.Sub(start))
  1301. if durStr == "" {
  1302. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1303. }
  1304. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1305. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1306. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1307. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1308. }
  1309. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestMaxResult] {
  1310. const queryName = "QueryResourceQuotaStatusUsedCPURequestMax"
  1311. const queryFmtResourceQuotaStatusUsedCPURequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1312. cfg := pds.promConfig
  1313. durStr := timeutil.DurationString(end.Sub(start))
  1314. if durStr == "" {
  1315. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1316. }
  1317. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1318. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1319. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1320. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1321. }
  1322. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestAvgResult] {
  1323. const queryName = "QueryResourceQuotaStatusUsedRAMRequestAverage"
  1324. const queryFmtResourceQuotaStatusUsedRAMRequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1325. cfg := pds.promConfig
  1326. durStr := timeutil.DurationString(end.Sub(start))
  1327. if durStr == "" {
  1328. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1329. }
  1330. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1331. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1332. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1333. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1334. }
  1335. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestMaxResult] {
  1336. const queryName = "QueryResourceQuotaStatusUsedRAMRequestMax"
  1337. const queryFmtResourceQuotaStatusUsedRAMRequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1338. cfg := pds.promConfig
  1339. durStr := timeutil.DurationString(end.Sub(start))
  1340. if durStr == "" {
  1341. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1342. }
  1343. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1344. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1345. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1346. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1347. }
  1348. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitAvgResult] {
  1349. const queryName = "QueryResourceQuotaStatusUsedCPULimitAverage"
  1350. const queryFmtResourceQuotaStatusUsedCPULimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1351. cfg := pds.promConfig
  1352. durStr := timeutil.DurationString(end.Sub(start))
  1353. if durStr == "" {
  1354. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1355. }
  1356. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1357. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1358. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1359. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1360. }
  1361. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitMaxResult] {
  1362. const queryName = "QueryResourceQuotaStatusUsedCPULimitMax"
  1363. const queryFmtResourceQuotaStatusUsedCPULimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1364. cfg := pds.promConfig
  1365. durStr := timeutil.DurationString(end.Sub(start))
  1366. if durStr == "" {
  1367. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1368. }
  1369. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1370. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1371. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1372. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1373. }
  1374. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitAvgResult] {
  1375. const queryName = "QueryResourceQuotaStatusUsedRAMLimitAverage"
  1376. const queryFmtResourceQuotaStatusUsedRAMLimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1377. cfg := pds.promConfig
  1378. durStr := timeutil.DurationString(end.Sub(start))
  1379. if durStr == "" {
  1380. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1381. }
  1382. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1383. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1384. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1385. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1386. }
  1387. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitMaxResult] {
  1388. const queryName = "QueryResourceQuotaStatusUsedRAMLimitMax"
  1389. const queryFmtResourceQuotaStatusUsedRAMLimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1390. cfg := pds.promConfig
  1391. durStr := timeutil.DurationString(end.Sub(start))
  1392. if durStr == "" {
  1393. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1394. }
  1395. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1396. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1397. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1398. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1399. }
// QueryDataCoverage determines the time range over which cost data is
// available by probing the node_cpu_hourly_cost series for its oldest and
// newest sample timestamps within the last limitDays days.
//
// Returns (oldest, newest, error). If the metric is entirely absent, it
// falls back to a one-day window ending at the start of the current UTC day;
// if only the newest-sample probe comes back empty, it returns the oldest
// timestamp for both bounds.
func (pds *PrometheusMetricsQuerier) QueryDataCoverage(limitDays int) (time.Time, time.Time, error) {
	const (
		queryName            = "QueryDataCoverage"
		queryFmtOldestSample = `min_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
		queryFmtNewestSample = `max_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
	)
	cfg := pds.promConfig
	// 60-minute resolution matches the fixed "1h" subquery step used below.
	minutesPerDuration := 60
	dur := time.Duration(limitDays) * timeutil.Day
	// The window ends at the start of tomorrow (UTC) so today's samples are included.
	end := time.Now().UTC().Truncate(timeutil.Day).Add(timeutil.Day)
	start := end.Add(-dur)
	durStr := pds.durationStringFor(start, end, minutesPerDuration, false)
	ctx := pds.promContexts.NewNamedContext(AllocationContextName)
	queryOldest := fmt.Sprintf(queryFmtOldestSample, cfg.ClusterFilter, durStr, "1h")
	log.Debugf("[Prometheus][%s[Oldest]][At Time: %d]: %s", queryName, end.Unix(), queryOldest)
	resOldestFut := ctx.QueryAtTime(queryOldest, end)
	resOldest, err := resOldestFut.Await()
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("querying oldest sample: %w", err)
	}
	if len(resOldest) == 0 || len(resOldest[0].Values) == 0 {
		// If node_cpu_hourly_cost metric is not available, fallback to a reasonable time range
		// This prevents CSV export from failing when the metric doesn't exist yet
		log.Warnf("QueryDataCoverage: node_cpu_hourly_cost metric not available, using fallback time range")
		// Use a reasonable fallback: start from 1 day ago to account for metric collection delay
		fallbackEnd := time.Now().UTC().Truncate(timeutil.Day)
		fallbackStart := fallbackEnd.AddDate(0, 0, -1) // 1 day ago
		return fallbackStart, fallbackEnd, nil
	}
	// The sample value produced by timestamp() is itself a Unix time in seconds.
	oldest := time.Unix(int64(resOldest[0].Values[0].Value), 0)
	queryNewest := fmt.Sprintf(queryFmtNewestSample, cfg.ClusterFilter, durStr, "1h")
	log.Debugf("[Prometheus][%s[Newest]][At Time: %d]: %s", queryName, end.Unix(), queryNewest)
	resNewestFut := ctx.QueryAtTime(queryNewest, end)
	resNewest, err := resNewestFut.Await()
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("querying newest sample: %w", err)
	}
	if len(resNewest) == 0 || len(resNewest[0].Values) == 0 {
		// If newest query fails but oldest succeeded, use oldest as both start and end
		// This allows CSV export to proceed with at least some time range
		log.Warnf("QueryDataCoverage: newest sample query returned no results, using oldest timestamp")
		return oldest, oldest, nil
	}
	newest := time.Unix(int64(resNewest[0].Values[0].Value), 0)
	return oldest, newest, nil
}
  1446. // durationStringFor simplifies the determination of query duration based on the version of prom and if the function
  1447. // in the query needs all data points in the vector it is provided or if it will extrapolate its own. Functions
  1448. // that extrapolate will add on another resolution if given a duration that is one resolution longer than the intended
  1449. // duration.
  1450. func (pds *PrometheusMetricsQuerier) durationStringFor(start, end time.Time, minsPerResolution int, extrapolated bool) string {
  1451. dur := end.Sub(start)
  1452. // If using a version of Prometheus where the resolution needs duration offset,
  1453. // we need to apply that here.
  1454. //
  1455. // E.g. avg(node_total_hourly_cost{}) by (node, provider_id)[60m:5m] with
  1456. // time=01:00:00 will return, for a node running the entire time, 12
  1457. // timestamps where the first is 00:05:00 and the last is 01:00:00.
  1458. // However, OpenCost expects for there to be 13 timestamps where the first
  1459. // begins at 00:00:00. To achieve this, we must modify our query to
  1460. // avg(node_total_hourly_cost{}) by (node, provider_id)[65m:5m]
  1461. if pds.promConfig.IsOffsetResolution && !extrapolated {
  1462. // increase the query time by the resolution
  1463. dur = dur + (time.Duration(minsPerResolution) * time.Minute)
  1464. }
  1465. return timeutil.DurationString(dur)
  1466. }