2
0

metricsquerier.go 95 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945
  1. package prom
  2. import (
  3. "fmt"
  4. "time"
  5. "github.com/opencost/opencost/core/pkg/log"
  6. "github.com/opencost/opencost/core/pkg/source"
  7. "github.com/opencost/opencost/core/pkg/util/timeutil"
  8. prometheus "github.com/prometheus/client_golang/api"
  9. )
//--------------------------------------------------------------------------
// PrometheusMetricsQuerier
//--------------------------------------------------------------------------

// PrometheusMetricsQueryLogFormat is the log format used to log metric queries before being sent to the prometheus
// instance
const PrometheusMetricsQueryLogFormat = `[PrometheusMetricsQuerier][%s][At Time: %d]: %s`

// PrometheusMetricsQuerier is the implementation of the data source's MetricsQuerier interface for Prometheus.
type PrometheusMetricsQuerier struct {
	promConfig   *OpenCostPrometheusConfig // cluster filter/label and data-resolution settings used to build queries
	promClient   prometheus.Client         // underlying Prometheus API client
	promContexts *ContextFactory           // factory producing the named contexts queries are issued on
}
  22. func newPrometheusMetricsQuerier(
  23. promConfig *OpenCostPrometheusConfig,
  24. promClient prometheus.Client,
  25. promContexts *ContextFactory,
  26. ) *PrometheusMetricsQuerier {
  27. return &PrometheusMetricsQuerier{
  28. promConfig: promConfig,
  29. promClient: promClient,
  30. promContexts: promContexts,
  31. }
  32. }
  33. func (pds *PrometheusMetricsQuerier) QueryPVPricePerGiBHour(start, end time.Time) *source.Future[source.PVPricePerGiBHourResult] {
  34. const queryName = "QueryPVPricePerGiBHour"
  35. const pvCostQuery = `avg(avg_over_time(pv_hourly_cost{%s}[%s])) by (%s, persistentvolume, volumename, uid, provider_id)`
  36. cfg := pds.promConfig
  37. durStr := timeutil.DurationString(end.Sub(start))
  38. if durStr == "" {
  39. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  40. }
  41. queryPVCost := fmt.Sprintf(pvCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  42. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCost)
  43. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  44. return source.NewFuture(source.DecodePVPricePerGiBHourResult, ctx.QueryAtTime(queryPVCost, end))
  45. }
  46. func (pds *PrometheusMetricsQuerier) QueryPVUsedAverage(start, end time.Time) *source.Future[source.PVUsedAvgResult] {
  47. const queryName = "QueryPVUsedAverage"
  48. const pvUsedAverageQuery = `avg(avg_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  49. cfg := pds.promConfig
  50. durStr := timeutil.DurationString(end.Sub(start))
  51. if durStr == "" {
  52. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  53. }
  54. queryPVUsedAvg := fmt.Sprintf(pvUsedAverageQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  55. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedAvg)
  56. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  57. return source.NewFuture(source.DecodePVUsedAvgResult, ctx.QueryAtTime(queryPVUsedAvg, end))
  58. }
  59. func (pds *PrometheusMetricsQuerier) QueryPVUsedMax(start, end time.Time) *source.Future[source.PVUsedMaxResult] {
  60. const queryName = "QueryPVUsedMax"
  61. const pvUsedMaxQuery = `max(max_over_time(kubelet_volume_stats_used_bytes{%s}[%s])) by (%s, persistentvolumeclaim, namespace, uid)`
  62. cfg := pds.promConfig
  63. durStr := timeutil.DurationString(end.Sub(start))
  64. if durStr == "" {
  65. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  66. }
  67. queryPVUsedMax := fmt.Sprintf(pvUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  68. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVUsedMax)
  69. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  70. return source.NewFuture(source.DecodePVUsedMaxResult, ctx.QueryAtTime(queryPVUsedMax, end))
  71. }
  72. func (pds *PrometheusMetricsQuerier) QueryPVCInfo(start, end time.Time) *source.Future[source.PVCInfoResult] {
  73. const queryName = "QueryPVCInfo"
  74. const queryFmtPVCInfo = `avg(kube_persistentvolumeclaim_info{volumename != "", %s}) by (persistentvolumeclaim, storageclass, volumename, namespace, uid, %s)[%s:%dm]`
  75. cfg := pds.promConfig
  76. minsPerResolution := cfg.DataResolutionMinutes
  77. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  78. if durStr == "" {
  79. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  80. }
  81. queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  82. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCInfo)
  83. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  84. return source.NewFuture(source.DecodePVCInfoResult, ctx.QueryAtTime(queryPVCInfo, end))
  85. }
  86. func (pds *PrometheusMetricsQuerier) QueryPVActiveMinutes(start, end time.Time) *source.Future[source.PVActiveMinutesResult] {
  87. const queryName = "QueryPVActiveMinutes"
  88. const pvActiveMinsQuery = `avg(kube_persistentvolume_capacity_bytes{%s}) by (%s, persistentvolume, uid)[%s:%dm]`
  89. cfg := pds.promConfig
  90. minsPerResolution := cfg.DataResolutionMinutes
  91. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  92. if durStr == "" {
  93. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  94. }
  95. queryPVActiveMins := fmt.Sprintf(pvActiveMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  96. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVActiveMins)
  97. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  98. return source.NewFuture(source.DecodePVActiveMinutesResult, ctx.QueryAtTime(queryPVActiveMins, end))
  99. }
  100. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedAvg(start, end time.Time) *source.Future[source.LocalStorageUsedAvgResult] {
  101. const queryName = "QueryLocalStorageUsedAvg"
  102. const localStorageUsedAvgQuery = `avg(sum(avg_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  103. cfg := pds.promConfig
  104. durStr := timeutil.DurationString(end.Sub(start))
  105. if durStr == "" {
  106. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  107. }
  108. queryLocalStorageUsedAvg := fmt.Sprintf(localStorageUsedAvgQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  109. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedAvg)
  110. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  111. return source.NewFuture(source.DecodeLocalStorageUsedAvgResult, ctx.QueryAtTime(queryLocalStorageUsedAvg, end))
  112. }
  113. func (pds *PrometheusMetricsQuerier) QueryLocalStorageUsedMax(start, end time.Time) *source.Future[source.LocalStorageUsedMaxResult] {
  114. const queryName = "QueryLocalStorageUsedMax"
  115. const localStorageUsedMaxQuery = `max(sum(max_over_time(container_fs_usage_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}[%s])) by (instance, device, uid, %s, job)) by (instance, device, uid, %s)`
  116. cfg := pds.promConfig
  117. durStr := timeutil.DurationString(end.Sub(start))
  118. if durStr == "" {
  119. panic("failed to parse duration string passed to QueryLocalStorageUsedMax")
  120. }
  121. queryLocalStorageUsedMax := fmt.Sprintf(localStorageUsedMaxQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel, cfg.ClusterLabel)
  122. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageUsedMax)
  123. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  124. return source.NewFuture(source.DecodeLocalStorageUsedMaxResult, ctx.QueryAtTime(queryLocalStorageUsedMax, end))
  125. }
  126. func (pds *PrometheusMetricsQuerier) QueryLocalStorageBytes(start, end time.Time) *source.Future[source.LocalStorageBytesResult] {
  127. const queryName = "QueryLocalStorageBytes"
  128. const localStorageBytesQuery = `avg_over_time(sum(container_fs_limit_bytes{device=~"/dev/(nvme|sda).*", id="/", %s}) by (instance, device, uid, %s)[%s:%dm])`
  129. cfg := pds.promConfig
  130. minsPerResolution := cfg.DataResolutionMinutes
  131. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  132. if durStr == "" {
  133. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  134. }
  135. queryLocalStorageBytes := fmt.Sprintf(localStorageBytesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  136. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageBytes)
  137. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  138. return source.NewFuture(source.DecodeLocalStorageBytesResult, ctx.QueryAtTime(queryLocalStorageBytes, end))
  139. }
  140. func (pds *PrometheusMetricsQuerier) QueryLocalStorageActiveMinutes(start, end time.Time) *source.Future[source.LocalStorageActiveMinutesResult] {
  141. const queryName = "QueryLocalStorageActiveMinutes"
  142. const localStorageActiveMinutesQuery = `count(node_total_hourly_cost{%s}) by (%s, node, uid, instance, provider_id)[%s:%dm]`
  143. cfg := pds.promConfig
  144. minsPerResolution := cfg.DataResolutionMinutes
  145. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  146. if durStr == "" {
  147. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  148. }
  149. queryLocalStorageActiveMins := fmt.Sprintf(localStorageActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  150. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLocalStorageActiveMins)
  151. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  152. return source.NewFuture(source.DecodeLocalStorageActiveMinutesResult, ctx.QueryAtTime(queryLocalStorageActiveMins, end))
  153. }
  154. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresCapacity(start, end time.Time) *source.Future[source.NodeCPUCoresCapacityResult] {
  155. const queryName = "QueryNodeCPUCoresCapacity"
  156. const nodeCPUCoresCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_cpu_cores{%s}[%s])) by (%s, node, uid)`
  157. cfg := pds.promConfig
  158. durStr := timeutil.DurationString(end.Sub(start))
  159. if durStr == "" {
  160. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  161. }
  162. queryNodeCPUCoresCapacity := fmt.Sprintf(nodeCPUCoresCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  163. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresCapacity)
  164. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  165. return source.NewFuture(source.DecodeNodeCPUCoresCapacityResult, ctx.QueryAtTime(queryNodeCPUCoresCapacity, end))
  166. }
  167. func (pds *PrometheusMetricsQuerier) QueryNodeCPUCoresAllocatable(start, end time.Time) *source.Future[source.NodeCPUCoresAllocatableResult] {
  168. const queryName = "QueryNodeCPUCoresAllocatable"
  169. const nodeCPUCoresAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_cpu_cores{%s}[%s])) by (%s, node, uid)`
  170. // `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  171. cfg := pds.promConfig
  172. durStr := timeutil.DurationString(end.Sub(start))
  173. if durStr == "" {
  174. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  175. }
  176. queryNodeCPUCoresAllocatable := fmt.Sprintf(nodeCPUCoresAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  177. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCPUCoresAllocatable)
  178. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  179. return source.NewFuture(source.DecodeNodeCPUCoresAllocatableResult, ctx.QueryAtTime(queryNodeCPUCoresAllocatable, end))
  180. }
  181. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesCapacity(start, end time.Time) *source.Future[source.NodeRAMBytesCapacityResult] {
  182. const queryName = "QueryNodeRAMBytesCapacity"
  183. const nodeRAMBytesCapacityQuery = `avg(avg_over_time(kube_node_status_capacity_memory_bytes{%s}[%s])) by (%s, node, uid)`
  184. cfg := pds.promConfig
  185. durStr := timeutil.DurationString(end.Sub(start))
  186. if durStr == "" {
  187. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  188. }
  189. queryNodeRAMBytesCapacity := fmt.Sprintf(nodeRAMBytesCapacityQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  190. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesCapacity)
  191. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  192. return source.NewFuture(source.DecodeNodeRAMBytesCapacityResult, ctx.QueryAtTime(queryNodeRAMBytesCapacity, end))
  193. }
  194. func (pds *PrometheusMetricsQuerier) QueryNodeRAMBytesAllocatable(start, end time.Time) *source.Future[source.NodeRAMBytesAllocatableResult] {
  195. const queryName = "QueryNodeRAMBytesAllocatable"
  196. const nodeRAMBytesAllocatableQuery = `avg(avg_over_time(kube_node_status_allocatable_memory_bytes{%s}[%s])) by (%s, node, uid)`
  197. cfg := pds.promConfig
  198. durStr := timeutil.DurationString(end.Sub(start))
  199. if durStr == "" {
  200. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  201. }
  202. queryNodeRAMBytesAllocatable := fmt.Sprintf(nodeRAMBytesAllocatableQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  203. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeRAMBytesAllocatable)
  204. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  205. return source.NewFuture(source.DecodeNodeRAMBytesAllocatableResult, ctx.QueryAtTime(queryNodeRAMBytesAllocatable, end))
  206. }
  207. func (pds *PrometheusMetricsQuerier) QueryNodeGPUCount(start, end time.Time) *source.Future[source.NodeGPUCountResult] {
  208. const queryName = "QueryNodeGPUCount"
  209. const nodeGPUCountQuery = `avg(avg_over_time(node_gpu_count{%s}[%s])) by (%s, node, uid, provider_id)`
  210. cfg := pds.promConfig
  211. durStr := timeutil.DurationString(end.Sub(start))
  212. if durStr == "" {
  213. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  214. }
  215. queryNodeGPUCount := fmt.Sprintf(nodeGPUCountQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  216. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeGPUCount)
  217. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  218. return source.NewFuture(source.DecodeNodeGPUCountResult, ctx.QueryAtTime(queryNodeGPUCount, end))
  219. }
  220. func (pds *PrometheusMetricsQuerier) QueryNodeLabels(start, end time.Time) *source.Future[source.NodeLabelsResult] {
  221. const queryName = "QueryNodeLabels"
  222. const labelsQuery = `avg_over_time(kube_node_labels{%s}[%s])`
  223. cfg := pds.promConfig
  224. durStr := timeutil.DurationString(end.Sub(start))
  225. if durStr == "" {
  226. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  227. }
  228. queryLabels := fmt.Sprintf(labelsQuery, cfg.ClusterFilter, durStr)
  229. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLabels)
  230. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  231. return source.NewFuture(source.DecodeNodeLabelsResult, ctx.QueryAtTime(queryLabels, end))
  232. }
  233. func (pds *PrometheusMetricsQuerier) QueryNodeActiveMinutes(start, end time.Time) *source.Future[source.NodeActiveMinutesResult] {
  234. const queryName = "QueryNodeActiveMinutes"
  235. const activeMinsQuery = `avg(node_total_hourly_cost{%s}) by (node, uid, %s, provider_id)[%s:%dm]`
  236. cfg := pds.promConfig
  237. minsPerResolution := cfg.DataResolutionMinutes
  238. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  239. if durStr == "" {
  240. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  241. }
  242. queryActiveMins := fmt.Sprintf(activeMinsQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  243. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryActiveMins)
  244. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  245. return source.NewFuture(source.DecodeNodeActiveMinutesResult, ctx.QueryAtTime(queryActiveMins, end))
  246. }
  247. func (pds *PrometheusMetricsQuerier) QueryNodeCPUModeTotal(start, end time.Time) *source.Future[source.NodeCPUModeTotalResult] {
  248. const queryName = "QueryNodeCPUModeTotal"
  249. const nodeCPUModeTotalQuery = `sum(rate(node_cpu_seconds_total{%s}[%s:%dm])) by (kubernetes_node, uid, %s, mode)`
  250. cfg := pds.promConfig
  251. minsPerResolution := cfg.DataResolutionMinutes
  252. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  253. if durStr == "" {
  254. panic("failed to parse duration string passed to QueryNodeCPUModeTotal")
  255. }
  256. queryCPUModeTotal := fmt.Sprintf(nodeCPUModeTotalQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  257. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUModeTotal)
  258. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  259. return source.NewFuture(source.DecodeNodeCPUModeTotalResult, ctx.QueryAtTime(queryCPUModeTotal, end))
  260. }
  261. func (pds *PrometheusMetricsQuerier) QueryNodeRAMSystemPercent(start, end time.Time) *source.Future[source.NodeRAMSystemPercentResult] {
  262. const queryName = "QueryNodeRAMSystemPercent"
  263. const nodeRAMSystemPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  264. cfg := pds.promConfig
  265. minsPerResolution := cfg.DataResolutionMinutes
  266. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  267. if durStr == "" {
  268. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  269. }
  270. queryRAMSystemPct := fmt.Sprintf(nodeRAMSystemPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  271. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMSystemPct)
  272. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  273. return source.NewFuture(source.DecodeNodeRAMSystemPercentResult, ctx.QueryAtTime(queryRAMSystemPct, end))
  274. }
  275. func (pds *PrometheusMetricsQuerier) QueryNodeRAMUserPercent(start, end time.Time) *source.Future[source.NodeRAMUserPercentResult] {
  276. const queryName = "QueryNodeRAMUserPercent"
  277. const nodeRAMUserPctQuery = `sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system", %s}[%s:%dm])) by (instance, uid, %s) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes{%s}[%s:%dm])) by (node, uid, %s), "instance", "$1", "node", "(.*)")) by (instance, uid, %s)`
  278. cfg := pds.promConfig
  279. minsPerResolution := cfg.DataResolutionMinutes
  280. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  281. if durStr == "" {
  282. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  283. }
  284. queryRAMUserPct := fmt.Sprintf(nodeRAMUserPctQuery, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel, cfg.ClusterLabel)
  285. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUserPct)
  286. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  287. return source.NewFuture(source.DecodeNodeRAMUserPercentResult, ctx.QueryAtTime(queryRAMUserPct, end))
  288. }
  289. func (pds *PrometheusMetricsQuerier) QueryLBPricePerHr(start, end time.Time) *source.Future[source.LBPricePerHrResult] {
  290. const queryName = "QueryLBPricePerHr"
  291. const queryFmtLBCostPerHr = `avg(avg_over_time(kubecost_load_balancer_cost{%s}[%s])) by (namespace, service_name, ingress_ip, uid, %s)`
  292. cfg := pds.promConfig
  293. durStr := timeutil.DurationString(end.Sub(start))
  294. if durStr == "" {
  295. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  296. }
  297. queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  298. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBCostPerHr)
  299. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  300. return source.NewFuture(source.DecodeLBPricePerHrResult, ctx.QueryAtTime(queryLBCostPerHr, end))
  301. }
  302. func (pds *PrometheusMetricsQuerier) QueryLBActiveMinutes(start, end time.Time) *source.Future[source.LBActiveMinutesResult] {
  303. const queryName = "QueryLBActiveMinutes"
  304. const lbActiveMinutesQuery = `avg(kubecost_load_balancer_cost{%s}) by (namespace, service_name, uid, %s, ingress_ip)[%s:%dm]`
  305. cfg := pds.promConfig
  306. minsPerResolution := cfg.DataResolutionMinutes
  307. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  308. if durStr == "" {
  309. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  310. }
  311. queryLBActiveMins := fmt.Sprintf(lbActiveMinutesQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  312. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryLBActiveMins)
  313. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  314. return source.NewFuture(source.DecodeLBActiveMinutesResult, ctx.QueryAtTime(queryLBActiveMins, end))
  315. }
  316. // Note: cluster_info is not currently emitted
  317. func (pds *PrometheusMetricsQuerier) QueryClusterUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  318. const queryName = "QueryClusterUptime"
  319. const queryFmtClusterUptime = `avg(cluster_info{%s}) by (%s, uid)[%s:%dm]`
  320. cfg := pds.promConfig
  321. minsPerResolution := cfg.DataResolutionMinutes
  322. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  323. if durStr == "" {
  324. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  325. }
  326. queryClusterUptime := fmt.Sprintf(queryFmtClusterUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  327. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtClusterUptime)
  328. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  329. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryClusterUptime, end))
  330. }
  331. func (pds *PrometheusMetricsQuerier) QueryClusterManagementDuration(start, end time.Time) *source.Future[source.ClusterManagementDurationResult] {
  332. const queryName = "QueryClusterManagementDuration"
  333. const clusterManagementDurationQuery = `avg(kubecost_cluster_management_cost{%s}) by (%s, provisioner_name)[%s:%dm]`
  334. cfg := pds.promConfig
  335. minsPerResolution := cfg.DataResolutionMinutes
  336. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  337. if durStr == "" {
  338. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  339. }
  340. queryClusterManagementDuration := fmt.Sprintf(clusterManagementDurationQuery, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  341. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementDuration)
  342. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  343. return source.NewFuture(source.DecodeClusterManagementDurationResult, ctx.QueryAtTime(queryClusterManagementDuration, end))
  344. }
  345. func (pds *PrometheusMetricsQuerier) QueryClusterManagementPricePerHr(start, end time.Time) *source.Future[source.ClusterManagementPricePerHrResult] {
  346. const queryName = "QueryClusterManagementPricePerHr"
  347. const clusterManagementCostQuery = `avg(avg_over_time(kubecost_cluster_management_cost{%s}[%s])) by (%s, provisioner_name)`
  348. cfg := pds.promConfig
  349. durStr := timeutil.DurationString(end.Sub(start))
  350. if durStr == "" {
  351. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  352. }
  353. queryClusterManagementCost := fmt.Sprintf(clusterManagementCostQuery, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  354. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryClusterManagementCost)
  355. ctx := pds.promContexts.NewNamedContext(ClusterContextName)
  356. return source.NewFuture(source.DecodeClusterManagementPricePerHrResult, ctx.QueryAtTime(queryClusterManagementCost, end))
  357. }
  358. // AllocationMetricQuerier
  359. func (pds *PrometheusMetricsQuerier) QueryPods(start, end time.Time) *source.Future[source.PodsResult] {
  360. const queryName = "QueryPods"
  361. const queryFmtPods = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  362. cfg := pds.promConfig
  363. minsPerResolution := cfg.DataResolutionMinutes
  364. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  365. if durStr == "" {
  366. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  367. }
  368. queryPods := fmt.Sprintf(queryFmtPods, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  369. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPods)
  370. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  371. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPods, end))
  372. }
  373. func (pds *PrometheusMetricsQuerier) QueryPodsUID(start, end time.Time) *source.Future[source.PodsResult] {
  374. const queryName = "QueryPodsUID"
  375. const queryFmtPodsUID = `avg(kube_pod_container_status_running{%s} != 0) by (pod, namespace, uid, %s)[%s:%dm]`
  376. cfg := pds.promConfig
  377. minsPerResolution := cfg.DataResolutionMinutes
  378. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  379. if durStr == "" {
  380. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  381. }
  382. queryPodsUID := fmt.Sprintf(queryFmtPodsUID, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  383. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsUID)
  384. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  385. return source.NewFuture(source.DecodePodsResult, ctx.QueryAtTime(queryPodsUID, end))
  386. }
  387. func (pds *PrometheusMetricsQuerier) QueryRAMBytesAllocated(start, end time.Time) *source.Future[source.RAMBytesAllocatedResult] {
  388. const queryName = "QueryRAMBytesAllocated"
  389. const queryFmtRAMBytesAllocated = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s, provider_id)`
  390. cfg := pds.promConfig
  391. durStr := timeutil.DurationString(end.Sub(start))
  392. if durStr == "" {
  393. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  394. }
  395. queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  396. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMBytesAllocated)
  397. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  398. return source.NewFuture(source.DecodeRAMBytesAllocatedResult, ctx.QueryAtTime(queryRAMBytesAllocated, end))
  399. }
  400. func (pds *PrometheusMetricsQuerier) QueryRAMRequests(start, end time.Time) *source.Future[source.RAMRequestsResult] {
  401. const queryName = "QueryRAMRequests"
  402. const queryFmtRAMRequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  403. cfg := pds.promConfig
  404. durStr := timeutil.DurationString(end.Sub(start))
  405. if durStr == "" {
  406. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  407. }
  408. queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  409. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMRequests)
  410. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  411. return source.NewFuture(source.DecodeRAMRequestsResult, ctx.QueryAtTime(queryRAMRequests, end))
  412. }
  413. func (pds *PrometheusMetricsQuerier) QueryRAMLimits(start, end time.Time) *source.Future[source.RAMLimitsResult] {
  414. const queryName = "QueryRAMLimits"
  415. const queryFmtRAMLimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="memory", unit="byte", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  416. cfg := pds.promConfig
  417. durStr := timeutil.DurationString(end.Sub(start))
  418. if durStr == "" {
  419. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  420. }
  421. queryRAMLimits := fmt.Sprintf(queryFmtRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  422. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMLimits)
  423. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  424. return source.NewFuture(source.DecodeRAMLimitsResult, ctx.QueryAtTime(queryRAMLimits, end))
  425. }
  426. func (pds *PrometheusMetricsQuerier) QueryRAMUsageAvg(start, end time.Time) *source.Future[source.RAMUsageAvgResult] {
  427. const queryName = "QueryRAMUsageAvg"
  428. const queryFmtRAMUsageAvg = `avg(avg_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  429. cfg := pds.promConfig
  430. durStr := timeutil.DurationString(end.Sub(start))
  431. if durStr == "" {
  432. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  433. }
  434. queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  435. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageAvg)
  436. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  437. return source.NewFuture(source.DecodeRAMUsageAvgResult, ctx.QueryAtTime(queryRAMUsageAvg, end))
  438. }
  439. func (pds *PrometheusMetricsQuerier) QueryRAMUsageMax(start, end time.Time) *source.Future[source.RAMUsageMaxResult] {
  440. const queryName = "QueryRAMUsageMax"
  441. const queryFmtRAMUsageMax = `max(max_over_time(container_memory_working_set_bytes{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  442. cfg := pds.promConfig
  443. durStr := timeutil.DurationString(end.Sub(start))
  444. if durStr == "" {
  445. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  446. }
  447. queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  448. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryRAMUsageMax)
  449. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  450. return source.NewFuture(source.DecodeRAMUsageMaxResult, ctx.QueryAtTime(queryRAMUsageMax, end))
  451. }
  452. func (pds *PrometheusMetricsQuerier) QueryCPUCoresAllocated(start, end time.Time) *source.Future[source.CPUCoresAllocatedResult] {
  453. const queryName = "QueryCPUCoresAllocated"
  454. const queryFmtCPUCoresAllocated = `avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  455. cfg := pds.promConfig
  456. durStr := timeutil.DurationString(end.Sub(start))
  457. if durStr == "" {
  458. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  459. }
  460. queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  461. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUCoresAllocated)
  462. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  463. return source.NewFuture(source.DecodeCPUCoresAllocatedResult, ctx.QueryAtTime(queryCPUCoresAllocated, end))
  464. }
  465. func (pds *PrometheusMetricsQuerier) QueryCPURequests(start, end time.Time) *source.Future[source.CPURequestsResult] {
  466. const queryName = "QueryCPURequests"
  467. const queryFmtCPURequests = `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  468. cfg := pds.promConfig
  469. durStr := timeutil.DurationString(end.Sub(start))
  470. if durStr == "" {
  471. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  472. }
  473. queryCPURequests := fmt.Sprintf(queryFmtCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  474. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPURequests)
  475. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  476. return source.NewFuture(source.DecodeCPURequestsResult, ctx.QueryAtTime(queryCPURequests, end))
  477. }
  478. func (pds *PrometheusMetricsQuerier) QueryCPULimits(start, end time.Time) *source.Future[source.CPULimitsResult] {
  479. const queryName = "QueryCPULimits"
  480. const queryFmtCPULimits = `avg(avg_over_time(kube_pod_container_resource_limits{resource="cpu", unit="core", container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, %s)`
  481. cfg := pds.promConfig
  482. durStr := timeutil.DurationString(end.Sub(start))
  483. if durStr == "" {
  484. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  485. }
  486. queryCPULimits := fmt.Sprintf(queryFmtCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  487. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPULimits)
  488. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  489. return source.NewFuture(source.DecodeCPULimitsResult, ctx.QueryAtTime(queryCPULimits, end))
  490. }
  491. func (pds *PrometheusMetricsQuerier) QueryCPUUsageAvg(start, end time.Time) *source.Future[source.CPUUsageAvgResult] {
  492. const queryName = "QueryCPUUsageAvg"
  493. const queryFmtCPUUsageAvg = `avg(rate(container_cpu_usage_seconds_total{container!="", container_name!="POD", container!="POD", %s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
  494. cfg := pds.promConfig
  495. durStr := timeutil.DurationString(end.Sub(start))
  496. if durStr == "" {
  497. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  498. }
  499. queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  500. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageAvg)
  501. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  502. return source.NewFuture(source.DecodeCPUUsageAvgResult, ctx.QueryAtTime(queryCPUUsageAvg, end))
  503. }
// QueryCPUUsageMax queries each container's maximum CPU usage over
// [start, end). It first tries the cheap recording-rule metric; if that
// returns no series it falls back to the (more expensive) subquery form.
func (pds *PrometheusMetricsQuerier) QueryCPUUsageMax(start, end time.Time) *source.Future[source.CPUUsageMaxResult] {
	const queryName = "QueryCPUUsageMax"
	// Because we use container_cpu_usage_seconds_total to calculate CPU usage
	// at any given "instant" of time, we need to use an irate or rate. To then
	// calculate a max (or any aggregation) we have to perform an aggregation
	// query on top of an instant-by-instant maximum. Prometheus supports this
	// type of query with a "subquery" [1], however it is reportedly expensive
	// to make such a query. By default, Kubecost's Prometheus config includes
	// a recording rule that keeps track of the instant-by-instant irate for CPU
	// usage. The metric in this query is created by that recording rule.
	//
	// [1] https://prometheus.io/blog/2019/01/28/subquery-support/
	//
	// If changing the name of the recording rule, make sure to update the
	// corresponding diagnostic query to avoid confusion.
	const queryFmtCPUUsageMaxRecordingRule = `max(max_over_time(kubecost_container_cpu_usage_irate{%s}[%s])) by (container_name, container, pod_name, pod, namespace, node, instance, uid, %s)`
	// This is the subquery equivalent of the above recording rule query. It is
	// more expensive, but does not require the recording rule. It should be
	// used as a fallback query if the recording rule data does not exist.
	//
	// The parameter after the colon [:<thisone>] in the subquery affects the
	// resolution of the subquery.
	// The parameter after the metric ...{}[<thisone>] should be set to 2x
	// the resolution, to make sure the irate always has two points to query
	// in case the Prom scrape duration has been reduced to be equal to the
	// query resolution.
	const queryFmtCPUUsageMaxSubquery = `max(max_over_time(irate(container_cpu_usage_seconds_total{container!="POD", container!="", %s}[%dm])[%s:%dm])) by (container, pod_name, pod, namespace, node, instance, uid, %s)`
	cfg := pds.promConfig
	durStr := timeutil.DurationString(end.Sub(start))
	if durStr == "" {
		panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
	}
	queryCPUUsageMaxRecordingRule := fmt.Sprintf(queryFmtCPUUsageMaxRecordingRule, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
	log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxRecordingRule)
	ctx := pds.promContexts.NewNamedContext(AllocationContextName)
	// The recording-rule query is awaited synchronously so we can decide
	// whether a fallback is needed before returning.
	// NOTE(review): the Await error is discarded, so any query failure (not
	// just "no data") silently falls through to the subquery — confirm that
	// this best-effort behavior is intentional.
	resCPUUsageMaxRR := ctx.QueryAtTime(queryCPUUsageMaxRecordingRule, end)
	resCPUUsageMax, _ := resCPUUsageMaxRR.Await()
	if len(resCPUUsageMax) > 0 {
		// Recording-rule data exists; wrap the already-fetched results.
		return source.NewFutureFrom(source.DecodeAll(resCPUUsageMax, source.DecodeCPUUsageMaxResult))
	}
	// Fallback: recompute the window at the configured resolution for the
	// subquery form ([%s:%dm] requires a resolution-aligned duration).
	minsPerResolution := cfg.DataResolutionMinutes
	durStr = pds.durationStringFor(start, end, minsPerResolution, false)
	if durStr == "" {
		panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
	}
	// 2*minsPerResolution for the irate range; see the comment on the
	// subquery format string above.
	queryCPUUsageMaxSubquery := fmt.Sprintf(queryFmtCPUUsageMaxSubquery, cfg.ClusterFilter, 2*minsPerResolution, durStr, minsPerResolution, cfg.ClusterLabel)
	log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryCPUUsageMaxSubquery)
	return source.NewFuture(source.DecodeCPUUsageMaxResult, ctx.QueryAtTime(queryCPUUsageMaxSubquery, end))
}
  553. func (pds *PrometheusMetricsQuerier) QueryGPUsRequested(start, end time.Time) *source.Future[source.GPUsRequestedResult] {
  554. const queryName = "QueryGPUsRequested"
  555. const queryFmtGPUsRequested = `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  556. cfg := pds.promConfig
  557. durStr := timeutil.DurationString(end.Sub(start))
  558. if durStr == "" {
  559. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  560. }
  561. queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  562. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsRequested)
  563. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  564. return source.NewFuture(source.DecodeGPUsRequestedResult, ctx.QueryAtTime(queryGPUsRequested, end))
  565. }
  566. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageAvg(start, end time.Time) *source.Future[source.GPUsUsageAvgResult] {
  567. const queryName = "QueryGPUsUsageAvg"
  568. const queryFmtGPUsUsageAvg = `avg(avg_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  569. cfg := pds.promConfig
  570. durStr := timeutil.DurationString(end.Sub(start))
  571. if durStr == "" {
  572. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  573. }
  574. queryGPUsUsageAvg := fmt.Sprintf(queryFmtGPUsUsageAvg, durStr, cfg.ClusterLabel)
  575. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageAvg)
  576. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  577. return source.NewFuture(source.DecodeGPUsUsageAvgResult, ctx.QueryAtTime(queryGPUsUsageAvg, end))
  578. }
  579. func (pds *PrometheusMetricsQuerier) QueryGPUsUsageMax(start, end time.Time) *source.Future[source.GPUsUsageMaxResult] {
  580. const queryName = "QueryGPUsUsageMax"
  581. const queryFmtGPUsUsageMax = `max(max_over_time(DCGM_FI_PROF_GR_ENGINE_ACTIVE{container!=""}[%s])) by (container, pod, namespace, uid, %s)`
  582. cfg := pds.promConfig
  583. durStr := timeutil.DurationString(end.Sub(start))
  584. if durStr == "" {
  585. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  586. }
  587. queryGPUsUsageMax := fmt.Sprintf(queryFmtGPUsUsageMax, durStr, cfg.ClusterLabel)
  588. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsUsageMax)
  589. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  590. return source.NewFuture(source.DecodeGPUsUsageMaxResult, ctx.QueryAtTime(queryGPUsUsageMax, end))
  591. }
  592. func (pds *PrometheusMetricsQuerier) QueryGPUsAllocated(start, end time.Time) *source.Future[source.GPUsAllocatedResult] {
  593. const queryName = "QueryGPUsAllocated"
  594. const queryFmtGPUsAllocated = `avg(avg_over_time(container_gpu_allocation{container!="", container!="POD", node!="", %s}[%s])) by (container, pod, namespace, node, uid, %s)`
  595. cfg := pds.promConfig
  596. durStr := timeutil.DurationString(end.Sub(start))
  597. if durStr == "" {
  598. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  599. }
  600. queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  601. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGPUsAllocated)
  602. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  603. return source.NewFuture(source.DecodeGPUsAllocatedResult, ctx.QueryAtTime(queryGPUsAllocated, end))
  604. }
  605. func (pds *PrometheusMetricsQuerier) QueryIsGPUShared(start, end time.Time) *source.Future[source.IsGPUSharedResult] {
  606. const queryName = "QueryIsGPUShared"
  607. const queryFmtIsGPUShared = `avg(avg_over_time(kube_pod_container_resource_requests{container!="", node != "", pod != "", container!= "", unit = "integer", %s}[%s])) by (container, pod, namespace, node, resource, uid, %s)`
  608. cfg := pds.promConfig
  609. durStr := timeutil.DurationString(end.Sub(start))
  610. if durStr == "" {
  611. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  612. }
  613. queryIsGPUShared := fmt.Sprintf(queryFmtIsGPUShared, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  614. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryIsGPUShared)
  615. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  616. return source.NewFuture(source.DecodeIsGPUSharedResult, ctx.QueryAtTime(queryIsGPUShared, end))
  617. }
  618. func (pds *PrometheusMetricsQuerier) QueryGPUInfo(start, end time.Time) *source.Future[source.GPUInfoResult] {
  619. const queryName = "QueryGPUInfo"
  620. const queryFmtGetGPUInfo = `avg(avg_over_time(DCGM_FI_DEV_DEC_UTIL{container!="",%s}[%s])) by (container, pod, namespace, device, modelName, UUID, uid, %s)`
  621. cfg := pds.promConfig
  622. durStr := timeutil.DurationString(end.Sub(start))
  623. if durStr == "" {
  624. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  625. }
  626. queryGetGPUInfo := fmt.Sprintf(queryFmtGetGPUInfo, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  627. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryGetGPUInfo)
  628. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  629. return source.NewFuture(source.DecodeGPUInfoResult, ctx.QueryAtTime(queryGetGPUInfo, end))
  630. }
  631. func (pds *PrometheusMetricsQuerier) QueryNodeCPUPricePerHr(start, end time.Time) *source.Future[source.NodeCPUPricePerHrResult] {
  632. const queryName = "QueryNodeCPUPricePerHr"
  633. const queryFmtNodeCostPerCPUHr = `avg(avg_over_time(node_cpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  634. cfg := pds.promConfig
  635. durStr := timeutil.DurationString(end.Sub(start))
  636. if durStr == "" {
  637. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  638. }
  639. queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  640. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerCPUHr)
  641. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  642. return source.NewFuture(source.DecodeNodeCPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerCPUHr, end))
  643. }
  644. func (pds *PrometheusMetricsQuerier) QueryNodeRAMPricePerGiBHr(start, end time.Time) *source.Future[source.NodeRAMPricePerGiBHrResult] {
  645. const queryName = "QueryNodeRAMPricePerGiBHr"
  646. const queryFmtNodeCostPerRAMGiBHr = `avg(avg_over_time(node_ram_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  647. cfg := pds.promConfig
  648. durStr := timeutil.DurationString(end.Sub(start))
  649. if durStr == "" {
  650. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  651. }
  652. queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  653. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerRAMGiBHr)
  654. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  655. return source.NewFuture(source.DecodeNodeRAMPricePerGiBHrResult, ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end))
  656. }
  657. func (pds *PrometheusMetricsQuerier) QueryNodeGPUPricePerHr(start, end time.Time) *source.Future[source.NodeGPUPricePerHrResult] {
  658. const queryName = "QueryNodeGPUPricePerHr"
  659. const queryFmtNodeCostPerGPUHr = `avg(avg_over_time(node_gpu_hourly_cost{%s}[%s])) by (node, uid, %s, instance_type, provider_id)`
  660. cfg := pds.promConfig
  661. durStr := timeutil.DurationString(end.Sub(start))
  662. if durStr == "" {
  663. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  664. }
  665. queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  666. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeCostPerGPUHr)
  667. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  668. return source.NewFuture(source.DecodeNodeGPUPricePerHrResult, ctx.QueryAtTime(queryNodeCostPerGPUHr, end))
  669. }
  670. func (pds *PrometheusMetricsQuerier) QueryNodeIsSpot(start, end time.Time) *source.Future[source.NodeIsSpotResult] {
  671. const queryName = "QueryNodeIsSpot"
  672. const queryFmtNodeIsSpot = `avg_over_time(kubecost_node_is_spot{%s}[%s])`
  673. cfg := pds.promConfig
  674. durStr := timeutil.DurationString(end.Sub(start))
  675. if durStr == "" {
  676. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  677. }
  678. queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, cfg.ClusterFilter, durStr)
  679. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNodeIsSpot)
  680. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  681. return source.NewFuture(source.DecodeNodeIsSpotResult, ctx.QueryAtTime(queryNodeIsSpot, end))
  682. }
  683. func (pds *PrometheusMetricsQuerier) QueryPodPVCAllocation(start, end time.Time) *source.Future[source.PodPVCAllocationResult] {
  684. const queryName = "QueryPodPVCAllocation"
  685. const queryFmtPodPVCAllocation = `avg(avg_over_time(pod_pvc_allocation{%s}[%s])) by (persistentvolume, persistentvolumeclaim, pod, namespace, uid, %s)`
  686. cfg := pds.promConfig
  687. durStr := timeutil.DurationString(end.Sub(start))
  688. if durStr == "" {
  689. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  690. }
  691. queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  692. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodPVCAllocation)
  693. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  694. return source.NewFuture(source.DecodePodPVCAllocationResult, ctx.QueryAtTime(queryPodPVCAllocation, end))
  695. }
  696. func (pds *PrometheusMetricsQuerier) QueryPVCBytesRequested(start, end time.Time) *source.Future[source.PVCBytesRequestedResult] {
  697. const queryName = "QueryPVCBytesRequested"
  698. const queryFmtPVCBytesRequested = `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{%s}[%s])) by (persistentvolumeclaim, namespace, uid, %s)`
  699. cfg := pds.promConfig
  700. durStr := timeutil.DurationString(end.Sub(start))
  701. if durStr == "" {
  702. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  703. }
  704. queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  705. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVCBytesRequested)
  706. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  707. return source.NewFuture(source.DecodePVCBytesRequestedResult, ctx.QueryAtTime(queryPVCBytesRequested, end))
  708. }
  709. func (pds *PrometheusMetricsQuerier) QueryPVBytes(start, end time.Time) *source.Future[source.PVBytesResult] {
  710. const queryName = "QueryPVBytes"
  711. const queryFmtPVBytes = `avg(avg_over_time(kube_persistentvolume_capacity_bytes{%s}[%s])) by (persistentvolume, uid, %s)`
  712. cfg := pds.promConfig
  713. durStr := timeutil.DurationString(end.Sub(start))
  714. if durStr == "" {
  715. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  716. }
  717. queryPVBytes := fmt.Sprintf(queryFmtPVBytes, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  718. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVBytes)
  719. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  720. return source.NewFuture(source.DecodePVBytesResult, ctx.QueryAtTime(queryPVBytes, end))
  721. }
  722. func (pds *PrometheusMetricsQuerier) QueryPVInfo(start, end time.Time) *source.Future[source.PVInfoResult] {
  723. const queryName = "QueryPVInfo"
  724. const queryFmtPVMeta = `avg(avg_over_time(kubecost_pv_info{%s}[%s])) by (%s, storageclass, persistentvolume, uid, provider_id)`
  725. cfg := pds.promConfig
  726. durStr := timeutil.DurationString(end.Sub(start))
  727. if durStr == "" {
  728. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  729. }
  730. queryPVMeta := fmt.Sprintf(queryFmtPVMeta, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  731. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPVMeta)
  732. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  733. return source.NewFuture(source.DecodePVInfoResult, ctx.QueryAtTime(queryPVMeta, end))
  734. }
  735. func (pds *PrometheusMetricsQuerier) QueryNetZoneGiB(start, end time.Time) *source.Future[source.NetZoneGiBResult] {
  736. const queryName = "QueryNetZoneGiB"
  737. const queryFmtNetZoneGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  738. cfg := pds.promConfig
  739. minsPerResolution := cfg.DataResolutionMinutes
  740. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  741. if durStr == "" {
  742. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  743. }
  744. queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  745. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneGiB)
  746. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  747. return source.NewFuture(source.DecodeNetZoneGiBResult, ctx.QueryAtTime(queryNetZoneGiB, end))
  748. }
  749. func (pds *PrometheusMetricsQuerier) QueryNetZonePricePerGiB(start, end time.Time) *source.Future[source.NetZonePricePerGiBResult] {
  750. const queryName = "QueryNetZonePricePerGiB"
  751. const queryFmtNetZoneCostPerGiB = `avg(avg_over_time(kubecost_network_zone_egress_cost{%s}[%s])) by (%s)`
  752. cfg := pds.promConfig
  753. durStr := timeutil.DurationString(end.Sub(start))
  754. if durStr == "" {
  755. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  756. }
  757. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  758. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  759. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  760. return source.NewFuture(source.DecodeNetZonePricePerGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  761. }
  762. func (pds *PrometheusMetricsQuerier) QueryNetRegionGiB(start, end time.Time) *source.Future[source.NetRegionGiBResult] {
  763. const queryName = "QueryNetRegionGiB"
  764. const queryFmtNetRegionGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  765. cfg := pds.promConfig
  766. minsPerResolution := cfg.DataResolutionMinutes
  767. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  768. if durStr == "" {
  769. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  770. }
  771. queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  772. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionGiB)
  773. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  774. return source.NewFuture(source.DecodeNetRegionGiBResult, ctx.QueryAtTime(queryNetRegionGiB, end))
  775. }
  776. func (pds *PrometheusMetricsQuerier) QueryNetRegionPricePerGiB(start, end time.Time) *source.Future[source.NetRegionPricePerGiBResult] {
  777. const queryName = "QueryNetRegionPricePerGiB"
  778. const queryFmtNetRegionCostPerGiB = `avg(avg_over_time(kubecost_network_region_egress_cost{%s}[%s])) by (%s)`
  779. cfg := pds.promConfig
  780. durStr := timeutil.DurationString(end.Sub(start))
  781. if durStr == "" {
  782. panic("failed to parse duration string passed to QueryNetRegionPricePerGiB")
  783. }
  784. queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  785. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionCostPerGiB)
  786. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  787. return source.NewFuture(source.DecodeNetRegionPricePerGiBResult, ctx.QueryAtTime(queryNetRegionCostPerGiB, end))
  788. }
  789. func (pds *PrometheusMetricsQuerier) QueryNetInternetGiB(start, end time.Time) *source.Future[source.NetInternetGiBResult] {
  790. const queryName = "QueryNetInternetGiB"
  791. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  792. cfg := pds.promConfig
  793. minsPerResolution := cfg.DataResolutionMinutes
  794. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  795. if durStr == "" {
  796. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  797. }
  798. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  799. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  800. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  801. return source.NewFuture(source.DecodeNetInternetGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  802. }
  803. func (pds *PrometheusMetricsQuerier) QueryNetInternetPricePerGiB(start, end time.Time) *source.Future[source.NetInternetPricePerGiBResult] {
  804. const queryName = "QueryNetInternetPricePerGiB"
  805. const queryFmtNetInternetCostPerGiB = `avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)`
  806. cfg := pds.promConfig
  807. durStr := timeutil.DurationString(end.Sub(start))
  808. if durStr == "" {
  809. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  810. }
  811. queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  812. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetCostPerGiB)
  813. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  814. return source.NewFuture(source.DecodeNetInternetPricePerGiBResult, ctx.QueryAtTime(queryNetInternetCostPerGiB, end))
  815. }
  816. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceGiB(start, end time.Time) *source.Future[source.NetInternetServiceGiBResult] {
  817. const queryName = "QueryNetInternetServiceGiB"
  818. const queryFmtNetInternetGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  819. cfg := pds.promConfig
  820. minsPerResolution := cfg.DataResolutionMinutes
  821. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  822. if durStr == "" {
  823. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  824. }
  825. queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  826. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetInternetGiB)
  827. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  828. return source.NewFuture(source.DecodeNetInternetServiceGiBResult, ctx.QueryAtTime(queryNetInternetGiB, end))
  829. }
  830. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayPricePerGiB(start, end time.Time) *source.Future[source.NetNatGatewayPricePerGiBResult] {
  831. const queryName = "QueryNetNatGatewayPricePerGiB"
  832. const queryFmtNetNatGatewayPricePerGiB = `avg(avg_over_time(kubecost_network_nat_gateway_egress_cost{%s}[%s])) by (%s)`
  833. cfg := pds.promConfig
  834. durStr := timeutil.DurationString(end.Sub(start))
  835. if durStr == "" {
  836. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  837. }
  838. queryNetNatGatewayPricePerGiB := fmt.Sprintf(queryFmtNetNatGatewayPricePerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  839. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayPricePerGiB)
  840. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  841. return source.NewFuture(source.DecodeNetNatGatewayPricePerGiBResult, ctx.QueryAtTime(queryNetNatGatewayPricePerGiB, end))
  842. }
  843. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayGiB(start, end time.Time) *source.Future[source.NetNatGatewayGiBResult] {
  844. const queryName = "QueryNetNatGatewayGiB"
  845. const queryFmtNetNatGatewayGiB = `sum(increase(kubecost_pod_network_egress_bytes_total{nat_gateway="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  846. cfg := pds.promConfig
  847. minsPerResolution := cfg.DataResolutionMinutes
  848. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  849. if durStr == "" {
  850. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  851. }
  852. queryNetNatGatewayGiB := fmt.Sprintf(queryFmtNetNatGatewayGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  853. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayGiB)
  854. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  855. return source.NewFuture(source.DecodeNetNatGatewayGiBResult, ctx.QueryAtTime(queryNetNatGatewayGiB, end))
  856. }
  857. func (pds *PrometheusMetricsQuerier) QueryNetTransferBytes(start, end time.Time) *source.Future[source.NetTransferBytesResult] {
  858. const queryName = "QueryNetTransferBytes"
  859. const queryFmtNetTransferBytes = `sum(increase(container_network_transmit_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  860. cfg := pds.promConfig
  861. minsPerResolution := cfg.DataResolutionMinutes
  862. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  863. if durStr == "" {
  864. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  865. }
  866. queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  867. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetTransferBytes)
  868. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  869. return source.NewFuture(source.DecodeNetTransferBytesResult, ctx.QueryAtTime(queryNetTransferBytes, end))
  870. }
  871. func (pds *PrometheusMetricsQuerier) QueryNetZoneIngressGiB(start, end time.Time) *source.Future[source.NetZoneIngressGiBResult] {
  872. const queryName = "QueryNetZoneIngressGiB"
  873. const queryFmtIngNetZoneGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  874. cfg := pds.promConfig
  875. minsPerResolution := cfg.DataResolutionMinutes
  876. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  877. if durStr == "" {
  878. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  879. }
  880. queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtIngNetZoneGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  881. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetZoneCostPerGiB)
  882. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  883. return source.NewFuture(source.DecodeNetZoneIngressGiBResult, ctx.QueryAtTime(queryNetZoneCostPerGiB, end))
  884. }
  885. func (pds *PrometheusMetricsQuerier) QueryNetRegionIngressGiB(start, end time.Time) *source.Future[source.NetRegionIngressGiBResult] {
  886. const queryName = "QueryNetRegionIngressGiB"
  887. const queryFmtIngNetRegionGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="false", same_zone="false", same_region="false", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  888. cfg := pds.promConfig
  889. minsPerResolution := cfg.DataResolutionMinutes
  890. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  891. if durStr == "" {
  892. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  893. }
  894. queryNetRegionIngGiB := fmt.Sprintf(queryFmtIngNetRegionGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  895. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetRegionIngGiB)
  896. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  897. return source.NewFuture(source.DecodeNetRegionIngressGiBResult, ctx.QueryAtTime(queryNetRegionIngGiB, end))
  898. }
  899. func (pds *PrometheusMetricsQuerier) QueryNetInternetIngressGiB(start, end time.Time) *source.Future[source.NetInternetIngressGiBResult] {
  900. const queryName = "QueryNetInternetIngressGiB"
  901. const queryFmtNetIngInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, uid, %s) / 1024 / 1024 / 1024`
  902. cfg := pds.promConfig
  903. minsPerResolution := cfg.DataResolutionMinutes
  904. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  905. if durStr == "" {
  906. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  907. }
  908. queryNetIngInternetGiB := fmt.Sprintf(queryFmtNetIngInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  909. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  910. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  911. return source.NewFuture(source.DecodeNetInternetIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  912. }
  913. func (pds *PrometheusMetricsQuerier) QueryNetInternetServiceIngressGiB(start, end time.Time) *source.Future[source.NetInternetServiceIngressGiBResult] {
  914. const queryName = "QueryNetInternetServiceIngressGiB"
  915. const queryFmtIngNetInternetGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{internet="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  916. cfg := pds.promConfig
  917. minsPerResolution := cfg.DataResolutionMinutes
  918. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  919. if durStr == "" {
  920. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  921. }
  922. queryNetIngInternetGiB := fmt.Sprintf(queryFmtIngNetInternetGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  923. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetIngInternetGiB)
  924. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  925. return source.NewFuture(source.DecodeNetInternetServiceIngressGiBResult, ctx.QueryAtTime(queryNetIngInternetGiB, end))
  926. }
  927. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayIngressPricePerGiB(start, end time.Time) *source.Future[source.NetNatGatewayPricePerGiBResult] {
  928. const queryName = "QueryNetNatGatewayIngressPricePerGiB"
  929. const queryFmtNetNatGatewayIngressPricePerGiB = `avg(avg_over_time(kubecost_network_nat_gateway_ingress_cost{%s}[%s])) by (%s)`
  930. cfg := pds.promConfig
  931. durStr := timeutil.DurationString(end.Sub(start))
  932. if durStr == "" {
  933. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  934. }
  935. queryNetNatGatewayIngressPricePerGiB := fmt.Sprintf(queryFmtNetNatGatewayIngressPricePerGiB, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  936. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayIngressPricePerGiB)
  937. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  938. return source.NewFuture(source.DecodeNetNatGatewayPricePerGiBResult, ctx.QueryAtTime(queryNetNatGatewayIngressPricePerGiB, end))
  939. }
  940. func (pds *PrometheusMetricsQuerier) QueryNetNatGatewayIngressGiB(start, end time.Time) *source.Future[source.NetNatGatewayIngressGiBResult] {
  941. const queryName = "QueryNetNatGatewayIngressGiB"
  942. const queryFmtNetNatGatewayIngressGiB = `sum(increase(kubecost_pod_network_ingress_bytes_total{nat_gateway="true", %s}[%s:%dm])) by (pod_name, namespace, service, uid, %s) / 1024 / 1024 / 1024`
  943. cfg := pds.promConfig
  944. minsPerResolution := cfg.DataResolutionMinutes
  945. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  946. if durStr == "" {
  947. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  948. }
  949. queryNetNatGatewayIngressGiB := fmt.Sprintf(queryFmtNetNatGatewayIngressGiB, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  950. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetNatGatewayIngressGiB)
  951. ctx := pds.promContexts.NewNamedContext(NetworkInsightsContextName)
  952. return source.NewFuture(source.DecodeNetNatGatewayIngressGiBResult, ctx.QueryAtTime(queryNetNatGatewayIngressGiB, end))
  953. }
  954. func (pds *PrometheusMetricsQuerier) QueryNetReceiveBytes(start, end time.Time) *source.Future[source.NetReceiveBytesResult] {
  955. const queryName = "QueryNetReceiveBytes"
  956. const queryFmtNetReceiveBytes = `sum(increase(container_network_receive_bytes_total{pod!="", %s}[%s:%dm])) by (pod_name, pod, namespace, uid, %s)`
  957. cfg := pds.promConfig
  958. minsPerResolution := cfg.DataResolutionMinutes
  959. durStr := pds.durationStringFor(start, end, minsPerResolution, true)
  960. if durStr == "" {
  961. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  962. }
  963. queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, cfg.ClusterFilter, durStr, minsPerResolution, cfg.ClusterLabel)
  964. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNetReceiveBytes)
  965. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  966. return source.NewFuture(source.DecodeNetReceiveBytesResult, ctx.QueryAtTime(queryNetReceiveBytes, end))
  967. }
  968. // Note: namespace_info is not currently emitted
  969. func (pds *PrometheusMetricsQuerier) QueryNamespaceUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  970. const queryName = "QueryNamespaceUptime"
  971. const queryFmtNamespaceUptime = `avg(namespace_info{%s}) by (%s, uid)[%s:%dm]`
  972. cfg := pds.promConfig
  973. minsPerResolution := cfg.DataResolutionMinutes
  974. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  975. if durStr == "" {
  976. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  977. }
  978. queryNamespaceUptime := fmt.Sprintf(queryFmtNamespaceUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  979. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtNamespaceUptime)
  980. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  981. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryNamespaceUptime, end))
  982. }
  983. func (pds *PrometheusMetricsQuerier) QueryNamespaceLabels(start, end time.Time) *source.Future[source.NamespaceLabelsResult] {
  984. const queryName = "QueryNamespaceLabels"
  985. const queryFmtNamespaceLabels = `avg_over_time(kube_namespace_labels{%s}[%s])`
  986. cfg := pds.promConfig
  987. durStr := timeutil.DurationString(end.Sub(start))
  988. if durStr == "" {
  989. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  990. }
  991. queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, cfg.ClusterFilter, durStr)
  992. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceLabels)
  993. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  994. return source.NewFuture(source.DecodeNamespaceLabelsResult, ctx.QueryAtTime(queryNamespaceLabels, end))
  995. }
  996. func (pds *PrometheusMetricsQuerier) QueryNamespaceAnnotations(start, end time.Time) *source.Future[source.NamespaceAnnotationsResult] {
  997. const queryName = "QueryNamespaceAnnotations"
  998. const queryFmtNamespaceAnnotations = `avg_over_time(kube_namespace_annotations{%s}[%s])`
  999. cfg := pds.promConfig
  1000. durStr := timeutil.DurationString(end.Sub(start))
  1001. if durStr == "" {
  1002. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1003. }
  1004. queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, cfg.ClusterFilter, durStr)
  1005. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryNamespaceAnnotations)
  1006. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1007. return source.NewFuture(source.DecodeNamespaceAnnotationsResult, ctx.QueryAtTime(queryNamespaceAnnotations, end))
  1008. }
  1009. func (pds *PrometheusMetricsQuerier) QueryPodLabels(start, end time.Time) *source.Future[source.PodLabelsResult] {
  1010. const queryName = "QueryPodLabels"
  1011. const queryFmtPodLabels = `avg_over_time(kube_pod_labels{%s}[%s])`
  1012. cfg := pds.promConfig
  1013. durStr := timeutil.DurationString(end.Sub(start))
  1014. if durStr == "" {
  1015. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1016. }
  1017. queryPodLabels := fmt.Sprintf(queryFmtPodLabels, cfg.ClusterFilter, durStr)
  1018. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodLabels)
  1019. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1020. return source.NewFuture(source.DecodePodLabelsResult, ctx.QueryAtTime(queryPodLabels, end))
  1021. }
  1022. func (pds *PrometheusMetricsQuerier) QueryPodAnnotations(start, end time.Time) *source.Future[source.PodAnnotationsResult] {
  1023. const queryName = "QueryPodAnnotations"
  1024. const queryFmtPodAnnotations = `avg_over_time(kube_pod_annotations{%s}[%s])`
  1025. cfg := pds.promConfig
  1026. durStr := timeutil.DurationString(end.Sub(start))
  1027. if durStr == "" {
  1028. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1029. }
  1030. queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, cfg.ClusterFilter, durStr)
  1031. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodAnnotations)
  1032. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1033. return source.NewFuture(source.DecodePodAnnotationsResult, ctx.QueryAtTime(queryPodAnnotations, end))
  1034. }
  1035. func (pds *PrometheusMetricsQuerier) QueryServiceLabels(start, end time.Time) *source.Future[source.ServiceLabelsResult] {
  1036. const queryName = "QueryServiceLabels"
  1037. const queryFmtServiceLabels = `avg_over_time(service_selector_labels{%s}[%s])`
  1038. cfg := pds.promConfig
  1039. durStr := timeutil.DurationString(end.Sub(start))
  1040. if durStr == "" {
  1041. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1042. }
  1043. queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, cfg.ClusterFilter, durStr)
  1044. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryServiceLabels)
  1045. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1046. return source.NewFuture(source.DecodeServiceLabelsResult, ctx.QueryAtTime(queryServiceLabels, end))
  1047. }
  1048. func (pds *PrometheusMetricsQuerier) QueryDeploymentLabels(start, end time.Time) *source.Future[source.DeploymentLabelsResult] {
  1049. const queryName = "QueryDeploymentLabels"
  1050. const queryFmtDeploymentLabels = `avg_over_time(deployment_match_labels{%s}[%s])`
  1051. cfg := pds.promConfig
  1052. durStr := timeutil.DurationString(end.Sub(start))
  1053. if durStr == "" {
  1054. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1055. }
  1056. queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, cfg.ClusterFilter, durStr)
  1057. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDeploymentLabels)
  1058. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1059. return source.NewFuture(source.DecodeDeploymentLabelsResult, ctx.QueryAtTime(queryDeploymentLabels, end))
  1060. }
  1061. func (pds *PrometheusMetricsQuerier) QueryStatefulSetLabels(start, end time.Time) *source.Future[source.StatefulSetLabelsResult] {
  1062. const queryName = "QueryStatefulSetLabels"
  1063. const queryFmtStatefulSetLabels = `avg_over_time(statefulSet_match_labels{%s}[%s])`
  1064. cfg := pds.promConfig
  1065. durStr := timeutil.DurationString(end.Sub(start))
  1066. if durStr == "" {
  1067. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1068. }
  1069. queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, cfg.ClusterFilter, durStr)
  1070. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryStatefulSetLabels)
  1071. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1072. return source.NewFuture(source.DecodeStatefulSetLabelsResult, ctx.QueryAtTime(queryStatefulSetLabels, end))
  1073. }
  1074. func (pds *PrometheusMetricsQuerier) QueryDaemonSetLabels(start, end time.Time) *source.Future[source.DaemonSetLabelsResult] {
  1075. const queryName = "QueryDaemonSetLabels"
  1076. const queryFmtDaemonSetLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="DaemonSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1077. cfg := pds.promConfig
  1078. durStr := timeutil.DurationString(end.Sub(start))
  1079. if durStr == "" {
  1080. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1081. }
  1082. queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1083. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryDaemonSetLabels)
  1084. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1085. return source.NewFuture(source.DecodeDaemonSetLabelsResult, ctx.QueryAtTime(queryDaemonSetLabels, end))
  1086. }
  1087. func (pds *PrometheusMetricsQuerier) QueryJobLabels(start, end time.Time) *source.Future[source.JobLabelsResult] {
  1088. const queryName = "QueryJobLabels"
  1089. const queryFmtJobLabels = `sum(avg_over_time(kube_pod_owner{owner_kind="Job", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1090. cfg := pds.promConfig
  1091. durStr := timeutil.DurationString(end.Sub(start))
  1092. if durStr == "" {
  1093. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1094. }
  1095. queryJobLabels := fmt.Sprintf(queryFmtJobLabels, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1096. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryJobLabels)
  1097. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1098. return source.NewFuture(source.DecodeJobLabelsResult, ctx.QueryAtTime(queryJobLabels, end))
  1099. }
  1100. func (pds *PrometheusMetricsQuerier) QueryPodsWithReplicaSetOwner(start, end time.Time) *source.Future[source.PodsWithReplicaSetOwnerResult] {
  1101. const queryName = "QueryPodsWithReplicaSetOwner"
  1102. const queryFmtPodsWithReplicaSetOwner = `sum(avg_over_time(kube_pod_owner{owner_kind="ReplicaSet", %s}[%s])) by (pod, owner_name, namespace, uid, %s)`
  1103. cfg := pds.promConfig
  1104. durStr := timeutil.DurationString(end.Sub(start))
  1105. if durStr == "" {
  1106. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1107. }
  1108. queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1109. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryPodsWithReplicaSetOwner)
  1110. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1111. return source.NewFuture(source.DecodePodsWithReplicaSetOwnerResult, ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end))
  1112. }
  1113. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithoutOwners(start, end time.Time) *source.Future[source.ReplicaSetsWithoutOwnersResult] {
  1114. const queryName = "QueryReplicaSetsWithoutOwners"
  1115. const queryFmtReplicaSetsWithoutOwners = `avg(avg_over_time(kube_replicaset_owner{owner_kind="<none>", owner_name="<none>", %s}[%s])) by (replicaset, namespace, uid, %s)`
  1116. cfg := pds.promConfig
  1117. durStr := timeutil.DurationString(end.Sub(start))
  1118. if durStr == "" {
  1119. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1120. }
  1121. queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1122. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithoutOwners)
  1123. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1124. return source.NewFuture(source.DecodeReplicaSetsWithoutOwnersResult, ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end))
  1125. }
  1126. func (pds *PrometheusMetricsQuerier) QueryReplicaSetsWithRollout(start, end time.Time) *source.Future[source.ReplicaSetsWithRolloutResult] {
  1127. const queryName = "QueryReplicaSetsWithRollout"
  1128. const queryFmtReplicaSetsWithRolloutOwner = `avg(avg_over_time(kube_replicaset_owner{owner_kind="Rollout", %s}[%s])) by (replicaset, namespace, owner_kind, owner_name, uid, %s)`
  1129. cfg := pds.promConfig
  1130. durStr := timeutil.DurationString(end.Sub(start))
  1131. if durStr == "" {
  1132. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1133. }
  1134. queryReplicaSetsWithRolloutOwner := fmt.Sprintf(queryFmtReplicaSetsWithRolloutOwner, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1135. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryReplicaSetsWithRolloutOwner)
  1136. ctx := pds.promContexts.NewNamedContext(AllocationContextName)
  1137. return source.NewFuture(source.DecodeReplicaSetsWithRolloutResult, ctx.QueryAtTime(queryReplicaSetsWithRolloutOwner, end))
  1138. }
  1139. // Note: The ResourceQuota metrics are _not_ emitted at the moment. Leaving the query implementations here in case we add metric emission later on.
  1140. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaUptime(start, end time.Time) *source.Future[source.UptimeResult] {
  1141. const queryName = "QueryResourceQuotaUptime"
  1142. const queryFmtResourceQuotaUptime = `avg(resourcequota_info{%s}) by (%s, uid)[%s:%dm]`
  1143. cfg := pds.promConfig
  1144. minsPerResolution := cfg.DataResolutionMinutes
  1145. durStr := pds.durationStringFor(start, end, minsPerResolution, false)
  1146. if durStr == "" {
  1147. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1148. }
  1149. queryResourceQuotaUptime := fmt.Sprintf(queryFmtResourceQuotaUptime, cfg.ClusterFilter, cfg.ClusterLabel, durStr, minsPerResolution)
  1150. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryFmtResourceQuotaUptime)
  1151. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1152. return source.NewFuture(source.DecodeUptimeResult, ctx.QueryAtTime(queryResourceQuotaUptime, end))
  1153. }
  1154. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestAvgResult] {
  1155. const queryName = "QueryResourceQuotaSpecCPURequestAverage"
  1156. const queryFmtResourceQuotaSpecCPURequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1157. cfg := pds.promConfig
  1158. durStr := timeutil.DurationString(end.Sub(start))
  1159. if durStr == "" {
  1160. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1161. }
  1162. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1163. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1164. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1165. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1166. }
  1167. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPURequestMaxResult] {
  1168. const queryName = "QueryResourceQuotaSpecCPURequestMax"
  1169. const queryFmtResourceQuotaSpecCPURequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1170. cfg := pds.promConfig
  1171. durStr := timeutil.DurationString(end.Sub(start))
  1172. if durStr == "" {
  1173. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1174. }
  1175. queryResourceQuotaSpecCPURequests := fmt.Sprintf(queryFmtResourceQuotaSpecCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1176. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPURequests)
  1177. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1178. return source.NewFuture(source.DecodeResourceQuotaSpecCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPURequests, end))
  1179. }
  1180. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestAvgResult] {
  1181. const queryName = "QueryResourceQuotaSpecRAMRequestAverage"
  1182. const queryFmtResourceQuotaSpecRAMRequests = `avg(avg_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1183. cfg := pds.promConfig
  1184. durStr := timeutil.DurationString(end.Sub(start))
  1185. if durStr == "" {
  1186. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1187. }
  1188. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1189. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1190. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1191. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1192. }
  1193. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMRequestMaxResult] {
  1194. const queryName = "QueryResourceQuotaSpecRAMRequestMax"
  1195. const queryFmtResourceQuotaSpecRAMRequests = `max(max_over_time(resourcequota_spec_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1196. cfg := pds.promConfig
  1197. durStr := timeutil.DurationString(end.Sub(start))
  1198. if durStr == "" {
  1199. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1200. }
  1201. queryResourceQuotaSpecRAMRequests := fmt.Sprintf(queryFmtResourceQuotaSpecRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1202. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMRequests)
  1203. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1204. return source.NewFuture(source.DecodeResourceQuotaSpecRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMRequests, end))
  1205. }
  1206. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitAvgResult] {
  1207. const queryName = "QueryResourceQuotaSpecCPULimitAverage"
  1208. const queryFmtResourceQuotaSpecCPULimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1209. cfg := pds.promConfig
  1210. durStr := timeutil.DurationString(end.Sub(start))
  1211. if durStr == "" {
  1212. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1213. }
  1214. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1215. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1216. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1217. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1218. }
  1219. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecCPULimitMaxResult] {
  1220. const queryName = "QueryResourceQuotaSpecCPULimitMax"
  1221. const queryFmtResourceQuotaSpecCPULimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1222. cfg := pds.promConfig
  1223. durStr := timeutil.DurationString(end.Sub(start))
  1224. if durStr == "" {
  1225. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1226. }
  1227. queryResourceQuotaSpecCPULimits := fmt.Sprintf(queryFmtResourceQuotaSpecCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1228. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecCPULimits)
  1229. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1230. return source.NewFuture(source.DecodeResourceQuotaSpecCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecCPULimits, end))
  1231. }
  1232. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitAvgResult] {
  1233. const queryName = "QueryResourceQuotaSpecRAMLimitAverage"
  1234. const queryFmtResourceQuotaSpecRAMLimits = `avg(avg_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1235. cfg := pds.promConfig
  1236. durStr := timeutil.DurationString(end.Sub(start))
  1237. if durStr == "" {
  1238. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1239. }
  1240. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1241. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1242. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1243. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1244. }
  1245. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaSpecRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaSpecRAMLimitMaxResult] {
  1246. const queryName = "QueryResourceQuotaSpecRAMLimitMax"
  1247. const queryFmtResourceQuotaSpecRAMLimits = `max(max_over_time(resourcequota_spec_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1248. cfg := pds.promConfig
  1249. durStr := timeutil.DurationString(end.Sub(start))
  1250. if durStr == "" {
  1251. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1252. }
  1253. queryResourceQuotaSpecRAMLimits := fmt.Sprintf(queryFmtResourceQuotaSpecRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1254. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaSpecRAMLimits)
  1255. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1256. return source.NewFuture(source.DecodeResourceQuotaSpecRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaSpecRAMLimits, end))
  1257. }
  1258. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestAvgResult] {
  1259. const queryName = "QueryResourceQuotaStatusUsedCPURequestAverage"
  1260. const queryFmtResourceQuotaStatusUsedCPURequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1261. cfg := pds.promConfig
  1262. durStr := timeutil.DurationString(end.Sub(start))
  1263. if durStr == "" {
  1264. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1265. }
  1266. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1267. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1268. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1269. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1270. }
  1271. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPURequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPURequestMaxResult] {
  1272. const queryName = "QueryResourceQuotaStatusUsedCPURequestMax"
  1273. const queryFmtResourceQuotaStatusUsedCPURequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1274. cfg := pds.promConfig
  1275. durStr := timeutil.DurationString(end.Sub(start))
  1276. if durStr == "" {
  1277. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1278. }
  1279. queryResourceQuotaStatusUsedCPURequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPURequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1280. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPURequests)
  1281. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1282. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPURequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPURequests, end))
  1283. }
  1284. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestAvgResult] {
  1285. const queryName = "QueryResourceQuotaStatusUsedRAMRequestAverage"
  1286. const queryFmtResourceQuotaStatusUsedRAMRequests = `avg(avg_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1287. cfg := pds.promConfig
  1288. durStr := timeutil.DurationString(end.Sub(start))
  1289. if durStr == "" {
  1290. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1291. }
  1292. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1293. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1294. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1295. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1296. }
  1297. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMRequestMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMRequestMaxResult] {
  1298. const queryName = "QueryResourceQuotaStatusUsedRAMRequestMax"
  1299. const queryFmtResourceQuotaStatusUsedRAMRequests = `max(max_over_time(resourcequota_status_used_resource_requests{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1300. cfg := pds.promConfig
  1301. durStr := timeutil.DurationString(end.Sub(start))
  1302. if durStr == "" {
  1303. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1304. }
  1305. queryResourceQuotaStatusUsedRAMRequests := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMRequests, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1306. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMRequests)
  1307. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1308. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMRequestMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMRequests, end))
  1309. }
  1310. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitAvgResult] {
  1311. const queryName = "QueryResourceQuotaStatusUsedCPULimitAverage"
  1312. const queryFmtResourceQuotaStatusUsedCPULimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1313. cfg := pds.promConfig
  1314. durStr := timeutil.DurationString(end.Sub(start))
  1315. if durStr == "" {
  1316. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1317. }
  1318. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1319. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1320. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1321. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1322. }
  1323. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedCPULimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedCPULimitMaxResult] {
  1324. const queryName = "QueryResourceQuotaStatusUsedCPULimitMax"
  1325. const queryFmtResourceQuotaStatusUsedCPULimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="cpu",unit="core", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1326. cfg := pds.promConfig
  1327. durStr := timeutil.DurationString(end.Sub(start))
  1328. if durStr == "" {
  1329. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1330. }
  1331. queryResourceQuotaStatusUsedCPULimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedCPULimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1332. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedCPULimits)
  1333. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1334. return source.NewFuture(source.DecodeResourceQuotaStatusUsedCPULimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedCPULimits, end))
  1335. }
  1336. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitAverage(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitAvgResult] {
  1337. const queryName = "QueryResourceQuotaStatusUsedRAMLimitAverage"
  1338. const queryFmtResourceQuotaStatusUsedRAMLimits = `avg(avg_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1339. cfg := pds.promConfig
  1340. durStr := timeutil.DurationString(end.Sub(start))
  1341. if durStr == "" {
  1342. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1343. }
  1344. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1345. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1346. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1347. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitAvgResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1348. }
  1349. func (pds *PrometheusMetricsQuerier) QueryResourceQuotaStatusUsedRAMLimitMax(start, end time.Time) *source.Future[source.ResourceQuotaStatusUsedRAMLimitMaxResult] {
  1350. const queryName = "QueryResourceQuotaStatusUsedRAMLimitMax"
  1351. const queryFmtResourceQuotaStatusUsedRAMLimits = `max(max_over_time(resourcequota_status_used_resource_limits{resource="memory",unit="byte", %s}[%s])) by (resourcequota, namespace, uid, %s)`
  1352. cfg := pds.promConfig
  1353. durStr := timeutil.DurationString(end.Sub(start))
  1354. if durStr == "" {
  1355. panic(fmt.Sprintf("failed to parse duration string passed to %s", queryName))
  1356. }
  1357. queryResourceQuotaStatusUsedRAMLimits := fmt.Sprintf(queryFmtResourceQuotaStatusUsedRAMLimits, cfg.ClusterFilter, durStr, cfg.ClusterLabel)
  1358. log.Debugf(PrometheusMetricsQueryLogFormat, queryName, end.Unix(), queryResourceQuotaStatusUsedRAMLimits)
  1359. ctx := pds.promContexts.NewNamedContext(KubeModelContextName)
  1360. return source.NewFuture(source.DecodeResourceQuotaStatusUsedRAMLimitMaxResult, ctx.QueryAtTime(queryResourceQuotaStatusUsedRAMLimits, end))
  1361. }
// QueryDataCoverage determines the time range over which cost data exists in
// Prometheus, by finding the oldest and newest sample timestamps of the
// node_cpu_hourly_cost metric within the last limitDays days. It returns
// (oldest, newest, error). When the metric is missing entirely it falls back
// to a one-day range ending at the start of the current UTC day; when only
// the newest-sample query comes back empty it returns (oldest, oldest, nil).
func (pds *PrometheusMetricsQuerier) QueryDataCoverage(limitDays int) (time.Time, time.Time, error) {
	const (
		queryName = "QueryDataCoverage"
		// Subqueries over the search window; timestamp() yields the sample
		// timestamp in unix seconds (consumed via time.Unix below).
		queryFmtOldestSample = `min_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
		queryFmtNewestSample = `max_over_time(timestamp(group(node_cpu_hourly_cost{%s}))[%s:%s])`
	)
	cfg := pds.promConfig
	// 60-minute resolution, matching the hard-coded "1h" subquery step below.
	minutesPerDuration := 60
	dur := time.Duration(limitDays) * timeutil.Day
	// Search window: limitDays days ending at the start of the next UTC day.
	end := time.Now().UTC().Truncate(timeutil.Day).Add(timeutil.Day)
	start := end.Add(-dur)
	durStr := pds.durationStringFor(start, end, minutesPerDuration, false)
	ctx := pds.promContexts.NewNamedContext(AllocationContextName)
	queryOldest := fmt.Sprintf(queryFmtOldestSample, cfg.ClusterFilter, durStr, "1h")
	log.Debugf("[Prometheus][%s[Oldest]][At Time: %d]: %s", queryName, end.Unix(), queryOldest)
	resOldestFut := ctx.QueryAtTime(queryOldest, end)
	resOldest, err := resOldestFut.Await()
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("querying oldest sample: %w", err)
	}
	if len(resOldest) == 0 || len(resOldest[0].Values) == 0 {
		// If node_cpu_hourly_cost metric is not available, fallback to a reasonable time range
		// This prevents CSV export from failing when the metric doesn't exist yet
		log.Warnf("QueryDataCoverage: node_cpu_hourly_cost metric not available, using fallback time range")
		// Use a reasonable fallback: start from 1 day ago to account for metric collection delay
		fallbackEnd := time.Now().UTC().Truncate(timeutil.Day)
		fallbackStart := fallbackEnd.AddDate(0, 0, -1) // 1 day ago
		return fallbackStart, fallbackEnd, nil
	}
	// The query result value is a unix-seconds timestamp of the oldest sample.
	oldest := time.Unix(int64(resOldest[0].Values[0].Value), 0)
	queryNewest := fmt.Sprintf(queryFmtNewestSample, cfg.ClusterFilter, durStr, "1h")
	log.Debugf("[Prometheus][%s[Newest]][At Time: %d]: %s", queryName, end.Unix(), queryNewest)
	resNewestFut := ctx.QueryAtTime(queryNewest, end)
	resNewest, err := resNewestFut.Await()
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("querying newest sample: %w", err)
	}
	if len(resNewest) == 0 || len(resNewest[0].Values) == 0 {
		// If newest query fails but oldest succeeded, use oldest as both start and end
		// This allows CSV export to proceed with at least some time range
		log.Warnf("QueryDataCoverage: newest sample query returned no results, using oldest timestamp")
		return oldest, oldest, nil
	}
	newest := time.Unix(int64(resNewest[0].Values[0].Value), 0)
	return oldest, newest, nil
}
  1408. // durationStringFor simplifies the determination of query duration based on the version of prom and if the function
  1409. // in the query needs all data points in the vector it is provided or if it will extrapolate its own. Functions
  1410. // that extrapolate will add on another resolution if given a duration that is one resolution longer than the intended
  1411. // duration.
  1412. func (pds *PrometheusMetricsQuerier) durationStringFor(start, end time.Time, minsPerResolution int, extrapolated bool) string {
  1413. dur := end.Sub(start)
  1414. // If using a version of Prometheus where the resolution needs duration offset,
  1415. // we need to apply that here.
  1416. //
  1417. // E.g. avg(node_total_hourly_cost{}) by (node, provider_id)[60m:5m] with
  1418. // time=01:00:00 will return, for a node running the entire time, 12
  1419. // timestamps where the first is 00:05:00 and the last is 01:00:00.
  1420. // However, OpenCost expects for there to be 13 timestamps where the first
  1421. // begins at 00:00:00. To achieve this, we must modify our query to
  1422. // avg(node_total_hourly_cost{}) by (node, provider_id)[65m:5m]
  1423. if pds.promConfig.IsOffsetResolution && !extrapolated {
  1424. // increase the query time by the resolution
  1425. dur = dur + (time.Duration(minsPerResolution) * time.Minute)
  1426. }
  1427. return timeutil.DurationString(dur)
  1428. }