queries.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. package prom
  2. var queries = map[string]string{
  3. "RAMRequests": `
  4. avg(
  5. count_over_time(
  6. kube_pod_container_resource_requests{
  7. resource="memory",
  8. unit="byte",
  9. node!="",
  10. << .container >>!="",
  11. << .container >>!="POD",
  12. << .filter >>
  13. }[<< .duration >>] << .offset >>
  14. )
  15. *
  16. avg_over_time(
  17. kube_pod_container_resource_requests{
  18. resource="memory",
  19. unit="byte",
  20. node!="",
  21. << .container >>!="",
  22. << .container >>!="POD",
  23. << .filter >>
  24. }[<< .duration >>] << .offset >>
  25. )
  26. ) by (
  27. namespace,
  28. node,
  29. << .container >>,
  30. << .pod >>,
  31. << .cluster >>
  32. )`,
  33. "RAMUsage": `
  34. sort_desc(
  35. avg(
  36. count_over_time(
  37. container_memory_working_set_bytes{
  38. node!="",
  39. << .container >>!="",
  40. << .container >>!="POD",
  41. << .filter >>
  42. }[<< .duration >>] << .offset >>
  43. )
  44. *
  45. avg_over_time(
  46. container_memory_working_set_bytes{
  47. node!=""
  48. << .container >>!="",
  49. << .container >>!="POD",
  50. << .filter >>
  51. }[<< .duration >>] << .offset >>
  52. )
  53. ) by (
  54. namespace,
  55. node,
  56. << .container >>,
  57. << .pod >>,
  58. << .cluster >>
  59. )
  60. )`,
  61. "CPURequests": `
  62. avg(
  63. count_over_time(
  64. kube_pod_container_resource_requests{
  65. resource="cpu",
  66. unit="core",
  67. node!="",
  68. << .container >>!="",
  69. << .container >>!="POD",
  70. << .filter >>
  71. }[<< .duration >>] << .offset >>
  72. )
  73. *
  74. avg_over_time(
  75. kube_pod_container_resource_requests{
  76. resource="cpu",
  77. unit="core",
  78. node!="",
  79. << .container >>!="",
  80. << .container >>!="POD",
  81. << .filter >>
  82. }[<< .duration >>] << .offset >>
  83. )
  84. ) by (
  85. namespace,
  86. node,
  87. << .container >>,
  88. << .pod >>,
  89. << .cluster >>
  90. )`,
  91. "CPUUsage": `
  92. avg(
  93. rate(
  94. container_cpu_usage_seconds_total{
  95. node!="",
  96. << .container >>!="",
  97. << .container >>!="POD",
  98. << .filter >>
  99. }[<< .duration >>] << .offset >>
  100. )
  101. ) by (
  102. namespace,
  103. node,
  104. << .container >>,
  105. << .pod >>,
  106. << .cluster >>
  107. )`,
  108. "GPURequests": `
  109. avg(
  110. count_over_time(
  111. kube_pod_container_resource_requests{
  112. resource="nvidia_com_gpu",
  113. node!="",
  114. << .container >>!="",
  115. << .container >>!="POD",
  116. << .filter >>
  117. }[<< .duration >>] << .offset >>
  118. )
  119. *
  120. avg_over_time(
  121. kube_pod_container_resource_requests{
  122. resource="nvidia_com_gpu",
  123. node!="",
  124. << .container >>!="",
  125. << .container >>!="POD",
  126. << .filter >>
  127. }[<< .duration >>] << .offset >>
  128. )
  129. *
  130. << .interval >>
  131. ) by (
  132. namespace,
  133. node,
  134. << .container >>,
  135. << .pod >>,
  136. << .cluster >>
  137. )
  138. * on (
  139. namespace,
  140. << .pod >>,
  141. << .cluster >>
  142. ) group_left(
  143. << .container >>
  144. ) avg(
  145. avg_over_time(
  146. kube_pod_status_phase{
  147. phase="Running",
  148. << .filter >>
  149. }[<< .duration >>] << .offset >>
  150. )
  151. ) by (
  152. namespace,
  153. << .pod >>,
  154. << .cluster >>
  155. )`,
  156. "PVRequests": `
  157. avg(
  158. avg(
  159. kube_persistentvolumeclaim_info{
  160. volumename!="",
  161. << .filter >>
  162. }
  163. ) by (
  164. persistentvolumeclaim,
  165. storageclass,
  166. namespace,
  167. volumename,
  168. kubernetes_node,
  169. << .cluster >>
  170. )
  171. *
  172. on (
  173. persistentvolumeclaim,
  174. namespace,
  175. kubernetes_node,
  176. << .cluster >>
  177. ) group_right(
  178. storageclass,
  179. volumename
  180. ) sum(
  181. kube_persistentvolumeclaim_resource_requests_storage_bytes{
  182. << .filter >>
  183. }
  184. ) by (
  185. persistentvolumeclaim,
  186. namespace,
  187. kubernetes_node,
  188. kubernetes_name,
  189. << .cluster >>
  190. )
  191. ) by (
  192. persistentvolumeclaim,
  193. storageclass,
  194. namespace,
  195. volumename,
  196. kubernetes_node,
  197. << .cluster >>
  198. )`,
  199. "RAMAlloc": `
  200. sum(
  201. sum_over_time(
  202. container_memory_allocation_bytes{
  203. node!="",
  204. << .container >>!="",
  205. << .container >>!="POD",
  206. << .filter >>
  207. }[<< .duration >>]
  208. )
  209. ) by (
  210. namespace,
  211. node,
  212. << .pod >>,
  213. << .container >>,
  214. << .cluster >>
  215. )
  216. *
  217. << .interval >> / 60 / 60`,
  218. "CPUAlloc": `
  219. sum(
  220. sum_over_time(
  221. container_cpu_allocation{
  222. node!="",
  223. << .container >>!="",
  224. << .container >>!="POD",
  225. << .filter >>
  226. }[<< .duration >>]
  227. )
  228. ) by (
  229. namespace,
  230. node,
  231. << .pod >>,
  232. << .container >>,
  233. << .cluster >>
  234. )
  235. *
  236. << .interval >> / 60 / 60`,
  237. "PVCAlloc": `
  238. sum(
  239. sum_over_time(
  240. pod_pvc_allocation{
  241. << .filter >>
  242. }[<< .duration >>]
  243. )
  244. ) by (
  245. namespace,
  246. persistentvolume,
  247. persistentvolumeclaim,
  248. << .cluster >>
  249. )
  250. *
  251. << .interval >>/60/60`,
  252. "PVHourlyCost": `
  253. avg_over_time(
  254. pv_hourly_cost{
  255. << .filter >>
  256. }[<< .duration >>]
  257. )`,
  258. "NamespaceLabels": `
  259. avg_over_time(
  260. kube_namespace_labels{
  261. << .filter >>
  262. }[<< .duration >>]
  263. )`,
  264. "PodLabels": `
  265. avg_over_time(
  266. kube_pod_labels{
  267. << .filter >>
  268. }[<< .duration >>]
  269. )`,
  270. "NamespaceAnnotations": `
  271. avg_over_time(
  272. kube_namespace_annotations{
  273. << .filter >>
  274. }[<< .duration >>]
  275. )`,
  276. "PodAnnotations": `
  277. avg_over_time(
  278. kube_pod_annotations{
  279. << .filter >>
  280. }[<< .duration >>]
  281. )`,
  282. "DeploymentLabels": `
  283. avg_over_time(
  284. deployment_match_labels{
  285. << .filter >>
  286. }[<< .duration >>]
  287. )`,
  288. "StatefulSetLabels": `
  289. avg_over_time(
  290. statefulSet_match_labels{
  291. << .filter >>
  292. }[<< .duration >>]
  293. )`,
  294. "DaemonSetLabels": `
  295. sum(
  296. kube_pod_owner{
  297. owner_kind="DaemonSet",
  298. << .filter >>
  299. }
  300. ) by (
  301. namespace,
  302. owner_name,
  303. << .pod >>,
  304. << .cluster >>
  305. )`,
  306. "JobLabels": `
  307. sum(
  308. kube_pod_owner{
  309. owner_kind="Job",
  310. << .filter >>
  311. }
  312. ) by (
  313. namespace,
  314. owner_name,
  315. << .pod >>,
  316. << .cluster >>
  317. )`,
  318. "ServiceLabels": `
  319. avg_over_time(
  320. service_selector_labels{
  321. << .filter >>
  322. }[<< .duration >>]
  323. )`,
  324. "NetZoneRequests": `
  325. sum(
  326. increase(
  327. kubecost_pod_network_egress_bytes_total{
  328. internet="false",
  329. sameZone="false",
  330. sameRegion="true",
  331. << .filter >>
  332. }[<< .duration >>] << .offset >>
  333. )
  334. ) by (
  335. namespace,
  336. << .pod >>,
  337. << .cluster >>
  338. ) / 1024 / 1024 / 1024`,
  339. "NetRegionRequests": `
  340. sum(
  341. increase(
  342. kubecost_pod_network_egress_bytes_total{
  343. internet="false",
  344. sameZone="false",
  345. sameRegion="false",
  346. << .filter >>
  347. }[<< .duration >>] << .offset >>
  348. )
  349. ) by (
  350. namespace,
  351. << .pod >>,
  352. << .cluster >>
  353. ) / 1024 / 1024 / 1024`,
  354. "NetInternetRequests": `
  355. sum(
  356. increase(
  357. kubecost_pod_network_egress_bytes_total{
  358. internet="true",
  359. << .filter >>
  360. }[<< .duration >>] << .offset >>
  361. )
  362. ) by (
  363. namespace,
  364. << .pod >>,
  365. << .cluster >>
  366. ) / 1024 / 1024 / 1024`,
  367. "Normalization": `
  368. max(
  369. count_over_time(
  370. kube_pod_container_resource_requests{
  371. resource="memory",
  372. unit="byte",
  373. << .filter >>
  374. }[<< .duration >>] << .offset >>
  375. )
  376. )`,
  377. "CPUCost": `
  378. avg(
  379. avg_over_time(
  380. node_cpu_hourly_cost{
  381. << .filter >>
  382. }[<< .duration >>] << .offset >>
  383. )
  384. ) by (
  385. node,
  386. instance,
  387. << .cluster >>
  388. )`,
  389. "RAMCost": `
  390. avg(
  391. avg_over_time(
  392. node_ram_hourly_cost{
  393. << .filter >>
  394. }[<< .duration >>] << .offset >>
  395. )
  396. ) by (
  397. node,
  398. instance,
  399. << .cluster >>
  400. )`,
  401. "GPUCost": `
  402. avg(
  403. avg_over_time(
  404. node_gpu_hourly_cost{
  405. << .filter >>
  406. }[<< .duration >>] << .offset >>
  407. )
  408. ) by (
  409. node,
  410. instance,
  411. << .cluster >>
  412. )`,
  413. "Pods": `avg(kube_pod_container_status_running{<< .filter >>}) by (namespace, << .pod >>, << .cluster >>)[<< .duration >>: << .resolution >>]`,
  414. "PodsUID": `avg(kube_pod_container_status_running{<< .filter >>}) by (namespace, uid, << .pod >>, << .cluster >>)[<< .duration >>: << .resolution >>]`,
  415. "RAMBytesAllocated": `avg(avg_over_time(container_memory_allocation_bytes{<< .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, provider_id, node, << .container >>, << .pod >>, << .cluster >>)`,
  416. "RAMReq": `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", << .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  417. "RAMUsageAvg": `avg(avg_over_time(container_memory_working_set_bytes{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  418. "RAMUsageMax": `max(max_over_time(container_memory_working_set_bytes{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  419. "CPUCoresAllocated": `avg(avg_over_time(container_cpu_allocation{<< .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  420. "CPUReq": `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", << .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster>>)`,
  421. "CPUUsageAvg": `avg(rate(container_cpu_usage_seconds_total{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  422. "CPUUsageMax": `max(rate(container_cpu_usage_seconds_total{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  423. "GPUsRequested": `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", << .container >>!="",<< .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  424. "GPUsAllocated": `avg(avg_over_time(container_gpu_allocation{<< .container >>!="", << .container >>!="POD", node!=""}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
  425. "NodeCostPerCPUHr": `avg(avg_over_time(node_cpu_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
  426. "NodeCostPerRAMGiBHr": `avg(avg_over_time(node_ram_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
  427. "NodeCostPerGPUHr": `avg(avg_over_time(node_gpu_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
  428. "NodeIsSpot": `avg_over_time(kubecost_node_is_spot{<< .filter >>}[<< .duration >>])`,
  429. "PVCInfo": `avg(kube_persistentvolumeclaim_info{volumename != "", << .filter >>}) by (persistentvolumeclaim, storageclass, volumename, namespace, << .cluster >>)[<< .duration >>: << .resolution >>]`,
  430. "PVBytes": `avg(avg_over_time(kube_persistentvolume_capacity_bytes{<< .filter >>}[<< .duration >>])) by (persistentvolume, << .cluster >>)`,
  431. "PodPVCAllocation": `avg(avg_over_time(pod_pvc_allocation{<< .filter >>}[<< .duration >>])) by (persistentvolume, persistentvolumeclaim, namespace, << .pod >>, << .cluster >>)`,
  432. "PVCBytesRequested": `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{<< .filter >>}[<< .duration >>])) by (persistentvolumeclaim, namespace, << .cluster >>)`,
  433. "PVCostPerGiBHour": `avg(avg_over_time(pv_hourly_cost{<< .filter >>}[<< .duration >>])) by (volumename, << .cluster >>)`,
  434. "NetZoneGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
  435. "NetZoneCostPerGiB": `avg(avg_over_time(kubecost_network_zone_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
  436. "NetRegionGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
  437. "NetRegionCostPerGiB": `avg(avg_over_time(kubecost_network_region_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
  438. "NetInternetGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
  439. "NetInternetCostPerGiB": `avg(avg_over_time(kubecost_network_internet_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
  440. "NetReceiveBytes": `sum(increase(container_network_receive_bytes_total{<< .pod >>!="", << .container >>="POD", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>)`,
  441. "NetTransferBytes": `sum(increase(container_network_transmit_bytes_total{<< .pod >>!="", << .container >>="POD", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>)`,
  442. "PodsWithReplicaSetOwner": `
  443. sum(
  444. avg_over_time(kube_pod_owner{owner_kind="ReplicaSet", << .filter >>}[<< .duration >>])) by (owner_name, namespace, << .pod >>, << .cluster >>)`,
  445. "ReplicaSetsWithoutOwners": `
  446. avg(
  447. avg_over_time(
  448. kube_replicaset_owner{
  449. owner_kind="<none>",
  450. owner_name="<none>",
  451. << .filter >>
  452. }[<< .duration >>]
  453. )
  454. ) by (
  455. replicaset,
  456. namespace,
  457. << .cluster >>
  458. )`,
  459. "LBCostPerHr": `
  460. avg(
  461. avg_over_time(
  462. kubecost_load_balancer_cost{
  463. << .filter >>
  464. }[<< .duration >>]
  465. )
  466. ) by (
  467. namespace,
  468. service_name,
  469. << .cluster >>
  470. )`,
  471. "LBActiveMins": `
  472. count(
  473. kubecost_load_balancer_cost{
  474. << .filter >>
  475. }
  476. ) by (
  477. namespace,
  478. service_name,
  479. << .cluster >>
  480. )[<< .duration >>: << .resolution >>]`,
  481. }