datasource.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. package source
  2. import (
  3. "time"
  4. "github.com/julienschmidt/httprouter"
  5. "github.com/opencost/opencost/core/pkg/clusters"
  6. "github.com/opencost/opencost/core/pkg/diagnostics"
  7. )
  8. // Query name constants for use with MockMetricsQuerier.SetOverride.
  9. const (
  10. // Local Cluster Disks
  11. QueryLocalStorageActiveMinutes = "QueryLocalStorageActiveMinutes"
  12. QueryLocalStorageUsedAvg = "QueryLocalStorageUsedAvg"
  13. QueryLocalStorageUsedMax = "QueryLocalStorageUsedMax"
  14. QueryLocalStorageBytes = "QueryLocalStorageBytes"
  15. QueryKMLocalStorageUsedAvg = "QueryKMLocalStorageUsedAvg"
  16. QueryKMLocalStorageUsedMax = "QueryKMLocalStorageUsedMax"
  17. QueryKMLocalStorageBytes = "QueryKMLocalStorageBytes"
  18. // Nodes
  19. QueryNodeInfo = "QueryNodeInfo"
  20. QueryNodeUptime = "QueryNodeUptime"
  21. QueryNodeActiveMinutes = "QueryNodeActiveMinutes"
  22. QueryNodeCPUCoresCapacity = "QueryNodeCPUCoresCapacity"
  23. QueryNodeCPUCoresAllocatable = "QueryNodeCPUCoresAllocatable"
  24. QueryNodeRAMBytesCapacity = "QueryNodeRAMBytesCapacity"
  25. QueryNodeRAMBytesAllocatable = "QueryNodeRAMBytesAllocatable"
  26. QueryNodeGPUCount = "QueryNodeGPUCount"
  27. QueryNodeCPUModeTotal = "QueryNodeCPUModeTotal"
  28. QueryNodeIsSpot = "QueryNodeIsSpot"
  29. QueryNodeRAMSystemPercent = "QueryNodeRAMSystemPercent"
  30. QueryNodeRAMUserPercent = "QueryNodeRAMUserPercent"
  31. QueryNodeResourceCapacities = "QueryNodeResourceCapacities"
  32. QueryNodeResourcesAllocatable = "QueryNodeResourcesAllocatable"
  33. // Load Balancers
  34. QueryLBActiveMinutes = "QueryLBActiveMinutes"
  35. QueryLBPricePerHr = "QueryLBPricePerHr"
  36. // Cluster Management
  37. QueryClusterInfo = "QueryClusterInfo"
  38. QueryClusterKubeModelVersion = "QueryClusterKubeModelVersion"
  39. QueryClusterUptime = "QueryClusterUptime"
  40. QueryClusterManagementDuration = "QueryClusterManagementDuration"
  41. QueryClusterManagementPricePerHr = "QueryClusterManagementPricePerHr"
  42. // Pods
  43. QueryPods = "QueryPods"
  44. QueryPodsUID = "QueryPodsUID"
  45. QueryPodInfo = "QueryPodInfo"
  46. QueryPodUptime = "QueryPodUptime"
  47. QueryPodOwners = "QueryPodOwners"
  48. QueryPodPVCVolumes = "QueryPodPVCVolumes"
  49. QueryPodNetworkEgressBytes = "QueryPodNetworkEgressBytes"
  50. QueryPodNetworkIngressBytes = "QueryPodNetworkIngressBytes"
  51. // Container
  52. QueryContainerUptime = "QueryContainerUptime"
  53. QueryContainerResourceRequests = "QueryContainerResourceRequests"
  54. QueryContainerResourceLimits = "QueryContainerResourceLimits"
  55. // RAM
  56. QueryRAMBytesAllocated = "QueryRAMBytesAllocated"
  57. QueryRAMRequests = "QueryRAMRequests"
  58. QueryRAMLimits = "QueryRAMLimits"
  59. QueryRAMUsageAvg = "QueryRAMUsageAvg"
  60. QueryRAMUsageMax = "QueryRAMUsageMax"
  61. QueryNodeRAMPricePerGiBHr = "QueryNodeRAMPricePerGiBHr"
  62. // CPU
  63. QueryCPUCoresAllocated = "QueryCPUCoresAllocated"
  64. QueryCPURequests = "QueryCPURequests"
  65. QueryCPULimits = "QueryCPULimits"
  66. QueryCPUUsageAvg = "QueryCPUUsageAvg"
  67. QueryCPUUsageMax = "QueryCPUUsageMax"
  68. QueryNodeCPUPricePerHr = "QueryNodeCPUPricePerHr"
  69. // GPU
  70. QueryGPUsAllocated = "QueryGPUsAllocated"
  71. QueryGPUsRequested = "QueryGPUsRequested"
  72. QueryGPUsUsageAvg = "QueryGPUsUsageAvg"
  73. QueryGPUsUsageMax = "QueryGPUsUsageMax"
  74. QueryNodeGPUPricePerHr = "QueryNodeGPUPricePerHr"
  75. QueryGPUInfo = "QueryGPUInfo"
  76. QueryIsGPUShared = "QueryIsGPUShared"
  77. // Device
  78. QueryDCGMDeviceInfo = "QueryDCGMDeviceInfo"
  79. QueryDCGMDeviceUptime = "QueryDCGMDeviceUptime"
  80. QueryDCGMContainerUsageAvg = "QueryDCGMContainerUsageAvg"
  81. QueryDCGMContainerUsageMax = "QueryDCGMContainerUsageMax"
  82. // PVC
  83. QueryPodPVCAllocation = "QueryPodPVCAllocation"
  84. QueryPVCBytesRequested = "QueryPVCBytesRequested"
  85. QueryPVCInfo = "QueryPVCInfo"
  86. QueryKMPVCInfo = "QueryKMPVCInfo"
  87. QueryPVCUptime = "QueryPVCUptime"
  88. QueryPVCBytesUsedAverage = "QueryPVCBytesUsedAverage"
  89. QueryPVCBytesUsedMax = "QueryPVCBytesUsedMax"
  90. // PV
  91. QueryPVBytes = "QueryPVBytes"
  92. QueryPVPricePerGiBHour = "QueryPVPricePerGiBHour"
  93. QueryPVInfo = "QueryPVInfo"
  94. QueryPVActiveMinutes = "QueryPVActiveMinutes"
  95. QueryPVUsedAverage = "QueryPVUsedAverage"
  96. QueryPVUsedMax = "QueryPVUsedMax"
  97. QueryKMPVInfo = "QueryKMPVInfo"
  98. QueryPVUptime = "QueryPVUptime"
  99. // Deployment
  100. QueryDeploymentInfo = "QueryDeploymentInfo"
  101. QueryDeploymentUptime = "QueryDeploymentUptime"
  102. QueryDeploymentLabels = "QueryDeploymentLabels"
  103. QueryDeploymentAnnotations = "QueryDeploymentAnnotations"
  104. QueryDeploymentMatchLabels = "QueryDeploymentMatchLabels"
  105. // StatefulSet
  106. QueryStatefulSetInfo = "QueryStatefulSetInfo"
  107. QueryStatefulSetUptime = "QueryStatefulSetUptime"
  108. QueryStatefulSetLabels = "QueryStatefulSetLabels"
  109. QueryStatefulSetAnnotations = "QueryStatefulSetAnnotations"
  110. QueryStatefulSetMatchLabels = "QueryStatefulSetMatchLabels"
  111. // DaemonSet
  112. QueryDaemonSetInfo = "QueryDaemonSetInfo"
  113. QueryDaemonSetUptime = "QueryDaemonSetUptime"
  114. QueryDaemonSetLabels = "QueryDaemonSetLabels"
  115. QueryDaemonSetAnnotations = "QueryDaemonSetAnnotations"
  116. // Job
  117. QueryJobInfo = "QueryJobInfo"
  118. QueryJobUptime = "QueryJobUptime"
  119. QueryJobLabels = "QueryJobLabels"
  120. QueryJobAnnotations = "QueryJobAnnotations"
  121. // CronJob
  122. QueryCronJobInfo = "QueryCronJobInfo"
  123. QueryCronJobUptime = "QueryCronJobUptime"
  124. QueryCronJobLabels = "QueryCronJobLabels"
  125. QueryCronJobAnnotations = "QueryCronJobAnnotations"
  126. // ReplicaSet
  127. QueryReplicaSetInfo = "QueryReplicaSetInfo"
  128. QueryReplicaSetUptime = "QueryReplicaSetUptime"
  129. QueryReplicaSetLabels = "QueryReplicaSetLabels"
  130. QueryReplicaSetAnnotations = "QueryReplicaSetAnnotations"
  131. QueryReplicaSetOwners = "QueryReplicaSetOwners"
  132. QueryPodsWithReplicaSetOwner = "QueryPodsWithReplicaSetOwner"
  133. QueryReplicaSetsWithoutOwners = "QueryReplicaSetsWithoutOwners"
  134. QueryReplicaSetsWithRollout = "QueryReplicaSetsWithRollout"
  135. // Namespace
  136. QueryNamespaceInfo = "QueryNamespaceInfo"
  137. QueryNamespaceUptime = "QueryNamespaceUptime"
  138. QueryNamespaceAnnotations = "QueryNamespaceAnnotations"
  139. QueryNamespaceLabels = "QueryNamespaceLabels"
  140. // Service
  141. QueryServiceInfo = "QueryServiceInfo"
  142. QueryServiceUptime = "QueryServiceUptime"
  143. QueryServiceSelectorLabels = "QueryServiceSelectorLabels"
  144. // Network Egress
  145. QueryNetZoneGiB = "QueryNetZoneGiB"
  146. QueryNetZonePricePerGiB = "QueryNetZonePricePerGiB"
  147. QueryNetRegionGiB = "QueryNetRegionGiB"
  148. QueryNetRegionPricePerGiB = "QueryNetRegionPricePerGiB"
  149. QueryNetInternetGiB = "QueryNetInternetGiB"
  150. QueryNetInternetPricePerGiB = "QueryNetInternetPricePerGiB"
  151. QueryNetInternetServiceGiB = "QueryNetInternetServiceGiB"
  152. QueryNetNatGatewayPricePerGiB = "QueryNetNatGatewayPricePerGiB"
  153. QueryNetNatGatewayGiB = "QueryNetNatGatewayGiB"
  154. QueryNetTransferBytes = "QueryNetTransferBytes"
  155. // Network Ingress
  156. QueryNetZoneIngressGiB = "QueryNetZoneIngressGiB"
  157. QueryNetRegionIngressGiB = "QueryNetRegionIngressGiB"
  158. QueryNetInternetIngressGiB = "QueryNetInternetIngressGiB"
  159. QueryNetInternetServiceIngressGiB = "QueryNetInternetServiceIngressGiB"
  160. QueryNetNatGatewayIngressPricePerGiB = "QueryNetNatGatewayIngressPricePerGiB"
  161. QueryNetNatGatewayIngressGiB = "QueryNetNatGatewayIngressGiB"
  162. QueryNetReceiveBytes = "QueryNetReceiveBytes"
  163. // Labels
  164. QueryNodeLabels = "QueryNodeLabels"
  165. QueryPodLabels = "QueryPodLabels"
  166. // Pod ownership
  167. QueryPodAnnotations = "QueryPodAnnotations"
  168. QueryPodsWithDaemonSetOwner = "QueryPodsWithDaemonSetOwner"
  169. QueryPodsWithJobOwner = "QueryPodsWithJobOwner"
  170. // ResourceQuotas
  171. QueryResourceQuotaInfo = "QueryResourceQuotaInfo"
  172. QueryResourceQuotaUptime = "QueryResourceQuotaUptime"
  173. QueryResourceQuotaSpecCPURequestAverage = "QueryResourceQuotaSpecCPURequestAverage"
  174. QueryResourceQuotaSpecCPURequestMax = "QueryResourceQuotaSpecCPURequestMax"
  175. QueryResourceQuotaSpecRAMRequestAverage = "QueryResourceQuotaSpecRAMRequestAverage"
  176. QueryResourceQuotaSpecRAMRequestMax = "QueryResourceQuotaSpecRAMRequestMax"
  177. QueryResourceQuotaSpecCPULimitAverage = "QueryResourceQuotaSpecCPULimitAverage"
  178. QueryResourceQuotaSpecCPULimitMax = "QueryResourceQuotaSpecCPULimitMax"
  179. QueryResourceQuotaSpecRAMLimitAverage = "QueryResourceQuotaSpecRAMLimitAverage"
  180. QueryResourceQuotaSpecRAMLimitMax = "QueryResourceQuotaSpecRAMLimitMax"
  181. QueryResourceQuotaStatusUsedCPURequestAverage = "QueryResourceQuotaStatusUsedCPURequestAverage"
  182. QueryResourceQuotaStatusUsedCPURequestMax = "QueryResourceQuotaStatusUsedCPURequestMax"
  183. QueryResourceQuotaStatusUsedRAMRequestAverage = "QueryResourceQuotaStatusUsedRAMRequestAverage"
  184. QueryResourceQuotaStatusUsedRAMRequestMax = "QueryResourceQuotaStatusUsedRAMRequestMax"
  185. QueryResourceQuotaStatusUsedCPULimitAverage = "QueryResourceQuotaStatusUsedCPULimitAverage"
  186. QueryResourceQuotaStatusUsedCPULimitMax = "QueryResourceQuotaStatusUsedCPULimitMax"
  187. QueryResourceQuotaStatusUsedRAMLimitAverage = "QueryResourceQuotaStatusUsedRAMLimitAverage"
  188. QueryResourceQuotaStatusUsedRAMLimitMax = "QueryResourceQuotaStatusUsedRAMLimitMax"
  189. // Data Coverage
  190. QueryDataCoverage = "QueryDataCoverage"
  191. // Inference Metrics
  192. QueryInferencePromptTokens = "QueryInferencePromptTokens"
  193. QueryInferenceGenerationTokens = "QueryInferenceGenerationTokens"
  194. QueryInferenceInputProcessingTime = "QueryInferenceInputProcessingTime"
  195. QueryInferenceOutputProcessingTime = "QueryInferenceOutputProcessingTime"
  196. QueryInferenceCachedTokens = "QueryInferenceCachedTokens"
  197. QueryInferenceCacheConfig = "QueryInferenceCacheConfig"
  198. )
  199. type MetricsQuerier interface {
  200. // Cluster Disks
  201. // Local Cluster Disks
  202. QueryLocalStorageActiveMinutes(start, end time.Time) *Future[LocalStorageActiveMinutesResult]
  203. QueryLocalStorageUsedAvg(start, end time.Time) *Future[LocalStorageUsedAvgResult]
  204. QueryLocalStorageUsedMax(start, end time.Time) *Future[LocalStorageUsedMaxResult]
  205. QueryLocalStorageBytes(start, end time.Time) *Future[LocalStorageBytesResult]
  206. // Local Storage Metrics aggregated exclusively on NodeUID
  207. QueryKMLocalStorageUsedAvg(start, end time.Time) *Future[NodeUIDValueResult]
  208. QueryKMLocalStorageUsedMax(start, end time.Time) *Future[NodeUIDValueResult]
  209. QueryKMLocalStorageBytes(start, end time.Time) *Future[UIDValueResult]
  210. // Nodes
  211. QueryNodeInfo(start, end time.Time) *Future[NodeInfoResult]
  212. QueryNodeUptime(start, end time.Time) *Future[UptimeResult]
  213. QueryNodeActiveMinutes(start, end time.Time) *Future[NodeActiveMinutesResult]
  214. QueryNodeCPUCoresCapacity(start, end time.Time) *Future[NodeCPUCoresCapacityResult]
  215. QueryNodeCPUCoresAllocatable(start, end time.Time) *Future[NodeCPUCoresAllocatableResult]
  216. QueryNodeRAMBytesCapacity(start, end time.Time) *Future[NodeRAMBytesCapacityResult]
  217. QueryNodeRAMBytesAllocatable(start, end time.Time) *Future[NodeRAMBytesAllocatableResult]
  218. QueryNodeGPUCount(start, end time.Time) *Future[NodeGPUCountResult]
  219. QueryNodeCPUModeTotal(start, end time.Time) *Future[NodeCPUModeTotalResult]
  220. QueryNodeIsSpot(start, end time.Time) *Future[NodeIsSpotResult]
  221. QueryNodeRAMSystemPercent(start, end time.Time) *Future[NodeRAMSystemPercentResult]
  222. QueryNodeRAMUserPercent(start, end time.Time) *Future[NodeRAMUserPercentResult]
  223. QueryNodeResourceCapacities(start, end time.Time) *Future[ResourceResult]
  224. QueryNodeResourcesAllocatable(start, end time.Time) *Future[ResourceResult]
  225. // Load Balancers
  226. QueryLBActiveMinutes(start, end time.Time) *Future[LBActiveMinutesResult]
  227. QueryLBPricePerHr(start, end time.Time) *Future[LBPricePerHrResult]
  228. // Cluster Management
  229. QueryClusterInfo(start, end time.Time) *Future[ClusterInfoResult]
  230. QueryClusterKubeModelVersion(start, end time.Time) *Future[ClusterKubeModelVersionResult]
  231. QueryClusterUptime(start, end time.Time) *Future[UptimeResult]
  232. QueryClusterManagementDuration(start, end time.Time) *Future[ClusterManagementDurationResult]
  233. QueryClusterManagementPricePerHr(start, end time.Time) *Future[ClusterManagementPricePerHrResult]
  234. // Pods
  235. QueryPods(start, end time.Time) *Future[PodsResult]
  236. QueryPodsUID(start, end time.Time) *Future[PodsResult]
  237. QueryPodInfo(start, end time.Time) *Future[PodInfoResult]
  238. QueryPodUptime(start, end time.Time) *Future[UptimeResult]
  239. QueryPodOwners(start, end time.Time) *Future[OwnerResult]
  240. QueryPodPVCVolumes(start, end time.Time) *Future[PodPVCVolumeResult]
  241. QueryPodNetworkEgressBytes(start, end time.Time) *Future[PodNetworkBytesResult]
  242. QueryPodNetworkIngressBytes(start, end time.Time) *Future[PodNetworkBytesResult]
  243. // Container
  244. QueryContainerUptime(start, end time.Time) *Future[ContainerUptimeResult]
  245. QueryContainerResourceRequests(start, end time.Time) *Future[ContainerResourceResult]
  246. QueryContainerResourceLimits(start, end time.Time) *Future[ContainerResourceResult]
  247. // RAM
  248. QueryRAMBytesAllocated(start, end time.Time) *Future[RAMBytesAllocatedResult]
  249. QueryRAMRequests(start, end time.Time) *Future[RAMRequestsResult]
  250. QueryRAMLimits(start, end time.Time) *Future[RAMLimitsResult]
  251. QueryRAMUsageAvg(start, end time.Time) *Future[RAMUsageAvgResult]
  252. QueryRAMUsageMax(start, end time.Time) *Future[RAMUsageMaxResult]
  253. QueryNodeRAMPricePerGiBHr(start, end time.Time) *Future[NodeRAMPricePerGiBHrResult]
  254. // CPU
  255. QueryCPUCoresAllocated(start, end time.Time) *Future[CPUCoresAllocatedResult]
  256. QueryCPURequests(start, end time.Time) *Future[CPURequestsResult]
  257. QueryCPULimits(start, end time.Time) *Future[CPULimitsResult]
  258. QueryCPUUsageAvg(start, end time.Time) *Future[CPUUsageAvgResult]
  259. QueryCPUUsageMax(start, end time.Time) *Future[CPUUsageMaxResult]
  260. QueryNodeCPUPricePerHr(start, end time.Time) *Future[NodeCPUPricePerHrResult]
  261. // GPU
  262. QueryGPUsAllocated(start, end time.Time) *Future[GPUsAllocatedResult]
  263. QueryGPUsRequested(start, end time.Time) *Future[GPUsRequestedResult]
  264. QueryGPUsUsageAvg(start, end time.Time) *Future[GPUsUsageAvgResult]
  265. QueryGPUsUsageMax(start, end time.Time) *Future[GPUsUsageMaxResult]
  266. QueryNodeGPUPricePerHr(start, end time.Time) *Future[NodeGPUPricePerHrResult]
  267. QueryGPUInfo(start, end time.Time) *Future[GPUInfoResult]
  268. QueryIsGPUShared(start, end time.Time) *Future[IsGPUSharedResult]
  269. // Device
  270. QueryDCGMDeviceInfo(start, end time.Time) *Future[DCGMDeviceInfoResult]
  271. QueryDCGMDeviceUptime(start, end time.Time) *Future[DCGMDeviceUptimeResult]
  272. QueryDCGMContainerUsageAvg(start, end time.Time) *Future[DCGMDeviceContainerUsageResult]
  273. QueryDCGMContainerUsageMax(start, end time.Time) *Future[DCGMDeviceContainerUsageResult]
  274. // PVC
  275. QueryPodPVCAllocation(start, end time.Time) *Future[PodPVCAllocationResult]
  276. QueryPVCBytesRequested(start, end time.Time) *Future[PVCBytesRequestedResult]
  277. QueryPVCInfo(start, end time.Time) *Future[PVCInfoResult]
  278. // UID aggregated version of PVCInfo query
  279. QueryKMPVCInfo(start, end time.Time) *Future[PVCInfoResult]
  280. QueryPVCUptime(start, end time.Time) *Future[UptimeResult]
  281. QueryPVCBytesUsedAverage(start, end time.Time) *Future[PVCUIDValueResult]
  282. QueryPVCBytesUsedMax(start, end time.Time) *Future[PVCUIDValueResult]
  283. // PV
  284. QueryPVBytes(start, end time.Time) *Future[PVBytesResult]
  285. QueryPVPricePerGiBHour(start, end time.Time) *Future[PVPricePerGiBHourResult]
  286. QueryPVInfo(start, end time.Time) *Future[PVInfoResult]
  287. QueryPVActiveMinutes(start, end time.Time) *Future[PVActiveMinutesResult]
  288. QueryPVUsedAverage(start, end time.Time) *Future[PVUsedAvgResult]
  289. QueryPVUsedMax(start, end time.Time) *Future[PVUsedMaxResult]
  290. QueryKMPVInfo(start, end time.Time) *Future[PVInfoResult]
  291. QueryPVUptime(start, end time.Time) *Future[UptimeResult]
  292. // Deployment
  293. QueryDeploymentInfo(start, end time.Time) *Future[DeploymentInfoResult]
  294. QueryDeploymentUptime(start, end time.Time) *Future[UptimeResult]
  295. QueryDeploymentLabels(start, end time.Time) *Future[LabelsResult]
  296. QueryDeploymentAnnotations(start, end time.Time) *Future[AnnotationsResult]
  297. QueryDeploymentMatchLabels(start, end time.Time) *Future[DeploymentLabelsResult]
  298. // StatefulSet
  299. QueryStatefulSetInfo(start, end time.Time) *Future[StatefulSetInfoResult]
  300. QueryStatefulSetUptime(start, end time.Time) *Future[UptimeResult]
  301. QueryStatefulSetLabels(start, end time.Time) *Future[LabelsResult]
  302. QueryStatefulSetAnnotations(start, end time.Time) *Future[AnnotationsResult]
  303. QueryStatefulSetMatchLabels(start, end time.Time) *Future[StatefulSetLabelsResult]
  304. // DaemonSet
  305. QueryDaemonSetInfo(start, end time.Time) *Future[DaemonSetInfoResult]
  306. QueryDaemonSetUptime(start, end time.Time) *Future[UptimeResult]
  307. QueryDaemonSetLabels(start, end time.Time) *Future[LabelsResult]
  308. QueryDaemonSetAnnotations(start, end time.Time) *Future[AnnotationsResult]
  309. // Job
  310. QueryJobInfo(start, end time.Time) *Future[JobInfoResult]
  311. QueryJobUptime(start, end time.Time) *Future[UptimeResult]
  312. QueryJobLabels(start, end time.Time) *Future[LabelsResult]
  313. QueryJobAnnotations(start, end time.Time) *Future[AnnotationsResult]
  314. // CronJob
  315. QueryCronJobInfo(start, end time.Time) *Future[CronJobInfoResult]
  316. QueryCronJobUptime(start, end time.Time) *Future[UptimeResult]
  317. QueryCronJobLabels(start, end time.Time) *Future[LabelsResult]
  318. QueryCronJobAnnotations(start, end time.Time) *Future[AnnotationsResult]
  319. // ReplicaSet
  320. QueryReplicaSetInfo(start, end time.Time) *Future[ReplicaSetInfoResult]
  321. QueryReplicaSetUptime(start, end time.Time) *Future[UptimeResult]
  322. QueryReplicaSetLabels(start, end time.Time) *Future[LabelsResult]
  323. QueryReplicaSetAnnotations(start, end time.Time) *Future[AnnotationsResult]
  324. QueryReplicaSetOwners(start, end time.Time) *Future[OwnerResult]
  325. // Namespace
  326. QueryNamespaceInfo(start, end time.Time) *Future[NamespaceInfoResult]
  327. QueryNamespaceUptime(start, end time.Time) *Future[UptimeResult]
  328. // Service
  329. QueryServiceInfo(start, end time.Time) *Future[ServiceInfoResult]
  330. QueryServiceUptime(start, end time.Time) *Future[UptimeResult]
  331. QueryServiceSelectorLabels(start, end time.Time) *Future[ServiceLabelsResult]
  332. // Network Egress
  333. QueryNetZoneGiB(start, end time.Time) *Future[NetZoneGiBResult]
  334. QueryNetZonePricePerGiB(start, end time.Time) *Future[NetZonePricePerGiBResult]
  335. QueryNetRegionGiB(start, end time.Time) *Future[NetRegionGiBResult]
  336. QueryNetRegionPricePerGiB(start, end time.Time) *Future[NetRegionPricePerGiBResult]
  337. QueryNetInternetGiB(start, end time.Time) *Future[NetInternetGiBResult]
  338. QueryNetInternetPricePerGiB(start, end time.Time) *Future[NetInternetPricePerGiBResult]
  339. QueryNetInternetServiceGiB(start, end time.Time) *Future[NetInternetServiceGiBResult]
  340. QueryNetNatGatewayPricePerGiB(start, end time.Time) *Future[NetNatGatewayPricePerGiBResult]
  341. QueryNetNatGatewayGiB(start, end time.Time) *Future[NetNatGatewayGiBResult]
  342. QueryNetTransferBytes(start, end time.Time) *Future[NetTransferBytesResult]
  343. // Network Ingress
  344. QueryNetZoneIngressGiB(start, end time.Time) *Future[NetZoneIngressGiBResult]
  345. QueryNetRegionIngressGiB(start, end time.Time) *Future[NetRegionIngressGiBResult]
  346. QueryNetInternetIngressGiB(start, end time.Time) *Future[NetInternetIngressGiBResult]
  347. QueryNetInternetServiceIngressGiB(start, end time.Time) *Future[NetInternetServiceIngressGiBResult]
  348. QueryNetNatGatewayIngressPricePerGiB(start, end time.Time) *Future[NetNatGatewayPricePerGiBResult]
  349. QueryNetNatGatewayIngressGiB(start, end time.Time) *Future[NetNatGatewayIngressGiBResult]
  350. QueryNetReceiveBytes(start, end time.Time) *Future[NetReceiveBytesResult]
  351. // Annotations
  352. QueryNamespaceAnnotations(start, end time.Time) *Future[NamespaceAnnotationsResult]
  353. QueryPodAnnotations(start, end time.Time) *Future[PodAnnotationsResult]
  354. // Labels
  355. QueryNodeLabels(start, end time.Time) *Future[NodeLabelsResult]
  356. QueryNamespaceLabels(start, end time.Time) *Future[NamespaceLabelsResult]
  357. QueryPodLabels(start, end time.Time) *Future[PodLabelsResult]
  358. QueryPodsWithDaemonSetOwner(start, end time.Time) *Future[PodsWithDaemonSetOwnerResult]
  359. QueryPodsWithJobOwner(start, end time.Time) *Future[PodsWithJobOwnerResult]
  360. // ReplicaSet -> Controller mapping
  361. QueryPodsWithReplicaSetOwner(start, end time.Time) *Future[PodsWithReplicaSetOwnerResult]
  362. QueryReplicaSetsWithoutOwners(start, end time.Time) *Future[ReplicaSetsWithoutOwnersResult]
  363. QueryReplicaSetsWithRollout(start, end time.Time) *Future[ReplicaSetsWithRolloutResult]
  364. // ResourceQuotas
  365. QueryResourceQuotaInfo(start, end time.Time) *Future[ResourceQuotaInfoResult]
  366. QueryResourceQuotaUptime(start, end time.Time) *Future[UptimeResult]
  367. QueryResourceQuotaSpecCPURequestAverage(start, end time.Time) *Future[ResourceResult]
  368. QueryResourceQuotaSpecCPURequestMax(start, end time.Time) *Future[ResourceResult]
  369. QueryResourceQuotaSpecRAMRequestAverage(start, end time.Time) *Future[ResourceResult]
  370. QueryResourceQuotaSpecRAMRequestMax(start, end time.Time) *Future[ResourceResult]
  371. QueryResourceQuotaSpecCPULimitAverage(start, end time.Time) *Future[ResourceResult]
  372. QueryResourceQuotaSpecCPULimitMax(start, end time.Time) *Future[ResourceResult]
  373. QueryResourceQuotaSpecRAMLimitAverage(start, end time.Time) *Future[ResourceResult]
  374. QueryResourceQuotaSpecRAMLimitMax(start, end time.Time) *Future[ResourceResult]
  375. QueryResourceQuotaStatusUsedCPURequestAverage(start, end time.Time) *Future[ResourceResult]
  376. QueryResourceQuotaStatusUsedCPURequestMax(start, end time.Time) *Future[ResourceResult]
  377. QueryResourceQuotaStatusUsedRAMRequestAverage(start, end time.Time) *Future[ResourceResult]
  378. QueryResourceQuotaStatusUsedRAMRequestMax(start, end time.Time) *Future[ResourceResult]
  379. QueryResourceQuotaStatusUsedCPULimitAverage(start, end time.Time) *Future[ResourceResult]
  380. QueryResourceQuotaStatusUsedCPULimitMax(start, end time.Time) *Future[ResourceResult]
  381. QueryResourceQuotaStatusUsedRAMLimitAverage(start, end time.Time) *Future[ResourceResult]
  382. QueryResourceQuotaStatusUsedRAMLimitMax(start, end time.Time) *Future[ResourceResult]
  383. // Data Coverage Query
  384. QueryDataCoverage(limitDays int) (time.Time, time.Time, error)
  385. // Inference Metrics (vLLM) - relevant when INFERENCE_COST_ENABLED is set to true
  386. // QueryInferencePromptTokens returns prompt token counts by model_name and namespace
  387. QueryInferencePromptTokens(start, end time.Time) *Future[InferenceTokensResult]
  388. // QueryInferenceGenerationTokens returns generation token counts by model_name and namespace
  389. QueryInferenceGenerationTokens(start, end time.Time) *Future[InferenceTokensResult]
  390. // QueryInferenceInputProcessingTime returns input processing time in seconds by model_name and namespace
  391. QueryInferenceInputProcessingTime(start, end time.Time) *Future[InferenceProcessingTimeResult]
  392. // QueryInferenceOutputProcessingTime returns output processing time in seconds by model_name and namespace
  393. QueryInferenceOutputProcessingTime(start, end time.Time) *Future[InferenceProcessingTimeResult]
  394. // QueryInferenceCachedTokens returns KV cache hit counts by model_name and namespace
  395. QueryInferenceCachedTokens(start, end time.Time) *Future[InferenceTokensResult]
  396. // QueryInferenceCacheConfig returns cache configuration (prefix caching enabled) by model_name and namespace
  397. QueryInferenceCacheConfig(t time.Time) *Future[InferenceCacheConfigResult]
  398. }
  399. type OpenCostDataSource interface {
  400. // RegisterEndPoints registers any custom endpoints that can be used for diagnostics or debug purposes.
  401. RegisterEndPoints(router *httprouter.Router)
  402. // RegisterDiagnostics registers any custom data source diagnostics with the `DiagnosticService` that can
  403. // be used to report externally.
  404. RegisterDiagnostics(diagService diagnostics.DiagnosticService)
  405. // Metrics returns a MetricsQuerier that can be used to query historical metrics data from the data source.
  406. Metrics() MetricsQuerier
  407. // ClusterMap returns a mapping of cluster identifier to ClusterInfo for all known clusters (local only for
  408. // single cluster deployments).
  409. ClusterMap() clusters.ClusterMap
  410. // ClusterInfo returns the ClusterInfoProvider for the local cluster.
  411. ClusterInfo() clusters.ClusterInfoProvider
  412. BatchDuration() time.Duration
  413. Resolution() time.Duration
  414. }