collector.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. package metric
  2. import (
  3. "maps"
  4. "time"
  5. "github.com/opencost/opencost/modules/collector-source/pkg/metric/aggregator"
  6. "github.com/opencost/opencost/modules/collector-source/pkg/util"
  7. )
// MetricCollectorID is a unique identifier for a specific metric collector instance. This
// identifier, rather than the metric name and aggregation type, is used to register and
// unregister collector instances, which allows selectable cardinality (via Labels) across
// multiple instances of the same aggregation type and metric name.
type MetricCollectorID string
// Known MetricCollectorID values. The string on the right-hand side is the identifier used
// at runtime. A handful of values do not mirror their constant name (they are flagged
// inline below), so never derive one from the other.
const (
	PVPricePerGiBHourID MetricCollectorID = "PVPricePerGiBHour"
	PVUsedAverageID MetricCollectorID = "PVUsedAverage"
	PVUsedMaxID MetricCollectorID = "PVUsedMax"
	PVCInfoID MetricCollectorID = "PVCInfo"
	KMPVCInfoID MetricCollectorID = "KMPVCInfo"
	PVCUptimeID MetricCollectorID = "PVCUptime"
	PVActiveMinutesID MetricCollectorID = "PVActiveMinutes"
	PVUptimeID MetricCollectorID = "PVUptime"
	// NOTE(review): value is "LocalStorageUsedCost", not "LocalStorageUsedActiveMinutes" — confirm
	// whether this is intentional before normalizing; the string is the runtime ID.
	LocalStorageUsedActiveMinutesID MetricCollectorID = "LocalStorageUsedCost"
	LocalStorageUsedAverageID MetricCollectorID = "LocalStorageUsedAverage"
	LocalStorageUsedMaxID MetricCollectorID = "LocalStorageUsedMax"
	// NOTE(review): value keeps the "ID" suffix, unlike most entries — confirm before changing.
	LocalStorageBytesID MetricCollectorID = "LocalStorageBytesID"
	LocalStorageActiveMinutesID MetricCollectorID = "LocalStorageActiveMinutes"
	KMLocalStorageUsedAverageID MetricCollectorID = "KMLocalStorageUsedAverage"
	KMLocalStorageUsedMaxID MetricCollectorID = "KMLocalStorageUsedMax"
	KMLocalStorageBytesID MetricCollectorID = "KMLocalStorageBytes"
	NodeCPUCoresCapacityID MetricCollectorID = "NodeCPUCoresCapacity"
	NodeCPUCoresAllocatableID MetricCollectorID = "NodeCPUCoresAllocatable"
	NodeRAMBytesCapacityID MetricCollectorID = "NodeRAMBytesCapacity"
	NodeRAMBytesAllocatableID MetricCollectorID = "NodeRAMBytesAllocatable"
	NodeGPUCountID MetricCollectorID = "NodeGPUCount"
	NodeLabelsID MetricCollectorID = "NodeLabels"
	NodeInfoID MetricCollectorID = "NodeInfo"
	NodeUptimeID MetricCollectorID = "NodeUptime"
	NodeActiveMinutesID MetricCollectorID = "NodeActiveMinutes"
	NodeCPUModeTotalID MetricCollectorID = "NodeCPUModeTotal"
	NodeRAMSystemUsageAverageID MetricCollectorID = "NodeRAMSystemUsageAverage"
	NodeRAMUserUsageAverageID MetricCollectorID = "NodeRAMUserUsageAverage"
	NodeResourceCapacitiesID MetricCollectorID = "NodeResourceCapacities"
	NodeResourcesAllocatableID MetricCollectorID = "NodeResourcesAllocatable"
	LBPricePerHourID MetricCollectorID = "LBPricePerHour"
	LBActiveMinutesID MetricCollectorID = "LBActiveMinutes"
	ClusterInfoID MetricCollectorID = "ClusterInfo"
	ClusterUptimeID MetricCollectorID = "ClusterUptime"
	ClusterManagementDurationID MetricCollectorID = "ClusterManagementDuration"
	ClusterManagementPricePerHourID MetricCollectorID = "ClusterManagementPricePerHour"
	PodActiveMinutesID MetricCollectorID = "PodActiveMinutes"
	PodInfoID MetricCollectorID = "PodInfo"
	PodUptimeID MetricCollectorID = "PodUptime"
	PodOwnerID MetricCollectorID = "PodOwner"
	PodPVCVolumeID MetricCollectorID = "PodPVCVolume"
	ContainerUptimeID MetricCollectorID = "ContainerUptime"
	PodNetworkEgressBytesID MetricCollectorID = "PodNetworkEgressBytes"
	PodNetworkIngressBytesID MetricCollectorID = "PodNetworkIngressBytes"
	RAMBytesAllocatedID MetricCollectorID = "RAMBytesAllocated"
	RAMRequestsID MetricCollectorID = "RAMRequests"
	RAMLimitsID MetricCollectorID = "RAMLimits"
	RAMUsageAverageID MetricCollectorID = "RAMUsageAverage"
	// NOTE(review): value is "RAMBytesUsageMax", not "RAMUsageMax" — confirm before changing.
	RAMUsageMaxID MetricCollectorID = "RAMBytesUsageMax"
	CPUCoresAllocatedID MetricCollectorID = "CPUCoresAllocated"
	// NOTE(review): the next two values keep the "ID" suffix, unlike most entries — confirm
	// before changing.
	CPURequestsID MetricCollectorID = "CPURequestsID"
	CPULimitsID MetricCollectorID = "CPULimitsID"
	CPUUsageAverageID MetricCollectorID = "CPUUsageAverage"
	CPUUsageMaxID MetricCollectorID = "CPUUsageMax"
	GPUsRequestedID MetricCollectorID = "GPUsRequested"
	GPUsUsageAverageID MetricCollectorID = "GPUsUsageAverage"
	GPUsUsageMaxID MetricCollectorID = "GPUsUsageMax"
	GPUsAllocatedID MetricCollectorID = "GPUsAllocated"
	IsGPUSharedID MetricCollectorID = "IsGPUShared"
	GPUInfoID MetricCollectorID = "GPUInfo"
	NodeCPUPricePerHourID MetricCollectorID = "NodeCPUPricePerHour"
	NodeRAMPricePerGiBHourID MetricCollectorID = "NodeRAMPricePerGiBHour"
	NodeGPUPricePerHourID MetricCollectorID = "NodeGPUPricePerHour"
	NodeIsSpotID MetricCollectorID = "NodeIsSpot"
	DCGMInfoID MetricCollectorID = "DCGMInfo"
	DCGMUptimeID MetricCollectorID = "DCGMUptime"
	DCGMContainerUsageAvgID MetricCollectorID = "DCGMContainerUsageAvg"
	DCGMContainerUsageMaxID MetricCollectorID = "DCGMContainerUsageMax"
	PodPVCAllocationID MetricCollectorID = "PodPVCAllocation"
	PVCBytesRequestedID MetricCollectorID = "PVCBytesRequested"
	PVCBytesUsedAverageID MetricCollectorID = "PVCBytesUsedAverage"
	PVCBytesUsedMaxID MetricCollectorID = "PVCBytesUsedMax"
	// NOTE(review): value keeps the "ID" suffix, unlike most entries — confirm before changing.
	PVBytesID MetricCollectorID = "PVBytesID"
	PVInfoID MetricCollectorID = "PVInfo"
	KMPVInfoID MetricCollectorID = "KMPVInfo"
	NetZoneGiBID MetricCollectorID = "NetZoneGiB"
	NetZonePricePerGiBID MetricCollectorID = "NetZonePricePerGiB"
	NetRegionGiBID MetricCollectorID = "NetRegionGiB"
	NetRegionPricePerGiBID MetricCollectorID = "NetRegionPricePerGiB"
	NetInternetGiBID MetricCollectorID = "NetInternetGiB"
	NetInternetPricePerGiBID MetricCollectorID = "NetInternetPricePerGiB"
	NetInternetServiceGiBID MetricCollectorID = "NetInternetServiceGiB"
	NetNatGatewayPricePerGiBID MetricCollectorID = "NetNatGatewayPricePerGiB"
	NetNatGatewayIngressPricePerGiBID MetricCollectorID = "NetNatGatewayIngressPricePerGiB"
	NetNatGatewayGiBID MetricCollectorID = "NetNatGatewayGiB"
	NetTransferBytesID MetricCollectorID = "NetTransferBytes"
	NetZoneIngressGiBID MetricCollectorID = "NetZoneIngressGiB"
	NetRegionIngressGiBID MetricCollectorID = "NetRegionIngressGiB"
	NetInternetIngressGiBID MetricCollectorID = "NetInternetIngressGiB"
	NetInternetServiceIngressGiBID MetricCollectorID = "NetInternetServiceIngressGiB"
	NetNatGatewayIngressGiBID MetricCollectorID = "NetNatGatewayIngressGiB"
	NetReceiveBytesID MetricCollectorID = "NetReceiveBytes"
	NamespaceInfoID MetricCollectorID = "NamespaceInfo"
	NamespaceUptimeID MetricCollectorID = "NamespaceUptime"
	NamespaceLabelsID MetricCollectorID = "NamespaceLabels"
	NamespaceAnnotationsID MetricCollectorID = "NamespaceAnnotations"
	PodLabelsID MetricCollectorID = "PodLabels"
	PodAnnotationsID MetricCollectorID = "PodAnnotations"
	ServiceLabelsID MetricCollectorID = "ServiceLabels"
	ServiceInfoID MetricCollectorID = "ServiceInfo"
	ServiceUptimeID MetricCollectorID = "ServiceUptime"
	DeploymentInfoID MetricCollectorID = "DeploymentInfo"
	DeploymentUptimeID MetricCollectorID = "DeploymentUptime"
	DeploymentLabelsID MetricCollectorID = "DeploymentLabels"
	DeploymentAnnotationsID MetricCollectorID = "DeploymentAnnotations"
	DeploymentMatchLabelsID MetricCollectorID = "DeploymentMatchLabels"
	StatefulSetInfoID MetricCollectorID = "StatefulSetInfo"
	StatefulSetUptimeID MetricCollectorID = "StatefulSetUptime"
	StatefulSetLabelsID MetricCollectorID = "StatefulSetLabels"
	StatefulSetAnnotationsID MetricCollectorID = "StatefulSetAnnotations"
	StatefulSetMatchLabelsID MetricCollectorID = "StatefulSetMatchLabels"
	DaemonSetInfoID MetricCollectorID = "DaemonSetInfo"
	DaemonSetUptimeID MetricCollectorID = "DaemonSetUptime"
	DaemonSetLabelsID MetricCollectorID = "DaemonSetLabels"
	DaemonSetAnnotationsID MetricCollectorID = "DaemonSetAnnotations"
	JobInfoID MetricCollectorID = "JobInfo"
	JobUptimeID MetricCollectorID = "JobUptime"
	JobLabelsID MetricCollectorID = "JobLabels"
	JobAnnotationsID MetricCollectorID = "JobAnnotations"
	CronJobInfoID MetricCollectorID = "CronJobInfo"
	CronJobUptimeID MetricCollectorID = "CronJobUptime"
	CronJobLabelsID MetricCollectorID = "CronJobLabels"
	CronJobAnnotationsID MetricCollectorID = "CronJobAnnotations"
	ReplicaSetInfoID MetricCollectorID = "ReplicaSetInfo"
	ReplicaSetUptimeID MetricCollectorID = "ReplicaSetUptime"
	ReplicaSetLabelsID MetricCollectorID = "ReplicaSetLabels"
	ReplicaSetAnnotationsID MetricCollectorID = "ReplicaSetAnnotations"
	ReplicaSetOwnerID MetricCollectorID = "ReplicaSetOwner"
	PodsWithDaemonSetOwnerID MetricCollectorID = "PodsWithDaemonSetOwner"
	PodsWithJobOwnerID MetricCollectorID = "PodsWithJobOwner"
	PodsWithReplicaSetOwnerID MetricCollectorID = "PodsWithReplicaSetOwner"
	ReplicaSetsWithoutOwnersID MetricCollectorID = "ReplicaSetsWithoutOwners"
	ReplicaSetsWithRolloutID MetricCollectorID = "ReplicaSetsWithRollout"
	ContainerResourceRequestsID MetricCollectorID = "ContainerResourceRequests"
	ContainerResourceLimitsID MetricCollectorID = "ContainerResourceLimits"
	ResourceQuotaInfoID MetricCollectorID = "ResourceQuotaInfo"
	ResourceQuotaUptimeID MetricCollectorID = "ResourceQuotaUptime"
	ResourceQuotaSpecCPURequestAverageID MetricCollectorID = "ResourceQuotaSpecCPURequestAverage"
	ResourceQuotaSpecCPURequestMaxID MetricCollectorID = "ResourceQuotaSpecCPURequestMax"
	ResourceQuotaSpecRAMRequestAverageID MetricCollectorID = "ResourceQuotaSpecRAMRequestAverage"
	ResourceQuotaSpecRAMRequestMaxID MetricCollectorID = "ResourceQuotaSpecRAMRequestMax"
	ResourceQuotaSpecCPULimitAverageID MetricCollectorID = "ResourceQuotaSpecCPULimitAverage"
	ResourceQuotaSpecCPULimitMaxID MetricCollectorID = "ResourceQuotaSpecCPULimitMax"
	ResourceQuotaSpecRAMLimitAverageID MetricCollectorID = "ResourceQuotaSpecRAMLimitAverage"
	ResourceQuotaSpecRAMLimitMaxID MetricCollectorID = "ResourceQuotaSpecRAMLimitMax"
	ResourceQuotaStatusUsedCPURequestAverageID MetricCollectorID = "ResourceQuotaStatusUsedCPURequestAverage"
	ResourceQuotaStatusUsedCPURequestMaxID MetricCollectorID = "ResourceQuotaStatusUsedCPURequestMax"
	ResourceQuotaStatusUsedRAMRequestAverageID MetricCollectorID = "ResourceQuotaStatusUsedRAMRequestAverage"
	ResourceQuotaStatusUsedRAMRequestMaxID MetricCollectorID = "ResourceQuotaStatusUsedRAMRequestMax"
	ResourceQuotaStatusUsedCPULimitAverageID MetricCollectorID = "ResourceQuotaStatusUsedCPULimitAverage"
	ResourceQuotaStatusUsedCPULimitMaxID MetricCollectorID = "ResourceQuotaStatusUsedCPULimitMax"
	ResourceQuotaStatusUsedRAMLimitAverageID MetricCollectorID = "ResourceQuotaStatusUsedRAMLimitAverage"
	ResourceQuotaStatusUsedRAMLimitMaxID MetricCollectorID = "ResourceQuotaStatusUsedRAMLimitMax"
)
// MetricCollector collects samples of a single metric and keeps a separate aggregator for
// each distinct combination of values of its configured label keys.
type MetricCollector struct {
	// id is the collector's unique identifier, ie: RAMUsageAverage.
	id MetricCollectorID

	// metricName is the name of the collected metric, ie: container_memory_working_set_bytes.
	metricName string

	// labels is the ordered list of label keys whose values select the aggregator a
	// sample is routed to.
	labels []string

	// aggregatorFactory is called with the label values to build the aggregator the
	// first time a combination of label values is seen.
	aggregatorFactory aggregator.MetricAggregatorFactory

	// metrics holds the aggregators: map[Hash(labelValues)] = aggregator.
	metrics map[uint64]aggregator.MetricAggregator

	// filter is an optional predicate over a sample's labels. When it is non-nil and
	// returns false, the sample is dropped by Update.
	filter func(map[string]string) bool
}
  178. // NewMetricCollector creates a new MetricCollector instance with a unique identifier. The metric name is the specific
  179. // name of the collected metric that will be used to query the
  180. func NewMetricCollector(id MetricCollectorID, metricName string, labels []string, aggregatorFactory aggregator.MetricAggregatorFactory, fn func(map[string]string) bool) *MetricCollector {
  181. return &MetricCollector{
  182. id: id,
  183. metricName: metricName,
  184. labels: labels,
  185. aggregatorFactory: aggregatorFactory,
  186. metrics: make(map[uint64]aggregator.MetricAggregator),
  187. filter: fn,
  188. }
  189. }
  190. func (mi *MetricCollector) Update(labels map[string]string, value float64, timestamp time.Time, additionalInfo map[string]string) {
  191. if mi.filter != nil && !mi.filter(labels) {
  192. return
  193. }
  194. labelValues := make([]string, len(mi.labels))
  195. for i, key := range mi.labels {
  196. labelValues[i] = labels[key]
  197. }
  198. key := util.Hash(labelValues)
  199. if mi.metrics[key] == nil {
  200. mi.metrics[key] = mi.aggregatorFactory(labelValues)
  201. }
  202. mi.metrics[key].Update(value, timestamp, additionalInfo)
  203. }
  204. func (mi *MetricCollector) Get() []*aggregator.MetricResult {
  205. results := make([]*aggregator.MetricResult, 0, len(mi.metrics))
  206. for _, metric := range mi.metrics {
  207. labels := util.ToMap(mi.labels, metric.LabelValues())
  208. maps.Copy(labels, metric.AdditionInfo())
  209. mr := &aggregator.MetricResult{
  210. MetricLabels: labels,
  211. Values: metric.Value(),
  212. }
  213. results = append(results, mr)
  214. }
  215. return results
  216. }
// Labels returns the label keys this collector partitions its aggregators by. The returned
// slice is the collector's own slice, not a copy.
func (mi *MetricCollector) Labels() []string {
	return mi.labels
}