Просмотр исходного кода

added gp3 volumes support

Signed-off-by: Andrii Chubatiuk <andrew.chubatiuk@motional.com>
Signed-off-by: Andrii Chubatiuk <andrew.chubatiuk@gmail.com>
Signed-off-by: Kaelan Patel <kaelanspatel@gmail.com>
Andrii Chubatiuk 3 года назад
Родитель
Коммит
d814de4165
2 изменённых файла: 487 добавлений и 0 удалений
  1. 2 0
      pkg/cloud/awsprovider.go
  2. 485 0
      pkg/prom/queries.go

+ 2 - 0
pkg/cloud/awsprovider.go

@@ -306,12 +306,14 @@ const HourlyRateCodeCn = ".Q7UJUT2CE6"
 // name and the EC2 API.
 var volTypes = map[string]string{
 	"EBS:VolumeUsage.gp2":    "gp2",
+	"EBS:VolumeUsage.gp3":    "gp3",
 	"EBS:VolumeUsage":        "standard",
 	"EBS:VolumeUsage.sc1":    "sc1",
 	"EBS:VolumeP-IOPS.piops": "io1",
 	"EBS:VolumeUsage.st1":    "st1",
 	"EBS:VolumeUsage.piops":  "io1",
 	"gp2":                    "EBS:VolumeUsage.gp2",
+	"gp3":                    "EBS:VolumeUsage.gp3",
 	"standard":               "EBS:VolumeUsage",
 	"sc1":                    "EBS:VolumeUsage.sc1",
 	"io1":                    "EBS:VolumeUsage.piops",

+ 485 - 0
pkg/prom/queries.go

@@ -0,0 +1,485 @@
+package prom
+
+// queries maps a query name to the text of a PromQL query template.
+//
+// Each template contains placeholders written as << .name >>. The
+// placeholders used in this table are:
+//
+//	.filter     - extra label matchers placed inside a selector's braces
+//	.cluster    - name of the cluster label
+//	.container  - name of the container label
+//	.pod        - name of the pod label
+//	.duration   - range-vector window
+//	.offset     - text placed directly after a range selector
+//	.interval   - numeric factor multiplied into some results
+//	.resolution - subquery step
+//
+// NOTE(review): the << >> syntax looks like text/template actions with custom
+// delimiters; the renderer is not part of this file - confirm before relying
+// on template features beyond plain field substitution.
+var queries = map[string]string{
+	"RAMRequests": `
+		avg(
+			count_over_time(
+				kube_pod_container_resource_requests{
+					resource="memory",
+					unit="byte",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+			*
+			avg_over_time(
+				kube_pod_container_resource_requests{
+					resource="memory",
+					unit="byte",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			node,
+			<< .container >>,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"RAMUsage": `
+		sort_desc(
+			avg(
+				count_over_time(
+					container_memory_working_set_bytes{
+						node!="",
+						<< .container >>!="",
+						<< .container >>!="POD",
+						<< .filter >>
+					}[<< .duration >>] << .offset >>
+				)
+				*
+				avg_over_time(
+					container_memory_working_set_bytes{
+						node!="",
+						<< .container >>!="",
+						<< .container >>!="POD",
+						<< .filter >>
+					}[<< .duration >>] << .offset >>
+				)
+			) by (
+				namespace,
+				node,
+				<< .container >>,
+				<< .pod >>,
+				<< .cluster >>
+			)
+		)`,
+	"CPURequests": `
+		avg(
+			count_over_time(
+				kube_pod_container_resource_requests{
+					resource="cpu",
+					unit="core",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+			*
+			avg_over_time(
+				kube_pod_container_resource_requests{
+					resource="cpu",
+					unit="core",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			node,
+			<< .container >>,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"CPUUsage": `
+		avg(
+			rate(
+				container_cpu_usage_seconds_total{
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			node,
+			<< .container >>,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"GPURequests": `
+		avg(
+			count_over_time(
+				kube_pod_container_resource_requests{
+					resource="nvidia_com_gpu",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+			*
+			avg_over_time(
+				kube_pod_container_resource_requests{
+					resource="nvidia_com_gpu",
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+			*
+			<< .interval >>
+		) by (
+			namespace,
+			node,
+			<< .container >>,
+			<< .pod >>,
+			<< .cluster >>
+		)
+		* on (
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		) group_left(
+			<< .container >>
+		) avg(
+			avg_over_time(
+				kube_pod_status_phase{
+					phase="Running",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"PVRequests": `
+		avg(
+			avg(
+				kube_persistentvolumeclaim_info{
+					volumename!="",
+					<< .filter >>
+				}
+			) by (
+				persistentvolumeclaim,
+				storageclass,
+				namespace,
+				volumename,
+				kubernetes_node,
+				<< .cluster >>
+			)
+			*
+			on (
+				persistentvolumeclaim,
+				namespace,
+				kubernetes_node,
+				<< .cluster >>
+			) group_right(
+				storageclass,
+				volumename
+			) sum(
+				kube_persistentvolumeclaim_resource_requests_storage_bytes{
+					<< .filter >>
+				}
+			) by (
+				persistentvolumeclaim,
+				namespace,
+				kubernetes_node,
+				kubernetes_name,
+				<< .cluster >>
+			)
+		) by (
+			persistentvolumeclaim,
+			storageclass,
+			namespace,
+			volumename,
+			kubernetes_node,
+			<< .cluster >>
+		)`,
+	"RAMAlloc": `
+		sum(
+			sum_over_time(
+				container_memory_allocation_bytes{
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			namespace,
+			node,
+			<< .pod >>,
+			<< .container >>,
+			<< .cluster >>
+		)
+		*
+		<< .interval >> / 60 / 60`,
+	"CPUAlloc": `
+		sum(
+			sum_over_time(
+				container_cpu_allocation{
+					node!="",
+					<< .container >>!="",
+					<< .container >>!="POD",
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			namespace,
+			node,
+			<< .pod >>,
+			<< .container >>,
+			<< .cluster >>
+		)
+		*
+		<< .interval >> / 60 / 60`,
+	"PVCAlloc": `
+		sum(
+			sum_over_time(
+				pod_pvc_allocation{
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			namespace,
+			persistentvolume,
+			persistentvolumeclaim,
+			<< .cluster >>
+		)
+		*
+		<< .interval >>/60/60`,
+	"PVHourlyCost": `
+		avg_over_time(
+			pv_hourly_cost{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"NamespaceLabels": `
+		avg_over_time(
+			kube_namespace_labels{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"PodLabels": `
+		avg_over_time(
+			kube_pod_labels{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"NamespaceAnnotations": `
+		avg_over_time(
+			kube_namespace_annotations{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"PodAnnotations": `
+		avg_over_time(
+			kube_pod_annotations{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"DeploymentLabels": `
+		avg_over_time(
+			deployment_match_labels{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"StatefulSetLabels": `
+		avg_over_time(
+			statefulSet_match_labels{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"DaemonSetLabels": `
+		sum(
+			kube_pod_owner{
+				owner_kind="DaemonSet",
+				<< .filter >>
+			}
+		) by (
+			namespace,
+			owner_name,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"JobLabels": `
+		sum(
+			kube_pod_owner{
+				owner_kind="Job",
+				<< .filter >>
+			}
+		) by (
+			namespace,
+			owner_name,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"ServiceLabels": `
+		avg_over_time(
+			service_selector_labels{
+				<< .filter >>
+			}[<< .duration >>]
+		)`,
+	"NetZoneRequests": `
+		sum(
+			increase(
+				kubecost_pod_network_egress_bytes_total{
+					internet="false",
+					sameZone="false",
+					sameRegion="true",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		) / 1024 / 1024 / 1024`,
+	"NetRegionRequests": `
+		sum(
+			increase(
+				kubecost_pod_network_egress_bytes_total{
+					internet="false",
+					sameZone="false",
+					sameRegion="false",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		) / 1024 / 1024 / 1024`,
+	"NetInternetRequests": `
+		sum(
+			increase(
+				kubecost_pod_network_egress_bytes_total{
+					internet="true",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		) / 1024 / 1024 / 1024`,
+	"Normalization": `
+		max(
+			count_over_time(
+				kube_pod_container_resource_requests{
+					resource="memory",
+					unit="byte",
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		)`,
+	"CPUCost": `
+		avg(
+			avg_over_time(
+				node_cpu_hourly_cost{
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			node,
+			instance,
+			<< .cluster >>
+		)`,
+	"RAMCost": `
+		avg(
+			avg_over_time(
+				node_ram_hourly_cost{
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			node,
+			instance,
+			<< .cluster >>
+		)`,
+	"GPUCost": `
+		avg(
+			avg_over_time(
+				node_gpu_hourly_cost{
+					<< .filter >>
+				}[<< .duration >>] << .offset >>
+			)
+		) by (
+			node,
+			instance,
+			<< .cluster >>
+		)`,
+
+	// The entries below take only a window (.duration) and, for subqueries,
+	// a step (.resolution); none of them uses .offset or .interval.
+	"Pods": `avg(kube_pod_container_status_running{<< .filter >>}) by (namespace, << .pod >>, << .cluster >>)[<< .duration >>: << .resolution >>]`,
+	"PodsUID": `avg(kube_pod_container_status_running{<< .filter >>}) by (namespace, uid, << .pod >>, << .cluster >>)[<< .duration >>: << .resolution >>]`,
+	"RAMBytesAllocated": `avg(avg_over_time(container_memory_allocation_bytes{<< .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, provider_id, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"RAMReq": `avg(avg_over_time(kube_pod_container_resource_requests{resource="memory", unit="byte", << .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"RAMUsageAvg": `avg(avg_over_time(container_memory_working_set_bytes{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"RAMUsageMax": `max(max_over_time(container_memory_working_set_bytes{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"CPUCoresAllocated": `avg(avg_over_time(container_cpu_allocation{<< .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"CPUReq": `avg(avg_over_time(kube_pod_container_resource_requests{resource="cpu", unit="core", << .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"CPUUsageAvg": `avg(rate(container_cpu_usage_seconds_total{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"CPUUsageMax": `max(rate(container_cpu_usage_seconds_total{<< .container >>!="", << .container >>!="POD", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"GPUsRequested": `avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", << .container >>!="", << .container >>!="POD", node!="", << .filter >>}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	// NOTE(review): GPUsAllocated is the only selector in this table without
+	// a << .filter >> matcher - confirm whether that omission is intentional.
+	"GPUsAllocated": `avg(avg_over_time(container_gpu_allocation{<< .container >>!="", << .container >>!="POD", node!=""}[<< .duration >>])) by (namespace, node, << .container >>, << .pod >>, << .cluster >>)`,
+	"NodeCostPerCPUHr": `avg(avg_over_time(node_cpu_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
+	"NodeCostPerRAMGiBHr": `avg(avg_over_time(node_ram_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
+	"NodeCostPerGPUHr": `avg(avg_over_time(node_gpu_hourly_cost{<< .filter >>}[<< .duration >>])) by (instance_type, provider_id, node, << .cluster >>)`,
+	"NodeIsSpot": `avg_over_time(kubecost_node_is_spot{<< .filter >>}[<< .duration >>])`,
+	"PVCInfo": `avg(kube_persistentvolumeclaim_info{volumename != "", << .filter >>}) by (persistentvolumeclaim, storageclass, volumename, namespace, << .cluster >>)[<< .duration >>: << .resolution >>]`,
+	"PVBytes": `avg(avg_over_time(kube_persistentvolume_capacity_bytes{<< .filter >>}[<< .duration >>])) by (persistentvolume, << .cluster >>)`,
+	"PodPVCAllocation": `avg(avg_over_time(pod_pvc_allocation{<< .filter >>}[<< .duration >>])) by (persistentvolume, persistentvolumeclaim, namespace, << .pod >>, << .cluster >>)`,
+	"PVCBytesRequested": `avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{<< .filter >>}[<< .duration >>])) by (persistentvolumeclaim, namespace, << .cluster >>)`,
+	"PVCostPerGiBHour": `avg(avg_over_time(pv_hourly_cost{<< .filter >>}[<< .duration >>])) by (volumename, << .cluster >>)`,
+	"NetZoneGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
+	"NetZoneCostPerGiB": `avg(avg_over_time(kubecost_network_zone_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
+	"NetRegionGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
+	"NetRegionCostPerGiB": `avg(avg_over_time(kubecost_network_region_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
+	"NetInternetGiB": `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>) / 1024 / 1024 / 1024`,
+	"NetInternetCostPerGiB": `avg(avg_over_time(kubecost_network_internet_egress_cost{<< .filter >>}[<< .duration >>])) by (<< .cluster >>)`,
+	"NetReceiveBytes": `sum(increase(container_network_receive_bytes_total{<< .pod >>!="", << .container >>="POD", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>)`,
+	"NetTransferBytes": `sum(increase(container_network_transmit_bytes_total{<< .pod >>!="", << .container >>="POD", << .filter >>}[<< .duration >>])) by (namespace, << .pod >>, << .cluster >>)`,
+	"PodsWithReplicaSetOwner": `
+		sum(
+			avg_over_time(
+				kube_pod_owner{
+					owner_kind="ReplicaSet",
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			owner_name,
+			namespace,
+			<< .pod >>,
+			<< .cluster >>
+		)`,
+	"ReplicaSetsWithoutOwners": `
+		avg(
+			avg_over_time(
+				kube_replicaset_owner{
+					owner_kind="<none>",
+					owner_name="<none>",
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			replicaset,
+			namespace,
+			<< .cluster >>
+		)`,
+	"LBCostPerHr": `
+		avg(
+			avg_over_time(
+				kubecost_load_balancer_cost{
+					<< .filter >>
+				}[<< .duration >>]
+			)
+		) by (
+			namespace,
+			service_name,
+			<< .cluster >>
+		)`,
+	"LBActiveMins": `
+		count(
+			kubecost_load_balancer_cost{
+				<< .filter >>
+			}
+		) by (
+			namespace,
+			service_name,
+			<< .cluster >>
+		)[<< .duration >>: << .resolution >>]`,
+}