|
@@ -472,6 +472,10 @@ func ClusterDisks(client prometheus.Client, provider models.Provider, start, end
|
|
|
return diskMap, nil
|
|
return diskMap, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
// NodeOverhead holds per-node overhead figures as two float64 fractions, one
// for CPU and one for RAM. A Node references it through its Overhead pointer
// field, and ClusterNodes obtains the values from buildOverheadMap, which is
// passed the capacity and allocatable maps for RAM bytes and CPU cores.
// NOTE(review): presumably each fraction is (capacity - allocatable) / capacity;
// buildOverheadMap is not visible in this diff — confirm against its body.
+type NodeOverhead struct {
|
|
|
|
|
// CpuOverheadFraction is the CPU overhead expressed as a fraction.
// NOTE(review): expected range looks like [0, 1] — confirm.
+ CpuOverheadFraction float64
|
|
|
|
|
// RamOverheadFraction is the RAM overhead expressed as a fraction.
// NOTE(review): expected range looks like [0, 1] — confirm.
+ RamOverheadFraction float64
|
|
|
|
|
+}
|
|
|
type Node struct {
|
|
type Node struct {
|
|
|
Cluster string
|
|
Cluster string
|
|
|
Name string
|
|
Name string
|
|
@@ -494,6 +498,7 @@ type Node struct {
|
|
|
CostPerCPUHr float64
|
|
CostPerCPUHr float64
|
|
|
CostPerRAMGiBHr float64
|
|
CostPerRAMGiBHr float64
|
|
|
CostPerGPUHr float64
|
|
CostPerGPUHr float64
|
|
|
|
|
+ Overhead *NodeOverhead
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// GKE lies about the number of cores e2 nodes have. This table
|
|
// GKE lies about the number of cores e2 nodes have. This table
|
|
@@ -567,9 +572,11 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
optionalCtx := prom.NewNamedContext(client, prom.ClusterOptionalContextName)
|
|
optionalCtx := prom.NewNamedContext(client, prom.ClusterOptionalContextName)
|
|
|
|
|
|
|
|
queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
queryNodeCPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
|
- queryNodeCPUCores := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
|
|
|
|
+ queryNodeCPUCoresCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
|
|
+ queryNodeCPUCoresAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_cpu_cores[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s])) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, durStr, env.GetPromClusterLabel())
|
|
queryNodeRAMHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s])) by (%s, node, instance_type, provider_id) / 1024 / 1024 / 1024`, durStr, env.GetPromClusterLabel())
|
|
|
- queryNodeRAMBytes := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
|
|
|
|
+ queryNodeRAMBytesCapacity := fmt.Sprintf(`avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
|
|
+ queryNodeRAMBytesAllocatable := fmt.Sprintf(`avg(avg_over_time(kube_node_status_allocatable_memory_bytes[%s])) by (%s, node)`, durStr, env.GetPromClusterLabel())
|
|
|
queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count[%s])) by (%s, node, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
queryNodeGPUCount := fmt.Sprintf(`avg(avg_over_time(node_gpu_count[%s])) by (%s, node, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
|
queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
queryNodeGPUHourlyCost := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s])) by (%s, node, instance_type, provider_id)`, durStr, env.GetPromClusterLabel())
|
|
|
queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm])) by (kubernetes_node, %s, mode)`, durStr, minsPerResolution, env.GetPromClusterLabel())
|
|
queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm])) by (kubernetes_node, %s, mode)`, durStr, minsPerResolution, env.GetPromClusterLabel())
|
|
@@ -581,9 +588,11 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
|
|
|
|
|
// Return errors if these fail
|
|
// Return errors if these fail
|
|
|
resChNodeCPUHourlyCost := requiredCtx.QueryAtTime(queryNodeCPUHourlyCost, t)
|
|
resChNodeCPUHourlyCost := requiredCtx.QueryAtTime(queryNodeCPUHourlyCost, t)
|
|
|
- resChNodeCPUCores := requiredCtx.QueryAtTime(queryNodeCPUCores, t)
|
|
|
|
|
|
|
+ resChNodeCPUCoresCapacity := requiredCtx.QueryAtTime(queryNodeCPUCoresCapacity, t)
|
|
|
|
|
+ resChNodeCPUCoresAllocatable := requiredCtx.QueryAtTime(queryNodeCPUCoresAllocatable, t)
|
|
|
resChNodeRAMHourlyCost := requiredCtx.QueryAtTime(queryNodeRAMHourlyCost, t)
|
|
resChNodeRAMHourlyCost := requiredCtx.QueryAtTime(queryNodeRAMHourlyCost, t)
|
|
|
- resChNodeRAMBytes := requiredCtx.QueryAtTime(queryNodeRAMBytes, t)
|
|
|
|
|
|
|
+ resChNodeRAMBytesCapacity := requiredCtx.QueryAtTime(queryNodeRAMBytesCapacity, t)
|
|
|
|
|
+ resChNodeRAMBytesAllocatable := requiredCtx.QueryAtTime(queryNodeRAMBytesAllocatable, t)
|
|
|
resChNodeGPUCount := requiredCtx.QueryAtTime(queryNodeGPUCount, t)
|
|
resChNodeGPUCount := requiredCtx.QueryAtTime(queryNodeGPUCount, t)
|
|
|
resChNodeGPUHourlyCost := requiredCtx.QueryAtTime(queryNodeGPUHourlyCost, t)
|
|
resChNodeGPUHourlyCost := requiredCtx.QueryAtTime(queryNodeGPUHourlyCost, t)
|
|
|
resChActiveMins := requiredCtx.QueryAtTime(queryActiveMins, t)
|
|
resChActiveMins := requiredCtx.QueryAtTime(queryActiveMins, t)
|
|
@@ -596,11 +605,13 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
resChLabels := optionalCtx.QueryAtTime(queryLabels, t)
|
|
resChLabels := optionalCtx.QueryAtTime(queryLabels, t)
|
|
|
|
|
|
|
|
resNodeCPUHourlyCost, _ := resChNodeCPUHourlyCost.Await()
|
|
resNodeCPUHourlyCost, _ := resChNodeCPUHourlyCost.Await()
|
|
|
- resNodeCPUCores, _ := resChNodeCPUCores.Await()
|
|
|
|
|
|
|
+ resNodeCPUCoresCapacity, _ := resChNodeCPUCoresCapacity.Await()
|
|
|
|
|
+ resNodeCPUCoresAllocatable, _ := resChNodeCPUCoresAllocatable.Await()
|
|
|
resNodeGPUCount, _ := resChNodeGPUCount.Await()
|
|
resNodeGPUCount, _ := resChNodeGPUCount.Await()
|
|
|
resNodeGPUHourlyCost, _ := resChNodeGPUHourlyCost.Await()
|
|
resNodeGPUHourlyCost, _ := resChNodeGPUHourlyCost.Await()
|
|
|
resNodeRAMHourlyCost, _ := resChNodeRAMHourlyCost.Await()
|
|
resNodeRAMHourlyCost, _ := resChNodeRAMHourlyCost.Await()
|
|
|
- resNodeRAMBytes, _ := resChNodeRAMBytes.Await()
|
|
|
|
|
|
|
+ resNodeRAMBytesCapacity, _ := resChNodeRAMBytesCapacity.Await()
|
|
|
|
|
+ resNodeRAMBytesAllocatable, _ := resChNodeRAMBytesAllocatable.Await()
|
|
|
resIsSpot, _ := resChIsSpot.Await()
|
|
resIsSpot, _ := resChIsSpot.Await()
|
|
|
resNodeCPUModeTotal, _ := resChNodeCPUModeTotal.Await()
|
|
resNodeCPUModeTotal, _ := resChNodeCPUModeTotal.Await()
|
|
|
resNodeRAMSystemPct, _ := resChNodeRAMSystemPct.Await()
|
|
resNodeRAMSystemPct, _ := resChNodeRAMSystemPct.Await()
|
|
@@ -633,8 +644,12 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
clusterAndNameToTypeIntermediate := mergeTypeMaps(clusterAndNameToType1, clusterAndNameToType2)
|
|
clusterAndNameToTypeIntermediate := mergeTypeMaps(clusterAndNameToType1, clusterAndNameToType2)
|
|
|
clusterAndNameToType := mergeTypeMaps(clusterAndNameToTypeIntermediate, clusterAndNameToType3)
|
|
clusterAndNameToType := mergeTypeMaps(clusterAndNameToTypeIntermediate, clusterAndNameToType3)
|
|
|
|
|
|
|
|
- cpuCoresMap := buildCPUCoresMap(resNodeCPUCores)
|
|
|
|
|
- ramBytesMap := buildRAMBytesMap(resNodeRAMBytes)
|
|
|
|
|
|
|
+ cpuCoresCapacityMap := buildCPUCoresMap(resNodeCPUCoresCapacity)
|
|
|
|
|
+ ramBytesCapacityMap := buildRAMBytesMap(resNodeRAMBytesCapacity)
|
|
|
|
|
+
|
|
|
|
|
+ cpuCoresAllocatableMap := buildCPUCoresMap(resNodeCPUCoresAllocatable)
|
|
|
|
|
+ ramBytesAllocatableMap := buildRAMBytesMap(resNodeRAMBytesAllocatable)
|
|
|
|
|
+ overheadMap := buildOverheadMap(ramBytesCapacityMap, ramBytesAllocatableMap, cpuCoresCapacityMap, cpuCoresAllocatableMap)
|
|
|
|
|
|
|
|
ramUserPctMap := buildRAMUserPctMap(resNodeRAMUserPct)
|
|
ramUserPctMap := buildRAMUserPctMap(resNodeRAMUserPct)
|
|
|
ramSystemPctMap := buildRAMSystemPctMap(resNodeRAMSystemPct)
|
|
ramSystemPctMap := buildRAMSystemPctMap(resNodeRAMSystemPct)
|
|
@@ -643,13 +658,13 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
|
|
|
|
|
labelsMap := buildLabelsMap(resLabels)
|
|
labelsMap := buildLabelsMap(resLabels)
|
|
|
|
|
|
|
|
- costTimesMinuteAndCount(activeDataMap, cpuCostMap, cpuCoresMap)
|
|
|
|
|
- costTimesMinuteAndCount(activeDataMap, ramCostMap, ramBytesMap)
|
|
|
|
|
|
|
+ costTimesMinuteAndCount(activeDataMap, cpuCostMap, cpuCoresCapacityMap)
|
|
|
|
|
+ costTimesMinuteAndCount(activeDataMap, ramCostMap, ramBytesCapacityMap)
|
|
|
costTimesMinute(activeDataMap, gpuCostMap) // there's no need to do a weird "nodeIdentifierNoProviderID" type match since gpuCounts have a providerID
|
|
costTimesMinute(activeDataMap, gpuCostMap) // there's no need to do a weird "nodeIdentifierNoProviderID" type match since gpuCounts have a providerID
|
|
|
|
|
|
|
|
nodeMap := buildNodeMap(
|
|
nodeMap := buildNodeMap(
|
|
|
cpuCostMap, ramCostMap, gpuCostMap, gpuCountMap,
|
|
cpuCostMap, ramCostMap, gpuCostMap, gpuCountMap,
|
|
|
- cpuCoresMap, ramBytesMap, ramUserPctMap,
|
|
|
|
|
|
|
+ cpuCoresCapacityMap, ramBytesCapacityMap, ramUserPctMap,
|
|
|
ramSystemPctMap,
|
|
ramSystemPctMap,
|
|
|
cpuBreakdownMap,
|
|
cpuBreakdownMap,
|
|
|
activeDataMap,
|
|
activeDataMap,
|
|
@@ -657,6 +672,7 @@ func ClusterNodes(cp models.Provider, client prometheus.Client, start, end time.
|
|
|
labelsMap,
|
|
labelsMap,
|
|
|
clusterAndNameToType,
|
|
clusterAndNameToType,
|
|
|
resolution,
|
|
resolution,
|
|
|
|
|
+ overheadMap,
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
c, err := cp.GetConfig()
|
|
c, err := cp.GetConfig()
|