|
@@ -14,6 +14,9 @@ import (
|
|
|
|
|
|
|
|
costAnalyzerCloud "github.com/kubecost/cost-model/pkg/cloud"
|
|
costAnalyzerCloud "github.com/kubecost/cost-model/pkg/cloud"
|
|
|
"github.com/kubecost/cost-model/pkg/clustercache"
|
|
"github.com/kubecost/cost-model/pkg/clustercache"
|
|
|
|
|
+ "github.com/kubecost/cost-model/pkg/errors"
|
|
|
|
|
+ "github.com/kubecost/cost-model/pkg/log"
|
|
|
|
|
+ "github.com/kubecost/cost-model/pkg/prom"
|
|
|
"github.com/kubecost/cost-model/pkg/util"
|
|
"github.com/kubecost/cost-model/pkg/util"
|
|
|
prometheusClient "github.com/prometheus/client_golang/api"
|
|
prometheusClient "github.com/prometheus/client_golang/api"
|
|
|
v1 "k8s.io/api/core/v1"
|
|
v1 "k8s.io/api/core/v1"
|
|
@@ -121,42 +124,6 @@ func (cd *CostData) GetController() (name string, kind string, hasController boo
|
|
|
return name, kind, hasController
|
|
return name, kind, hasController
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// Error collection helper
|
|
|
|
|
-type ErrorCollector struct {
|
|
|
|
|
- m sync.Mutex
|
|
|
|
|
- errors []error
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Reports an error to the collector. Ignores if the error is nil.
|
|
|
|
|
-func (ec *ErrorCollector) Report(e error) {
|
|
|
|
|
- if e == nil {
|
|
|
|
|
- return
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- ec.m.Lock()
|
|
|
|
|
- defer ec.m.Unlock()
|
|
|
|
|
-
|
|
|
|
|
- ec.errors = append(ec.errors, e)
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Whether or not the collector caught errors
|
|
|
|
|
-func (ec *ErrorCollector) IsError() bool {
|
|
|
|
|
- ec.m.Lock()
|
|
|
|
|
- defer ec.m.Unlock()
|
|
|
|
|
-
|
|
|
|
|
- return len(ec.errors) > 0
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Errors caught by the collector
|
|
|
|
|
-func (ec *ErrorCollector) Errors() []error {
|
|
|
|
|
- ec.m.Lock()
|
|
|
|
|
- defer ec.m.Unlock()
|
|
|
|
|
-
|
|
|
|
|
- errs := make([]error, len(ec.errors))
|
|
|
|
|
- copy(errs, ec.errors)
|
|
|
|
|
- return errs
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
const (
|
|
const (
|
|
|
queryRAMRequestsStr = `avg(
|
|
queryRAMRequestsStr = `avg(
|
|
|
label_replace(
|
|
label_replace(
|
|
@@ -207,10 +174,10 @@ const (
|
|
|
)
|
|
)
|
|
|
) by (namespace,container_name,pod_name,node,cluster_id)
|
|
) by (namespace,container_name,pod_name,node,cluster_id)
|
|
|
* on (pod_name, namespace, cluster_id) group_left(container) label_replace(avg(avg_over_time(kube_pod_status_phase{phase="Running"}[%s] %s)) by (pod,namespace,cluster_id), "pod_name","$1","pod","(.+)")`
|
|
* on (pod_name, namespace, cluster_id) group_left(container) label_replace(avg(avg_over_time(kube_pod_status_phase{phase="Running"}[%s] %s)) by (pod,namespace,cluster_id), "pod_name","$1","pod","(.+)")`
|
|
|
- queryPVRequestsStr = `avg(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass, namespace, volumename, cluster_id)
|
|
|
|
|
- *
|
|
|
|
|
- on (persistentvolumeclaim, namespace, cluster_id) group_right(storageclass, volumename)
|
|
|
|
|
- sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace, cluster_id)`
|
|
|
|
|
|
|
+ queryPVRequestsStr = `avg(avg(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass, namespace, volumename, cluster_id)
|
|
|
|
|
+ *
|
|
|
|
|
+ on (persistentvolumeclaim, namespace, cluster_id) group_right(storageclass, volumename)
|
|
|
|
|
+ sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace, cluster_id, kubernetes_name)) by (persistentvolumeclaim, storageclass, namespace, volumename, cluster_id)`
|
|
|
// queryRAMAllocationByteHours yields the total byte-hour RAM allocation over the given
|
|
// queryRAMAllocationByteHours yields the total byte-hour RAM allocation over the given
|
|
|
// window, aggregated by container.
|
|
// window, aggregated by container.
|
|
|
// [line 3] sum(all byte measurements) = [byte*scrape] by metric
|
|
// [line 3] sum(all byte measurements) = [byte*scrape] by metric
|
|
@@ -351,27 +318,6 @@ func getUptimeData(qr interface{}) ([]*util.Vector, bool, error) {
|
|
|
return jobData, kubecostMetrics, nil
|
|
return jobData, kubecostMetrics, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func ComputeUptimes(cli prometheusClient.Client) (map[string]float64, error) {
|
|
|
|
|
- res, err := Query(cli, `container_start_time_seconds{container_name != "POD",container_name != ""}`)
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- return nil, err
|
|
|
|
|
- }
|
|
|
|
|
- vectors, err := GetContainerMetricVector(res, false, 0, os.Getenv(clusterIDKey))
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- return nil, err
|
|
|
|
|
- }
|
|
|
|
|
- results := make(map[string]float64)
|
|
|
|
|
- for key, vector := range vectors {
|
|
|
|
|
- if err != nil {
|
|
|
|
|
- return nil, err
|
|
|
|
|
- }
|
|
|
|
|
- val := vector[0].Value
|
|
|
|
|
- uptime := time.Now().Sub(time.Unix(int64(val), 0)).Seconds()
|
|
|
|
|
- results[key] = uptime
|
|
|
|
|
- }
|
|
|
|
|
- return results, nil
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider, window string, offset string, filterNamespace string) (map[string]*CostData, error) {
|
|
func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider, window string, offset string, filterNamespace string) (map[string]*CostData, error) {
|
|
|
queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, window, offset, window, offset)
|
|
queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, window, offset, window, offset)
|
|
|
queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, window, offset, window, offset)
|
|
queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, window, offset, window, offset)
|
|
@@ -390,97 +336,127 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
|
|
|
var wg sync.WaitGroup
|
|
var wg sync.WaitGroup
|
|
|
wg.Add(11)
|
|
wg.Add(11)
|
|
|
|
|
|
|
|
- var ec ErrorCollector
|
|
|
|
|
|
|
+ var ec errors.ErrorCollector
|
|
|
var resultRAMRequests interface{}
|
|
var resultRAMRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultRAMRequests, promErr = Query(cli, queryRAMRequests)
|
|
resultRAMRequests, promErr = Query(cli, queryRAMRequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("RAMRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
|
|
|
|
|
var resultRAMUsage interface{}
|
|
var resultRAMUsage interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultRAMUsage, promErr = Query(cli, queryRAMUsage)
|
|
resultRAMUsage, promErr = Query(cli, queryRAMUsage)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("RAMUsage: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultCPURequests interface{}
|
|
var resultCPURequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultCPURequests, promErr = Query(cli, queryCPURequests)
|
|
resultCPURequests, promErr = Query(cli, queryCPURequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("CPURequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultCPUUsage interface{}
|
|
var resultCPUUsage interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultCPUUsage, promErr = Query(cli, queryCPUUsage)
|
|
resultCPUUsage, promErr = Query(cli, queryCPUUsage)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("CPUUsage: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultGPURequests interface{}
|
|
var resultGPURequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultGPURequests, promErr = Query(cli, queryGPURequests)
|
|
resultGPURequests, promErr = Query(cli, queryGPURequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("GPURequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultPVRequests interface{}
|
|
var resultPVRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultPVRequests, promErr = Query(cli, queryPVRequests)
|
|
resultPVRequests, promErr = Query(cli, queryPVRequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("PVRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetZoneRequests interface{}
|
|
var resultNetZoneRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetZoneRequests, promErr = Query(cli, queryNetZoneRequests)
|
|
resultNetZoneRequests, promErr = Query(cli, queryNetZoneRequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetZoneRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetRegionRequests interface{}
|
|
var resultNetRegionRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetRegionRequests, promErr = Query(cli, queryNetRegionRequests)
|
|
resultNetRegionRequests, promErr = Query(cli, queryNetRegionRequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetRegionRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetInternetRequests interface{}
|
|
var resultNetInternetRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetInternetRequests, promErr = Query(cli, queryNetInternetRequests)
|
|
resultNetInternetRequests, promErr = Query(cli, queryNetInternetRequests)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetInternetRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var normalizationResult interface{}
|
|
var normalizationResult interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
normalizationResult, promErr = Query(cli, normalization)
|
|
normalizationResult, promErr = Query(cli, normalization)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("normalization: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
|
|
|
|
|
podDeploymentsMapping := make(map[string]map[string][]string)
|
|
podDeploymentsMapping := make(map[string]map[string][]string)
|
|
@@ -490,6 +466,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
|
|
|
var k8sErr error
|
|
var k8sErr error
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
podDeploymentsMapping, k8sErr = getPodDeployments(cm.Cache, podlist, clusterID)
|
|
podDeploymentsMapping, k8sErr = getPodDeployments(cm.Cache, podlist, clusterID)
|
|
|
if k8sErr != nil {
|
|
if k8sErr != nil {
|
|
@@ -513,7 +490,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
|
|
|
|
|
|
|
|
if ec.IsError() {
|
|
if ec.IsError() {
|
|
|
for _, promErr := range ec.Errors() {
|
|
for _, promErr := range ec.Errors() {
|
|
|
- klog.V(1).Infof("[Warning] Query Error: %s", promErr.Error())
|
|
|
|
|
|
|
+ log.Errorf("ComputeCostData: Prometheus error: %s", promErr.Error())
|
|
|
}
|
|
}
|
|
|
// TODO: Categorize fatal prometheus query failures
|
|
// TODO: Categorize fatal prometheus query failures
|
|
|
// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
|
|
// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
|
|
@@ -1161,7 +1138,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
|
|
|
nodeLabels := n.GetObjectMeta().GetLabels()
|
|
nodeLabels := n.GetObjectMeta().GetLabels()
|
|
|
nodeLabels["providerID"] = n.Spec.ProviderID
|
|
nodeLabels["providerID"] = n.Spec.ProviderID
|
|
|
|
|
|
|
|
- cnode, err := cp.NodePricing(cp.GetKey(nodeLabels))
|
|
|
|
|
|
|
+ cnode, err := cp.NodePricing(cp.GetKey(nodeLabels, n))
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
klog.V(1).Infof("[Warning] Error getting node pricing. Error: " + err.Error())
|
|
klog.V(1).Infof("[Warning] Error getting node pricing. Error: " + err.Error())
|
|
|
if cnode != nil {
|
|
if cnode != nil {
|
|
@@ -1233,7 +1210,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
|
|
|
|
|
|
|
|
if newCnode.GPU != "" && newCnode.GPUCost == "" {
|
|
if newCnode.GPU != "" && newCnode.GPUCost == "" {
|
|
|
// We couldn't find a gpu cost, so fix cpu and ram, then accordingly
|
|
// We couldn't find a gpu cost, so fix cpu and ram, then accordingly
|
|
|
- klog.V(4).Infof("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels).Features())
|
|
|
|
|
|
|
+ klog.V(4).Infof("GPU without cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
|
|
|
|
|
|
|
|
defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
|
|
defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
@@ -1323,7 +1300,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
|
|
|
newCnode.GPUCost = fmt.Sprintf("%f", gpuPrice)
|
|
newCnode.GPUCost = fmt.Sprintf("%f", gpuPrice)
|
|
|
} else if newCnode.RAMCost == "" {
|
|
} else if newCnode.RAMCost == "" {
|
|
|
// We couldn't find a ramcost, so fix cpu and allocate ram accordingly
|
|
// We couldn't find a ramcost, so fix cpu and allocate ram accordingly
|
|
|
- klog.V(4).Infof("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels).Features())
|
|
|
|
|
|
|
+ klog.V(4).Infof("No RAM cost found for %s, calculating...", cp.GetKey(nodeLabels, n).Features())
|
|
|
|
|
|
|
|
defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
|
|
defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
@@ -1772,205 +1749,266 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
queryProfileStart := time.Now()
|
|
queryProfileStart := time.Now()
|
|
|
queryProfileCh := make(chan string, numQueries)
|
|
queryProfileCh := make(chan string, numQueries)
|
|
|
|
|
|
|
|
- var ec ErrorCollector
|
|
|
|
|
|
|
+ var ec errors.ErrorCollector
|
|
|
var resultRAMRequests interface{}
|
|
var resultRAMRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMRequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMRequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultRAMRequests, promErr = QueryRange(cli, queryRAMRequests, start, end, window)
|
|
resultRAMRequests, promErr = QueryRange(cli, queryRAMRequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("RAMRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultRAMUsage interface{}
|
|
var resultRAMUsage interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMUsage", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMUsage", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultRAMUsage, promErr = QueryRange(cli, queryRAMUsage, start, end, window)
|
|
resultRAMUsage, promErr = QueryRange(cli, queryRAMUsage, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("RAMUsage: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultCPURequests interface{}
|
|
var resultCPURequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPURequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPURequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultCPURequests, promErr = QueryRange(cli, queryCPURequests, start, end, window)
|
|
resultCPURequests, promErr = QueryRange(cli, queryCPURequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("CPURequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultCPUUsage interface{}
|
|
var resultCPUUsage interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPUUsage", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPUUsage", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultCPUUsage, promErr = QueryRange(cli, queryCPUUsage, start, end, window)
|
|
resultCPUUsage, promErr = QueryRange(cli, queryCPUUsage, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("CPUUsage: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultRAMAllocations interface{}
|
|
var resultRAMAllocations interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMAllocations", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "RAMAllocations", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultRAMAllocations, promErr = QueryRange(cli, queryRAMAlloc, start, end, window)
|
|
resultRAMAllocations, promErr = QueryRange(cli, queryRAMAlloc, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("RAMAllocations: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultCPUAllocations interface{}
|
|
var resultCPUAllocations interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPUAllocations", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "CPUAllocations", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultCPUAllocations, promErr = QueryRange(cli, queryCPUAlloc, start, end, window)
|
|
resultCPUAllocations, promErr = QueryRange(cli, queryCPUAlloc, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("CPUAllocations: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultGPURequests interface{}
|
|
var resultGPURequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "GPURequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "GPURequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultGPURequests, promErr = QueryRange(cli, queryGPURequests, start, end, window)
|
|
resultGPURequests, promErr = QueryRange(cli, queryGPURequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("GPURequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultPVRequests interface{}
|
|
var resultPVRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVRequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVRequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultPVRequests, promErr = QueryRange(cli, queryPVRequests, start, end, window)
|
|
resultPVRequests, promErr = QueryRange(cli, queryPVRequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("PVRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetZoneRequests interface{}
|
|
var resultNetZoneRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetZoneRequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetZoneRequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetZoneRequests, promErr = QueryRange(cli, queryNetZoneRequests, start, end, window)
|
|
resultNetZoneRequests, promErr = QueryRange(cli, queryNetZoneRequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetZoneRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetRegionRequests interface{}
|
|
var resultNetRegionRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetRegionRequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetRegionRequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetRegionRequests, promErr = QueryRange(cli, queryNetRegionRequests, start, end, window)
|
|
resultNetRegionRequests, promErr = QueryRange(cli, queryNetRegionRequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetRegionRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var resultNetInternetRequests interface{}
|
|
var resultNetInternetRequests interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetInternetRequests", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NetInternetRequests", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
resultNetInternetRequests, promErr = QueryRange(cli, queryNetInternetRequests, start, end, window)
|
|
resultNetInternetRequests, promErr = QueryRange(cli, queryNetInternetRequests, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NetInternetRequests: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var pvPodAllocationResults interface{}
|
|
var pvPodAllocationResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVPodAllocation", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVPodAllocation", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
pvPodAllocationResults, promErr = QueryRange(cli, queryPVCAllocation, start, end, window)
|
|
pvPodAllocationResults, promErr = QueryRange(cli, queryPVCAllocation, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("PVPodAllocation: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var pvCostResults interface{}
|
|
var pvCostResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVCost", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PVCost", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
pvCostResults, promErr = QueryRange(cli, queryPVHourlyCost, start, end, window)
|
|
pvCostResults, promErr = QueryRange(cli, queryPVHourlyCost, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("PVCost: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var nsLabelsResults interface{}
|
|
var nsLabelsResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NSLabels", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "NSLabels", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
nsLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryNSLabels, windowString), start, end, window)
|
|
nsLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryNSLabels, windowString), start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("NSLabels: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var podLabelsResults interface{}
|
|
var podLabelsResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PodLabels", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "PodLabels", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
podLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodLabels, windowString), start, end, window)
|
|
podLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodLabels, windowString), start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("PodLabels: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var serviceLabelsResults interface{}
|
|
var serviceLabelsResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "ServiceLabels", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "ServiceLabels", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
serviceLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryServiceLabels, windowString), start, end, window)
|
|
serviceLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryServiceLabels, windowString), start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("ServiceLabels: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var deploymentLabelsResults interface{}
|
|
var deploymentLabelsResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "DeploymentLabels", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "DeploymentLabels", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
deploymentLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryDeploymentLabels, windowString), start, end, window)
|
|
deploymentLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryDeploymentLabels, windowString), start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("DeploymentLabels: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var daemonsetResults interface{}
|
|
var daemonsetResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "Daemonsets", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "Daemonsets", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
daemonsetResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodDaemonsets), start, end, window)
|
|
daemonsetResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodDaemonsets), start, end, window)
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("Daemonsets: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var statefulsetLabelsResults interface{}
|
|
var statefulsetLabelsResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "StatefulSetLabels", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "StatefulSetLabels", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
statefulsetLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryStatefulsetLabels, windowString), start, end, window)
|
|
statefulsetLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryStatefulsetLabels, windowString), start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("StatefulSetLabels: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
var normalizationResults interface{}
|
|
var normalizationResults interface{}
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "Normalization", queryProfileCh)
|
|
defer measureTimeAsync(time.Now(), profileThreshold, "Normalization", queryProfileCh)
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
var promErr error
|
|
var promErr error
|
|
|
normalizationResults, promErr = QueryRange(cli, normalization, start, end, window)
|
|
normalizationResults, promErr = QueryRange(cli, normalization, start, end, window)
|
|
|
|
|
|
|
|
- ec.Report(promErr)
|
|
|
|
|
|
|
+ if promErr != nil {
|
|
|
|
|
+ ec.Report(fmt.Errorf("Normalization: %s", promErr))
|
|
|
|
|
+ }
|
|
|
}()
|
|
}()
|
|
|
|
|
|
|
|
podDeploymentsMapping := make(map[string]map[string][]string)
|
|
podDeploymentsMapping := make(map[string]map[string][]string)
|
|
@@ -1981,6 +2019,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
var k8sErr error
|
|
var k8sErr error
|
|
|
go func() {
|
|
go func() {
|
|
|
defer wg.Done()
|
|
defer wg.Done()
|
|
|
|
|
+ defer errors.HandlePanic()
|
|
|
|
|
|
|
|
podDeploymentsMapping, k8sErr = getPodDeployments(cm.Cache, podlist, clusterID)
|
|
podDeploymentsMapping, k8sErr = getPodDeployments(cm.Cache, podlist, clusterID)
|
|
|
if k8sErr != nil {
|
|
if k8sErr != nil {
|
|
@@ -2016,7 +2055,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
if ec.IsError() {
|
|
if ec.IsError() {
|
|
|
for _, promErr := range ec.Errors() {
|
|
for _, promErr := range ec.Errors() {
|
|
|
- klog.V(1).Infof("[Warning] Query Error: %s", promErr.Error())
|
|
|
|
|
|
|
+ log.Errorf("CostDataRange: Prometheus error: %s", promErr.Error())
|
|
|
}
|
|
}
|
|
|
// TODO: Categorize fatal prometheus query failures
|
|
// TODO: Categorize fatal prometheus query failures
|
|
|
// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
|
|
// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
|
|
@@ -2029,8 +2068,11 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
normalizationValue, err := getNormalizations(normalizationResults)
|
|
normalizationValue, err := getNormalizations(normalizationResults)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, fmt.Errorf("error computing normalization %s for start=%s, end=%s, window=%s, res=%f: %s", normalization,
|
|
|
|
|
- start, end, window, resolutionHours*60*60, err.Error())
|
|
|
|
|
|
|
+ msg := fmt.Sprintf("error computing normalization %s for start=%s, end=%s, window=%s, res=%f", normalization, start, end, window, resolutionHours*60*60)
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap(msg)
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("%s: %s", msg, err)
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
measureTime(profileStart, profileThreshold, fmt.Sprintf("costDataRange(%fh): compute normalizations", durHrs))
|
|
measureTime(profileStart, profileThreshold, fmt.Sprintf("costDataRange(%fh): compute normalizations", durHrs))
|
|
@@ -2045,7 +2087,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
if pvClaimMapping != nil {
|
|
if pvClaimMapping != nil {
|
|
|
err = addPVData(cm.Cache, pvClaimMapping, cp)
|
|
err = addPVData(cm.Cache, pvClaimMapping, cp)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ return nil, fmt.Errorf("pvClaimMapping: %s", err)
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -2141,7 +2183,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
RAMReqMap, err := GetNormalizedContainerMetricVectors(resultRAMRequests, normalizationValue, clusterID)
|
|
RAMReqMap, err := GetNormalizedContainerMetricVectors(resultRAMRequests, normalizationValue, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetNormalizedContainerMetricVectors(RAMRequests)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetNormalizedContainerMetricVectors(RAMRequests): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range RAMReqMap {
|
|
for key := range RAMReqMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2149,7 +2194,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
RAMUsedMap, err := GetNormalizedContainerMetricVectors(resultRAMUsage, normalizationValue, clusterID)
|
|
RAMUsedMap, err := GetNormalizedContainerMetricVectors(resultRAMUsage, normalizationValue, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetNormalizedContainerMetricVectors(RAMUsage)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetNormalizedContainerMetricVectors(RAMUsage): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range RAMUsedMap {
|
|
for key := range RAMUsedMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2157,7 +2205,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
CPUReqMap, err := GetNormalizedContainerMetricVectors(resultCPURequests, normalizationValue, clusterID)
|
|
CPUReqMap, err := GetNormalizedContainerMetricVectors(resultCPURequests, normalizationValue, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetNormalizedContainerMetricVectors(CPURequests)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetNormalizedContainerMetricVectors(CPURequests): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range CPUReqMap {
|
|
for key := range CPUReqMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2167,7 +2218,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
// rate(container_cpu_usage_seconds_total) which properly accounts for normalized rates
|
|
// rate(container_cpu_usage_seconds_total) which properly accounts for normalized rates
|
|
|
CPUUsedMap, err := GetContainerMetricVectors(resultCPUUsage, clusterID)
|
|
CPUUsedMap, err := GetContainerMetricVectors(resultCPUUsage, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetContainerMetricVectors(CPUUsage)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetContainerMetricVectors(CPUUsage): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range CPUUsedMap {
|
|
for key := range CPUUsedMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2175,7 +2229,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
RAMAllocMap, err := GetContainerMetricVectors(resultRAMAllocations, clusterID)
|
|
RAMAllocMap, err := GetContainerMetricVectors(resultRAMAllocations, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetContainerMetricVectors(RAMAllocations)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetContainerMetricVectors(RAMAllocations): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range RAMAllocMap {
|
|
for key := range RAMAllocMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2183,7 +2240,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
CPUAllocMap, err := GetContainerMetricVectors(resultCPUAllocations, clusterID)
|
|
CPUAllocMap, err := GetContainerMetricVectors(resultCPUAllocations, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetContainerMetricVectors(CPUAllocations)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetContainerMetricVectors(CPUAllocations): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range CPUAllocMap {
|
|
for key := range CPUAllocMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2191,7 +2251,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
|
|
|
|
|
GPUReqMap, err := GetNormalizedContainerMetricVectors(resultGPURequests, normalizationValue, clusterID)
|
|
GPUReqMap, err := GetNormalizedContainerMetricVectors(resultGPURequests, normalizationValue, clusterID)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
- return nil, err
|
|
|
|
|
|
|
+ if pce, ok := err.(prom.CommError); ok {
|
|
|
|
|
+ return nil, pce.Wrap("GetContainerMetricVectors(GPURequests)")
|
|
|
|
|
+ }
|
|
|
|
|
+ return nil, fmt.Errorf("GetContainerMetricVectors(GPURequests): %s", err)
|
|
|
}
|
|
}
|
|
|
for key := range GPUReqMap {
|
|
for key := range GPUReqMap {
|
|
|
containers[key] = true
|
|
containers[key] = true
|
|
@@ -2398,7 +2461,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- return containerNameCost, err
|
|
|
|
|
|
|
+ return containerNameCost, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func applyAllocationToRequests(allocationMap map[string][]*util.Vector, requestMap map[string][]*util.Vector) {
|
|
func applyAllocationToRequests(allocationMap map[string][]*util.Vector, requestMap map[string][]*util.Vector) {
|
|
@@ -2563,12 +2626,14 @@ func QueryRange(cli prometheusClient.Client, query string, start, end time.Time,
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
return nil, fmt.Errorf("[Error] %s fetching query %s", err.Error(), query)
|
|
return nil, fmt.Errorf("[Error] %s fetching query %s", err.Error(), query)
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
var toReturn interface{}
|
|
var toReturn interface{}
|
|
|
err = json.Unmarshal(body, &toReturn)
|
|
err = json.Unmarshal(body, &toReturn)
|
|
|
if err != nil {
|
|
if err != nil {
|
|
|
return nil, fmt.Errorf("[Error] %d %s fetching query %s", resp.StatusCode, err.Error(), query)
|
|
return nil, fmt.Errorf("[Error] %d %s fetching query %s", resp.StatusCode, err.Error(), query)
|
|
|
}
|
|
}
|
|
|
- return toReturn, err
|
|
|
|
|
|
|
+
|
|
|
|
|
+ return toReturn, nil
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func Query(cli prometheusClient.Client, query string) (interface{}, error) {
|
|
func Query(cli prometheusClient.Client, query string) (interface{}, error) {
|
|
@@ -2642,10 +2707,15 @@ type ContainerMetric struct {
|
|
|
ContainerName string
|
|
ContainerName string
|
|
|
NodeName string
|
|
NodeName string
|
|
|
ClusterID string
|
|
ClusterID string
|
|
|
|
|
+ key string
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func (c *ContainerMetric) Key() string {
|
|
func (c *ContainerMetric) Key() string {
|
|
|
- return c.Namespace + "," + c.PodName + "," + c.ContainerName + "," + c.NodeName + "," + c.ClusterID
|
|
|
|
|
|
|
+ return c.key
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+func containerMetricKey(ns, podName, containerName, nodeName, clusterID string) string {
|
|
|
|
|
+ return ns + "," + podName + "," + containerName + "," + nodeName + "," + clusterID
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
|
|
func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
|
|
@@ -2657,6 +2727,7 @@ func NewContainerMetricFromKey(key string) (*ContainerMetric, error) {
|
|
|
ContainerName: s[2],
|
|
ContainerName: s[2],
|
|
|
NodeName: s[3],
|
|
NodeName: s[3],
|
|
|
ClusterID: s[4],
|
|
ClusterID: s[4],
|
|
|
|
|
+ key: key,
|
|
|
}, nil
|
|
}, nil
|
|
|
}
|
|
}
|
|
|
return nil, fmt.Errorf("Not a valid key")
|
|
return nil, fmt.Errorf("Not a valid key")
|
|
@@ -2669,6 +2740,7 @@ func newContainerMetricFromValues(ns string, podName string, containerName strin
|
|
|
ContainerName: containerName,
|
|
ContainerName: containerName,
|
|
|
NodeName: nodeName,
|
|
NodeName: nodeName,
|
|
|
ClusterID: clusterId,
|
|
ClusterID: clusterId,
|
|
|
|
|
+ key: containerMetricKey(ns, podName, containerName, nodeName, clusterId),
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -2685,6 +2757,7 @@ func newContainerMetricsFromPod(pod v1.Pod, clusterID string) ([]*ContainerMetri
|
|
|
ContainerName: containerName,
|
|
ContainerName: containerName,
|
|
|
NodeName: node,
|
|
NodeName: node,
|
|
|
ClusterID: clusterID,
|
|
ClusterID: clusterID,
|
|
|
|
|
+ key: containerMetricKey(ns, podName, containerName, node, clusterID),
|
|
|
})
|
|
})
|
|
|
}
|
|
}
|
|
|
return cs, nil
|
|
return cs, nil
|
|
@@ -2739,6 +2812,7 @@ func newContainerMetricFromPrometheus(metrics map[string]interface{}, defaultClu
|
|
|
Namespace: namespace,
|
|
Namespace: namespace,
|
|
|
NodeName: nodeName,
|
|
NodeName: nodeName,
|
|
|
ClusterID: clusterID,
|
|
ClusterID: clusterID,
|
|
|
|
|
+ key: containerMetricKey(namespace, podName, containerName, nodeName, clusterID),
|
|
|
}, nil
|
|
}, nil
|
|
|
}
|
|
}
|
|
|
|
|
|