Просмотр исходного кода

Merge pull request #276 from kubecost/bolt/nan-checks

NaN Checks
Matt Bolt 6 лет назад
Родитель
Коммит
e154293ffd
5 измененных файлов с 45 добавлено и 18 удалено
  1. 1 1
      cloud/azureprovider.go
  2. 1 1
      cloud/gcpprovider.go
  3. 21 10
      costmodel/costmodel.go
  4. 21 5
      costmodel/promparsers.go
  5. 1 1
      costmodel/router.go

+ 1 - 1
cloud/azureprovider.go

@@ -416,7 +416,7 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 		klog.V(4).Infof("Returning pricing for node %s: %+v from key %s", key, n, key.Features())
 		return n, nil
 	}
-	klog.V(1).Infof("Warning: no pricing data found for %s: %s", key.Features(), key)
+	klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
 	c, err := az.GetConfig()
 	if err != nil {
 		return nil, fmt.Errorf("No default pricing data available")

+ 1 - 1
cloud/gcpprovider.go

@@ -1106,6 +1106,6 @@ func (gcp *GCP) NodePricing(key Key) (*Node, error) {
 		n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 		return n.Node, nil
 	}
-	klog.V(1).Infof("Warning: no pricing data found for %s: %s", key.Features(), key)
+	klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
 	return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
 }

+ 21 - 10
costmodel/costmodel.go

@@ -403,13 +403,13 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 
 	nodes, err := cm.GetNodeCost(cp)
 	if err != nil {
-		klog.V(1).Infof("Warning, no Node cost model available: " + err.Error())
+		klog.V(1).Infof("[Warning] no Node cost model available: " + err.Error())
 		return nil, err
 	}
 
 	pvClaimMapping, err := GetPVInfo(resultPVRequests, clusterID)
 	if err != nil {
-		klog.Infof("Unable to get PV Data: %s", err.Error())
+		klog.Infof("[Warning] Unable to get PV Data: %s", err.Error())
 	}
 	if pvClaimMapping != nil {
 		err = addPVData(cm.Cache, pvClaimMapping, cp)
@@ -420,7 +420,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 
 	networkUsageMap, err := GetNetworkUsageData(resultNetZoneRequests, resultNetRegionRequests, resultNetInternetRequests, clusterID)
 	if err != nil {
-		klog.V(1).Infof("Unable to get Network Cost Data: %s", err.Error())
+		klog.V(1).Infof("[Warning] Unable to get Network Cost Data: %s", err.Error())
 		networkUsageMap = make(map[string]*NetworkUsageData)
 	}
 
@@ -605,8 +605,8 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 					NamespaceLabels: nsLabels,
 					ClusterID:       clusterID,
 				}
-				costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed)
-				costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed)
+				costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed, "CPU")
+				costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed, "RAM")
 				if filterNamespace == "" {
 					containerNameCost[newKey] = costs
 				} else if costs.Namespace == filterNamespace {
@@ -675,8 +675,8 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 				NamespaceLabels: namespacelabels,
 				ClusterID:       c.ClusterID,
 			}
-			costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed)
-			costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed)
+			costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed, "CPU")
+			costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed, "RAM")
 			if filterNamespace == "" {
 				containerNameCost[key] = costs
 				missingContainers[key] = costs
@@ -843,11 +843,22 @@ func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*c
 	return nil
 }
 
-func getContainerAllocation(req []*Vector, used []*Vector) []*Vector {
+func getContainerAllocation(req []*Vector, used []*Vector, allocationType string) []*Vector {
 	// The result of the normalize operation will be a new []*Vector to replace the requests
 	allocationOp := func(r *Vector, x *float64, y *float64) bool {
 		if x != nil && y != nil {
-			r.Value = math.Max(*x, *y)
+			x1 := *x
+			if math.IsNaN(x1) {
+				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for requests.", allocationType)
+				x1 = 0.0
+			}
+			y1 := *y
+			if math.IsNaN(y1) {
+				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for used.", allocationType)
+				y1 = 0.0
+			}
+
+			r.Value = math.Max(x1, y1)
 		} else if x != nil {
 			r.Value = *x
 		} else if y != nil {
@@ -1593,7 +1604,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 
 	nodes, err := cm.GetNodeCost(cp)
 	if err != nil {
-		klog.V(1).Infof("Warning, no cost model available: " + err.Error())
+		klog.V(1).Infof("[Warning] no cost model available: " + err.Error())
 		return nil, err
 	}
 

+ 21 - 5
costmodel/promparsers.go

@@ -96,6 +96,8 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 			return nil, fmt.Errorf("Metric field is improperly formatted")
 		}
 
+		labels := labelsForMetric(metricMap)
+
 		// Determine if the result is a ranged data set or single value
 		_, isRange := resultInterface["values"]
 
@@ -106,7 +108,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 				return nil, fmt.Errorf("Value field does not exist in data result vector")
 			}
 
-			v, err := parseDataPoint(dataPoint)
+			v, err := parseDataPoint(dataPoint, labels)
 			if err != nil {
 				return nil, err
 			}
@@ -118,7 +120,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 			}
 
 			for _, value := range values {
-				v, err := parseDataPoint(value)
+				v, err := parseDataPoint(value, labels)
 				if err != nil {
 					return nil, err
 				}
@@ -136,7 +138,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 	return result, nil
 }
 
-func parseDataPoint(dataPoint interface{}) (*Vector, error) {
+func parseDataPoint(dataPoint interface{}, labels string) (*Vector, error) {
 	value, ok := dataPoint.([]interface{})
 	if !ok || len(value) != 2 {
 		return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
@@ -148,6 +150,11 @@ func parseDataPoint(dataPoint interface{}) (*Vector, error) {
 		return nil, err
 	}
 
+	if math.IsNaN(v) {
+		klog.V(1).Infof("[Warning] Found NaN value parsing vector data point for metric: %s", labels)
+		v = 0.0
+	}
+
 	return &Vector{
 		Timestamp: math.Round(value[0].(float64)/10) * 10,
 		Value:     v,
@@ -179,14 +186,14 @@ func GetPVInfo(qr interface{}, defaultClusterID string) (map[string]*PersistentV
 
 		volumeName, err := val.GetString("volumename")
 		if err != nil {
-			klog.V(4).Infof("Warning: Unfulfilled claim %s: volumename field does not exist in data result vector", pvcName)
+			klog.V(4).Infof("[Warning] Unfulfilled claim %s: volumename field does not exist in data result vector", pvcName)
 			volumeName = ""
 		}
 
 		pvClass, err := val.GetString("storageclass")
 		if err != nil {
 			// TODO: We need to look up the actual PV and PV capacity. For now just proceed with "".
-			klog.V(2).Infof("Storage Class not found for claim \"%s/%s\".", ns, pvcName)
+			klog.V(2).Infof("[Warning] Storage Class not found for claim \"%s/%s\".", ns, pvcName)
 			pvClass = ""
 		}
 
@@ -429,3 +436,12 @@ func GetServiceSelectorLabelsMetrics(queryResult interface{}, defaultClusterID s
 
 	return toReturn, nil
 }
+
+func labelsForMetric(metricMap map[string]interface{}) string {
+	var pairs []string
+	for k, v := range metricMap {
+		pairs = append(pairs, fmt.Sprintf("%s: %+v", k, v))
+	}
+
+	return fmt.Sprintf("{%s}", strings.Join(pairs, ", "))
+}

+ 1 - 1
costmodel/router.go

@@ -982,7 +982,7 @@ func Initialize() {
 
 			_, err = ValidatePrometheus(thanosCli, true)
 			if err != nil {
-				klog.V(1).Infof("Warning: Failed to query Thanos at %s. Error: %s.", thanosUrl, err.Error())
+				klog.V(1).Infof("[Warning] Failed to query Thanos at %s. Error: %s.", thanosUrl, err.Error())
 				A.ThanosClient = thanosCli
 			} else {
 				klog.V(1).Info("Success: retrieved the 'up' query against Thanos at: " + thanosUrl)