Просмотр исходного кода

Merge branch 'master' of github.com:kubecost/cost-model into AjayTripathy-update-fixes

AjayTripathy 6 лет назад
Родитель
Сommit
c4101ddbbe
9 измененных файлов с 645 добавлено и 145 удалено
  1. 1 1
      cloud/azureprovider.go
  2. 1 1
      cloud/gcpprovider.go
  3. 11 0
      cloud/provider.go
  4. 28 67
      costmodel/cluster.go
  5. 356 42
      costmodel/costmodel.go
  6. 131 0
      costmodel/pool.go
  7. 26 5
      costmodel/promparsers.go
  8. 34 2
      costmodel/router.go
  9. 57 27
      costmodel/vector.go

+ 1 - 1
cloud/azureprovider.go

@@ -416,7 +416,7 @@ func (az *Azure) NodePricing(key Key) (*Node, error) {
 		klog.V(4).Infof("Returning pricing for node %s: %+v from key %s", key, n, key.Features())
 		klog.V(4).Infof("Returning pricing for node %s: %+v from key %s", key, n, key.Features())
 		return n, nil
 		return n, nil
 	}
 	}
-	klog.V(1).Infof("Warning: no pricing data found for %s: %s", key.Features(), key)
+	klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
 	c, err := az.GetConfig()
 	c, err := az.GetConfig()
 	if err != nil {
 	if err != nil {
 		return nil, fmt.Errorf("No default pricing data available")
 		return nil, fmt.Errorf("No default pricing data available")

+ 1 - 1
cloud/gcpprovider.go

@@ -1106,6 +1106,6 @@ func (gcp *GCP) NodePricing(key Key) (*Node, error) {
 		n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 		n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 		return n.Node, nil
 		return n.Node, nil
 	}
 	}
-	klog.V(1).Infof("Warning: no pricing data found for %s: %s", key.Features(), key)
+	klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
 	return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
 	return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
 }
 }

+ 11 - 0
cloud/provider.go

@@ -11,6 +11,7 @@ import (
 	"os"
 	"os"
 	"reflect"
 	"reflect"
 	"strings"
 	"strings"
+	"sync"
 
 
 	"k8s.io/klog"
 	"k8s.io/klog"
 
 
@@ -33,6 +34,9 @@ var createTableStatements = []string{
 	);`,
 	);`,
 }
 }
 
 
+// This mutex is used to control reads/writes to our default config file.
+var configLock sync.Mutex
+
 // ReservedInstanceData keeps a record of the resources on a node that should be
 // ReservedInstanceData keeps a record of the resources on a node that should be
 // priced at reserved rates
 // priced at reserved rates
 type ReservedInstanceData struct {
 type ReservedInstanceData struct {
@@ -206,6 +210,9 @@ func CustomPricesEnabled(p Provider) bool {
 
 
 // GetDefaultPricingData will search for a json file representing pricing data in /models/ and use it for base pricing info.
 // GetDefaultPricingData will search for a json file representing pricing data in /models/ and use it for base pricing info.
 func GetDefaultPricingData(fname string) (*CustomPricing, error) {
 func GetDefaultPricingData(fname string) (*CustomPricing, error) {
+	configLock.Lock()
+	defer configLock.Unlock()
+
 	path := os.Getenv("CONFIG_PATH")
 	path := os.Getenv("CONFIG_PATH")
 	if path == "" {
 	if path == "" {
 		path = "/models/"
 		path = "/models/"
@@ -265,6 +272,10 @@ func configmapUpdate(c *CustomPricing, path string, a map[string]string) (*Custo
 			return nil, err
 			return nil, err
 		}
 		}
 	}
 	}
+
+	configLock.Lock()
+	defer configLock.Unlock()
+
 	cj, err := json.Marshal(c)
 	cj, err := json.Marshal(c)
 	if err != nil {
 	if err != nil {
 		return nil, err
 		return nil, err

+ 28 - 67
costmodel/cluster.go

@@ -41,39 +41,23 @@ type Totals struct {
 }
 }
 
 
 func resultToTotals(qr interface{}) ([][]string, error) {
 func resultToTotals(qr interface{}) ([][]string, error) {
-	data, ok := qr.(map[string]interface{})["data"]
-	if !ok {
-		e, err := wrapPrometheusError(qr)
-		if err != nil {
-			return nil, err
-		}
-		return nil, fmt.Errorf(e)
-	}
-	r, ok := data.(map[string]interface{})["result"]
-	if !ok {
-		return nil, fmt.Errorf("Improperly formatted data from prometheus, data has no result field")
-	}
-	results, ok := r.([]interface{})
-	if !ok {
-		return nil, fmt.Errorf("Improperly formatted results from prometheus, result field is not a slice")
+	results, err := NewQueryResults(qr)
+	if err != nil {
+		return nil, err
 	}
 	}
+
 	if len(results) == 0 {
 	if len(results) == 0 {
 		return nil, fmt.Errorf("Not enough data available in the selected time range")
 		return nil, fmt.Errorf("Not enough data available in the selected time range")
 	}
 	}
-	res, ok := results[0].(map[string]interface{})["values"]
+
+	result := results[0]
 	totals := [][]string{}
 	totals := [][]string{}
-	for _, val := range res.([]interface{}) {
-		if !ok {
-			return nil, fmt.Errorf("Improperly formatted results from prometheus, value is not a field in the vector")
-		}
-		dataPoint, ok := val.([]interface{})
-		if !ok || len(dataPoint) != 2 {
-			return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
-		}
-		d0 := fmt.Sprintf("%f", dataPoint[0].(float64))
+	for _, value := range result.Values {
+		d0 := fmt.Sprintf("%f", value.Timestamp)
+		d1 := fmt.Sprintf("%f", value.Value)
 		toAppend := []string{
 		toAppend := []string{
 			d0,
 			d0,
-			dataPoint[1].(string),
+			d1,
 		}
 		}
 		totals = append(totals, toAppend)
 		totals = append(totals, toAppend)
 	}
 	}
@@ -83,54 +67,30 @@ func resultToTotals(qr interface{}) ([][]string, error) {
 func resultToTotal(qr interface{}) (map[string][][]string, error) {
 func resultToTotal(qr interface{}) (map[string][][]string, error) {
 	defaultClusterID := os.Getenv(clusterIDKey)
 	defaultClusterID := os.Getenv(clusterIDKey)
 
 
-	data, ok := qr.(map[string]interface{})["data"]
-	if !ok {
-		e, err := wrapPrometheusError(qr)
-		if err != nil {
-			return nil, err
-		}
-		return nil, fmt.Errorf("Prometheus query error: %s", e)
-	}
-	r, ok := data.(map[string]interface{})["result"]
-	if !ok {
-		return nil, fmt.Errorf("Improperly formatted data from prometheus, data has no result field")
-	}
-	results, ok := r.([]interface{})
-	if !ok {
-		return nil, fmt.Errorf("Improperly formatted results from prometheus, result field is not a slice")
-	}
-	if len(results) == 0 {
-		return nil, fmt.Errorf("Not enough data available in the selected time range")
+	results, err := NewQueryResults(qr)
+	if err != nil {
+		return nil, err
 	}
 	}
+
 	toReturn := make(map[string][][]string)
 	toReturn := make(map[string][][]string)
-	for i := range results {
-		metrics, ok := results[i].(map[string]interface{})["metric"]
-		if !ok {
-			return nil, fmt.Errorf("Improperly formatted results from prometheus, metric is not a field in the vector")
-		}
-		metricMap, ok := metrics.(map[string]interface{})
-		cid, ok := metricMap["cluster_id"]
-		if !ok {
-			klog.V(4).Info("Prometheus vector does not have cluster id")
-			cid = defaultClusterID
-		}
-		clusterID, ok := cid.(string)
-		if !ok {
-			return nil, fmt.Errorf("Prometheus vector does not have string cluster_id")
+	for _, result := range results {
+		clusterID, _ := result.GetString("cluster_id")
+		if clusterID == "" {
+			clusterID = defaultClusterID
 		}
 		}
 
 
-		val, ok := results[i].(map[string]interface{})["value"]
-		if !ok {
-			return nil, fmt.Errorf("Improperly formatted results from prometheus, value is not a field in the vector")
-		}
-		dataPoint, ok := val.([]interface{})
-		if !ok || len(dataPoint) != 2 {
-			return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
+		// Expect a single value only
+		if len(result.Values) == 0 {
+			klog.V(1).Infof("[Warning] Metric values did not contain any valid data.")
+			continue
 		}
 		}
-		d0 := fmt.Sprintf("%f", dataPoint[0].(float64))
+
+		value := result.Values[0]
+		d0 := fmt.Sprintf("%f", value.Timestamp)
+		d1 := fmt.Sprintf("%f", value.Value)
 		toAppend := []string{
 		toAppend := []string{
 			d0,
 			d0,
-			dataPoint[1].(string),
+			d1,
 		}
 		}
 		if t, ok := toReturn[clusterID]; ok {
 		if t, ok := toReturn[clusterID]; ok {
 			t = append(t, toAppend)
 			t = append(t, toAppend)
@@ -138,6 +98,7 @@ func resultToTotal(qr interface{}) (map[string][][]string, error) {
 			toReturn[clusterID] = [][]string{toAppend}
 			toReturn[clusterID] = [][]string{toAppend}
 		}
 		}
 	}
 	}
+
 	return toReturn, nil
 	return toReturn, nil
 }
 }
 
 

+ 356 - 42
costmodel/costmodel.go

@@ -94,6 +94,42 @@ func (cd *CostData) String() string {
 		len(cd.RAMReq), len(cd.RAMUsed), len(cd.RAMAllocation))
 		len(cd.RAMReq), len(cd.RAMUsed), len(cd.RAMAllocation))
 }
 }
 
 
+// ErrorCollector gathers non-nil errors reported by callers; every method takes the mutex, so one collector can be shared by several goroutines.
+type ErrorCollector struct {
+	m      sync.Mutex // guards errors
+	errors []error    // errors reported so far, in the order Report appended them
+}
+
+// Report appends e to the collected errors. A nil error is ignored.
+func (ec *ErrorCollector) Report(e error) {
+	if e == nil {
+		return
+	}
+
+	ec.m.Lock()
+	defer ec.m.Unlock()
+
+	ec.errors = append(ec.errors, e)
+}
+
+// IsError reports whether at least one error has been collected.
+func (ec *ErrorCollector) IsError() bool {
+	ec.m.Lock()
+	defer ec.m.Unlock()
+
+	return len(ec.errors) > 0
+}
+
+// Errors returns a copy of the collected errors, so callers cannot modify the collector's internal slice.
+func (ec *ErrorCollector) Errors() []error {
+	ec.m.Lock()
+	defer ec.m.Unlock()
+
+	errs := make([]error, len(ec.errors))
+	copy(errs, ec.errors)
+	return errs
+}
+
 const (
 const (
 	queryRAMRequestsStr = `avg(
 	queryRAMRequestsStr = `avg(
 		label_replace(
 		label_replace(
@@ -179,7 +215,7 @@ const (
 	queryZoneNetworkUsage     = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
 	queryZoneNetworkUsage     = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="true"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
 	queryRegionNetworkUsage   = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
 	queryRegionNetworkUsage   = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="false", sameZone="false", sameRegion="false"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
 	queryInternetNetworkUsage = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
 	queryInternetNetworkUsage = `sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[%s] %s)) by (namespace,pod_name,cluster_id) / 1024 / 1024 / 1024`
-	normalizationStr          = `max(count_over_time(kube_pod_container_resource_requests_memory_bytes{}[%s] %s))`
+	normalizationStr          = `max(count_over_time(kube_pod_container_resource_requests_memory_bytes{}[%s] %s) / %f)`
 )
 )
 
 
 type PrometheusMetadata struct {
 type PrometheusMetadata struct {
@@ -302,7 +338,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, window, "")
 	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, window, "")
 	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, window, "")
 	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, window, "")
 	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, window, "")
 	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, window, "")
-	normalization := fmt.Sprintf(normalizationStr, window, offset)
+	normalization := fmt.Sprintf(normalizationStr, window, offset, 1.0)
 
 
 	// Cluster ID is specific to the source cluster
 	// Cluster ID is specific to the source cluster
 	clusterID := os.Getenv(clusterIDKey)
 	clusterID := os.Getenv(clusterIDKey)
@@ -310,56 +346,97 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 	var wg sync.WaitGroup
 	var wg sync.WaitGroup
 	wg.Add(11)
 	wg.Add(11)
 
 
-	var promErr error
+	var ec ErrorCollector
 	var resultRAMRequests interface{}
 	var resultRAMRequests interface{}
 	go func() {
 	go func() {
-		resultRAMRequests, promErr = Query(cli, queryRAMRequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultRAMRequests, promErr = Query(cli, queryRAMRequests)
+
+		ec.Report(promErr)
 	}()
 	}()
+
 	var resultRAMUsage interface{}
 	var resultRAMUsage interface{}
 	go func() {
 	go func() {
-		resultRAMUsage, promErr = Query(cli, queryRAMUsage)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultRAMUsage, promErr = Query(cli, queryRAMUsage)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultCPURequests interface{}
 	var resultCPURequests interface{}
 	go func() {
 	go func() {
-		resultCPURequests, promErr = Query(cli, queryCPURequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultCPURequests, promErr = Query(cli, queryCPURequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultCPUUsage interface{}
 	var resultCPUUsage interface{}
 	go func() {
 	go func() {
-		resultCPUUsage, promErr = Query(cli, queryCPUUsage)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultCPUUsage, promErr = Query(cli, queryCPUUsage)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultGPURequests interface{}
 	var resultGPURequests interface{}
 	go func() {
 	go func() {
-		resultGPURequests, promErr = Query(cli, queryGPURequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultGPURequests, promErr = Query(cli, queryGPURequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultPVRequests interface{}
 	var resultPVRequests interface{}
 	go func() {
 	go func() {
-		resultPVRequests, promErr = Query(cli, queryPVRequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultPVRequests, promErr = Query(cli, queryPVRequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetZoneRequests interface{}
 	var resultNetZoneRequests interface{}
 	go func() {
 	go func() {
-		resultNetZoneRequests, promErr = Query(cli, queryNetZoneRequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultNetZoneRequests, promErr = Query(cli, queryNetZoneRequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetRegionRequests interface{}
 	var resultNetRegionRequests interface{}
 	go func() {
 	go func() {
-		resultNetRegionRequests, promErr = Query(cli, queryNetRegionRequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultNetRegionRequests, promErr = Query(cli, queryNetRegionRequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetInternetRequests interface{}
 	var resultNetInternetRequests interface{}
 	go func() {
 	go func() {
-		resultNetInternetRequests, promErr = Query(cli, queryNetInternetRequests)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		resultNetInternetRequests, promErr = Query(cli, queryNetInternetRequests)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var normalizationResult interface{}
 	var normalizationResult interface{}
 	go func() {
 	go func() {
-		normalizationResult, promErr = Query(cli, normalization)
 		defer wg.Done()
 		defer wg.Done()
+
+		var promErr error
+		normalizationResult, promErr = Query(cli, normalization)
+
+		ec.Report(promErr)
 	}()
 	}()
 
 
 	podDeploymentsMapping := make(map[string]map[string][]string)
 	podDeploymentsMapping := make(map[string]map[string][]string)
@@ -379,6 +456,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 		if k8sErr != nil {
 		if k8sErr != nil {
 			return
 			return
 		}
 		}
+
 		namespaceLabelsMapping, k8sErr = getNamespaceLabels(cm.Cache, clusterID)
 		namespaceLabelsMapping, k8sErr = getNamespaceLabels(cm.Cache, clusterID)
 		if k8sErr != nil {
 		if k8sErr != nil {
 			return
 			return
@@ -389,8 +467,12 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 
 
 	defer measureTime(time.Now(), "ComputeCostData: Processing Query Data")
 	defer measureTime(time.Now(), "ComputeCostData: Processing Query Data")
 
 
-	if promErr != nil {
-		return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
+	if ec.IsError() {
+		for _, promErr := range ec.Errors() {
+			klog.V(1).Infof("[Warning] Query Error: %s", promErr.Error())
+		}
+		// TODO: Categorize fatal prometheus query failures
+		// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
 	}
 	}
 	if k8sErr != nil {
 	if k8sErr != nil {
 		return nil, fmt.Errorf("Error querying the kubernetes api: %s", k8sErr.Error())
 		return nil, fmt.Errorf("Error querying the kubernetes api: %s", k8sErr.Error())
@@ -403,13 +485,13 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 
 
 	nodes, err := cm.GetNodeCost(cp)
 	nodes, err := cm.GetNodeCost(cp)
 	if err != nil {
 	if err != nil {
-		klog.V(1).Infof("Warning, no Node cost model available: " + err.Error())
+		klog.V(1).Infof("[Warning] no Node cost model available: " + err.Error())
 		return nil, err
 		return nil, err
 	}
 	}
 
 
 	pvClaimMapping, err := GetPVInfo(resultPVRequests, clusterID)
 	pvClaimMapping, err := GetPVInfo(resultPVRequests, clusterID)
 	if err != nil {
 	if err != nil {
-		klog.Infof("Unable to get PV Data: %s", err.Error())
+		klog.Infof("[Warning] Unable to get PV Data: %s", err.Error())
 	}
 	}
 	if pvClaimMapping != nil {
 	if pvClaimMapping != nil {
 		err = addPVData(cm.Cache, pvClaimMapping, cp)
 		err = addPVData(cm.Cache, pvClaimMapping, cp)
@@ -420,7 +502,7 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 
 
 	networkUsageMap, err := GetNetworkUsageData(resultNetZoneRequests, resultNetRegionRequests, resultNetInternetRequests, clusterID)
 	networkUsageMap, err := GetNetworkUsageData(resultNetZoneRequests, resultNetRegionRequests, resultNetInternetRequests, clusterID)
 	if err != nil {
 	if err != nil {
-		klog.V(1).Infof("Unable to get Network Cost Data: %s", err.Error())
+		klog.V(1).Infof("[Warning] Unable to get Network Cost Data: %s", err.Error())
 		networkUsageMap = make(map[string]*NetworkUsageData)
 		networkUsageMap = make(map[string]*NetworkUsageData)
 	}
 	}
 
 
@@ -605,8 +687,8 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 					NamespaceLabels: nsLabels,
 					NamespaceLabels: nsLabels,
 					ClusterID:       clusterID,
 					ClusterID:       clusterID,
 				}
 				}
-				costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed)
-				costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed)
+				costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed, "CPU")
+				costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed, "RAM")
 				if filterNamespace == "" {
 				if filterNamespace == "" {
 					containerNameCost[newKey] = costs
 					containerNameCost[newKey] = costs
 				} else if costs.Namespace == filterNamespace {
 				} else if costs.Namespace == filterNamespace {
@@ -675,8 +757,8 @@ func (cm *CostModel) ComputeCostData(cli prometheusClient.Client, clientset kube
 				NamespaceLabels: namespacelabels,
 				NamespaceLabels: namespacelabels,
 				ClusterID:       c.ClusterID,
 				ClusterID:       c.ClusterID,
 			}
 			}
-			costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed)
-			costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed)
+			costs.CPUAllocation = getContainerAllocation(costs.CPUReq, costs.CPUUsed, "CPU")
+			costs.RAMAllocation = getContainerAllocation(costs.RAMReq, costs.RAMUsed, "RAM")
 			if filterNamespace == "" {
 			if filterNamespace == "" {
 				containerNameCost[key] = costs
 				containerNameCost[key] = costs
 				missingContainers[key] = costs
 				missingContainers[key] = costs
@@ -843,11 +925,22 @@ func findDeletedNodeInfo(cli prometheusClient.Client, missingNodes map[string]*c
 	return nil
 	return nil
 }
 }
 
 
-func getContainerAllocation(req []*Vector, used []*Vector) []*Vector {
+func getContainerAllocation(req []*Vector, used []*Vector, allocationType string) []*Vector {
 	// The result of the normalize operation will be a new []*Vector to replace the requests
 	// The result of the normalize operation will be a new []*Vector to replace the requests
 	allocationOp := func(r *Vector, x *float64, y *float64) bool {
 	allocationOp := func(r *Vector, x *float64, y *float64) bool {
 		if x != nil && y != nil {
 		if x != nil && y != nil {
-			r.Value = math.Max(*x, *y)
+			x1 := *x
+			if math.IsNaN(x1) {
+				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for requests.", allocationType)
+				x1 = 0.0
+			}
+			y1 := *y
+			if math.IsNaN(y1) {
+				klog.V(1).Infof("[Warning] NaN value found during %s allocation calculation for used.", allocationType)
+				y1 = 0.0
+			}
+
+			r.Value = math.Max(x1, y1)
 		} else if x != nil {
 		} else if x != nil {
 			r.Value = *x
 			r.Value = *x
 		} else if y != nil {
 		} else if y != nil {
@@ -943,7 +1036,7 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 
 
 		cnode, err := cp.NodePricing(cp.GetKey(nodeLabels))
 		cnode, err := cp.NodePricing(cp.GetKey(nodeLabels))
 		if err != nil {
 		if err != nil {
-			klog.V(1).Infof("Error getting node. Error: " + err.Error())
+			klog.V(1).Infof("[Warning] Error getting node pricing. Error: " + err.Error())
 			nodes[name] = cnode
 			nodes[name] = cnode
 			continue
 			continue
 		}
 		}
@@ -954,7 +1047,14 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			cpu = float64(n.Status.Capacity.Cpu().Value())
 			cpu = float64(n.Status.Capacity.Cpu().Value())
 			newCnode.VCPU = n.Status.Capacity.Cpu().String()
 			newCnode.VCPU = n.Status.Capacity.Cpu().String()
 		} else {
 		} else {
-			cpu, _ = strconv.ParseFloat(newCnode.VCPU, 64)
+			cpu, err = strconv.ParseFloat(newCnode.VCPU, 64)
+			if err != nil {
+				klog.V(1).Infof("[Warning] parsing VCPU value: \"%s\" as float64", newCnode.VCPU)
+			}
+		}
+		if math.IsNaN(cpu) {
+			klog.V(1).Infof("[Warning] cpu parsed as NaN. Setting to 0.")
+			cpu = 0
 		}
 		}
 
 
 		var ram float64
 		var ram float64
@@ -962,6 +1062,11 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 			newCnode.RAM = n.Status.Capacity.Memory().String()
 			newCnode.RAM = n.Status.Capacity.Memory().String()
 		}
 		}
 		ram = float64(n.Status.Capacity.Memory().Value())
 		ram = float64(n.Status.Capacity.Memory().Value())
+		if math.IsNaN(ram) {
+			klog.V(1).Infof("[Warning] ram parsed as NaN. Setting to 0.")
+			ram = 0
+		}
+
 		newCnode.RAMBytes = fmt.Sprintf("%f", ram)
 		newCnode.RAMBytes = fmt.Sprintf("%f", ram)
 
 
 		if newCnode.GPU != "" && newCnode.GPUCost == "" {
 		if newCnode.GPU != "" && newCnode.GPUCost == "" {
@@ -973,24 +1078,54 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 				klog.V(3).Infof("Could not parse default cpu price")
 				klog.V(3).Infof("Could not parse default cpu price")
 				return nil, err
 				return nil, err
 			}
 			}
+			if math.IsNaN(defaultCPU) {
+				klog.V(1).Infof("[Warning] defaultCPU parsed as NaN. Setting to 0.")
+				defaultCPU = 0
+			}
 
 
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			if err != nil {
 			if err != nil {
 				klog.V(3).Infof("Could not parse default ram price")
 				klog.V(3).Infof("Could not parse default ram price")
 				return nil, err
 				return nil, err
 			}
 			}
+			if math.IsNaN(defaultRAM) {
+				klog.V(1).Infof("[Warning] defaultRAM parsed as NaN. Setting to 0.")
+				defaultRAM = 0
+			}
 
 
 			defaultGPU, err := strconv.ParseFloat(cfg.GPU, 64)
 			defaultGPU, err := strconv.ParseFloat(cfg.GPU, 64)
 			if err != nil {
 			if err != nil {
 				klog.V(3).Infof("Could not parse default gpu price")
 				klog.V(3).Infof("Could not parse default gpu price")
 				return nil, err
 				return nil, err
 			}
 			}
+			if math.IsNaN(defaultGPU) {
+				klog.V(1).Infof("[Warning] defaultGPU parsed as NaN. Setting to 0.")
+				defaultGPU = 0
+			}
 
 
 			cpuToRAMRatio := defaultCPU / defaultRAM
 			cpuToRAMRatio := defaultCPU / defaultRAM
+			if math.IsNaN(cpuToRAMRatio) {
+				klog.V(1).Infof("[Warning] cpuToRAMRatio[defaultCPU: %f / defaultRam: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
+				cpuToRAMRatio = 0
+			}
+
 			gpuToRAMRatio := defaultGPU / defaultRAM
 			gpuToRAMRatio := defaultGPU / defaultRAM
+			if math.IsNaN(gpuToRAMRatio) {
+				klog.V(1).Infof("[Warning] gpuToRAMRatio is NaN. Setting to 0.")
+				gpuToRAMRatio = 0
+			}
 
 
 			ramGB := ram / 1024 / 1024 / 1024
 			ramGB := ram / 1024 / 1024 / 1024
+			if math.IsNaN(ramGB) {
+				klog.V(1).Infof("[Warning] ramGB is NaN. Setting to 0.")
+				ramGB = 0
+			}
+
 			ramMultiple := gpuToRAMRatio + cpu*cpuToRAMRatio + ramGB
 			ramMultiple := gpuToRAMRatio + cpu*cpuToRAMRatio + ramGB
+			if math.IsNaN(ramMultiple) {
+				klog.V(1).Infof("[Warning] ramMultiple is NaN. Setting to 0.")
+				ramMultiple = 0
+			}
 
 
 			var nodePrice float64
 			var nodePrice float64
 			if newCnode.Cost != "" {
 			if newCnode.Cost != "" {
@@ -1006,8 +1141,17 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 					return nil, err
 					return nil, err
 				}
 				}
 			}
 			}
+			if math.IsNaN(nodePrice) {
+				klog.V(1).Infof("[Warning] nodePrice parsed as NaN. Setting to 0.")
+				nodePrice = 0
+			}
 
 
 			ramPrice := (nodePrice / ramMultiple)
 			ramPrice := (nodePrice / ramMultiple)
+			if math.IsNaN(ramPrice) {
+				klog.V(1).Infof("[Warning] ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
+				ramPrice = 0
+			}
+
 			cpuPrice := ramPrice * cpuToRAMRatio
 			cpuPrice := ramPrice * cpuToRAMRatio
 			gpuPrice := ramPrice * gpuToRAMRatio
 			gpuPrice := ramPrice * gpuToRAMRatio
 
 
@@ -1024,16 +1168,38 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 				klog.V(3).Infof("Could not parse default cpu price")
 				klog.V(3).Infof("Could not parse default cpu price")
 				return nil, err
 				return nil, err
 			}
 			}
+			if math.IsNaN(defaultCPU) {
+				klog.V(1).Infof("[Warning] defaultCPU parsed as NaN. Setting to 0.")
+				defaultCPU = 0
+			}
 
 
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
 			if err != nil {
 			if err != nil {
 				klog.V(3).Infof("Could not parse default ram price")
 				klog.V(3).Infof("Could not parse default ram price")
 				return nil, err
 				return nil, err
 			}
 			}
+			if math.IsNaN(defaultRAM) {
+				klog.V(1).Infof("[Warning] defaultRAM parsed as NaN. Setting to 0.")
+				defaultRAM = 0
+			}
 
 
 			cpuToRAMRatio := defaultCPU / defaultRAM
 			cpuToRAMRatio := defaultCPU / defaultRAM
+			if math.IsNaN(cpuToRAMRatio) {
+				klog.V(1).Infof("[Warning] cpuToRAMRatio[defaultCPU: %f / defaultRam: %f] is NaN. Setting to 0.", defaultCPU, defaultRAM)
+				cpuToRAMRatio = 0
+			}
+
 			ramGB := ram / 1024 / 1024 / 1024
 			ramGB := ram / 1024 / 1024 / 1024
+			if math.IsNaN(ramGB) {
+				klog.V(1).Infof("[Warning] ramGB is NaN. Setting to 0.")
+				ramGB = 0
+			}
+
 			ramMultiple := cpu*cpuToRAMRatio + ramGB
 			ramMultiple := cpu*cpuToRAMRatio + ramGB
+			if math.IsNaN(ramMultiple) {
+				klog.V(1).Infof("[Warning] ramMultiple is NaN. Setting to 0.")
+				ramMultiple = 0
+			}
 
 
 			var nodePrice float64
 			var nodePrice float64
 			if newCnode.Cost != "" {
 			if newCnode.Cost != "" {
@@ -1049,8 +1215,17 @@ func (cm *CostModel) GetNodeCost(cp costAnalyzerCloud.Provider) (map[string]*cos
 					return nil, err
 					return nil, err
 				}
 				}
 			}
 			}
+			if math.IsNaN(nodePrice) {
+				klog.V(1).Infof("[Warning] nodePrice parsed as NaN. Setting to 0.")
+				nodePrice = 0
+			}
 
 
 			ramPrice := (nodePrice / ramMultiple)
 			ramPrice := (nodePrice / ramMultiple)
+			if math.IsNaN(ramPrice) {
+				klog.V(1).Infof("[Warning] ramPrice[nodePrice: %f / ramMultiple: %f] parsed as NaN. Setting to 0.", nodePrice, ramMultiple)
+				ramPrice = 0
+			}
+
 			cpuPrice := ramPrice * cpuToRAMRatio
 			cpuPrice := ramPrice * cpuToRAMRatio
 
 
 			newCnode.VCPUCost = fmt.Sprintf("%f", cpuPrice)
 			newCnode.VCPUCost = fmt.Sprintf("%f", cpuPrice)
@@ -1300,6 +1475,11 @@ func costDataPassesFilters(costs *CostData, namespace string, cluster string) bo
 	return passesNamespace && passesCluster
 	return passesNamespace && passesCluster
 }
 }
 
 
+// floorMultiple returns value rounded toward zero to a whole multiple of multiple (e.g. 7, 5 -> 5; 10, 5 -> 10). multiple must be non-zero, since the integer division would otherwise panic.
+func floorMultiple(value int64, multiple int64) int64 {
+	return (value / multiple) * multiple
+}
+
 // Attempt to create a key for the request. Reduce the times to minutes in order to more easily group requests based on
 // Attempt to create a key for the request. Reduce the times to minutes in order to more easily group requests based on
 // real time ranges. If for any reason, the key generation fails, return a uuid to ensure uniqueness.
 // real time ranges. If for any reason, the key generation fails, return a uuid to ensure uniqueness.
 func requestKeyFor(startString string, endString string, windowString string, filterNamespace string, filterCluster string, remoteEnabled bool) string {
 func requestKeyFor(startString string, endString string, windowString string, filterNamespace string, filterCluster string, remoteEnabled bool) string {
@@ -1308,13 +1488,26 @@ func requestKeyFor(startString string, endString string, windowString string, fi
 
 
 	sTime, err := time.Parse(fullLayout, startString)
 	sTime, err := time.Parse(fullLayout, startString)
 	if err != nil {
 	if err != nil {
+		klog.V(1).Infof("[Warning] Start=%s failed to parse when generating request key: %s", startString, err.Error())
 		return uuid.New().String()
 		return uuid.New().String()
 	}
 	}
-	eTime, err := time.Parse(fullLayout, startString)
+	eTime, err := time.Parse(fullLayout, endString)
 	if err != nil {
 	if err != nil {
+		klog.V(1).Infof("[Warning] End=%s failed to parse when generating request key: %s", endString, err.Error())
 		return uuid.New().String()
 		return uuid.New().String()
 	}
 	}
 
 
+	// We "snap" the start time's minute and the duration down to the nearest 5 minute multiple,
+	// then rebuild the end time by adding the snapped duration to the snapped start time.
+	durMins := int64(eTime.Sub(sTime).Minutes())
+	durMins = floorMultiple(durMins, 5)
+
+	sMins := int64(sTime.Minute())
+	sOffset := sMins - floorMultiple(sMins, 5)
+
+	sTime = sTime.Add(-time.Duration(sOffset) * time.Minute)
+	eTime = sTime.Add(time.Duration(durMins) * time.Minute)
+
 	startKey := sTime.Format(keyLayout)
 	startKey := sTime.Format(keyLayout)
 	endKey := eTime.Format(keyLayout)
 	endKey := eTime.Format(keyLayout)
 
 
@@ -1323,7 +1516,7 @@ func requestKeyFor(startString string, endString string, windowString string, fi
 
 
 // Executes a range query for cost data
 // Executes a range query for cost data
 func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
 func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
-	startString, endString, windowString string, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
+	startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
 	// Create a request key for request grouping. This key will be used to represent the cost-model result
 	// Create a request key for request grouping. This key will be used to represent the cost-model result
 	// for the specific inputs to prevent multiple queries for identical data.
 	// for the specific inputs to prevent multiple queries for identical data.
 	key := requestKeyFor(startString, endString, windowString, filterNamespace, filterCluster, remoteEnabled)
 	key := requestKeyFor(startString, endString, windowString, filterNamespace, filterCluster, remoteEnabled)
@@ -1333,7 +1526,7 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset
 	// If there is already a request out that uses the same data, wait for it to return to share the results.
 	// If there is already a request out that uses the same data, wait for it to return to share the results.
 	// Otherwise, start executing.
 	// Otherwise, start executing.
 	result, err, _ := cm.RequestGroup.Do(key, func() (interface{}, error) {
 	result, err, _ := cm.RequestGroup.Do(key, func() (interface{}, error) {
-		return cm.costDataRange(cli, clientset, cp, startString, endString, windowString, filterNamespace, filterCluster, remoteEnabled)
+		return cm.costDataRange(cli, clientset, cp, startString, endString, windowString, resolutionHours, filterNamespace, filterCluster, remoteEnabled)
 	})
 	})
 
 
 	data, ok := result.(map[string]*CostData)
 	data, ok := result.(map[string]*CostData)
@@ -1345,7 +1538,7 @@ func (cm *CostModel) ComputeCostDataRange(cli prometheusClient.Client, clientset
 }
 }
 
 
 func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
 func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cp costAnalyzerCloud.Provider,
-	startString, endString, windowString string, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
+	startString, endString, windowString string, resolutionHours float64, filterNamespace string, filterCluster string, remoteEnabled bool) (map[string]*CostData, error) {
 	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, windowString, "", windowString, "")
 	queryRAMRequests := fmt.Sprintf(queryRAMRequestsStr, windowString, "", windowString, "")
 	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, windowString, "", windowString, "")
 	queryRAMUsage := fmt.Sprintf(queryRAMUsageStr, windowString, "", windowString, "")
 	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, windowString, "", windowString, "")
 	queryCPURequests := fmt.Sprintf(queryCPURequestsStr, windowString, "", windowString, "")
@@ -1357,7 +1550,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, windowString, "")
 	queryNetZoneRequests := fmt.Sprintf(queryZoneNetworkUsage, windowString, "")
 	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, windowString, "")
 	queryNetRegionRequests := fmt.Sprintf(queryRegionNetworkUsage, windowString, "")
 	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, windowString, "")
 	queryNetInternetRequests := fmt.Sprintf(queryInternetNetworkUsage, windowString, "")
-	normalization := fmt.Sprintf(normalizationStr, windowString, "")
+	normalization := fmt.Sprintf(normalizationStr, windowString, "", resolutionHours)
 
 
 	layout := "2006-01-02T15:04:05.000Z"
 	layout := "2006-01-02T15:04:05.000Z"
 
 
@@ -1378,6 +1571,8 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	}
 	}
 	clusterID := os.Getenv(clusterIDKey)
 	clusterID := os.Getenv(clusterIDKey)
 
 
+	durHrs := end.Sub(start).Hours() + 1
+
 	if remoteEnabled == true {
 	if remoteEnabled == true {
 		remoteLayout := "2006-01-02T15:04:05Z"
 		remoteLayout := "2006-01-02T15:04:05Z"
 		remoteStartStr := start.Format(remoteLayout)
 		remoteStartStr := start.Format(remoteLayout)
@@ -1386,122 +1581,204 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		return CostDataRangeFromSQL("", "", windowString, remoteStartStr, remoteEndStr)
 		return CostDataRangeFromSQL("", "", windowString, remoteStartStr, remoteEndStr)
 	}
 	}
 
 
+	numQueries := 20
+
 	var wg sync.WaitGroup
 	var wg sync.WaitGroup
-	wg.Add(20)
+	wg.Add(numQueries)
+
+	queryProfileStart := time.Now()
+	queryProfileCh := make(chan string, numQueries)
 
 
-	var promErr error
+	var ec ErrorCollector
 	var resultRAMRequests interface{}
 	var resultRAMRequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "RAMRequests", queryProfileCh)
 
 
+		var promErr error
 		resultRAMRequests, promErr = QueryRange(cli, queryRAMRequests, start, end, window)
 		resultRAMRequests, promErr = QueryRange(cli, queryRAMRequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultRAMUsage interface{}
 	var resultRAMUsage interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "RAMUsage", queryProfileCh)
 
 
+		var promErr error
 		resultRAMUsage, promErr = QueryRange(cli, queryRAMUsage, start, end, window)
 		resultRAMUsage, promErr = QueryRange(cli, queryRAMUsage, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultCPURequests interface{}
 	var resultCPURequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "CPURequests", queryProfileCh)
 
 
+		var promErr error
 		resultCPURequests, promErr = QueryRange(cli, queryCPURequests, start, end, window)
 		resultCPURequests, promErr = QueryRange(cli, queryCPURequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultCPUUsage interface{}
 	var resultCPUUsage interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "CPUUsage", queryProfileCh)
 
 
+		var promErr error
 		resultCPUUsage, promErr = QueryRange(cli, queryCPUUsage, start, end, window)
 		resultCPUUsage, promErr = QueryRange(cli, queryCPUUsage, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultRAMAllocations interface{}
 	var resultRAMAllocations interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "RAMAllocations", queryProfileCh)
 
 
+		var promErr error
 		resultRAMAllocations, promErr = QueryRange(cli, queryRAMAlloc, start, end, window)
 		resultRAMAllocations, promErr = QueryRange(cli, queryRAMAlloc, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultCPUAllocations interface{}
 	var resultCPUAllocations interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "CPUAllocations", queryProfileCh)
 
 
+		var promErr error
 		resultCPUAllocations, promErr = QueryRange(cli, queryCPUAlloc, start, end, window)
 		resultCPUAllocations, promErr = QueryRange(cli, queryCPUAlloc, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultGPURequests interface{}
 	var resultGPURequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "GPURequests", queryProfileCh)
 
 
+		var promErr error
 		resultGPURequests, promErr = QueryRange(cli, queryGPURequests, start, end, window)
 		resultGPURequests, promErr = QueryRange(cli, queryGPURequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultPVRequests interface{}
 	var resultPVRequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "PVRequests", queryProfileCh)
 
 
+		var promErr error
 		resultPVRequests, promErr = QueryRange(cli, queryPVRequests, start, end, window)
 		resultPVRequests, promErr = QueryRange(cli, queryPVRequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetZoneRequests interface{}
 	var resultNetZoneRequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "NetZoneRequests", queryProfileCh)
 
 
+		var promErr error
 		resultNetZoneRequests, promErr = QueryRange(cli, queryNetZoneRequests, start, end, window)
 		resultNetZoneRequests, promErr = QueryRange(cli, queryNetZoneRequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetRegionRequests interface{}
 	var resultNetRegionRequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "NetRegionRequests", queryProfileCh)
 
 
+		var promErr error
 		resultNetRegionRequests, promErr = QueryRange(cli, queryNetRegionRequests, start, end, window)
 		resultNetRegionRequests, promErr = QueryRange(cli, queryNetRegionRequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var resultNetInternetRequests interface{}
 	var resultNetInternetRequests interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "NetInternetRequests", queryProfileCh)
 
 
+		var promErr error
 		resultNetInternetRequests, promErr = QueryRange(cli, queryNetInternetRequests, start, end, window)
 		resultNetInternetRequests, promErr = QueryRange(cli, queryNetInternetRequests, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var pvPodAllocationResults interface{}
 	var pvPodAllocationResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "PVPodAllocation", queryProfileCh)
 
 
+		var promErr error
 		pvPodAllocationResults, promErr = QueryRange(cli, fmt.Sprintf(queryPVCAllocation, windowString), start, end, window)
 		pvPodAllocationResults, promErr = QueryRange(cli, fmt.Sprintf(queryPVCAllocation, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var pvCostResults interface{}
 	var pvCostResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "PVCost", queryProfileCh)
 
 
+		var promErr error
 		pvCostResults, promErr = QueryRange(cli, fmt.Sprintf(queryPVHourlyCost, windowString), start, end, window)
 		pvCostResults, promErr = QueryRange(cli, fmt.Sprintf(queryPVHourlyCost, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var nsLabelsResults interface{}
 	var nsLabelsResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "NSLabels", queryProfileCh)
 
 
+		var promErr error
 		nsLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryNSLabels, windowString), start, end, window)
 		nsLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryNSLabels, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var podLabelsResults interface{}
 	var podLabelsResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "PodLabels", queryProfileCh)
 
 
+		var promErr error
 		podLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodLabels, windowString), start, end, window)
 		podLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryPodLabels, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var serviceLabelsResults interface{}
 	var serviceLabelsResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "ServiceLabels", queryProfileCh)
 
 
+		var promErr error
 		serviceLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryServiceLabels, windowString), start, end, window)
 		serviceLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryServiceLabels, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var deploymentLabelsResults interface{}
 	var deploymentLabelsResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "DeploymentLabels", queryProfileCh)
 
 
+		var promErr error
 		deploymentLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryDeploymentLabels, windowString), start, end, window)
 		deploymentLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryDeploymentLabels, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var statefulsetLabelsResults interface{}
 	var statefulsetLabelsResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "StatefulSetLabels", queryProfileCh)
+
+		var promErr error
 		statefulsetLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryStatefulsetLabels, windowString), start, end, window)
 		statefulsetLabelsResults, promErr = QueryRange(cli, fmt.Sprintf(queryStatefulsetLabels, windowString), start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 	var normalizationResults interface{}
 	var normalizationResults interface{}
 	go func() {
 	go func() {
 		defer wg.Done()
 		defer wg.Done()
+		defer measureTimeAsync(time.Now(), "Normalization", queryProfileCh)
 
 
+		var promErr error
 		normalizationResults, promErr = QueryRange(cli, normalization, start, end, window)
 		normalizationResults, promErr = QueryRange(cli, normalization, start, end, window)
+
+		ec.Report(promErr)
 	}()
 	}()
 
 
 	podDeploymentsMapping := make(map[string]map[string][]string)
 	podDeploymentsMapping := make(map[string]map[string][]string)
@@ -1535,30 +1812,48 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 
 
 	wg.Wait()
 	wg.Wait()
 
 
-	defer measureTime(time.Now(), "costDataRange: Processing Query Data")
+	// collect all query profiling messages
+	close(queryProfileCh)
+	queryProfileBreakdown := ""
+	for msg := range queryProfileCh {
+		queryProfileBreakdown += "\n - " + msg
+	}
+	measureTime(queryProfileStart, fmt.Sprintf("costDataRange(%fh): Prom/k8s Queries: %s", durHrs, queryProfileBreakdown))
+
+	defer measureTime(time.Now(), fmt.Sprintf("costDataRange(%fh): Processing Query Data", durHrs))
 
 
-	if promErr != nil {
-		return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
+	if ec.IsError() {
+		for _, promErr := range ec.Errors() {
+			klog.V(1).Infof("[Warning] Query Error: %s", promErr.Error())
+		}
+		// TODO: Categorize fatal prometheus query failures
+		// return nil, fmt.Errorf("Error querying prometheus: %s", promErr.Error())
 	}
 	}
 	if k8sErr != nil {
 	if k8sErr != nil {
 		return nil, fmt.Errorf("Error querying the kubernetes api: %s", k8sErr.Error())
 		return nil, fmt.Errorf("Error querying the kubernetes api: %s", k8sErr.Error())
 	}
 	}
 
 
+	profileStart := time.Now()
+
 	normalizationValue, err := getNormalizations(normalizationResults)
 	normalizationValue, err := getNormalizations(normalizationResults)
 	if err != nil {
 	if err != nil {
 		return nil, fmt.Errorf("error computing normalization for start=%s, end=%s, window=%s: %s",
 		return nil, fmt.Errorf("error computing normalization for start=%s, end=%s, window=%s: %s",
 			start, end, window, err.Error())
 			start, end, window, err.Error())
 	}
 	}
 
 
-	profileStart := time.Now()
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): compute normalizations", durHrs))
+
+	profileStart = time.Now()
 
 
 	nodes, err := cm.GetNodeCost(cp)
 	nodes, err := cm.GetNodeCost(cp)
 	if err != nil {
 	if err != nil {
-		klog.V(1).Infof("Warning, no cost model available: " + err.Error())
+		klog.V(1).Infof("[Warning] no cost model available: " + err.Error())
 		return nil, err
 		return nil, err
 	}
 	}
 
 
-	measureTime(profileStart, "GetNodeCost")
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): GetNodeCost", durHrs))
+
+	profileStart = time.Now()
 
 
 	pvClaimMapping, err := GetPVInfo(resultPVRequests, clusterID)
 	pvClaimMapping, err := GetPVInfo(resultPVRequests, clusterID)
 	if err != nil {
 	if err != nil {
@@ -1585,6 +1880,10 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		addMetricPVData(pvAllocationMapping, pvCostMapping, cp)
 		addMetricPVData(pvAllocationMapping, pvCostMapping, cp)
 	}
 	}
 
 
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): process PV data", durHrs))
+
+	profileStart = time.Now()
+
 	nsLabels, err := GetNamespaceLabelsMetrics(nsLabelsResults, clusterID)
 	nsLabels, err := GetNamespaceLabelsMetrics(nsLabelsResults, clusterID)
 	if err != nil {
 	if err != nil {
 		klog.V(1).Infof("Unable to get Namespace Labels for Metrics: %s", err.Error())
 		klog.V(1).Infof("Unable to get Namespace Labels for Metrics: %s", err.Error())
@@ -1612,6 +1911,11 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 	if err != nil {
 	if err != nil {
 		klog.V(1).Infof("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
 		klog.V(1).Infof("Unable to get Deployment Match Labels for Metrics: %s", err.Error())
 	}
 	}
+
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): process labels", durHrs))
+
+	profileStart = time.Now()
+
 	podStatefulsetMetricsMapping, err := getPodDeploymentsWithMetrics(statefulsetLabels, podLabels)
 	podStatefulsetMetricsMapping, err := getPodDeploymentsWithMetrics(statefulsetLabels, podLabels)
 	if err != nil {
 	if err != nil {
 		klog.V(1).Infof("Unable to get match Statefulset Labels Metrics to Pods: %s", err.Error())
 		klog.V(1).Infof("Unable to get match Statefulset Labels Metrics to Pods: %s", err.Error())
@@ -1636,6 +1940,8 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		networkUsageMap = make(map[string]*NetworkUsageData)
 		networkUsageMap = make(map[string]*NetworkUsageData)
 	}
 	}
 
 
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): process deployments, services, and network usage", durHrs))
+
 	profileStart = time.Now()
 	profileStart = time.Now()
 
 
 	containerNameCost := make(map[string]*CostData)
 	containerNameCost := make(map[string]*CostData)
@@ -1694,7 +2000,9 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		containers[key] = true
 		containers[key] = true
 	}
 	}
 
 
-	measureTime(profileStart, "GetContainerMetricVectors")
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): GetContainerMetricVectors", durHrs))
+
+	profileStart = time.Now()
 
 
 	// Request metrics can show up after pod eviction and completion.
 	// Request metrics can show up after pod eviction and completion.
 	// This method synchronizes requests to allocations such that when
 	// This method synchronizes requests to allocations such that when
@@ -1717,6 +2025,8 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		}
 		}
 	}
 	}
 
 
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): applyAllocationToRequests", durHrs))
+
 	profileStart = time.Now()
 	profileStart = time.Now()
 
 
 	missingNodes := make(map[string]*costAnalyzerCloud.Node)
 	missingNodes := make(map[string]*costAnalyzerCloud.Node)
@@ -2018,7 +2328,7 @@ func (cm *CostModel) costDataRange(cli prometheusClient.Client, clientset kubern
 		}
 		}
 	}
 	}
 
 
-	measureTime(profileStart, "Build CostData map")
+	measureTime(profileStart, fmt.Sprintf("costDataRange(%fh): build CostData map", durHrs))
 
 
 	w := end.Sub(start)
 	w := end.Sub(start)
 	w += window
 	w += window
@@ -2442,5 +2752,9 @@ func wrapPrometheusError(qr interface{}) (string, error) {
 func measureTime(start time.Time, name string) {
 func measureTime(start time.Time, name string) {
 	elapsed := time.Since(start)
 	elapsed := time.Since(start)
 
 
-	klog.V(3).Infof("[Profiler] %s took %s", name, elapsed)
+	klog.V(3).Infof("[Profiler] %s: %s", elapsed, name)
+}
+
+func measureTimeAsync(start time.Time, name string, ch chan string) {
+	ch <- fmt.Sprintf("%s took %s", name, time.Since(start))
 }
 }

+ 131 - 0
costmodel/pool.go

@@ -0,0 +1,131 @@
+package costmodel
+
+import (
+	"sync"
+)
+
+// A pool of vector maps for mapping float64 timestamps
+// to float64 values
+type VectorMapPool interface {
+	Get() map[float64]float64
+	Put(map[float64]float64)
+}
+
+// ------------
+
+// A buffered channel implementation of a vector map pool which
+// controls the total number of maps allowed in/out of the pool
+// at any given moment. Attempting to Get() with no available
+// maps will block until one is available. You will be unable to
+// Put() a map if the buffer is full.
+type FixedMapPool struct {
+	maps chan map[float64]float64
+	size int
+}
+
+// Returns a map from the pool. Blocks if no maps are available for re-use
+func (mp *FixedMapPool) Get() map[float64]float64 {
+	return <-mp.maps
+}
+
+// Adds a map back to the pool if there is room. Does not block on overflow.
+func (mp *FixedMapPool) Put(m map[float64]float64) {
+	if len(mp.maps) >= mp.size {
+		return
+	}
+
+	for k := range m {
+		delete(m, k)
+	}
+
+	mp.maps <- m
+}
+
+// Creates a new fixed map pool which maintains a fixed pool size
+func NewFixedMapPool(size int) VectorMapPool {
+	mp := &FixedMapPool{
+		maps: make(chan map[float64]float64, size),
+		size: size,
+	}
+
+	// Pre-Populate the buffer with maps
+	for i := 0; i < size; i++ {
+		mp.maps <- make(map[float64]float64)
+	}
+
+	return mp
+}
+
+// ------------
+
+// A buffered channel implementation of a vector map pool which
+// controls the total number of maps allowed in/out of the pool
+// at any given moment. Unlike the FixedMapPool, this pool will
+// not block if maps are over requested, but will only maintain
+// a buffer up to the size limitation.
+type FlexibleMapPool struct {
+	maps chan map[float64]float64
+}
+
+// Returns a map from the pool. Does not block on over-request.
+func (mp *FlexibleMapPool) Get() map[float64]float64 {
+	select {
+	case m := <-mp.maps:
+		return m
+	default:
+		return make(map[float64]float64)
+	}
+}
+
+// Adds a map back to the pool if there is room. Does not block on overflow.
+func (mp *FlexibleMapPool) Put(m map[float64]float64) {
+	for k := range m {
+		delete(m, k)
+	}
+
+	// Either return the map to the buffered channel, or do nothing
+	select {
+	case mp.maps <- m:
+	default:
+	}
+}
+
+// Creates a new flexible map pool which buffers at most size maps for re-use
+func NewFlexibleMapPool(size int) VectorMapPool {
+	return &FlexibleMapPool{
+		maps: make(chan map[float64]float64, size),
+	}
+}
+
+// ------------
+
+// Implementation backed by sync.Pool
+type UnboundedMapPool struct {
+	maps *sync.Pool
+}
+
+// Returns a map from the pool. Does not block on over-request.
+func (mp *UnboundedMapPool) Get() map[float64]float64 {
+	return mp.maps.Get().(map[float64]float64)
+}
+
+// Clears the map and hands it back to the underlying sync.Pool.
+func (mp *UnboundedMapPool) Put(m map[float64]float64) {
+	for k := range m {
+		delete(m, k)
+	}
+
+	mp.maps.Put(m)
+}
+
+// Creates a new unbounded map pool which allows the runtime to decide when
+// pooled values should be evicted
+func NewUnboundedMapPool() VectorMapPool {
+	return &UnboundedMapPool{
+		maps: &sync.Pool{
+			New: func() interface{} {
+				return make(map[float64]float64)
+			},
+		},
+	}
+}

+ 26 - 5
costmodel/promparsers.go

@@ -96,6 +96,9 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 			return nil, fmt.Errorf("Metric field is improperly formatted")
 			return nil, fmt.Errorf("Metric field is improperly formatted")
 		}
 		}
 
 
+		// Wrap execution of this lazily in case the data is not used
+		labels := func() string { return labelsForMetric(metricMap) }
+
 		// Determine if the result is a ranged data set or single value
 		// Determine if the result is a ranged data set or single value
 		_, isRange := resultInterface["values"]
 		_, isRange := resultInterface["values"]
 
 
@@ -106,7 +109,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 				return nil, fmt.Errorf("Value field does not exist in data result vector")
 				return nil, fmt.Errorf("Value field does not exist in data result vector")
 			}
 			}
 
 
-			v, err := parseDataPoint(dataPoint)
+			v, err := parseDataPoint(dataPoint, labels)
 			if err != nil {
 			if err != nil {
 				return nil, err
 				return nil, err
 			}
 			}
@@ -118,7 +121,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 			}
 			}
 
 
 			for _, value := range values {
 			for _, value := range values {
-				v, err := parseDataPoint(value)
+				v, err := parseDataPoint(value, labels)
 				if err != nil {
 				if err != nil {
 					return nil, err
 					return nil, err
 				}
 				}
@@ -136,7 +139,7 @@ func NewQueryResults(queryResult interface{}) ([]*PromQueryResult, error) {
 	return result, nil
 	return result, nil
 }
 }
 
 
-func parseDataPoint(dataPoint interface{}) (*Vector, error) {
+func parseDataPoint(dataPoint interface{}, labels func() string) (*Vector, error) {
 	value, ok := dataPoint.([]interface{})
 	value, ok := dataPoint.([]interface{})
 	if !ok || len(value) != 2 {
 	if !ok || len(value) != 2 {
 		return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
 		return nil, fmt.Errorf("Improperly formatted datapoint from Prometheus")
@@ -148,6 +151,15 @@ func parseDataPoint(dataPoint interface{}) (*Vector, error) {
 		return nil, err
 		return nil, err
 	}
 	}
 
 
+	// Test for +Inf and -Inf (sign: 0), Test for NaN
+	if math.IsInf(v, 0) {
+		klog.V(1).Infof("[Warning] Found Inf value parsing vector data point for metric: %s", labels())
+		v = 0.0
+	} else if math.IsNaN(v) {
+		klog.V(1).Infof("[Warning] Found NaN value parsing vector data point for metric: %s", labels())
+		v = 0.0
+	}
+
 	return &Vector{
 	return &Vector{
 		Timestamp: math.Round(value[0].(float64)/10) * 10,
 		Timestamp: math.Round(value[0].(float64)/10) * 10,
 		Value:     v,
 		Value:     v,
@@ -179,14 +191,14 @@ func GetPVInfo(qr interface{}, defaultClusterID string) (map[string]*PersistentV
 
 
 		volumeName, err := val.GetString("volumename")
 		volumeName, err := val.GetString("volumename")
 		if err != nil {
 		if err != nil {
-			klog.V(4).Infof("Warning: Unfulfilled claim %s: volumename field does not exist in data result vector", pvcName)
+			klog.V(4).Infof("[Warning] Unfulfilled claim %s: volumename field does not exist in data result vector", pvcName)
 			volumeName = ""
 			volumeName = ""
 		}
 		}
 
 
 		pvClass, err := val.GetString("storageclass")
 		pvClass, err := val.GetString("storageclass")
 		if err != nil {
 		if err != nil {
 			// TODO: We need to look up the actual PV and PV capacity. For now just proceed with "".
 			// TODO: We need to look up the actual PV and PV capacity. For now just proceed with "".
-			klog.V(2).Infof("Storage Class not found for claim \"%s/%s\".", ns, pvcName)
+			klog.V(2).Infof("[Warning] Storage Class not found for claim \"%s/%s\".", ns, pvcName)
 			pvClass = ""
 			pvClass = ""
 		}
 		}
 
 
@@ -429,3 +441,12 @@ func GetServiceSelectorLabelsMetrics(queryResult interface{}, defaultClusterID s
 
 
 	return toReturn, nil
 	return toReturn, nil
 }
 }
+
+func labelsForMetric(metricMap map[string]interface{}) string {
+	var pairs []string
+	for k, v := range metricMap {
+		pairs = append(pairs, fmt.Sprintf("%s: %+v", k, v))
+	}
+
+	return fmt.Sprintf("{%s}", strings.Join(pairs, ", "))
+}

+ 34 - 2
costmodel/router.go

@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"encoding/json"
 	"flag"
 	"flag"
 	"fmt"
 	"fmt"
+	"math"
 	"net"
 	"net"
 	"net/http"
 	"net/http"
 	"os"
 	"os"
@@ -359,7 +360,8 @@ func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps
 		pClient = a.PrometheusClient
 		pClient = a.PrometheusClient
 	}
 	}
 
 
-	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.Cloud, start, end, window, namespace, cluster, remoteEnabled)
+	resolutionHours := 1.0
+	data, err := a.Model.ComputeCostDataRange(pClient, a.KubeClientSet, a.Cloud, start, end, window, resolutionHours, namespace, cluster, remoteEnabled)
 	if err != nil {
 	if err != nil {
 		w.Write(WrapData(nil, err))
 		w.Write(WrapData(nil, err))
 	}
 	}
@@ -616,6 +618,8 @@ func (a *Accesses) recordPrices() {
 				podStatus[pod.Name] = pod.Status.Phase
 				podStatus[pod.Name] = pod.Status.Phase
 			}
 			}
 
 
+			cfg, _ := a.Cloud.GetConfig()
+
 			// Record network pricing at global scope
 			// Record network pricing at global scope
 			networkCosts, err := a.Cloud.NetworkPricing()
 			networkCosts, err := a.Cloud.NetworkPricing()
 			if err != nil {
 			if err != nil {
@@ -635,12 +639,40 @@ func (a *Accesses) recordPrices() {
 
 
 			nodes, err := a.Model.GetNodeCost(a.Cloud)
 			nodes, err := a.Model.GetNodeCost(a.Cloud)
 			for nodeName, node := range nodes {
 			for nodeName, node := range nodes {
+				// Emit costs, guarding against NaN and Inf inputs for custom pricing.
 				cpuCost, _ := strconv.ParseFloat(node.VCPUCost, 64)
 				cpuCost, _ := strconv.ParseFloat(node.VCPUCost, 64)
+				if math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
+					cpuCost, _ = strconv.ParseFloat(cfg.CPU, 64)
+					if  math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
+						cpuCost = 0
+					}
+				}
 				cpu, _ := strconv.ParseFloat(node.VCPU, 64)
 				cpu, _ := strconv.ParseFloat(node.VCPU, 64)
+				if math.IsNaN(cpu) || math.IsInf(cpu, 0) {
+					cpu = 1 // Assume 1 CPU
+				}
 				ramCost, _ := strconv.ParseFloat(node.RAMCost, 64)
 				ramCost, _ := strconv.ParseFloat(node.RAMCost, 64)
+				if math.IsNaN(ramCost) || math.IsInf(ramCost, 0) {
+					ramCost, _ = strconv.ParseFloat(cfg.RAM, 64)
+					if  math.IsNaN(ramCost) || math.IsInf(ramCost, 0) {
+						ramCost = 0
+					}
+				}
 				ram, _ := strconv.ParseFloat(node.RAMBytes, 64)
 				ram, _ := strconv.ParseFloat(node.RAMBytes, 64)
+				if math.IsNaN(ram) || math.IsInf(ram, 0) {
+					ram = 0
+				}
 				gpu, _ := strconv.ParseFloat(node.GPU, 64)
 				gpu, _ := strconv.ParseFloat(node.GPU, 64)
+				if math.IsNaN(gpu) || math.IsInf(gpu, 0) {
+					gpu = 0
+				}
 				gpuCost, _ := strconv.ParseFloat(node.GPUCost, 64)
 				gpuCost, _ := strconv.ParseFloat(node.GPUCost, 64)
+				if math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
+					gpuCost, _ = strconv.ParseFloat(cfg.GPU, 64)
+					if  math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
+						gpuCost = 0
+					}
+				}
 
 
 				totalCost := cpu*cpuCost + ramCost*(ram/1024/1024/1024) + gpu*gpuCost
 				totalCost := cpu*cpuCost + ramCost*(ram/1024/1024/1024) + gpu*gpuCost
 
 
@@ -983,7 +1015,7 @@ func Initialize() {
 
 
 			_, err = ValidatePrometheus(thanosCli, true)
 			_, err = ValidatePrometheus(thanosCli, true)
 			if err != nil {
 			if err != nil {
-				klog.V(1).Infof("Warning: Failed to query Thanos at %s. Error: %s.", thanosUrl, err.Error())
+				klog.V(1).Infof("[Warning] Failed to query Thanos at %s. Error: %s.", thanosUrl, err.Error())
 				A.ThanosClient = thanosCli
 				A.ThanosClient = thanosCli
 			} else {
 			} else {
 				klog.V(1).Info("Success: retrieved the 'up' query against Thanos at: " + thanosUrl)
 				klog.V(1).Info("Success: retrieved the 'up' query against Thanos at: " + thanosUrl)

+ 57 - 27
costmodel/vector.go

@@ -10,6 +10,16 @@ type Vector struct {
 	Value     float64 `json:"value"`
 	Value     float64 `json:"value"`
 }
 }
 
 
+const MapPoolSize = 4
+
+type VectorSlice []*Vector
+
+func (p VectorSlice) Len() int           { return len(p) }
+func (p VectorSlice) Less(i, j int) bool { return p[i].Timestamp < p[j].Timestamp }
+func (p VectorSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
+
+var mapPool VectorMapPool = NewFlexibleMapPool(MapPoolSize)
+
 // roundTimestamp rounds the given timestamp to the given precision; e.g. a
 // roundTimestamp rounds the given timestamp to the given precision; e.g. a
 // timestamp given in seconds, rounded to precision 10, will be rounded
 // timestamp given in seconds, rounded to precision 10, will be rounded
 // to the nearest value divisible by 10 (24 goes to 20, but 25 goes to 30).
 // to the nearest value divisible by 10 (24 goes to 20, but 25 goes to 30).
@@ -17,21 +27,21 @@ func roundTimestamp(ts float64, precision float64) float64 {
 	return math.Round(ts/precision) * precision
 	return math.Round(ts/precision) * precision
 }
 }
 
 
+// Makes a reasonable guess at capacity for vector join
+func capacityFor(xvs []*Vector, yvs []*Vector) int {
+	x := len(xvs)
+	y := len(yvs)
+
+	if x >= y {
+		return x + (y / 4)
+	}
+
+	return y + (x / 4)
+}
+
 // ApplyVectorOp accepts two vectors, synchronizes timestamps, and executes an operation
 // ApplyVectorOp accepts two vectors, synchronizes timestamps, and executes an operation
 // on each vector. See VectorJoinOp for details.
 // on each vector. See VectorJoinOp for details.
 func ApplyVectorOp(xvs []*Vector, yvs []*Vector, op VectorJoinOp) []*Vector {
 func ApplyVectorOp(xvs []*Vector, yvs []*Vector, op VectorJoinOp) []*Vector {
-	// round all non-zero timestamps to the nearest 10 second mark
-	for _, yv := range yvs {
-		if yv.Timestamp != 0 {
-			yv.Timestamp = roundTimestamp(yv.Timestamp, 10.0)
-		}
-	}
-	for _, xv := range xvs {
-		if xv.Timestamp != 0 {
-			xv.Timestamp = roundTimestamp(xv.Timestamp, 10.0)
-		}
-	}
-
 	// if xvs is empty, return yvs
 	// if xvs is empty, return yvs
 	if xvs == nil || len(xvs) == 0 {
 	if xvs == nil || len(xvs) == 0 {
 		return yvs
 		return yvs
@@ -42,47 +52,67 @@ func ApplyVectorOp(xvs []*Vector, yvs []*Vector, op VectorJoinOp) []*Vector {
 		return xvs
 		return xvs
 	}
 	}
 
 
-	// result contains the final vector slice after joining xvs and yvs
-	var result []*Vector
-
-	// timestamps stores all timestamps present in both vector slices
-	// without duplicates
-	var timestamps []float64
+	// timestamps contains the vector slice after joining xvs and yvs
+	var timestamps []*Vector
 
 
 	// turn each vector slice into a map of timestamp-to-value so that
 	// turn each vector slice into a map of timestamp-to-value so that
 	// values at equal timestamps can be lined-up and summed
 	// values at equal timestamps can be lined-up and summed
-	xMap := make(map[float64]float64)
+	xMap := mapPool.Get()
+	defer mapPool.Put(xMap)
+
 	for _, xv := range xvs {
 	for _, xv := range xvs {
 		if xv.Timestamp == 0 {
 		if xv.Timestamp == 0 {
 			continue
 			continue
 		}
 		}
+
+		// round all non-zero timestamps to the nearest 10 second mark
+		xv.Timestamp = roundTimestamp(xv.Timestamp, 10.0)
+
 		xMap[xv.Timestamp] = xv.Value
 		xMap[xv.Timestamp] = xv.Value
-		timestamps = append(timestamps, xv.Timestamp)
+		timestamps = append(timestamps, &Vector{
+			Timestamp: xv.Timestamp,
+		})
 	}
 	}
-	yMap := make(map[float64]float64)
+
+	yMap := mapPool.Get()
+	defer mapPool.Put(yMap)
+
 	for _, yv := range yvs {
 	for _, yv := range yvs {
 		if yv.Timestamp == 0 {
 		if yv.Timestamp == 0 {
 			continue
 			continue
 		}
 		}
+
+		// round all non-zero timestamps to the nearest 10 second mark
+		yv.Timestamp = roundTimestamp(yv.Timestamp, 10.0)
+
 		yMap[yv.Timestamp] = yv.Value
 		yMap[yv.Timestamp] = yv.Value
 		if _, ok := xMap[yv.Timestamp]; !ok {
 		if _, ok := xMap[yv.Timestamp]; !ok {
 			// no need to double add, since we'll range over sorted timestamps and check.
 			// no need to double add, since we'll range over sorted timestamps and check.
-			timestamps = append(timestamps, yv.Timestamp)
+			timestamps = append(timestamps, &Vector{
+				Timestamp: yv.Timestamp,
+			})
 		}
 		}
 	}
 	}
 
 
 	// iterate over each timestamp to produce a final op vector slice
 	// iterate over each timestamp to produce a final op vector slice
-	sort.Float64s(timestamps)
-	for _, t := range timestamps {
-		x, okX := xMap[t]
-		y, okY := yMap[t]
-		sv := &Vector{Timestamp: t}
+	// reuse the existing slice to reduce allocations
+	result := timestamps[:0]
+	for _, sv := range timestamps {
+		x, okX := xMap[sv.Timestamp]
+		y, okY := yMap[sv.Timestamp]
 
 
 		if op(sv, VectorValue(x, okX), VectorValue(y, okY)) {
 		if op(sv, VectorValue(x, okX), VectorValue(y, okY)) {
 			result = append(result, sv)
 			result = append(result, sv)
 		}
 		}
 	}
 	}
 
 
+	// nil out remaining vectors in timestamps to GC
+	for i := len(result); i < len(timestamps); i++ {
+		timestamps[i] = nil
+	}
+
+	sort.Sort(VectorSlice(result))
+
 	return result
 	return result
 }
 }