Просмотр исходного кода

Merge pull request #673 from kubecost/mmd-provider-id-fix

Add provider ID to ClusterNode map key when possible
Michael Dresser 5 лет назад
Родитель
Сommit
57c143458c
4 измененных файлов с 1259 добавлено и 360 удалено
  1. 1 0
      go.mod
  2. 36 360
      pkg/costmodel/cluster.go
  3. 663 0
      pkg/costmodel/cluster_helpers.go
  4. 559 0
      pkg/costmodel/cluster_helpers_test.go

+ 1 - 0
go.mod

@@ -8,6 +8,7 @@ require (
 	github.com/Azure/azure-sdk-for-go v24.1.0+incompatible
 	github.com/Azure/go-autorest v11.3.2+incompatible
 	github.com/aws/aws-sdk-go v1.28.9
+	github.com/davecgh/go-spew v1.1.1
 	github.com/dimchansky/utfbom v1.1.0 // indirect
 	github.com/getsentry/sentry-go v0.6.1
 	github.com/google/martian v2.1.0+incompatible

+ 36 - 360
pkg/costmodel/cluster.go

@@ -400,7 +400,18 @@ var partialCPUMap = map[string]float64{
 	"e2-medium": 1.0,
 }
 
-func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset time.Duration) (map[string]*Node, error) {
+// NodeIdentifier is the key type of the node map returned by ClusterNodes.
+// Including ProviderID lets two nodes that share a cluster and name be
+// tracked as separate entries.
+type NodeIdentifier struct {
+	Cluster    string
+	Name       string
+	ProviderID string
+}
+
+// nodeIdentifierNoProviderID identifies a node by cluster and name only. It
+// keys data derived from metrics that are not read with a provider_id.
+type nodeIdentifierNoProviderID struct {
+	Cluster string
+	Name    string
+}
+
+func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset time.Duration) (map[NodeIdentifier]*Node, error) {
 	durationStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
 	offsetStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
 	if offset < time.Minute {
@@ -429,7 +440,7 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 	queryNodeCPUModeTotal := fmt.Sprintf(`sum(rate(node_cpu_seconds_total[%s:%dm]%s)) by (kubernetes_node, cluster_id, mode)`, durationStr, minsPerResolution, offsetStr)
 	queryNodeRAMSystemPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace="kube-system"}[%s:%dm]%s)) by (instance, cluster_id) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, cluster_id), "instance", "$1", "node", "(.*)")) by (instance, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
 	queryNodeRAMUserPct := fmt.Sprintf(`sum(sum_over_time(container_memory_working_set_bytes{container_name!="POD",container_name!="",namespace!="kube-system"}[%s:%dm]%s)) by (instance, cluster_id) / avg(label_replace(sum(sum_over_time(kube_node_status_capacity_memory_bytes[%s:%dm]%s)) by (node, cluster_id), "instance", "$1", "node", "(.*)")) by (instance, cluster_id)`, durationStr, minsPerResolution, offsetStr, durationStr, minsPerResolution, offsetStr)
-	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node,cluster_id)[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
+	queryActiveMins := fmt.Sprintf(`avg(node_total_hourly_cost) by (node, cluster_id, provider_id)[%s:%dm]%s`, durationStr, minsPerResolution, offsetStr)
 	queryIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
 	queryLabels := fmt.Sprintf(`count_over_time(kube_node_labels[%s:%dm]%s)`, durationStr, minsPerResolution, offsetStr)
 
@@ -473,370 +484,35 @@ func ClusterNodes(cp cloud.Provider, client prometheus.Client, duration, offset
 		return nil, requiredCtx.ErrorCollection()
 	}
 
-	nodeMap := map[string]*Node{}
-
-	for _, result := range resNodeCPUCost {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: CPU cost data missing node")
-			continue
-		}
-
-		nodeType, _ := result.GetString("instance_type")
-		providerID, _ := result.GetString("provider_id")
-
-		cpuCost := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
-				Cluster:      cluster,
-				Name:         name,
-				NodeType:     nodeType,
-				ProviderID:   cp.ParseID(providerID),
-				CPUBreakdown: &ClusterCostsBreakdown{},
-				RAMBreakdown: &ClusterCostsBreakdown{},
-				Labels:       map[string]string{},
-			}
-		}
-		nodeMap[key].CPUCost += cpuCost
-		nodeMap[key].NodeType = nodeType
-		if nodeMap[key].ProviderID == "" {
-			nodeMap[key].ProviderID = cp.ParseID(providerID)
-		}
-	}
-
-	for _, result := range resNodeCPUCores {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: CPU cores data missing node")
-			continue
-		}
-
-		cpuCores := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
-				Cluster:      cluster,
-				Name:         name,
-				CPUBreakdown: &ClusterCostsBreakdown{},
-				RAMBreakdown: &ClusterCostsBreakdown{},
-				Labels:       map[string]string{},
-			}
-		}
-		node := nodeMap[key]
-		if v, ok := partialCPUMap[node.NodeType]; ok {
-			node.CPUCores = v
-			if cpuCores > 0 {
-				adjustmentFactor := v / cpuCores
-				node.CPUCost = node.CPUCost * adjustmentFactor
-			}
-		} else {
-			nodeMap[key].CPUCores = cpuCores
-		}
-	}
-
-	for _, result := range resNodeRAMCost {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: RAM cost data missing node")
-			continue
-		}
-
-		nodeType, _ := result.GetString("instance_type")
-		providerID, _ := result.GetString("provider_id")
-
-		ramCost := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
-				Cluster:      cluster,
-				Name:         name,
-				NodeType:     nodeType,
-				ProviderID:   cp.ParseID(providerID),
-				CPUBreakdown: &ClusterCostsBreakdown{},
-				RAMBreakdown: &ClusterCostsBreakdown{},
-				Labels:       map[string]string{},
-			}
-		}
-		nodeMap[key].RAMCost += ramCost
-		nodeMap[key].NodeType = nodeType
-		if nodeMap[key].ProviderID == "" {
-			nodeMap[key].ProviderID = cp.ParseID(providerID)
-		}
-	}
-
-	for _, result := range resNodeRAMBytes {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: RAM bytes data missing node")
-			continue
-		}
-
-		ramBytes := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
-				Cluster:      cluster,
-				Name:         name,
-				CPUBreakdown: &ClusterCostsBreakdown{},
-				RAMBreakdown: &ClusterCostsBreakdown{},
-				Labels:       map[string]string{},
-			}
-		}
-		nodeMap[key].RAMBytes = ramBytes
-	}
-
-	for _, result := range resNodeGPUCost {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
+	cpuCostMap, clusterAndNameToType1 := buildCPUCostMap(resNodeCPUCost, cp.ParseID)
+	ramCostMap, clusterAndNameToType2 := buildRAMCostMap(resNodeRAMCost, cp.ParseID)
+	gpuCostMap, clusterAndNameToType3 := buildGPUCostMap(resNodeGPUCost, cp.ParseID)
 
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: GPU cost data missing node")
-			continue
-		}
-
-		nodeType, _ := result.GetString("instance_type")
-		providerID, _ := result.GetString("provider_id")
-
-		gpuCost := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			nodeMap[key] = &Node{
-				Cluster:      cluster,
-				Name:         name,
-				NodeType:     nodeType,
-				ProviderID:   cp.ParseID(providerID),
-				CPUBreakdown: &ClusterCostsBreakdown{},
-				RAMBreakdown: &ClusterCostsBreakdown{},
-				Labels:       map[string]string{},
-			}
-		}
-		nodeMap[key].GPUCost += gpuCost
-		if nodeMap[key].ProviderID == "" {
-			nodeMap[key].ProviderID = cp.ParseID(providerID)
-		}
-	}
+	clusterAndNameToTypeIntermediate := mergeTypeMaps(clusterAndNameToType1, clusterAndNameToType2)
+	clusterAndNameToType := mergeTypeMaps(clusterAndNameToTypeIntermediate, clusterAndNameToType3)
 
-	// Mapping of cluster/node=cpu for computing resource efficiency
-	clusterNodeCPUTotal := map[string]float64{}
-	// Mapping of cluster/node:mode=cpu for computing resource efficiency
-	clusterNodeModeCPUTotal := map[string]map[string]float64{}
+	cpuCoresMap := buildCPUCoresMap(resNodeCPUCores, clusterAndNameToType)
 
-	// Build intermediate structures for CPU usage by (cluster, node) and by
-	// (cluster, node, mode) for computing resouce efficiency
-	for _, result := range resNodeCPUModeTotal {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
+	ramBytesMap := buildRAMBytesMap(resNodeRAMBytes)
 
-		node, err := result.GetString("kubernetes_node")
-		if err != nil {
-			log.DedupedWarningf(5, "ClusterNodes: CPU mode data missing node")
-			continue
-		}
+	ramUserPctMap := buildRAMUserPctMap(resNodeRAMUserPct)
+	ramSystemPctMap := buildRAMSystemPctMap(resNodeRAMSystemPct)
 
-		mode, err := result.GetString("mode")
-		if err != nil {
-			log.Warningf("ClusterNodes: unable to read CPU mode: %s", err)
-			mode = "other"
-		}
+	cpuBreakdownMap := buildCPUBreakdownMap(resNodeCPUModeTotal)
+	activeDataMap := buildActiveDataMap(resActiveMins, resolution, cp.ParseID)
+	preemptibleMap := buildPreemptibleMap(resIsSpot, cp.ParseID)
+	labelsMap := buildLabelsMap(resLabels)
 
-		key := fmt.Sprintf("%s/%s", cluster, node)
-
-		total := result.Values[0].Value
-
-		// Increment total
-		clusterNodeCPUTotal[key] += total
-
-		// Increment mode
-		if _, ok := clusterNodeModeCPUTotal[key]; !ok {
-			clusterNodeModeCPUTotal[key] = map[string]float64{}
-		}
-		clusterNodeModeCPUTotal[key][mode] += total
-	}
-
-	// Compute resource efficiency from intermediate structures
-	for key, total := range clusterNodeCPUTotal {
-		if modeTotals, ok := clusterNodeModeCPUTotal[key]; ok {
-			for mode, subtotal := range modeTotals {
-				// Compute percentage for the current cluster, node, mode
-				pct := 0.0
-				if total > 0 {
-					pct = subtotal / total
-				}
-
-				if _, ok := nodeMap[key]; !ok {
-					log.Warningf("ClusterNodes: CPU mode data for unidentified node")
-					continue
-				}
-
-				switch mode {
-				case "idle":
-					nodeMap[key].CPUBreakdown.Idle += pct
-				case "system":
-					nodeMap[key].CPUBreakdown.System += pct
-				case "user":
-					nodeMap[key].CPUBreakdown.User += pct
-				default:
-					nodeMap[key].CPUBreakdown.Other += pct
-				}
-			}
-		}
-	}
-
-	for _, result := range resNodeRAMSystemPct {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("instance")
-		if err != nil {
-			log.Warningf("ClusterNodes: RAM system percent missing node")
-			continue
-		}
-
-		pct := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			log.Warningf("ClusterNodes: RAM system percent for unidentified node")
-			continue
-		}
-
-		nodeMap[key].RAMBreakdown.System += pct
-	}
-
-	for _, result := range resNodeRAMUserPct {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("instance")
-		if err != nil {
-			log.Warningf("ClusterNodes: RAM system percent missing node")
-			continue
-		}
-
-		pct := result.Values[0].Value
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			log.Warningf("ClusterNodes: RAM system percent for unidentified node")
-			continue
-		}
-
-		nodeMap[key].RAMBreakdown.User += pct
-	}
-
-	for _, result := range resActiveMins {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-
-		name, err := result.GetString("node")
-		if err != nil {
-			log.Warningf("ClusterNodes: active mins missing node")
-			continue
-		}
-
-		key := fmt.Sprintf("%s/%s", cluster, name)
-		if _, ok := nodeMap[key]; !ok {
-			log.Warningf("ClusterNodes: active mins for unidentified node")
-			continue
-		}
-
-		if len(result.Values) == 0 {
-			continue
-		}
-
-		s := time.Unix(int64(result.Values[0].Timestamp), 0)
-		e := time.Unix(int64(result.Values[len(result.Values)-1].Timestamp), 0).Add(resolution)
-		mins := e.Sub(s).Minutes()
-
-		// TODO niko/assets if mins >= threshold, interpolate for missing data?
-
-		nodeMap[key].End = e
-		nodeMap[key].Start = s
-		nodeMap[key].Minutes = mins
-	}
-
-	// Determine preemptibility with node labels
-	for _, result := range resIsSpot {
-		nodeName, err := result.GetString("node")
-		if err != nil {
-			continue
-		}
-
-		// GCP preemptible label
-		pre := result.Values[0].Value
-
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-		key := fmt.Sprintf("%s/%s", cluster, nodeName)
-		if node, ok := nodeMap[key]; pre > 0.0 && ok {
-			node.Preemptible = true
-		}
-
-		// TODO AWS preemptible
-
-		// TODO Azure preemptible
-	}
-
-	// Copy labels into node
-	for _, result := range resLabels {
-		cluster, err := result.GetString("cluster_id")
-		if err != nil {
-			cluster = env.GetClusterID()
-		}
-		node, err := result.GetString("kubernetes_node")
-		if err != nil {
-			log.DedupedWarningf(5, "ClusterNodes: label data missing node")
-			continue
-		}
-		key := fmt.Sprintf("%s/%s", cluster, node)
-		if _, ok := nodeMap[key]; !ok {
-			continue
-		}
-		for name, value := range result.Metric {
-			if val, ok := value.(string); ok {
-				nodeMap[key].Labels[name] = val
-			}
-		}
-	}
+	nodeMap := buildNodeMap(
+		cpuCostMap, ramCostMap, gpuCostMap,
+		cpuCoresMap, ramBytesMap, ramUserPctMap,
+		ramSystemPctMap,
+		cpuBreakdownMap,
+		activeDataMap,
+		preemptibleMap,
+		labelsMap,
+		clusterAndNameToType,
+	)
 
 	c, err := cp.GetConfig()
 	if err != nil {

+ 663 - 0
pkg/costmodel/cluster_helpers.go

@@ -0,0 +1,663 @@
+package costmodel
+
+import (
+	"time"
+
+	"github.com/kubecost/cost-model/pkg/env"
+	"github.com/kubecost/cost-model/pkg/log"
+	"github.com/kubecost/cost-model/pkg/prom"
+)
+
+// mergeTypeMaps combines two (cluster name, node name) -> node type maps
+// into a newly allocated map. Neither input is modified. When both inputs
+// contain the same key, the value from clusterAndNameToType1 wins.
+func mergeTypeMaps(clusterAndNameToType1, clusterAndNameToType2 map[nodeIdentifierNoProviderID]string) map[nodeIdentifierNoProviderID]string {
+	combined := make(map[nodeIdentifierNoProviderID]string)
+
+	// Copy the lower-priority map first so that entries from the first
+	// argument overwrite it on conflict.
+	sources := []map[nodeIdentifierNoProviderID]string{
+		clusterAndNameToType2,
+		clusterAndNameToType1,
+	}
+	for _, source := range sources {
+		for id, nodeType := range source {
+			combined[id] = nodeType
+		}
+	}
+
+	return combined
+}
+
+// buildCPUCostMap converts CPU cost query results into two maps:
+//   - (cluster, node, parsed provider ID) -> CPU cost, taken from the first
+//     value of each result
+//   - (cluster, node) -> the result's instance_type label
+//
+// Results without a "node" label are logged and skipped. A missing
+// "cluster_id" label falls back to env.GetClusterID(). Later results
+// overwrite earlier ones that map to the same key.
+func buildCPUCostMap(
+	resNodeCPUCost []*prom.QueryResult,
+	providerIDParser func(string) string,
+) (
+	map[NodeIdentifier]float64,
+	map[nodeIdentifierNoProviderID]string,
+) {
+	costByNode := map[NodeIdentifier]float64{}
+	typeByNode := map[nodeIdentifierNoProviderID]string{}
+
+	for _, res := range resNodeCPUCost {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		nodeName, err := res.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: CPU cost data missing node")
+			continue
+		}
+
+		// These labels are optional: lookup errors are deliberately ignored
+		// and whatever GetString returned is used as-is.
+		instanceType, _ := res.GetString("instance_type")
+		rawProviderID, _ := res.GetString("provider_id")
+
+		cost := res.Values[0].Value
+
+		fullID := NodeIdentifier{
+			Cluster:    clusterID,
+			Name:       nodeName,
+			ProviderID: providerIDParser(rawProviderID),
+		}
+
+		typeByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: nodeName}] = instanceType
+		costByNode[fullID] = cost
+	}
+
+	return costByNode, typeByNode
+}
+
+// buildRAMCostMap converts RAM cost query results into two maps:
+//   - (cluster, node, parsed provider ID) -> RAM cost, taken from the first
+//     value of each result
+//   - (cluster, node) -> the result's instance_type label
+//
+// Results without a "node" label are logged and skipped. A missing
+// "cluster_id" label falls back to env.GetClusterID(). Later results
+// overwrite earlier ones that map to the same key.
+func buildRAMCostMap(
+	resNodeRAMCost []*prom.QueryResult,
+	providerIDParser func(string) string,
+) (
+	map[NodeIdentifier]float64,
+	map[nodeIdentifierNoProviderID]string,
+) {
+	costByNode := map[NodeIdentifier]float64{}
+	typeByNode := map[nodeIdentifierNoProviderID]string{}
+
+	for _, res := range resNodeRAMCost {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		nodeName, err := res.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: RAM cost data missing node")
+			continue
+		}
+
+		// These labels are optional: lookup errors are deliberately ignored
+		// and whatever GetString returned is used as-is.
+		instanceType, _ := res.GetString("instance_type")
+		rawProviderID, _ := res.GetString("provider_id")
+
+		cost := res.Values[0].Value
+
+		fullID := NodeIdentifier{
+			Cluster:    clusterID,
+			Name:       nodeName,
+			ProviderID: providerIDParser(rawProviderID),
+		}
+
+		typeByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: nodeName}] = instanceType
+		costByNode[fullID] = cost
+	}
+
+	return costByNode, typeByNode
+}
+
+// buildGPUCostMap converts GPU cost query results into two maps:
+//   - (cluster, node, parsed provider ID) -> GPU cost, taken from the first
+//     value of each result
+//   - (cluster, node) -> the result's instance_type label
+//
+// Results without a "node" label are logged and skipped. A missing
+// "cluster_id" label falls back to env.GetClusterID(). Later results
+// overwrite earlier ones that map to the same key.
+func buildGPUCostMap(
+	resNodeGPUCost []*prom.QueryResult,
+	providerIDParser func(string) string,
+) (
+	map[NodeIdentifier]float64,
+	map[nodeIdentifierNoProviderID]string,
+) {
+	costByNode := map[NodeIdentifier]float64{}
+	typeByNode := map[nodeIdentifierNoProviderID]string{}
+
+	for _, res := range resNodeGPUCost {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		nodeName, err := res.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: GPU cost data missing node")
+			continue
+		}
+
+		// These labels are optional: lookup errors are deliberately ignored
+		// and whatever GetString returned is used as-is.
+		instanceType, _ := res.GetString("instance_type")
+		rawProviderID, _ := res.GetString("provider_id")
+
+		cost := res.Values[0].Value
+
+		fullID := NodeIdentifier{
+			Cluster:    clusterID,
+			Name:       nodeName,
+			ProviderID: providerIDParser(rawProviderID),
+		}
+
+		typeByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: nodeName}] = instanceType
+		costByNode[fullID] = cost
+	}
+
+	return costByNode, typeByNode
+}
+
+// buildCPUCoresMap converts CPU core query results into a
+// (cluster, node) -> core count map.
+//
+// The core count is the first value of each result, unless the node's type
+// (looked up in clusterAndNameToType) has an entry in partialCPUMap, in
+// which case the partialCPUMap value is used instead.
+//
+// Results without a "node" label are logged and skipped. A missing
+// "cluster_id" label falls back to env.GetClusterID().
+func buildCPUCoresMap(
+	resNodeCPUCores []*prom.QueryResult,
+	clusterAndNameToType map[nodeIdentifierNoProviderID]string,
+) map[nodeIdentifierNoProviderID]float64 {
+
+	m := make(map[nodeIdentifierNoProviderID]float64)
+
+	for _, result := range resNodeCPUCores {
+		cluster, err := result.GetString("cluster_id")
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+
+		name, err := result.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: CPU cores data missing node")
+			continue
+		}
+
+		cores := result.Values[0].Value
+
+		key := nodeIdentifierNoProviderID{
+			Cluster: cluster,
+			Name:    name,
+		}
+
+		// For node types listed in partialCPUMap, the table value is the
+		// core count. It must not be scaled by (table value / reported
+		// cores): that ratio is a CPU *cost* adjustment, and applying it
+		// here would shrink the core count itself.
+		// NOTE(review): this function has no access to CPU cost, so the
+		// cost adjustment cannot be applied here; confirm whether it needs
+		// to be applied where costs and cores are merged.
+		if nodeType, ok := clusterAndNameToType[key]; ok {
+			if partialCores, ok := partialCPUMap[nodeType]; ok {
+				cores = partialCores
+			}
+		}
+
+		m[key] = cores
+	}
+
+	return m
+}
+
+// buildRAMBytesMap converts RAM capacity query results into a
+// (cluster, node) -> RAM bytes map, using the first value of each result.
+// Results without a "node" label are logged and skipped. A missing
+// "cluster_id" label falls back to env.GetClusterID().
+func buildRAMBytesMap(resNodeRAMBytes []*prom.QueryResult) map[nodeIdentifierNoProviderID]float64 {
+	bytesByNode := map[nodeIdentifierNoProviderID]float64{}
+
+	for _, res := range resNodeRAMBytes {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		nodeName, err := res.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: RAM bytes data missing node")
+			continue
+		}
+
+		bytesByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: nodeName}] = res.Values[0].Value
+	}
+
+	return bytesByNode
+}
+
+// Mapping of cluster/node=cpu for computing resource efficiency
+func buildCPUBreakdownMap(resNodeCPUModeTotal []*prom.QueryResult) map[nodeIdentifierNoProviderID]*ClusterCostsBreakdown {
+
+	cpuBreakdownMap := make(map[nodeIdentifierNoProviderID]*ClusterCostsBreakdown)
+
+	// Mapping of cluster/node=cpu for computing resource efficiency
+	clusterNodeCPUTotal := map[nodeIdentifierNoProviderID]float64{}
+	// Mapping of cluster/node:mode=cpu for computing resource efficiency
+	clusterNodeModeCPUTotal := map[nodeIdentifierNoProviderID]map[string]float64{}
+
+	// Build intermediate structures for CPU usage by (cluster, node) and by
+	// (cluster, node, mode) for computing resouce efficiency
+	for _, result := range resNodeCPUModeTotal {
+		cluster, err := result.GetString("cluster_id")
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+
+		node, err := result.GetString("kubernetes_node")
+		if err != nil {
+			log.DedupedWarningf(5, "ClusterNodes: CPU mode data missing node")
+			continue
+		}
+
+		mode, err := result.GetString("mode")
+		if err != nil {
+			log.Warningf("ClusterNodes: unable to read CPU mode: %s", err)
+			mode = "other"
+		}
+
+		key := nodeIdentifierNoProviderID{
+			Cluster: cluster,
+			Name:    node,
+		}
+
+		total := result.Values[0].Value
+
+		// Increment total
+		clusterNodeCPUTotal[key] += total
+
+		// Increment mode
+		if _, ok := clusterNodeModeCPUTotal[key]; !ok {
+			clusterNodeModeCPUTotal[key] = map[string]float64{}
+		}
+		clusterNodeModeCPUTotal[key][mode] += total
+	}
+
+	// Compute resource efficiency from intermediate structures
+	for key, total := range clusterNodeCPUTotal {
+		if modeTotals, ok := clusterNodeModeCPUTotal[key]; ok {
+			for mode, subtotal := range modeTotals {
+				// Compute percentage for the current cluster, node, mode
+				pct := 0.0
+				if total > 0 {
+					pct = subtotal / total
+				}
+
+				if _, ok := cpuBreakdownMap[key]; !ok {
+					cpuBreakdownMap[key] = &ClusterCostsBreakdown{}
+				}
+
+				switch mode {
+				case "idle":
+					cpuBreakdownMap[key].Idle += pct
+				case "system":
+					cpuBreakdownMap[key].System += pct
+				case "user":
+					cpuBreakdownMap[key].User += pct
+				default:
+					cpuBreakdownMap[key].Other += pct
+				}
+			}
+		}
+	}
+
+	return cpuBreakdownMap
+}
+
+// buildRAMUserPctMap converts user RAM percentage query results into a
+// (cluster, node) -> percentage map, using the first value of each result.
+// The node name is read from the "instance" label; results without it are
+// logged and skipped. A missing "cluster_id" label falls back to
+// env.GetClusterID().
+func buildRAMUserPctMap(resNodeRAMUserPct []*prom.QueryResult) map[nodeIdentifierNoProviderID]float64 {
+	pctByNode := map[nodeIdentifierNoProviderID]float64{}
+
+	for _, res := range resNodeRAMUserPct {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		instance, err := res.GetString("instance")
+		if err != nil {
+			log.Warningf("ClusterNodes: RAM user percent missing node")
+			continue
+		}
+
+		pctByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: instance}] = res.Values[0].Value
+	}
+
+	return pctByNode
+}
+
+// buildRAMSystemPctMap converts system RAM percentage query results into a
+// (cluster, node) -> percentage map, using the first value of each result.
+// The node name is read from the "instance" label; results without it are
+// logged and skipped. A missing "cluster_id" label falls back to
+// env.GetClusterID().
+func buildRAMSystemPctMap(resNodeRAMSystemPct []*prom.QueryResult) map[nodeIdentifierNoProviderID]float64 {
+	pctByNode := map[nodeIdentifierNoProviderID]float64{}
+
+	for _, res := range resNodeRAMSystemPct {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		instance, err := res.GetString("instance")
+		if err != nil {
+			log.Warningf("ClusterNodes: RAM system percent missing node")
+			continue
+		}
+
+		pctByNode[nodeIdentifierNoProviderID{Cluster: clusterID, Name: instance}] = res.Values[0].Value
+	}
+
+	return pctByNode
+}
+
+// activeData describes the window over which a node was observed: the
+// timestamp of its first sample, the timestamp of its last sample plus one
+// query resolution, and the length of that window in minutes.
+type activeData struct {
+	start   time.Time
+	end     time.Time
+	minutes float64
+}
+
+// buildActiveDataMap converts active-minutes query results into a
+// (cluster, node, parsed provider ID) -> activeData map.
+//
+// For each result, the window starts at the timestamp of the first value and
+// ends at the timestamp of the last value plus resolution. Results without a
+// "node" label are logged and skipped; results with no values are skipped
+// silently. A missing "cluster_id" falls back to env.GetClusterID().
+func buildActiveDataMap(resActiveMins []*prom.QueryResult, resolution time.Duration, providerIDParser func(string) string) map[NodeIdentifier]activeData {
+	windows := map[NodeIdentifier]activeData{}
+
+	for _, res := range resActiveMins {
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		nodeName, err := res.GetString("node")
+		if err != nil {
+			log.Warningf("ClusterNodes: active mins missing node")
+			continue
+		}
+
+		// provider_id is optional; a lookup error is deliberately ignored.
+		rawProviderID, _ := res.GetString("provider_id")
+
+		id := NodeIdentifier{
+			Cluster:    clusterID,
+			Name:       nodeName,
+			ProviderID: providerIDParser(rawProviderID),
+		}
+
+		sampleCount := len(res.Values)
+		if sampleCount == 0 {
+			continue
+		}
+
+		first := res.Values[0]
+		last := res.Values[sampleCount-1]
+
+		windowStart := time.Unix(int64(first.Timestamp), 0)
+		// The last sample covers one full resolution step, so the window
+		// ends one step after its timestamp.
+		windowEnd := time.Unix(int64(last.Timestamp), 0).Add(resolution)
+
+		// TODO niko/assets if mins >= threshold, interpolate for missing data?
+		windows[id] = activeData{
+			start:   windowStart,
+			end:     windowEnd,
+			minutes: windowEnd.Sub(windowStart).Minutes(),
+		}
+	}
+
+	return windows
+}
+
+// buildPreemptibleMap converts spot/preemptible query results into a
+// (cluster, node, parsed provider ID) -> "is preemptible" map. A node is
+// recorded as preemptible when the first value of its result is greater
+// than zero. Every result with a "node" label produces an entry, including
+// non-preemptible nodes (recorded as false).
+//
+// Results without a "node" label are skipped silently. A missing
+// "cluster_id" label falls back to env.GetClusterID().
+func buildPreemptibleMap(
+	resIsSpot []*prom.QueryResult,
+	providerIDParser func(string) string,
+) map[NodeIdentifier]bool {
+	preemptibleByNode := map[NodeIdentifier]bool{}
+
+	for _, res := range resIsSpot {
+		name, err := res.GetString("node")
+		if err != nil {
+			continue
+		}
+
+		// GCP preemptible label
+		spotValue := res.Values[0].Value
+
+		clusterID, err := res.GetString("cluster_id")
+		if err != nil {
+			clusterID = env.GetClusterID()
+		}
+
+		// provider_id is optional; a lookup error is deliberately ignored.
+		rawProviderID, _ := res.GetString("provider_id")
+
+		id := NodeIdentifier{
+			Cluster:    clusterID,
+			Name:       name,
+			ProviderID: providerIDParser(rawProviderID),
+		}
+
+		// TODO(michaelmdresser): the previous inline implementation only
+		// marked a node preemptible when it already existed in the node
+		// map; decide whether that check belongs at merge time.
+		preemptibleByNode[id] = spotValue > 0.0
+
+		// TODO AWS preemptible
+
+		// TODO Azure preemptible
+	}
+
+	return preemptibleByNode
+}
+
+// buildLabelsMap converts node label query results into a
+// (cluster, node) -> label map. Every string-valued entry of a result's
+// Metric is copied into the node's label map.
+//
+// When several results refer to the same node, their labels are merged into
+// one map; for a label name present in more than one result, the last
+// result processed wins.
+//
+// Results without a "kubernetes_node" label are skipped with a deduplicated
+// warning. A missing "cluster_id" falls back to env.GetClusterID().
+func buildLabelsMap(
+	resLabels []*prom.QueryResult,
+) map[nodeIdentifierNoProviderID]map[string]string {
+
+	m := make(map[nodeIdentifierNoProviderID]map[string]string)
+
+	// Copy labels into node
+	for _, result := range resLabels {
+		cluster, err := result.GetString("cluster_id")
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+		node, err := result.GetString("kubernetes_node")
+		if err != nil {
+			log.DedupedWarningf(5, "ClusterNodes: label data missing node")
+			continue
+		}
+		key := nodeIdentifierNoProviderID{
+			Cluster: cluster,
+			Name:    node,
+		}
+
+		// Only allocate on first sight of the node. Allocating on every
+		// result would discard the labels already collected from earlier
+		// results for the same node.
+		labels, ok := m[key]
+		if !ok {
+			labels = make(map[string]string)
+			m[key] = labels
+		}
+
+		for name, value := range result.Metric {
+			if val, ok := value.(string); ok {
+				labels[name] = val
+			}
+		}
+	}
+	return m
+}
+
+// checkForKeyAndInitIfMissing inits a key in the provided nodemap if
+// it does not exist. Intended to be called ONLY by buildNodeMap
+func checkForKeyAndInitIfMissing(
+	nodeMap map[NodeIdentifier]*Node,
+	key NodeIdentifier,
+	clusterAndNameToType map[nodeIdentifierNoProviderID]string,
+) {
+	if _, ok := nodeMap[key]; !ok {
+		// default nodeType in case we don't have the mapping
+		var nodeType string
+		if t, ok := clusterAndNameToType[nodeIdentifierNoProviderID{
+			Cluster: key.Cluster,
+			Name:    key.Name,
+		}]; ok {
+			nodeType = t
+		} else {
+			log.Warningf("ClusterNodes: Type does not exist for node identifier %s", key)
+		}
+
+		nodeMap[key] = &Node{
+			Cluster:      key.Cluster,
+			Name:         key.Name,
+			NodeType:     nodeType,
+			ProviderID:   key.ProviderID,
+			CPUBreakdown: &ClusterCostsBreakdown{},
+			RAMBreakdown: &ClusterCostsBreakdown{},
+		}
+	}
+}
+
+// buildNodeMap assembles the main set of node data for ClusterNodes from
+// the data maps built from Prometheus queries. Some of those maps are keyed
+// with a provider ID and some are not. The maps that include a provider ID
+// define the definitive set of nodes; the maps with less-specific keys
+// (cluster name and node name only) are then used to fill in the remaining
+// fields of those nodes.
+//
+// For example, let's say nodes are identified like so:
+// cluster name/node name/provider_id. For the sake of the example, the data
+// is limited to CPU cost, CPU cores, and preemptibility.
+//
+// CPU cost data:
+// cluster1/node1/prov_node1_A: $10
+// cluster1/node1/prov_node1_B: $8
+// cluster1/node2/prov_node2: $15
+//
+// Preemptible data:
+// cluster1/node1/prov_node1_A: true
+// cluster1/node1/prov_node1_B: false
+// cluster1/node2/prov_node2: false
+//
+// CPU cores data:
+// cluster1/node1: 4
+// cluster1/node2: 6
+//
+// The function first combines the fully identified data:
+// cluster1/node1/prov_node1_A: CPUCost($10), Preemptible(true)
+// cluster1/node1/prov_node1_B: CPUCost($8), Preemptible(false)
+// cluster1/node2/prov_node2: CPUCost($15), Preemptible(false)
+//
+// It then extends those entries with the less-specific data:
+// cluster1/node1/prov_node1_A: CPUCost($10), Preemptible(true), Cores(4)
+// cluster1/node1/prov_node1_B: CPUCost($8), Preemptible(false), Cores(4)
+// cluster1/node2/prov_node2: CPUCost($15), Preemptible(false), Cores(6)
+//
+// If provider_id does not exist for any metric, every provider ID is the
+// empty string, so nodes are (without any extra work) identified by cluster
+// name and node name alone.
+//
+// It is worth noting that a node present only in the less-specific data is
+// never added to the final node map. For example, if the CPU cores map has
+// the entry
+// cluster1/node8: 6
+// but none of the maps keyed with a provider ID (CPU cost, RAM cost, etc.)
+// has an identifier for cluster1/node8 (regardless of provider_id), the
+// final node map has no cluster1/node8 entry. This could be fixed by
+// iterating over all of the less-specific identifiers and, inside that
+// iteration, all of the identifiers in the node map, but that would
+// introduce a roughly quadratic time complexity.
+//
+// The CPU breakdown pointers and label maps are stored in the nodes as-is,
+// not copied, so nodes sharing a cluster and name share those values.
+func buildNodeMap(
+	cpuCostMap, ramCostMap, gpuCostMap map[NodeIdentifier]float64,
+	cpuCoresMap, ramBytesMap, ramUserPctMap,
+	ramSystemPctMap map[nodeIdentifierNoProviderID]float64,
+	cpuBreakdownMap map[nodeIdentifierNoProviderID]*ClusterCostsBreakdown,
+	activeDataMap map[NodeIdentifier]activeData,
+	preemptibleMap map[NodeIdentifier]bool,
+	labelsMap map[nodeIdentifierNoProviderID]map[string]string,
+	clusterAndNameToType map[nodeIdentifierNoProviderID]string,
+) map[NodeIdentifier]*Node {
+
+	nodeMap := make(map[NodeIdentifier]*Node)
+
+	// nodeFor returns the node stored under id, creating it first if needed.
+	nodeFor := func(id NodeIdentifier) *Node {
+		checkForKeyAndInitIfMissing(nodeMap, id, clusterAndNameToType)
+		return nodeMap[id]
+	}
+
+	// Phase 1: the maps keyed with a provider ID define the set of nodes.
+
+	for id, cpuCost := range cpuCostMap {
+		nodeFor(id).CPUCost = cpuCost
+	}
+
+	for id, ramCost := range ramCostMap {
+		nodeFor(id).RAMCost = ramCost
+	}
+
+	for id, gpuCost := range gpuCostMap {
+		nodeFor(id).GPUCost = gpuCost
+	}
+
+	for id, isPreemptible := range preemptibleMap {
+		nodeFor(id).Preemptible = isPreemptible
+	}
+
+	for id, window := range activeDataMap {
+		node := nodeFor(id)
+		node.Start = window.start
+		node.End = window.end
+		node.Minutes = window.minutes
+	}
+
+	// Phase 2: fill in data that has no provider ID by looking up each
+	// existing node's cluster name/node name pair in the less-specific maps.
+	for id, node := range nodeMap {
+		looseID := nodeIdentifierNoProviderID{
+			Cluster: id.Cluster,
+			Name:    id.Name,
+		}
+
+		if cpuCores, found := cpuCoresMap[looseID]; found {
+			node.CPUCores = cpuCores
+		}
+
+		if bytes, found := ramBytesMap[looseID]; found {
+			node.RAMBytes = bytes
+		}
+
+		if userPct, found := ramUserPctMap[looseID]; found {
+			node.RAMBreakdown.User = userPct
+		}
+
+		if systemPct, found := ramSystemPctMap[looseID]; found {
+			node.RAMBreakdown.System = systemPct
+		}
+
+		if breakdown, found := cpuBreakdownMap[looseID]; found {
+			node.CPUBreakdown = breakdown
+		}
+
+		if nodeLabels, found := labelsMap[looseID]; found {
+			node.Labels = nodeLabels
+		}
+	}
+
+	return nodeMap
+}

+ 559 - 0
pkg/costmodel/cluster_helpers_test.go

@@ -0,0 +1,559 @@
+package costmodel
+
+import (
+	"reflect"
+	"testing"
+	"time"
+
+	"github.com/davecgh/go-spew/spew"
+)
+
+// TestMergeTypeMaps checks mergeTypeMaps against a table of inputs: both maps
+// empty, either map empty, disjoint key sets, and overlapping key sets.
+//
+// The "with overlap" case pins down the precedence rule this test relies on:
+// when a cluster/name key is present in both inputs, the value from map1 is
+// the one expected in the result.
+func TestMergeTypeMaps(t *testing.T) {
+	cases := []struct {
+		name     string
+		map1     map[nodeIdentifierNoProviderID]string
+		map2     map[nodeIdentifierNoProviderID]string
+		expected map[nodeIdentifierNoProviderID]string
+	}{
+		{
+			name:     "both empty",
+			map1:     map[nodeIdentifierNoProviderID]string{},
+			map2:     map[nodeIdentifierNoProviderID]string{},
+			expected: map[nodeIdentifierNoProviderID]string{},
+		},
+		{
+			name: "map2 empty",
+			map1: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+			map2: map[nodeIdentifierNoProviderID]string{},
+			expected: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+		},
+		{
+			// Mirror image of the previous case: only map2 carries data.
+			name: "map1 empty",
+			map1: map[nodeIdentifierNoProviderID]string{},
+			map2: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+			expected: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+		},
+		{
+			name: "no overlap",
+			map1: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+			map2: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node2"}: "type2",
+				{Cluster: "cluster1", Name: "node4"}: "type4",
+			},
+			expected: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+				{Cluster: "cluster1", Name: "node2"}: "type2",
+				{Cluster: "cluster1", Name: "node4"}: "type4",
+			},
+		},
+		{
+			// node1 appears in both inputs with different types; the
+			// expectation keeps map1's "type1" and drops map2's "type4".
+			name: "with overlap",
+			map1: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+			},
+			map2: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node2"}: "type2",
+				{Cluster: "cluster1", Name: "node1"}: "type4",
+			},
+			expected: map[nodeIdentifierNoProviderID]string{
+				{Cluster: "cluster1", Name: "node1"}: "type1",
+				{Cluster: "cluster1", Name: "node2"}: "type2",
+			},
+		},
+	}
+
+	for _, testCase := range cases {
+		// Run every case as a named subtest so a failure is attributed to
+		// exactly one table entry and single cases can be selected with -run.
+		t.Run(testCase.name, func(t *testing.T) {
+			result := mergeTypeMaps(testCase.map1, testCase.map2)
+
+			if !reflect.DeepEqual(result, testCase.expected) {
+				t.Errorf("mergeTypeMaps case %s failed. Got %+v but expected %+v", testCase.name, result, testCase.expected)
+			}
+		})
+	}
+}
+
+// TestBuildNodeMap drives buildNodeMap with a table of scenarios and compares
+// each result to a hand-written expectation using reflect.DeepEqual.
+//
+// Cost, preemptible and active-time inputs are keyed by NodeIdentifier
+// (cluster, name, provider ID), while cores, RAM, breakdown, label and
+// node-type inputs are keyed by nodeIdentifierNoProviderID (cluster, name).
+// The scenarios with overlapping node names expect every provider-ID-specific
+// entry to receive the data stored under the shared cluster/name key.
+func TestBuildNodeMap(t *testing.T) {
+	// fullID builds a key that includes the provider ID.
+	fullID := func(cluster, name, providerID string) NodeIdentifier {
+		return NodeIdentifier{Cluster: cluster, Name: name, ProviderID: providerID}
+	}
+	// shortID builds a key made of the cluster and node name only.
+	shortID := func(cluster, name string) nodeIdentifierNoProviderID {
+		return nodeIdentifierNoProviderID{Cluster: cluster, Name: name}
+	}
+
+	// Keys reused across the scenarios below.
+	node1 := shortID("cluster1", "node1")
+	node2 := shortID("cluster1", "node2")
+	node1NoProv := fullID("cluster1", "node1", "")
+	node1Single := fullID("cluster1", "node1", "prov_node1")
+	node1A := fullID("cluster1", "node1", "prov_node1_A")
+	node1B := fullID("cluster1", "node1", "prov_node1_B")
+	node2A := fullID("cluster1", "node2", "prov_node2_A")
+
+	// Active windows used by the "all fields" scenario; they share a start
+	// instant and differ only in their end.
+	start := time.Date(2020, 6, 16, 3, 45, 28, 0, time.UTC)
+	end1A := time.Date(2020, 6, 16, 9, 20, 39, 0, time.UTC)
+	end1B := time.Date(2020, 6, 16, 9, 21, 39, 0, time.UTC)
+	end2A := time.Date(2020, 6, 16, 9, 10, 39, 0, time.UTC)
+
+	// Window lengths in minutes matching the start/end pairs above.
+	const (
+		minutes1A = 5*60 + 35 + (11.0 / 60.0)
+		minutes1B = 5*60 + 36 + (11.0 / 60.0)
+		minutes2A = 5*60 + 25 + (11.0 / 60.0)
+	)
+
+	// scenario bundles one set of buildNodeMap inputs with the expected
+	// output. Fields left unset are passed to buildNodeMap as nil maps.
+	type scenario struct {
+		name                 string
+		cpuCostMap           map[NodeIdentifier]float64
+		ramCostMap           map[NodeIdentifier]float64
+		gpuCostMap           map[NodeIdentifier]float64
+		cpuCoresMap          map[nodeIdentifierNoProviderID]float64
+		ramBytesMap          map[nodeIdentifierNoProviderID]float64
+		ramUserPctMap        map[nodeIdentifierNoProviderID]float64
+		ramSystemPctMap      map[nodeIdentifierNoProviderID]float64
+		cpuBreakdownMap      map[nodeIdentifierNoProviderID]*ClusterCostsBreakdown
+		activeDataMap        map[NodeIdentifier]activeData
+		preemptibleMap       map[NodeIdentifier]bool
+		labelsMap            map[nodeIdentifierNoProviderID]map[string]string
+		clusterAndNameToType map[nodeIdentifierNoProviderID]string
+		expected             map[NodeIdentifier]*Node
+	}
+
+	scenarios := []scenario{
+		{
+			name:     "empty",
+			expected: map[NodeIdentifier]*Node{},
+		},
+		{
+			name:                 "just cpu cost",
+			cpuCostMap:           map[NodeIdentifier]float64{node1Single: 0.048},
+			clusterAndNameToType: map[nodeIdentifierNoProviderID]string{node1: "type1"},
+			expected: map[NodeIdentifier]*Node{
+				node1Single: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					ProviderID:   "prov_node1",
+					NodeType:     "type1",
+					CPUCost:      0.048,
+					CPUBreakdown: &ClusterCostsBreakdown{},
+					RAMBreakdown: &ClusterCostsBreakdown{},
+				},
+			},
+		},
+		{
+			name:                 "just cpu cost with empty provider ID",
+			cpuCostMap:           map[NodeIdentifier]float64{node1NoProv: 0.048},
+			clusterAndNameToType: map[nodeIdentifierNoProviderID]string{node1: "type1"},
+			expected: map[NodeIdentifier]*Node{
+				node1NoProv: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					NodeType:     "type1",
+					CPUCost:      0.048,
+					CPUBreakdown: &ClusterCostsBreakdown{},
+					RAMBreakdown: &ClusterCostsBreakdown{},
+				},
+			},
+		},
+		{
+			// Two provider IDs share one cluster/name pair; each must get
+			// its own entry and its own cost.
+			name: "cpu cost with overlapping node names",
+			cpuCostMap: map[NodeIdentifier]float64{
+				node1A: 0.048,
+				node1B: 0.087,
+			},
+			clusterAndNameToType: map[nodeIdentifierNoProviderID]string{node1: "type1"},
+			expected: map[NodeIdentifier]*Node{
+				node1A: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					ProviderID:   "prov_node1_A",
+					NodeType:     "type1",
+					CPUCost:      0.048,
+					CPUBreakdown: &ClusterCostsBreakdown{},
+					RAMBreakdown: &ClusterCostsBreakdown{},
+				},
+				node1B: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					ProviderID:   "prov_node1_B",
+					NodeType:     "type1",
+					CPUCost:      0.087,
+					CPUBreakdown: &ClusterCostsBreakdown{},
+					RAMBreakdown: &ClusterCostsBreakdown{},
+				},
+			},
+		},
+		{
+			name: "all fields + overlapping node names",
+			cpuCostMap: map[NodeIdentifier]float64{
+				node1A: 0.048,
+				node1B: 0.087,
+				node2A: 0.033,
+			},
+			ramCostMap: map[NodeIdentifier]float64{
+				node1A: 0.09,
+				node1B: 0.3,
+				node2A: 0.024,
+			},
+			gpuCostMap: map[NodeIdentifier]float64{
+				node1A: 0.8,
+				node1B: 1.4,
+				node2A: 3.1,
+			},
+			cpuCoresMap: map[nodeIdentifierNoProviderID]float64{
+				node1: 2.0,
+				node2: 5.0,
+			},
+			ramBytesMap: map[nodeIdentifierNoProviderID]float64{
+				node1: 2048.0,
+				node2: 6303.0,
+			},
+			ramUserPctMap: map[nodeIdentifierNoProviderID]float64{
+				node1: 30.0,
+				node2: 42.6,
+			},
+			ramSystemPctMap: map[nodeIdentifierNoProviderID]float64{
+				node1: 15.0,
+				node2: 20.1,
+			},
+			cpuBreakdownMap: map[nodeIdentifierNoProviderID]*ClusterCostsBreakdown{
+				node1: {System: 20.2, User: 68.0},
+				node2: {System: 28.9, User: 34.0},
+			},
+			activeDataMap: map[NodeIdentifier]activeData{
+				node1A: {start: start, end: end1A, minutes: minutes1A},
+				node1B: {start: start, end: end1B, minutes: minutes1B},
+				node2A: {start: start, end: end2A, minutes: minutes2A},
+			},
+			preemptibleMap: map[NodeIdentifier]bool{
+				node1A: true,
+				node1B: false,
+				node2A: false,
+			},
+			labelsMap: map[nodeIdentifierNoProviderID]map[string]string{
+				node1: {
+					"labelname1_A": "labelvalue1_A",
+					"labelname1_B": "labelvalue1_B",
+				},
+				node2: {
+					"labelname2_A": "labelvalue2_A",
+					"labelname2_B": "labelvalue2_B",
+				},
+			},
+			clusterAndNameToType: map[nodeIdentifierNoProviderID]string{
+				node1: "type1",
+				node2: "type2",
+			},
+			expected: map[NodeIdentifier]*Node{
+				node1A: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					ProviderID:   "prov_node1_A",
+					NodeType:     "type1",
+					CPUCost:      0.048,
+					RAMCost:      0.09,
+					GPUCost:      0.8,
+					CPUCores:     2.0,
+					RAMBytes:     2048.0,
+					RAMBreakdown: &ClusterCostsBreakdown{User: 30.0, System: 15.0},
+					CPUBreakdown: &ClusterCostsBreakdown{System: 20.2, User: 68.0},
+					Start:        start,
+					End:          end1A,
+					Minutes:      minutes1A,
+					Preemptible:  true,
+					Labels: map[string]string{
+						"labelname1_A": "labelvalue1_A",
+						"labelname1_B": "labelvalue1_B",
+					},
+				},
+				node1B: {
+					Cluster:      "cluster1",
+					Name:         "node1",
+					ProviderID:   "prov_node1_B",
+					NodeType:     "type1",
+					CPUCost:      0.087,
+					RAMCost:      0.3,
+					GPUCost:      1.4,
+					CPUCores:     2.0,
+					RAMBytes:     2048.0,
+					RAMBreakdown: &ClusterCostsBreakdown{User: 30.0, System: 15.0},
+					CPUBreakdown: &ClusterCostsBreakdown{System: 20.2, User: 68.0},
+					Start:        start,
+					End:          end1B,
+					Minutes:      minutes1B,
+					Preemptible:  false,
+					Labels: map[string]string{
+						"labelname1_A": "labelvalue1_A",
+						"labelname1_B": "labelvalue1_B",
+					},
+				},
+				node2A: {
+					Cluster:      "cluster1",
+					Name:         "node2",
+					ProviderID:   "prov_node2_A",
+					NodeType:     "type2",
+					CPUCost:      0.033,
+					RAMCost:      0.024,
+					GPUCost:      3.1,
+					CPUCores:     5.0,
+					RAMBytes:     6303.0,
+					RAMBreakdown: &ClusterCostsBreakdown{User: 42.6, System: 20.1},
+					CPUBreakdown: &ClusterCostsBreakdown{System: 28.9, User: 34.0},
+					Start:        start,
+					End:          end2A,
+					Minutes:      minutes2A,
+					Preemptible:  false,
+					Labels: map[string]string{
+						"labelname2_A": "labelvalue2_A",
+						"labelname2_B": "labelvalue2_B",
+					},
+				},
+			},
+		},
+	}
+
+	for _, sc := range scenarios {
+		got := buildNodeMap(
+			sc.cpuCostMap, sc.ramCostMap, sc.gpuCostMap,
+			sc.cpuCoresMap, sc.ramBytesMap,
+			sc.ramUserPctMap, sc.ramSystemPctMap,
+			sc.cpuBreakdownMap,
+			sc.activeDataMap,
+			sc.preemptibleMap,
+			sc.labelsMap,
+			sc.clusterAndNameToType,
+		)
+		if reflect.DeepEqual(got, sc.expected) {
+			continue
+		}
+
+		t.Errorf("buildNodeMap case %s failed. Got %+v but expected %+v", sc.name, got, sc.expected)
+
+		// The map values are pointers, so dump both sides with spew, which
+		// follows them, to make the mismatching field visible.
+		t.Logf("Got: %s", spew.Sdump(got))
+		t.Logf("Expected: %s", spew.Sdump(sc.expected))
+	}
+}