Explorar el Código

Merge branch 'develop' into parcs-accum-support

Alex Meijer hace 3 años
padre
commit
cd5b35d459

+ 16 - 0
ADOPTERS.MD

@@ -0,0 +1,16 @@
+# OpenCost Adopters
+
+This page contains a list of organizations who are users of OpenCost, following the [definitions provided by the CNCF](https://github.com/cncf/toc/blob/main/FAQ.md#what-is-the-definition-of-an-adopter).
+
+If you would like to be included in this table, please submit a PR to this file or comment to [this issue](https://github.com/opencost/opencost/issues/1831) and your information will be added.
+
+## Adopters
+
+| Organization                               | Product/Project Name              | Status                 | More Information           |
+| ------------------------------------------ | --------------------------------- | ---------------------- | -------------------------- |
+| Kubecost                                   | Kubecost Free/Business/Enterprise | Service Provider       | [Kubecost](https://kubecost.com) |
+| CloudAdmin                                 | *                                 | Service Provider       | [CloudAdmin](https://www.cloudadmin.io) |
+| National Information Solutions Cooperative | *                                 | End User               | [National Information Solutions Cooperative](https://www.nisc.coop) |
+| Grafana Labs                               | *                                 | End User               | [How Grafana Labs uses and contributes to OpenCost](https://grafana.com/blog/2023/02/02/how-grafana-labs-uses-and-contributes-to-opencost-the-open-source-project-for-real-time-cost-monitoring-in-kubernetes/) |
+| Microsoft                                  | *                                 | Service Provider       | [Leverage OpenCost on Azure Kubernetes Service](http://aka.ms/aks/OpenCost-AKS) |
+| mindcurv group                             | *                                 | Consultancy            | [mindcurv group](https://mindcurv.com/en/) |

+ 1 - 0
pkg/cloud/awsprovider.go

@@ -60,6 +60,7 @@ const (
 	AWSHourlyPublicIPCost    = 0.005
 	EKSCapacityTypeLabel     = "eks.amazonaws.com/capacityType"
 	EKSCapacitySpotTypeValue = "SPOT"
+	
 )
 
 var (

+ 1 - 0
pkg/cloud/gcpprovider.go

@@ -46,6 +46,7 @@ const (
 
 	GKEPreemptibleLabel = "cloud.google.com/gke-preemptible"
 	GKESpotLabel        = "cloud.google.com/gke-spot"
+	
 )
 
 // List obtained by installing the `gcloud` CLI tool,

+ 0 - 1
pkg/cloud/scalewayprovider.go

@@ -49,7 +49,6 @@ type Scaleway struct {
 func (c *Scaleway) PricingSourceSummary() interface{} {
 	return c.Pricing
 }
-
 func (c *Scaleway) DownloadPricingData() error {
 	c.DownloadPricingDataLock.Lock()
 	defer c.DownloadPricingDataLock.Unlock()

+ 1 - 0
pkg/cmd/costmodel/costmodel.go

@@ -43,6 +43,7 @@ func Execute(opts *CostModelOpts) error {
 	a.Router.GET("/healthz", Healthz)
 	a.Router.GET("/allocation", a.ComputeAllocationHandler)
 	a.Router.GET("/allocation/summary", a.ComputeAllocationHandlerSummary)
+	a.Router.GET("/assets", a.ComputeAssetsHandler)
 	rootMux.Handle("/", a.Router)
 	rootMux.Handle("/metrics", promhttp.Handler())
 	telemetryHandler := metrics.ResponseMetricMiddleware(rootMux)

+ 9 - 6
pkg/costmodel/allocation.go

@@ -115,7 +115,8 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 
 	// If the duration is short enough, compute the AllocationSet directly
 	if end.Sub(start) <= cm.MaxPrometheusQueryDuration {
-		return cm.computeAllocation(start, end, resolution)
+		as, _, err := cm.computeAllocation(start, end, resolution)
+		return as, err
 	}
 
 	// If the duration exceeds the configured MaxPrometheusQueryDuration, then
@@ -142,7 +143,7 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Dur
 		e = s.Add(duration)
 
 		// Compute the individual AllocationSet for just (s, e)
-		as, err := cm.computeAllocation(s, e, resolution)
+		as, _, err := cm.computeAllocation(s, e, resolution)
 		if err != nil {
 			return kubecost.NewAllocationSet(start, end), fmt.Errorf("error computing allocation for %s: %s", kubecost.NewClosedWindow(s, e), err)
 		}
@@ -307,7 +308,7 @@ func (cm *CostModel) DateRange() (time.Time, time.Time, error) {
 	return oldest, newest, nil
 }
 
-func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
+func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, map[nodeKey]*nodePricing, error) {
 	// 1. Build out Pod map from resolution-tuned, batched Pod start/end query
 	// 2. Run and apply the results of the remaining queries to
 	// 3. Build out AllocationSet from completed Pod map
@@ -364,7 +365,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	// Query for the duration between start and end
 	durStr := timeutil.DurationString(end.Sub(start))
 	if durStr == "" {
-		return allocSet, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
+		return allocSet, nil, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
 	}
 
 	// Convert resolution duration to a query-ready string
@@ -536,6 +537,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
 	resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
 	resNodeIsSpot, _ := resChNodeIsSpot.Await()
+	nodeExtendedData, _ := queryExtendedNodeData(ctx, start, end, durStr, resStr)
 
 	resPVActiveMins, _ := resChPVActiveMins.Await()
 	resPVBytes, _ := resChPVBytes.Await()
@@ -580,7 +582,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 			log.Errorf("CostModel.ComputeAllocation: query context error %s", err)
 		}
 
-		return allocSet, ctx.ErrorCollection()
+		return allocSet, nil, ctx.ErrorCollection()
 	}
 
 	// We choose to apply allocation before requests in the cases of RAM and
@@ -681,6 +683,7 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 	applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
 	applyNodeSpot(nodeMap, resNodeIsSpot)
 	applyNodeDiscount(nodeMap, cm)
+	applyExtendedNodeData(nodeMap, nodeExtendedData)
 	cm.applyNodesToPod(podMap, nodeMap)
 
 	// (3) Build out AllocationSet from Pod map
@@ -699,5 +702,5 @@ func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Dur
 		}
 	}
 
-	return allocSet, nil
+	return allocSet, nodeMap, nil
 }

+ 4 - 1
pkg/costmodel/allocation_helpers.go

@@ -1642,7 +1642,10 @@ func (cm *CostModel) getNodePricing(nodeMap map[nodeKey]*nodePricing, nodeKey no
 		if nodeKey.Node != "" {
 			log.DedupedWarningf(5, "CostModel: failed to find node for %s", nodeKey)
 		}
-		return cm.getCustomNodePricing(false, "")
+		// since the node pricing data is not found, and this won't change for the duration of the allocation
+		// build process, we can update the node map with the defaults to prevent future failed lookups
+		nodeMap[nodeKey] = cm.getCustomNodePricing(false, "")
+		return nodeMap[nodeKey]
 	}
 
 	// If custom pricing is enabled and can be retrieved, override detected

+ 377 - 0
pkg/costmodel/allocation_incubating.go

@@ -0,0 +1,377 @@
+//go:build incubating
+
+package costmodel
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/opencost/opencost/pkg/env"
+	"github.com/opencost/opencost/pkg/kubecost"
+	"github.com/opencost/opencost/pkg/log"
+	"github.com/opencost/opencost/pkg/prom"
+)
+
+const (
+	queryFmtNodeCPUCores = `avg(avg_over_time(kube_node_status_capacity_cpu_cores[%s])) by (%s, node)`
+	queryFmtNodeRAMBytes = `avg(avg_over_time(kube_node_status_capacity_memory_bytes[%s])) by (%s, node)`
+	queryFmtNodeGPUCount = `avg(avg_over_time(node_gpu_count[%s])) by (%s, node, provider_id)`
+)
+
+// NodeTotals contains the cpu, ram, and gpu costs for a given node over a specific timeframe.
+type NodeTotals struct {
+	Start   time.Time
+	End     time.Time
+	Cluster string
+	Node    string
+	CPUCost float64
+	RAMCost float64
+	GPUCost float64
+}
+
+// ComputeAllocationWithNodeTotals uses the CostModel instance to compute an AllocationSet
+// for the window defined by the given start and end times. The Allocations returned are unaggregated
+// (i.e. down to the container level), and the node totals should contained additional data that can be
+// used to calculate the idle costs at the node level.
+func (cm *CostModel) ComputeAllocationWithNodeTotals(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, map[string]*NodeTotals, error) {
+	nodeMap := make(map[string]*NodeTotals)
+
+	// If the duration is short enough, compute the AllocationSet directly
+	if end.Sub(start) <= cm.MaxPrometheusQueryDuration {
+		as, nodeData, err := cm.computeAllocation(start, end, resolution)
+		appendNodeData(nodeMap, start, end, nodeData)
+
+		return as, nodeMap, err
+	}
+
+	// If the duration exceeds the configured MaxPrometheusQueryDuration, then
+	// query for maximum-sized AllocationSets, collect them, and accumulate.
+
+	// s and e track the coverage of the entire given window over multiple
+	// internal queries.
+	s, e := start, start
+
+	// Collect AllocationSets in a range, then accumulate
+	// TODO optimize by collecting consecutive AllocationSets, accumulating as we go
+	asr := kubecost.NewAllocationSetRange()
+
+	for e.Before(end) {
+		// By default, query for the full remaining duration. But do not let
+		// any individual query duration exceed the configured max Prometheus
+		// query duration.
+		duration := end.Sub(e)
+		if duration > cm.MaxPrometheusQueryDuration {
+			duration = cm.MaxPrometheusQueryDuration
+		}
+
+		// Set start and end parameters (s, e) for next individual computation.
+		e = s.Add(duration)
+
+		// Compute the individual AllocationSet for just (s, e)
+		as, nodeData, err := cm.computeAllocation(s, e, resolution)
+		appendNodeData(nodeMap, s, e, nodeData)
+		if err != nil {
+			return kubecost.NewAllocationSet(start, end), nodeMap, fmt.Errorf("error computing allocation for %s: %s", kubecost.NewClosedWindow(s, e), err)
+		}
+
+		// Append to the range
+		asr.Append(as)
+
+		// Set s equal to e to set up the next query, if one exists.
+		s = e
+	}
+
+	// Populate annotations, labels, and services on each Allocation. This is
+	// necessary because Properties.Intersection does not propagate any values
+	// stored in maps or slices for performance reasons. In this case, however,
+	// it is both acceptable and necessary to do so.
+	allocationAnnotations := map[string]map[string]string{}
+	allocationLabels := map[string]map[string]string{}
+	allocationServices := map[string]map[string]bool{}
+
+	// Also record errors and warnings, then append them to the results later.
+	errors := []string{}
+	warnings := []string{}
+
+	for _, as := range asr.Allocations {
+		for k, a := range as.Allocations {
+			if len(a.Properties.Annotations) > 0 {
+				if _, ok := allocationAnnotations[k]; !ok {
+					allocationAnnotations[k] = map[string]string{}
+				}
+				for name, val := range a.Properties.Annotations {
+					allocationAnnotations[k][name] = val
+				}
+			}
+
+			if len(a.Properties.Labels) > 0 {
+				if _, ok := allocationLabels[k]; !ok {
+					allocationLabels[k] = map[string]string{}
+				}
+				for name, val := range a.Properties.Labels {
+					allocationLabels[k][name] = val
+				}
+			}
+
+			if len(a.Properties.Services) > 0 {
+				if _, ok := allocationServices[k]; !ok {
+					allocationServices[k] = map[string]bool{}
+				}
+				for _, val := range a.Properties.Services {
+					allocationServices[k][val] = true
+				}
+			}
+		}
+
+		errors = append(errors, as.Errors...)
+		warnings = append(warnings, as.Warnings...)
+	}
+
+	// Accumulate to yield the result AllocationSet. After this step, we will
+	// be nearly complete, but without the raw allocation data, which must be
+	// recomputed.
+	resultASR, err := asr.Accumulate(kubecost.AccumulateOptionAll)
+	if err != nil {
+		return kubecost.NewAllocationSet(start, end), nil, fmt.Errorf("error accumulating data for %s: %s", kubecost.NewClosedWindow(s, e), err)
+	}
+	if resultASR != nil && len(resultASR.Allocations) == 0 {
+		return kubecost.NewAllocationSet(start, end), nil, nil
+	}
+	if length := len(resultASR.Allocations); length != 1 {
+		return kubecost.NewAllocationSet(start, end), nil, fmt.Errorf("expected 1 accumulated allocation set, found %d sets", length)
+	}
+	result := resultASR.Allocations[0]
+
+	// Apply the annotations, labels, and services to the post-accumulation
+	// results. (See above for why this is necessary.)
+	for k, a := range result.Allocations {
+		if annotations, ok := allocationAnnotations[k]; ok {
+			a.Properties.Annotations = annotations
+		}
+
+		if labels, ok := allocationLabels[k]; ok {
+			a.Properties.Labels = labels
+		}
+
+		if services, ok := allocationServices[k]; ok {
+			a.Properties.Services = []string{}
+			for s := range services {
+				a.Properties.Services = append(a.Properties.Services, s)
+			}
+		}
+
+		// Expand the Window of all Allocations within the AllocationSet
+		// to match the Window of the AllocationSet, which gets expanded
+		// at the end of this function.
+		a.Window = a.Window.ExpandStart(start).ExpandEnd(end)
+	}
+
+	// Maintain RAM and CPU max usage values by iterating over the range,
+	// computing maximums on a rolling basis, and setting on the result set.
+	for _, as := range asr.Allocations {
+		for key, alloc := range as.Allocations {
+			resultAlloc := result.Get(key)
+			if resultAlloc == nil {
+				continue
+			}
+
+			if resultAlloc.RawAllocationOnly == nil {
+				resultAlloc.RawAllocationOnly = &kubecost.RawAllocationOnlyData{}
+			}
+
+			if alloc.RawAllocationOnly == nil {
+				// This will happen inevitably for unmounted disks, but should
+				// ideally not happen for any allocation with CPU and RAM data.
+				if !alloc.IsUnmounted() {
+					log.DedupedWarningf(10, "ComputeAllocation: raw allocation data missing for %s", key)
+				}
+				continue
+			}
+
+			if alloc.RawAllocationOnly.CPUCoreUsageMax > resultAlloc.RawAllocationOnly.CPUCoreUsageMax {
+				resultAlloc.RawAllocationOnly.CPUCoreUsageMax = alloc.RawAllocationOnly.CPUCoreUsageMax
+			}
+
+			if alloc.RawAllocationOnly.RAMBytesUsageMax > resultAlloc.RawAllocationOnly.RAMBytesUsageMax {
+				resultAlloc.RawAllocationOnly.RAMBytesUsageMax = alloc.RawAllocationOnly.RAMBytesUsageMax
+			}
+		}
+	}
+
+	// Expand the window to match the queried time range.
+	result.Window = result.Window.ExpandStart(start).ExpandEnd(end)
+
+	// Append errors and warnings
+	result.Errors = errors
+	result.Warnings = warnings
+
+	return result, nodeMap, nil
+}
+
+func appendNodeData(nodeMap map[string]*NodeTotals, s, e time.Time, nodeData map[nodeKey]*nodePricing) {
+	for k, v := range nodeData {
+		key := k.String()
+		if _, ok := nodeMap[key]; !ok {
+			nodeMap[key] = &NodeTotals{
+				Start:   s,
+				End:     e,
+				Cluster: k.Cluster,
+				Node:    k.Node,
+				CPUCost: 0.0,
+				RAMCost: 0.0,
+				GPUCost: 0.0,
+			}
+		}
+
+		hours := e.Sub(s).Hours()
+
+		// NOTE: These theoretically shouldn't overlap due to the way the
+		// metrics are accumulated, so this logic is safe.
+		if s.Before(nodeMap[key].Start) {
+			nodeMap[key].Start = s
+		}
+		if e.After(nodeMap[key].End) {
+			nodeMap[key].End = e
+		}
+		nodeMap[key].CPUCost += v.CPUCores * (v.CostPerCPUHr * hours)
+		nodeMap[key].RAMCost += v.RAMGiB * (v.CostPerRAMGiBHr * hours)
+		nodeMap[key].GPUCost += v.GPUCount * (v.CostPerGPUHr * hours)
+	}
+}
+
+// extendedNodeQueryResults is a placeholder data type for the incubating
+// feature for extending the node details that can be returned with allocation
+// data
+type extendedNodeQueryResults struct {
+	nodeCPUCoreResults  []*prom.QueryResult
+	nodeRAMByteResults  []*prom.QueryResult
+	nodeGPUCountResults []*prom.QueryResult
+}
+
+// queryExtendedNodeData makes additional Prometheus queries for node data to append to
+// the nodePricing struct.
+func queryExtendedNodeData(ctx *prom.Context, start, end time.Time, durStr, resStr string) (*extendedNodeQueryResults, error) {
+	queryNodeCPUCores := fmt.Sprintf(queryFmtNodeCPUCores, durStr, env.GetPromClusterLabel())
+	resChQueryNodeCPUCores := ctx.QueryAtTime(queryNodeCPUCores, end)
+
+	queryNodeRAMBytes := fmt.Sprintf(queryFmtNodeRAMBytes, durStr, env.GetPromClusterLabel())
+	resChQueryNodeRAMBytes := ctx.QueryAtTime(queryNodeRAMBytes, end)
+
+	queryNodeGPUCount := fmt.Sprintf(queryFmtNodeGPUCount, durStr, env.GetPromClusterLabel())
+	resChQueryNodeGPUCount := ctx.QueryAtTime(queryNodeGPUCount, end)
+
+	nodeCPUCoreResults, _ := resChQueryNodeCPUCores.Await()
+	nodeRAMByteResults, _ := resChQueryNodeRAMBytes.Await()
+	nodeGPUCountResults, _ := resChQueryNodeGPUCount.Await()
+
+	return &extendedNodeQueryResults{
+		nodeCPUCoreResults:  nodeCPUCoreResults,
+		nodeRAMByteResults:  nodeRAMByteResults,
+		nodeGPUCountResults: nodeGPUCountResults,
+	}, nil
+}
+
+// applyExtendedNodeData applies the extended node query results, appending
+// additional node data to the given node map
+func applyExtendedNodeData(nodeMap map[nodeKey]*nodePricing, results *extendedNodeQueryResults) {
+	if results == nil {
+		log.Warnf("Extended Node Results were nil. Ignoring...")
+		return
+	}
+
+	applyNodeCPUCores(nodeMap, results.nodeCPUCoreResults)
+	applyNodeRAMBytes(nodeMap, results.nodeRAMByteResults)
+	applyNodeGPUCount(nodeMap, results.nodeGPUCountResults)
+}
+
+func applyNodeCPUCores(nodeMap map[nodeKey]*nodePricing, nodeCPUCoreResults []*prom.QueryResult) {
+	for _, res := range nodeCPUCoreResults {
+		cluster, err := res.GetString(env.GetPromClusterLabel())
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+
+		node, err := res.GetString("node")
+		if err != nil {
+			log.Warnf("CostModel.ComputeAllocation: Node CPU Cores query result missing field: %s", err)
+			continue
+		}
+
+		key := newNodeKey(cluster, node)
+		if _, ok := nodeMap[key]; !ok {
+			log.Warnf("Unexpectedly found node key that doesn't exist: %s-%s", cluster, node)
+			nodeMap[key] = &nodePricing{
+				Name: node,
+			}
+		}
+
+		nodeMap[key].CPUCores = res.Values[0].Value
+	}
+}
+
+func applyNodeRAMBytes(nodeMap map[nodeKey]*nodePricing, nodeRAMByteResults []*prom.QueryResult) {
+	for _, res := range nodeRAMByteResults {
+		cluster, err := res.GetString(env.GetPromClusterLabel())
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+
+		node, err := res.GetString("node")
+		if err != nil {
+			log.Warnf("CostModel.ComputeAllocation: Node RAM Bytes query result missing field: %s", err)
+			continue
+		}
+
+		key := newNodeKey(cluster, node)
+		if _, ok := nodeMap[key]; !ok {
+			log.Warnf("Unexpectedly found node key that doesn't exist: %s-%s", cluster, node)
+			nodeMap[key] = &nodePricing{
+				Name: node,
+			}
+		}
+
+		nodeMap[key].RAMGiB = res.Values[0].Value / 1024.0 / 1024.0 / 1024.0
+	}
+}
+
+func applyNodeGPUCount(nodeMap map[nodeKey]*nodePricing, nodeGPUCountResults []*prom.QueryResult) {
+	for _, res := range nodeGPUCountResults {
+		cluster, err := res.GetString(env.GetPromClusterLabel())
+		if err != nil {
+			cluster = env.GetClusterID()
+		}
+
+		node, err := res.GetString("node")
+		if err != nil {
+			log.Warnf("CostModel.ComputeAllocation: Node GPU Count query result missing field: %s", err)
+			continue
+		}
+
+		key := newNodeKey(cluster, node)
+		if _, ok := nodeMap[key]; !ok {
+			log.Warnf("Unexpectedly found node key that doesn't exist: %s-%s", cluster, node)
+			nodeMap[key] = &nodePricing{
+				Name: node,
+			}
+		}
+
+		nodeMap[key].GPUCount = res.Values[0].Value
+	}
+}
+
+// nodePricing describes the resource costs associated with a given node,
+// as well as the source of the information (e.g. prometheus, custom)
+type nodePricing struct {
+	Name            string
+	NodeType        string
+	ProviderID      string
+	Preemptible     bool
+	CPUCores        float64
+	CostPerCPUHr    float64
+	RAMGiB          float64
+	CostPerRAMGiBHr float64
+	GPUCount        float64
+	CostPerGPUHr    float64
+	Discount        float64
+	Source          string
+}

+ 0 - 14
pkg/costmodel/allocation_types.go

@@ -212,17 +212,3 @@ type lbCost struct {
 	Start     time.Time
 	End       time.Time
 }
-
-// NodePricing describes the resource costs associated with a given node, as
-// well as the source of the information (e.g. prometheus, custom)
-type nodePricing struct {
-	Name            string
-	NodeType        string
-	ProviderID      string
-	Preemptible     bool
-	CostPerCPUHr    float64
-	CostPerRAMGiBHr float64
-	CostPerGPUHr    float64
-	Discount        float64
-	Source          string
-}

+ 46 - 0
pkg/costmodel/allocationnode.go

@@ -0,0 +1,46 @@
+//go:build !incubating
+
+package costmodel
+
+import (
+	"time"
+
+	"github.com/opencost/opencost/pkg/prom"
+)
+
+// These implementations are placeholders to allow conditional compilation of
+// incubating features to be enabled specifically without introducing conflicts
+// with the existing codebase. Since go only supports file scoped conditional
+// compilation, we need to define these no-op functions in a separate file.
+
+// Once a change is approved to move from incubation to a feature, the methods
+// defined here can be moved to the calling file and the build tag removed.
+
+// extendedNodeQueryResults is a placeholder data type for the incubating
+// feature for extending the node details that can be returned with allocation
+// data
+type extendedNodeQueryResults struct{}
+
+// queryExtendedNodeData is a placeholder function for the incubating feature
+func queryExtendedNodeData(ctx *prom.Context, start, end time.Time, durStr, resStr string) (*extendedNodeQueryResults, error) {
+	return &extendedNodeQueryResults{}, nil
+}
+
+// applyExtendedNodeData is a placeholder function for the incubating feature
+// which appends additional node data to the given node map
+func applyExtendedNodeData(nodeMap map[nodeKey]*nodePricing, results *extendedNodeQueryResults) {
+}
+
+// nodePricing describes the resource costs associated with a given node,
+// as well as the source of the information (e.g. prometheus, custom)
+type nodePricing struct {
+	Name            string
+	NodeType        string
+	ProviderID      string
+	Preemptible     bool
+	CostPerCPUHr    float64
+	CostPerRAMGiBHr float64
+	CostPerGPUHr    float64
+	Discount        float64
+	Source          string
+}

+ 34 - 0
pkg/costmodel/handlers.go

@@ -0,0 +1,34 @@
+package costmodel
+
+import (
+	"fmt"
+	"net/http"
+
+	"github.com/julienschmidt/httprouter"
+	"github.com/opencost/opencost/pkg/env"
+	"github.com/opencost/opencost/pkg/kubecost"
+	"github.com/opencost/opencost/pkg/util/httputil"
+)
+
+// ComputeAssetsHandler returns the assets from the CostModel.
+func (a *Accesses) ComputeAssetsHandler(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+	w.Header().Set("Content-Type", "application/json")
+
+	qp := httputil.NewQueryParams(r.URL.Query())
+
+	// Window is a required field describing the window of time over which to
+	// compute allocation data.
+	window, err := kubecost.ParseWindowWithOffset(qp.Get("window", ""), env.GetParsedUTCOffset())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Invalid 'window' parameter: %s", err), http.StatusBadRequest)
+		return
+	}
+
+	assetSet, err := a.Model.ComputeAssets(*window.Start(), *window.End())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("Error computing asset set: %s", err), http.StatusInternalServerError)
+		return
+	}
+
+	w.Write(WrapData(assetSet, nil))
+}

+ 64 - 64
pkg/costmodel/metrics.go

@@ -144,131 +144,131 @@ func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider model
 
 	metricsInit.Do(func() {
 
+		cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "node_cpu_hourly_cost",
+			Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["node_cpu_hourly_cost"]; !disabled {
-			cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "node_cpu_hourly_cost",
-				Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, cpuGv)
 		}
 
+		ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "node_ram_hourly_cost",
+			Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["node_ram_hourly_cost"]; !disabled {
-			ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "node_ram_hourly_cost",
-				Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, ramGv)
 		}
 
+		gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "node_gpu_hourly_cost",
+			Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["node_gpu_hourly_cost"]; !disabled {
-			gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "node_gpu_hourly_cost",
-				Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, gpuGv)
 		}
 
+		gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "node_gpu_count",
+			Help: "node_gpu_count count of gpu on this node",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["node_gpu_count"]; !disabled {
-			gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "node_gpu_count",
-				Help: "node_gpu_count count of gpu on this node",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, gpuCountGv)
 		}
 
+		pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "pv_hourly_cost",
+			Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
+		}, []string{"volumename", "persistentvolume", "provider_id"})
 		if _, disabled := disabledMetrics["pv_hourly_cost"]; !disabled {
-			pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "pv_hourly_cost",
-				Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
-			}, []string{"volumename", "persistentvolume", "provider_id"})
 			toRegisterGV = append(toRegisterGV, pvGv)
 		}
 
+		spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "kubecost_node_is_spot",
+			Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["kubecost_node_is_spot"]; !disabled {
-			spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "kubecost_node_is_spot",
-				Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, spotGv)
 		}
 
+		totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "node_total_hourly_cost",
+			Help: "node_total_hourly_cost Total node cost per hour",
+		}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 		if _, disabled := disabledMetrics["node_total_hourly_cost"]; !disabled {
-			totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "node_total_hourly_cost",
-				Help: "node_total_hourly_cost Total node cost per hour",
-			}, []string{"instance", "node", "instance_type", "region", "provider_id"})
 			toRegisterGV = append(toRegisterGV, totalGv)
 		}
 
+		ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "container_memory_allocation_bytes",
+			Help: "container_memory_allocation_bytes Bytes of RAM used",
+		}, []string{"namespace", "pod", "container", "instance", "node"})
 		if _, disabled := disabledMetrics["container_memory_allocation_bytes"]; !disabled {
-			ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "container_memory_allocation_bytes",
-				Help: "container_memory_allocation_bytes Bytes of RAM used",
-			}, []string{"namespace", "pod", "container", "instance", "node"})
 			toRegisterGV = append(toRegisterGV, ramAllocGv)
 		}
 
+		cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "container_cpu_allocation",
+			Help: "container_cpu_allocation Percent of a single CPU used in a minute",
+		}, []string{"namespace", "pod", "container", "instance", "node"})
 		if _, disabled := disabledMetrics["container_cpu_allocation"]; !disabled {
-			cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "container_cpu_allocation",
-				Help: "container_cpu_allocation Percent of a single CPU used in a minute",
-			}, []string{"namespace", "pod", "container", "instance", "node"})
 			toRegisterGV = append(toRegisterGV, cpuAllocGv)
 		}
 
+		gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "container_gpu_allocation",
+			Help: "container_gpu_allocation GPU used",
+		}, []string{"namespace", "pod", "container", "instance", "node"})
 		if _, disabled := disabledMetrics["container_gpu_allocation"]; !disabled {
-			gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "container_gpu_allocation",
-				Help: "container_gpu_allocation GPU used",
-			}, []string{"namespace", "pod", "container", "instance", "node"})
 			toRegisterGV = append(toRegisterGV, gpuAllocGv)
 		}
 
+		pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "pod_pvc_allocation",
+			Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
+		}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
 		if _, disabled := disabledMetrics["pod_pvc_allocation"]; !disabled {
-			pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "pod_pvc_allocation",
-				Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
-			}, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
 			toRegisterGV = append(toRegisterGV, pvAllocGv)
 		}
 
+		networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: "kubecost_network_zone_egress_cost",
+			Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
+		})
 		if _, disabled := disabledMetrics["kubecost_network_zone_egress_cost"]; !disabled {
-			networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-				Name: "kubecost_network_zone_egress_cost",
-				Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
-			})
 			toRegisterGauge = append(toRegisterGauge, networkZoneEgressCostG)
 		}
 
+		networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: "kubecost_network_region_egress_cost",
+			Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
+		})
 		if _, disabled := disabledMetrics["kubecost_network_region_egress_cost"]; !disabled {
-			networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-				Name: "kubecost_network_region_egress_cost",
-				Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
-			})
 			toRegisterGauge = append(toRegisterGauge, networkRegionEgressCostG)
 		}
 
+		networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: "kubecost_network_internet_egress_cost",
+			Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
+		})
 		if _, disabled := disabledMetrics["kubecost_network_internet_egress_cost"]; !disabled {
-			networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
-				Name: "kubecost_network_internet_egress_cost",
-				Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
-			})
 			toRegisterGauge = append(toRegisterGauge, networkInternetEgressCostG)
 		}
 
+		clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "kubecost_cluster_management_cost",
+			Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
+		}, []string{"provisioner_name"})
 		if _, disabled := disabledMetrics["kubecost_cluster_management_cost"]; !disabled {
-			clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-				Name: "kubecost_cluster_management_cost",
-				Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
-			}, []string{"provisioner_name"})
 			toRegisterGV = append(toRegisterGV, clusterManagementCostGv)
 		}
 
+		lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
+			Name: "kubecost_load_balancer_cost",
+			Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
+		}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
 		if _, disabled := disabledMetrics["kubecost_load_balancer_cost"]; !disabled {
-			lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
-				Name: "kubecost_load_balancer_cost",
-				Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
-			}, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
 			toRegisterGV = append(toRegisterGV, lbCostGv)
 		}
 

+ 1 - 1
pkg/kubecost/allocationprops.go

@@ -105,7 +105,7 @@ type AllocationProperties struct {
 	Annotations    AllocationAnnotations `json:"annotations,omitempty"`
 	// When set to true, maintain the intersection of all labels + annotations
 	// in the aggregated AllocationProperties object
-	AggregatedMetadata bool `json:"-"`
+	AggregatedMetadata bool `json:"-"` //@bingen:field[ignore]
 }
 
 // AllocationLabels is a schema-free mapping of key/value pairs that can be

+ 26 - 0
pkg/kubecost/asset.go

@@ -4,6 +4,7 @@ import (
 	"encoding"
 	"fmt"
 	"math"
+	"regexp"
 	"strings"
 	"time"
 
@@ -3792,3 +3793,28 @@ func contains(slice []string, item string) bool {
 	}
 	return false
 }
+
+func GetNodePoolName(provider string, labels map[string]string) string {
+
+	switch provider {
+	case AzureProvider:
+		return getPoolNameHelper(AKSNodepoolLabel, labels)
+	case AWSProvider:
+		return getPoolNameHelper(EKSNodepoolLabel, labels)
+	case GCPProvider:
+		return getPoolNameHelper(GKENodePoolLabel, labels)
+	default:
+		log.Warnf("node pool name not supported for this provider")
+		return ""
+	}
+}
+
+func getPoolNameHelper(label string, labels map[string]string) string {
+	sanitizedLabel := regexp.MustCompile(`[^a-zA-Z0-9 ]+`).ReplaceAllString(label, "_")
+	if poolName, found := labels[fmt.Sprintf("label_%s", sanitizedLabel)]; found {
+		return poolName
+	} else {
+		log.Warnf("unable to derive node pool name from node labels")
+		return ""
+	}
+}

+ 4 - 0
pkg/kubecost/asset_json.go

@@ -477,6 +477,9 @@ func (n *Node) MarshalJSON() ([]byte, error) {
 	jsonEncodeString(buffer, "end", n.End.Format(time.RFC3339), ",")
 	jsonEncodeFloat64(buffer, "minutes", n.Minutes(), ",")
 	jsonEncodeString(buffer, "nodeType", n.NodeType, ",")
+	if poolName := GetNodePoolName(n.Properties.Provider, n.Labels); poolName != "" {
+		jsonEncodeString(buffer, "pool", poolName, ",")
+	}
 	jsonEncodeFloat64(buffer, "cpuCores", n.CPUCores(), ",")
 	jsonEncodeFloat64(buffer, "ramBytes", n.RAMBytes(), ",")
 	jsonEncodeFloat64(buffer, "cpuCoreHours", n.CPUCoreHours, ",")
@@ -561,6 +564,7 @@ func (n *Node) InterfaceToNode(itf interface{}) error {
 	if NodeType, err := getTypedVal(fmap["nodeType"]); err == nil {
 		n.NodeType = NodeType.(string)
 	}
+	
 	if CPUCoreHours, err := getTypedVal(fmap["cpuCoreHours"]); err == nil {
 		n.CPUCoreHours = CPUCoreHours.(float64)
 	}

+ 9 - 0
pkg/kubecost/assetprops.go

@@ -118,12 +118,21 @@ const OtherCategory = "Other"
 // AWSProvider describes the provider AWS
 const AWSProvider = "AWS"
 
// EKSNodepoolLabel is the node label key AWS uses to mark which node group
// (node pool) an EKS node belongs to.
const EKSNodepoolLabel = "eks.amazonaws.com/nodegroup"
+
 // GCPProvider describes the provider GCP
 const GCPProvider = "GCP"
 
// GKENodePoolLabel is the node label key GKE uses to mark which node pool a
// node belongs to.
const GKENodePoolLabel = "cloud.google.com/gke-nodepool"
+
 // AzureProvider describes the provider Azure
 const AzureProvider = "Azure"
 
// AKSNodepoolLabel is the node label key Azure uses to mark which agent pool
// (node pool) an AKS node belongs to.
const AKSNodepoolLabel = "kubernetes.azure.com/agentpool"
+
 // AlibabaProvider describes the provider for Alibaba Cloud
 const AlibabaProvider = "Alibaba"