Просмотр исходного кода

Merge branch 'develop' into perf/align-step-range-queries

Ajay Tripathy 3 лет назад
Родитель
Сommit
bbc56e3e00

+ 2 - 0
configs/pricing_schema.csv

@@ -1,2 +1,4 @@
 EndTimestamp,InstanceID,Region,AssetClass,InstanceIDField,InstanceType,MarketPriceHourly,Version
 2019-04-17 23:34:22 UTC,gke-standard-cluster-1-pool-1-91dc432d-cg69,,node,metadata.name,,0.1337,
+2019-04-17 23:34:22 UTC,Quadro_RTX_4000,,gpu,nvidia.com/gpu_type,,0.75,
+2019-04-17 23:34:22 UTC,Quadro_RTX_4001,,gpu,gpu.nvidia.com/class,,0.80,

+ 4 - 0
pkg/cloud/awsprovider.go

@@ -612,6 +612,10 @@ type awsKey struct {
 	ProviderID     string
 }
 
+func (k *awsKey) GPUCount() int {
+	return 0
+}
+
 func (k *awsKey) GPUType() string {
 	return ""
 }

+ 4 - 0
pkg/cloud/azureprovider.go

@@ -419,6 +419,10 @@ func (k *azureKey) Features() string {
 	return fmt.Sprintf("%s,%s,%s", region, instance, usageType)
 }
 
+func (k *azureKey) GPUCount() int {
+	return 0
+}
+
 // GPUType returns value of GPULabel if present
 func (k *azureKey) GPUType() string {
 	if t, ok := k.Labels[k.GPULabel]; ok {

+ 64 - 7
pkg/cloud/csvprovider.go

@@ -33,6 +33,8 @@ type CSVProvider struct {
 	NodeMapField            string
 	PricingPV               map[string]*price
 	PVMapField              string
+	GPUClassPricing         map[string]*price
+	GPUMapFields            []string // Fields in a node's labels that represent the GPU class.
 	UsesRegion              bool
 	DownloadPricingDataLock sync.RWMutex
 }
@@ -59,6 +61,8 @@ func (c *CSVProvider) DownloadPricingData() error {
 	nodeclasspricing := make(map[string]float64)
 	nodeclasscount := make(map[string]float64)
 	pvpricing := make(map[string]*price)
+	gpupricing := make(map[string]*price)
+	c.GPUMapFields = make([]string, 0, 1)
 	header, err := csvutil.Header(price{}, "csv")
 	if err != nil {
 		return err
@@ -87,6 +91,7 @@ func (c *CSVProvider) DownloadPricingData() error {
 			c.NodeClassPricing = nodeclasspricing
 			c.NodeClassCount = nodeclasscount
 			c.PricingPV = pvpricing
+			c.GPUClassPricing = gpupricing
 			return fmt.Errorf("Invalid s3 URI: %s", c.CSVLocation)
 		}
 	} else {
@@ -98,6 +103,7 @@ func (c *CSVProvider) DownloadPricingData() error {
 		c.NodeClassPricing = nodeclasspricing
 		c.NodeClassCount = nodeclasscount
 		c.PricingPV = pvpricing
+		c.GPUClassPricing = gpupricing
 		return nil
 	}
 	csvReader := csv.NewReader(csvr)
@@ -110,6 +116,7 @@ func (c *CSVProvider) DownloadPricingData() error {
 		c.NodeClassPricing = nodeclasspricing
 		c.NodeClassCount = nodeclasscount
 		c.PricingPV = pvpricing
+		c.GPUClassPricing = gpupricing
 		return err
 	}
 	for {
@@ -163,6 +170,9 @@ func (c *CSVProvider) DownloadPricingData() error {
 			}
 
 			c.NodeMapField = p.InstanceIDField
+		} else if p.AssetClass == "gpu" {
+			gpupricing[key] = &p
+			c.GPUMapFields = append(c.GPUMapFields, strings.ToLower(p.InstanceIDField))
 		} else {
 			log.Infof("Unrecognized asset class %s, defaulting to node", p.AssetClass)
 			pricing[key] = &p
@@ -174,6 +184,7 @@ func (c *CSVProvider) DownloadPricingData() error {
 		c.NodeClassPricing = nodeclasspricing
 		c.NodeClassCount = nodeclasscount
 		c.PricingPV = pvpricing
+		c.GPUClassPricing = gpupricing
 	} else {
 		log.DedupedWarningf(5, "No data received from csv at %s", c.CSVLocation)
 	}
@@ -183,6 +194,8 @@ func (c *CSVProvider) DownloadPricingData() error {
 type csvKey struct {
 	Labels     map[string]string
 	ProviderID string
+	GPULabel   []string
+	GPU        int64
 }
 
 func (k *csvKey) Features() string {
@@ -192,7 +205,17 @@ func (k *csvKey) Features() string {
 
 	return region + "," + instanceType + "," + class
 }
+
+func (k *csvKey) GPUCount() int {
+	return int(k.GPU)
+}
+
 func (k *csvKey) GPUType() string {
+	for _, label := range k.GPULabel {
+		if val, ok := k.Labels[label]; ok {
+			return val
+		}
+	}
 	return ""
 }
 func (k *csvKey) ID() string {
@@ -202,30 +225,56 @@ func (k *csvKey) ID() string {
 func (c *CSVProvider) NodePricing(key Key) (*Node, error) {
 	c.DownloadPricingDataLock.RLock()
 	defer c.DownloadPricingDataLock.RUnlock()
+	var node *Node
 	if p, ok := c.Pricing[key.ID()]; ok {
-		return &Node{
+		node = &Node{
 			Cost:        p.MarketPriceHourly,
 			PricingType: CsvExact,
-		}, nil
+		}
 	}
 	s := strings.Split(key.ID(), ",") // Try without a region to be sure
 	if len(s) == 2 {
 		if p, ok := c.Pricing[s[1]]; ok {
-			return &Node{
+			node = &Node{
 				Cost:        p.MarketPriceHourly,
 				PricingType: CsvExact,
-			}, nil
+			}
 		}
 	}
 	classKey := key.Features() // Use node attributes to try and do a class match
 	if cost, ok := c.NodeClassPricing[classKey]; ok {
 		log.Infof("Unable to find provider ID `%s`, using features:`%s`", key.ID(), key.Features())
-		return &Node{
+		node = &Node{
 			Cost:        fmt.Sprintf("%f", cost),
 			PricingType: CsvClass,
-		}, nil
+		}
+	}
+
+	if node != nil {
+		if t := key.GPUType(); t != "" {
+			t = strings.ToLower(t)
+			count := key.GPUCount()
+			node.GPU = strconv.Itoa(count)
+			hourly := 0.0
+			if p, ok := c.GPUClassPricing[t]; ok {
+				var err error
+				hourly, err = strconv.ParseFloat(p.MarketPriceHourly, 64)
+				if err != nil {
+					log.Errorf("Unable to parse %s as float", p.MarketPriceHourly)
+				}
+			}
+			totalCost := hourly * float64(count)
+			node.GPUCost = fmt.Sprintf("%f", totalCost)
+			nc, err := strconv.ParseFloat(node.Cost, 64)
+			if err != nil {
+				log.Errorf("Unable to parse %s as float", node.Cost)
+			}
+			node.Cost = fmt.Sprintf("%f", nc+totalCost)
+		}
+		return node, nil
+	} else {
+		return nil, fmt.Errorf("Unable to find Node matching `%s`:`%s`", key.ID(), key.Features())
 	}
-	return nil, fmt.Errorf("Unable to find Node matching `%s`:`%s`", key.ID(), key.Features())
 }
 
 func NodeValueFromMapField(m string, n *v1.Node, useRegion bool) string {
@@ -299,9 +348,16 @@ func PVValueFromMapField(m string, n *v1.PersistentVolume) string {
 
 func (c *CSVProvider) GetKey(l map[string]string, n *v1.Node) Key {
 	id := NodeValueFromMapField(c.NodeMapField, n, c.UsesRegion)
+	var gpuCount int64
+	gpuCount = 0
+	if gpuc, ok := n.Status.Capacity["nvidia.com/gpu"]; ok { // TODO: support non-nvidia GPUs
+		gpuCount = gpuc.Value()
+	}
 	return &csvKey{
 		ProviderID: id,
 		Labels:     l,
+		GPULabel:   c.GPUMapFields,
+		GPU:        gpuCount,
 	}
 }
 
@@ -368,3 +424,4 @@ func (c *CSVProvider) CombinedDiscountForNode(instanceType string, isPreemptible
 func (c *CSVProvider) Regions() []string {
 	return []string{}
 }
+

+ 4 - 0
pkg/cloud/customprovider.go

@@ -277,6 +277,10 @@ func (*CustomProvider) GetPVKey(pv *v1.PersistentVolume, parameters map[string]s
 	}
 }
 
+func (k *customProviderKey) GPUCount() int {
+	return 0
+}
+
 func (cpk *customProviderKey) GPUType() string {
 	if t, ok := cpk.Labels[cpk.GPULabel]; ok {
 		return t

+ 4 - 0
pkg/cloud/gcpprovider.go

@@ -1261,6 +1261,10 @@ func (gcp *gcpKey) ID() string {
 	return ""
 }
 
+func (k *gcpKey) GPUCount() int {
+	return 0
+}
+
 func (gcp *gcpKey) GPUType() string {
 	if t, ok := gcp.Labels[GKE_GPU_TAG]; ok {
 		usageType := getUsageType(gcp.Labels)

+ 2 - 1
pkg/cloud/provider.go

@@ -118,7 +118,8 @@ type PV struct {
 type Key interface {
 	ID() string       // ID represents an exact match
 	Features() string // Features are a comma separated string of node metadata that could match pricing
-	GPUType() string  // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
+	GPUType() string  // GPUType returns "" if no GPU exists or GPUs, but the name of the GPU otherwise
+	GPUCount() int    // GPUCount returns 0 if no GPU exists or GPUs, but the number of attached GPUs otherwise
 }
 
 type PVKey interface {

+ 4 - 0
pkg/cloud/scalewayprovider.go

@@ -104,6 +104,10 @@ func (k *scalewayKey) Features() string {
 	return zone + "," + instanceType
 }
 
+func (k *scalewayKey) GPUCount() int {
+	return 0
+}
+
 func (k *scalewayKey) GPUType() string {
 	instanceType, _ := util.GetInstanceType(k.Labels)
 	if strings.HasPrefix(instanceType, "RENDER") || strings.HasPrefix(instanceType, "GPU") {

+ 45 - 6
pkg/costmodel/allocation_helpers.go

@@ -2,6 +2,11 @@ package costmodel
 
 import (
 	"fmt"
+	"math"
+	"strconv"
+	"strings"
+	"time"
+
 	"github.com/opencost/opencost/pkg/cloud"
 	"github.com/opencost/opencost/pkg/env"
 	"github.com/opencost/opencost/pkg/kubecost"
@@ -9,10 +14,6 @@ import (
 	"github.com/opencost/opencost/pkg/prom"
 	"github.com/opencost/opencost/pkg/util/timeutil"
 	"k8s.io/apimachinery/pkg/labels"
-	"math"
-	"strconv"
-	"strings"
-	"time"
 )
 
 // This is a bit of a hack to work around garbage data from cadvisor
@@ -1794,7 +1795,9 @@ func buildPodPVCMap(podPVCMap map[podKey][]*pvc, pvMap map[pvKey]*pv, pvcMap map
 
 			if pod, ok := podMap[key]; !ok || len(pod.Allocations) <= 0 {
 				log.DedupedWarningf(10, "CostModel.ComputeAllocation: pvc %s for missing pod %s", pvcKey, key)
+				continue
 			}
+
 			pvc.Mounted = true
 
 			podPVCMap[key] = append(podPVCMap[key], pvc)
@@ -1859,7 +1862,8 @@ func applyPVCsToPods(window kubecost.Window, podMap map[podKey]*pod, podPVCMap m
 			// If pod does not exist or the pod does not have any allocations
 			// get unmounted pod for cluster
 			if !ok2 || len(pod.Allocations) == 0 {
-				pod = getUnmountedPodForCluster(window, podMap, pvc.Cluster)
+				// Get namespace unmounted pod, as pvc will have a namespace
+				pod = getUnmountedPodForNamespace(window, podMap, pvc.Cluster, pvc.Namespace)
 			}
 			for _, alloc := range pod.Allocations {
 				s, e := pod.Start, pod.End
@@ -1909,6 +1913,8 @@ func applyUnmountedPVs(window kubecost.Window, podMap map[podKey]*pod, pvMap map
 		}
 
 		if !mounted {
+
+			// a pv without a pvc will not have a namespace, so get the cluster unmounted pod
 			pod := getUnmountedPodForCluster(window, podMap, pv.Cluster)
 
 			// Calculate pv Cost
@@ -1935,7 +1941,9 @@ func applyUnmountedPVs(window kubecost.Window, podMap map[podKey]*pod, pvMap map
 func applyUnmountedPVCs(window kubecost.Window, podMap map[podKey]*pod, pvcMap map[pvcKey]*pvc) {
 	for _, pvc := range pvcMap {
 		if !pvc.Mounted && pvc.Volume != nil {
-			pod := getUnmountedPodForCluster(window, podMap, pvc.Cluster)
+
+			// Get namespace unmounted pod, as pvc will have a namespace
+			pod := getUnmountedPodForNamespace(window, podMap, pvc.Cluster, pvc.Namespace)
 
 			// Calculate pv Cost
 
@@ -1994,6 +2002,37 @@ func getUnmountedPodForCluster(window kubecost.Window, podMap map[podKey]*pod, c
 	return thisPod
 }
 
+// getUnmountedPodForNamespace is as getUnmountedPodForCluster, but keys allocation property pod/namespace field off namespace
+// This creates or adds allocations to an unmounted pod in the specified namespace, rather than in __unmounted__
+func getUnmountedPodForNamespace(window kubecost.Window, podMap map[podKey]*pod, cluster string, namespace string) *pod {
+	container := kubecost.UnmountedSuffix
+	podName := fmt.Sprintf("%s-unmounted-pvcs", namespace)
+	node := ""
+
+	thisPodKey := newPodKey(cluster, namespace, podName)
+	// Initialize pod and container if they do not already exist
+	thisPod, ok := podMap[thisPodKey]
+	if !ok {
+		thisPod = &pod{
+			Window:      window.Clone(),
+			Start:       *window.Start(),
+			End:         *window.End(),
+			Key:         thisPodKey,
+			Allocations: map[string]*kubecost.Allocation{},
+		}
+
+		thisPod.appendContainer(container)
+		thisPod.Allocations[container].Properties.Cluster = cluster
+		thisPod.Allocations[container].Properties.Node = node
+		thisPod.Allocations[container].Properties.Namespace = namespace
+		thisPod.Allocations[container].Properties.Pod = podName
+		thisPod.Allocations[container].Properties.Container = container
+
+		podMap[thisPodKey] = thisPod
+	}
+	return thisPod
+}
+
 func calculateStartAndEnd(result *prom.QueryResult, resolution time.Duration) (time.Time, time.Time) {
 	s := time.Unix(int64(result.Values[0].Timestamp), 0).UTC()
 	// subtract resolution from start time to cover full time period

+ 74 - 3
test/cloud_test.go

@@ -16,6 +16,7 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 )
 
 const (
@@ -123,6 +124,76 @@ func TestPVPriceFromCSV(t *testing.T) {
 
 }
 
+func TestNodePriceFromCSVWithGPU(t *testing.T) {
+	providerIDWant := "providerid"
+	nameWant := "gke-standard-cluster-1-pool-1-91dc432d-cg69"
+	labelFooWant := "labelfoo"
+	wantGPU := "2"
+
+	confMan := config.NewConfigFileManager(&config.ConfigFileManagerOpts{
+		LocalConfigPath: "./",
+	})
+
+	n := &v1.Node{}
+	n.Spec.ProviderID = providerIDWant
+	n.Name = nameWant
+	n.Labels = make(map[string]string)
+	n.Labels["foo"] = labelFooWant
+	n.Labels["nvidia.com/gpu_type"] = "Quadro_RTX_4000"
+	n.Status.Capacity = v1.ResourceList{"nvidia.com/gpu": *resource.NewScaledQuantity(2, 0)}
+	wantPrice := "1.633700"
+
+	n2 := &v1.Node{}
+	n2.Spec.ProviderID = providerIDWant
+	n2.Name = nameWant
+	n2.Labels = make(map[string]string)
+	n2.Labels["foo"] = labelFooWant
+	n2.Labels["gpu.nvidia.com/class"] = "Quadro_RTX_4001"
+	n2.Status.Capacity = v1.ResourceList{"nvidia.com/gpu": *resource.NewScaledQuantity(2, 0)}
+	wantPrice2 := "1.733700"
+
+	c := &cloud.CSVProvider{
+		CSVLocation: "../configs/pricing_schema.csv",
+		CustomProvider: &cloud.CustomProvider{
+			Config: cloud.NewProviderConfig(confMan, "../configs/default.json"),
+		},
+	}
+
+	c.DownloadPricingData()
+	k := c.GetKey(n.Labels, n)
+	resN, err := c.NodePricing(k)
+	if err != nil {
+		t.Errorf("Error in NodePricing: %s", err.Error())
+	} else {
+		gotGPU := resN.GPU
+		gotPrice := resN.Cost
+		if gotGPU != wantGPU {
+			t.Errorf("Wanted gpu count '%s' got gpu count '%s'", wantGPU, gotGPU)
+		}
+		if gotPrice != wantPrice {
+			t.Errorf("Wanted price '%s' got price '%s'", wantPrice, gotPrice)
+		}
+
+	}
+
+	k2 := c.GetKey(n2.Labels, n2)
+	resN2, err := c.NodePricing(k2)
+	if err != nil {
+		t.Errorf("Error in NodePricing: %s", err.Error())
+	} else {
+		gotGPU := resN2.GPU
+		gotPrice := resN2.Cost
+		if gotGPU != wantGPU {
+			t.Errorf("Wanted gpu count '%s' got gpu count '%s'", wantGPU, gotGPU)
+		}
+		if gotPrice != wantPrice2 {
+			t.Errorf("Wanted price '%s' got price '%s'", wantPrice2, gotPrice)
+		}
+
+	}
+
+}
+
 func TestNodePriceFromCSV(t *testing.T) {
 	providerIDWant := "providerid"
 	nameWant := "gke-standard-cluster-1-pool-1-91dc432d-cg69"
@@ -138,7 +209,7 @@ func TestNodePriceFromCSV(t *testing.T) {
 	n.Labels = make(map[string]string)
 	n.Labels["foo"] = labelFooWant
 
-	wantPrice := "0.1337"
+	wantPrice := "0.133700"
 
 	c := &cloud.CSVProvider{
 		CSVLocation: "../configs/pricing_schema.csv",
@@ -198,7 +269,7 @@ func TestNodePriceFromCSVWithRegion(t *testing.T) {
 	n.Labels = make(map[string]string)
 	n.Labels["foo"] = labelFooWant
 	n.Labels[v1.LabelZoneRegion] = "regionone"
-	wantPrice := "0.1337"
+	wantPrice := "0.133700"
 
 	n2 := &v1.Node{}
 	n2.Spec.ProviderID = providerIDWant
@@ -206,7 +277,7 @@ func TestNodePriceFromCSVWithRegion(t *testing.T) {
 	n2.Labels = make(map[string]string)
 	n2.Labels["foo"] = labelFooWant
 	n2.Labels[v1.LabelZoneRegion] = "regiontwo"
-	wantPrice2 := "0.1338"
+	wantPrice2 := "0.133800"
 
 	n3 := &v1.Node{}
 	n3.Spec.ProviderID = providerIDWant