Browse Source

Merge pull request #67 from kubecost/AjayTripathy-fix-historical

Ajay tripathy fix historical
Ajay Tripathy 7 years ago
parent
commit
e2c1d5a031
5 changed files with 491 additions and 98 deletions
  1. 72 1
      cloud/awsprovider.go
  2. 125 33
      cloud/gcpprovider.go
  3. 59 12
      cloud/provider.go
  4. 210 52
      costmodel/costmodel.go
  5. 25 0
      main.go

+ 72 - 1
cloud/awsprovider.go

@@ -76,6 +76,8 @@ type AWSProductAttributes struct {
 	UsageType       string `json:"usagetype"`
 	OperatingSystem string `json:"operatingSystem"`
 	PreInstalledSw  string `json:"preInstalledSw"`
+	InstanceFamily  string `json:"instanceFamily"`
+	GPU             string `json:"gpu"` // GPU represents the number of GPUs on the instance
 }
 
 // AWSPricingTerms are how you pay for the node: OnDemand, Reserved, or (TODO) Spot
@@ -109,6 +111,7 @@ type AWSProductTerms struct {
 	Memory   string        `json:"memory"`
 	Storage  string        `json:"storage"`
 	VCpu     string        `json:"vcpu"`
+	GPU      string        `json:"gpu"` // GPU represents the number of GPUs on the instance
 }
 
 // ClusterIdEnvVar is the environment variable in which one can manually set the ClusterId
@@ -155,6 +158,62 @@ func (aws *AWS) KubeAttrConversion(location, instanceType, operatingSystem strin
 	return region + "," + instanceType + "," + operatingSystem
 }
 
+type AwsSpotFeedInfo struct {
+	BucketName       string `json:"bucketName"`
+	Prefix           string `json:"prefix"`
+	Region           string `json:"region"`
+	AccountID        string `json:"accountId"`
+	ServiceKeyName   string `json:"serviceKeyName"`
+	ServiceKeySecret string `json:"serviceKeySecret"`
+	SpotLabel        string `json:"spotLabel"`
+	SpotLabelValue   string `json:"spotLabelValue"`
+}
+
+func (aws *AWS) GetConfig() (*CustomPricing, error) {
+	c, err := GetDefaultPricingData("aws.json")
+	if err != nil {
+		return nil, err
+	}
+	return c, nil
+}
+
+func (aws *AWS) UpdateConfig(r io.Reader) (*CustomPricing, error) {
+	a := AwsSpotFeedInfo{}
+	err := json.NewDecoder(r).Decode(&a)
+	if err != nil {
+		return nil, err
+	}
+
+	c, err := GetDefaultPricingData("aws.json")
+	if err != nil {
+		return nil, err
+	}
+	c.ServiceKeyName = a.ServiceKeyName
+	c.ServiceKeySecret = a.ServiceKeySecret
+	c.SpotDataPrefix = a.Prefix
+	c.SpotDataBucket = a.BucketName
+	c.ProjectID = a.AccountID
+	c.SpotDataRegion = a.Region
+	c.SpotLabel = a.SpotLabel
+	c.SpotLabelValue = a.SpotLabelValue
+
+	cj, err := json.Marshal(c)
+	if err != nil {
+		return nil, err
+	}
+	path := os.Getenv("CONFIG_PATH")
+	if path == "" {
+		path = "/models/"
+	}
+	path += "aws.json"
+	err = ioutil.WriteFile(path, cj, 0644)
+	if err != nil {
+		return nil, err
+	}
+	return c, nil
+
+}
+
 type awsKey struct {
 	SpotLabelName  string
 	SpotLabelValue string
@@ -162,6 +221,10 @@ type awsKey struct {
 	ProviderID     string
 }
 
+func (k *awsKey) GPUType() string {
+	return ""
+}
+
 func (k *awsKey) ID() string {
 	provIdRx := regexp.MustCompile("aws:///([^/]+)/([^/]+)") // It's of the form aws:///us-east-2a/i-0fea4fd46592d050b and we want i-0fea4fd46592d050b, if it exists
 	for matchNum, group := range provIdRx.FindStringSubmatch(k.ProviderID) {
@@ -294,6 +357,7 @@ func (aws *AWS) DownloadPricingData() error {
 							Memory:  product.Attributes.Memory,
 							Storage: product.Attributes.Storage,
 							VCpu:    product.Attributes.VCpu,
+							GPU:     product.Attributes.GPU,
 						}
 						aws.Pricing[key] = productTerms
 						aws.Pricing[spotKey] = productTerms
@@ -387,6 +451,7 @@ func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k Key) (*No
 				Cost:         spotcost,
 				VCPU:         terms.VCpu,
 				RAM:          terms.Memory,
+				GPU:          terms.GPU,
 				Storage:      terms.Storage,
 				BaseCPUPrice: aws.BaseCPUPrice,
 				UsageType:    usageType,
@@ -396,17 +461,23 @@ func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k Key) (*No
 			VCPU:         terms.VCpu,
 			VCPUCost:     aws.BaseSpotCPUPrice,
 			RAM:          terms.Memory,
+			GPU:          terms.GPU,
 			RAMCost:      aws.BaseSpotRAMPrice,
 			Storage:      terms.Storage,
 			BaseCPUPrice: aws.BaseCPUPrice,
 			UsageType:    usageType,
 		}, nil
 	}
-	cost := terms.OnDemand.PriceDimensions[terms.Sku+OnDemandRateCode+HourlyRateCode].PricePerUnit.USD
+	c, ok := terms.OnDemand.PriceDimensions[terms.Sku+OnDemandRateCode+HourlyRateCode]
+	if !ok {
+		return nil, fmt.Errorf("Could not fetch data for \"%s\"", k.ID())
+	}
+	cost := c.PricePerUnit.USD
 	return &Node{
 		Cost:         cost,
 		VCPU:         terms.VCpu,
 		RAM:          terms.Memory,
+		GPU:          terms.GPU,
 		Storage:      terms.Storage,
 		BaseCPUPrice: aws.BaseCPUPrice,
 		UsageType:    usageType,

+ 125 - 33
cloud/gcpprovider.go

@@ -9,6 +9,7 @@ import (
 	"math"
 	"net/http"
 	"net/url"
+	"regexp"
 	"strconv"
 	"strings"
 
@@ -25,6 +26,8 @@ import (
 	"k8s.io/client-go/kubernetes"
 )
 
+const GKE_GPU_TAG = "cloud.google.com/gke-accelerator"
+
 type userAgentTransport struct {
 	userAgent string
 	base      http.RoundTripper
@@ -71,6 +74,14 @@ func gcpAllocationToOutOfClusterAllocation(gcpAlloc gcpAllocation) *OutOfCluster
 	}
 }
 
+func (gcp *GCP) GetConfig() (*CustomPricing, error) {
+	return nil, nil
+}
+
+func (gcp *GCP) UpdateConfig(r io.Reader) (*CustomPricing, error) {
+	return nil, nil
+}
+
 func (gcp *GCP) ExternalAllocations(start string, end string) ([]*OutOfClusterAllocation, error) {
 	// start, end formatted like: "2019-04-20 00:00:00"
 	queryString := fmt.Sprintf(`SELECT
@@ -225,7 +236,7 @@ type GCPResourceInfo struct {
 	UsageType          string `json:"usageType"`
 }
 
-func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]bool) (map[string]*GCPPricing, string, error) {
+func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key) (map[string]*GCPPricing, string, error) {
 	gcpPricingList := make(map[string]*GCPPricing)
 	var nextPageToken string
 	dec := json.NewDecoder(r)
@@ -252,6 +263,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]bool) (map[string]*G
 				if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "CUSTOM") {
 					instanceType = "custom"
 				}
+
 				var partialCPU float64
 				if strings.ToLower(instanceType) == "f1micro" {
 					partialCPU = 0.2
@@ -259,11 +271,21 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]bool) (map[string]*G
 					partialCPU = 0.5
 				}
 
+				var gpuType string
+				provIdRx := regexp.MustCompile("(Nvidia Tesla [^ ]+) ")
+				for matchnum, group := range provIdRx.FindStringSubmatch(product.Description) {
+					if matchnum == 1 {
+						gpuType = strings.ToLower(strings.Join(strings.Split(group, " "), "-"))
+						klog.V(3).Info("GPU TYPE FOUND: " + gpuType)
+					}
+				}
+
 				for _, sr := range product.ServiceRegions {
 					region := sr
-
 					candidateKey := region + "," + instanceType + "," + usageType
-					if _, ok := inputKeys[candidateKey]; ok {
+					candidateKeyGPU := candidateKey + ",gpu"
+
+					if gpuType != "" {
 						lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
 						var nanos float64
 						if len(product.PricingInfo) > 0 {
@@ -271,41 +293,98 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]bool) (map[string]*G
 						} else {
 							continue
 						}
-
 						hourlyPrice := nanos * math.Pow10(-9)
-						if hourlyPrice == 0 {
-							continue
-						} else if strings.Contains(strings.ToUpper(product.Description), "RAM") {
-							if instanceType == "custom" {
-								klog.V(2).Infof("RAM custom sku is: " + product.Name)
+
+						for k, key := range inputKeys {
+							if key.GPUType() == gpuType {
+								if region == strings.Split(k, ",")[0] {
+									klog.V(3).Infof("Matched GPU to node in region \"%s\"", region)
+									candidateKeyGPU = key.Features()
+									if pl, ok := gcpPricingList[candidateKeyGPU]; ok {
+										pl.Node.GPUName = gpuType
+										pl.Node.GPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+										pl.Node.GPU = "1"
+									} else {
+										product.Node = &Node{
+											GPUName: gpuType,
+											GPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+											GPU:     "1",
+										}
+										klog.V(3).Infof("Added data for " + candidateKeyGPU)
+										gcpPricingList[candidateKeyGPU] = product
+									}
+								}
 							}
-							if _, ok := gcpPricingList[candidateKey]; ok {
-								gcpPricingList[candidateKey].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+						}
+					} else {
+						if _, ok := inputKeys[candidateKey]; ok {
+							lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
+							var nanos float64
+							if len(product.PricingInfo) > 0 {
+								nanos = product.PricingInfo[0].PricingExpression.TieredRates[lastRateIndex].UnitPrice.Nanos
 							} else {
-								product.Node = &Node{
-									RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+								continue
+							}
+							hourlyPrice := nanos * math.Pow10(-9)
+
+							if hourlyPrice == 0 {
+								continue
+							} else if strings.Contains(strings.ToUpper(product.Description), "RAM") {
+								if instanceType == "custom" {
+									klog.V(2).Infof("RAM custom sku is: " + product.Name)
 								}
-								if partialCPU != 0 {
-									product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+								if _, ok := gcpPricingList[candidateKey]; ok {
+									gcpPricingList[candidateKey].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+								} else {
+									product.Node = &Node{
+										RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+									}
+									if partialCPU != 0 {
+										product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+									}
+									product.Node.UsageType = usageType
+									gcpPricingList[candidateKey] = product
 								}
-								product.Node.UsageType = usageType
-								gcpPricingList[candidateKey] = product
-							}
-							break
-						} else {
-							if _, ok := gcpPricingList[candidateKey]; ok {
-								gcpPricingList[candidateKey].Node.VCPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+								if _, ok := gcpPricingList[candidateKeyGPU]; ok {
+									gcpPricingList[candidateKeyGPU].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+								} else {
+									product.Node = &Node{
+										RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+									}
+									if partialCPU != 0 {
+										product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+									}
+									product.Node.UsageType = usageType
+									gcpPricingList[candidateKeyGPU] = product
+								}
+								break
 							} else {
-								product.Node = &Node{
-									VCPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+								if _, ok := gcpPricingList[candidateKey]; ok {
+									gcpPricingList[candidateKey].Node.VCPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+								} else {
+									product.Node = &Node{
+										VCPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+									}
+									if partialCPU != 0 {
+										product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+									}
+									product.Node.UsageType = usageType
+									gcpPricingList[candidateKey] = product
 								}
-								if partialCPU != 0 {
-									product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+								if _, ok := gcpPricingList[candidateKeyGPU]; ok {
+									gcpPricingList[candidateKey].Node.VCPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
+								} else {
+									product.Node = &Node{
+										VCPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
+									}
+									if partialCPU != 0 {
+										product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
+									}
+									product.Node.UsageType = usageType
+									gcpPricingList[candidateKeyGPU] = product
 								}
-								product.Node.UsageType = usageType
-								gcpPricingList[candidateKey] = product
+								break
 							}
-							break
 						}
 					}
 				}
@@ -327,7 +406,7 @@ func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]bool) (map[string]*G
 	return gcpPricingList, nextPageToken, nil
 }
 
-func (gcp *GCP) parsePages(inputKeys map[string]bool) (map[string]*GCPPricing, error) {
+func (gcp *GCP) parsePages(inputKeys map[string]Key) (map[string]*GCPPricing, error) {
 	var pages []map[string]*GCPPricing
 	url := "https://cloudbilling.googleapis.com/v1/services/6F81-5844-456A/skus?key=" + gcp.APIKey
 	klog.V(2).Infof("Fetch GCP Billing Data from URL: %s", url)
@@ -387,12 +466,12 @@ func (gcp *GCP) DownloadPricingData() error {
 	if err != nil {
 		return err
 	}
-	inputkeys := make(map[string]bool)
+	inputkeys := make(map[string]Key)
 
 	for _, n := range nodeList.Items {
 		labels := n.GetObjectMeta().GetLabels()
 		key := gcp.GetKey(labels)
-		inputkeys[key.Features()] = true
+		inputkeys[key.Features()] = key
 	}
 
 	pages, err := gcp.parsePages(inputkeys)
@@ -419,6 +498,14 @@ func (gcp *gcpKey) ID() string {
 	return ""
 }
 
+func (gcp *gcpKey) GPUType() string {
+	if t, ok := gcp.Labels[GKE_GPU_TAG]; ok {
+		klog.V(3).Infof("GPU of type: \"%s\" found", t)
+		return t
+	}
+	return ""
+}
+
 // Features maps node labels to the comma-separated string used to look up pricing data
 func (gcp *gcpKey) Features() string {
 	instanceType := strings.ToLower(strings.Join(strings.Split(gcp.Labels[v1.LabelInstanceType], "-")[:2], ""))
@@ -435,6 +522,11 @@ func (gcp *gcpKey) Features() string {
 	} else {
 		usageType = "ondemand"
 	}
+
+	if _, ok := gcp.Labels[GKE_GPU_TAG]; ok {
+		return region + "," + instanceType + "," + usageType + "," + "gpu"
+	}
+
 	return region + "," + instanceType + "," + usageType
 }
 
@@ -450,6 +542,6 @@ func (gcp *GCP) NodePricing(key Key) (*Node, error) {
 		n.Node.BaseCPUPrice = gcp.BaseCPUPrice
 		return n.Node, nil
 	}
-	klog.V(1).Infof("Warning: no pricing data found for %s", key)
+	klog.V(1).Infof("Warning: no pricing data found for %s: %s", key.Features(), key)
 	return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
 }

+ 59 - 12
cloud/provider.go

@@ -3,6 +3,7 @@ package cloud
 import (
 	"encoding/json"
 	"errors"
+	"io"
 	"io/ioutil"
 	"net/url"
 	"os"
@@ -30,12 +31,16 @@ type Node struct {
 	UsesBaseCPUPrice bool   `json:"usesDefaultPrice"`
 	BaseCPUPrice     string `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
 	UsageType        string `json:"usageType"`
+	GPU              string `json:"gpu"` // GPU represents the number of GPUs on the instance
+	GPUName          string `json:"gpuName"`
+	GPUCost          string `json:"gpuCost"`
 }
 
 // Key represents a way for nodes to match between the k8s API and a pricing API
 type Key interface {
 	ID() string       // ID represents an exact match
 	Features() string // Features are a comma separated string of node metadata that could match pricing
+	GPUType() string  // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
 }
 
 // OutOfClusterAllocation represents a cloud provider cost not associated with kubernetes
@@ -56,27 +61,57 @@ type Provider interface {
 	AllNodePricing() (interface{}, error)
 	DownloadPricingData() error
 	GetKey(map[string]string) Key
+	UpdateConfig(r io.Reader) (*CustomPricing, error)
+	GetConfig() (*CustomPricing, error)
 
 	ExternalAllocations(string, string) ([]*OutOfClusterAllocation, error)
 }
 
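 // GetDefaultPricingData loads the json pricing file fname from the directory given by the CONFIG_PATH environment variable (default /models/); if the file does not exist, it writes a default pricing file there and returns those defaults.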
 func GetDefaultPricingData(fname string) (*CustomPricing, error) {
-	jsonFile, err := os.Open("/models/" + fname)
-	if err != nil {
-		return nil, err
-	}
-	defer jsonFile.Close()
-	byteValue, err := ioutil.ReadAll(jsonFile)
-	if err != nil {
-		return nil, err
+	path := os.Getenv("CONFIG_PATH")
+	if path == "" {
+		path = "/models/"
 	}
-	var customPricing = &CustomPricing{}
-	err = json.Unmarshal([]byte(byteValue), customPricing)
-	if err != nil {
+	path += fname
+	if _, err := os.Stat(path); err == nil {
+		jsonFile, err := os.Open(path)
+		if err != nil {
+			return nil, err
+		}
+		defer jsonFile.Close()
+		byteValue, err := ioutil.ReadAll(jsonFile)
+		if err != nil {
+			return nil, err
+		}
+		var customPricing = &CustomPricing{}
+		err = json.Unmarshal([]byte(byteValue), customPricing)
+		if err != nil {
+			return nil, err
+		}
+		return customPricing, nil
+	} else if os.IsNotExist(err) {
+		c := &CustomPricing{
+			Provider:    fname,
+			Description: "Default prices based on GCP us-central1",
+			CPU:         "0.031611",
+			SpotCPU:     "0.006655",
+			RAM:         "0.004237",
+			SpotRAM:     "0.000892",
+		}
+		cj, err := json.Marshal(c)
+		if err != nil {
+			return nil, err
+		}
+
+		err = ioutil.WriteFile(path, cj, 0644)
+		if err != nil {
+			return nil, err
+		}
+		return c, nil
+	} else {
 		return nil, err
 	}
-	return customPricing, nil
 }
 
 type CustomPricing struct {
@@ -109,6 +144,14 @@ type CustomProvider struct {
 	SpotLabelValue string
 }
 
+func (*CustomProvider) GetConfig() (*CustomPricing, error) {
+	return nil, nil
+}
+
+func (*CustomProvider) UpdateConfig(r io.Reader) (*CustomPricing, error) {
+	return nil, nil
+}
+
 func (*CustomProvider) ClusterName() ([]byte, error) {
 	return nil, nil
 }
@@ -163,6 +206,10 @@ type customProviderKey struct {
 	Labels         map[string]string
 }
 
+func (c *customProviderKey) GPUType() string {
+	return ""
+}
+
 func (c *customProviderKey) ID() string {
 	return ""
 }

+ 210 - 52
costmodel/costmodel.go

@@ -67,11 +67,53 @@ type Vector struct {
 }
 
 func ComputeCostData(cli prometheusClient.Client, clientset kubernetes.Interface, cloud costAnalyzerCloud.Provider, window string) (map[string]*CostData, error) {
-	queryRAMRequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD"}[` + window + `]) *  avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD"}[` + window + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
-	queryRAMUsage := `sort_desc(avg(count_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD"}[` + window + `]) * avg_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD"}[` + window + `])) by (namespace,container_name,pod_name,instance))`
-	queryCPURequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD"}[` + window + `]) *  avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD"}[` + window + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
-	queryCPUUsage := `avg(rate(container_cpu_usage_seconds_total{container_name!="",container_name!="POD"}[` + window + `])) by (namespace,container_name,pod_name,instance)`
-	queryGPURequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD"}[` + window + `]) *  avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD"}[` + window + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
+	queryRAMRequests := `avg(
+		label_replace(
+			label_replace(
+				avg(
+					count_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD", node!=""}[` + window + `]) 
+					*  
+					avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD", node!=""}[` + window + `])
+				) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+			), "pod_name","$1","pod","(.+)"
+		)
+	) by (namespace,container_name,pod_name,node)`
+	queryRAMUsage := `sort_desc(
+		avg(
+			label_replace(count_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD", instance!=""}[` + window + `]), "node", "$1", "instance","(.+)") 
+			* 
+			label_replace(avg_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD", instance!=""}[` + window + `]), "node", "$1", "instance","(.+)") 
+		) by (namespace,container_name,pod_name,node)
+	)`
+	queryCPURequests := `avg(
+		label_replace(
+			label_replace(
+				avg(
+					count_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD", node!=""}[` + window + `]) 
+					*  
+					avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD", node!=""}[` + window + `])
+				) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+			), "pod_name","$1","pod","(.+)"
+		) 
+	) by (namespace,container_name,pod_name,node)`
+	queryCPUUsage := `avg(
+		label_replace(
+		  rate( 
+			container_cpu_usage_seconds_total{container_name!="",container_name!="POD",instance!=""}[` + window + `]
+		  ) , "node", "$1", "instance", "(.+)"
+		)
+	) by (namespace,container_name,pod_name,node)`
+	queryGPURequests := `avg(
+		label_replace(
+			label_replace(
+				avg(
+					count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[` + window + `]) 
+					*  
+					avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[` + window + `])
+				) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+			), "pod_name","$1","pod","(.+)"
+		) 
+	) by (namespace,container_name,pod_name,node)`
 	queryPVRequests := `avg(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass, namespace) 
 	                    * 
 	                    on (persistentvolumeclaim, namespace) group_right(storageclass) 
@@ -240,31 +282,31 @@ func ComputeCostData(cli prometheusClient.Client, clientset kubernetes.Interface
 				containerName := container.Name
 
 				// recreate the key and look up data for this container
-				newKey := ns + "," + podName + "," + containerName
+				newKey := newContainerMetricFromValues(ns, podName, containerName, pod.Spec.NodeName).Key()
 
 				RAMReqV, ok := RAMReqMap[newKey]
 				if !ok {
-					klog.V(2).Info("no RAM requests for " + newKey)
+					klog.V(4).Info("no RAM requests for " + newKey)
 					RAMReqV = []*Vector{&Vector{}}
 				}
 				RAMUsedV, ok := RAMUsedMap[newKey]
 				if !ok {
-					klog.V(2).Info("no RAM usage for " + newKey)
+					klog.V(4).Info("no RAM usage for " + newKey)
 					RAMUsedV = []*Vector{&Vector{}}
 				}
 				CPUReqV, ok := CPUReqMap[newKey]
 				if !ok {
-					klog.V(2).Info("no CPU requests for " + newKey)
+					klog.V(4).Info("no CPU requests for " + newKey)
 					CPUReqV = []*Vector{&Vector{}}
 				}
 				GPUReqV, ok := GPUReqMap[newKey]
 				if !ok {
-					klog.V(2).Info("no GPU requests for " + newKey)
+					klog.V(4).Info("no GPU requests for " + newKey)
 					GPUReqV = []*Vector{&Vector{}}
 				}
 				CPUUsedV, ok := CPUUsedMap[newKey]
 				if !ok {
-					klog.V(2).Info("no CPU usage for " + newKey)
+					klog.V(4).Info("no CPU usage for " + newKey)
 					CPUUsedV = []*Vector{&Vector{}}
 				}
 
@@ -300,37 +342,49 @@ func ComputeCostData(cli prometheusClient.Client, clientset kubernetes.Interface
 
 		} else {
 			// The container has been deleted. Not all information is sent to prometheus via ksm, so fill out what we can without k8s api
-			// TODO: The nodename should be available from the prometheus query. Check if that node still exists and use that price
-			klog.V(3).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
-			c, _ := newContainerMetricFromKey(key)
+			klog.V(4).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
+			c, err := newContainerMetricFromKey(key)
+			if err != nil {
+				return nil, err
+			}
 			RAMReqV, ok := RAMReqMap[key]
 			if !ok {
-				klog.V(2).Info("no RAM requests for " + key)
+				klog.V(4).Info("no RAM requests for " + key)
 				RAMReqV = []*Vector{&Vector{}}
 			}
 			RAMUsedV, ok := RAMUsedMap[key]
 			if !ok {
-				klog.V(2).Info("no RAM usage for " + key)
+				klog.V(4).Info("no RAM usage for " + key)
 				RAMUsedV = []*Vector{&Vector{}}
 			}
 			CPUReqV, ok := CPUReqMap[key]
 			if !ok {
-				klog.V(2).Info("no CPU requests for " + key)
+				klog.V(4).Info("no CPU requests for " + key)
 				CPUReqV = []*Vector{&Vector{}}
 			}
 			GPUReqV, ok := GPUReqMap[key]
 			if !ok {
-				klog.V(2).Info("no GPU requests for " + key)
+				klog.V(4).Info("no GPU requests for " + key)
 				GPUReqV = []*Vector{&Vector{}}
 			}
 			CPUUsedV, ok := CPUUsedMap[key]
 			if !ok {
-				klog.V(2).Info("no CPU usage for " + key)
+				klog.V(4).Info("no CPU usage for " + key)
 				CPUUsedV = []*Vector{&Vector{}}
 			}
-			costs := &CostData{ // TODO: Expand the prometheus query/use prometheus to query for more data here if it exists.
+
+			var node *costAnalyzerCloud.Node
+			if n, ok := nodes[c.NodeName]; !ok {
+				//TODO: The node has been deleted from kubernetes as well. You will need to query historical node data to get it.
+				klog.V(2).Infof("Node \"%s\" has been deleted from Kubernetes. Query historical data to get it.", c.NodeName)
+			} else {
+				node = n
+			}
+			costs := &CostData{
 				Name:      c.ContainerName,
 				PodName:   c.PodName,
+				NodeName:  c.NodeName,
+				NodeData:  node,
 				Namespace: c.Namespace,
 				RAMReq:    RAMReqV,
 				RAMUsed:   RAMUsedV,
@@ -425,28 +479,60 @@ func getNodeCost(clientset kubernetes.Interface, cloud costAnalyzerCloud.Provide
 			cnode.RAM = n.Status.Capacity.Memory().String()
 		}
 		ram = float64(n.Status.Capacity.Memory().Value())
-		if cnode.RAMCost == "" { // We couldn't find a ramcost, so fix cpu and allocate ram accordingly
-			basePrice, _ := strconv.ParseFloat(cnode.BaseCPUPrice, 64)
-			totalCPUPrice := basePrice * cpu
-			var nodePrice float64
-			if cnode.Cost != "" {
-				klog.V(3).Infof("Use given nodeprice as whole node price")
-				nodePrice, _ = strconv.ParseFloat(cnode.Cost, 64)
-			} else {
-				klog.V(3).Infof("Use cpuprice as whole node price")
-				nodePrice, _ = strconv.ParseFloat(cnode.VCPUCost, 64) // all the price was allocated the the CPU
+
+		if cnode.GPU != "" && cnode.GPUCost == "" { // We couldn't find a gpu cost, so fix cpu and ram, then allocate the rest of the node price to gpu
+			klog.V(3).Infof("GPU without cost found, calculating...")
+			basePrice, err := strconv.ParseFloat(cnode.BaseCPUPrice, 64)
+			if err != nil {
+				return nil, err
 			}
-			if totalCPUPrice >= nodePrice {
-				totalCPUPrice = 0.9 * nodePrice // just allocate RAM costs to 10% of the node price here to avoid 0 or negative in the numerator
+			nodePrice, err := strconv.ParseFloat(cnode.Cost, 64)
+			if err != nil {
+				return nil, err
 			}
-			ramPrice := (nodePrice - totalCPUPrice) / (ram / 1024 / 1024 / 1024)
-			cpuPrice := totalCPUPrice / cpu
-
-			cnode.VCPUCost = fmt.Sprintf("%f", cpuPrice)
+			totalCPUPrice := basePrice * cpu
+			totalRAMPrice := 0.1 * totalCPUPrice
+			ramPrice := totalRAMPrice / (ram / 1024 / 1024 / 1024)
+			gpuPrice := nodePrice - totalCPUPrice - totalRAMPrice
+			cnode.VCPUCost = fmt.Sprintf("%f", basePrice)
 			cnode.RAMCost = fmt.Sprintf("%f", ramPrice)
 			cnode.RAMBytes = fmt.Sprintf("%f", ram)
-			klog.V(2).Infof("Node \"%s\" RAM Cost := %v", name, cnode.RAMCost)
+			cnode.GPUCost = fmt.Sprintf("%f", gpuPrice)
+			klog.V(2).Infof("Computed \"%s\" GPU Cost := %v", name, cnode.GPUCost)
+		} else {
+			if cnode.RAMCost == "" { // We couldn't find a ramcost, so fix cpu and allocate ram accordingly
+				basePrice, err := strconv.ParseFloat(cnode.BaseCPUPrice, 64)
+				if err != nil {
+					return nil, err
+				}
+				totalCPUPrice := basePrice * cpu
+				var nodePrice float64
+				if cnode.Cost != "" {
+					klog.V(3).Infof("Use given nodeprice as whole node price")
+					nodePrice, err = strconv.ParseFloat(cnode.Cost, 64)
+					if err != nil {
+						return nil, err
+					}
+				} else {
+					klog.V(3).Infof("Use cpuprice as whole node price")
+					nodePrice, err = strconv.ParseFloat(cnode.VCPUCost, 64) // all the price was allocated to the CPU
+					if err != nil {
+						return nil, err
+					}
+				}
+				if totalCPUPrice >= nodePrice {
+					totalCPUPrice = 0.9 * nodePrice // just allocate RAM costs to 10% of the node price here to avoid 0 or negative in the numerator
+				}
+				ramPrice := (nodePrice - totalCPUPrice) / (ram / 1024 / 1024 / 1024)
+				cpuPrice := totalCPUPrice / cpu
+
+				cnode.VCPUCost = fmt.Sprintf("%f", cpuPrice)
+				cnode.RAMCost = fmt.Sprintf("%f", ramPrice)
+				cnode.RAMBytes = fmt.Sprintf("%f", ram)
+				klog.V(3).Infof("Computed \"%s\" RAM Cost := %v", name, cnode.RAMCost)
+			}
 		}
+
 		nodes[name] = cnode
 	}
 	return nodes, nil
@@ -514,15 +600,57 @@ func getPodDeployments(clientset kubernetes.Interface, podList *v1.PodList) (map
 
 func ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Interface, cloud costAnalyzerCloud.Provider,
 	startString, endString, windowString string) (map[string]*CostData, error) {
-	queryRAMRequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD"}[` + windowString + `]) *  avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD"}[` + windowString + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
-	queryRAMUsage := `sort_desc(avg(count_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD"}[` + windowString + `]) * avg_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD"}[` + windowString + `])) by (namespace,container_name,pod_name,instance))`
-	queryCPURequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD"}[` + windowString + `]) *  avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD"}[` + windowString + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
-	queryCPUUsage := `avg(rate(container_cpu_usage_seconds_total{container_name!="",container_name!="POD"}[` + windowString + `])) by (namespace,container_name,pod_name,instance)`
-	queryGPURequests := `avg(label_replace(label_replace(avg((count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD"}[` + windowString + `]) *  avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD"}[` + windowString + `]))) by (namespace,container,pod) , "container_name","$1","container","(.+)"), "pod_name","$1","pod","(.+)") ) by (namespace,container_name, pod_name)`
+	queryRAMRequests := `avg(
+			label_replace(
+				label_replace(
+					avg(
+						count_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD", node!=""}[` + windowString + `]) 
+						*  
+						avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="",container!="POD", node!=""}[` + windowString + `])
+					) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+				), "pod_name","$1","pod","(.+)"
+			)
+		) by (namespace,container_name,pod_name,node)`
+	queryRAMUsage := `sort_desc(
+		avg(
+			label_replace(count_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD", instance!=""}[` + windowString + `]), "node", "$1", "instance","(.+)") 
+			* 
+			label_replace(avg_over_time(container_memory_usage_bytes{container_name!="",container_name!="POD", instance!=""}[` + windowString + `]), "node", "$1", "instance","(.+)") 
+		) by (namespace,container_name,pod_name,node)
+	)`
+	queryCPURequests := `avg(
+			label_replace(
+				label_replace(
+					avg(
+						count_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD", node!=""}[` + windowString + `]) 
+						*  
+						avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="",container!="POD", node!=""}[` + windowString + `])
+					) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+				), "pod_name","$1","pod","(.+)"
+			) 
+		) by (namespace,container_name,pod_name,node)`
+	queryCPUUsage := `avg(
+			label_replace(
+			  rate( 
+				container_cpu_usage_seconds_total{container_name!="",container_name!="POD", instance!=""}[` + windowString + `]
+			  ) , "node", "$1", "instance", "(.+)"
+			)
+		) by (namespace,container_name,pod_name,node)`
+	queryGPURequests := `avg(
+			label_replace(
+				label_replace(
+					avg(
+						count_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[` + windowString + `]) 
+						*  
+						avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[` + windowString + `])
+					) by (namespace,container,pod,node) , "container_name","$1","container","(.+)"
+				), "pod_name","$1","pod","(.+)"
+			) 
+		) by (namespace,container_name,pod_name,node)`
 	queryPVRequests := `avg(kube_persistentvolumeclaim_info) by (persistentvolumeclaim, storageclass, namespace) 
-	                    * 
-	                    on (persistentvolumeclaim, namespace) group_right(storageclass) 
-			    sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace)`
+							* 
+							on (persistentvolumeclaim, namespace) group_right(storageclass) 
+					sum(kube_persistentvolumeclaim_resource_requests_storage_bytes) by (persistentvolumeclaim, namespace)`
 	normalization := `max(count_over_time(kube_pod_container_resource_requests_memory_bytes{}[` + windowString + `]))`
 
 	layout := "2006-01-02T15:04:05.000Z"
@@ -701,7 +829,7 @@ func ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Inte
 			for i, container := range pod.Spec.Containers {
 				containerName := container.Name
 
-				newKey := ns + "," + podName + "," + containerName
+				newKey := newContainerMetricFromValues(ns, podName, containerName, pod.Spec.NodeName).Key()
 
 				RAMReqV, ok := RAMReqMap[newKey]
 				if !ok {
@@ -761,8 +889,7 @@ func ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Inte
 
 		} else {
 			// The container has been deleted. Not all information is sent to prometheus via ksm, so fill out what we can without k8s api
-			// TODO: The nodename should be available from the prometheus query. Check if that node still exists and use that price
-			klog.V(3).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
+			klog.V(4).Info("The container " + key + " has been deleted. Calculating allocation but resulting object will be missing data.")
 			c, _ := newContainerMetricFromKey(key)
 			RAMReqV, ok := RAMReqMap[key]
 			if !ok {
@@ -789,9 +916,18 @@ func ComputeCostDataRange(cli prometheusClient.Client, clientset kubernetes.Inte
 				klog.V(2).Info("no CPU usage for " + key)
 				CPUUsedV = []*Vector{}
 			}
-			costs := &CostData{ // TODO: Expand the prometheus query/use prometheus to query for more data here if it exists.
+			var node *costAnalyzerCloud.Node
+			if n, ok := nodes[c.NodeName]; !ok {
+				//TODO: The node has been deleted from kubernetes as well. You will need to query historical node data to get it.
+				klog.V(2).Infof("Node \"%s\" has been deleted from Kubernetes. Query historical data to get it.", c.NodeName)
+			} else {
+				node = n
+			}
+			costs := &CostData{
 				Name:      c.ContainerName,
 				PodName:   c.PodName,
+				NodeName:  c.NodeName,
+				NodeData:  node,
 				Namespace: c.Namespace,
 				RAMReq:    RAMReqV,
 				RAMUsed:   RAMUsedV,
@@ -1058,27 +1194,39 @@ type ContainerMetric struct {
 	Namespace     string
 	PodName       string
 	ContainerName string
+	NodeName      string
 }
 
 func (c *ContainerMetric) Key() string {
-	return c.Namespace + "," + c.PodName + "," + c.ContainerName
+	return c.Namespace + "," + c.PodName + "," + c.ContainerName + "," + c.NodeName
 }
 
 func newContainerMetricFromKey(key string) (*ContainerMetric, error) {
 	s := strings.Split(key, ",")
-	if len(s) == 3 {
+	if len(s) == 4 {
 		return &ContainerMetric{
 			Namespace:     s[0],
 			PodName:       s[1],
 			ContainerName: s[2],
+			NodeName:      s[3],
 		}, nil
 	}
 	return nil, fmt.Errorf("Not a valid key")
 }
 
+func newContainerMetricFromValues(ns string, podName string, containerName string, nodeName string) *ContainerMetric {
+	return &ContainerMetric{
+		Namespace:     ns,
+		PodName:       podName,
+		ContainerName: containerName,
+		NodeName:      nodeName,
+	}
+}
+
 func newContainerMetricsFromPod(pod v1.Pod) ([]*ContainerMetric, error) {
 	podName := pod.GetObjectMeta().GetName()
 	ns := pod.GetObjectMeta().GetNamespace()
+	node := pod.Spec.NodeName
 	var cs []*ContainerMetric
 	for _, container := range pod.Spec.Containers {
 		containerName := container.Name
@@ -1086,6 +1234,7 @@ func newContainerMetricsFromPod(pod v1.Pod) ([]*ContainerMetric, error) {
 			Namespace:     ns,
 			PodName:       podName,
 			ContainerName: containerName,
+			NodeName:      node,
 		})
 	}
 	return cs, nil
@@ -1116,10 +1265,19 @@ func newContainerMetricFromPrometheus(metrics map[string]interface{}) (*Containe
 	if !ok {
 		return nil, fmt.Errorf("Prometheus vector does not have string namespace")
 	}
+	node, ok := metrics["node"]
+	if !ok {
+		return nil, fmt.Errorf("Prometheus vector does not have node name")
+	}
+	nodeName, ok := node.(string)
+	if !ok {
+		return nil, fmt.Errorf("Prometheus vector does not have string nodename")
+	}
 	return &ContainerMetric{
 		ContainerName: containerName,
 		PodName:       podName,
 		Namespace:     namespace,
+		NodeName:      nodeName,
 	}, nil
 }
 
@@ -1163,7 +1321,7 @@ func getContainerMetricVector(qr interface{}, normalize bool, normalizationValue
 			Timestamp: dataPoint[0].(float64),
 			Value:     v,
 		}
-		klog.V(2).Info("key: " + containerMetric.Key())
+		klog.V(4).Info("key: " + containerMetric.Key())
 		containerData[containerMetric.Key()] = []*Vector{toReturn}
 	}
 	return containerData, nil

+ 25 - 0
main.go

@@ -123,6 +123,29 @@ func (p *Accesses) GetAllNodePricing(w http.ResponseWriter, r *http.Request, ps
 	w.Write(wrapData(data, err))
 }
 
+func (p *Accesses) GetConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Access-Control-Allow-Origin", "*")
+	data, err := p.Cloud.GetConfig()
+	w.Write(wrapData(data, err))
+}
+
+func (p *Accesses) UpdateConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Access-Control-Allow-Origin", "*")
+	data, err := p.Cloud.UpdateConfig(r.Body)
+	if err != nil {
+		w.Write(wrapData(data, err))
+		return
+	}
+	w.Write(wrapData(data, err))
+	err = p.Cloud.DownloadPricingData()
+	if err != nil {
+		klog.V(1).Infof("Error redownloading data on config update: %s", err.Error())
+	}
+	return
+}
+
 func Healthz(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
 	w.WriteHeader(200)
 	w.Header().Set("Content-Length", "0")
@@ -266,7 +289,9 @@ func main() {
 	router.GET("/outOfClusterCosts", a.OutofClusterCosts)
 	router.GET("/allNodePricing", a.GetAllNodePricing)
 	router.GET("/healthz", Healthz)
+	router.GET("/getConfigs", a.GetConfigs)
 	router.POST("/refreshPricing", a.RefreshPricingData)
+	router.POST("/updateConfigs", a.UpdateConfigs)
 
 	rootMux := http.NewServeMux()
 	rootMux.Handle("/", router)