浏览代码

assorted model accuracy fixes

AjayTripathy 7 年之前
父节点
当前提交
8b1a1b9093
共有 4 个文件被更改,包括 34 次插入3 次删除
  1. 1 0
      Dockerfile
  2. 1 0
      cloud/provider.go
  3. 2 2
      costmodel/costmodel.go
  4. 30 1
      main.go

+ 1 - 0
Dockerfile

@@ -34,4 +34,5 @@ ADD ./cloud/default.json /models/default.json
 ADD ./cloud/azure.json /models/azure.json
 ADD ./cloud/aws.json /models/aws.json
 ADD ./cloud/gcp.json /models/gcp.json
+USER 1001
 ENTRYPOINT ["/go/bin/app"]

+ 1 - 0
cloud/provider.go

@@ -164,6 +164,7 @@ type CustomPricing struct {
 	AzureClientID       string `json:"azureClientID"`
 	AzureClientSecret   string `json:"azureClientSecret"`
 	AzureTenantID       string `json:"azureTenantID"`
+	CurrencyCode        string `json:"currencyCode"`
 }
 
 func SetCustomPricingField(obj *CustomPricing, name string, value string) error {

+ 2 - 2
costmodel/costmodel.go

@@ -1277,7 +1277,7 @@ func getPVInfoVectors(qr interface{}) (map[string]*PersistentVolumeClaimData, er
 			strVal := dataPoint[1].(string)
 			v, _ := strconv.ParseFloat(strVal, 64)
 			vectors = append(vectors, &Vector{
-				Timestamp: dataPoint[0].(float64),
+				Timestamp: math.Round(dataPoint[0].(float64)/10) * 10,
 				Value:     v,
 			})
 		}
@@ -1620,7 +1620,7 @@ func getContainerMetricVectors(qr interface{}, normalize bool, normalizationValu
 				v = v / normalizationValue
 			}
 			vectors = append(vectors, &Vector{
-				Timestamp: dataPoint[0].(float64),
+				Timestamp: math.Round(dataPoint[0].(float64)/10) * 10,
 				Value:     v,
 			})
 		}

+ 30 - 1
main.go

@@ -50,6 +50,7 @@ type Accesses struct {
 	NodeTotalPriceRecorder        *prometheus.GaugeVec
 	RAMAllocationRecorder         *prometheus.GaugeVec
 	CPUAllocationRecorder         *prometheus.GaugeVec
+	GPUAllocationRecorder         *prometheus.GaugeVec
 }
 
 type DataEnvelope struct {
@@ -138,6 +139,23 @@ func (a *Accesses) CostDataModel(w http.ResponseWriter, r *http.Request, ps http
 	}
 }
 
+func (a *Accesses) ClusterCostsOverTime(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { // HTTP handler: reads the start/end/timeWindow/offset query parameters and writes the wrapped result of costModel.ClusterCostsOverTime.
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Access-Control-Allow-Origin", "*") // allow requests from any origin
+
+	start := r.URL.Query().Get("start")
+	end := r.URL.Query().Get("end")
+	window := r.URL.Query().Get("timeWindow")
+	offset := r.URL.Query().Get("offset")
+
+	if offset != "" { // only a caller-supplied offset is decorated; an empty one is passed through unchanged
+		offset = "offset " + offset // presumably forms a PromQL offset modifier — TODO confirm against costModel.ClusterCostsOverTime
+	}
+
+	data, err := costModel.ClusterCostsOverTime(a.PrometheusClient, start, end, window, offset)
+	w.Write(wrapData(data, err)) // err is handed to wrapData rather than checked here; the result of Write is discarded
+}
+
 func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
 	w.Header().Set("Content-Type", "application/json")
 	w.Header().Set("Access-Control-Allow-Origin", "*")
@@ -245,7 +263,7 @@ func (a *Accesses) recordPrices() {
 	go func() {
 		for {
 			klog.V(3).Info("Recording prices...")
-			data, err := costModel.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.Cloud, "1m", "")
+			data, err := costModel.ComputeCostData(a.PrometheusClient, a.KubeClientSet, a.Cloud, "2m", "")
 			if err != nil {
 				klog.V(1).Info("Error in price recording: " + err.Error())
 				// zero the for loop so the time.Sleep will still work
@@ -289,6 +307,10 @@ func (a *Accesses) recordPrices() {
 				if len(costs.CPUAllocation) > 0 {
 					a.CPUAllocationRecorder.WithLabelValues(namespace, podName, containerName, nodeName, nodeName).Set(costs.CPUAllocation[0].Value)
 				}
+				if len(costs.GPUReq) > 0 {
+					// Allocation here is set to the request because shared GPU usage is not yet supported.
+					a.GPUAllocationRecorder.WithLabelValues(namespace, podName, containerName, nodeName, nodeName).Set(costs.GPUReq[0].Value)
+				}
 
 				storageClasses, _ := a.KubeClientSet.StorageV1().StorageClasses().List(metav1.ListOptions{})
 
@@ -400,6 +422,11 @@ func main() {
 		Help: "container_cpu_allocation Percent of a single CPU used in a minute",
 	}, []string{"namespace", "pod", "container", "instance", "node"})
 
+	GPUAllocation := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "container_gpu_allocation",
+		Help: "container_gpu_allocation GPU used",
+	}, []string{"namespace", "pod", "container", "instance", "node"})
+
 	prometheus.MustRegister(cpuGv)
 	prometheus.MustRegister(ramGv)
 	prometheus.MustRegister(gpuGv)
@@ -418,6 +445,7 @@ func main() {
 		NodeTotalPriceRecorder:        totalGv,
 		RAMAllocationRecorder:         RAMAllocation,
 		CPUAllocationRecorder:         CPUAllocation,
+		GPUAllocationRecorder:         GPUAllocation,
 		PersistentVolumePriceRecorder: pvGv,
 	}
 
@@ -440,6 +468,7 @@ func main() {
 	router.POST("/updateAthenaInfoConfigs", a.UpdateAthenaInfoConfigs)
 	router.POST("/updateBigQueryInfoConfigs", a.UpdateBigQueryInfoConfigs)
 	router.POST("/updateConfigByKey", a.UpdateConfigByKey)
+	router.GET("/clusterCostsOverTime", a.ClusterCostsOverTime)
 
 	rootMux := http.NewServeMux()
 	rootMux.Handle("/", router)