Browse Source

CostModel.ComputeAllocation: error handling; variable resolution; notes

Niko Kovacevic 5 years ago
parent
commit
6181c25aad
2 changed files with 61 additions and 46 deletions
  1. 51 25
      pkg/costmodel/allocation.go
  2. 10 21
      pkg/util/time.go

+ 51 - 25
pkg/costmodel/allocation.go

@@ -9,9 +9,12 @@ import (
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/prom"
 	"github.com/kubecost/cost-model/pkg/thanos"
+	"github.com/kubecost/cost-model/pkg/util"
 	"k8s.io/apimachinery/pkg/labels"
 )
 
+// TODO niko/allocation drop "container" from queryFmtMinutes?
+
 const (
 	queryFmtMinutes               = `avg(kube_pod_container_status_running{}) by (container, pod, namespace, cluster_id)[%s:%s]%s`
 	queryFmtRAMBytesAllocated     = `avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`
@@ -47,16 +50,17 @@ const (
 	queryFmtJobLabels             = `sum(avg_over_time(kube_pod_owner{owner_kind="Job"}[%s]%s)) by (pod, owner_name, namespace ,cluster_id)`
 )
 
-// TODO niko/computeallocation idle minutes = 1?
-
 // ComputeAllocation uses the CostModel instance to compute an AllocationSet
 // for the window defined by the given start and end times. The Allocations
 // returned are unaggregated (i.e. down to the container level).
-func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.AllocationSet, error) {
+func (cm *CostModel) ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
 	// Create a window spanning the requested query
 	s, e := start, end
 	window := kubecost.NewWindow(&s, &e)
 
+	// Convert resolution duration to a query-ready string
+	resStr := util.DurationString(resolution)
+
 	// Create an empty AllocationSet. For safety, in the case of an error, we
 	// should prefer to return this empty set with the error. (In the case of
 	// no error, of course we populate the set and return it.)
@@ -99,19 +103,47 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.Allocati
 		offStr = ""
 	}
 
-	// TODO niko/computeallocation dynamic resolution? add to ComputeAllocation() in allocation.Source?
-	resStr := "1m"
-	// resPerHr := 60
+	// Build out a map of Allocations, starting with (start, end) so that we
+	// begin with minutes, from which we compute resource allocation and cost
+	// totals from measured rate data.
+	allocationMap := map[containerKey]*kubecost.Allocation{}
 
-	startQuerying := time.Now()
+	// Keep track of the allocations per pod, for the sake of splitting PVC and
+	// Network allocation into per-Allocation from per-Pod.
+	podAllocation := map[podKey][]*kubecost.Allocation{}
 
-	ctx := prom.NewContext(cm.PrometheusClient)
+	// clusterStarts and clusterEnds record the earliest start and latest end
+	// times, respectively, on a cluster-basis. These are used for unmounted
+	// PVs and other "virtual" Allocations so that minutes are maximally
+	// accurate during start-up or spin-down of a cluster
+	clusterStart := map[string]time.Time{}
+	clusterEnd := map[string]time.Time{}
 
-	// TODO niko/computeallocation split into required and optional queries?
+	ctx := prom.NewContext(cm.PrometheusClient)
+	startQuerying := time.Now()
 
 	queryMinutes := fmt.Sprintf(queryFmtMinutes, durStr, resStr, offStr)
 	resChMinutes := ctx.Query(queryMinutes)
 
+	// ------------------------------------------------------------------------
+	// TODO niko/compute-allocation remove logs
+	log.Infof("CostModel.ComputeAllocation: %s", queryMinutes)
+	// ------------------------------------------------------------------------
+
+	// TODO niko/computeallocation make this a loop in the case that the window
+	// is greater than... 4h? 6h?
+
+	resMinutes, err := resChMinutes.Await()
+	if err != nil {
+		// TODO niko/computeallocation do what with the error?
+	}
+
+	buildAllocationMap(window, allocationMap, podAllocation, clusterStart, clusterEnd, resMinutes)
+
+	log.Profile(startQuerying, "CostModel.ComputeAllocation: allocation map built")
+
+	// TODO niko/computeallocation split into required and optional queries?
+
 	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, offStr)
 	resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
 
@@ -205,7 +237,6 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.Allocati
 	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, offStr)
 	resChJobLabels := ctx.Query(queryJobLabels)
 
-	resMinutes, _ := resChMinutes.Await()
 	resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
 	resCPURequests, _ := resChCPURequests.Await()
 	resCPUUsage, _ := resChCPUUsage.Await()
@@ -244,25 +275,17 @@ func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.Allocati
 	resJobLabels, _ := resChJobLabels.Await()
 
 	log.Profile(startQuerying, "CostModel.ComputeAllocation: queries complete")
-	defer log.Profile(time.Now(), "CostModel.ComputeAllocation: processing complete")
 
-	// Build out a map of Allocations, starting with (start, end) so that we
-	// begin with minutes, from which we compute resource allocation and cost
-	// totals from measured rate data.
-	allocationMap := map[containerKey]*kubecost.Allocation{}
+	if ctx.HasErrors() {
+		for _, err := range ctx.Errors() {
+			log.Errorf("CostModel.ComputeAllocation: %s", err)
+		}
 
-	// Keep track of the allocations per pod, for the sake of splitting PVC and
-	// Network allocation into per-Allocation from per-Pod.
-	podAllocation := map[podKey][]*kubecost.Allocation{}
+		return allocSet, ctx.ErrorCollection()
+	}
 
-	// clusterStarts and clusterEnds record the earliest start and latest end
-	// times, respectively, on a cluster-basis. These are used for unmounted
-	// PVs and other "virtual" Allocations so that minutes are maximally
-	// accurate during start-up or spin-down of a cluster
-	clusterStart := map[string]time.Time{}
-	clusterEnd := map[string]time.Time{}
+	defer log.Profile(time.Now(), "CostModel.ComputeAllocation: processing complete")
 
-	buildAllocationMap(window, allocationMap, podAllocation, clusterStart, clusterEnd, resMinutes)
 	applyCPUCoresAllocated(allocationMap, resCPUCoresAllocated)
 	applyCPUCoresRequested(allocationMap, resCPURequests)
 	applyCPUCoresUsed(allocationMap, resCPUUsage)
@@ -433,6 +456,9 @@ func buildAllocationMap(window kubecost.Window, allocationMap map[containerKey]*
 		// already represents the end of the last minute.
 		var allocStart, allocEnd time.Time
 		for _, datum := range res.Values {
+
+			// TODO niko/computeallocation if Value == 0.5, scale down by 50% in both directions!
+
 			t := time.Unix(int64(datum.Timestamp), 0)
 			if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
 				allocStart = t

+ 10 - 21
pkg/util/time.go

@@ -32,11 +32,10 @@ const (
 	DaysPerMonth = 30.42
 )
 
-// DurationOffsetStrings converts a (duration, offset) pair to Prometheus-
-// compatible strings in terms of days, hours, minutes, or seconds.
-func DurationOffsetStrings(duration, offset time.Duration) (string, string) {
+// DurationString converts a duration to a Prometheus-compatible string in
+// terms of days, hours, minutes, or seconds.
+func DurationString(duration time.Duration) string {
 	durSecs := int64(duration.Seconds())
-	offSecs := int64(offset.Seconds())
 
 	durStr := ""
 	if durSecs > 0 {
@@ -55,23 +54,13 @@ func DurationOffsetStrings(duration, offset time.Duration) (string, string) {
 		}
 	}
 
-	offStr := ""
-	if offSecs > 0 {
-		if offSecs%SecsPerDay == 0 {
-			// convert to days
-			offStr = fmt.Sprintf("%dd", offSecs/SecsPerDay)
-		} else if offSecs%SecsPerHour == 0 {
-			// convert to hours
-			offStr = fmt.Sprintf("%dh", offSecs/SecsPerHour)
-		} else if offSecs%SecsPerMin == 0 {
-			// convert to mins
-			offStr = fmt.Sprintf("%dm", offSecs/SecsPerMin)
-		} else if offSecs > 0 {
-			// default to mins, as long as offation is positive
-			offStr = fmt.Sprintf("%ds", offSecs)
-		}
-	}
-	return durStr, offStr
+	return durStr
+}
+
+// DurationOffsetStrings converts a (duration, offset) pair to Prometheus-
+// compatible strings in terms of days, hours, minutes, or seconds.
+func DurationOffsetStrings(duration, offset time.Duration) (string, string) {
+	return DurationString(duration), DurationString(offset)
 }
 
 // ParseDuration converts a Prometheus-style duration string into a Duration