| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197 |
- package costmodel
- import (
- "fmt"
- "time"
- "github.com/kubecost/cost-model/pkg/env"
- "github.com/kubecost/cost-model/pkg/kubecost"
- "github.com/kubecost/cost-model/pkg/log"
- "github.com/kubecost/cost-model/pkg/prom"
- "github.com/kubecost/cost-model/pkg/thanos"
- )
- // TODO niko/cdmr move to pkg/kubecost
- // TODO niko/cdmr add PersistenVolumeClaims to type Allocation?
- type PVC struct {
- Bytes float64 `json:"bytes"`
- Count int `json:"count"`
- Name string `json:"name"`
- Cluster string `json:"cluster"`
- Namespace string `json:"namespace"`
- Volume *PV `json:"persistentVolume"`
- Start time.Time `json:"start"`
- End time.Time `json:"end"`
- }
- func (pvc *PVC) Cost() float64 {
- if pvc == nil || pvc.Volume == nil {
- return 0.0
- }
- gib := pvc.Bytes / 1024 / 1024 / 1024
- hrs := pvc.Minutes() / 60.0
- return pvc.Volume.CostPerGiBHour * gib * hrs
- }
- func (pvc *PVC) Minutes() float64 {
- if pvc == nil {
- return 0.0
- }
- return pvc.End.Sub(pvc.Start).Minutes()
- }
- func (pvc *PVC) String() string {
- if pvc == nil {
- return "<nil>"
- }
- return fmt.Sprintf("%s/%s/%s{Bytes:%.2f, Cost:%.6f, Start,End:%s}", pvc.Cluster, pvc.Namespace, pvc.Name, pvc.Bytes, pvc.Cost(), kubecost.NewWindow(&pvc.Start, &pvc.End))
- }
// TODO niko/cdmr move to pkg/kubecost

// PV describes a PersistentVolume: its size, its hourly cost per GiB, and the
// labels identifying it.
type PV struct {
	// Bytes is the capacity of the volume, in bytes.
	Bytes float64 `json:"bytes"`
	// CostPerGiBHour is the price of one GiB of the volume for one hour.
	CostPerGiBHour float64 `json:"costPerGiBHour"` // TODO niko/cdmr GiB or GB?
	// Cluster is the ID of the cluster the volume belongs to.
	Cluster string `json:"cluster"`
	// Name is the name of the volume.
	Name string `json:"name"`
	// StorageClass is the name of the volume's storage class.
	StorageClass string `json:"storageClass"`
}
- func (pv *PV) String() string {
- if pv == nil {
- return "<nil>"
- }
- return fmt.Sprintf("%s/%s{Bytes:%.2f, Cost/GiB*Hr:%.6f, StorageClass:%s}", pv.Cluster, pv.Name, pv.Bytes, pv.CostPerGiBHour, pv.StorageClass)
- }
- // ComputeAllocation uses the CostModel instance to compute an AllocationSet
- // for the window defined by the given start and end times. The Allocations
- // returned are unaggregated (i.e. down to the container level).
- func (cm *CostModel) ComputeAllocation(start, end time.Time) (*kubecost.AllocationSet, error) {
- // Create a window spanning the requested query
- s, e := start, end
- window := kubecost.NewWindow(&s, &e)
- // Create an empty AllocationSet. For safety, in the case of an error, we
- // should prefer to return this empty set with the error. (In the case of
- // no error, of course we populate the set and return it.)
- allocSet := kubecost.NewAllocationSet(start, end)
- // Convert window (start, end) to (duration, offset) for querying Prometheus
- timesToDurations := func(s, e time.Time) (dur, off time.Duration) {
- now := time.Now()
- off = now.Sub(e)
- dur = e.Sub(s)
- return dur, off
- }
- duration, offset := timesToDurations(start, end)
- // If using Thanos, increase offset to 3 hours, reducing the duration by
- // equal measure to maintain the same starting point.
- thanosDur := thanos.OffsetDuration()
- // TODO niko/cdmr confirm that this flag works interchangeably with ThanosClient != nil
- if offset < thanosDur && env.IsThanosEnabled() {
- diff := thanosDur - offset
- offset += diff
- duration -= diff
- }
- // If duration < 0, return an empty set
- if duration < 0 {
- return allocSet, nil
- }
- // Negative offset means that the end time is in the future. Prometheus
- // fails for non-positive offset values, so shrink the duration and
- // remove the offset altogether.
- if offset < 0 {
- duration = duration + offset
- offset = 0
- }
- durStr := fmt.Sprintf("%dm", int64(duration.Minutes()))
- offStr := fmt.Sprintf(" offset %dm", int64(offset.Minutes()))
- if offset < time.Minute {
- offStr = ""
- }
- // TODO niko/cdmr dynamic resolution? add to ComputeAllocation() in allocation.Source?
- resStr := "1m"
- // resPerHr := 60
- // TODO niko/cdmr remove after testing
- startQuerying := time.Now()
- ctx := prom.NewContext(cm.PrometheusClient)
- // TODO niko/cdmr retries? (That should probably go into the Store.)
- // TODO niko/cmdr check: will multiple Prometheus jobs multiply the totals?
- // TODO niko/cdmr should we try doing this without resolution? Could yield
- // more accurate results, but might also be more challenging in some
- // respects; e.g. "correcting" the start point by what amount?
- queryMinutes := fmt.Sprintf(`avg(kube_pod_container_status_running{}) by (container, pod, namespace, kubernetes_node, cluster_id)[%s:%s]%s`, durStr, resStr, offStr)
- resChMinutes := ctx.Query(queryMinutes)
- queryRAMBytesAllocated := fmt.Sprintf(`avg(avg_over_time(container_memory_allocation_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
- resChRAMBytesAllocated := ctx.Query(queryRAMBytesAllocated)
- queryRAMRequests := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests_memory_bytes{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
- resChRAMRequests := ctx.Query(queryRAMRequests)
- queryRAMUsage := fmt.Sprintf(`avg(avg_over_time(container_memory_working_set_bytes{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`, durStr, offStr)
- resChRAMUsage := ctx.Query(queryRAMUsage)
- queryCPUCoresAllocated := fmt.Sprintf(`avg(avg_over_time(container_cpu_allocation{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
- resChCPUCoresAllocated := ctx.Query(queryCPUCoresAllocated)
- queryCPURequests := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests_cpu_cores{container!="", container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
- resChCPURequests := ctx.Query(queryCPURequests)
- queryCPUUsage := fmt.Sprintf(`avg(rate(container_cpu_usage_seconds_total{container_name!="", container_name!="POD", instance!=""}[%s]%s)) by (container_name, pod_name, namespace, instance, cluster_id)`, durStr, offStr)
- resChCPUUsage := ctx.Query(queryCPUUsage)
- // TODO niko/cdmr find an env with GPUs to test this (generate one?)
- queryGPUsRequested := fmt.Sprintf(`avg(avg_over_time(kube_pod_container_resource_requests{resource="nvidia_com_gpu", container!="",container!="POD", node!=""}[%s]%s)) by (container, pod, namespace, node, cluster_id)`, durStr, offStr)
- resChGPUsRequested := ctx.Query(queryGPUsRequested)
- queryNodeCostPerCPUHr := fmt.Sprintf(`avg(avg_over_time(node_cpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
- resChNodeCostPerCPUHr := ctx.Query(queryNodeCostPerCPUHr)
- queryNodeCostPerRAMGiBHr := fmt.Sprintf(`avg(avg_over_time(node_ram_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
- resChNodeCostPerRAMGiBHr := ctx.Query(queryNodeCostPerRAMGiBHr)
- queryNodeCostPerGPUHr := fmt.Sprintf(`avg(avg_over_time(node_gpu_hourly_cost[%s]%s)) by (node, cluster_id, instance_type)`, durStr, offStr)
- resChNodeCostPerGPUHr := ctx.Query(queryNodeCostPerGPUHr)
- queryNodeIsSpot := fmt.Sprintf(`avg_over_time(kubecost_node_is_spot[%s]%s)`, durStr, offStr)
- resChNodeIsSpot := ctx.Query(queryNodeIsSpot)
- queryPVCInfo := fmt.Sprintf(`avg(kube_persistentvolumeclaim_info{volumename != ""}) by (persistentvolumeclaim, storageclass, volumename, namespace, cluster_id)[%s:%s]%s`, durStr, resStr, offStr)
- resChPVCInfo := ctx.Query(queryPVCInfo)
- queryPVBytes := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolume_capacity_bytes[%s]%s)) by (persistentvolume, cluster_id)`, durStr, offStr)
- resChPVBytes := ctx.Query(queryPVBytes)
- queryPodPVCAllocation := fmt.Sprintf(`avg(avg_over_time(pod_pvc_allocation[%s]%s)) by (persistentvolume, persistentvolumeclaim, pod, namespace, cluster_id)`, durStr, offStr)
- resChPodPVCAllocation := ctx.Query(queryPodPVCAllocation)
- queryPVCBytesRequested := fmt.Sprintf(`avg(avg_over_time(kube_persistentvolumeclaim_resource_requests_storage_bytes{}[%s]%s)) by (persistentvolumeclaim, namespace, cluster_id)`, durStr, offStr)
- resChPVCBytesRequested := ctx.Query(queryPVCBytesRequested)
- queryPVCostPerGiBHour := fmt.Sprintf(`avg(avg_over_time(pv_hourly_cost[%s]%s)) by (volumename, cluster_id)`, durStr, offStr)
- resChPVCostPerGiBHour := ctx.Query(queryPVCostPerGiBHour)
- // TODO niko/cdmr
- // queryNetZoneRequests := fmt.Sprintf()
- // resChNetZoneRequests := ctx.Query(queryNetZoneRequests)
- // TODO niko/cdmr
- // queryNetRegionRequests := fmt.Sprintf()
- // resChNetRegionRequests := ctx.Query(queryNetRegionRequests)
- // TODO niko/cdmr
- // queryNetInternetRequests := fmt.Sprintf()
- // resChNetInternetRequests := ctx.Query(queryNetInternetRequests)
- // TODO niko/cdmr
- // queryNamespaceLabels := fmt.Sprintf()
- // resChNamespaceLabels := ctx.Query(queryNamespaceLabels)
- // TODO niko/cdmr
- // queryPodLabels := fmt.Sprintf()
- // resChPodLabels := ctx.Query(queryPodLabels)
- // TODO niko/cdmr
- // queryNamespaceAnnotations := fmt.Sprintf()
- // resChNamespaceAnnotations := ctx.Query(queryNamespaceAnnotations)
- // TODO niko/cdmr
- // queryPodAnnotations := fmt.Sprintf()
- // resChPodAnnotations := ctx.Query(queryPodAnnotations)
- // TODO niko/cdmr
- // queryServiceLabels := fmt.Sprintf()
- // resChServiceLabels := ctx.Query(queryServiceLabels)
- // TODO niko/cdmr
- // queryDeploymentLabels := fmt.Sprintf()
- // resChDeploymentLabels := ctx.Query(queryDeploymentLabels)
- // TODO niko/cdmr
- // queryStatefulSetLabels := fmt.Sprintf()
- // resChStatefulSetLabels := ctx.Query(queryStatefulSetLabels)
- // TODO niko/cdmr
- // queryDaemonSetLabels := fmt.Sprintf()
- // resChDaemonSetLabels := ctx.Query(queryDaemonSetLabels)
- // TODO niko/cdmr
- // queryJobLabels := fmt.Sprintf()
- // resChJobLabels := ctx.Query(queryJobLabels)
- resMinutes, _ := resChMinutes.Await()
- resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
- resCPURequests, _ := resChCPURequests.Await()
- resCPUUsage, _ := resChCPUUsage.Await()
- resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
- resRAMRequests, _ := resChRAMRequests.Await()
- resRAMUsage, _ := resChRAMUsage.Await()
- resGPUsRequested, _ := resChGPUsRequested.Await()
- resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
- resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
- resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
- resNodeIsSpot, _ := resChNodeIsSpot.Await()
- resPVBytes, _ := resChPVBytes.Await()
- resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()
- resPVCInfo, _ := resChPVCInfo.Await()
- resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
- resPodPVCAllocation, _ := resChPodPVCAllocation.Await()
- // TODO niko/cdmr remove after testing
- log.Infof("CostModel.ComputeAllocation: minutes : %s", queryMinutes)
- log.Infof("CostModel.ComputeAllocation: CPU cores: %s", queryCPUCoresAllocated)
- log.Infof("CostModel.ComputeAllocation: CPU req : %s", queryCPURequests)
- log.Infof("CostModel.ComputeAllocation: CPU use : %s", queryCPUUsage)
- log.Infof("CostModel.ComputeAllocation: $/CPU*Hr : %s", queryNodeCostPerCPUHr)
- log.Infof("CostModel.ComputeAllocation: RAM bytes: %s", queryRAMBytesAllocated)
- log.Infof("CostModel.ComputeAllocation: RAM req : %s", queryRAMRequests)
- log.Infof("CostModel.ComputeAllocation: RAM use : %s", queryRAMUsage)
- log.Infof("CostModel.ComputeAllocation: $/GiB*Hr : %s", queryNodeCostPerRAMGiBHr)
- log.Infof("CostModel.ComputeAllocation: PV $/gbhr: %s", queryPVCostPerGiBHour)
- log.Infof("CostModel.ComputeAllocation: PV bytes : %s", queryPVBytes)
- log.Infof("CostModel.ComputeAllocation: PVC alloc: %s", queryPodPVCAllocation)
- log.Infof("CostModel.ComputeAllocation: PVC bytes: %s", queryPVCBytesRequested)
- log.Infof("CostModel.ComputeAllocation: PVC info : %s", queryPVCInfo)
- log.Profile(startQuerying, "CostModel.ComputeAllocation: queries complete")
- // Build out a map of Allocations, starting with (start, end) so that we
- // begin with minutes, from which we compute resource allocation and cost
- // totals from measured rate data.
- // TODO niko/cdmr can we start with a reasonable guess at map size?
- allocationMap := map[containerKey]*kubecost.Allocation{}
- // Keep track of the number of allocations per pod, for the sake of
- // splitting PVC allocation into per-Allocation from per-Pod.
- podAllocationCount := map[podKey]int{}
- // clusterStarts and clusterEnds record the earliest start and latest end
- // times, respectively, on a cluster-basis. These are used for unmounted
- // PVs and other "virtual" Allocations so that minutes are maximally
- // accurate during start-up or spin-down of a cluster
- clusterStart := map[string]time.Time{}
- clusterEnd := map[string]time.Time{}
- buildAllocationMap(window, allocationMap, podAllocationCount, clusterStart, clusterEnd, resMinutes)
- applyCPUCoresAllocated(allocationMap, resCPUCoresAllocated)
- applyCPUCoresRequested(allocationMap, resCPURequests)
- applyCPUCoresUsed(allocationMap, resCPUUsage)
- applyRAMBytesAllocated(allocationMap, resRAMBytesAllocated)
- applyRAMBytesRequested(allocationMap, resRAMRequests)
- applyRAMBytesUsed(allocationMap, resRAMUsage)
- applyGPUsRequested(allocationMap, resGPUsRequested)
- // Build out a map of Nodes with resource costs, discounts, and node types
- // for converting resource allocation data to cumulative costs.
- nodeMap := map[nodeKey]*Node{}
- applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
- applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
- applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
- applyNodeSpot(nodeMap, resNodeIsSpot)
- applyNodeDiscount(nodeMap, cm)
- // TODO niko/cdmr comment
- pvMap := map[pvKey]*PV{}
- buildPVMap(pvMap, resPVCostPerGiBHour)
- applyPVBytes(pvMap, resPVBytes)
- // TODO niko/cdmr apply PV bytes?
- // TODO niko/cdmr comment
- pvcMap := map[pvcKey]*PVC{}
- buildPVCMap(window, pvcMap, pvMap, resPVCInfo)
- applyPVCBytesRequested(pvcMap, resPVCBytesRequested)
- // TODO niko/cdmr comment
- podPVCMap := map[podKey][]*PVC{}
- buildPodPVCMap(podPVCMap, pvMap, pvcMap, podAllocationCount, resPodPVCAllocation)
- // Identify unmounted PVs (PVs without PVCs) and add one Allocation per
- // cluster representing each cluster's unmounted PVs (if necessary).
- applyUnmountedPVs(window, allocationMap, pvMap, pvcMap)
- // TODO niko/cdmr remove logs
- log.Infof("CostModel.ComputeAllocation: %d allocations", len(allocationMap))
- log.Infof("CostModel.ComputeAllocation: %d nodes", len(nodeMap))
- log.Infof("CostModel.ComputeAllocation: %d PVs", len(pvMap))
- log.Infof("CostModel.ComputeAllocation: %d PVCs", len(pvcMap))
- log.Infof("CostModel.ComputeAllocation: %d pods with PVCs", len(podPVCMap))
- for _, node := range nodeMap {
- log.Infof("CostModel.ComputeAllocation: Node: %s: %f/CPUHr; %f/RAMHr; %f/GPUHr; %f discount", node.Name, node.CostPerCPUHr, node.CostPerRAMGiBHr, node.CostPerGPUHr, node.Discount)
- }
- for _, pv := range pvMap {
- log.Infof("CostModel.ComputeAllocation: PV: %s", pv)
- }
- for pod, pvcs := range podPVCMap {
- for _, pvc := range pvcs {
- log.Infof("CostModel.ComputeAllocation: Pod %s: PVC: %s", pod, pvc)
- }
- }
- for _, alloc := range allocationMap {
- cluster, _ := alloc.Properties.GetCluster()
- node, _ := alloc.Properties.GetNode()
- namespace, _ := alloc.Properties.GetNamespace()
- pod, _ := alloc.Properties.GetPod()
- podKey := newPodKey(cluster, namespace, pod)
- nodeKey := newNodeKey(cluster, node)
- if n, ok := nodeMap[nodeKey]; !ok {
- if pod != "unmounted-pvs" {
- log.Warningf("CostModel.ComputeAllocation: failed to find node %s for %s", nodeKey, alloc.Name)
- }
- } else {
- alloc.CPUCost = alloc.CPUCoreHours * n.CostPerCPUHr
- alloc.RAMCost = (alloc.RAMByteHours / 1024 / 1024 / 1024) * n.CostPerRAMGiBHr
- alloc.GPUCost = alloc.GPUHours * n.CostPerGPUHr
- }
- if pvcs, ok := podPVCMap[podKey]; ok {
- for _, pvc := range pvcs {
- // Determine the (start, end) of the relationship between the
- // given PVC and the associated Allocation so that a precise
- // number of hours can be used to compute cumulative cost.
- s, e := alloc.Start, alloc.End
- if pvc.Start.After(alloc.Start) {
- s = pvc.Start
- }
- if pvc.End.Before(alloc.End) {
- e = pvc.End
- }
- minutes := e.Sub(s).Minutes()
- hrs := minutes / 60.0
- gib := pvc.Bytes / 1024 / 1024 / 1024
- alloc.PVByteHours += pvc.Bytes * hrs
- alloc.PVCost += pvc.Volume.CostPerGiBHour * gib * hrs / float64(pvc.Count)
- }
- }
- alloc.TotalCost = 0.0
- alloc.TotalCost += alloc.CPUCost
- alloc.TotalCost += alloc.RAMCost
- alloc.TotalCost += alloc.GPUCost
- alloc.TotalCost += alloc.PVCost
- alloc.TotalCost += alloc.NetworkCost
- alloc.TotalCost += alloc.SharedCost
- alloc.TotalCost += alloc.ExternalCost
- allocSet.Set(alloc)
- }
- return allocSet, nil
- }
- func buildAllocationMap(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, podAllocationCount map[podKey]int, clusterStart, clusterEnd map[string]time.Time, resMinutes []*prom.QueryResult) {
- for _, res := range resMinutes {
- if len(res.Values) == 0 {
- log.Warningf("CostModel.ComputeAllocation: empty minutes result")
- continue
- }
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- labels, err := res.GetStrings("kubernetes_node", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: minutes query result missing field: %s", err)
- continue
- }
- node := labels["kubernetes_node"]
- namespace := labels["namespace"]
- pod := labels["pod"]
- container := labels["container"]
- containerKey := newContainerKey(cluster, namespace, pod, container)
- podKey := newPodKey(cluster, namespace, pod)
- // allocStart and allocEnd are the timestamps of the first and last
- // minutes the allocation was running, respectively. We subtract 1m
- // from allocStart because this point will actually represent the end
- // of the first minute. We don't subtract from allocEnd because it
- // already represents the end of the last minute.
- var allocStart, allocEnd time.Time
- for _, datum := range res.Values {
- t := time.Unix(int64(datum.Timestamp), 0)
- if allocStart.IsZero() && datum.Value > 0 && window.Contains(t) {
- allocStart = t
- }
- if datum.Value > 0 && window.Contains(t) {
- allocEnd = t
- }
- }
- if allocStart.IsZero() || allocEnd.IsZero() {
- // TODO niko/cdmr remove log?
- // log.Warningf("CostModel.ComputeAllocation: allocation %s has no running time, skipping", containerKey)
- continue
- }
- allocStart = allocStart.Add(-time.Minute)
- // Set start if unset or this datum's start time is earlier than the
- // current earliest time.
- if _, ok := clusterStart[cluster]; !ok || allocStart.Before(clusterStart[cluster]) {
- clusterStart[cluster] = allocStart
- }
- // Set end if unset or this datum's end time is later than the
- // current latest time.
- if _, ok := clusterEnd[cluster]; !ok || allocEnd.After(clusterEnd[cluster]) {
- clusterEnd[cluster] = allocEnd
- }
- name := fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container)
- alloc := &kubecost.Allocation{
- Name: name,
- Properties: kubecost.Properties{},
- Window: window.Clone(),
- Start: allocStart,
- End: allocEnd,
- }
- alloc.Properties.SetContainer(container)
- alloc.Properties.SetPod(pod)
- alloc.Properties.SetNamespace(namespace)
- alloc.Properties.SetNode(node)
- alloc.Properties.SetCluster(cluster)
- allocationMap[containerKey] = alloc
- podAllocationCount[podKey]++
- }
- }
- func applyCPUCoresAllocated(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresAllocated []*prom.QueryResult) {
- for _, res := range resCPUCoresAllocated {
- // TODO niko/cdmr do we need node here?
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: CPU allocation query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: unidentified CPU allocation query result: %s", key)
- continue
- }
- cpuCores := res.Values[0].Value
- hours := allocationMap[key].Minutes() / 60.0
- allocationMap[key].CPUCoreHours = cpuCores * hours
- }
- }
- func applyCPUCoresRequested(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresRequested []*prom.QueryResult) {
- for _, res := range resCPUCoresRequested {
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: CPU request query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- // TODO niko/cdmr remove log?
- // log.Warningf("CostModel.ComputeAllocation: unidentified CPU request query result: %s", key)
- continue
- }
- allocationMap[key].CPUCoreRequestAverage = res.Values[0].Value
- // CPU allocation is less than requests, so set CPUCoreHours to
- // request level.
- // TODO niko/cdmr why is this happening?
- if allocationMap[key].CPUCores() < res.Values[0].Value {
- allocationMap[key].CPUCoreHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
- }
- }
- }
- func applyCPUCoresUsed(allocationMap map[containerKey]*kubecost.Allocation, resCPUCoresUsed []*prom.QueryResult) {
- for _, res := range resCPUCoresUsed {
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: CPU usage query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: unidentified CPU usage query result: %s", key)
- continue
- }
- allocationMap[key].CPUCoreUsageAverage = res.Values[0].Value
- }
- }
- func applyRAMBytesRequested(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesRequested []*prom.QueryResult) {
- for _, res := range resRAMBytesRequested {
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: RAM request query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- // TODO niko/cdmr remove log?
- // log.Warningf("CostModel.ComputeAllocation: unidentified RAM request query result: %s", key)
- continue
- }
- allocationMap[key].RAMBytesRequestAverage = res.Values[0].Value
- // RAM allocation is less than requests, so set RAMByteHours to
- // request level.
- // TODO niko/cdmr why is this happening?
- if allocationMap[key].RAMBytes() < res.Values[0].Value {
- allocationMap[key].RAMByteHours = res.Values[0].Value * (allocationMap[key].Minutes() / 60.0)
- }
- }
- }
- func applyRAMBytesUsed(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesUsed []*prom.QueryResult) {
- for _, res := range resRAMBytesUsed {
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod_name", "container_name")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: RAM usage query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: unidentified RAM usage query result: %s", key)
- continue
- }
- allocationMap[key].RAMBytesUsageAverage = res.Values[0].Value
- }
- }
- func applyRAMBytesAllocated(allocationMap map[containerKey]*kubecost.Allocation, resRAMBytesAllocated []*prom.QueryResult) {
- for _, res := range resRAMBytesAllocated {
- // TODO niko/cdmr do we need node here?
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: RAM allocation query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: unidentified RAM allocation query result: %s", key)
- continue
- }
- ramBytes := res.Values[0].Value
- hours := allocationMap[key].Minutes() / 60.0
- allocationMap[key].RAMByteHours = ramBytes * hours
- }
- }
- func applyGPUsRequested(allocationMap map[containerKey]*kubecost.Allocation, resGPUsRequested []*prom.QueryResult) {
- for _, res := range resGPUsRequested {
- // TODO niko/cdmr do we need node here?
- key, err := resultContainerKey(res, "cluster_id", "namespace", "pod", "container")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: GPU allocation query result missing field: %s", err)
- continue
- }
- _, ok := allocationMap[key]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: unidentified GPU allocation query result: %s", key)
- continue
- }
- // TODO niko/cdmr complete
- log.Infof("CostModel.ComputeAllocation: GPU results: %s=%f", key, res.Values[0].Value)
- }
- }
- func applyNodeCostPerCPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerCPUHr []*prom.QueryResult) {
- for _, res := range resNodeCostPerCPUHr {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- node, err := res.GetString("node")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
- continue
- }
- instanceType, err := res.GetString("instance_type")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node CPU cost query result missing field: %s", err)
- continue
- }
- key := newNodeKey(cluster, node)
- if _, ok := nodeMap[key]; !ok {
- nodeMap[key] = &Node{
- Name: node,
- NodeType: instanceType,
- }
- }
- nodeMap[key].CostPerCPUHr = res.Values[0].Value
- }
- }
- func applyNodeCostPerRAMGiBHr(nodeMap map[nodeKey]*Node, resNodeCostPerRAMGiBHr []*prom.QueryResult) {
- for _, res := range resNodeCostPerRAMGiBHr {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- node, err := res.GetString("node")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
- continue
- }
- instanceType, err := res.GetString("instance_type")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node RAM cost query result missing field: %s", err)
- continue
- }
- key := newNodeKey(cluster, node)
- if _, ok := nodeMap[key]; !ok {
- nodeMap[key] = &Node{
- Name: node,
- NodeType: instanceType,
- }
- }
- nodeMap[key].CostPerRAMGiBHr = res.Values[0].Value
- }
- }
- func applyNodeCostPerGPUHr(nodeMap map[nodeKey]*Node, resNodeCostPerGPUHr []*prom.QueryResult) {
- for _, res := range resNodeCostPerGPUHr {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- node, err := res.GetString("node")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
- continue
- }
- instanceType, err := res.GetString("instance_type")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node GPU cost query result missing field: %s", err)
- continue
- }
- key := newNodeKey(cluster, node)
- if _, ok := nodeMap[key]; !ok {
- nodeMap[key] = &Node{
- Name: node,
- NodeType: instanceType,
- }
- }
- nodeMap[key].CostPerGPUHr = res.Values[0].Value
- }
- }
- func applyNodeSpot(nodeMap map[nodeKey]*Node, resNodeIsSpot []*prom.QueryResult) {
- for _, res := range resNodeIsSpot {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- node, err := res.GetString("node")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: Node spot query result missing field: %s", err)
- continue
- }
- key := newNodeKey(cluster, node)
- if _, ok := nodeMap[key]; !ok {
- log.Warningf("CostModel.ComputeAllocation: Node spot query result for missing node: %s", key)
- continue
- }
- nodeMap[key].Preemptible = res.Values[0].Value > 0
- }
- }
- func applyNodeDiscount(nodeMap map[nodeKey]*Node, cm *CostModel) {
- if cm == nil {
- return
- }
- c, err := cm.Provider.GetConfig()
- if err != nil {
- log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
- return
- }
- discount, err := ParsePercentString(c.Discount)
- if err != nil {
- log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
- return
- }
- negotiatedDiscount, err := ParsePercentString(c.NegotiatedDiscount)
- if err != nil {
- log.Errorf("CostModel.ComputeAllocation: applyNodeDiscount: %s", err)
- return
- }
- for _, node := range nodeMap {
- // TODO niko/cdmr take RI into account?
- node.Discount = cm.Provider.CombinedDiscountForNode(node.NodeType, node.Preemptible, discount, negotiatedDiscount)
- node.CostPerCPUHr *= (1.0 - node.Discount)
- node.CostPerRAMGiBHr *= (1.0 - node.Discount)
- }
- }
- func buildPVMap(pvMap map[pvKey]*PV, resPVCostPerGiBHour []*prom.QueryResult) {
- for _, res := range resPVCostPerGiBHour {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- name, err := res.GetString("volumename")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: PV cost without volumename")
- continue
- }
- key := newPVKey(cluster, name)
- pvMap[key] = &PV{
- Cluster: cluster,
- Name: name,
- CostPerGiBHour: res.Values[0].Value,
- }
- }
- }
- func applyPVBytes(pvMap map[pvKey]*PV, resPVBytes []*prom.QueryResult) {
- for _, res := range resPVBytes {
- key, err := resultPVKey(res, "cluster_id", "persistentvolume")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: PV bytes query result missing field: %s", err)
- continue
- }
- if _, ok := pvMap[key]; !ok {
- log.Warningf("CostModel.ComputeAllocation: PV bytes result for missing PV: %s", err)
- continue
- }
- pvMap[key].Bytes = res.Values[0].Value
- }
- }
- func buildPVCMap(window kubecost.Window, pvcMap map[pvcKey]*PVC, pvMap map[pvKey]*PV, resPVCInfo []*prom.QueryResult) {
- for _, res := range resPVCInfo {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- values, err := res.GetStrings("persistentvolumeclaim", "storageclass", "volumename", "namespace")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: PVC info query result missing field: %s", err)
- continue
- }
- // TODO niko/cdmr ?
- namespace := values["namespace"]
- name := values["persistentvolumeclaim"]
- volume := values["volumename"]
- storageClass := values["storageclass"]
- pvKey := newPVKey(cluster, volume)
- pvcKey := newPVCKey(cluster, namespace, name)
- // pvcStart and pvcEnd are the timestamps of the first and last minutes
- // the PVC was running, respectively. We subtract 1m from pvcStart
- // because this point will actually represent the end of the first
- // minute. We don't subtract from pvcEnd because it already represents
- // the end of the last minute.
- var pvcStart, pvcEnd time.Time
- for _, datum := range res.Values {
- t := time.Unix(int64(datum.Timestamp), 0)
- if pvcStart.IsZero() && datum.Value > 0 && window.Contains(t) {
- pvcStart = t
- }
- if datum.Value > 0 && window.Contains(t) {
- pvcEnd = t
- }
- }
- if pvcStart.IsZero() || pvcEnd.IsZero() {
- log.Warningf("CostModel.ComputeAllocation: PVC %s has no running time", pvcKey)
- }
- pvcStart = pvcStart.Add(-time.Minute)
- if _, ok := pvMap[pvKey]; !ok {
- log.Warningf("CostModel.ComputeAllocation: PV missing for PVC info query result: %s", pvKey)
- continue
- }
- pvMap[pvKey].StorageClass = storageClass
- if _, ok := pvcMap[pvcKey]; !ok {
- pvcMap[pvcKey] = &PVC{}
- }
- pvcMap[pvcKey].Name = name
- pvcMap[pvcKey].Namespace = namespace
- pvcMap[pvcKey].Volume = pvMap[pvKey]
- pvcMap[pvcKey].Start = pvcStart
- pvcMap[pvcKey].End = pvcEnd
- }
- }
- func applyPVCBytesRequested(pvcMap map[pvcKey]*PVC, resPVCBytesRequested []*prom.QueryResult) {
- for _, res := range resPVCBytesRequested {
- key, err := resultPVCKey(res, "cluster_id", "namespace", "persistentvolumeclaim")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: PVC bytes requested query result missing field: %s", err)
- continue
- }
- if _, ok := pvcMap[key]; !ok {
- log.Warningf("CostModel.ComputeAllocation: PVC bytes requested result for missing PVC: %s", err)
- continue
- }
- pvcMap[key].Bytes = res.Values[0].Value
- }
- }
- func buildPodPVCMap(podPVCMap map[podKey][]*PVC, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC, podAllocationCount map[podKey]int, resPodPVCAllocation []*prom.QueryResult) {
- for _, res := range resPodPVCAllocation {
- cluster, err := res.GetString("cluster_id")
- if err != nil {
- cluster = env.GetClusterID()
- }
- values, err := res.GetStrings("persistentvolume", "persistentvolumeclaim", "pod", "namespace")
- if err != nil {
- log.Warningf("CostModel.ComputeAllocation: PVC allocation query result missing field: %s", err)
- continue
- }
- namespace := values["namespace"]
- pod := values["pod"]
- name := values["persistentvolumeclaim"]
- volume := values["persistentvolume"]
- podKey := newPodKey(cluster, namespace, pod)
- pvKey := newPVKey(cluster, volume)
- pvcKey := newPVCKey(cluster, namespace, name)
- if _, ok := pvMap[pvKey]; !ok {
- log.Warningf("CostModel.ComputeAllocation: PV missing for PVC allocation query result: %s", pvKey)
- continue
- }
- if _, ok := podPVCMap[podKey]; !ok {
- podPVCMap[podKey] = []*PVC{}
- }
- pvc, ok := pvcMap[pvcKey]
- if !ok {
- log.Warningf("CostModel.ComputeAllocation: PVC missing for PVC allocation query: %s", pvcKey)
- continue
- }
- pvc.Count = podAllocationCount[podKey]
- podPVCMap[podKey] = append(podPVCMap[podKey], pvc)
- }
- }
- func applyUnmountedPVs(window kubecost.Window, allocationMap map[containerKey]*kubecost.Allocation, pvMap map[pvKey]*PV, pvcMap map[pvcKey]*PVC) {
- unmountedPVBytes := map[string]float64{}
- unmountedPVCost := map[string]float64{}
- for _, pv := range pvMap {
- mounted := false
- for _, pvc := range pvcMap {
- if pvc.Volume == nil {
- continue
- }
- if pvc.Volume == pv {
- mounted = true
- break
- }
- }
- log.Infof("CostModel.ComputeAllocation: PV %s is mounted? %t", pv.Name, mounted)
- if !mounted {
- gib := pv.Bytes / 1024 / 1024 / 1024
- hrs := window.Minutes() / 60.0
- cost := pv.CostPerGiBHour * gib * hrs
- unmountedPVCost[pv.Cluster] += cost
- unmountedPVBytes[pv.Cluster] += pv.Bytes
- }
- }
- for cluster, amount := range unmountedPVCost {
- container := "unmounted-pvs"
- pod := "unmounted-pvs"
- namespace := "" // TODO niko/cdmr what about this?
- containerKey := newContainerKey(cluster, namespace, pod, container)
- allocationMap[containerKey] = &kubecost.Allocation{
- Name: fmt.Sprintf("%s/%s/%s/%s", cluster, namespace, pod, container),
- Properties: kubecost.Properties{
- kubecost.ClusterProp: cluster,
- kubecost.NamespaceProp: namespace,
- kubecost.PodProp: pod,
- kubecost.ContainerProp: container,
- },
- Window: window.Clone(),
- Start: *window.Start(),
- End: *window.End(),
- PVByteHours: unmountedPVBytes[cluster] * window.Minutes() / 60.0,
- PVCost: amount,
- TotalCost: amount,
- }
- }
- }
// containerKey identifies a container by cluster, namespace, pod and
// container name. It is a comparable value type and is used as a map key.
type containerKey struct {
	Cluster, Namespace, Pod, Container string
}

// String renders the key as "cluster/namespace/pod/container".
func (k containerKey) String() string {
	const layout = "%s/%s/%s/%s"
	return fmt.Sprintf(layout, k.Cluster, k.Namespace, k.Pod, k.Container)
}

// newContainerKey returns the containerKey for the given cluster, namespace,
// pod and container names.
func newContainerKey(cluster, namespace, pod, container string) containerKey {
	var k containerKey
	k.Cluster = cluster
	k.Namespace = namespace
	k.Pod = pod
	k.Container = container
	return k
}
- func resultContainerKey(res *prom.QueryResult, clusterLabel, namespaceLabel, podLabel, containerLabel string) (containerKey, error) {
- key := containerKey{}
- cluster, err := res.GetString(clusterLabel)
- if err != nil {
- cluster = env.GetClusterID()
- }
- key.Cluster = cluster
- namespace, err := res.GetString(namespaceLabel)
- if err != nil {
- return key, err
- }
- key.Namespace = namespace
- pod, err := res.GetString(podLabel)
- if err != nil {
- return key, err
- }
- key.Pod = pod
- container, err := res.GetString(containerLabel)
- if err != nil {
- return key, err
- }
- key.Container = container
- return key, nil
- }
// podKey identifies a pod by cluster, namespace and pod name. It is a
// comparable value type and is used as a map key.
type podKey struct {
	Cluster, Namespace, Pod string
}

// String renders the key as "cluster/namespace/pod".
func (k podKey) String() string {
	const layout = "%s/%s/%s"
	return fmt.Sprintf(layout, k.Cluster, k.Namespace, k.Pod)
}

// newPodKey returns the podKey for the given cluster, namespace and pod
// names.
func newPodKey(cluster, namespace, pod string) podKey {
	var k podKey
	k.Cluster = cluster
	k.Namespace = namespace
	k.Pod = pod
	return k
}
- func resultPodKey(res *prom.QueryResult, clusterLabel, namespaceLabel, podLabel string) (podKey, error) {
- key := podKey{}
- cluster, err := res.GetString(clusterLabel)
- if err != nil {
- cluster = env.GetClusterID()
- }
- key.Cluster = cluster
- namespace, err := res.GetString(namespaceLabel)
- if err != nil {
- return key, err
- }
- key.Namespace = namespace
- pod, err := res.GetString(podLabel)
- if err != nil {
- return key, err
- }
- key.Pod = pod
- return key, nil
- }
// nodeKey identifies a node by cluster and node name. It is a comparable
// value type and is used as a map key.
type nodeKey struct {
	Cluster, Node string
}

// String renders the key as "cluster/node".
func (k nodeKey) String() string {
	const layout = "%s/%s"
	return fmt.Sprintf(layout, k.Cluster, k.Node)
}

// newNodeKey returns the nodeKey for the given cluster and node names.
func newNodeKey(cluster, node string) nodeKey {
	var k nodeKey
	k.Cluster = cluster
	k.Node = node
	return k
}
- func resultNodeKey(res *prom.QueryResult, clusterLabel, nodeLabel string) (nodeKey, error) {
- key := nodeKey{}
- cluster, err := res.GetString(clusterLabel)
- if err != nil {
- cluster = env.GetClusterID()
- }
- key.Cluster = cluster
- node, err := res.GetString(nodeLabel)
- if err != nil {
- return key, err
- }
- key.Node = node
- return key, nil
- }
// pvcKey identifies a persistent volume claim by cluster, namespace and
// claim name. It is a comparable value type and is used as a map key.
type pvcKey struct {
	Cluster, Namespace, PersistentVolumeClaim string
}

// String renders the key as "cluster/namespace/persistentVolumeClaim".
func (k pvcKey) String() string {
	const layout = "%s/%s/%s"
	return fmt.Sprintf(layout, k.Cluster, k.Namespace, k.PersistentVolumeClaim)
}

// newPVCKey returns the pvcKey for the given cluster, namespace and claim
// names.
func newPVCKey(cluster, namespace, persistentVolumeClaim string) pvcKey {
	var k pvcKey
	k.Cluster = cluster
	k.Namespace = namespace
	k.PersistentVolumeClaim = persistentVolumeClaim
	return k
}
- func resultPVCKey(res *prom.QueryResult, clusterLabel, namespaceLabel, pvcLabel string) (pvcKey, error) {
- key := pvcKey{}
- cluster, err := res.GetString(clusterLabel)
- if err != nil {
- cluster = env.GetClusterID()
- }
- key.Cluster = cluster
- namespace, err := res.GetString(namespaceLabel)
- if err != nil {
- return key, err
- }
- key.Namespace = namespace
- pvc, err := res.GetString(pvcLabel)
- if err != nil {
- return key, err
- }
- key.PersistentVolumeClaim = pvc
- return key, nil
- }
// pvKey identifies a persistent volume by cluster and volume name. It is a
// comparable value type and is used as a map key.
type pvKey struct {
	Cluster, PersistentVolume string
}

// String renders the key as "cluster/persistentVolume".
func (k pvKey) String() string {
	const layout = "%s/%s"
	return fmt.Sprintf(layout, k.Cluster, k.PersistentVolume)
}

// newPVKey returns the pvKey for the given cluster and volume names.
func newPVKey(cluster, persistentVolume string) pvKey {
	var k pvKey
	k.Cluster = cluster
	k.PersistentVolume = persistentVolume
	return k
}
- func resultPVKey(res *prom.QueryResult, clusterLabel, persistentVolumeLabel string) (pvKey, error) {
- key := pvKey{}
- cluster, err := res.GetString(clusterLabel)
- if err != nil {
- cluster = env.GetClusterID()
- }
- key.Cluster = cluster
- persistentVolume, err := res.GetString(persistentVolumeLabel)
- if err != nil {
- return key, err
- }
- key.PersistentVolume = persistentVolume
- return key, nil
- }
|