| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516 |
- package costmodel
- import (
- "math"
- "strconv"
- "strings"
- "sync"
- "time"
- "github.com/kubecost/cost-model/pkg/cloud"
- "github.com/kubecost/cost-model/pkg/clustercache"
- "github.com/kubecost/cost-model/pkg/env"
- "github.com/kubecost/cost-model/pkg/errors"
- "github.com/kubecost/cost-model/pkg/log"
- "github.com/kubecost/cost-model/pkg/prom"
- "github.com/kubecost/cost-model/pkg/util"
- promclient "github.com/prometheus/client_golang/api"
- "github.com/prometheus/client_golang/prometheus"
- dto "github.com/prometheus/client_model/go"
- v1 "k8s.io/api/core/v1"
- "k8s.io/client-go/kubernetes"
- "k8s.io/klog"
- )
- //--------------------------------------------------------------------------
- // StatefulsetCollector
- //--------------------------------------------------------------------------
- // StatefulsetCollector is a prometheus collector that generates StatefulsetMetrics
- type StatefulsetCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (sc StatefulsetCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("statefulSet_match_labels", "statfulSet match labels", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (sc StatefulsetCollector) Collect(ch chan<- prometheus.Metric) {
- ds := sc.KubeClusterCache.GetAllStatefulSets()
- for _, statefulset := range ds {
- labels, values := prom.KubeLabelsToLabels(statefulset.Spec.Selector.MatchLabels)
- if len(labels) > 0 {
- m := newStatefulsetMetric(statefulset.GetName(), statefulset.GetNamespace(), "statefulSet_match_labels", labels, values)
- ch <- m
- }
- }
- }
- //--------------------------------------------------------------------------
- // StatefulsetMetric
- //--------------------------------------------------------------------------
- // StatefulsetMetric is a prometheus.Metric used to encode statefulset match labels
- type StatefulsetMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- statefulsetName string
- namespace string
- }
- // Creates a new StatefulsetMetric, implementation of prometheus.Metric
- func newStatefulsetMetric(name, namespace, fqname string, labelNames []string, labelvalues []string) StatefulsetMetric {
- return StatefulsetMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelvalues,
- help: "statefulSet_match_labels StatefulSet Match Labels",
- statefulsetName: name,
- namespace: namespace,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (s StatefulsetMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"statefulSet": s.statefulsetName, "namespace": s.namespace}
- return prometheus.NewDesc(s.fqName, s.help, s.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (s StatefulsetMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range s.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &s.labelNames[i],
- Value: &s.labelValues[i],
- })
- }
- n := "namespace"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &s.namespace,
- })
- r := "statefulSet"
- labels = append(labels, &dto.LabelPair{
- Name: &r,
- Value: &s.statefulsetName,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // DeploymentCollector
- //--------------------------------------------------------------------------
- // DeploymentCollector is a prometheus collector that generates DeploymentMetrics
- type DeploymentCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (sc DeploymentCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("deployment_match_labels", "deployment match labels", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (sc DeploymentCollector) Collect(ch chan<- prometheus.Metric) {
- ds := sc.KubeClusterCache.GetAllDeployments()
- for _, deployment := range ds {
- labels, values := prom.KubeLabelsToLabels(deployment.Spec.Selector.MatchLabels)
- if len(labels) > 0 {
- m := newDeploymentMetric(deployment.GetName(), deployment.GetNamespace(), "deployment_match_labels", labels, values)
- ch <- m
- }
- }
- }
- //--------------------------------------------------------------------------
- // DeploymentMetric
- //--------------------------------------------------------------------------
- // DeploymentMetric is a prometheus.Metric used to encode deployment match labels
- type DeploymentMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- deploymentName string
- namespace string
- }
- // Creates a new DeploymentMetric, implementation of prometheus.Metric
- func newDeploymentMetric(name, namespace, fqname string, labelNames []string, labelvalues []string) DeploymentMetric {
- return DeploymentMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelvalues,
- help: "deployment_match_labels Deployment Match Labels",
- deploymentName: name,
- namespace: namespace,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (s DeploymentMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"deployment": s.deploymentName, "namespace": s.namespace}
- return prometheus.NewDesc(s.fqName, s.help, s.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (s DeploymentMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range s.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &s.labelNames[i],
- Value: &s.labelValues[i],
- })
- }
- n := "namespace"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &s.namespace,
- })
- r := "deployment"
- labels = append(labels, &dto.LabelPair{
- Name: &r,
- Value: &s.deploymentName,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // ServiceCollector
- //--------------------------------------------------------------------------
- // ServiceCollector is a prometheus collector that generates ServiceMetrics
- type ServiceCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (sc ServiceCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("service_selector_labels", "service selector labels", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (sc ServiceCollector) Collect(ch chan<- prometheus.Metric) {
- svcs := sc.KubeClusterCache.GetAllServices()
- for _, svc := range svcs {
- labels, values := prom.KubeLabelsToLabels(svc.Spec.Selector)
- if len(labels) > 0 {
- m := newServiceMetric(svc.GetName(), svc.GetNamespace(), "service_selector_labels", labels, values)
- ch <- m
- }
- }
- }
- //--------------------------------------------------------------------------
- // ServiceMetric
- //--------------------------------------------------------------------------
- // ServiceMetric is a prometheus.Metric used to encode service selector labels
- type ServiceMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- serviceName string
- namespace string
- }
- // Creates a new ServiceMetric, implementation of prometheus.Metric
- func newServiceMetric(name, namespace, fqname string, labelNames []string, labelvalues []string) ServiceMetric {
- return ServiceMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelvalues,
- help: "service_selector_labels Service Selector Labels",
- serviceName: name,
- namespace: namespace,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (s ServiceMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"service": s.serviceName, "namespace": s.namespace}
- return prometheus.NewDesc(s.fqName, s.help, s.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (s ServiceMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range s.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &s.labelNames[i],
- Value: &s.labelValues[i],
- })
- }
- n := "namespace"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &s.namespace,
- })
- r := "service"
- labels = append(labels, &dto.LabelPair{
- Name: &r,
- Value: &s.serviceName,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // NamespaceAnnotationCollector
- //--------------------------------------------------------------------------
- // NamespaceAnnotationCollector is a prometheus collector that generates NamespaceAnnotationMetrics
- type NamespaceAnnotationCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (nsac NamespaceAnnotationCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_namespace_annotations", "namespace annotations", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (nsac NamespaceAnnotationCollector) Collect(ch chan<- prometheus.Metric) {
- namespaces := nsac.KubeClusterCache.GetAllNamespaces()
- for _, namespace := range namespaces {
- labels, values := prom.KubeAnnotationsToLabels(namespace.Annotations)
- if len(labels) > 0 {
- m := newNamespaceAnnotationsMetric(namespace.GetName(), "kube_namespace_annotations", labels, values)
- ch <- m
- }
- }
- }
- //--------------------------------------------------------------------------
- // NamespaceAnnotationsMetric
- //--------------------------------------------------------------------------
- // NamespaceAnnotationsMetric is a prometheus.Metric used to encode namespace annotations
- type NamespaceAnnotationsMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- namespace string
- }
- // Creates a new NamespaceAnnotationsMetric, implementation of prometheus.Metric
- func newNamespaceAnnotationsMetric(namespace, fqname string, labelNames []string, labelValues []string) NamespaceAnnotationsMetric {
- return NamespaceAnnotationsMetric{
- namespace: namespace,
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_namespace_annotations Namespace Annotations",
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (nam NamespaceAnnotationsMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"namespace": nam.namespace}
- return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (nam NamespaceAnnotationsMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range nam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &nam.labelNames[i],
- Value: &nam.labelValues[i],
- })
- }
- n := "namespace"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &nam.namespace,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // PodAnnotationCollector
- //--------------------------------------------------------------------------
- // PodAnnotationCollector is a prometheus collector that generates PodAnnotationMetrics
- type PodAnnotationCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (pac PodAnnotationCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_pod_annotations", "pod annotations", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (pac PodAnnotationCollector) Collect(ch chan<- prometheus.Metric) {
- pods := pac.KubeClusterCache.GetAllPods()
- for _, pod := range pods {
- labels, values := prom.KubeAnnotationsToLabels(pod.Annotations)
- if len(labels) > 0 {
- m := newPodAnnotationMetric(pod.GetNamespace(), pod.GetName(), "kube_pod_annotations", labels, values)
- ch <- m
- }
- }
- }
- //--------------------------------------------------------------------------
- // PodAnnotationsMetric
- //--------------------------------------------------------------------------
- // PodAnnotationsMetric is a prometheus.Metric used to encode namespace annotations
- type PodAnnotationsMetric struct {
- name string
- fqName string
- help string
- labelNames []string
- labelValues []string
- namespace string
- }
- // Creates a new PodAnnotationsMetric, implementation of prometheus.Metric
- func newPodAnnotationMetric(namespace, name, fqname string, labelNames []string, labelValues []string) PodAnnotationsMetric {
- return PodAnnotationsMetric{
- namespace: namespace,
- name: name,
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_pod_annotations Pod Annotations",
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (pam PodAnnotationsMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"namespace": pam.namespace, "pod": pam.name}
- return prometheus.NewDesc(pam.fqName, pam.help, pam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (pam PodAnnotationsMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range pam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &pam.labelNames[i],
- Value: &pam.labelValues[i],
- })
- }
- n := "namespace"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &pam.namespace,
- })
- r := "pod"
- labels = append(labels, &dto.LabelPair{
- Name: &r,
- Value: &pam.name,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // ClusterInfoCollector
- //--------------------------------------------------------------------------
- // ClusterInfoCollector is a prometheus collector that generates ClusterInfoMetrics
- type ClusterInfoCollector struct {
- Cloud cloud.Provider
- KubeClientSet kubernetes.Interface
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (cic ClusterInfoCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kubecost_cluster_info", "Kubecost Cluster Info", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (cic ClusterInfoCollector) Collect(ch chan<- prometheus.Metric) {
- clusterInfo := GetClusterInfo(cic.KubeClientSet, cic.Cloud)
- labels := prom.MapToLabels(clusterInfo)
- m := newClusterInfoMetric("kubecost_cluster_info", labels)
- ch <- m
- }
- //--------------------------------------------------------------------------
- // ClusterInfoMetric
- //--------------------------------------------------------------------------
- // ClusterInfoMetric is a prometheus.Metric used to encode the local cluster info
- type ClusterInfoMetric struct {
- fqName string
- help string
- labels map[string]string
- }
- // Creates a new ClusterInfoMetric, implementation of prometheus.Metric
- func newClusterInfoMetric(fqName string, labels map[string]string) ClusterInfoMetric {
- return ClusterInfoMetric{
- fqName: fqName,
- labels: labels,
- help: "kubecost_cluster_info ClusterInfo",
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (cim ClusterInfoMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{}
- return prometheus.NewDesc(cim.fqName, cim.help, prom.LabelNamesFrom(cim.labels), l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (cim ClusterInfoMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for k, v := range cim.labels {
- labels = append(labels, &dto.LabelPair{
- Name: toStringPtr(k),
- Value: toStringPtr(v),
- })
- }
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityMemoryBytesCollector
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityMemoryBytesCollector is a prometheus collector that generates
- // KubeNodeStatusCapacityMemoryBytesMetrics
- type KubeNodeStatusCapacityMemoryBytesCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (nsac KubeNodeStatusCapacityMemoryBytesCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_node_status_capacity_memory_bytes", "node capacity memory bytes", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (nsac KubeNodeStatusCapacityMemoryBytesCollector) Collect(ch chan<- prometheus.Metric) {
- nodes := nsac.KubeClusterCache.GetAllNodes()
- for _, node := range nodes {
- // k8s.io/apimachinery/pkg/api/resource/amount.go and
- // k8s.io/apimachinery/pkg/api/resource/quantity.go for
- // details on the "amount" API. See
- // https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-types
- // for the units of memory and CPU.
- memoryBytes := node.Status.Capacity.Memory().Value()
- m := newKubeNodeStatusCapacityMemoryBytesMetric(node.GetName(), memoryBytes, "kube_node_status_capacity_memory_bytes", nil, nil)
- ch <- m
- }
- }
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityMemoryBytesMetric
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityMemoryBytesMetric is a prometheus.Metric used to encode
- // a duplicate of the deprecated kube-state-metrics metric
- // kube_node_status_capacity_memory_bytes
- type KubeNodeStatusCapacityMemoryBytesMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- bytes int64
- node string
- }
- // Creates a new KubeNodeStatusCapacityMemoryBytesMetric, implementation of prometheus.Metric
- func newKubeNodeStatusCapacityMemoryBytesMetric(node string, bytes int64, fqname string, labelNames []string, labelValues []string) KubeNodeStatusCapacityMemoryBytesMetric {
- return KubeNodeStatusCapacityMemoryBytesMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_node_status_capacity_memory_bytes Node Capacity Memory Bytes",
- bytes: bytes,
- node: node,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (nam KubeNodeStatusCapacityMemoryBytesMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"node": nam.node}
- return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (nam KubeNodeStatusCapacityMemoryBytesMetric) Write(m *dto.Metric) error {
- h := float64(nam.bytes)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range nam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &nam.labelNames[i],
- Value: &nam.labelValues[i],
- })
- }
- n := "node"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &nam.node,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityCPUCoresCollector
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityCPUCoresCollector is a prometheus collector that generates
- // KubeNodeStatusCapacityCPUCoresMetrics
- type KubeNodeStatusCapacityCPUCoresCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (nsac KubeNodeStatusCapacityCPUCoresCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_node_status_capacity_cpu_cores", "node capacity cpu cores", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (nsac KubeNodeStatusCapacityCPUCoresCollector) Collect(ch chan<- prometheus.Metric) {
- nodes := nsac.KubeClusterCache.GetAllNodes()
- for _, node := range nodes {
- // k8s.io/apimachinery/pkg/api/resource/amount.go and
- // k8s.io/apimachinery/pkg/api/resource/quantity.go for
- // details on the "amount" API. See
- // https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-types
- // for the units of memory and CPU.
- cpuCores := float64(node.Status.Capacity.Cpu().MilliValue()) / 1000
- m := newKubeNodeStatusCapacityCPUCoresMetric(node.GetName(), cpuCores, "kube_node_status_capacity_cpu_cores", nil, nil)
- ch <- m
- }
- }
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityCPUCoresMetric
- //--------------------------------------------------------------------------
- // KubeNodeStatusCapacityCPUCoresMetric is a prometheus.Metric used to encode
- // a duplicate of the deprecated kube-state-metrics metric
- // kube_node_status_capacity_memory_bytes
- type KubeNodeStatusCapacityCPUCoresMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- cores float64
- node string
- }
- // Creates a new KubeNodeStatusCapacityCPUCoresMetric, implementation of prometheus.Metric
- func newKubeNodeStatusCapacityCPUCoresMetric(node string, cores float64, fqname string, labelNames []string, labelValues []string) KubeNodeStatusCapacityCPUCoresMetric {
- return KubeNodeStatusCapacityCPUCoresMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_node_status_capacity_cpu_cores Node Capacity CPU Cores",
- cores: cores,
- node: node,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (nam KubeNodeStatusCapacityCPUCoresMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{"node": nam.node}
- return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (nam KubeNodeStatusCapacityCPUCoresMetric) Write(m *dto.Metric) error {
- h := nam.cores
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range nam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &nam.labelNames[i],
- Value: &nam.labelValues[i],
- })
- }
- n := "node"
- labels = append(labels, &dto.LabelPair{
- Name: &n,
- Value: &nam.node,
- })
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // KubePodLabelsCollector
- //--------------------------------------------------------------------------
- //
- // We use this to emit kube_pod_labels with all of a pod's labels, regardless
- // of the whitelist setting introduced in KSM v2. See
- // https://github.com/kubernetes/kube-state-metrics/issues/1270#issuecomment-712986441
- // KubePodLabelsCollector is a prometheus collector that generates
- // KubePodLabelsMetrics
- type KubePodLabelsCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (nsac KubePodLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_pod_labels", "all labels for each pod prefixed with label_", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (nsac KubePodLabelsCollector) Collect(ch chan<- prometheus.Metric) {
- pods := nsac.KubeClusterCache.GetAllPods()
- for _, pod := range pods {
- labelNames, labelValues := prom.KubePrependQualifierToLabels(pod.GetLabels(), "label_")
- m := newKubePodLabelsMetric(
- pod.GetName(),
- pod.GetNamespace(),
- string(pod.GetUID()),
- "kube_pod_labels",
- labelNames,
- labelValues,
- )
- ch <- m
- }
- }
- //--------------------------------------------------------------------------
- // KubePodLabelsMetric
- //--------------------------------------------------------------------------
- // KubePodLabelsMetric is a prometheus.Metric used to encode
- // a duplicate of the deprecated kube-state-metrics metric
- // kube_pod_labels
- type KubePodLabelsMetric struct {
- fqName string
- help string
- labelNames []string
- labelValues []string
- pod string
- namespace string
- uid string
- }
- // Creates a new KubePodLabelsMetric, implementation of prometheus.Metric
- func newKubePodLabelsMetric(pod string, namespace string, uid string, fqname string, labelNames []string, labelValues []string) KubePodLabelsMetric {
- return KubePodLabelsMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_pod_labels all labels for each pod prefixed with label_",
- pod: pod,
- namespace: namespace,
- uid: uid,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (nam KubePodLabelsMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{
- "pod": nam.pod,
- "namespace": nam.namespace,
- "uid": nam.uid,
- }
- return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (nam KubePodLabelsMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range nam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &nam.labelNames[i],
- Value: &nam.labelValues[i],
- })
- }
- podString := "pod"
- namespaceString := "namespace"
- uidString := "uid"
- labels = append(labels,
- &dto.LabelPair{
- Name: &podString,
- Value: &nam.pod,
- },
- &dto.LabelPair{
- Name: &namespaceString,
- Value: &nam.namespace,
- }, &dto.LabelPair{
- Name: &uidString,
- Value: &nam.uid,
- },
- )
- m.Label = labels
- return nil
- }
- //--------------------------------------------------------------------------
- // KubeNodeLabelsCollector
- //--------------------------------------------------------------------------
- //
- // We use this to emit kube_node_labels with all of a node's labels, regardless
- // of the whitelist setting introduced in KSM v2. See
- // https://github.com/kubernetes/kube-state-metrics/issues/1270#issuecomment-712986441
- // KubeNodeLabelsCollector is a prometheus collector that generates
- // KubeNodeLabelsMetrics
- type KubeNodeLabelsCollector struct {
- KubeClusterCache clustercache.ClusterCache
- }
- // Describe sends the super-set of all possible descriptors of metrics
- // collected by this Collector.
- func (nsac KubeNodeLabelsCollector) Describe(ch chan<- *prometheus.Desc) {
- ch <- prometheus.NewDesc("kube_node_labels", "all labels for each node prefixed with label_", []string{}, nil)
- }
- // Collect is called by the Prometheus registry when collecting metrics.
- func (nsac KubeNodeLabelsCollector) Collect(ch chan<- prometheus.Metric) {
- nodes := nsac.KubeClusterCache.GetAllNodes()
- for _, node := range nodes {
- labelNames, labelValues := prom.KubePrependQualifierToLabels(node.GetLabels(), "label_")
- m := newKubeNodeLabelsMetric(
- node.GetName(),
- "kube_node_labels",
- labelNames,
- labelValues,
- )
- ch <- m
- }
- }
//--------------------------------------------------------------------------
//  KubeNodeLabelsMetric
//--------------------------------------------------------------------------

// KubeNodeLabelsMetric is a prometheus.Metric that reproduces the deprecated
// kube-state-metrics metric kube_node_labels for a single node.
type KubeNodeLabelsMetric struct {
	// fqName is the metric name and help its help text; both feed the descriptor.
	fqName, help string
	// labelNames and labelValues are index-aligned: labelValues[i] is the
	// value of labelNames[i].
	labelNames, labelValues []string
	// node is the node name, emitted as the "node" label.
	node string
}
- // Creates a new KubeNodeLabelsMetric, implementation of prometheus.Metric
- func newKubeNodeLabelsMetric(node string, fqname string, labelNames []string, labelValues []string) KubeNodeLabelsMetric {
- return KubeNodeLabelsMetric{
- fqName: fqname,
- labelNames: labelNames,
- labelValues: labelValues,
- help: "kube_node_labels all labels for each node prefixed with label_",
- node: node,
- }
- }
- // Desc returns the descriptor for the Metric. This method idempotently
- // returns the same descriptor throughout the lifetime of the Metric.
- func (nam KubeNodeLabelsMetric) Desc() *prometheus.Desc {
- l := prometheus.Labels{
- "node": nam.node,
- }
- return prometheus.NewDesc(nam.fqName, nam.help, nam.labelNames, l)
- }
- // Write encodes the Metric into a "Metric" Protocol Buffer data
- // transmission object.
- func (nam KubeNodeLabelsMetric) Write(m *dto.Metric) error {
- h := float64(1)
- m.Gauge = &dto.Gauge{
- Value: &h,
- }
- var labels []*dto.LabelPair
- for i := range nam.labelNames {
- labels = append(labels, &dto.LabelPair{
- Name: &nam.labelNames[i],
- Value: &nam.labelValues[i],
- })
- }
- nodeString := "node"
- labels = append(labels, &dto.LabelPair{Name: &nodeString, Value: &nam.node})
- m.Label = labels
- return nil
- }
// toStringPtr returns a pointer to a fresh copy of s. It exists so a pointer
// can be taken from an iteration variable without aliasing the variable itself.
func toStringPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}
- //--------------------------------------------------------------------------
- // Cost Model Metrics Initialization
- //--------------------------------------------------------------------------
- // Only allow the metrics to be instantiated and registered once
- var metricsInit sync.Once
- var (
- cpuGv *prometheus.GaugeVec
- ramGv *prometheus.GaugeVec
- gpuGv *prometheus.GaugeVec
- gpuCountGv *prometheus.GaugeVec
- pvGv *prometheus.GaugeVec
- spotGv *prometheus.GaugeVec
- totalGv *prometheus.GaugeVec
- ramAllocGv *prometheus.GaugeVec
- cpuAllocGv *prometheus.GaugeVec
- gpuAllocGv *prometheus.GaugeVec
- pvAllocGv *prometheus.GaugeVec
- networkZoneEgressCostG prometheus.Gauge
- networkRegionEgressCostG prometheus.Gauge
- networkInternetEgressCostG prometheus.Gauge
- clusterManagementCostGv *prometheus.GaugeVec
- lbCostGv *prometheus.GaugeVec
- )
- // initCostModelMetrics uses a sync.Once to ensure that these metrics are only created once
- func initCostModelMetrics(clusterCache clustercache.ClusterCache, provider cloud.Provider) {
- metricsInit.Do(func() {
- cpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "node_cpu_hourly_cost",
- Help: "node_cpu_hourly_cost hourly cost for each cpu on this node",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- ramGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "node_ram_hourly_cost",
- Help: "node_ram_hourly_cost hourly cost for each gb of ram on this node",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- gpuGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "node_gpu_hourly_cost",
- Help: "node_gpu_hourly_cost hourly cost for each gpu on this node",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- gpuCountGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "node_gpu_count",
- Help: "node_gpu_count count of gpu on this node",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- pvGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "pv_hourly_cost",
- Help: "pv_hourly_cost Cost per GB per hour on a persistent disk",
- }, []string{"volumename", "persistentvolume", "provider_id"})
- spotGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "kubecost_node_is_spot",
- Help: "kubecost_node_is_spot Cloud provider info about node preemptibility",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- totalGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "node_total_hourly_cost",
- Help: "node_total_hourly_cost Total node cost per hour",
- }, []string{"instance", "node", "instance_type", "region", "provider_id"})
- ramAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "container_memory_allocation_bytes",
- Help: "container_memory_allocation_bytes Bytes of RAM used",
- }, []string{"namespace", "pod", "container", "instance", "node"})
- cpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "container_cpu_allocation",
- Help: "container_cpu_allocation Percent of a single CPU used in a minute",
- }, []string{"namespace", "pod", "container", "instance", "node"})
- gpuAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "container_gpu_allocation",
- Help: "container_gpu_allocation GPU used",
- }, []string{"namespace", "pod", "container", "instance", "node"})
- pvAllocGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "pod_pvc_allocation",
- Help: "pod_pvc_allocation Bytes used by a PVC attached to a pod",
- }, []string{"namespace", "pod", "persistentvolumeclaim", "persistentvolume"})
- networkZoneEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
- Name: "kubecost_network_zone_egress_cost",
- Help: "kubecost_network_zone_egress_cost Total cost per GB egress across zones",
- })
- networkRegionEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
- Name: "kubecost_network_region_egress_cost",
- Help: "kubecost_network_region_egress_cost Total cost per GB egress across regions",
- })
- networkInternetEgressCostG = prometheus.NewGauge(prometheus.GaugeOpts{
- Name: "kubecost_network_internet_egress_cost",
- Help: "kubecost_network_internet_egress_cost Total cost per GB of internet egress.",
- })
- clusterManagementCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Name: "kubecost_cluster_management_cost",
- Help: "kubecost_cluster_management_cost Hourly cost paid as a cluster management fee.",
- }, []string{"provisioner_name"})
- lbCostGv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ // no differentiation between ELB and ALB right now
- Name: "kubecost_load_balancer_cost",
- Help: "kubecost_load_balancer_cost Hourly cost of load balancer",
- }, []string{"ingress_ip", "namespace", "service_name"}) // assumes one ingress IP per load balancer
- // Register cost-model metrics for emission
- prometheus.MustRegister(cpuGv, ramGv, gpuGv, gpuCountGv, totalGv, pvGv, spotGv)
- prometheus.MustRegister(ramAllocGv, cpuAllocGv, gpuAllocGv, pvAllocGv)
- prometheus.MustRegister(networkZoneEgressCostG, networkRegionEgressCostG, networkInternetEgressCostG)
- prometheus.MustRegister(clusterManagementCostGv, lbCostGv)
- // General Metric Collectors
- prometheus.MustRegister(ServiceCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(DeploymentCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(StatefulsetCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(ClusterInfoCollector{
- KubeClientSet: clusterCache.GetClient(),
- Cloud: provider,
- })
- if env.IsEmitNamespaceAnnotationsMetric() {
- prometheus.MustRegister(NamespaceAnnotationCollector{
- KubeClusterCache: clusterCache,
- })
- }
- if env.IsEmitPodAnnotationsMetric() {
- prometheus.MustRegister(PodAnnotationCollector{
- KubeClusterCache: clusterCache,
- })
- }
- if env.IsEmitKsmV1Metrics() {
- prometheus.MustRegister(KubeNodeStatusCapacityMemoryBytesCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(KubeNodeStatusCapacityCPUCoresCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(KubePodLabelsCollector{
- KubeClusterCache: clusterCache,
- })
- prometheus.MustRegister(KubeNodeLabelsCollector{
- KubeClusterCache: clusterCache,
- })
- }
- })
- }
- //--------------------------------------------------------------------------
- // CostModelMetricsEmitter
- //--------------------------------------------------------------------------
- // CostModelMetricsEmitter emits all cost-model specific metrics calculated by
- // the CostModel.ComputeCostData() method.
- type CostModelMetricsEmitter struct {
- PrometheusClient promclient.Client
- KubeClusterCache clustercache.ClusterCache
- CloudProvider cloud.Provider
- Model *CostModel
- // Metrics
- CPUPriceRecorder *prometheus.GaugeVec
- RAMPriceRecorder *prometheus.GaugeVec
- PersistentVolumePriceRecorder *prometheus.GaugeVec
- GPUPriceRecorder *prometheus.GaugeVec
- GPUCountRecorder *prometheus.GaugeVec
- PVAllocationRecorder *prometheus.GaugeVec
- NodeSpotRecorder *prometheus.GaugeVec
- NodeTotalPriceRecorder *prometheus.GaugeVec
- RAMAllocationRecorder *prometheus.GaugeVec
- CPUAllocationRecorder *prometheus.GaugeVec
- GPUAllocationRecorder *prometheus.GaugeVec
- ClusterManagementCostRecorder *prometheus.GaugeVec
- LBCostRecorder *prometheus.GaugeVec
- NetworkZoneEgressRecorder prometheus.Gauge
- NetworkRegionEgressRecorder prometheus.Gauge
- NetworkInternetEgressRecorder prometheus.Gauge
- // Flow Control
- recordingLock *sync.Mutex
- recordingStopping bool
- recordingStop chan bool
- }
- // NewCostModelMetricsEmitter creates a new cost-model metrics emitter. Use Start() to begin metric emission.
- func NewCostModelMetricsEmitter(promClient promclient.Client, clusterCache clustercache.ClusterCache, provider cloud.Provider, model *CostModel) *CostModelMetricsEmitter {
- // init will only actually execute once to register the custom gauges
- initCostModelMetrics(clusterCache, provider)
- return &CostModelMetricsEmitter{
- PrometheusClient: promClient,
- KubeClusterCache: clusterCache,
- CloudProvider: provider,
- Model: model,
- CPUPriceRecorder: cpuGv,
- RAMPriceRecorder: ramGv,
- GPUPriceRecorder: gpuGv,
- GPUCountRecorder: gpuCountGv,
- PersistentVolumePriceRecorder: pvGv,
- NodeSpotRecorder: spotGv,
- NodeTotalPriceRecorder: totalGv,
- RAMAllocationRecorder: ramAllocGv,
- CPUAllocationRecorder: cpuAllocGv,
- GPUAllocationRecorder: gpuAllocGv,
- PVAllocationRecorder: pvAllocGv,
- NetworkZoneEgressRecorder: networkZoneEgressCostG,
- NetworkRegionEgressRecorder: networkRegionEgressCostG,
- NetworkInternetEgressRecorder: networkInternetEgressCostG,
- ClusterManagementCostRecorder: clusterManagementCostGv,
- LBCostRecorder: lbCostGv,
- recordingLock: new(sync.Mutex),
- recordingStopping: false,
- recordingStop: nil,
- }
- }
- // Checks to see if there is a metric recording stop channel. If it exists, a new
- // channel is not created and false is returned. If it doesn't exist, a new channel
- // is created and true is returned.
- func (cmme *CostModelMetricsEmitter) checkOrCreateRecordingChan() bool {
- cmme.recordingLock.Lock()
- defer cmme.recordingLock.Unlock()
- if cmme.recordingStop != nil {
- return false
- }
- cmme.recordingStop = make(chan bool, 1)
- return true
- }
- // IsRunning returns true if metric recording is running.
- func (cmme *CostModelMetricsEmitter) IsRunning() bool {
- cmme.recordingLock.Lock()
- defer cmme.recordingLock.Unlock()
- return cmme.recordingStop != nil
- }
- // StartCostModelMetricRecording starts the go routine that emits metrics used to determine
- // cluster costs.
- func (cmme *CostModelMetricsEmitter) Start() bool {
- // Check to see if we're already recording
- // This function will create the stop recording channel and return true
- // if it doesn't exist.
- if !cmme.checkOrCreateRecordingChan() {
- log.Errorf("Attempted to start cost model metric recording when it's already running.")
- return false
- }
- go func() {
- defer errors.HandlePanic()
- containerSeen := make(map[string]bool)
- nodeSeen := make(map[string]bool)
- loadBalancerSeen := make(map[string]bool)
- pvSeen := make(map[string]bool)
- pvcSeen := make(map[string]bool)
- getKeyFromLabelStrings := func(labels ...string) string {
- return strings.Join(labels, ",")
- }
- getLabelStringsFromKey := func(key string) []string {
- return strings.Split(key, ",")
- }
- var defaultRegion string = ""
- nodeList := cmme.KubeClusterCache.GetAllNodes()
- if len(nodeList) > 0 {
- var ok bool
- defaultRegion, ok = util.GetRegion(nodeList[0].Labels)
- if !ok {
- log.DedupedWarningf(5, "Failed to locate default region")
- }
- }
- for {
- klog.V(4).Info("Recording prices...")
- podlist := cmme.KubeClusterCache.GetAllPods()
- podStatus := make(map[string]v1.PodPhase)
- for _, pod := range podlist {
- podStatus[pod.Name] = pod.Status.Phase
- }
- cfg, _ := cmme.CloudProvider.GetConfig()
- provisioner, clusterManagementCost, err := cmme.CloudProvider.ClusterManagementPricing()
- if err != nil {
- klog.V(1).Infof("Error getting cluster management cost %s", err.Error())
- }
- cmme.ClusterManagementCostRecorder.WithLabelValues(provisioner).Set(clusterManagementCost)
- // Record network pricing at global scope
- networkCosts, err := cmme.CloudProvider.NetworkPricing()
- if err != nil {
- klog.V(4).Infof("Failed to retrieve network costs: %s", err.Error())
- } else {
- cmme.NetworkZoneEgressRecorder.Set(networkCosts.ZoneNetworkEgressCost)
- cmme.NetworkRegionEgressRecorder.Set(networkCosts.RegionNetworkEgressCost)
- cmme.NetworkInternetEgressRecorder.Set(networkCosts.InternetNetworkEgressCost)
- }
- // TODO: Pass PrometheusClient and CloudProvider into CostModel on instantiation so this isn't so awkward
- data, err := cmme.Model.ComputeCostData(cmme.PrometheusClient, cmme.CloudProvider, "2m", "", "")
- if err != nil {
- // For an error collection, we'll just log the length of the errors (ComputeCostData already logs the
- // actual errors)
- if prom.IsErrorCollection(err) {
- if ec, ok := err.(prom.QueryErrorCollection); ok {
- log.Errorf("Error in price recording: %d errors occurred", len(ec.Errors()))
- }
- } else {
- log.Errorf("Error in price recording: " + err.Error())
- }
- // zero the for loop so the time.Sleep will still work
- data = map[string]*CostData{}
- }
- // TODO: Pass CloudProvider into CostModel on instantiation so this isn't so awkward
- nodes, err := cmme.Model.GetNodeCost(cmme.CloudProvider)
- for nodeName, node := range nodes {
- // Emit costs, guarding against NaN inputs for custom pricing.
- cpuCost, _ := strconv.ParseFloat(node.VCPUCost, 64)
- if math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
- cpuCost, _ = strconv.ParseFloat(cfg.CPU, 64)
- if math.IsNaN(cpuCost) || math.IsInf(cpuCost, 0) {
- cpuCost = 0
- }
- }
- cpu, _ := strconv.ParseFloat(node.VCPU, 64)
- if math.IsNaN(cpu) || math.IsInf(cpu, 0) {
- cpu = 1 // Assume 1 CPU
- }
- ramCost, _ := strconv.ParseFloat(node.RAMCost, 64)
- if math.IsNaN(ramCost) || math.IsInf(ramCost, 0) {
- ramCost, _ = strconv.ParseFloat(cfg.RAM, 64)
- if math.IsNaN(ramCost) || math.IsInf(ramCost, 0) {
- ramCost = 0
- }
- }
- ram, _ := strconv.ParseFloat(node.RAMBytes, 64)
- if math.IsNaN(ram) || math.IsInf(ram, 0) {
- ram = 0
- }
- gpu, _ := strconv.ParseFloat(node.GPU, 64)
- if math.IsNaN(gpu) || math.IsInf(gpu, 0) {
- gpu = 0
- }
- gpuCost, _ := strconv.ParseFloat(node.GPUCost, 64)
- if math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
- gpuCost, _ = strconv.ParseFloat(cfg.GPU, 64)
- if math.IsNaN(gpuCost) || math.IsInf(gpuCost, 0) {
- gpuCost = 0
- }
- }
- nodeType := node.InstanceType
- nodeRegion := node.Region
- totalCost := cpu*cpuCost + ramCost*(ram/1024/1024/1024) + gpu*gpuCost
- cmme.CPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(cpuCost)
- cmme.RAMPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(ramCost)
- cmme.GPUPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(gpuCost)
- cmme.GPUCountRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(gpu)
- cmme.NodeTotalPriceRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(totalCost)
- if node.IsSpot() {
- cmme.NodeSpotRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(1.0)
- } else {
- cmme.NodeSpotRecorder.WithLabelValues(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID).Set(0.0)
- }
- labelKey := getKeyFromLabelStrings(nodeName, nodeName, nodeType, nodeRegion, node.ProviderID)
- nodeSeen[labelKey] = true
- }
- // TODO: Pass CloudProvider into CostModel on instantiation so this isn't so awkward
- loadBalancers, err := cmme.Model.GetLBCost(cmme.CloudProvider)
- for lbKey, lb := range loadBalancers {
- // TODO: parse (if necessary) and calculate cost associated with loadBalancer based on dynamic cloud prices fetched into each lb struct on GetLBCost() call
- keyParts := getLabelStringsFromKey(lbKey)
- namespace := keyParts[0]
- serviceName := keyParts[1]
- ingressIP := ""
- if len(lb.IngressIPAddresses) > 0 {
- ingressIP = lb.IngressIPAddresses[0] // assumes one ingress IP per load balancer
- }
- cmme.LBCostRecorder.WithLabelValues(ingressIP, namespace, serviceName).Set(lb.Cost)
- labelKey := getKeyFromLabelStrings(namespace, serviceName)
- loadBalancerSeen[labelKey] = true
- }
- for _, costs := range data {
- nodeName := costs.NodeName
- namespace := costs.Namespace
- podName := costs.PodName
- containerName := costs.Name
- if costs.PVCData != nil {
- for _, pvc := range costs.PVCData {
- if pvc.Volume != nil {
- timesClaimed := pvc.TimesClaimed
- if timesClaimed == 0 {
- timesClaimed = 1 // unallocated PVs are unclaimed but have a full allocation
- }
- cmme.PVAllocationRecorder.WithLabelValues(namespace, podName, pvc.Claim, pvc.VolumeName).Set(pvc.Values[0].Value / float64(timesClaimed))
- labelKey := getKeyFromLabelStrings(namespace, podName, pvc.Claim, pvc.VolumeName)
- pvcSeen[labelKey] = true
- }
- }
- }
- if len(costs.RAMAllocation) > 0 {
- cmme.RAMAllocationRecorder.WithLabelValues(namespace, podName, containerName, nodeName, nodeName).Set(costs.RAMAllocation[0].Value)
- }
- if len(costs.CPUAllocation) > 0 {
- cmme.CPUAllocationRecorder.WithLabelValues(namespace, podName, containerName, nodeName, nodeName).Set(costs.CPUAllocation[0].Value)
- }
- if len(costs.GPUReq) > 0 {
- // allocation here is set to the request because shared GPU usage not yet supported.
- cmme.GPUAllocationRecorder.WithLabelValues(namespace, podName, containerName, nodeName, nodeName).Set(costs.GPUReq[0].Value)
- }
- labelKey := getKeyFromLabelStrings(namespace, podName, containerName, nodeName, nodeName)
- if podStatus[podName] == v1.PodRunning { // Only report data for current pods
- containerSeen[labelKey] = true
- } else {
- containerSeen[labelKey] = false
- }
- }
- storageClasses := cmme.KubeClusterCache.GetAllStorageClasses()
- storageClassMap := make(map[string]map[string]string)
- for _, storageClass := range storageClasses {
- params := storageClass.Parameters
- storageClassMap[storageClass.ObjectMeta.Name] = params
- if storageClass.GetAnnotations()["storageclass.kubernetes.io/is-default-class"] == "true" || storageClass.GetAnnotations()["storageclass.beta.kubernetes.io/is-default-class"] == "true" {
- storageClassMap["default"] = params
- storageClassMap[""] = params
- }
- }
- pvs := cmme.KubeClusterCache.GetAllPersistentVolumes()
- for _, pv := range pvs {
- parameters, ok := storageClassMap[pv.Spec.StorageClassName]
- if !ok {
- klog.V(4).Infof("Unable to find parameters for storage class \"%s\". Does pv \"%s\" have a storageClassName?", pv.Spec.StorageClassName, pv.Name)
- }
- var region string
- if r, ok := util.GetRegion(pv.Labels); ok {
- region = r
- } else {
- region = defaultRegion
- }
- cacPv := &cloud.PV{
- Class: pv.Spec.StorageClassName,
- Region: region,
- Parameters: parameters,
- }
- // TODO: GetPVCost should be a method in CostModel?
- GetPVCost(cacPv, pv, cmme.CloudProvider, region)
- c, _ := strconv.ParseFloat(cacPv.Cost, 64)
- cmme.PersistentVolumePriceRecorder.WithLabelValues(pv.Name, pv.Name, cacPv.ProviderID).Set(c)
- labelKey := getKeyFromLabelStrings(pv.Name, pv.Name)
- pvSeen[labelKey] = true
- }
- for labelString, seen := range nodeSeen {
- if !seen {
- klog.V(4).Infof("Removing %s from nodes", labelString)
- labels := getLabelStringsFromKey(labelString)
- ok := cmme.NodeTotalPriceRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from totalprice", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from totalprice", labelString)
- }
- ok = cmme.NodeSpotRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from spot records", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from spot records", labelString)
- }
- ok = cmme.CPUPriceRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from cpuprice", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from cpuprice", labelString)
- }
- ok = cmme.GPUPriceRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from gpuprice", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from gpuprice", labelString)
- }
- ok = cmme.GPUCountRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from gpucount", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from gpucount", labelString)
- }
- ok = cmme.RAMPriceRecorder.DeleteLabelValues(labels...)
- if ok {
- klog.V(4).Infof("removed %s from ramprice", labelString)
- } else {
- klog.Infof("FAILURE TO REMOVE %s from ramprice", labelString)
- }
- delete(nodeSeen, labelString)
- } else {
- nodeSeen[labelString] = false
- }
- }
- for labelString, seen := range loadBalancerSeen {
- if !seen {
- labels := getLabelStringsFromKey(labelString)
- cmme.LBCostRecorder.DeleteLabelValues(labels...)
- } else {
- loadBalancerSeen[labelString] = false
- }
- }
- for labelString, seen := range containerSeen {
- if !seen {
- labels := getLabelStringsFromKey(labelString)
- cmme.RAMAllocationRecorder.DeleteLabelValues(labels...)
- cmme.CPUAllocationRecorder.DeleteLabelValues(labels...)
- cmme.GPUAllocationRecorder.DeleteLabelValues(labels...)
- delete(containerSeen, labelString)
- } else {
- containerSeen[labelString] = false
- }
- }
- for labelString, seen := range pvSeen {
- if !seen {
- labels := getLabelStringsFromKey(labelString)
- cmme.PersistentVolumePriceRecorder.DeleteLabelValues(labels...)
- delete(pvSeen, labelString)
- } else {
- pvSeen[labelString] = false
- }
- }
- for labelString, seen := range pvcSeen {
- if !seen {
- labels := getLabelStringsFromKey(labelString)
- cmme.PVAllocationRecorder.DeleteLabelValues(labels...)
- delete(pvcSeen, labelString)
- } else {
- pvcSeen[labelString] = false
- }
- }
- select {
- case <-time.After(time.Minute):
- case <-cmme.recordingStop:
- cmme.recordingLock.Lock()
- cmme.recordingStopping = false
- cmme.recordingStop = nil
- cmme.recordingLock.Unlock()
- return
- }
- }
- }()
- return true
- }
- // Stop halts the metrics emission loop after the current emission is completed
- // or if the emission is paused.
- func (cmme *CostModelMetricsEmitter) Stop() {
- cmme.recordingLock.Lock()
- defer cmme.recordingLock.Unlock()
- if !cmme.recordingStopping && cmme.recordingStop != nil {
- cmme.recordingStopping = true
- close(cmme.recordingStop)
- }
- }
|