| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472 |
- package scrape
- import (
- "fmt"
- "slices"
- "strconv"
- "strings"
- "github.com/kubecost/events"
- "github.com/opencost/opencost/core/pkg/clustercache"
- "github.com/opencost/opencost/core/pkg/log"
- "github.com/opencost/opencost/core/pkg/source"
- coreutil "github.com/opencost/opencost/core/pkg/util"
- "github.com/opencost/opencost/core/pkg/util/promutil"
- "github.com/opencost/opencost/modules/collector-source/pkg/event"
- "github.com/opencost/opencost/modules/collector-source/pkg/metric"
- "github.com/opencost/opencost/modules/collector-source/pkg/util"
- "golang.org/x/exp/maps"
- v1 "k8s.io/api/core/v1"
- "k8s.io/apimachinery/pkg/api/resource"
- "k8s.io/apimachinery/pkg/types"
- "k8s.io/apimachinery/pkg/util/validation"
- )
// unmountedPVsContainer is the placeholder value written to the pod label of
// allocation metrics for persistent volume claims that no scraped pod mounts.
const unmountedPVsContainer = "unmounted-pvs"
- type ClusterCacheScraper struct {
- clusterCache clustercache.ClusterCache
- }
- func newClusterCacheScraper(clusterCache clustercache.ClusterCache) Scraper {
- return &ClusterCacheScraper{
- clusterCache: clusterCache,
- }
- }
- func (ccs *ClusterCacheScraper) Scrape() []metric.Update {
- // retrieve objects for scrape
- nodes := ccs.clusterCache.GetAllNodes()
- deployments := ccs.clusterCache.GetAllDeployments()
- namespaces := ccs.clusterCache.GetAllNamespaces()
- pods := ccs.clusterCache.GetAllPods()
- pvcs := ccs.clusterCache.GetAllPersistentVolumeClaims()
- pvs := ccs.clusterCache.GetAllPersistentVolumes()
- services := ccs.clusterCache.GetAllServices()
- statefulSets := ccs.clusterCache.GetAllStatefulSets()
- daemonSets := ccs.clusterCache.GetAllDaemonSets()
- jobs := ccs.clusterCache.GetAllJobs()
- cronJobs := ccs.clusterCache.GetAllCronJobs()
- replicaSets := ccs.clusterCache.GetAllReplicaSets()
- resourceQuotas := ccs.clusterCache.GetAllResourceQuotas()
- // create scrape indexes. While the pairs being mapped here don't have a 1 to 1 relationship in the general case,
- // we are assuming that in the context of a single snapshot of the cluster they are 1 to 1.
- nodeNameToUID := buildNodeIndex(nodes)
- namespaceNameToUID := buildNamespaceIndex(namespaces)
- pvcNameToUID := buildPVCIndex(pvcs)
- pvNameToUID := buildPVIndex(pvs)
- scrapeFuncs := []ScrapeFunc{
- ccs.GetScrapeNodes(nodes),
- ccs.GetScrapeDeployments(deployments, namespaceNameToUID),
- ccs.GetScrapeNamespaces(namespaces),
- ccs.GetScrapePods(pods, pvcs, nodeNameToUID, namespaceNameToUID, pvcNameToUID),
- ccs.GetScrapePVCs(pvcs, namespaceNameToUID, pvNameToUID),
- ccs.GetScrapePVs(pvs),
- ccs.GetScrapeServices(services, namespaceNameToUID),
- ccs.GetScrapeStatefulSets(statefulSets, namespaceNameToUID),
- ccs.GetScrapeDaemonSets(daemonSets, namespaceNameToUID),
- ccs.GetScrapeJobs(jobs, namespaceNameToUID),
- ccs.GetScrapeCronJobs(cronJobs, namespaceNameToUID),
- ccs.GetScrapeReplicaSets(replicaSets, namespaceNameToUID),
- ccs.GetScrapeResourceQuotas(resourceQuotas, namespaceNameToUID),
- }
- return concurrentScrape(scrapeFuncs...)
- }
- func (ccs *ClusterCacheScraper) GetScrapeNodes(nodes []*clustercache.Node) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeNodes(nodes)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeNodes(nodes []*clustercache.Node) []metric.Update {
- var scrapeResults []metric.Update
- for _, node := range nodes {
- nodeInfo := map[string]string{
- source.NodeLabel: node.Name,
- source.ProviderIDLabel: node.SpecProviderID,
- source.UIDLabel: string(node.UID),
- }
- if instanceType, ok := coreutil.GetInstanceType(node.Labels); ok {
- nodeInfo[source.InstanceTypeLabel] = instanceType
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.NodeInfo,
- Labels: nodeInfo,
- AdditionalInfo: nodeInfo,
- })
- // Node Capacity
- scrapeResults = scrapeResourceList(
- metric.NodeResourceCapacities,
- node.Status.Capacity,
- nodeInfo,
- scrapeResults)
- // This block and metric can be removed, when we stop exporting assets and allocations
- if node.Status.Capacity != nil {
- if quantity, ok := node.Status.Capacity[v1.ResourceCPU]; ok {
- _, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNodeStatusCapacityCPUCores,
- Labels: nodeInfo,
- Value: value,
- })
- }
- if quantity, ok := node.Status.Capacity[v1.ResourceMemory]; ok {
- _, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNodeStatusCapacityMemoryBytes,
- Labels: nodeInfo,
- Value: value,
- })
- }
- }
- // Node Allocatable Resources
- scrapeResults = scrapeResourceList(
- metric.NodeResourcesAllocatable,
- node.Status.Allocatable,
- nodeInfo,
- scrapeResults)
- // This block and metric can be removed, when we stop exporting assets and allocations
- if node.Status.Allocatable != nil {
- if quantity, ok := node.Status.Allocatable[v1.ResourceCPU]; ok {
- _, _, value := toResourceUnitValue(v1.ResourceCPU, quantity)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNodeStatusAllocatableCPUCores,
- Labels: nodeInfo,
- Value: value,
- })
- }
- if quantity, ok := node.Status.Allocatable[v1.ResourceMemory]; ok {
- _, _, value := toResourceUnitValue(v1.ResourceMemory, quantity)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNodeStatusAllocatableMemoryBytes,
- Labels: nodeInfo,
- Value: value,
- })
- }
- }
- // node labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(node.Labels)
- nodeLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNodeLabels,
- Labels: nodeInfo,
- Value: 0,
- AdditionalInfo: nodeLabels,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.NodeScraperType,
- Targets: len(nodes),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeDeployments(deployments []*clustercache.Deployment, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeDeployments(deployments, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeDeployments(deployments []*clustercache.Deployment, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, deployment := range deployments {
- nsUID, ok := namespaceIndex[deployment.Namespace]
- if !ok {
- log.Debugf("deployment namespaceUID missing from index for namespace name '%s'", deployment.Namespace)
- }
- deploymentInfo := map[string]string{
- source.UIDLabel: string(deployment.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.NamespaceLabel: deployment.Namespace,
- source.DeploymentLabel: deployment.Name,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DeploymentInfo,
- Labels: deploymentInfo,
- Value: 0,
- AdditionalInfo: deploymentInfo,
- })
- // deployment labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(deployment.Labels)
- deploymentLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DeploymentLabels,
- Labels: deploymentInfo,
- Value: 0,
- AdditionalInfo: deploymentLabels,
- })
- // deployment annotations
- annoationNames, annotationValues := promutil.KubeAnnotationsToLabels(deployment.Annotations)
- deploymentAnnotations := util.ToMap(annoationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DeploymentAnnotations,
- Labels: deploymentInfo,
- Value: 0,
- AdditionalInfo: deploymentAnnotations,
- })
- // deployment match labels
- matchLabelNames, matchLabelValues := promutil.KubeLabelsToLabels(deployment.MatchLabels)
- deploymentMatchLabels := util.ToMap(matchLabelNames, matchLabelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DeploymentMatchLabels,
- Labels: deploymentInfo,
- Value: 0,
- AdditionalInfo: deploymentMatchLabels,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.DeploymentScraperType,
- Targets: len(deployments),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeNamespaces(namespaces []*clustercache.Namespace) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeNamespaces(namespaces)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeNamespaces(namespaces []*clustercache.Namespace) []metric.Update {
- var scrapeResults []metric.Update
- for _, namespace := range namespaces {
- namespaceInfo := map[string]string{
- source.NamespaceLabel: namespace.Name,
- source.UIDLabel: string(namespace.UID),
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.NamespaceInfo,
- Labels: namespaceInfo,
- AdditionalInfo: namespaceInfo,
- Value: 0,
- })
- // namespace labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(namespace.Labels)
- namespaceLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNamespaceLabels,
- Labels: namespaceInfo,
- Value: 0,
- AdditionalInfo: namespaceLabels,
- })
- // namespace annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(namespace.Annotations)
- namespaceAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeNamespaceAnnotations,
- Labels: namespaceInfo,
- Value: 0,
- AdditionalInfo: namespaceAnnotations,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.NamespaceScraperType,
- Targets: len(namespaces),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapePods(
- pods []*clustercache.Pod,
- pvcs []*clustercache.PersistentVolumeClaim,
- nodeIndex map[string]types.UID,
- namespaceIndex map[string]types.UID,
- pvcIndex map[pvcKey]types.UID,
- ) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapePods(pods, pvcs, nodeIndex, namespaceIndex, pvcIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapePods(
- pods []*clustercache.Pod,
- pvcs []*clustercache.PersistentVolumeClaim,
- nodeIndex map[string]types.UID,
- namespaceIndex map[string]types.UID,
- pvcIndex map[pvcKey]types.UID,
- ) []metric.Update {
- // this is only populated if we find gpu resources being requested
- var nodesGpuInfo map[string]*NodeGpuInfo
- // pv allocation and unmounted pvs
- pvcInfo := getPvcsInfo(pvcs)
- // pod info by uid
- podInfoByUid := make(map[string]map[string]string)
- var scrapeResults []metric.Update
- for _, pod := range pods {
- // pods without a set node name are not running
- if pod.Spec.NodeName == "" {
- continue
- }
- nodeUID, ok := nodeIndex[pod.Spec.NodeName]
- if !ok {
- log.Debugf("pod nodeUID missing from index for node name '%s'", pod.Spec.NodeName)
- }
- nsUID, ok := namespaceIndex[pod.Namespace]
- if !ok {
- log.Debugf("pod namespaceUID missing from index for namespace name '%s'", pod.Namespace)
- }
- podInfo := map[string]string{
- source.UIDLabel: string(pod.UID),
- source.PodLabel: pod.Name,
- source.NamespaceUIDLabel: string(nsUID),
- source.NodeUIDLabel: string(nodeUID),
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.PodInfo,
- Labels: podInfo,
- Value: 0,
- AdditionalInfo: podInfo,
- })
- podInfo[source.NamespaceLabel] = pod.Namespace
- podInfo[source.NodeLabel] = pod.Spec.NodeName
- podInfo[source.InstanceLabel] = pod.Spec.NodeName
- podInfoByUid[string(pod.UID)] = podInfo
- // pod labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(pod.Labels)
- podLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePodLabels,
- Labels: podInfo,
- Value: 0,
- AdditionalInfo: podLabels,
- })
- // pod annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(pod.Annotations)
- podAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePodAnnotations,
- Labels: podInfo,
- Value: 0,
- AdditionalInfo: podAnnotations,
- })
- // Determine PVC use data for Pod
- claimed := make(map[string]struct{})
- for _, volume := range pod.Spec.Volumes {
- if volume.PersistentVolumeClaim != nil {
- name := volume.PersistentVolumeClaim.ClaimName
- key := pod.Namespace + "," + name
- if _, seen := claimed[key]; seen {
- continue
- }
- if pvc, ok := pvcInfo[key]; ok {
- pvc.PodsClaimed = append(pvc.PodsClaimed, string(pod.UID))
- claimed[key] = struct{}{}
- }
- }
- }
- // Pod owner metric
- for _, owner := range pod.OwnerReferences {
- controller := "false"
- if owner.Controller != nil && *owner.Controller {
- controller = "true"
- }
- ownerInfo := maps.Clone(podInfo)
- ownerInfo[source.OwnerKindLabel] = owner.Kind
- ownerInfo[source.OwnerNameLabel] = owner.Name
- ownerInfo[source.OwnerUIDLabel] = string(owner.UID)
- ownerInfo[source.ContainerLabel] = controller
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePodOwner,
- Labels: ownerInfo,
- Value: 0,
- })
- }
- // Container Status
- for _, status := range pod.Status.ContainerStatuses {
- if status.State.Running != nil {
- containerInfo := maps.Clone(podInfo)
- containerInfo[source.ContainerLabel] = status.Name
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePodContainerStatusRunning,
- Labels: containerInfo,
- AdditionalInfo: containerInfo,
- Value: 0,
- })
- }
- }
- for _, volume := range pod.Spec.Volumes {
- if volume.PersistentVolumeClaim != nil {
- pvcUID, ok := pvcIndex[pvcKey{
- name: volume.PersistentVolumeClaim.ClaimName,
- namespace: pod.Namespace,
- }]
- if !ok {
- continue
- }
- podPVCVolumeInfo := map[string]string{
- source.UIDLabel: string(pod.UID),
- source.PVCUIDLabel: string(pvcUID),
- source.PodVolumeNameLabel: volume.Name,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.PodPVCVolume,
- Labels: podPVCVolumeInfo,
- Value: 0,
- })
- }
- }
- for _, container := range pod.Spec.Containers {
- containerInfo := maps.Clone(podInfo)
- containerInfo[source.ContainerLabel] = container.Name
- // Requests
- scrapeResults = scrapeResourceList(
- metric.KubePodContainerResourceRequests,
- container.Resources.Requests,
- containerInfo,
- scrapeResults)
- // Limits
- scrapeResults = scrapeResourceList(
- metric.KubePodContainerResourceLimits,
- container.Resources.Limits,
- containerInfo,
- scrapeResults)
- // Todo remove when asset/allocation pipeline are removed
- // gpu "requests" is either the request or limit if it exists
- var gpuRequest *float64
- for resourceName, quantity := range container.Resources.Requests {
- if isGpuResourceName(resourceName) {
- // set gpu request if it exists
- _, _, value := toResourceUnitValue(resourceName, quantity)
- gpuRequestValue := value
- gpuRequest = &gpuRequestValue
- break
- }
- }
- // Limits
- if gpuRequest == nil {
- for resourceName, quantity := range container.Resources.Limits {
- if isGpuResourceName(resourceName) {
- // set gpu request if it exists
- _, _, value := toResourceUnitValue(resourceName, quantity)
- gpuRequestValue := value
- gpuRequest = &gpuRequestValue
- break
- }
- }
- }
- // handle the GPU allocation metric here IFF there exists a request/limit for GPUs
- // we only load the node gpu data map if we run into a container with gpu requests/limits
- if gpuRequest != nil {
- if nodesGpuInfo == nil {
- nodesGpuInfo = ccs.getNodesGpuInfo()
- }
- gpuAlloc := *gpuRequest
- if nodeGpuInfo, ok := nodesGpuInfo[pod.Spec.NodeName]; ok {
- if nodeGpuInfo != nil && nodeGpuInfo.VGPU != 0 {
- gpuAlloc = gpuAlloc * (nodeGpuInfo.GPU / nodeGpuInfo.VGPU)
- }
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ContainerGPUAllocation,
- Labels: maps.Clone(containerInfo),
- Value: gpuAlloc,
- })
- }
- }
- }
- // Iterate through PVC Info after the pods have been tallied and export
- // allocation metrics based on the number of other pods claiming the volume
- for _, pvc := range pvcInfo {
- // unmounted pvs get full allocation
- if len(pvc.PodsClaimed) == 0 {
- labels := map[string]string{
- source.PodLabel: unmountedPVsContainer,
- source.NamespaceLabel: pvc.Namespace,
- source.PVCLabel: pvc.Claim,
- source.PVLabel: pvc.VolumeName,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.PodPVCAllocation,
- Labels: labels,
- Value: pvc.Requests,
- })
- continue
- }
- // pods get a proportion of pv allocation
- value := pvc.Requests / float64(len(pvc.PodsClaimed))
- for _, podUid := range pvc.PodsClaimed {
- podInfo, ok := podInfoByUid[podUid]
- if !ok {
- continue
- }
- pvcLabels := maps.Clone(podInfo)
- pvcLabels[source.PVCLabel] = pvc.Claim
- pvcLabels[source.PVLabel] = pvc.VolumeName
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.PodPVCAllocation,
- Labels: pvcLabels,
- Value: value,
- })
- }
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.PodScraperType,
- Targets: len(pods),
- Errors: nil,
- })
- return scrapeResults
- }
- func scrapeResourceList(metricName string, resourceList v1.ResourceList, baseLabels map[string]string, scrapeResults []metric.Update) []metric.Update {
- if resourceList != nil {
- // sorting keys here for testing purposes
- keys := maps.Keys(resourceList)
- slices.Sort(keys)
- for _, resourceName := range keys {
- quantity := resourceList[resourceName]
- resource, unit, value := toResourceUnitValue(resourceName, quantity)
- // failed to parse the resource type
- if resource == "" {
- log.DedupedWarningf(5, "Failed to parse resource units and quantity for resource: %s", resourceName)
- continue
- }
- resourceRequestInfo := maps.Clone(baseLabels)
- resourceRequestInfo[source.ResourceLabel] = resource
- resourceRequestInfo[source.UnitLabel] = unit
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metricName,
- Labels: resourceRequestInfo,
- Value: value,
- })
- }
- }
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapePVCs(
- pvcs []*clustercache.PersistentVolumeClaim,
- namespaceIndex map[string]types.UID,
- pvIndex map[string]types.UID,
- ) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapePVCs(pvcs, namespaceIndex, pvIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapePVCs(
- pvcs []*clustercache.PersistentVolumeClaim,
- namespaceIndex map[string]types.UID,
- pvIndex map[string]types.UID,
- ) []metric.Update {
- var scrapeResults []metric.Update
- for _, pvc := range pvcs {
- nsUID, ok := namespaceIndex[pvc.Namespace]
- if !ok {
- log.Debugf("pvc namespaceUID missing from index for namespace name '%s'", pvc.Namespace)
- }
- pvUID, ok := pvIndex[pvc.Spec.VolumeName]
- if !ok && pvc.Spec.VolumeName != "" {
- log.Debugf("pvc volume name missing from index for pv name '%s'", pvc.Spec.VolumeName)
- }
- pvcInfo := map[string]string{
- source.UIDLabel: string(pvc.UID),
- source.PVCLabel: pvc.Name,
- source.NamespaceUIDLabel: string(nsUID),
- source.NamespaceLabel: pvc.Namespace,
- source.VolumeNameLabel: pvc.Spec.VolumeName,
- source.PVUIDLabel: string(pvUID),
- source.StorageClassLabel: getPersistentVolumeClaimClass(pvc),
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePersistentVolumeClaimInfo,
- Labels: pvcInfo,
- AdditionalInfo: pvcInfo,
- Value: 0,
- })
- if storage, ok := pvc.Spec.Resources.Requests[v1.ResourceStorage]; ok {
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePersistentVolumeClaimResourceRequestsStorageBytes,
- Labels: pvcInfo,
- Value: float64(storage.Value()),
- })
- }
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.PvcScraperType,
- Targets: len(pvcs),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapePVs(pvs []*clustercache.PersistentVolume) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapePVs(pvs)
- }
- }
- func (ccs *ClusterCacheScraper) scrapePVs(pvs []*clustercache.PersistentVolume) []metric.Update {
- var scrapeResults []metric.Update
- for _, pv := range pvs {
- providerID := pv.Name
- var csiVolumeHandle string
- // if a more accurate provider ID is available, use that
- if pv.Spec.CSI != nil && pv.Spec.CSI.VolumeHandle != "" {
- providerID = pv.Spec.CSI.VolumeHandle
- csiVolumeHandle = pv.Spec.CSI.VolumeHandle
- }
- pvInfo := map[string]string{
- source.UIDLabel: string(pv.UID),
- source.PVLabel: pv.Name,
- source.StorageClassLabel: pv.Spec.StorageClassName,
- source.ProviderIDLabel: providerID,
- source.CSIVolumeHandleLabel: csiVolumeHandle,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubecostPVInfo,
- Labels: pvInfo,
- AdditionalInfo: pvInfo,
- Value: 0,
- })
- if storage, ok := pv.Spec.Capacity[v1.ResourceStorage]; ok {
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubePersistentVolumeCapacityBytes,
- Labels: pvInfo,
- Value: float64(storage.Value()),
- })
- }
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.PvScraperType,
- Targets: len(pvs),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeServices(
- services []*clustercache.Service,
- namespaceIndex map[string]types.UID,
- ) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeServices(services, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeServices(
- services []*clustercache.Service,
- namespaceIndex map[string]types.UID,
- ) []metric.Update {
- var scrapeResults []metric.Update
- for _, service := range services {
- namespaceUID := namespaceIndex[service.Namespace]
- // Assuming one address for now
- var lbIngressAddress string
- lbIngressAddresses := clustercache.GetLoadBalancerIngressAddress(service)
- if len(lbIngressAddresses) > 0 {
- lbIngressAddress = lbIngressAddresses[0]
- }
- serviceInfo := map[string]string{
- source.UIDLabel: string(service.UID),
- source.ServiceLabel: service.Name,
- source.NamespaceLabel: service.Namespace,
- source.NamespaceUIDLabel: string(namespaceUID),
- source.ServiceTypeLabel: string(service.Type),
- source.LBIngressAddress: lbIngressAddress,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ServiceInfo,
- Labels: serviceInfo,
- Value: 0,
- AdditionalInfo: serviceInfo,
- })
- // service selector labels
- selectorNames, selectorValues := promutil.KubeLabelsToLabels(service.SpecSelector)
- serviceLabels := util.ToMap(selectorNames, selectorValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ServiceSelectorLabels,
- Labels: serviceInfo,
- Value: 0,
- AdditionalInfo: serviceLabels,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.ServiceScraperType,
- Targets: len(services),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeStatefulSets(statefulSets []*clustercache.StatefulSet, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeStatefulSets(statefulSets, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeStatefulSets(statefulSets []*clustercache.StatefulSet, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, statefulSet := range statefulSets {
- nsUID, ok := namespaceIndex[statefulSet.Namespace]
- if !ok {
- log.Debugf("statefulSet namespaceUID missing from index for namespace name '%s'", statefulSet.Namespace)
- }
- statefulSetInfo := map[string]string{
- source.UIDLabel: string(statefulSet.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.StatefulSetLabel: statefulSet.Name,
- }
- // statefulSet info
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.StatefulSetInfo,
- Labels: statefulSetInfo,
- Value: 0,
- AdditionalInfo: statefulSetInfo,
- })
- // statefulSet labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(statefulSet.Labels)
- statefulSetLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.StatefulSetLabels,
- Labels: statefulSetInfo,
- Value: 0,
- AdditionalInfo: statefulSetLabels,
- })
- // statefulSet annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(statefulSet.Annotations)
- statefulSetAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.StatefulSetAnnotations,
- Labels: statefulSetInfo,
- Value: 0,
- AdditionalInfo: statefulSetAnnotations,
- })
- // statefulSet match labels
- statefulSetInfo[source.NamespaceLabel] = statefulSet.Namespace
- matchLabelNames, matchLabelValues := promutil.KubeLabelsToLabels(statefulSet.SpecSelector.MatchLabels)
- statefulSetMatchLabels := util.ToMap(matchLabelNames, matchLabelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.StatefulSetMatchLabels,
- Labels: statefulSetInfo,
- Value: 0,
- AdditionalInfo: statefulSetMatchLabels,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.StatefulSetScraperType,
- Targets: len(statefulSets),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeDaemonSets(daemonSets []*clustercache.DaemonSet, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeDaemonSets(daemonSets, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeDaemonSets(daemonSets []*clustercache.DaemonSet, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, daemonSet := range daemonSets {
- nsUID, ok := namespaceIndex[daemonSet.Namespace]
- if !ok {
- log.Debugf("daemonSet namespaceUID missing from index for namespace name '%s'", daemonSet.Namespace)
- }
- daemonSetInfo := map[string]string{
- source.UIDLabel: string(daemonSet.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.DaemonSetLabel: daemonSet.Name,
- }
- // daemonSet info
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DaemonSetInfo,
- Labels: daemonSetInfo,
- Value: 0,
- AdditionalInfo: daemonSetInfo,
- })
- // daemonSet labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(daemonSet.Labels)
- daemonSetLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DaemonSetLabels,
- Labels: daemonSetInfo,
- Value: 0,
- AdditionalInfo: daemonSetLabels,
- })
- // daemonSet annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(daemonSet.Annotations)
- daemonSetAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.DaemonSetAnnotations,
- Labels: daemonSetInfo,
- Value: 0,
- AdditionalInfo: daemonSetAnnotations,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.DaemonSetScraperType,
- Targets: len(daemonSets),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeJobs(jobs []*clustercache.Job, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeJobs(jobs, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeJobs(jobs []*clustercache.Job, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, job := range jobs {
- nsUID, ok := namespaceIndex[job.Namespace]
- if !ok {
- log.Debugf("job namespaceUID missing from index for namespace name '%s'", job.Namespace)
- }
- jobInfo := map[string]string{
- source.UIDLabel: string(job.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.JobLabel: job.Name,
- }
- // job info
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.JobInfo,
- Labels: jobInfo,
- Value: 0,
- AdditionalInfo: jobInfo,
- })
- // job labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(job.Labels)
- jobLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.JobLabels,
- Labels: jobInfo,
- Value: 0,
- AdditionalInfo: jobLabels,
- })
- // job annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(job.Annotations)
- jobAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.JobAnnotations,
- Labels: jobInfo,
- Value: 0,
- AdditionalInfo: jobAnnotations,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.JobScraperType,
- Targets: len(jobs),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeCronJobs(cronJobs []*clustercache.CronJob, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeCronJobs(cronJobs, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeCronJobs(cronJobs []*clustercache.CronJob, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, cronJob := range cronJobs {
- nsUID, ok := namespaceIndex[cronJob.Namespace]
- if !ok {
- log.Debugf("cronjob namespaceUID missing from index for namespace name '%s'", cronJob.Namespace)
- }
- cronJobInfo := map[string]string{
- source.UIDLabel: string(cronJob.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.CronJobLabel: cronJob.Name,
- }
- // cronjob info
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.CronJobInfo,
- Labels: cronJobInfo,
- Value: 0,
- AdditionalInfo: cronJobInfo,
- })
- // cronjob labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(cronJob.Labels)
- cronJobLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.CronJobLabels,
- Labels: cronJobInfo,
- Value: 0,
- AdditionalInfo: cronJobLabels,
- })
- // cronjob annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(cronJob.Annotations)
- cronJobAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.CronJobAnnotations,
- Labels: cronJobInfo,
- Value: 0,
- AdditionalInfo: cronJobAnnotations,
- })
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.CronJobScraperType,
- Targets: len(cronJobs),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeReplicaSets(replicaSets []*clustercache.ReplicaSet, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeReplicaSets(replicaSets, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeReplicaSets(replicaSets []*clustercache.ReplicaSet, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- for _, replicaSet := range replicaSets {
- nsUID, ok := namespaceIndex[replicaSet.Namespace]
- if !ok {
- log.Debugf("replicaset namespaceUID missing from index for namespace name '%s'", replicaSet.Namespace)
- }
- replicaSetInfo := map[string]string{
- source.UIDLabel: string(replicaSet.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.ReplicaSetLabel: replicaSet.Name,
- }
- // replicaset info
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ReplicaSetInfo,
- Labels: replicaSetInfo,
- Value: 0,
- AdditionalInfo: replicaSetInfo,
- })
- // replicaset labels
- labelNames, labelValues := promutil.KubeLabelsToLabels(replicaSet.Labels)
- replicaSetLabels := util.ToMap(labelNames, labelValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ReplicaSetLabels,
- Labels: replicaSetInfo,
- Value: 0,
- AdditionalInfo: replicaSetLabels,
- })
- // replicaset annotations
- annotationNames, annotationValues := promutil.KubeAnnotationsToLabels(replicaSet.Annotations)
- replicaSetAnnotations := util.ToMap(annotationNames, annotationValues)
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ReplicaSetAnnotations,
- Labels: replicaSetInfo,
- Value: 0,
- AdditionalInfo: replicaSetAnnotations,
- })
- // owner references for backward compatibility
- replicaSetOwnerInfo := map[string]string{
- source.ReplicaSetLabel: replicaSet.Name,
- source.NamespaceLabel: replicaSet.Namespace,
- source.UIDLabel: string(replicaSet.UID),
- }
- // this specific metric exports a special <none> value for name and kind
- // if there are no owners
- if len(replicaSet.OwnerReferences) == 0 {
- ownerInfo := maps.Clone(replicaSetOwnerInfo)
- ownerInfo[source.OwnerKindLabel] = source.NoneLabelValue
- ownerInfo[source.OwnerNameLabel] = source.NoneLabelValue
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeReplicasetOwner,
- Labels: ownerInfo,
- Value: 0,
- })
- } else {
- for _, owner := range replicaSet.OwnerReferences {
- controller := "false"
- if owner.Controller != nil && *owner.Controller {
- controller = "true"
- }
- ownerInfo := maps.Clone(replicaSetOwnerInfo)
- ownerInfo[source.OwnerKindLabel] = owner.Kind
- ownerInfo[source.OwnerNameLabel] = owner.Name
- ownerInfo[source.OwnerUIDLabel] = string(owner.UID)
- ownerInfo[source.ControllerLabel] = controller
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.KubeReplicasetOwner,
- Labels: ownerInfo,
- Value: 0,
- })
- }
- }
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.ReplicaSetScraperType,
- Targets: len(replicaSets),
- Errors: nil,
- })
- return scrapeResults
- }
- func (ccs *ClusterCacheScraper) GetScrapeResourceQuotas(resourceQuotas []*clustercache.ResourceQuota, namespaceIndex map[string]types.UID) ScrapeFunc {
- return func() []metric.Update {
- return ccs.scrapeResourceQuotas(resourceQuotas, namespaceIndex)
- }
- }
- func (ccs *ClusterCacheScraper) scrapeResourceQuotas(resourceQuotas []*clustercache.ResourceQuota, namespaceIndex map[string]types.UID) []metric.Update {
- var scrapeResults []metric.Update
- processResource := func(baseLabels map[string]string, name v1.ResourceName, quantity resource.Quantity, metricName string) metric.Update {
- resource, unit, value := toResourceUnitValue(name, quantity)
- labels := maps.Clone(baseLabels)
- labels[source.ResourceLabel] = resource
- labels[source.UnitLabel] = unit
- return metric.Update{
- Name: metricName,
- Labels: labels,
- Value: value,
- }
- }
- for _, resourceQuota := range resourceQuotas {
- nsUID, _ := namespaceIndex[resourceQuota.Namespace]
- resourceQuotaInfo := map[string]string{
- source.UIDLabel: string(resourceQuota.UID),
- source.NamespaceUIDLabel: string(nsUID),
- source.ResourceQuotaLabel: resourceQuota.Name,
- }
- scrapeResults = append(scrapeResults, metric.Update{
- Name: metric.ResourceQuotaInfo,
- Labels: resourceQuotaInfo,
- AdditionalInfo: resourceQuotaInfo,
- Value: 0,
- })
- if resourceQuota.Spec.Hard != nil {
- // CPU/memory requests can also be aliased as "cpu" and "memory". For now, however, only scrape the complete names
- // https://kubernetes.io/docs/concepts/policy/resource-quotas/#compute-resource-quota
- if quantity, ok := resourceQuota.Spec.Hard[v1.ResourceRequestsCPU]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceCPU, quantity, metric.KubeResourceQuotaSpecResourceRequests))
- }
- if quantity, ok := resourceQuota.Spec.Hard[v1.ResourceRequestsMemory]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceMemory, quantity, metric.KubeResourceQuotaSpecResourceRequests))
- }
- if quantity, ok := resourceQuota.Spec.Hard[v1.ResourceLimitsCPU]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceCPU, quantity, metric.KubeResourceQuotaSpecResourceLimits))
- }
- if quantity, ok := resourceQuota.Spec.Hard[v1.ResourceLimitsMemory]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceMemory, quantity, metric.KubeResourceQuotaSpecResourceLimits))
- }
- }
- if resourceQuota.Status.Used != nil {
- if quantity, ok := resourceQuota.Status.Used[v1.ResourceRequestsCPU]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceCPU, quantity, metric.KubeResourceQuotaStatusUsedResourceRequests))
- }
- if quantity, ok := resourceQuota.Status.Used[v1.ResourceRequestsMemory]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceMemory, quantity, metric.KubeResourceQuotaStatusUsedResourceRequests))
- }
- if quantity, ok := resourceQuota.Status.Used[v1.ResourceLimitsCPU]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceCPU, quantity, metric.KubeResourceQuotaStatusUsedResourceLimits))
- }
- if quantity, ok := resourceQuota.Status.Used[v1.ResourceLimitsMemory]; ok {
- scrapeResults = append(scrapeResults, processResource(resourceQuotaInfo, v1.ResourceMemory, quantity, metric.KubeResourceQuotaStatusUsedResourceLimits))
- }
- }
- }
- events.Dispatch(event.ScrapeEvent{
- ScraperName: event.KubernetesClusterScraperName,
- ScrapeType: event.ResourceQuotaScraperType,
- Targets: len(resourceQuotas),
- Errors: nil,
- })
- return scrapeResults
- }
// PvcInfo holds the details of a single persistent volume claim that are
// needed for tracking volume usage.
type PvcInfo struct {
	// Class is the claim's storage class name ("" when none was requested).
	Class string
	// Claim is the name of the persistent volume claim.
	Claim string
	// Namespace is the namespace the claim lives in.
	Namespace string
	// VolumeName is the name of the volume from the claim's spec.
	VolumeName string
	// Requests is the claim's storage request, as the raw quantity value.
	Requests float64
	// PodsClaimed is not populated by getPvcsInfo.
	// NOTE(review): presumably the pods using this claim; confirm against the callers.
	PodsClaimed []string
}
- func getPvcsInfo(pvcs []*clustercache.PersistentVolumeClaim) map[string]*PvcInfo {
- toReturn := make(map[string]*PvcInfo)
- for _, pvc := range pvcs {
- ns := pvc.Namespace
- pvcName := pvc.Name
- volumeName := pvc.Spec.VolumeName
- pvClass := getPersistentVolumeClaimClass(pvc)
- requests := float64(pvc.Spec.Resources.Requests.Storage().Value())
- key := ns + "," + pvcName
- toReturn[key] = &PvcInfo{
- Class: pvClass,
- Claim: pvcName,
- Namespace: ns,
- VolumeName: volumeName,
- Requests: requests,
- }
- }
- return toReturn
- }
// NodeGpuInfo holds the GPU and virtual GPU counts determined for a node.
type NodeGpuInfo struct {
	// GPU is the number of GPUs on the node.
	GPU float64
	// VGPU is the number of virtual GPUs on the node.
	VGPU float64
}
- func (ccs *ClusterCacheScraper) getNodesGpuInfo() map[string]*NodeGpuInfo {
- // use a closure to cache allocatableVGPU result instead of calculating
- // it every time we need it
- var allocatableVGPUs *float64
- allocVGPUs := func() (float64, error) {
- if allocatableVGPUs != nil {
- return *allocatableVGPUs, nil
- }
- vgpu, err := getAllocatableVGPUs(ccs.clusterCache.GetAllDaemonSets())
- if err != nil {
- return vgpu, err
- }
- allocatableVGPUs = &vgpu
- return *allocatableVGPUs, nil
- }
- var nodeGpuMap map[string]*NodeGpuInfo = make(map[string]*NodeGpuInfo)
- for _, node := range ccs.clusterCache.GetAllNodes() {
- info, err := gpuInfoFor(node, allocVGPUs)
- if err != nil {
- log.Warnf("Failed to retrieve GPU Info for Node: %s - %s", node.Name, err)
- continue
- }
- nodeGpuMap[node.Name] = info
- }
- return nodeGpuMap
- }
- // getPersistentVolumeClaimClass returns StorageClassName. If no storage class was
- // requested, it returns "".
- func getPersistentVolumeClaimClass(claim *clustercache.PersistentVolumeClaim) string {
- // Use beta annotation first
- if class, found := claim.Annotations[v1.BetaStorageClassAnnotation]; found {
- return class
- }
- if claim.Spec.StorageClassName != nil {
- return *claim.Spec.StorageClassName
- }
- // Special non-empty string to indicate absence of storage class.
- return ""
- }
- // toResourceUnitValue accepts a resource name and quantity and returns the sanitized resource, the unit, and the value in the units.
- // Returns an empty string for resource and unit if there was a failure.
- func toResourceUnitValue(resourceName v1.ResourceName, quantity resource.Quantity) (resource string, unit string, value float64) {
- resource = promutil.SanitizeLabelName(string(resourceName))
- switch resourceName {
- case v1.ResourceCPU:
- unit = "core"
- value = float64(quantity.MilliValue()) / 1000
- return
- case v1.ResourceStorage:
- fallthrough
- case v1.ResourceEphemeralStorage:
- fallthrough
- case v1.ResourceMemory:
- unit = "byte"
- value = float64(quantity.Value())
- return
- case v1.ResourcePods:
- unit = "integer"
- value = float64(quantity.Value())
- return
- default:
- if isHugePageResourceName(resourceName) || isAttachableVolumeResourceName(resourceName) {
- unit = "byte"
- value = float64(quantity.Value())
- return
- }
- if isExtendedResourceName(resourceName) {
- unit = "integer"
- value = float64(quantity.Value())
- return
- }
- }
- resource = ""
- unit = ""
- value = 0.0
- return
- }
- func isGpuResourceName(name v1.ResourceName) bool {
- return name == "nvidia.com/gpu" || name == "k8s.amazonaws.com/vgpu"
- }
- // isHugePageResourceName checks for a huge page container resource name
- func isHugePageResourceName(name v1.ResourceName) bool {
- return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
- }
- // isAttachableVolumeResourceName checks for attached volume container resource name
- func isAttachableVolumeResourceName(name v1.ResourceName) bool {
- return strings.HasPrefix(string(name), v1.ResourceAttachableVolumesPrefix)
- }
- // isExtendedResourceName checks for extended container resource name
- func isExtendedResourceName(name v1.ResourceName) bool {
- if isNativeResource(name) || strings.HasPrefix(string(name), v1.DefaultResourceRequestsPrefix) {
- return false
- }
- // Ensure it satisfies the rules in IsQualifiedName() after converted into quota resource name
- nameForQuota := fmt.Sprintf("%s%s", v1.DefaultResourceRequestsPrefix, string(name))
- if errs := validation.IsQualifiedName(nameForQuota); len(errs) != 0 {
- return false
- }
- return true
- }
- // isNativeResource checks for a kubernetes.io/ prefixed resource name
- func isNativeResource(name v1.ResourceName) bool {
- return !strings.Contains(string(name), "/") || isPrefixedNativeResource(name)
- }
- func isPrefixedNativeResource(name v1.ResourceName) bool {
- return strings.Contains(string(name), v1.ResourceDefaultNamespacePrefix)
- }
- // gets the Node GPUs and VGPUs using the node data from k8s. Returns nil if GPUs could not be located for the node.
- func gpuInfoFor(
- n *clustercache.Node,
- allocatedVGPUs func() (float64, error),
- ) (*NodeGpuInfo, error) {
- g, hasGpu := n.Status.Capacity["nvidia.com/gpu"]
- _, hasReplicas := n.Labels["nvidia.com/gpu.replicas"]
- // Case 1: Standard NVIDIA GPU
- if hasGpu && g.Value() != 0 && !hasReplicas {
- return &NodeGpuInfo{
- GPU: float64(g.Value()),
- VGPU: float64(g.Value()),
- }, nil
- }
- // Case 2: NVIDIA GPU with GPU Feature Discovery (GFD) Pod enabled.
- // Ref: https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/gpu-sharing.html#verifying-the-gpu-time-slicing-configuration
- // Ref: https://github.com/NVIDIA/k8s-device-plugin/blob/d899752a424818428f744a946d32b132ea2c0cf1/internal/lm/resource_test.go#L44-L45
- // Ref: https://github.com/NVIDIA/k8s-device-plugin/blob/d899752a424818428f744a946d32b132ea2c0cf1/internal/lm/resource_test.go#L103-L118
- if hasReplicas {
- resultGPU := 0.0
- resultVGPU := 0.0
- if c, ok := n.Labels["nvidia.com/gpu.count"]; ok {
- var err error
- resultGPU, err = strconv.ParseFloat(c, 64)
- if err != nil {
- return nil, fmt.Errorf("could not parse label \"nvidia.com/gpu.count\": %v", err)
- }
- }
- if s, ok := n.Status.Capacity["nvidia.com/gpu.shared"]; ok { // GFD configured `renameByDefault=true`
- resultVGPU = float64(s.Value())
- } else if g, ok := n.Status.Capacity["nvidia.com/gpu"]; ok { // GFD configured `renameByDefault=false`
- resultVGPU = float64(g.Value())
- } else {
- resultVGPU = resultGPU
- }
- return &NodeGpuInfo{
- GPU: resultGPU,
- VGPU: resultVGPU,
- }, nil
- }
- // Case 3: AWS vGPU
- if vgpu, ok := n.Status.Capacity["k8s.amazonaws.com/vgpu"]; ok {
- vgpuCount, err := allocatedVGPUs()
- if err != nil {
- return nil, err
- }
- vgpuCoeff := 10.0
- if vgpuCount > 0.0 {
- vgpuCoeff = vgpuCount
- }
- if vgpu.Value() != 0 {
- resultGPU := float64(vgpu.Value()) / vgpuCoeff
- resultVGPU := float64(vgpu.Value())
- return &NodeGpuInfo{
- GPU: resultGPU,
- VGPU: resultVGPU,
- }, nil
- }
- }
- // No GPU found
- return nil, nil
- }
- func getAllocatableVGPUs(daemonsets []*clustercache.DaemonSet) (float64, error) {
- vgpuCount := 0.0
- for _, ds := range daemonsets {
- dsContainerList := &ds.SpecContainers
- for _, ctnr := range *dsContainerList {
- if ctnr.Args != nil {
- for _, arg := range ctnr.Args {
- if strings.Contains(arg, "--vgpu=") {
- vgpus, err := strconv.ParseFloat(arg[strings.IndexByte(arg, '=')+1:], 64)
- if err != nil {
- log.Errorf("failed to parse vgpu allocation string %s: %v", arg, err)
- continue
- }
- vgpuCount = vgpus
- return vgpuCount, nil
- }
- }
- }
- }
- }
- return vgpuCount, nil
- }
|