|
@@ -2,9 +2,7 @@ package metric
|
|
|
|
|
|
|
|
import (
|
|
import (
|
|
|
"fmt"
|
|
"fmt"
|
|
|
- "maps"
|
|
|
|
|
"sync"
|
|
"sync"
|
|
|
- "time"
|
|
|
|
|
|
|
|
|
|
"github.com/kubecost/events"
|
|
"github.com/kubecost/events"
|
|
|
"github.com/opencost/opencost/core/pkg/collections"
|
|
"github.com/opencost/opencost/core/pkg/collections"
|
|
@@ -14,12 +12,6 @@ import (
|
|
|
|
|
|
|
|
// Collector Metric Diagnostic IDs
|
|
// Collector Metric Diagnostic IDs
|
|
|
const (
|
|
const (
|
|
|
- // OpencostDiagnosticMetricID is the identifier for the metric used to determine if Opencost metrics are being updated
|
|
|
|
|
- OpencostDiagnosticMetricID = "opencostMetric"
|
|
|
|
|
-
|
|
|
|
|
- // NodesDiagnosticMetricID is the identifier for the query used to determine if the node CPU cores capacity is being updated
|
|
|
|
|
- NodesDiagnosticMetricID = "nodesCPUMetrics"
|
|
|
|
|
-
|
|
|
|
|
// DcgmScraperDiagnosticID contains the identifier for the DCGM scraper diagnostic.
|
|
// DcgmScraperDiagnosticID contains the identifier for the DCGM scraper diagnostic.
|
|
|
DcgmScraperDiagnosticID = event.DCGMScraperName
|
|
DcgmScraperDiagnosticID = event.DCGMScraperName
|
|
|
|
|
|
|
@@ -44,16 +36,6 @@ const (
|
|
|
KubernetesPvcsScraperDiagnosticID = event.KubernetesClusterScraperName + "-" + event.PvcScraperType
|
|
KubernetesPvcsScraperDiagnosticID = event.KubernetesClusterScraperName + "-" + event.PvcScraperType
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
-// DiagnosticType is used in the definitions to determine which type of implementation to use when representing the
|
|
|
|
|
-// diagnostic
|
|
|
|
|
-type DiagnosticType int
|
|
|
|
|
-
|
|
|
|
|
-const (
|
|
|
|
|
- DiagnosticTypeMetric DiagnosticType = 0
|
|
|
|
|
- DiagnosticTypeScraper DiagnosticType = 1
|
|
|
|
|
- // more diagnostic types?
|
|
|
|
|
-)
|
|
|
|
|
-
|
|
|
|
|
// diagnostic definition is the type used to define a deterministic list of specific diagnostics we _expect_ to collect
|
|
// diagnostic definition is the type used to define a deterministic list of specific diagnostics we _expect_ to collect
|
|
|
type diagnosticDefinition struct {
|
|
type diagnosticDefinition struct {
|
|
|
ID string
|
|
ID string
|
|
@@ -61,33 +43,15 @@ type diagnosticDefinition struct {
|
|
|
Label string
|
|
Label string
|
|
|
Description string
|
|
Description string
|
|
|
DocLink string
|
|
DocLink string
|
|
|
- DiagType DiagnosticType
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// diagnostic definitions mapping holds all of the diagnostic definitions that can be used for collector metrics diagnostics
|
|
// diagnostic definitions mapping holds all of the diagnostic definitions that can be used for collector metrics diagnostics
|
|
|
var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnosticDefinition{
|
|
var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnosticDefinition{
|
|
|
- NodesDiagnosticMetricID: {
|
|
|
|
|
- ID: NodesDiagnosticMetricID,
|
|
|
|
|
- MetricName: KubeNodeStatusCapacityCPUCores,
|
|
|
|
|
- Label: "Node CPU cores capacity is being scraped",
|
|
|
|
|
- Description: "Determine if the node CPU cores capacity metrics are being updated",
|
|
|
|
|
- DiagType: DiagnosticTypeMetric,
|
|
|
|
|
- },
|
|
|
|
|
-
|
|
|
|
|
- OpencostDiagnosticMetricID: {
|
|
|
|
|
- ID: OpencostDiagnosticMetricID,
|
|
|
|
|
- MetricName: NodeTotalHourlyCost,
|
|
|
|
|
- Label: "Opencost metrics for a node are being scraped",
|
|
|
|
|
- Description: "Determine if opencost metrics for a node are being updated",
|
|
|
|
|
- DiagType: DiagnosticTypeMetric,
|
|
|
|
|
- },
|
|
|
|
|
-
|
|
|
|
|
DcgmScraperDiagnosticID: {
|
|
DcgmScraperDiagnosticID: {
|
|
|
ID: DcgmScraperDiagnosticID,
|
|
ID: DcgmScraperDiagnosticID,
|
|
|
MetricName: event.DCGMScraperName,
|
|
MetricName: event.DCGMScraperName,
|
|
|
Label: "DCGM scraper is available and is being scraped.",
|
|
Label: "DCGM scraper is available and is being scraped.",
|
|
|
Description: scraperDiagnosticDescriptionFor(event.DCGMScraperName, ""),
|
|
Description: scraperDiagnosticDescriptionFor(event.DCGMScraperName, ""),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
OpenCostScraperDiagnosticID: {
|
|
OpenCostScraperDiagnosticID: {
|
|
@@ -95,7 +59,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: event.OpenCostScraperName,
|
|
MetricName: event.OpenCostScraperName,
|
|
|
Label: "Opencost metrics scraper is available and is being scraped.",
|
|
Label: "Opencost metrics scraper is available and is being scraped.",
|
|
|
Description: scraperDiagnosticDescriptionFor(event.OpenCostScraperName, ""),
|
|
Description: scraperDiagnosticDescriptionFor(event.OpenCostScraperName, ""),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
NodeStatsScraperDiagnosticID: {
|
|
NodeStatsScraperDiagnosticID: {
|
|
@@ -103,7 +66,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: event.NodeStatsScraperName,
|
|
MetricName: event.NodeStatsScraperName,
|
|
|
Label: "Node stats summary scraper is available and is being scraped.",
|
|
Label: "Node stats summary scraper is available and is being scraped.",
|
|
|
Description: scraperDiagnosticDescriptionFor(event.NodeStatsScraperName, ""),
|
|
Description: scraperDiagnosticDescriptionFor(event.NodeStatsScraperName, ""),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
NetworkCostsScraperDiagnosticID: {
|
|
NetworkCostsScraperDiagnosticID: {
|
|
@@ -111,7 +73,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: event.NetworkCostsScraperName,
|
|
MetricName: event.NetworkCostsScraperName,
|
|
|
Label: "Network costs daemonset metrics scrapers are available and being scraped.",
|
|
Label: "Network costs daemonset metrics scrapers are available and being scraped.",
|
|
|
Description: scraperDiagnosticDescriptionFor(event.NetworkCostsScraperName, ""),
|
|
Description: scraperDiagnosticDescriptionFor(event.NetworkCostsScraperName, ""),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesNodesScraperDiagnosticID: {
|
|
KubernetesNodesScraperDiagnosticID: {
|
|
@@ -119,7 +80,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesNodesScraperDiagnosticID,
|
|
MetricName: KubernetesNodesScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.NodeScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.NodeScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.NodeScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.NodeScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesNamespacesScraperDiagnosticID: {
|
|
KubernetesNamespacesScraperDiagnosticID: {
|
|
@@ -127,7 +87,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesNamespacesScraperDiagnosticID,
|
|
MetricName: KubernetesNamespacesScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.NamespaceScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.NamespaceScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.NamespaceScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.NamespaceScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesReplicaSetsScraperDiagnosticID: {
|
|
KubernetesReplicaSetsScraperDiagnosticID: {
|
|
@@ -135,7 +94,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesReplicaSetsScraperDiagnosticID,
|
|
MetricName: KubernetesReplicaSetsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.ReplicaSetScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.ReplicaSetScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.ReplicaSetScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.ReplicaSetScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesDeploymentsScraperDiagnosticID: {
|
|
KubernetesDeploymentsScraperDiagnosticID: {
|
|
@@ -143,7 +101,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesDeploymentsScraperDiagnosticID,
|
|
MetricName: KubernetesDeploymentsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.DeploymentScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.DeploymentScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.DeploymentScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.DeploymentScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesStatefulSetsScraperDiagnosticID: {
|
|
KubernetesStatefulSetsScraperDiagnosticID: {
|
|
@@ -151,7 +108,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesStatefulSetsScraperDiagnosticID,
|
|
MetricName: KubernetesStatefulSetsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.StatefulSetScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.StatefulSetScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.StatefulSetScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.StatefulSetScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesServicesScraperDiagnosticID: {
|
|
KubernetesServicesScraperDiagnosticID: {
|
|
@@ -159,7 +115,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesServicesScraperDiagnosticID,
|
|
MetricName: KubernetesServicesScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.ServiceScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.ServiceScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.ServiceScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.ServiceScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesPodsScraperDiagnosticID: {
|
|
KubernetesPodsScraperDiagnosticID: {
|
|
@@ -167,7 +122,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesPodsScraperDiagnosticID,
|
|
MetricName: KubernetesPodsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PodScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PodScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PodScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PodScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesPvsScraperDiagnosticID: {
|
|
KubernetesPvsScraperDiagnosticID: {
|
|
@@ -175,7 +129,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesPvsScraperDiagnosticID,
|
|
MetricName: KubernetesPvsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PvScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PvScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PvScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PvScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
|
|
|
|
|
KubernetesPvcsScraperDiagnosticID: {
|
|
KubernetesPvcsScraperDiagnosticID: {
|
|
@@ -183,7 +136,6 @@ var diagnosticDefinitions map[string]*diagnosticDefinition = map[string]*diagnos
|
|
|
MetricName: KubernetesPvcsScraperDiagnosticID,
|
|
MetricName: KubernetesPvcsScraperDiagnosticID,
|
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PvcScraperType),
|
|
Label: fmt.Sprintf("Kubernetes cluster resources: %s are available and being scraped", event.PvcScraperType),
|
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PvcScraperType),
|
|
Description: scraperDiagnosticDescriptionFor(event.KubernetesClusterScraperName, event.PvcScraperType),
|
|
|
- DiagType: DiagnosticTypeScraper,
|
|
|
|
|
},
|
|
},
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -216,73 +168,6 @@ type CollectorDiagnostic interface {
|
|
|
Details() map[string]any
|
|
Details() map[string]any
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// metric diagnostic is checked on metrics update -- it maintains a historic record of all the instants
|
|
|
|
|
-// a specific metric was updated, and reports a diagnostic on the validity of that history.
|
|
|
|
|
-type metricDiagnostic struct {
|
|
|
|
|
- diagnostic *diagnosticDefinition
|
|
|
|
|
- updateTimestamps []time.Time
|
|
|
|
|
- result map[string]float64
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// creates a new metric diagnostic
|
|
|
|
|
-func newMetricDiagnostic(diagnostic *diagnosticDefinition) *metricDiagnostic {
|
|
|
|
|
- return &metricDiagnostic{
|
|
|
|
|
- diagnostic: diagnostic,
|
|
|
|
|
- result: make(map[string]float64),
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Id returns the identifier for the metric diagnostic type -- this just proxies from the diagnostic
|
|
|
|
|
-// definition.
|
|
|
|
|
-func (md *metricDiagnostic) Id() string {
|
|
|
|
|
- return md.diagnostic.ID
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Name returns the name of the metric being run for the metric diagnostic type -- this just proxies from
|
|
|
|
|
-// the diagnostic definition.
|
|
|
|
|
-func (md *metricDiagnostic) Name() string {
|
|
|
|
|
- return md.diagnostic.MetricName
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// Details generates an exportable detail map for the specific diagnostic, and resets any of its internal
|
|
|
|
|
-// state for the current cycle.
|
|
|
|
|
-func (md *metricDiagnostic) Details() map[string]any {
|
|
|
|
|
- // for all timestamps that occurred during our update cycle,
|
|
|
|
|
- // if any timestamps for our metric do not exist, then we
|
|
|
|
|
- // say that the diagnostic failed. if there are no timestamps
|
|
|
|
|
- // marked in the result, then we also say the diagnostic failed.
|
|
|
|
|
- passed := true
|
|
|
|
|
- if len(md.result) == 0 {
|
|
|
|
|
- passed = false
|
|
|
|
|
- } else {
|
|
|
|
|
- for _, t := range md.updateTimestamps {
|
|
|
|
|
- key := t.Format(time.RFC3339)
|
|
|
|
|
-
|
|
|
|
|
- _, hasTimestamp := md.result[key]
|
|
|
|
|
- if !hasTimestamp {
|
|
|
|
|
- passed = false
|
|
|
|
|
- break
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- details := map[string]any{
|
|
|
|
|
- "query": md.Name(),
|
|
|
|
|
- "label": md.diagnostic.Label,
|
|
|
|
|
- "docLink": md.diagnostic.DocLink,
|
|
|
|
|
- "result": maps.Clone(md.result),
|
|
|
|
|
- "passed": passed,
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // reset the update timestamps and results
|
|
|
|
|
- md.updateTimestamps = []time.Time{}
|
|
|
|
|
- for k := range md.result {
|
|
|
|
|
- delete(md.result, k)
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return details
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
// scrapeDiagnostic maintains the latest state of each scrape event that occurs. scrape
|
|
// scrapeDiagnostic maintains the latest state of each scrape event that occurs. scrape
|
|
|
// events can be registered for any event, but only the specific scrapes with diagnostic
|
|
// events can be registered for any event, but only the specific scrapes with diagnostic
|
|
|
// definitions defined will export as diagnostics.
|
|
// definitions defined will export as diagnostics.
|
|
@@ -377,25 +262,14 @@ func (sd *scrapeDiagnostic) Details() map[string]any {
|
|
|
type DiagnosticsModule struct {
|
|
type DiagnosticsModule struct {
|
|
|
lock sync.RWMutex
|
|
lock sync.RWMutex
|
|
|
diagnostics *collections.IdNameMap[CollectorDiagnostic]
|
|
diagnostics *collections.IdNameMap[CollectorDiagnostic]
|
|
|
- updater Updater
|
|
|
|
|
scrapeHandlerId events.HandlerID // scrape event handler identifier for removal
|
|
scrapeHandlerId events.HandlerID // scrape event handler identifier for removal
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// NewDiagnosticsModule creates a new `DiagnosticsModule` instance to be used with a collector data source
|
|
// NewDiagnosticsModule creates a new `DiagnosticsModule` instance to be used with a collector data source
|
|
|
-func NewDiagnosticsModule(updater Updater) *DiagnosticsModule {
|
|
|
|
|
- // initialize all metric diagnostics IFF the diagnostic type is "metrics"
|
|
|
|
|
- // NOTE: scraper diagnostics are dynamically created as scrape results arrive
|
|
|
|
|
|
|
+func NewDiagnosticsModule() *DiagnosticsModule {
|
|
|
diagnostics := collections.NewIdNameMap[CollectorDiagnostic]()
|
|
diagnostics := collections.NewIdNameMap[CollectorDiagnostic]()
|
|
|
- for _, def := range diagnosticDefinitions {
|
|
|
|
|
- // only insert metric diagnostic types
|
|
|
|
|
- if def.DiagType == DiagnosticTypeMetric {
|
|
|
|
|
- diagnostics.Insert(newMetricDiagnostic(def))
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
dm := &DiagnosticsModule{
|
|
dm := &DiagnosticsModule{
|
|
|
diagnostics: diagnostics,
|
|
diagnostics: diagnostics,
|
|
|
- updater: updater,
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
scrapeEvents := events.GlobalDispatcherFor[event.ScrapeEvent]()
|
|
scrapeEvents := events.GlobalDispatcherFor[event.ScrapeEvent]()
|
|
@@ -422,79 +296,6 @@ func (d *DiagnosticsModule) onScrapeEvent(event event.ScrapeEvent) {
|
|
|
d.diagnostics.Insert(newScrapeDiagnostic(event, def))
|
|
d.diagnostics.Insert(newScrapeDiagnostic(event, def))
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func (d *DiagnosticsModule) Update(updateSet *UpdateSet) {
|
|
|
|
|
- if updateSet == nil {
|
|
|
|
|
- return
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // This is done so that the update func is marked complete when both the updater and diagnostics are done
|
|
|
|
|
- // Otherwise we might face a race condition when calling the diagnostics details func before the diagnostics are done
|
|
|
|
|
- var wg sync.WaitGroup
|
|
|
|
|
- wg.Add(2) // 1 for updater, 1 for diagnostics
|
|
|
|
|
-
|
|
|
|
|
- go func() {
|
|
|
|
|
- defer wg.Done()
|
|
|
|
|
-
|
|
|
|
|
- d.lock.Lock()
|
|
|
|
|
- defer d.lock.Unlock()
|
|
|
|
|
-
|
|
|
|
|
- // add the timestamp to all metric diagnostic instances (see notes on addUpdateTimestamp)
|
|
|
|
|
- ts := updateSet.Timestamp
|
|
|
|
|
- d.addUpdateTimestamp(ts)
|
|
|
|
|
-
|
|
|
|
|
- timestamp := ts.Format(time.RFC3339)
|
|
|
|
|
-
|
|
|
|
|
- for _, update := range updateSet.Updates {
|
|
|
|
|
- if metric, ok := d.diagnostics.ByName(update.Name); ok {
|
|
|
|
|
- // this is unfortunately necessary due to the way our diangostic collectors
|
|
|
|
|
- // differ in functionality -- it makes more sense to duck type here rather
|
|
|
|
|
- // than maintain a separate map of just the metric types, or add metric
|
|
|
|
|
- // specific implementation details to the CollectorDiagnostic interface.
|
|
|
|
|
- // generally, we _should_ be able to make this assertion -- but we'll check in case.
|
|
|
|
|
- if metricDiag, isType := metric.(*metricDiagnostic); isType {
|
|
|
|
|
- // mark the timestamp as "seen" with the value
|
|
|
|
|
- metricDiag.result[timestamp] = update.Value
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }()
|
|
|
|
|
-
|
|
|
|
|
- // We are still maintaining the order in which the updates to the repo are called
|
|
|
|
|
- // as this function gets the new call only when both these go routines are done
|
|
|
|
|
- go func() {
|
|
|
|
|
- defer wg.Done()
|
|
|
|
|
- d.updater.Update(updateSet)
|
|
|
|
|
- }()
|
|
|
|
|
-
|
|
|
|
|
- wg.Wait()
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-// appends an update timestamp on each of the metric diagnostics -- we need to write
|
|
|
|
|
-// every timestamp that the update makes unfortunately. There isn't a way to determine
|
|
|
|
|
-// if a diagnostic service "cycle" is complete, so it's not really possible to maintain
|
|
|
|
|
-// a most recent timestamps on the DiagnosticsModule (the optimal solution). we're not
|
|
|
|
|
-// far from a solid design here, just might need some more support on the diagnostic
|
|
|
|
|
-// service side.
|
|
|
|
|
-func (d *DiagnosticsModule) addUpdateTimestamp(t time.Time) {
|
|
|
|
|
- for _, def := range diagnosticDefinitions {
|
|
|
|
|
- if def.DiagType != DiagnosticTypeMetric {
|
|
|
|
|
- continue
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- diag, ok := d.diagnostics.ById(def.ID)
|
|
|
|
|
- if !ok {
|
|
|
|
|
- continue
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- // More duck typing sadly -- there are some fundamental design incompatibilities
|
|
|
|
|
- // with the way DiagnosticService was written and this cached diagnostic approach
|
|
|
|
|
- // that make things like "cycle" resets a bit difficult
|
|
|
|
|
- if metricDiag, ok := diag.(*metricDiagnostic); ok {
|
|
|
|
|
- metricDiag.updateTimestamps = append(metricDiag.updateTimestamps, t)
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
// DiagnosticsDefinitions returns a deterministic mapping of pre-defined diagnostics used with the collector.
|
|
// DiagnosticsDefinitions returns a deterministic mapping of pre-defined diagnostics used with the collector.
|
|
|
func (d *DiagnosticsModule) DiagnosticsDefinitions() map[string]*diagnosticDefinition {
|
|
func (d *DiagnosticsModule) DiagnosticsDefinitions() map[string]*diagnosticDefinition {
|
|
|
return diagnosticDefinitions
|
|
return diagnosticDefinitions
|