Просмотр исходного кода

add telemetry to metrics (#3755)

Co-authored-by: David Townley <davidtownley@Davids-MacBook-Air.local>
d-g-town 2 года назад
Родитель
Коммит
934eaba276

+ 2 - 2
api/server/handlers/cluster/get_pod_metrics.go

@@ -38,7 +38,7 @@ func (c *GetPodMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
 
 	cluster, _ := ctx.Value(types.ClusterScope).(*models.Cluster)
 
-	request := &types.GetPodMetricsRequest{}
+	request := &prometheus.GetPodMetricsRequest{}
 
 	if ok := c.DecodeAndValidate(w, r, request); !ok {
 		err := telemetry.Error(ctx, span, nil, "error decoding request")
@@ -61,7 +61,7 @@ func (c *GetPodMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	rawQuery, err := prometheus.QueryPrometheus(agent.Clientset, promSvc, &request.QueryOpts)
+	rawQuery, err := prometheus.QueryPrometheus(ctx, agent.Clientset, promSvc, &request.QueryOpts)
 	if err != nil {
 		err = telemetry.Error(ctx, span, err, "error querying prometheus")
 		c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(err, http.StatusInternalServerError))

+ 1 - 1
api/server/handlers/cluster/list_nginx_ingresses.go

@@ -43,7 +43,7 @@ func (c *ListNGINXIngressesHandler) ServeHTTP(w http.ResponseWriter, r *http.Req
 		return
 	}
 
-	var res types.ListNGINXIngressesResponse = ingresses
+	var res prometheus.ListNGINXIngressesResponse = ingresses
 
 	c.WriteResult(w, r, res)
 }

+ 1 - 1
api/server/handlers/porter_app/app_metrics.go

@@ -145,7 +145,7 @@ func (c *AppMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		Percentile: request.Percentile,
 	}
 
-	rawQuery, err := prometheus.QueryPrometheus(agent.Clientset, promSvc, queryOpts)
+	rawQuery, err := prometheus.QueryPrometheus(ctx, agent.Clientset, promSvc, queryOpts)
 	if err != nil {
 		err = telemetry.Error(ctx, span, err, "error querying prometheus")
 		c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(err, http.StatusInternalServerError))

+ 0 - 10
api/types/cluster.go

@@ -1,9 +1,5 @@
 package types
 
-import (
-	"github.com/porter-dev/porter/internal/kubernetes/prometheus"
-)
-
 const (
 	URLParamCandidateID URLParam = "candidate_id"
 	URLParamNodeName    URLParam = "node_name"
@@ -264,12 +260,6 @@ type GetTemporaryKubeconfigResponse struct {
 	Kubeconfig []byte `json:"kubeconfig"`
 }
 
-type ListNGINXIngressesResponse []prometheus.SimpleIngress
-
-type GetPodMetricsRequest struct {
-	prometheus.QueryOpts
-}
-
 type GetPodMetricsResponse *string
 
 type GetPodsRequest struct {

+ 41 - 5
internal/kubernetes/prometheus/metrics.go

@@ -8,12 +8,20 @@ import (
 	"sort"
 	"strings"
 
+	"github.com/porter-dev/porter/internal/telemetry"
+
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/kubernetes"
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+type ListNGINXIngressesResponse []SimpleIngress
+
+type GetPodMetricsRequest struct {
+	QueryOpts
+}
+
 // GetPrometheusService returns the prometheus service name. The prometheus-community/prometheus chart @ v15.5.3 uses non-FQDN labels, unlike v22.6.2. This function checks for both labels.
 func GetPrometheusService(clientset kubernetes.Interface) (*v1.Service, bool, error) {
 	redundantServices, err := clientset.CoreV1().Services("").List(context.TODO(), metav1.ListOptions{
@@ -126,19 +134,38 @@ type QueryOpts struct {
 }
 
 func QueryPrometheus(
+	ctx context.Context,
 	clientset kubernetes.Interface,
 	service *v1.Service,
 	opts *QueryOpts,
 ) ([]*promParsedSingletonQuery, error) {
+	ctx, span := telemetry.NewSpan(ctx, "query-prometheus")
+	defer span.End()
+
+	telemetry.WithAttributes(span,
+		telemetry.AttributeKV{Key: "metric", Value: opts.Metric},
+		telemetry.AttributeKV{Key: "should-sum", Value: opts.ShouldSum},
+		telemetry.AttributeKV{Key: "kind", Value: opts.Kind},
+		telemetry.AttributeKV{Key: "pod-list", Value: strings.Join(opts.PodList, ",")},
+		telemetry.AttributeKV{Key: "name", Value: opts.Name},
+		telemetry.AttributeKV{Key: "namespace", Value: opts.Namespace},
+		telemetry.AttributeKV{Key: "start-range", Value: opts.StartRange},
+		telemetry.AttributeKV{Key: "end-range", Value: opts.EndRange},
+		telemetry.AttributeKV{Key: "resolution", Value: opts.Resolution},
+		telemetry.AttributeKV{Key: "percentile", Value: opts.Percentile},
+	)
+
 	if len(service.Spec.Ports) == 0 {
-		return nil, fmt.Errorf("prometheus service has no exposed ports to query")
+		return nil, telemetry.Error(ctx, span, nil, "prometheus service has no exposed ports to query")
 	}
 
 	selectionRegex, err := getSelectionRegex(opts.Kind, opts.Name)
 	if err != nil {
-		return nil, err
+		return nil, telemetry.Error(ctx, span, err, "failed to get selection regex")
 	}
 
+	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "selection-regex", Value: selectionRegex})
+
 	var podSelector string
 
 	if len(opts.PodList) > 0 {
@@ -147,6 +174,8 @@ func QueryPrometheus(
 		podSelector = fmt.Sprintf(`namespace="%s",pod=~"%s",container!="POD",container!=""`, opts.Namespace, selectionRegex)
 	}
 
+	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "pod-selector", Value: podSelector})
+
 	query := ""
 
 	if opts.Metric == "cpu" {
@@ -169,7 +198,7 @@ func QueryPrometheus(
 	} else if opts.Metric == "nginx:status" {
 		query, err = getNginxStatusQuery(opts, selectionRegex)
 		if err != nil {
-			return nil, err
+			return nil, telemetry.Error(ctx, span, err, "failed to get nginx status query")
 		}
 	} else if opts.Metric == "cpu_hpa_threshold" {
 		// get the name of the kube hpa metric
@@ -206,6 +235,8 @@ func QueryPrometheus(
 		query = createHPACurrentReplicasQuery(metricName, opts.Name, opts.Namespace, appLabel, hpaMetricName)
 	}
 
+	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "query", Value: query})
+
 	if opts.ShouldSum {
 		query = fmt.Sprintf("sum(%s)", query)
 	}
@@ -232,10 +263,15 @@ func QueryPrometheus(
 			return []*promParsedSingletonQuery{}, nil
 		}
 
-		return nil, err
+		return nil, telemetry.Error(ctx, span, err, "failed to get raw query")
+	}
+
+	parsedQuery, err := parseQuery(rawQuery, opts.Metric)
+	if err != nil {
+		return nil, telemetry.Error(ctx, span, err, "failed to parse query")
 	}
 
-	return parseQuery(rawQuery, opts.Metric)
+	return parsedQuery, nil
 }
 
 func getNginxStatusQuery(opts *QueryOpts, selectionRegex string) (string, error) {