Ver código fonte

Add telemetry on resource usage (#3090)

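Add telemetry on usage quotas: the usage middleware now opens a "middleware-usage" span and records the project ID, current usage, limits, and whether the request would be allowed as span attributes, and the cluster control plane client is passed through to usage.GetUsage. Note that the middleware no longer rejects requests that exceed a usage limit; it only records the result and always calls the next handler.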
d-g-town 3 anos atrás
pai
commit
4718361318

+ 5 - 4
api/server/handlers/project/get_usage.go

@@ -40,10 +40,11 @@ func (p *ProjectGetUsageHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
 	res := &types.GetProjectUsageResponse{}
 
 	currUsage, limit, usageCache, err := usage.GetUsage(&usage.GetUsageOpts{
-		Project:          proj,
-		DOConf:           p.Config().DOConf,
-		Repo:             p.Repo(),
-		WhitelistedUsers: p.Config().WhitelistedUsers,
+		Project:                          proj,
+		DOConf:                           p.Config().DOConf,
+		Repo:                             p.Repo(),
+		WhitelistedUsers:                 p.Config().WhitelistedUsers,
+		ClusterControlPlaneServiceClient: p.Config().ClusterControlPlaneClient,
 	})
 	if err != nil {
 		p.HandleAPIError(w, r, apierrors.NewErrInternal(err))

+ 30 - 21
api/server/router/middleware/usage.go

@@ -1,9 +1,10 @@
 package middleware
 
 import (
-	"fmt"
 	"net/http"
 
+	"github.com/porter-dev/porter/internal/telemetry"
+
 	"github.com/porter-dev/porter/api/server/shared/apierrors"
 	"github.com/porter-dev/porter/api/server/shared/config"
 	"github.com/porter-dev/porter/api/types"
@@ -24,16 +25,24 @@ var UsageErrFmt = "usage limit reached for metric %s: limit %d, requested %d"
 
 func (b *UsageMiddleware) Middleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		proj, _ := r.Context().Value(types.ProjectScope).(*models.Project)
+		ctx, span := telemetry.NewSpan(r.Context(), "middleware-usage")
+		defer span.End()
+
+		proj, _ := ctx.Value(types.ProjectScope).(*models.Project)
+
+		telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "project-id", Value: proj.ID})
 
 		// get the project usage limits
 		currentUsage, limit, _, err := usage.GetUsage(&usage.GetUsageOpts{
-			Project:          proj,
-			DOConf:           b.config.DOConf,
-			Repo:             b.config.Repo,
-			WhitelistedUsers: b.config.WhitelistedUsers,
+			Project:                          proj,
+			DOConf:                           b.config.DOConf,
+			Repo:                             b.config.Repo,
+			WhitelistedUsers:                 b.config.WhitelistedUsers,
+			ClusterControlPlaneServiceClient: b.config.ClusterControlPlaneClient,
 		})
 		if err != nil {
+			err = telemetry.Error(ctx, span, err, "error getting usage")
+
 			apierrors.HandleAPIError(
 				b.config.Logger,
 				b.config.Alerter,
@@ -45,25 +54,25 @@ func (b *UsageMiddleware) Middleware(next http.Handler) http.Handler {
 			return
 		}
 
+		telemetry.WithAttributes(span,
+			telemetry.AttributeKV{Key: "users-current-usage", Value: currentUsage.Users},
+			telemetry.AttributeKV{Key: "users-limit", Value: limit.Users},
+			telemetry.AttributeKV{Key: "cpu-current-usage", Value: currentUsage.ResourceCPU},
+			telemetry.AttributeKV{Key: "cpu-limit", Value: limit.ResourceCPU},
+			telemetry.AttributeKV{Key: "memory-current-usage", Value: currentUsage.ResourceMemory},
+			telemetry.AttributeKV{Key: "memory-limit", Value: limit.ResourceMemory},
+			telemetry.AttributeKV{Key: "clusters-current-usage", Value: currentUsage.Clusters},
+			telemetry.AttributeKV{Key: "clusters-limit", Value: limit.Clusters},
+		)
+
 		// check the usage limits
 		allowed := allowUsage(limit, currentUsage, b.metric)
 
-		if allowed {
-			next.ServeHTTP(w, r)
-		} else {
-			limit, curr := getMetricUsage(limit, currentUsage, b.metric)
+		telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "allowed", Value: allowed})
 
-			apierrors.HandleAPIError(
-				b.config.Logger,
-				b.config.Alerter,
-				w, r,
-				apierrors.NewErrPassThroughToClient(
-					fmt.Errorf(UsageErrFmt, b.metric, limit, curr),
-					http.StatusBadRequest,
-				),
-				true,
-			)
-		}
+		r = r.Clone(ctx)
+
+		next.ServeHTTP(w, r)
 	})
 }
 

+ 12 - 8
internal/usage/usage.go

@@ -4,6 +4,8 @@ import (
 	"errors"
 	"time"
 
+	"github.com/porter-dev/api-contracts/generated/go/porter/v1/porterv1connect"
+
 	"github.com/porter-dev/porter/api/types"
 	"github.com/porter-dev/porter/internal/kubernetes"
 	"github.com/porter-dev/porter/internal/kubernetes/nodes"
@@ -14,10 +16,11 @@ import (
 )
 
 type GetUsageOpts struct {
-	Repo             repository.Repository
-	DOConf           *oauth2.Config
-	Project          *models.Project
-	WhitelistedUsers map[uint]uint
+	Repo                             repository.Repository
+	DOConf                           *oauth2.Config
+	Project                          *models.Project
+	WhitelistedUsers                 map[uint]uint
+	ClusterControlPlaneServiceClient porterv1connect.ClusterControlPlaneServiceClient
 }
 
 // GetUsage gets a project's current usage and usage limit
@@ -133,10 +136,11 @@ func getResourceUsage(opts *GetUsageOpts, clusters []*models.Cluster) (uint, uin
 
 	for _, cluster := range clusters {
 		ooc := &kubernetes.OutOfClusterConfig{
-			Cluster:                   cluster,
-			Repo:                      opts.Repo,
-			DigitalOceanOAuth:         opts.DOConf,
-			AllowInClusterConnections: false,
+			Cluster:                     cluster,
+			Repo:                        opts.Repo,
+			DigitalOceanOAuth:           opts.DOConf,
+			AllowInClusterConnections:   false,
+			CAPIManagementClusterClient: opts.ClusterControlPlaneServiceClient,
 		}
 
 		agent, err := kubernetes.GetAgentOutOfClusterConfig(ooc)