prometheus_incoming.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. package webhook
  import (
  	"context"
  	"fmt"
  	"net/http"
  	"sort"
  	"strings"
  	"time"

  	"connectrpc.com/connect"
  	"google.golang.org/protobuf/types/known/timestamppb"

  	porterv1 "github.com/porter-dev/api-contracts/generated/go/porter/v1"
  	"github.com/porter-dev/porter/api/server/authz"
  	"github.com/porter-dev/porter/api/server/handlers"
  	"github.com/porter-dev/porter/api/server/shared"
  	"github.com/porter-dev/porter/api/server/shared/apierrors"
  	"github.com/porter-dev/porter/api/server/shared/config"
  	"github.com/porter-dev/porter/api/server/shared/requestutils"
  	"github.com/porter-dev/porter/api/types"
  	"github.com/porter-dev/porter/internal/telemetry"
  )
  19. // PrometheusAlertWebhookHandler handles incoming prometheus alerts
  20. type PrometheusAlertWebhookHandler struct {
  21. handlers.PorterHandlerReadWriter
  22. authz.KubernetesAgentGetter
  23. }
  24. // NewPrometheusAlertWebhookHandler returns an instance of PrometheusAlertWebhookHandler
  25. func NewPrometheusAlertWebhookHandler(
  26. config *config.Config,
  27. decoderValidator shared.RequestDecoderValidator,
  28. writer shared.ResultWriter,
  29. ) *PrometheusAlertWebhookHandler {
  30. return &PrometheusAlertWebhookHandler{
  31. PorterHandlerReadWriter: handlers.NewDefaultPorterHandler(config, decoderValidator, writer),
  32. KubernetesAgentGetter: authz.NewOutOfClusterAgentGetter(config),
  33. }
  34. }
  35. func (p *PrometheusAlertWebhookHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
  36. ctx, span := telemetry.NewSpan(r.Context(), "serve-post-prometheus-alert")
  37. defer span.End()
  38. // get the webhook id from the request
  39. projectID, err := requestutils.GetURLParamUint(r, types.URLParamProjectID)
  40. if err != nil {
  41. e := telemetry.Error(ctx, span, err, "error getting project ID")
  42. p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(e, http.StatusBadRequest))
  43. return
  44. }
  45. clusterID, err := requestutils.GetURLParamUint(r, types.URLParamClusterID)
  46. if err != nil {
  47. e := telemetry.Error(ctx, span, nil, "error getting cluster ID")
  48. p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(e, http.StatusBadRequest))
  49. return
  50. }
  51. prometheusAlert := &types.PrometheusAlert{}
  52. if ok := p.DecodeAndValidate(w, r, prometheusAlert); !ok {
  53. e := telemetry.Error(ctx, span, nil, "error decoding request")
  54. p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(e, http.StatusBadRequest))
  55. return
  56. }
  57. if err := p.handlePrometheusAlert(ctx, int64(projectID), int64(clusterID), prometheusAlert); err != nil {
  58. e := telemetry.Error(ctx, span, err, "error handling prometheus alert")
  59. p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(e, http.StatusInternalServerError))
  60. return
  61. }
  62. p.WriteResult(w, r, "")
  63. }
  64. func (p *PrometheusAlertWebhookHandler) handlePrometheusAlert(ctx context.Context, projectId, clusterId int64, prometheusAlert *types.PrometheusAlert) error {
  65. ctx, span := telemetry.NewSpan(ctx, "porter-process-prom-alert")
  66. defer span.End()
  67. recordPrometheusAlertRequest := connect.NewRequest(&porterv1.RecordPrometheusAlertRequest{
  68. ProjectId: projectId,
  69. ClusterId: clusterId,
  70. })
  71. labelKeyValues := ""
  72. for _, alert := range prometheusAlert.Alerts {
  73. for k, v := range alert.Labels {
  74. labelKeyValues += fmt.Sprintf("%s %s", k, v)
  75. }
  76. if alert.Labels["alertname"] == "NoopAlert" {
  77. continue
  78. }
  79. startTime, err := time.Parse(time.RFC3339, alert.StartsAt)
  80. if err != nil {
  81. return telemetry.Error(ctx, span, err, "error parsing alert start time")
  82. }
  83. endTime, err := time.Parse(time.RFC3339, alert.EndsAt)
  84. if err != nil {
  85. return telemetry.Error(ctx, span, err, "error parsing alert end time")
  86. }
  87. var endTimestamp *timestamppb.Timestamp
  88. if endTime.After(startTime) {
  89. endTimestamp = timestamppb.New(endTime)
  90. }
  91. recordPrometheusAlertRequest.Msg.Alerts = append(recordPrometheusAlertRequest.Msg.Alerts, &porterv1.Alert{
  92. Name: alert.Labels["name"],
  93. Namespace: alert.Labels["namespace"],
  94. Type: p.getType(alert),
  95. Severity: alert.Labels["severity"],
  96. StartTime: timestamppb.New(startTime),
  97. EndTime: endTimestamp,
  98. })
  99. }
  100. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "porter-app-alert-labels", Value: labelKeyValues})
  101. _, err := p.Config().ClusterControlPlaneClient.RecordPrometheusAlert(ctx, recordPrometheusAlertRequest)
  102. if err != nil {
  103. return telemetry.Error(ctx, span, err, "error recording prometheus alert")
  104. }
  105. return nil
  106. }
  107. func (p *PrometheusAlertWebhookHandler) getType(alert types.Alert) porterv1.InvolvedObjectType {
  108. switch alert.Labels["involvedObjectType"] {
  109. case "Deployment":
  110. return porterv1.InvolvedObjectType_INVOLVED_OBJECT_TYPE_DEPLOYMENT
  111. case "StatefulSet":
  112. return porterv1.InvolvedObjectType_INVOLVED_OBJECT_TYPE_STATEFULSET
  113. case "DaemonSet":
  114. return porterv1.InvolvedObjectType_INVOLVED_OBJECT_TYPE_DAEMONSET
  115. default:
  116. return porterv1.InvolvedObjectType_INVOLVED_OBJECT_TYPE_UNSPECIFIED
  117. }
  118. }