Parcourir la source

Delete notification logic in Porter and delegate to CCP (#3973)

Feroze Mohideen il y a 2 ans
Parent
commit
08fd826040

+ 25 - 36
api/server/handlers/porter_app/create_and_update_events.go

@@ -8,7 +8,9 @@ import (
 	"strings"
 	"time"
 
+	"connectrpc.com/connect"
 	"github.com/google/uuid"
+	porterv1 "github.com/porter-dev/api-contracts/generated/go/porter/v1"
 	"github.com/porter-dev/porter/api/server/authz"
 	"github.com/porter-dev/porter/api/server/handlers"
 	"github.com/porter-dev/porter/api/server/shared"
@@ -16,8 +18,6 @@ import (
 	"github.com/porter-dev/porter/api/server/shared/config"
 	"github.com/porter-dev/porter/api/server/shared/requestutils"
 	"github.com/porter-dev/porter/api/types"
-	"github.com/porter-dev/porter/internal/deployment_target"
-	"github.com/porter-dev/porter/internal/kubernetes"
 	"github.com/porter-dev/porter/internal/models"
 	"github.com/porter-dev/porter/internal/porter_app/notifications"
 	"github.com/porter-dev/porter/internal/telemetry"
@@ -80,20 +80,7 @@ func (p *CreateUpdatePorterAppEventHandler) ServeHTTP(w http.ResponseWriter, r *
 
 	// This branch will only be hit for v2 app_event type events
 	if request.ID == "" && request.DeploymentTargetID != "" && request.Type == types.PorterAppEventType_AppEvent {
-		agent, err := p.GetAgent(r, cluster, "")
-		if err != nil {
-			err := telemetry.Error(ctx, span, err, "error getting agent")
-			p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(err, http.StatusInternalServerError))
-			return
-		}
-
-		if agent == nil {
-			err := telemetry.Error(ctx, span, nil, "agent not found")
-			p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(err, http.StatusInternalServerError))
-			return
-		}
-
-		err = p.handleNotification(ctx, request, project.ID, cluster.ID, *agent)
+		err := p.handleNotification(ctx, request, project.ID, cluster.ID)
 		if err != nil {
 			e := telemetry.Error(ctx, span, err, "error handling notification")
 			p.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(e, http.StatusInternalServerError))
@@ -624,33 +611,35 @@ func (p *CreateUpdatePorterAppEventHandler) updateDeployEventMatchingAppEventDet
 func (p *CreateUpdatePorterAppEventHandler) handleNotification(ctx context.Context,
 	request *types.CreateOrUpdatePorterAppEventRequest,
 	projectId, clusterId uint,
-	agent kubernetes.Agent,
 ) error {
 	ctx, span := telemetry.NewSpan(ctx, "serve-handle-notification")
 	defer span.End()
 
-	// get the namespace associated with the deployment target id
-	deploymentTarget, err := deployment_target.DeploymentTargetDetails(ctx, deployment_target.DeploymentTargetDetailsInput{
-		ProjectID:          int64(projectId),
-		ClusterID:          int64(clusterId),
-		DeploymentTargetID: request.DeploymentTargetID,
-		CCPClient:          p.Config().ClusterControlPlaneClient,
-	})
+	agentEventMetadata, err := notifications.ParseAgentEventMetadata(request.Metadata)
 	if err != nil {
-		return telemetry.Error(ctx, span, err, "error getting deployment target details")
-	}
-
-	inp := notifications.HandleNotificationInput{
-		RawAgentEventMetadata: request.Metadata,
-		EventRepo:             p.Repo().PorterAppEvent(),
-		DeploymentTargetID:    request.DeploymentTargetID,
-		Namespace:             deploymentTarget.Namespace,
-		K8sAgent:              agent,
-	}
+		return telemetry.Error(ctx, span, err, "failed to unmarshal app event metadata")
+	}
+	if agentEventMetadata == nil {
+		return telemetry.Error(ctx, span, nil, "app event metadata is nil")
+	}
+
+	createNotificationRequest := connect.NewRequest(&porterv1.CreateNotificationRequest{
+		ProjectId: int64(projectId),
+		ClusterId: int64(clusterId),
+		DeploymentTargetIdentifier: &porterv1.DeploymentTargetIdentifier{
+			Id: request.DeploymentTargetID,
+		},
+		AppName:            agentEventMetadata.AppName,
+		ServiceName:        agentEventMetadata.ServiceName,
+		AppRevisionId:      agentEventMetadata.AppRevisionID,
+		PorterAgentEventId: int64(agentEventMetadata.AgentEventID),
+		RawSummary:         agentEventMetadata.Summary,
+		RawDetail:          agentEventMetadata.Detail,
+	})
 
-	err = notifications.HandleNotification(ctx, inp)
+	_, err = p.Config().ClusterControlPlaneClient.CreateNotification(ctx, createNotificationRequest)
 	if err != nil {
-		return telemetry.Error(ctx, span, err, "error handling notification")
+		return telemetry.Error(ctx, span, err, "error creating notification")
 	}
 
 	return nil

+ 6 - 1
api/server/handlers/porter_app/current_app_revision.go

@@ -147,7 +147,12 @@ func (c *LatestAppRevisionHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ
 	}
 
 	appRevisionId := encodedRevision.ID
-	notificationEvents, err := c.Repo().PorterAppEvent().ReadNotificationsByAppRevisionID(ctx, appId, appRevisionId)
+	appInstanceId := encodedRevision.AppInstanceID
+	telemetry.WithAttributes(span,
+		telemetry.AttributeKV{Key: "app-revision-id", Value: appRevisionId},
+		telemetry.AttributeKV{Key: "app-instance-id", Value: appInstanceId},
+	)
+	notificationEvents, err := c.Repo().PorterAppEvent().ReadNotificationsByAppRevisionID(ctx, appInstanceId, appRevisionId)
 	if err != nil {
 		err := telemetry.Error(ctx, span, err, "error getting notifications from repo")
 		c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(err, http.StatusInternalServerError))

+ 37 - 28
dashboard/src/main/home/app-dashboard/app-view/tabs/activity-feed/events/types.ts

@@ -43,35 +43,44 @@ const porterAppPreDeployEventMetadataValidator = z.object({
   app_revision_id: z.string(),
   commit_sha: z.string().optional(),
 });
-export const porterAppNotificationEventMetadataValidator = z.object({
-  id: z.string(),
-  app_id: z.string(),
-  app_name: z.string(),
-  service_name: z.string(),
-  app_revision_id: z.string(),
-  error: z.object({
-    code: z.number(),
-    summary: z.string(),
-    detail: z.string(),
-    mitigation_steps: z.string(),
-    documentation: z.array(z.string()).default([]),
-  }),
-  timestamp: z.string(),
-  deployment: z.discriminatedUnion("status", [
-    z.object({
-      status: z.literal("PENDING"),
-    }),
-    z.object({
-      status: z.literal("SUCCESS"),
-    }),
-    z.object({
-      status: z.literal("FAILURE"),
-    }),
-    z.object({
-      status: z.literal("UNKNOWN"),
+export const porterAppNotificationEventMetadataValidator = z
+  .object({
+    id: z.string(),
+    app_id: z.string(),
+    app_name: z.string(),
+    service_name: z.string(),
+    app_revision_id: z.string(),
+    error: z.object({
+      code: z.number(),
+      summary: z.string(),
+      detail: z.string(),
+      mitigation_steps: z.string(),
+      documentation: z.array(z.string()).default([]),
     }),
-  ]),
-});
+    timestamp: z.string(),
+    deployment: z.discriminatedUnion("status", [
+      z.object({
+        status: z.literal("PENDING"),
+      }),
+      z.object({
+        status: z.literal("SUCCESS"),
+      }),
+      z.object({
+        status: z.literal("FAILURE"),
+      }),
+      z.object({
+        status: z.literal("UNKNOWN"),
+      }),
+    ]),
+  })
+  // this is necessary because the pre-deploy job is named "pre-deploy" by the front-end but "predeploy" in k8s
+  // TODO: standardize the naming of the pre-deploy job: https://linear.app/porter/issue/POR-2119/standardize-naming-of-pre-deploy
+  .transform((obj) => {
+    if (obj.service_name === "predeploy") {
+      obj.service_name = "pre-deploy";
+    }
+    return obj;
+  });
 export type PorterAppNotification = z.infer<
   typeof porterAppNotificationEventMetadataValidator
 >;

+ 16 - 20
dashboard/src/main/home/app-dashboard/app-view/tabs/notifications/NotificationExpandedView.tsx

@@ -48,14 +48,11 @@ const NotificationExpandedView: React.FC<Props> = ({
     }
   }, [JSON.stringify(notification)]);
 
-  // this is necessary because the name for the pre-deploy job is called "pre-deploy" by the front-end but predeploy in k8s
-  // TODO: standardize the naming of the pre-deploy job
   const serviceNames = useMemo(() => {
     if (notification.service.config.type === "predeploy") {
       return ["predeploy"];
-    } else {
-      return [notification.service.name.value];
     }
+    return [notification.service.name.value];
   }, [notification.service.name.value]);
 
   return (
@@ -176,22 +173,21 @@ const NotificationExpandedView: React.FC<Props> = ({
           })}
         </StyledActivityFeed>
         <Spacer y={1} />
-        {notification.service.config.type !== "job" &&
-          notification.service.config.type !== "predeploy" && (
-            <Logs
-              projectId={projectId}
-              clusterId={clusterId}
-              appName={appName}
-              serviceNames={serviceNames}
-              deploymentTargetId={deploymentTargetId}
-              appRevisionId={notification.appRevisionId}
-              logFilterNames={["service_name"]}
-              appId={appId}
-              selectedService={serviceNames[0]}
-              selectedRevisionId={notification.appRevisionId}
-              defaultScrollToBottomEnabled={false}
-            />
-          )}
+        {notification.service.config.type !== "job" && (
+          <Logs
+            projectId={projectId}
+            clusterId={clusterId}
+            appName={appName}
+            serviceNames={serviceNames}
+            deploymentTargetId={deploymentTargetId}
+            appRevisionId={notification.appRevisionId}
+            logFilterNames={["service_name"]}
+            appId={appId}
+            selectedService={serviceNames[0]}
+            selectedRevisionId={notification.appRevisionId}
+            defaultScrollToBottomEnabled={false}
+          />
+        )}
       </ExpandedViewContent>
       {/* uncomment below once we implement recommended actions */}
       {/* <ExpandedViewFooter>

+ 1 - 1
go.mod

@@ -83,7 +83,7 @@ require (
 	github.com/matryer/is v1.4.0
 	github.com/nats-io/nats.go v1.24.0
 	github.com/open-policy-agent/opa v0.44.0
-	github.com/porter-dev/api-contracts v0.2.43
+	github.com/porter-dev/api-contracts v0.2.44
 	github.com/riandyrn/otelchi v0.5.1
 	github.com/santhosh-tekuri/jsonschema/v5 v5.0.1
 	github.com/stefanmcshane/helm v0.0.0-20221213002717-88a4a2c6e77d

+ 2 - 2
go.sum

@@ -1520,8 +1520,8 @@ github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/polyfloyd/go-errorlint v0.0.0-20210722154253-910bb7978349/go.mod h1:wi9BfjxjF/bwiZ701TzmfKu6UKC357IOAtNr0Td0Lvw=
-github.com/porter-dev/api-contracts v0.2.43 h1:y0EhZzIK003/EhsSPsyrMpvbjsxYogV+WmYeUDIix2U=
-github.com/porter-dev/api-contracts v0.2.43/go.mod h1:fX6JmP5QuzxDLvqP3evFOTXjI4dHxsG0+VKNTjImZU8=
+github.com/porter-dev/api-contracts v0.2.44 h1:dDi2Tb41KxMz6pC8XwI3FkMqtc2ubBYVscA4s0ZlcK8=
+github.com/porter-dev/api-contracts v0.2.44/go.mod h1:fX6JmP5QuzxDLvqP3evFOTXjI4dHxsG0+VKNTjImZU8=
 github.com/porter-dev/switchboard v0.0.3 h1:dBuYkiVLa5Ce7059d6qTe9a1C2XEORFEanhbtV92R+M=
 github.com/porter-dev/switchboard v0.0.3/go.mod h1:xSPzqSFMQ6OSbp42fhCi4AbGbQbsm6nRvOkrblFeXU4=
 github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=

+ 2 - 296
internal/porter_app/notifications/app_event.go

@@ -1,35 +1,9 @@
 package notifications
 
 import (
-	"context"
 	"encoding/json"
-	"strconv"
-	"time"
 
-	"github.com/google/uuid"
 	"github.com/porter-dev/porter/internal/models"
-	"github.com/porter-dev/porter/internal/repository"
-	"github.com/porter-dev/porter/internal/telemetry"
-)
-
-// PorterAppEventType_Notification is the type of a Porter App Event that is a notification
-const PorterAppEventType_Notification = "NOTIFICATION"
-
-// PorterAppEventType_Deploy is the type of a Porter App Event that is a deploy event
-const PorterAppEventType_Deploy = "DEPLOY"
-
-// PorterAppEventStatus is an alias for a string that represents a Porter App Event Status
-type PorterAppEventStatus string
-
-const (
-	// PorterAppEventStatus_Success represents a Porter App Event that was successful
-	PorterAppEventStatus_Success PorterAppEventStatus = "SUCCESS"
-	// PorterAppEventStatus_Failed represents a Porter App Event that failed
-	PorterAppEventStatus_Failed PorterAppEventStatus = "FAILED"
-	// PorterAppEventStatus_Progressing represents a Porter App Event that is in progress
-	PorterAppEventStatus_Progressing PorterAppEventStatus = "PROGRESSING"
-	// PorterAppEventStatus_Canceled represents a Porter App Event that has been canceled
-	PorterAppEventStatus_Canceled PorterAppEventStatus = "CANCELED"
 )
 
 // AppEventMetadata is the metadata for an app event
@@ -56,18 +30,8 @@ type AppEventMetadata struct {
 	Detail string `json:"detail"`
 }
 
-// ServiceDeploymentMetadata contains information about a service when it deploys, stored in the deploy event
-type ServiceDeploymentMetadata struct {
-	// Status is the status of the service deployment
-	Status PorterAppEventStatus `json:"status"`
-	// ExternalURI is the external URI of a service (if it is web)
-	ExternalURI string `json:"external_uri"`
-	// Type is the type of the service - one of web, worker, or job
-	Type string `json:"type"`
-}
-
-// parseAgentEventMetadata parses raw app event metadata to a AppEventMetadata struct
-func parseAgentEventMetadata(metadata map[string]interface{}) (*AppEventMetadata, error) {
+// ParseAgentEventMetadata parses raw app event metadata to an AppEventMetadata struct
+func ParseAgentEventMetadata(metadata map[string]interface{}) (*AppEventMetadata, error) {
 	appEventMetadata := &AppEventMetadata{}
 
 	bytes, err := json.Marshal(metadata)
@@ -82,264 +46,6 @@ func parseAgentEventMetadata(metadata map[string]interface{}) (*AppEventMetadata
 	return appEventMetadata, nil
 }
 
-// isNotificationDuplicate checks if another app event exists in the db with the same agent event id
-func isNotificationDuplicate(
-	ctx context.Context,
-	notification Notification,
-	eventRepo repository.PorterAppEventRepository,
-	deploymentTargetID string,
-) (bool, error) {
-	ctx, span := telemetry.NewSpan(ctx, "is-notification-duplicate")
-	defer span.End()
-
-	deploymentTargetUUID, err := uuid.Parse(deploymentTargetID)
-	if err != nil {
-		return false, telemetry.Error(ctx, span, err, "error parsing deployment target id")
-	}
-	if deploymentTargetUUID == uuid.Nil {
-		return false, telemetry.Error(ctx, span, nil, "deployment target id cannot be nil")
-	}
-
-	appIdInt, err := strconv.Atoi(notification.AppID)
-	if err != nil {
-		return false, telemetry.Error(ctx, span, err, "error converting app id to int")
-	}
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "app-id", Value: notification.AppID},
-		telemetry.AttributeKV{Key: "app-name", Value: notification.AppName},
-		telemetry.AttributeKV{Key: "app-revision-id", Value: notification.AppRevisionID},
-		telemetry.AttributeKV{Key: "agent-event-id", Value: notification.AgentEventID},
-		telemetry.AttributeKV{Key: "service-name", Value: notification.ServiceName},
-	)
-
-	existingEvents, _, err := eventRepo.ListEventsByPorterAppIDAndDeploymentTargetID(ctx, uint(appIdInt), deploymentTargetUUID)
-	if err != nil {
-		return false, telemetry.Error(ctx, span, err, "error listing porter app events for event type with deployment target id")
-	}
-
-	for _, existingEvent := range existingEvents {
-		if existingEvent != nil && existingEvent.Type == PorterAppEventType_Notification {
-			existingNotification, err := NotificationFromPorterAppEvent(existingEvent)
-			if err != nil {
-				continue
-			}
-			if existingNotification.AgentEventID == 0 {
-				continue
-			}
-			if existingNotification.AgentEventID == notification.AgentEventID {
-				return true, nil
-			}
-		}
-	}
-
-	return false, nil
-}
-
-// updateDeployEventInput is the input to updateDeployEvent
-type updateDeployEventInput struct {
-	Notification
-	EventRepo repository.PorterAppEventRepository
-	Status    PorterAppEventStatus
-}
-
-// updateDeployEvent updates the service status of a deploy event and possibly the event status itself with the input status
-// TODO: simplify this logic after https://linear.app/porter/issue/POR-2101/turn-servicedeploymentmetadata-from-a-map-into-a-list-in-ccp
-func updateDeployEvent(ctx context.Context, inp updateDeployEventInput) error {
-	ctx, span := telemetry.NewSpan(ctx, "update-matching-deploy-event")
-	defer span.End()
-
-	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "matching-k8s-deployment-status", Value: inp.Deployment.Status})
-
-	appID, err := strconv.Atoi(inp.Notification.AppID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error converting app id to int")
-	}
-
-	matchEvent, err := inp.EventRepo.ReadDeployEventByAppRevisionID(ctx, uint(appID), inp.Notification.AppRevisionID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error finding matching deploy event")
-	}
-	if matchEvent.ID == uuid.Nil {
-		return telemetry.Error(ctx, span, nil, "no matching deploy event found")
-	}
-	if matchEvent.Status != string(PorterAppEventStatus_Progressing) {
-		return nil // nothing to update here
-	}
-
-	serviceStatus, ok := matchEvent.Metadata["service_deployment_metadata"]
-	if !ok {
-		return telemetry.Error(ctx, span, nil, "service deployment metadata not found in deploy event metadata")
-	}
-	serviceDeploymentGenericMap, ok := serviceStatus.(map[string]interface{})
-	if !ok {
-		return telemetry.Error(ctx, span, nil, "service deployment metadata is not correct type")
-	}
-	serviceDeploymentMap := make(map[string]ServiceDeploymentMetadata)
-	for k, v := range serviceDeploymentGenericMap {
-		by, err := json.Marshal(v)
-		if err != nil {
-			return telemetry.Error(ctx, span, nil, "unable to marshal service deployment metadata")
-		}
-
-		var serviceDeploymentMetadata ServiceDeploymentMetadata
-		err = json.Unmarshal(by, &serviceDeploymentMetadata)
-		if err != nil {
-			return telemetry.Error(ctx, span, nil, "unable to unmarshal service deployment metadata")
-		}
-		serviceDeploymentMap[k] = serviceDeploymentMetadata
-	}
-	serviceDeploymentMetadata, ok := serviceDeploymentMap[inp.Notification.ServiceName]
-	if !ok {
-		return telemetry.Error(ctx, span, nil, "deployment metadata not found for service")
-	}
-
-	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "existing-status", Value: string(serviceDeploymentMetadata.Status)})
-
-	if serviceDeploymentMetadata.Status != PorterAppEventStatus_Progressing {
-		return nil // nothing to update here
-	}
-	// update the map with the new status
-	serviceDeploymentMetadata.Status = inp.Status
-	serviceDeploymentMap[inp.Notification.ServiceName] = serviceDeploymentMetadata
-
-	// update the deploy event with new map and status if all services are done
-	matchEvent.Metadata["service_deployment_metadata"] = serviceDeploymentMap
-	allServicesDone := true
-	anyServicesFailed := false
-	for _, deploymentMetadata := range serviceDeploymentMap {
-		if deploymentMetadata.Status == PorterAppEventStatus_Progressing {
-			allServicesDone = false
-			break
-		}
-		if deploymentMetadata.Status == PorterAppEventStatus_Failed {
-			anyServicesFailed = true
-		}
-	}
-	if allServicesDone {
-		matchEvent.Metadata["end_time"] = time.Now().UTC()
-		if anyServicesFailed {
-			matchEvent.Status = string(PorterAppEventStatus_Failed)
-		} else {
-			matchEvent.Status = string(PorterAppEventStatus_Success)
-		}
-	}
-
-	err = inp.EventRepo.UpdateEvent(ctx, &matchEvent)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error updating deploy event")
-	}
-
-	return nil
-}
-
-// serviceDeploymentMetadataFromDeployEvent returns the serviceDeploymentMetadata of a service from a deploy event
-// TODO: simplify this logic after https://linear.app/porter/issue/POR-2101/turn-servicedeploymentmetadata-from-a-map-into-a-list-in-ccp
-func serviceDeploymentMetadataFromDeployEvent(ctx context.Context, deployEvent models.PorterAppEvent, serviceName string) (ServiceDeploymentMetadata, error) {
-	ctx, span := telemetry.NewSpan(ctx, "service-deployment-metadata-from-deploy-event")
-	defer span.End()
-
-	serviceDeploymentMetadata := ServiceDeploymentMetadata{}
-
-	if deployEvent.ID == uuid.Nil {
-		return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "deploy event id cannot be nil")
-	}
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "app-id", Value: deployEvent.PorterAppID},
-		telemetry.AttributeKV{Key: "event-id", Value: deployEvent.ID},
-		telemetry.AttributeKV{Key: "event-type", Value: deployEvent.Type},
-		telemetry.AttributeKV{Key: "event-status", Value: deployEvent.Status},
-		telemetry.AttributeKV{Key: "service-name", Value: serviceName},
-	)
-
-	if deployEvent.Type != string(PorterAppEventType_Deploy) {
-		return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "event is not a deploy event")
-	}
-
-	serviceStatus, ok := deployEvent.Metadata["service_deployment_metadata"]
-	if !ok {
-		return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "service deployment metadata not found in deploy event metadata")
-	}
-	serviceDeploymentGenericMap, ok := serviceStatus.(map[string]interface{})
-	if !ok {
-		return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "service deployment metadata is not correct type")
-	}
-	serviceDeploymentMap := make(map[string]ServiceDeploymentMetadata)
-	for k, v := range serviceDeploymentGenericMap {
-		by, err := json.Marshal(v)
-		if err != nil {
-			return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "unable to marshal service deployment metadata")
-		}
-
-		var serviceDeploymentMetadata ServiceDeploymentMetadata
-		err = json.Unmarshal(by, &serviceDeploymentMetadata)
-		if err != nil {
-			return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "unable to unmarshal service deployment metadata")
-		}
-		serviceDeploymentMap[k] = serviceDeploymentMetadata
-	}
-	serviceDeploymentMetadata, ok = serviceDeploymentMap[serviceName]
-	if !ok {
-		return serviceDeploymentMetadata, telemetry.Error(ctx, span, nil, "deployment metadata not found for service")
-	}
-	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "status", Value: string(serviceDeploymentMetadata.Status)})
-
-	return serviceDeploymentMetadata, nil
-}
-
-// saveNotification saves a notification to the db
-// TODO: save the notification in its own table rather than co-opting the porter app events table
-func saveNotification(ctx context.Context, notification Notification, eventRepo repository.PorterAppEventRepository, deploymentTargetID string) error {
-	ctx, span := telemetry.NewSpan(ctx, "save-notification")
-	defer span.End()
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "app-id", Value: notification.AppID},
-		telemetry.AttributeKV{Key: "app-name", Value: notification.AppName},
-		telemetry.AttributeKV{Key: "app-revision-id", Value: notification.AppRevisionID},
-		telemetry.AttributeKV{Key: "agent-event-id", Value: notification.AgentEventID},
-		telemetry.AttributeKV{Key: "service-name", Value: notification.ServiceName},
-		telemetry.AttributeKV{Key: "deployment-target-id", Value: deploymentTargetID},
-	)
-
-	appID, err := strconv.Atoi(notification.AppID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error converting app id to int")
-	}
-
-	deploymentTargetUUID, err := uuid.Parse(deploymentTargetID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error parsing deployment target id")
-	}
-	if deploymentTargetUUID == uuid.Nil {
-		return telemetry.Error(ctx, span, err, "deployment target id cannot be nil")
-	}
-
-	notificationMap := make(map[string]any)
-	bytes, err := json.Marshal(notification)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error marshaling notification")
-	}
-	err = json.Unmarshal(bytes, &notificationMap)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error unmarshaling notification")
-	}
-
-	err = eventRepo.CreateEvent(ctx, &models.PorterAppEvent{
-		ID:                 uuid.New(),
-		Type:               string(PorterAppEventType_Notification),
-		PorterAppID:        uint(appID),
-		DeploymentTargetID: deploymentTargetUUID,
-		Metadata:           notificationMap,
-	})
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "error creating porter app event")
-	}
-
-	return nil
-}
-
 // NotificationFromPorterAppEvent converts a PorterAppEvent to a Notification
 func NotificationFromPorterAppEvent(appEvent *models.PorterAppEvent) (*Notification, error) {
 	notification := &Notification{}

+ 0 - 220
internal/porter_app/notifications/deployment.go

@@ -1,220 +0,0 @@
-package notifications
-
-import (
-	"context"
-	"fmt"
-	"strconv"
-	"strings"
-
-	"github.com/porter-dev/porter/internal/kubernetes"
-	"github.com/porter-dev/porter/internal/porter_app/notifications/porter_error"
-	"github.com/porter-dev/porter/internal/repository"
-	"github.com/porter-dev/porter/internal/telemetry"
-	v1 "k8s.io/api/apps/v1"
-)
-
-// Deployment represents metadata about a k8s deployment
-type Deployment struct {
-	Status DeploymentStatus `json:"status"`
-}
-
-// DeploymentStatus represents the status of a k8s deployment
-type DeploymentStatus string
-
-const (
-	// DeploymentStatus_Unknown indicates that the status of the deployment is unknown because we have not queried for it yet
-	DeploymentStatus_Unknown DeploymentStatus = "UNKNOWN"
-	// DeploymentStatus_Pending indicates that the deployment is still in progress
-	DeploymentStatus_Pending DeploymentStatus = "PENDING"
-	// DeploymentStatus_Success indicates that the deployment was successful
-	DeploymentStatus_Success DeploymentStatus = "SUCCESS"
-	// DeploymentStatus_Failure indicates that the deployment failed
-	DeploymentStatus_Failure DeploymentStatus = "FAILURE"
-)
-
-// hydrateNotificationWithDeploymentInput is the input struct for hydrateNotificationWithDeployment
-type hydrateNotificationWithDeploymentInput struct {
-	// Notification is the notification to hydrate
-	Notification
-	// DeploymentTargetId is the ID of the deployment target
-	DeploymentTargetId string
-	// Namespace is the namespace of the deployment target
-	Namespace string
-	// K8sAgent is the k8s agent, used to query for deployment info
-	K8sAgent kubernetes.Agent
-	// EventRepo is the repository for app events, used to check if we've already marked this deployment as successful/failed
-	EventRepo repository.PorterAppEventRepository
-}
-
-// hydrateNotificationWithDeployment hydrates a notification with k8s deployment info
-func hydrateNotificationWithDeployment(ctx context.Context, inp hydrateNotificationWithDeploymentInput) (Notification, error) {
-	ctx, span := telemetry.NewSpan(ctx, "hydrate-notification-with-deployment")
-	defer span.End()
-
-	hydratedNotification := inp.Notification
-
-	if inp.Notification.Deployment.Status != DeploymentStatus_Unknown {
-		return hydratedNotification, nil
-	}
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "deployment-target-id", Value: inp.DeploymentTargetId},
-		telemetry.AttributeKV{Key: "namespace", Value: inp.Namespace},
-		telemetry.AttributeKV{Key: "app-name", Value: inp.AppName},
-		telemetry.AttributeKV{Key: "app-revision-id", Value: inp.Notification.AppRevisionID},
-		telemetry.AttributeKV{Key: "service-name", Value: inp.ServiceName},
-	)
-
-	// first, we check if we've already marked this deployment as successful or failed
-	status, err := porterAppDeployEventStatus(ctx, porterAppDeployEventStatusInput{
-		AppID:         inp.AppID,
-		EventRepo:     inp.EventRepo,
-		AppRevisionID: inp.Notification.AppRevisionID,
-		ServiceName:   inp.Notification.ServiceName,
-	})
-	if err != nil {
-		err := telemetry.Error(ctx, span, err, "failed to get deployment status from db")
-		return hydratedNotification, err
-	}
-
-	// the status is still pending in the db, so we haven't updated the user on it yet
-	// therefore, we check the k8s deployment status
-	if status == DeploymentStatus_Pending {
-		selectors := []string{
-			fmt.Sprintf("porter.run/deployment-target-id=%s", inp.DeploymentTargetId),
-			fmt.Sprintf("porter.run/app-name=%s", inp.AppName),
-			fmt.Sprintf("porter.run/app-revision-id=%s", inp.Notification.AppRevisionID),
-			fmt.Sprintf("porter.run/service-name=%s", inp.ServiceName),
-		}
-		depls, err := inp.K8sAgent.GetDeploymentsBySelector(ctx, inp.Namespace, strings.Join(selectors, ","))
-		if err != nil {
-			err := telemetry.Error(ctx, span, err, "failed to get deployments for notification")
-			return hydratedNotification, err
-		}
-		if len(depls.Items) == 0 {
-			err := telemetry.Error(ctx, span, nil, "no deployments found for notification")
-			return hydratedNotification, err
-		}
-		if len(depls.Items) > 1 {
-			err := telemetry.Error(ctx, span, nil, "multiple deployments found for notification")
-			return hydratedNotification, err
-		}
-
-		matchingDeployment := depls.Items[0]
-		telemetry.WithAttributes(span,
-			telemetry.AttributeKV{Key: "deployment-name", Value: matchingDeployment.Name},
-			telemetry.AttributeKV{Key: "deployment-uid", Value: matchingDeployment.ObjectMeta.UID},
-			telemetry.AttributeKV{Key: "deployment-creation-timestamp", Value: matchingDeployment.ObjectMeta.CreationTimestamp},
-		)
-		status = k8sDeploymentStatus(matchingDeployment)
-	}
-
-	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "deployment-status", Value: status})
-	if status == DeploymentStatus_Unknown {
-		err := telemetry.Error(ctx, span, nil, "unable to determine status of deployment")
-		return hydratedNotification, err
-	}
-
-	hydratedNotification.Deployment = Deployment{
-		Status: status,
-	}
-
-	return hydratedNotification, nil
-}
-
-// porterAppDeployEventStatusInput is the input struct for porterAppDeployEventStatus
-type porterAppDeployEventStatusInput struct {
-	// AppID is the ID of the app
-	AppID string
-	// EventRepo is the repository for app events, used to check if we've already marked this deployment as successful/failed
-	EventRepo repository.PorterAppEventRepository
-	// AppRevisionID is the ID of the app revision
-	AppRevisionID string
-	// ServiceName is the name of the service
-	ServiceName string
-}
-
-// porterAppDeployEventStatus returns the status of a deploy event from the app events repository
-func porterAppDeployEventStatus(ctx context.Context, inp porterAppDeployEventStatusInput) (DeploymentStatus, error) {
-	ctx, span := telemetry.NewSpan(ctx, "db-deploy-event-status")
-	defer span.End()
-
-	deploymentStatus := DeploymentStatus_Unknown
-
-	appIdInt, err := strconv.Atoi(inp.AppID)
-	if err != nil {
-		return deploymentStatus, telemetry.Error(ctx, span, err, "failed to convert app id to int")
-	}
-	matchingDeployEvent, err := inp.EventRepo.ReadDeployEventByAppRevisionID(ctx, uint(appIdInt), inp.AppRevisionID)
-	if err != nil {
-		return deploymentStatus, telemetry.Error(ctx, span, err, "failed to read deploy event by app revision id")
-	}
-
-	serviceDeploymentMetadata, err := serviceDeploymentMetadataFromDeployEvent(ctx, matchingDeployEvent, inp.ServiceName)
-	if err != nil {
-		return deploymentStatus, telemetry.Error(ctx, span, err, "failed to get service deployment metadata from deploy event")
-	}
-
-	switch serviceDeploymentMetadata.Status {
-	case PorterAppEventStatus_Success:
-		deploymentStatus = DeploymentStatus_Success
-	case PorterAppEventStatus_Failed:
-		deploymentStatus = DeploymentStatus_Failure
-	case PorterAppEventStatus_Progressing:
-		deploymentStatus = DeploymentStatus_Pending
-	default:
-		deploymentStatus = DeploymentStatus_Unknown
-	}
-
-	telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "deployment-status", Value: string(deploymentStatus)})
-
-	return deploymentStatus, nil
-}
-
-// k8sDeploymentStatus returns the status of a k8s deployment
-func k8sDeploymentStatus(depl v1.Deployment) DeploymentStatus {
-	deploymentStatus := DeploymentStatus_Unknown
-
-	if depl.Status.Replicas == depl.Status.ReadyReplicas &&
-		depl.Status.Replicas == depl.Status.AvailableReplicas &&
-		depl.Status.Replicas == depl.Status.UpdatedReplicas {
-		deploymentStatus = DeploymentStatus_Success
-	} else {
-		for _, condition := range depl.Status.Conditions {
-			if condition.Type == "Progressing" {
-				if condition.Status == "False" && condition.Reason == "ProgressDeadlineExceeded" {
-					deploymentStatus = DeploymentStatus_Failure
-					break
-				} else {
-					deploymentStatus = DeploymentStatus_Pending
-				}
-			}
-		}
-	}
-
-	return deploymentStatus
-}
-
-var fatalDeploymentErrorCodes = []porter_error.PorterErrorCode{
-	porter_error.PorterErrorCode_NonZeroExitCode,
-	porter_error.PorterErrorCode_NonZeroExitCode_InvalidStartCommand,
-	porter_error.PorterErrorCode_NonZeroExitCode_CommonIssues,
-	porter_error.PorterErrorCode_ReadinessHealthCheck,
-	porter_error.PorterErrorCode_LivenessHealthCheck,
-	porter_error.PorterErrorCode_InvalidImageError,
-	porter_error.PorterErrorCode_RestartedDueToError,
-	porter_error.PorterErrorCode_MemoryLimitExceeded_ScaleUp,
-	porter_error.PorterErrorCode_CPULimitExceeded_ScaleUp,
-	porter_error.PorterErrorCode_CannotBeScheduled,
-}
-
-// errorCodeIndicatesDeploymentFailure returns true if the error code indicates that the deployment will eventually time out and fail
-// we use this to report deployment failure to the user early, rather than waiting for the deployment to time out
-func errorCodeIndicatesDeploymentFailure(errorCode porter_error.PorterErrorCode) bool {
-	for _, fatalErrorCode := range fatalDeploymentErrorCodes {
-		if errorCode == fatalErrorCode {
-			return true
-		}
-	}
-	return false
-}

+ 34 - 125
internal/porter_app/notifications/notification.go

@@ -1,113 +1,11 @@
 package notifications
 
 import (
-	"context"
-	"strings"
 	"time"
 
 	"github.com/google/uuid"
-	"github.com/porter-dev/porter/internal/kubernetes"
-	"github.com/porter-dev/porter/internal/porter_app/notifications/porter_error"
-	"github.com/porter-dev/porter/internal/repository"
-	"github.com/porter-dev/porter/internal/telemetry"
 )
 
-// HandleNotificationInput is the input to HandleNotification
-type HandleNotificationInput struct {
-	// RawAgentEventMetadata is the raw metadata from the agent event; HandleNotification parses it before doing anything else
-	RawAgentEventMetadata map[string]any
-	// EventRepo is the repository for app events, used to dedupe notifications, update deploy events and save notifications
-	EventRepo repository.PorterAppEventRepository
-	// DeploymentTargetID is the ID of the deployment target
-	DeploymentTargetID string
-	// Namespace is the namespace of the deployment target
-	Namespace string
-	// K8sAgent is the k8s agent, used to query for deployment info
-	K8sAgent kubernetes.Agent
-}
-
-// HandleNotification handles the logic for processing agent events.
-// It parses the raw agent event, converts it to a notification, and then hydrates, filters and persists it.
-// It returns nil without saving anything when the notification is a duplicate or when its error code is ignorable.
-// Any failure in an intermediate step is recorded on the telemetry span and returned.
-func HandleNotification(ctx context.Context, inp HandleNotificationInput) error {
-	ctx, span := telemetry.NewSpan(ctx, "internal-handle-notification")
-	defer span.End()
-
-	// 1. parse agent event
-	agentEventMetadata, err := parseAgentEventMetadata(inp.RawAgentEventMetadata)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "failed to unmarshal app event metadata")
-	}
-	if agentEventMetadata == nil {
-		return telemetry.Error(ctx, span, nil, "app event metadata is nil")
-	}
-
-	// 2. convert agent event to notification
-	hydratedNotification := agentEventToNotification(*agentEventMetadata)
-
-	// 3. dedupe notification; a duplicate is not an error, it is simply dropped
-	isDuplicate, err := isNotificationDuplicate(ctx, hydratedNotification, inp.EventRepo, inp.DeploymentTargetID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "failed to check if app event is duplicate")
-	}
-	if isDuplicate {
-		telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "is-duplicate", Value: true})
-		return nil
-	}
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "app-id", Value: hydratedNotification.AppID},
-		telemetry.AttributeKV{Key: "app-name", Value: hydratedNotification.AppName},
-		telemetry.AttributeKV{Key: "service-name", Value: hydratedNotification.ServiceName},
-		telemetry.AttributeKV{Key: "app-revision-id", Value: hydratedNotification.AppRevisionID},
-		telemetry.AttributeKV{Key: "agent-event-id", Value: hydratedNotification.AgentEventID},
-		telemetry.AttributeKV{Key: "agent-detail", Value: hydratedNotification.AgentDetail},
-		telemetry.AttributeKV{Key: "agent-summary", Value: hydratedNotification.AgentSummary},
-	)
-
-	if !strings.Contains(hydratedNotification.AgentSummary, "job run") {
-		// 4. hydrate notification with k8s deployment info, only if this isn't a job run
-		hydratedNotification, err = hydrateNotificationWithDeployment(ctx, hydrateNotificationWithDeploymentInput{
-			Notification:       hydratedNotification,
-			DeploymentTargetId: inp.DeploymentTargetID,
-			Namespace:          inp.Namespace,
-			K8sAgent:           inp.K8sAgent,
-			EventRepo:          inp.EventRepo,
-		})
-		if err != nil {
-			return telemetry.Error(ctx, span, err, "failed to hydrate notification with deployment")
-		}
-	}
-
-	// 5. hydrate notification with a Porter error containing user-facing details
-	hydratedNotification = hydrateNotificationWithError(ctx, hydratedNotification)
-
-	// if we can ignore this error, then we don't need to save it
-	if hydratedNotification.Error.Code == porter_error.PorterErrorCode_Ignorable {
-		return nil
-	}
-
-	// 6. based on notification + k8s deployment, update the status of the matching deploy event
-	// the deploy event is marked failed when the deployment has already failed, or when it is still pending
-	// but the error code is one that will eventually make it fail
-	if hydratedNotification.Deployment.Status == DeploymentStatus_Failure ||
-		(hydratedNotification.Deployment.Status == DeploymentStatus_Pending &&
-			errorCodeIndicatesDeploymentFailure(hydratedNotification.Error.Code)) {
-		err = updateDeployEvent(ctx, updateDeployEventInput{
-			Notification: hydratedNotification,
-			EventRepo:    inp.EventRepo,
-			Status:       PorterAppEventStatus_Failed,
-		})
-		if err != nil {
-			return telemetry.Error(ctx, span, err, "failed to update deploy event matching notification")
-		}
-	}
-
-	// 7. save notification to db
-	err = saveNotification(ctx, hydratedNotification, inp.EventRepo, inp.DeploymentTargetID)
-	if err != nil {
-		return telemetry.Error(ctx, span, err, "failed to save notification")
-	}
-
-	return nil
-}
-
 // Notification is a struct that contains all actionable information from an app event
 type Notification struct {
 	// AppID is the ID of the app
@@ -125,7 +23,7 @@ type Notification struct {
 	// AgentSummary is the raw summary of the agent event
 	AgentSummary string `json:"agent_summary"`
 	// Error is the Porter error parsed from the agent event
-	Error porter_error.PorterError `json:"error"`
+	Error PorterError `json:"error"`
 	// Deployment is the deployment metadata, used to determine if the notification occurred during deployment or after
 	Deployment Deployment `json:"deployment"`
 	// Timestamp is the time that the notification was created
@@ -134,27 +32,38 @@ type Notification struct {
 	ID uuid.UUID `json:"id"`
 }
 
-// agentEventToNotification converts an app event to a notification
-func agentEventToNotification(appEventMetadata AppEventMetadata) Notification {
-	// There is a discrepancy between the predeploy naming; the front-end calls it "pre-deploy", but the job name is "predeploy"
-	// This is a hack to make sure that the front-end can still parse the notification
-	// TODO: rename the job to pre-deploy on the backend to match the front-end UI representation
-	serviceName := appEventMetadata.ServiceName
-	if serviceName == "predeploy" {
-		serviceName = "pre-deploy"
-	}
+// PorterError is the translation of a generic error from the agent into an actionable error for the user
+type PorterError struct {
+	// Code is the error code that can be used to determine the type of error
+	Code PorterErrorCode `json:"code"`
+	// Summary is a short, user-facing description of the error
+	Summary string `json:"summary"`
+	// Detail is a longer, user-facing description of the error
+	Detail string `json:"detail"`
+	// MitigationSteps are the steps that the user can take to resolve the error
+	MitigationSteps string `json:"mitigation_steps"`
+	// Documentation is a list of links to documentation that can be used to resolve the error
+	Documentation []string `json:"documentation"`
+}
+
+// PorterErrorCode is the integer error code that can be used to determine the type of error
+type PorterErrorCode int
 
-	notification := Notification{
-		AppID:         appEventMetadata.AppID,
-		AppName:       appEventMetadata.AppName,
-		ServiceName:   serviceName,
-		AgentEventID:  appEventMetadata.AgentEventID,
-		AgentDetail:   appEventMetadata.Detail,
-		AgentSummary:  appEventMetadata.Summary,
-		AppRevisionID: appEventMetadata.AppRevisionID,
-		Deployment:    Deployment{Status: DeploymentStatus_Unknown},
-		Timestamp:     time.Now().UTC(),
-		ID:            uuid.New(),
-	}
-	return notification
+// Deployment represents metadata about a k8s deployment
+type Deployment struct {
+	// Status is the status of the k8s deployment
+	Status DeploymentStatus `json:"status"`
 }
+
+// DeploymentStatus represents the status of a k8s deployment, as one of the string constants declared below
+type DeploymentStatus string
+
+const (
+	// DeploymentStatus_Unknown indicates that the status of the deployment is unknown because we have not queried for it yet
+	DeploymentStatus_Unknown DeploymentStatus = "UNKNOWN"
+	// DeploymentStatus_Pending indicates that the deployment is still in progress
+	DeploymentStatus_Pending DeploymentStatus = "PENDING"
+	// DeploymentStatus_Success indicates that the deployment was successful
+	DeploymentStatus_Success DeploymentStatus = "SUCCESS"
+	// DeploymentStatus_Failure indicates that the deployment failed
+	DeploymentStatus_Failure DeploymentStatus = "FAILURE"
+)

+ 0 - 131
internal/porter_app/notifications/porter_error/codes.go

@@ -1,131 +0,0 @@
-package porter_error
-
-import (
-	"regexp"
-	"strings"
-)
-
-// PorterError is the translation of a generic error from the agent into an actionable error for the user
-type PorterError struct {
-	// Code is the error code that can be used to determine the type of error
-	Code PorterErrorCode `json:"code"`
-	// Summary is a short, user-facing description of the error
-	Summary string `json:"summary"`
-	// Detail is a longer, user-facing description of the error
-	Detail string `json:"detail"`
-	// MitigationSteps are the steps that the user can take to resolve the error
-	MitigationSteps string `json:"mitigation_steps"`
-	// Documentation is a list of links to documentation that can be used to resolve the error
-	Documentation []string `json:"documentation"`
-}
-
-// PorterErrorCode is the error code that can be used to determine the type of error
-type PorterErrorCode int
-
-// NOTE(review): codes sharing a tens digit look like variants of one base error (10-13 non-zero exit codes,
-// 60-61 memory limit, 70-71 CPU limit) - confirm before relying on this when adding new codes
-const (
-	// PorterErrorCode_Unknown is the default error code
-	PorterErrorCode_Unknown PorterErrorCode = 0
-	// PorterErrorCode_Ignorable is the error code for an ignorable error
-	PorterErrorCode_Ignorable PorterErrorCode = 1
-	// PorterErrorCode_NonZeroExitCode is the error code for a generic non-zero exit code
-	PorterErrorCode_NonZeroExitCode PorterErrorCode = 10
-	// PorterErrorCode_NonZeroExitCode_SIGKILL is the error code for a non-zero exit code due to a SIGKILL
-	PorterErrorCode_NonZeroExitCode_SIGKILL PorterErrorCode = 11
-	// PorterErrorCode_NonZeroExitCode_InvalidStartCommand is the error code for a non-zero exit code due to an invalid start command
-	PorterErrorCode_NonZeroExitCode_InvalidStartCommand PorterErrorCode = 12
-	// PorterErrorCode_NonZeroExitCode_CommonIssues is the error code for a non-zero exit code due to common issues
-	PorterErrorCode_NonZeroExitCode_CommonIssues PorterErrorCode = 13
-	// PorterErrorCode_LivenessHealthCheck is the error code for a failed liveness health check
-	PorterErrorCode_LivenessHealthCheck PorterErrorCode = 20
-	// PorterErrorCode_ReadinessHealthCheck is the error code for a failed readiness health check
-	PorterErrorCode_ReadinessHealthCheck PorterErrorCode = 30
-	// PorterErrorCode_RestartedDueToError is the error code for a restart due to an error
-	PorterErrorCode_RestartedDueToError PorterErrorCode = 40
-	// PorterErrorCode_InvalidImageError is the error code for an invalid image
-	PorterErrorCode_InvalidImageError PorterErrorCode = 50
-	// PorterErrorCode_MemoryLimitExceeded is the error code for a memory limit exceeded
-	PorterErrorCode_MemoryLimitExceeded PorterErrorCode = 60
-	// PorterErrorCode_MemoryLimitExceeded_ScaleUp is the error code for a memory limit exceeded when scaling up
-	PorterErrorCode_MemoryLimitExceeded_ScaleUp PorterErrorCode = 61
-	// PorterErrorCode_CPULimitExceeded is the error code for a CPU limit exceeded
-	PorterErrorCode_CPULimitExceeded PorterErrorCode = 70
-	// PorterErrorCode_CPULimitExceeded_ScaleUp is the error code for a CPU limit exceeded when scaling up
-	PorterErrorCode_CPULimitExceeded_ScaleUp PorterErrorCode = 71
-	// PorterErrorCode_CannotBeScheduled is the error code for a pod that cannot be scheduled
-	PorterErrorCode_CannotBeScheduled PorterErrorCode = 80
-)
-
-// ErrorCode maps an agent event onto a standard Porter error code by looking for known phrases in the agent summary.
-// The agent detail is only consulted for non-zero exit codes, where it supplies the exit code itself.
-// PorterErrorCode_Unknown is returned when no known phrase is present in the summary.
-func ErrorCode(agentSummary, agentDetail string) PorterErrorCode {
-	// non-zero exit codes are refined further using the exit code found in the detail
-	if strings.Contains(agentSummary, "non-zero exit code") {
-		return nonZeroExitCodeErrorCode(agentDetail)
-	}
-
-	// checked in order; the first phrase found in the summary decides the code
-	summaryPhrases := []struct {
-		phrase string
-		code   PorterErrorCode
-	}{
-		{"liveness health check", PorterErrorCode_LivenessHealthCheck},
-		{"readiness health check", PorterErrorCode_ReadinessHealthCheck},
-		{"restarted due to an error", PorterErrorCode_RestartedDueToError},
-		{"invalid image", PorterErrorCode_InvalidImageError},
-		{"ran out of memory", PorterErrorCode_MemoryLimitExceeded},
-		// this is often a false alarm. if it is actually blocking deploy, we will get a PorterErrorCode_MemoryLimitExceeded_ScaleUp
-		{"requesting more memory than is available", PorterErrorCode_Ignorable},
-		{"requesting too much memory and cannot scale up", PorterErrorCode_MemoryLimitExceeded_ScaleUp},
-		// this is often a false alarm. if it is actually blocking deploy, we will get a PorterErrorCode_CPULimitExceeded_ScaleUp
-		{"requesting more cpu than is available", PorterErrorCode_Ignorable},
-		{"requesting too much cpu and cannot scale up", PorterErrorCode_CPULimitExceeded_ScaleUp},
-		{"cannot be scheduled", PorterErrorCode_CannotBeScheduled},
-	}
-	for _, candidate := range summaryPhrases {
-		if strings.Contains(agentSummary, candidate.phrase) {
-			return candidate.code
-		}
-	}
-
-	return PorterErrorCode_Unknown
-}
-
-// exitCodeErrorCodeRegex is restartedWithErrorCodePattern compiled once, rather than on every call to nonZeroExitCodeErrorCode
-var exitCodeErrorCodeRegex = regexp.MustCompile(restartedWithErrorCodePattern)
-
-// nonZeroExitCodeErrorCode parses the agent detail for non-zero exit code errors to return a standard Porter error code.
-// It falls back to the generic PorterErrorCode_NonZeroExitCode when the detail carries no exit code,
-// or when the exit code is not one that has a more specific error code.
-func nonZeroExitCodeErrorCode(agentDetail string) PorterErrorCode {
-	matches := exitCodeErrorCodeRegex.FindStringSubmatch(agentDetail)
-	// a successful match holds exactly the full match plus the captured exit code
-	if len(matches) != 2 {
-		return PorterErrorCode_NonZeroExitCode
-	}
-
-	switch matches[1] {
-	case "1":
-		return PorterErrorCode_NonZeroExitCode_CommonIssues
-	case "127":
-		return PorterErrorCode_NonZeroExitCode_InvalidStartCommand
-	case "137":
-		return PorterErrorCode_NonZeroExitCode_SIGKILL
-	default:
-		return PorterErrorCode_NonZeroExitCode
-	}
-}

+ 0 - 372
internal/porter_app/notifications/porter_error/providers.go

@@ -1,372 +0,0 @@
-package porter_error
-
-import (
-	"fmt"
-	"regexp"
-	"strings"
-)
-
-// restartedWithErrorCodePattern is a regular expression capturing the exit code from agent details such as "restarted with exit code 137"
-const restartedWithErrorCodePattern = `restarted with exit code (\S+)`
-
-// ErrorDetailsProvider is the parent interface for populating user-facing info about a Porter Error.
-// Every method receives the raw detail string from the agent event; implementations may parse it or ignore it.
-type ErrorDetailsProvider interface {
-	// Detail returns the error detail for the given error. E.g. "The service restarted with exit code 137."
-	Detail(rawAgentDetail string) string
-	// MitigationSteps returns the mitigation steps for the given error. E.g. "Please make sure that your service handles graceful shutdown when it receives a SIGTERM signal."
-	MitigationSteps(rawAgentDetail string) string
-	// Documentation returns the documentation links that would help with troubleshooting the given error.
-	Documentation(rawAgentDetail string) []string
-}
-
-// ErrorCodeToProvider maps PorterErrorCode to their respective ErrorDetailsProvider implementations.
-// PorterErrorCode_Unknown and PorterErrorCode_Ignorable have no entry, so callers must handle a missing provider.
-// All four non-zero exit code variants share NonZeroExitCodeErrorProvider.
-var ErrorCodeToProvider = map[PorterErrorCode]ErrorDetailsProvider{
-	PorterErrorCode_NonZeroExitCode:                     NonZeroExitCodeErrorProvider{},
-	PorterErrorCode_NonZeroExitCode_SIGKILL:             NonZeroExitCodeErrorProvider{},
-	PorterErrorCode_NonZeroExitCode_InvalidStartCommand: NonZeroExitCodeErrorProvider{},
-	PorterErrorCode_NonZeroExitCode_CommonIssues:        NonZeroExitCodeErrorProvider{},
-	PorterErrorCode_LivenessHealthCheck:                 LivenessHealthCheckErrorProvider{},
-	PorterErrorCode_ReadinessHealthCheck:                ReadinessHealthCheckErrorProvider{},
-	PorterErrorCode_RestartedDueToError:                 RestartedDueToErrorProvider{},
-	PorterErrorCode_InvalidImageError:                   InvalidImageErrorProvider{},
-	PorterErrorCode_MemoryLimitExceeded:                 MemoryLimitExceededErrorProvider{},
-	PorterErrorCode_MemoryLimitExceeded_ScaleUp:         MemoryLimitExceededScaleUpErrorProvider{},
-	PorterErrorCode_CPULimitExceeded:                    CPULimitExceededErrorProvider{},
-	PorterErrorCode_CPULimitExceeded_ScaleUp:            CPULimitExceededScaleUpErrorProvider{},
-	PorterErrorCode_CannotBeScheduled:                   CannotBeScheduledErrorProvider{},
-}
-
-// restartedWithErrorCodeRegex is restartedWithErrorCodePattern compiled once, rather than on every provider call
-var restartedWithErrorCodeRegex = regexp.MustCompile(restartedWithErrorCodePattern)
-
-// exitCodeFromAgentDetail extracts the exit code from an agent detail such as "restarted with exit code 137".
-// The second return value is false when the detail does not contain an exit code.
-func exitCodeFromAgentDetail(rawAgentDetail string) (string, bool) {
-	matches := restartedWithErrorCodeRegex.FindStringSubmatch(rawAgentDetail)
-	// a successful match holds exactly the full match plus the captured exit code
-	if len(matches) != 2 {
-		return "", false
-	}
-	return matches[1], true
-}
-
-// NonZeroExitCodeErrorProvider provides error details for NonZeroExitCode errors.
-type NonZeroExitCodeErrorProvider struct{}
-
-// Detail returns the error detail for NonZeroExitCode errors, parsing out the exit code from the agent event.
-// When no exit code can be parsed, the raw agent detail is returned unchanged.
-func (e NonZeroExitCodeErrorProvider) Detail(rawAgentDetail string) string {
-	exitCode, ok := exitCodeFromAgentDetail(rawAgentDetail)
-	if !ok {
-		return rawAgentDetail
-	}
-
-	prefix := fmt.Sprintf("The service restarted with exit code %s.", exitCode)
-	switch exitCode {
-	case "137":
-		return fmt.Sprintf("%s This indicates that the service was killed by SIGKILL. The most common reason for this is that your service does not handle graceful shutdown when it receives a SIGTERM signal.", prefix)
-	case "1":
-		return fmt.Sprintf("%s This indicates common issues.", prefix)
-	case "127":
-		return fmt.Sprintf("%s This indicates that the service has a misconfigured start command.", prefix)
-	default:
-		return prefix
-	}
-}
-
-// MitigationSteps returns the mitigation steps for NonZeroExitCode errors, parsing out the exit code from the agent event.
-// When no exit code can be parsed, or the exit code has no specific advice, generic guidance is returned.
-func (e NonZeroExitCodeErrorProvider) MitigationSteps(rawAgentDetail string) string {
-	mitigationSteps := "Please consult our documentation for further guidance. If you need additional help, please reach out to us at support@porter.run."
-	exitCode, ok := exitCodeFromAgentDetail(rawAgentDetail)
-	if !ok {
-		return mitigationSteps
-	}
-
-	switch exitCode {
-	case "137":
-		return "Please make sure that your service handles graceful shutdown when it receives a SIGTERM signal. After receiving SIGTERM, your service should close existing connections and terminate with exit code 0."
-	case "1":
-		return "Check container logs for further troubleshooting."
-	case "127":
-		return "Please verify that the service start command is correct and redeploy."
-	default:
-		return mitigationSteps
-	}
-}
-
-// Documentation returns the documentation links for NonZeroExitCode errors, parsing out the exit code from the agent event.
-// The general troubleshooting link is always included; exit code 137 additionally links to the graceful shutdown docs.
-func (e NonZeroExitCodeErrorProvider) Documentation(rawAgentDetail string) []string {
-	docLinks := []string{
-		"https://docs.porter.run/enterprise/managing-applications/application-troubleshooting#application-issues-and-non-zero-exit-codes",
-	}
-
-	if exitCode, ok := exitCodeFromAgentDetail(rawAgentDetail); ok && exitCode == "137" {
-		docLinks = append(docLinks, "https://docs.porter.run/enterprise/deploying-applications/zero-downtime-deployments#graceful-shutdown")
-	}
-
-	return docLinks
-}
-
-// livenessHealthCheckPathRegex captures the endpoint from agent details such as
-// "...Your liveness health check is set to the path /healthz...". It is compiled once rather than on every call.
-var livenessHealthCheckPathRegex = regexp.MustCompile(`Your liveness health check is set to the path (\S+)\.`)
-
-// LivenessHealthCheckErrorProvider provides error details for LivenessHealthCheck errors.
-type LivenessHealthCheckErrorProvider struct{}
-
-// Detail returns the error detail for LivenessHealthCheck errors, parsing out the healthcheck endpoint from the agent event.
-// When no endpoint can be parsed, the raw agent detail is returned unchanged.
-func (e LivenessHealthCheckErrorProvider) Detail(rawAgentDetail string) string {
-	matches := livenessHealthCheckPathRegex.FindStringSubmatch(rawAgentDetail)
-	// a successful match holds exactly the full match plus the captured path
-	if len(matches) != 2 {
-		return rawAgentDetail
-	}
-	return fmt.Sprintf("The liveness health check for this service is set to the path %s. The service is not responding with a 200-level response code on this endpoint, so it is continuously restarting.", matches[1])
-}
-
-// MitigationSteps returns the mitigation steps for LivenessHealthCheck errors, parsing out the healthcheck endpoint from the agent event.
-// When no endpoint can be parsed, the advice is given without naming the endpoint.
-func (e LivenessHealthCheckErrorProvider) MitigationSteps(rawAgentDetail string) string {
-	matches := livenessHealthCheckPathRegex.FindStringSubmatch(rawAgentDetail)
-	if len(matches) != 2 {
-		return "Please make sure that your service responds with a 200-level response code on the liveness health check endpoint."
-	}
-	return fmt.Sprintf("Please make sure that your service responds with a 200-level response code on the liveness health check endpoint %s.", matches[1])
-}
-
-// Documentation returns the documentation links for LivenessHealthCheck errors.
-func (e LivenessHealthCheckErrorProvider) Documentation(rawAgentDetail string) []string {
-	return []string{
-		"https://docs.porter.run/standard/deploying-applications/zero-downtime-deployments#health-checks",
-		"https://docs.porter.run/standard/deploying-applications/zero-downtime-deployments#graceful-shutdown",
-	}
-}
-
-// readinessHealthCheckPathRegex captures the endpoint from agent details such as
-// "...Your readiness health check is set to the path /healthz...". It is compiled once rather than on every call.
-var readinessHealthCheckPathRegex = regexp.MustCompile(`Your readiness health check is set to the path (\S+)\.`)
-
-// ReadinessHealthCheckErrorProvider provides error details for ReadinessHealthCheck errors.
-type ReadinessHealthCheckErrorProvider struct{}
-
-// Detail returns the error detail for ReadinessHealthCheck errors, parsing out the healthcheck endpoint from the agent event.
-// When no endpoint can be parsed, the raw agent detail is returned unchanged.
-func (e ReadinessHealthCheckErrorProvider) Detail(rawAgentDetail string) string {
-	matches := readinessHealthCheckPathRegex.FindStringSubmatch(rawAgentDetail)
-	// a successful match holds exactly the full match plus the captured path
-	if len(matches) != 2 {
-		return rawAgentDetail
-	}
-	return fmt.Sprintf("The readiness health check for this service is set to the path %s. The service is not responding with a 200-level response code on this endpoint, so it is continuously restarting.", matches[1])
-}
-
-// MitigationSteps returns the mitigation steps for ReadinessHealthCheck errors, parsing out the healthcheck endpoint from the agent event.
-// When no endpoint can be parsed, the advice is given without naming the endpoint.
-func (e ReadinessHealthCheckErrorProvider) MitigationSteps(rawAgentDetail string) string {
-	matches := readinessHealthCheckPathRegex.FindStringSubmatch(rawAgentDetail)
-	if len(matches) != 2 {
-		return "Please make sure that your service responds with a 200-level response code on the readiness health check endpoint."
-	}
-	return fmt.Sprintf("Please make sure that your service responds with a 200-level response code on the readiness health check endpoint %s.", matches[1])
-}
-
-// Documentation returns the documentation links for ReadinessHealthCheck errors.
-func (e ReadinessHealthCheckErrorProvider) Documentation(rawAgentDetail string) []string {
-	return []string{
-		"https://docs.porter.run/standard/deploying-applications/zero-downtime-deployments#health-checks",
-		"https://docs.porter.run/standard/deploying-applications/zero-downtime-deployments#graceful-shutdown",
-	}
-}
-
-// RestartedDueToErrorProvider supplies the user-facing details for RestartedDueToError errors.
-type RestartedDueToErrorProvider struct{}
-
-// Detail returns a fixed explanation of RestartedDueToError errors; the raw agent detail is not consulted.
-func (RestartedDueToErrorProvider) Detail(_ string) string {
-	const detail = "The service is stuck in a restart loop. This is likely due to other errors."
-	return detail
-}
-
-// MitigationSteps returns fixed advice for RestartedDueToError errors; the raw agent detail is not consulted.
-func (RestartedDueToErrorProvider) MitigationSteps(_ string) string {
-	const steps = "Please address other errors if they exist, or check service logs for further troubleshooting."
-	return steps
-}
-
-// Documentation returns the troubleshooting links for RestartedDueToError errors.
-func (RestartedDueToErrorProvider) Documentation(_ string) []string {
-	links := []string{
-		"https://docs.porter.run/enterprise/managing-applications/application-troubleshooting#application-restarts",
-	}
-	return links
-}
-
-// InvalidImageErrorProvider supplies the user-facing details for InvalidImageError errors.
-type InvalidImageErrorProvider struct{}
-
-// Detail returns a fixed explanation of InvalidImageError errors; the raw agent detail is not consulted.
-func (InvalidImageErrorProvider) Detail(_ string) string {
-	const detail = "The service cannot pull from the image registry. This is likely due to an invalid image name or bad credentials."
-	return detail
-}
-
-// MitigationSteps returns fixed advice for InvalidImageError errors; the raw agent detail is not consulted.
-func (InvalidImageErrorProvider) MitigationSteps(_ string) string {
-	const steps = "Please double check that your image name is correct and that the tag specified exists for that image. If you are attempting to pull from a private registry, please make sure that the registry is correctly linked to your project. You can verify this by going to the Integrations tab -> Docker registry and ensuring that your image repository is listed there."
-	return steps
-}
-
-// Documentation returns the troubleshooting links for InvalidImageError errors.
-func (InvalidImageErrorProvider) Documentation(_ string) []string {
-	links := []string{
-		"https://docs.porter.run/enterprise/managing-applications/application-troubleshooting#image-pull-errors",
-		"https://docs.porter.run/enterprise/deploying-applications/deploying-from-docker-registry",
-	}
-	return links
-}
-
-// memoryLimitRegex captures the limit from agent details such as
-// "Your service was restarted because it exceeded its memory limit of 4M...". It is compiled once rather than on every call.
-var memoryLimitRegex = regexp.MustCompile(`exceeded its memory limit of (\S+)\.`)
-
-// MemoryLimitExceededErrorProvider provides error details for MemoryLimitExceededError errors.
-type MemoryLimitExceededErrorProvider struct{}
-
-// Detail returns the error detail for MemoryLimitExceededError errors, parsing out the memory limit from the agent event.
-// When no limit can be parsed, the detail is given without naming the limit.
-func (e MemoryLimitExceededErrorProvider) Detail(rawAgentDetail string) string {
-	matches := memoryLimitRegex.FindStringSubmatch(rawAgentDetail)
-	// a successful match holds exactly the full match plus the captured limit
-	if len(matches) != 2 {
-		return "The service exceeded its memory limit. This may be caused by other errors."
-	}
-
-	return fmt.Sprintf("The service exceeded its memory limit of %s. This may be caused by other errors.", matches[1])
-}
-
-// MitigationSteps returns the mitigation steps for MemoryLimitExceededError errors.
-func (e MemoryLimitExceededErrorProvider) MitigationSteps(rawAgentDetail string) string {
-	return "If other errors exist, address them first. Otherwise, please reduce the memory allocation for the service, then redeploy. Alternatively, you can choose a machine type with higher resource limits in the Advanced settings under the Infrastructure tab."
-}
-
-// Documentation returns the documentation links for MemoryLimitExceededError errors.
-func (e MemoryLimitExceededErrorProvider) Documentation(rawAgentDetail string) []string {
-	return []string{
-		"https://docs.porter.run/standard/deploying-applications/runtime-configuration-options/web-applications#resources",
-	}
-}
-
-// MemoryLimitExceededScaleUpErrorProvider supplies the user-facing details for MemoryLimitExceededError_ScaleUp errors.
-type MemoryLimitExceededScaleUpErrorProvider struct{}
-
-// Detail returns a fixed explanation of MemoryLimitExceededError_ScaleUp errors; the raw agent detail is not consulted.
-func (MemoryLimitExceededScaleUpErrorProvider) Detail(_ string) string {
-	const detail = "The service is requesting more memory than the underlying infrastructure can provide."
-	return detail
-}
-
-// MitigationSteps returns fixed advice for MemoryLimitExceededError_ScaleUp errors; the raw agent detail is not consulted.
-func (MemoryLimitExceededScaleUpErrorProvider) MitigationSteps(_ string) string {
-	const steps = "Please reduce the memory allocation for the service, then redeploy. Alternatively, you can choose a machine type with higher resource limits in Infrastructure -> Advanced settings."
-	return steps
-}
-
-// Documentation returns the documentation links for MemoryLimitExceededError_ScaleUp errors.
-func (MemoryLimitExceededScaleUpErrorProvider) Documentation(_ string) []string {
-	links := []string{
-		"https://docs.porter.run/standard/deploying-applications/runtime-configuration-options/web-applications#resources",
-		"https://docs.porter.run/other/kubernetes-101#resources",
-	}
-	return links
-}
-
-// CPULimitExceededErrorProvider supplies the user-facing details for CPULimitExceededError errors.
-type CPULimitExceededErrorProvider struct{}
-
-// Detail returns a fixed explanation of CPULimitExceededError errors; the raw agent detail is not consulted.
-func (CPULimitExceededErrorProvider) Detail(_ string) string {
-	const detail = "The service exceeded its CPU limit. This may be caused by other errors."
-	return detail
-}
-
-// MitigationSteps returns fixed advice for CPULimitExceededError errors; the raw agent detail is not consulted.
-func (CPULimitExceededErrorProvider) MitigationSteps(_ string) string {
-	const steps = "If other errors exist, address them first. Otherwise, please reduce the CPU allocation for the service, then redeploy. Alternatively, you can choose a machine type with higher resource limits in Infrastructure -> Advanced settings."
-	return steps
-}
-
-// Documentation returns the documentation links for CPULimitExceededError errors.
-func (CPULimitExceededErrorProvider) Documentation(_ string) []string {
-	links := []string{
-		"https://docs.porter.run/standard/deploying-applications/runtime-configuration-options/web-applications#resources",
-	}
-	return links
-}
-
-// CPULimitExceededScaleUpErrorProvider supplies the user-facing details for CPULimitExceededError_ScaleUp errors.
-type CPULimitExceededScaleUpErrorProvider struct{}
-
-// Detail returns a fixed explanation of CPULimitExceededError_ScaleUp errors; the raw agent detail is not consulted.
-func (CPULimitExceededScaleUpErrorProvider) Detail(_ string) string {
-	const detail = "The service is requesting more CPU than the underlying infrastructure can provide."
-	return detail
-}
-
-// MitigationSteps returns fixed advice for CPULimitExceededError_ScaleUp errors; the raw agent detail is not consulted.
-func (CPULimitExceededScaleUpErrorProvider) MitigationSteps(_ string) string {
-	const steps = "Please reduce the CPU allocation for the service, then redeploy. Alternatively, you can choose a machine type with higher resource limits in Infrastructure -> Advanced settings."
-	return steps
-}
-
-// Documentation returns the documentation links for CPULimitExceededError_ScaleUp errors.
-func (CPULimitExceededScaleUpErrorProvider) Documentation(_ string) []string {
-	links := []string{
-		"https://docs.porter.run/standard/deploying-applications/runtime-configuration-options/web-applications#resources",
-		"https://docs.porter.run/other/kubernetes-101#resources",
-	}
-	return links
-}
-
-// CannotBeScheduledErrorProvider provides error details for CannotBeScheduledError errors.
-type CannotBeScheduledErrorProvider struct{}
-
-// Detail returns the error detail for CannotBeScheduledError errors.
-// The raw agent detail is searched case-insensitively for "insufficient cpu" and "insufficient memory"
-// to name the resource that is over-requested; when neither is present a generic detail is returned.
-func (e CannotBeScheduledErrorProvider) Detail(rawAgentDetail string) string {
-	prefix := "The service cannot be scheduled to run on the underlying infrastructure"
-	// the detail is lower-cased once, so every phrase it is compared against must be lower-case too
-	lowercaseDetail := strings.ToLower(rawAgentDetail)
-	insufficientCPU := strings.Contains(lowercaseDetail, "insufficient cpu")
-	insufficientMemory := strings.Contains(lowercaseDetail, "insufficient memory")
-
-	switch {
-	case insufficientCPU && insufficientMemory:
-		return fmt.Sprintf("%s because the service is requesting too much CPU and memory.", prefix)
-	case insufficientCPU:
-		return fmt.Sprintf("%s because the service is requesting too much CPU.", prefix)
-	case insufficientMemory:
-		return fmt.Sprintf("%s because the service is requesting too much memory.", prefix)
-	default:
-		return fmt.Sprintf("%s.", prefix)
-	}
-}
-
-// MitigationSteps returns the mitigation steps for CannotBeScheduledError errors.
-// The raw agent detail is searched case-insensitively for "insufficient cpu" and "insufficient memory"
-// to name the allocation to reduce; when neither is present the advice covers both.
-func (e CannotBeScheduledErrorProvider) MitigationSteps(rawAgentDetail string) string {
-	// the detail is lower-cased once, so every phrase it is compared against must be lower-case too
-	lowercaseDetail := strings.ToLower(rawAgentDetail)
-	suffix := "Alternatively, you can choose a machine type with higher resource limits in Infrastructure -> Advanced settings."
-	insufficientCPU := strings.Contains(lowercaseDetail, "insufficient cpu")
-	insufficientMemory := strings.Contains(lowercaseDetail, "insufficient memory")
-
-	switch {
-	case insufficientCPU && insufficientMemory:
-		return fmt.Sprintf("Please reduce the CPU and memory allocation for the service, then redeploy. %s", suffix)
-	case insufficientCPU:
-		return fmt.Sprintf("Please reduce the CPU allocation for the service, then redeploy. %s", suffix)
-	case insufficientMemory:
-		return fmt.Sprintf("Please reduce the memory allocation for the service, then redeploy. %s", suffix)
-	default:
-		return fmt.Sprintf("Please try reducing the CPU and memory allocation for the service, then redeploy. %s", suffix)
-	}
-}
-
-// Documentation returns the documentation links for CannotBeScheduledError errors.
-func (e CannotBeScheduledErrorProvider) Documentation(rawAgentDetail string) []string {
-	return []string{
-		"https://docs.porter.run/standard/deploying-applications/runtime-configuration-options/web-applications#resources",
-		"https://docs.porter.run/other/kubernetes-101#resources",
-	}
-}

+ 0 - 98
internal/porter_app/notifications/translate.go

@@ -1,98 +0,0 @@
-package notifications
-
-import (
-	"context"
-	"fmt"
-	"regexp"
-	"strings"
-
-	"github.com/porter-dev/porter/internal/porter_app/notifications/porter_error"
-	"github.com/porter-dev/porter/internal/telemetry"
-)
-
-// hydrateNotificationWithError returns a copy of notification whose Error field is
-// populated with the user-facing translation of the agent's summary and detail.
-// The raw and translated values are recorded as attributes on the telemetry span.
-func hydrateNotificationWithError(ctx context.Context, notification Notification) Notification {
-	ctx, span := telemetry.NewSpan(ctx, "hydrate-notification-with-user-facing-details")
-	defer span.End()
-
-	// notification is received by value, so assigning it yields an independent copy to fill in.
-	hydrated := notification
-
-	code := porter_error.ErrorCode(hydrated.AgentSummary, hydrated.AgentDetail)
-	hydrated.Error = createError(ctx, code, hydrated.AgentSummary, hydrated.AgentDetail, hydrated.ServiceName)
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "agent-summary", Value: hydrated.AgentSummary},
-		telemetry.AttributeKV{Key: "human-readable-summary", Value: hydrated.Error.Summary},
-		telemetry.AttributeKV{Key: "agent-detail", Value: hydrated.AgentDetail},
-		telemetry.AttributeKV{Key: "human-readable-detail", Value: hydrated.Error.Detail},
-		telemetry.AttributeKV{Key: "error-code", Value: hydrated.Error.Code},
-	)
-
-	return hydrated
-}
-
-// createError builds the PorterError for errorCode. The summary and detail start out as
-// lightly translated agent text; when a details provider is registered for the code, the
-// provider supplies the detail, mitigation steps and documentation instead.
-func createError(ctx context.Context, errorCode porter_error.PorterErrorCode, agentSummary, agentDetail, serviceName string) porter_error.PorterError {
-	ctx, span := telemetry.NewSpan(ctx, "create-error")
-	defer span.End()
-
-	telemetry.WithAttributes(span,
-		telemetry.AttributeKV{Key: "agent-summary", Value: agentSummary},
-		telemetry.AttributeKV{Key: "agent-detail", Value: agentDetail},
-		telemetry.AttributeKV{Key: "error-code", Value: int(errorCode)},
-	)
-
-	// Fallback values derived directly from what the agent reported.
-	summary := translateAgentSummary(agentSummary, serviceName)
-	detail := strings.ReplaceAll(agentDetail, "application", "service")
-
-	result := porter_error.PorterError{
-		Code:            errorCode,
-		Summary:         summary,
-		Detail:          detail,
-		MitigationSteps: "",
-		Documentation:   []string{},
-	}
-
-	// An ignorable error needs no further hydration.
-	if errorCode == porter_error.PorterErrorCode_Ignorable {
-		return result
-	}
-
-	provider, known := porter_error.ErrorCodeToProvider[errorCode]
-	if known {
-		result.Detail = provider.Detail(agentDetail)
-		result.MitigationSteps = provider.MitigationSteps(agentDetail)
-		result.Documentation = provider.Documentation(agentDetail)
-	}
-
-	// Unknown codes and generic non-zero exit codes are reported to telemetry so they can be
-	// handled later; the error is deliberately discarded so the caller is not blocked.
-	if errorCode == porter_error.PorterErrorCode_NonZeroExitCode || !known {
-		_ = telemetry.Error(ctx, span, nil, "unhandled error code, passing along raw agent details")
-	}
-
-	return result
-}
-
-// agentSummaryAppNamespacePattern matches the "application <name> in namespace <namespace>"
-// phrase found in agent summaries. It is compiled once at package scope instead of on every call.
-var agentSummaryAppNamespacePattern = regexp.MustCompile(`application (\S+) in namespace (\S+)`)
-
-// translateAgentSummary translates the agent summary to a human readable summary
-// this is necessary until we make updates to the agent
-//
-// The "application <name> in namespace <namespace>" phrase is replaced with
-// "service <serviceName>", remaining occurrences of "application" become "service" and
-// "cpu" becomes "CPU". If the result contains "because " exactly once, only the text after
-// it is returned, with its first character upper-cased; otherwise the whole rewritten
-// summary is returned.
-func translateAgentSummary(agentSummary, serviceName string) string {
-	// Example summary from the agent: "Your application test-1 in namespace default has crashed because the application was restarted due to an error"
-	// The literal replacement is used so that a "$" in serviceName is inserted verbatim
-	// rather than being expanded as a capture-group reference.
-	humanReadableSummary := agentSummaryAppNamespacePattern.ReplaceAllLiteralString(agentSummary, fmt.Sprintf("service %s", serviceName))
-	humanReadableSummary = strings.ReplaceAll(humanReadableSummary, "application", "service")
-	humanReadableSummary = strings.ReplaceAll(humanReadableSummary, "cpu", "CPU")
-
-	// We just want the reason, so we only take the part after "because "
-	// If we can't parse the summary, we just return the original summary with the replacement done above
-	parts := strings.SplitAfter(humanReadableSummary, "because ")
-	if len(parts) != 2 {
-		return humanReadableSummary
-	}
-
-	return capitalizeFirstRune(parts[1])
-}
-
-// capitalizeFirstRune upper-cases the first rune of s and leaves the rest untouched.
-// It works on rune boundaries, so a multi-byte first character is not split.
-func capitalizeFirstRune(s string) string {
-	// Ranging over a string yields the byte offset at which each rune starts, so the
-	// first non-zero offset is the byte length of the first rune.
-	for i := range s {
-		if i > 0 {
-			return strings.ToUpper(s[:i]) + s[i:]
-		}
-	}
-	// s is empty or a single rune.
-	return strings.ToUpper(s)
-}

+ 9 - 0
internal/porter_app/revisions.go

@@ -37,6 +37,8 @@ type Revision struct {
 	DeploymentTargetID string `json:"deployment_target_id"`
 	// Env is the environment variables for the revision
 	Env environment_groups.EnvironmentGroup `json:"env,omitempty"`
+	// AppInstanceID is the id of the app instance the revision is associated with
+	AppInstanceID uuid.UUID `json:"app_instance_id"`
 }
 
 // GetAppRevisionInput is the input struct for GetAppRevisions
@@ -112,6 +114,12 @@ func EncodedRevisionFromProto(ctx context.Context, appRevision *porterv1.AppRevi
 		return revision, telemetry.Error(ctx, span, err, "error getting app revision status from proto")
 	}
 
+	appInstanceIdStr := appRevision.AppInstanceId
+	appInstanceId, err := uuid.Parse(appInstanceIdStr)
+	if err != nil {
+		return revision, telemetry.Error(ctx, span, err, "error parsing app instance id")
+	}
+
 	revision = Revision{
 		B64AppProto:        b64,
 		Status:             status,
@@ -120,6 +128,7 @@ func EncodedRevisionFromProto(ctx context.Context, appRevision *porterv1.AppRevi
 		CreatedAt:          appRevision.CreatedAt.AsTime(),
 		UpdatedAt:          appRevision.UpdatedAt.AsTime(),
 		DeploymentTargetID: appRevision.DeploymentTargetId,
+		AppInstanceID:      appInstanceId,
 	}
 
 	return revision, nil

+ 5 - 7
internal/repository/gorm/porter_app_event.go

@@ -169,22 +169,20 @@ func (repo *PorterAppEventRepository) ReadEvent(ctx context.Context, id uuid.UUI
 	return appEvent, nil
 }
 
-// ReadNotificationsByAppRevisionID returns a list of notifications for a given porter app id and app revision ID
-func (repo *PorterAppEventRepository) ReadNotificationsByAppRevisionID(ctx context.Context, porterAppID uint, appRevisionId string) ([]*models.PorterAppEvent, error) {
+// ReadNotificationsByAppRevisionID returns a list of notifications for a given porter app instance id and app revision ID
+func (repo *PorterAppEventRepository) ReadNotificationsByAppRevisionID(ctx context.Context, porterAppInstanceId uuid.UUID, appRevisionId string) ([]*models.PorterAppEvent, error) {
 	notifications := []*models.PorterAppEvent{}
 
 	if appRevisionId == "" {
 		return notifications, errors.New("invalid app revision ID supplied")
 	}
 
-	if porterAppID == 0 {
-		return notifications, errors.New("invalid porter app ID supplied")
+	if porterAppInstanceId == uuid.Nil {
+		return notifications, errors.New("invalid porter app instance ID supplied")
 	}
 
-	strAppID := strconv.Itoa(int(porterAppID))
-
 	// TODO: make app_revision_id a column in porter_app_event table: https://linear.app/porter/issue/POR-2096/add-app-revision-id-column-to-porter-app-events-table
-	if err := repo.db.Where("porter_app_id = ? AND type = 'NOTIFICATION' AND metadata->>'app_revision_id' = ?", strAppID, appRevisionId).Find(&notifications).Error; err != nil {
+	if err := repo.db.Where("app_instance_id = ? AND type = 'NOTIFICATION' AND metadata->>'app_revision_id' = ?", porterAppInstanceId, appRevisionId).Find(&notifications).Error; err != nil {
 		return notifications, err
 	}
 

+ 1 - 1
internal/repository/porter_app_event.go

@@ -20,5 +20,5 @@ type PorterAppEventRepository interface {
 	ReadDeployEventByRevision(ctx context.Context, porterAppID uint, revision float64) (models.PorterAppEvent, error)
 	// ReadDeployEventByAppRevisionID returns a deploy event for a given porter app id and app revision ID
 	ReadDeployEventByAppRevisionID(ctx context.Context, porterAppID uint, appRevisionID string) (models.PorterAppEvent, error)
-	ReadNotificationsByAppRevisionID(ctx context.Context, porterAppID uint, appRevisionID string) ([]*models.PorterAppEvent, error)
+	ReadNotificationsByAppRevisionID(ctx context.Context, porterAppInstanceID uuid.UUID, appRevisionID string) ([]*models.PorterAppEvent, error)
 }

+ 1 - 1
internal/repository/test/porter_app_event.go

@@ -54,6 +54,6 @@ func (repo *PorterAppEventRepository) ReadDeployEventByAppRevisionID(ctx context
 }
 
 // ReadNotificationsByAppRevisionID is a test method
-func (repo *PorterAppEventRepository) ReadNotificationsByAppRevisionID(ctx context.Context, porterAppID uint, appRevisionID string) ([]*models.PorterAppEvent, error) {
+func (repo *PorterAppEventRepository) ReadNotificationsByAppRevisionID(ctx context.Context, porterAppInstanceID uuid.UUID, appRevisionID string) ([]*models.PorterAppEvent, error) {
 	return nil, errors.New("cannot read database")
 }