status.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. package porter_app
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "time"
  7. "github.com/porter-dev/porter/internal/deployment_target"
  8. "github.com/porter-dev/porter/internal/kubernetes"
  9. "github.com/porter-dev/porter/internal/telemetry"
  10. v1 "k8s.io/api/core/v1"
  11. )
  // Kubernetes label keys used in this file to select an app's pods and to
  // read the revision a pod belongs to.
  const (
  	// LabelKey_DeploymentTargetID is the label key holding the deployment target id.
  	// It is part of the pod selector built by GetServiceStatus.
  	LabelKey_DeploymentTargetID = "porter.run/deployment-target-id"

  	// LabelKey_AppName is the label key holding the app name.
  	// It is part of the pod selector built by GetServiceStatus.
  	LabelKey_AppName = "porter.run/app-name"

  	// LabelKey_ServiceName is the label key holding the service name.
  	// It is part of the pod selector built by GetServiceStatus.
  	LabelKey_ServiceName = "porter.run/service-name"

  	// LabelKey_AppRevisionID is the label key holding the app revision id.
  	// It is read from each pod to group pods by revision.
  	LabelKey_AppRevisionID = "porter.run/app-revision-id"
  )
  22. // ServiceStatus describes the status of a service of a porter app
  23. type ServiceStatus struct {
  24. ServiceName string `json:"service_name"`
  25. RevisionStatusList []RevisionStatus `json:"revision_status_list"`
  26. }
  27. // RevisionStatus describes the status of a revision of a service of a porter app
  28. type RevisionStatus struct {
  29. RevisionID string `json:"revision_id"`
  30. RevisionNumber int `json:"revision_number"`
  31. InstanceStatusList []InstanceStatus `json:"instance_status_list"`
  32. }
  // InstanceStatusDescriptor is a short string summarizing the state of an instance.
  type InstanceStatusDescriptor string

  // The set of values an InstanceStatusDescriptor takes in this file.
  const (
  	// InstanceStatusDescriptor_Failed marks an instance that has failed.
  	InstanceStatusDescriptor_Failed InstanceStatusDescriptor = "FAILED"

  	// InstanceStatusDescriptor_Pending marks an instance that is still pending.
  	InstanceStatusDescriptor_Pending InstanceStatusDescriptor = "PENDING"

  	// InstanceStatusDescriptor_Running marks an instance that is running normally.
  	InstanceStatusDescriptor_Running InstanceStatusDescriptor = "RUNNING"

  	// InstanceStatusDescriptor_Succeeded marks an instance that has succeeded.
  	InstanceStatusDescriptor_Succeeded InstanceStatusDescriptor = "SUCCEEDED"

  	// InstanceStatusDescriptor_Unknown marks an instance whose state is unknown.
  	InstanceStatusDescriptor_Unknown InstanceStatusDescriptor = "UNKNOWN"
  )
  // CrashLoopBackOff is the container waiting reason that identifies a pod stuck in a crash loop backoff.
  // InstanceStatusFromPod compares the app container's waiting reason against this value.
  const CrashLoopBackOff = "CrashLoopBackOff"
  49. // InstanceStatus describes the status of an instance of a revision of a service of a porter app
  50. type InstanceStatus struct {
  51. Status InstanceStatusDescriptor `json:"status"`
  52. RestartCount int `json:"restart_count"`
  53. CreationTimestamp time.Time `json:"creation_timestamp"`
  54. }
  55. // GetServiceStatusInput is the input type for GetServiceStatus
  56. type GetServiceStatusInput struct {
  57. DeploymentTarget deployment_target.DeploymentTarget
  58. Agent kubernetes.Agent
  59. AppName string
  60. ServiceName string
  61. AppRevisions []Revision
  62. }
  63. // GetServiceStatus returns the status of a service of a porter app
  64. func GetServiceStatus(ctx context.Context, inp GetServiceStatusInput) (ServiceStatus, error) {
  65. ctx, span := telemetry.NewSpan(ctx, "get-service-status")
  66. defer span.End()
  67. telemetry.WithAttributes(span,
  68. telemetry.AttributeKV{Key: "app-name", Value: inp.AppName},
  69. telemetry.AttributeKV{Key: "service-name", Value: inp.ServiceName},
  70. telemetry.AttributeKV{Key: "deployment-target-id", Value: inp.DeploymentTarget.ID},
  71. telemetry.AttributeKV{Key: "deployment-target-namespace", Value: inp.DeploymentTarget.Namespace},
  72. )
  73. serviceStatus := ServiceStatus{
  74. ServiceName: inp.ServiceName,
  75. }
  76. if inp.AppName == "" {
  77. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide app name")
  78. }
  79. if inp.ServiceName == "" {
  80. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide service name")
  81. }
  82. if inp.DeploymentTarget.ID == "" {
  83. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide deployment target id")
  84. }
  85. if inp.DeploymentTarget.Namespace == "" {
  86. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide deployment target namespace")
  87. }
  88. selectorString := fmt.Sprintf(
  89. "%s=%s,%s=%s,%s=%s",
  90. LabelKey_DeploymentTargetID, inp.DeploymentTarget.ID,
  91. LabelKey_AppName, inp.AppName,
  92. LabelKey_ServiceName, inp.ServiceName,
  93. )
  94. podList, err := inp.Agent.GetPodsByLabel(selectorString, inp.DeploymentTarget.Namespace)
  95. if err != nil {
  96. return serviceStatus, telemetry.Error(ctx, span, err, "error getting pods by label")
  97. }
  98. if podList == nil {
  99. return serviceStatus, telemetry.Error(ctx, span, nil, "pod list is nil")
  100. }
  101. revisionStatusList, err := revisionStatusFromPods(ctx, revisionStatusFromPodsInput{
  102. PodList: *podList,
  103. AppRevisions: inp.AppRevisions,
  104. AppName: inp.AppName,
  105. ServiceName: inp.ServiceName,
  106. })
  107. if err != nil {
  108. return serviceStatus, telemetry.Error(ctx, span, err, "error processing pods")
  109. }
  110. serviceStatus.RevisionStatusList = revisionStatusList
  111. return serviceStatus, nil
  112. }
  113. type revisionStatusFromPodsInput struct {
  114. PodList v1.PodList
  115. AppRevisions []Revision
  116. AppName string
  117. ServiceName string
  118. }
  119. func revisionStatusFromPods(ctx context.Context, inp revisionStatusFromPodsInput) ([]RevisionStatus, error) {
  120. ctx, span := telemetry.NewSpan(ctx, "revision-status-from-pods")
  121. defer span.End()
  122. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "num-pods", Value: len(inp.PodList.Items)})
  123. revisionStatusList := []RevisionStatus{}
  124. revisionToInstanceStatusMap := map[string][]InstanceStatus{}
  125. for _, pod := range inp.PodList.Items {
  126. revisionID := pod.Labels[LabelKey_AppRevisionID]
  127. if revisionID == "" {
  128. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "pod-name", Value: pod.Name})
  129. return revisionStatusList, telemetry.Error(ctx, span, nil, "pod does not have revision id label")
  130. }
  131. instanceStatusList, ok := revisionToInstanceStatusMap[revisionID]
  132. if !ok {
  133. instanceStatusList = []InstanceStatus{}
  134. }
  135. instanceStatus, err := InstanceStatusFromPod(ctx, InstanceStatusFromPodInput{
  136. Pod: pod,
  137. AppName: inp.AppName,
  138. ServiceName: inp.ServiceName,
  139. })
  140. if err != nil {
  141. continue
  142. }
  143. instanceStatusList = append(instanceStatusList, instanceStatus)
  144. revisionToInstanceStatusMap[revisionID] = instanceStatusList
  145. }
  146. for revisionId, instanceStatusList := range revisionToInstanceStatusMap {
  147. revisionNumber, err := getRevisionNumberFromRevisionId(revisionId, inp.AppRevisions)
  148. if err != nil {
  149. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "revision-id", Value: revisionId})
  150. return revisionStatusList, telemetry.Error(ctx, span, err, "error getting revision number from revision id")
  151. }
  152. // no number for this revision yet, so skip it from reporting
  153. if revisionNumber == 0 {
  154. continue
  155. }
  156. revisionStatus := RevisionStatus{
  157. RevisionID: revisionId,
  158. RevisionNumber: revisionNumber,
  159. InstanceStatusList: instanceStatusList,
  160. }
  161. revisionStatusList = append(revisionStatusList, revisionStatus)
  162. }
  163. return revisionStatusList, nil
  164. }
  165. // InstanceStatusFromPodInput contains all the data necessary to get the status of the primary service container from a pod
  166. type InstanceStatusFromPodInput struct {
  167. Pod v1.Pod
  168. AppName string
  169. ServiceName string
  170. }
  171. // InstanceStatusFromPod gets the status of the primary service container from a pod
  172. func InstanceStatusFromPod(ctx context.Context, inp InstanceStatusFromPodInput) (InstanceStatus, error) {
  173. ctx, span := telemetry.NewSpan(ctx, "instance-status-from-pod")
  174. defer span.End()
  175. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "pod-name", Value: inp.Pod.Name})
  176. instanceStatus := InstanceStatus{}
  177. // find the container running the app code. Note that this is conditioned on the fact that
  178. // in our worker/web/job charts, there is one container created with this name during the deployment
  179. // there may be other containers (like the sidecar container for jobs), but we only care about the app container for reporting status
  180. appContainerName := fmt.Sprintf("%s-%s", inp.AppName, inp.ServiceName)
  181. var appContainerStatus v1.ContainerStatus
  182. for _, containerStatus := range inp.Pod.Status.ContainerStatuses {
  183. if containerStatus.Name == appContainerName {
  184. appContainerStatus = containerStatus
  185. break
  186. }
  187. }
  188. if appContainerStatus.Name == "" {
  189. return instanceStatus, telemetry.Error(ctx, span, nil, "app container not found")
  190. }
  191. instanceStatus.CreationTimestamp = inp.Pod.CreationTimestamp.Time
  192. instanceStatus.RestartCount = int(appContainerStatus.RestartCount)
  193. switch inp.Pod.Status.Phase {
  194. case v1.PodFailed:
  195. instanceStatus.Status = InstanceStatusDescriptor_Failed
  196. case v1.PodPending:
  197. instanceStatus.Status = InstanceStatusDescriptor_Pending
  198. case v1.PodRunning:
  199. instanceStatus.Status = InstanceStatusDescriptor_Running
  200. case v1.PodSucceeded:
  201. instanceStatus.Status = InstanceStatusDescriptor_Succeeded
  202. case v1.PodUnknown:
  203. instanceStatus.Status = InstanceStatusDescriptor_Unknown
  204. }
  205. if appContainerStatus.State.Waiting != nil && appContainerStatus.State.Waiting.Reason == CrashLoopBackOff {
  206. instanceStatus.Status = InstanceStatusDescriptor_Failed
  207. }
  208. return instanceStatus, nil
  209. }
  210. func getRevisionNumberFromRevisionId(revisionId string, appRevisions []Revision) (int, error) {
  211. for _, revision := range appRevisions {
  212. if revision.ID == revisionId {
  213. return int(revision.RevisionNumber), nil
  214. }
  215. }
  216. return 0, errors.New("revision id not found in app revisions")
  217. }