status.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. package porter_app
import (
	"context"
	"errors"
	"fmt"
	"sort"
	"time"

	"github.com/porter-dev/porter/internal/deployment_target"
	"github.com/porter-dev/porter/internal/kubernetes"
	"github.com/porter-dev/porter/internal/telemetry"
	v1 "k8s.io/api/core/v1"
)
  12. const (
  13. // LabelKey_DeploymentTargetID is the label key for the deployment target id
  14. LabelKey_DeploymentTargetID = "porter.run/deployment-target-id"
  15. // LabelKey_AppName is the label key for the app name
  16. LabelKey_AppName = "porter.run/app-name"
  17. // LabelKey_ServiceName is the label key for the service name
  18. LabelKey_ServiceName = "porter.run/service-name"
  19. // LabelKey_AppRevisionID is the label key for the app revision id
  20. LabelKey_AppRevisionID = "porter.run/app-revision-id"
  21. )
  22. // ServiceStatus describes the status of a service of a porter app
  23. type ServiceStatus struct {
  24. ServiceName string `json:"service_name"`
  25. RevisionStatusList []RevisionStatus `json:"revision_status_list"`
  26. }
  27. // RevisionStatus describes the status of a revision of a service of a porter app
  28. type RevisionStatus struct {
  29. RevisionID string `json:"revision_id"`
  30. RevisionNumber int `json:"revision_number"`
  31. InstanceStatusList []InstanceStatus `json:"instance_status_list"`
  32. }
  33. // InstanceStatusDescriptor is a string that summarizes the status of an instance
  34. type InstanceStatusDescriptor string
  35. const (
  36. // InstanceStatusDescriptor_Failed means the instance has failed
  37. InstanceStatusDescriptor_Failed InstanceStatusDescriptor = "FAILED"
  38. // InstanceStatusDescriptor_Pending means the instance is pending
  39. InstanceStatusDescriptor_Pending InstanceStatusDescriptor = "PENDING"
  40. // InstanceStatusDescriptor_Running means the instance is running normally
  41. InstanceStatusDescriptor_Running InstanceStatusDescriptor = "RUNNING"
  42. // InstanceStatusDescriptor_Succeeded means the instance is succeeded
  43. InstanceStatusDescriptor_Succeeded InstanceStatusDescriptor = "SUCCEEDED"
  44. // InstanceStatusDescriptor_Unknown means the instance is unknown
  45. InstanceStatusDescriptor_Unknown InstanceStatusDescriptor = "UNKNOWN"
  46. )
  47. // CrashLoopBackOff is a string that describes the status of a pod that is in a crash loop backoff
  48. const CrashLoopBackOff = "CrashLoopBackOff"
  49. // InstanceStatus describes the status of an instance of a revision of a service of a porter app
  50. type InstanceStatus struct {
  51. Status InstanceStatusDescriptor `json:"status"`
  52. RestartCount int `json:"restart_count"`
  53. CreationTimestamp time.Time `json:"creation_timestamp"`
  54. Name string `json:"name"`
  55. }
  56. // GetServiceStatusInput is the input type for GetServiceStatus
  57. type GetServiceStatusInput struct {
  58. DeploymentTarget deployment_target.DeploymentTarget
  59. Agent kubernetes.Agent
  60. AppName string
  61. ServiceName string
  62. AppRevisions []Revision
  63. }
  64. // GetServiceStatus returns the status of a service of a porter app
  65. func GetServiceStatus(ctx context.Context, inp GetServiceStatusInput) (ServiceStatus, error) {
  66. ctx, span := telemetry.NewSpan(ctx, "get-service-status")
  67. defer span.End()
  68. telemetry.WithAttributes(span,
  69. telemetry.AttributeKV{Key: "app-name", Value: inp.AppName},
  70. telemetry.AttributeKV{Key: "service-name", Value: inp.ServiceName},
  71. telemetry.AttributeKV{Key: "deployment-target-id", Value: inp.DeploymentTarget.ID},
  72. telemetry.AttributeKV{Key: "deployment-target-namespace", Value: inp.DeploymentTarget.Namespace},
  73. )
  74. serviceStatus := ServiceStatus{
  75. ServiceName: inp.ServiceName,
  76. }
  77. if inp.AppName == "" {
  78. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide app name")
  79. }
  80. if inp.ServiceName == "" {
  81. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide service name")
  82. }
  83. if inp.DeploymentTarget.ID == "" {
  84. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide deployment target id")
  85. }
  86. if inp.DeploymentTarget.Namespace == "" {
  87. return serviceStatus, telemetry.Error(ctx, span, nil, "must provide deployment target namespace")
  88. }
  89. selectorString := fmt.Sprintf(
  90. "%s=%s,%s=%s,%s=%s",
  91. LabelKey_DeploymentTargetID, inp.DeploymentTarget.ID,
  92. LabelKey_AppName, inp.AppName,
  93. LabelKey_ServiceName, inp.ServiceName,
  94. )
  95. podList, err := inp.Agent.GetPodsByLabel(selectorString, inp.DeploymentTarget.Namespace)
  96. if err != nil {
  97. return serviceStatus, telemetry.Error(ctx, span, err, "error getting pods by label")
  98. }
  99. if podList == nil {
  100. return serviceStatus, telemetry.Error(ctx, span, nil, "pod list is nil")
  101. }
  102. revisionStatusList, err := revisionStatusFromPods(ctx, revisionStatusFromPodsInput{
  103. PodList: *podList,
  104. AppRevisions: inp.AppRevisions,
  105. AppName: inp.AppName,
  106. ServiceName: inp.ServiceName,
  107. })
  108. if err != nil {
  109. return serviceStatus, telemetry.Error(ctx, span, err, "error processing pods")
  110. }
  111. serviceStatus.RevisionStatusList = revisionStatusList
  112. return serviceStatus, nil
  113. }
  114. type revisionStatusFromPodsInput struct {
  115. PodList v1.PodList
  116. AppRevisions []Revision
  117. AppName string
  118. ServiceName string
  119. }
  120. func revisionStatusFromPods(ctx context.Context, inp revisionStatusFromPodsInput) ([]RevisionStatus, error) {
  121. ctx, span := telemetry.NewSpan(ctx, "revision-status-from-pods")
  122. defer span.End()
  123. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "num-pods", Value: len(inp.PodList.Items)})
  124. revisionStatusList := []RevisionStatus{}
  125. revisionToInstanceStatusMap := map[string][]InstanceStatus{}
  126. for _, pod := range inp.PodList.Items {
  127. revisionID := pod.Labels[LabelKey_AppRevisionID]
  128. if revisionID == "" {
  129. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "pod-name", Value: pod.Name})
  130. return revisionStatusList, telemetry.Error(ctx, span, nil, "pod does not have revision id label")
  131. }
  132. instanceStatusList, ok := revisionToInstanceStatusMap[revisionID]
  133. if !ok {
  134. instanceStatusList = []InstanceStatus{}
  135. }
  136. instanceStatus, err := InstanceStatusFromPod(ctx, InstanceStatusFromPodInput{
  137. Pod: pod,
  138. AppName: inp.AppName,
  139. ServiceName: inp.ServiceName,
  140. })
  141. if err != nil {
  142. continue
  143. }
  144. instanceStatusList = append(instanceStatusList, instanceStatus)
  145. revisionToInstanceStatusMap[revisionID] = instanceStatusList
  146. }
  147. for revisionId, instanceStatusList := range revisionToInstanceStatusMap {
  148. revisionNumber, err := getRevisionNumberFromRevisionId(revisionId, inp.AppRevisions)
  149. if err != nil {
  150. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "revision-id", Value: revisionId})
  151. return revisionStatusList, telemetry.Error(ctx, span, err, "error getting revision number from revision id")
  152. }
  153. // no number for this revision yet, so skip it from reporting
  154. if revisionNumber == 0 {
  155. continue
  156. }
  157. revisionStatus := RevisionStatus{
  158. RevisionID: revisionId,
  159. RevisionNumber: revisionNumber,
  160. InstanceStatusList: instanceStatusList,
  161. }
  162. revisionStatusList = append(revisionStatusList, revisionStatus)
  163. }
  164. return revisionStatusList, nil
  165. }
  166. // InstanceStatusFromPodInput contains all the data necessary to get the status of the primary service container from a pod
  167. type InstanceStatusFromPodInput struct {
  168. Pod v1.Pod
  169. AppName string
  170. ServiceName string
  171. }
  172. // InstanceStatusFromPod gets the status of the primary service container from a pod
  173. func InstanceStatusFromPod(ctx context.Context, inp InstanceStatusFromPodInput) (InstanceStatus, error) {
  174. ctx, span := telemetry.NewSpan(ctx, "instance-status-from-pod")
  175. defer span.End()
  176. telemetry.WithAttributes(span, telemetry.AttributeKV{Key: "pod-name", Value: inp.Pod.Name})
  177. instanceStatus := InstanceStatus{
  178. Name: inp.Pod.Name,
  179. }
  180. // find the container running the app code. Note that this is conditioned on the fact that
  181. // in our worker/web/job charts, there is one container created with this name during the deployment
  182. // there may be other containers (like the sidecar container for jobs), but we only care about the app container for reporting status
  183. appContainerName := fmt.Sprintf("%s-%s", inp.AppName, inp.ServiceName)
  184. var appContainerStatus v1.ContainerStatus
  185. for _, containerStatus := range inp.Pod.Status.ContainerStatuses {
  186. if containerStatus.Name == appContainerName {
  187. appContainerStatus = containerStatus
  188. break
  189. }
  190. }
  191. if appContainerStatus.Name == "" {
  192. return instanceStatus, telemetry.Error(ctx, span, nil, "app container not found")
  193. }
  194. instanceStatus.CreationTimestamp = inp.Pod.CreationTimestamp.Time
  195. instanceStatus.RestartCount = int(appContainerStatus.RestartCount)
  196. switch inp.Pod.Status.Phase {
  197. case v1.PodFailed:
  198. instanceStatus.Status = InstanceStatusDescriptor_Failed
  199. case v1.PodPending:
  200. instanceStatus.Status = InstanceStatusDescriptor_Pending
  201. case v1.PodRunning:
  202. instanceStatus.Status = InstanceStatusDescriptor_Running
  203. case v1.PodSucceeded:
  204. instanceStatus.Status = InstanceStatusDescriptor_Succeeded
  205. case v1.PodUnknown:
  206. instanceStatus.Status = InstanceStatusDescriptor_Unknown
  207. }
  208. if appContainerStatus.State.Waiting != nil && appContainerStatus.State.Waiting.Reason == CrashLoopBackOff {
  209. instanceStatus.Status = InstanceStatusDescriptor_Failed
  210. }
  211. return instanceStatus, nil
  212. }
  213. func getRevisionNumberFromRevisionId(revisionId string, appRevisions []Revision) (int, error) {
  214. for _, revision := range appRevisions {
  215. if revision.ID == revisionId {
  216. return int(revision.RevisionNumber), nil
  217. }
  218. }
  219. return 0, errors.New("revision id not found in app revisions")
  220. }