Sfoglia il codice sorgente

Merge pull request #2297 from porter-dev/staging

Provisioner improvements + hotfixes -> production
abelanger5 3 anni fa
parent
commit
d2c7153dee

+ 10 - 39
api/server/handlers/environment/finalize_deployment.go

@@ -4,7 +4,6 @@ import (
 	"context"
 	"fmt"
 	"net/http"
-	"net/url"
 	"strings"
 
 	"github.com/google/go-github/v41/github"
@@ -124,48 +123,20 @@ func (c *FinalizeDeploymentHandler) ServeHTTP(w http.ResponseWriter, r *http.Req
 		return
 	}
 
-	workflowRun, err := commonutils.GetLatestWorkflowRun(client, depl.RepoOwner, depl.RepoName,
-		fmt.Sprintf("porter_%s_env.yml", env.Name), depl.PRBranchFrom)
-
-	if err != nil {
-		c.HandleAPIError(w, r, apierrors.NewErrInternal(err))
-		return
-	}
+	commentBody := "## Porter Preview Environments\n"
 
 	if depl.Subdomain == "" {
-		depl.Subdomain = "*Ingress is disabled for this deployment*"
+		commentBody += fmt.Sprintf(
+			"✅ The latest SHA ([`%s`](https://github.com/%s/%s/commit/%s)) has been successfully deployed.",
+			depl.CommitSHA, depl.RepoOwner, depl.RepoName, depl.CommitSHA,
+		)
+	} else {
+		commentBody += fmt.Sprintf(
+			"✅ The latest SHA ([`%s`](https://github.com/%s/%s/commit/%s)) has been successfully deployed to %s",
+			depl.CommitSHA, depl.RepoOwner, depl.RepoName, depl.CommitSHA, depl.Subdomain,
+		)
 	}
 
-	// write comment in PR
-	commentBody := fmt.Sprintf(
-		"## Porter Preview Environments\n"+
-			"✅ All changes deployed successfully\n"+
-			"||Deployment Information|\n"+
-			"|-|-|\n"+
-			"| Latest SHA | [`%s`](https://github.com/%s/%s/commit/%s) |\n"+
-			"| Live URL | %s |\n"+
-			"| Build Logs | %s |\n"+
-			"| Porter Deployments URL | %s/preview-environments/details/%s?environment_id=%d&project_id=%d&cluster=%s |",
-		depl.CommitSHA, depl.RepoOwner, depl.RepoName, depl.CommitSHA, depl.Subdomain, workflowRun.GetHTMLURL(),
-		c.Config().ServerConf.ServerURL, depl.Namespace, depl.EnvironmentID, project.ID, url.QueryEscape(cluster.Name),
-	)
-
-	// if len(request.SuccessfulResources) > 0 {
-	// 	commentBody += "\n#### Successfully deployed resources\n"
-
-	// 	for _, res := range request.SuccessfulResources {
-	// 		if res.ReleaseType == "job" {
-	// 			commentBody += fmt.Sprintf("- [`%s`](%s/jobs/%s/%s/%s?project_id=%d)\n",
-	// 				res.ReleaseName, c.Config().ServerConf.ServerURL, cluster.Name, depl.Namespace,
-	// 				res.ReleaseName, project.ID)
-	// 		} else {
-	// 			commentBody += fmt.Sprintf("- [`%s`](%s/applications/%s/%s/%s?project_id=%d)\n",
-	// 				res.ReleaseName, c.Config().ServerConf.ServerURL, cluster.Name, depl.Namespace,
-	// 				res.ReleaseName, project.ID)
-	// 		}
-	// 	}
-	// }
-
 	err = createOrUpdateComment(client, c.Repo(), env.NewCommentsDisabled, depl, github.String(commentBody))
 
 	if err != nil {

+ 17 - 0
api/server/handlers/infra/forms.go

@@ -599,6 +599,14 @@ tabs:
       variable: additional_private_subnets
       settings:
         default: false
+  - name: subnet_multiplicity
+    show_if: additional_private_subnets
+    contents:
+    - type: number-input
+      label: "Multiplicity of the subnet within each AZ."
+      variable: additional_private_subnets_multiplicity
+      settings:
+        default: 3
   - name: nginx_settings
     contents:
     - type: heading
@@ -608,6 +616,15 @@ tabs:
       label: Disable NGINX load balancer and expose NGINX only on a cluster IP address.
       settings:
         default: false
+  - name: prometheus_settings
+    contents:
+    - type: heading
+      label: Prometheus Settings
+    - type: checkbox
+      variable: additional_prometheus_node_group
+      label: Add an additional prometheus node group to ensure monitoring stability.
+      settings:
+        default: false
 `
 
 const gcrForm = `name: GCR

+ 23 - 0
api/server/handlers/release/upgrade.go

@@ -118,6 +118,29 @@ func (c *UpgradeReleaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request
 		conf.Chart = chart
 	}
 
+	// if LatestRevision is set, check that the revision matches the latest revision in the database
+	if request.LatestRevision != 0 {
+		currHelmRelease, err := helmAgent.GetRelease(helmRelease.Name, 0, false)
+
+		if err != nil {
+			c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(
+				fmt.Errorf("could not retrieve latest revision"),
+				http.StatusBadRequest,
+			))
+
+			return
+		}
+
+		if currHelmRelease.Version != int(request.LatestRevision) {
+			c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(
+				fmt.Errorf("The provided revision is not up to date with the current revision (you may need to refresh the deployment). Provided revision is %d, latest revision is %d. If you would like to deploy from this revision, please revert first and update the configuration.", request.LatestRevision, currHelmRelease.Version),
+				http.StatusBadRequest,
+			))
+
+			return
+		}
+	}
+
 	newHelmRelease, upgradeErr := helmAgent.UpgradeRelease(conf, request.Values, c.Config().DOConf)
 
 	if upgradeErr == nil && newHelmRelease != nil {

+ 23 - 0
api/server/handlers/v1/release/upgrade.go

@@ -120,6 +120,29 @@ func (c *UpgradeReleaseHandler) ServeHTTP(w http.ResponseWriter, r *http.Request
 
 	conf.Values = request.Values
 
+	// if LatestRevision is set, check that the revision matches the latest revision in the database
+	if request.LatestRevision != 0 {
+		currHelmRelease, err := helmAgent.GetRelease(helmRelease.Name, 0, false)
+
+		if err != nil {
+			c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(
+				fmt.Errorf("could not retrieve latest revision"),
+				http.StatusBadRequest,
+			))
+
+			return
+		}
+
+		if currHelmRelease.Version != int(request.LatestRevision) {
+			c.HandleAPIError(w, r, apierrors.NewErrPassThroughToClient(
+				fmt.Errorf("The provided revision is not up to date with the current revision (you may need to refresh the deployment). Provided revision is %d, latest revision is %d. If you would like to deploy from this revision, please revert first and update the configuration.", request.LatestRevision, currHelmRelease.Version),
+				http.StatusBadRequest,
+			))
+
+			return
+		}
+	}
+
 	newHelmRelease, upgradeErr := helmAgent.UpgradeReleaseByValues(conf, c.Config().DOConf)
 
 	if upgradeErr == nil && newHelmRelease != nil {

+ 8 - 0
api/types/release.go

@@ -110,11 +110,19 @@ type V1UpgradeReleaseRequest struct {
 
 	// The Porter charts version to upgrade the release with
 	ChartVersion string `json:"version"`
+
+	// (optional) if set, the backend will validate that the user was upgrading from the revision specified by
+	// LatestRevision, and there hasn't been an upgrade in the meantime.
+	LatestRevision uint `json:"latest_revision"`
 }
 
 type UpgradeReleaseRequest struct {
 	Values       string `json:"values" form:"required"`
 	ChartVersion string `json:"version"`
+
+	// (optional) if set, the backend will validate that the user was upgrading from the revision specified by
+	// LatestRevision, and there hasn't been an upgrade in the meantime.
+	LatestRevision uint `json:"latest_revision"`
 }
 
 type UpdateImageBatchRequest struct {

+ 4 - 1
dashboard/src/main/home/cluster-dashboard/chart/ChartList.tsx

@@ -140,7 +140,10 @@ const ChartList: React.FunctionComponent<Props> = ({
                 return chart;
               });
             case "DELETE":
-              return tmpCharts.filter((chart) => !isSameChart(chart));
+              const chartToDelete = tmpCharts.find(isSameChart);
+              if (chartToDelete.version === newChart.version) {
+                return tmpCharts.filter((chart) => !isSameChart(chart));
+              }
             default:
               return tmpCharts;
           }

+ 4 - 0
dashboard/src/main/home/cluster-dashboard/expanded-chart/ExpandedChart.tsx

@@ -312,6 +312,9 @@ const ExpandedChart: React.FC<Props> = (props) => {
         "<token>",
         {
           values: valuesYaml,
+          // this is triggered from the Porter form, so we set the latest revision to ensure that the release is
+          // up to date
+          latest_revision: currentChart.version,
         },
         {
           id: currentProject.id,
@@ -369,6 +372,7 @@ const ExpandedChart: React.FC<Props> = (props) => {
           {
             values: valuesYaml,
             version: version,
+            latest_revision: currentChart.version,
           },
           {
             id: currentProject.id,

+ 1 - 0
dashboard/src/main/home/cluster-dashboard/expanded-chart/SettingsSection.tsx

@@ -116,6 +116,7 @@ const SettingsSection: React.FC<PropsType> = ({
         "<token>",
         {
           values: conf,
+          latest_revision: currentChart?.version,
         },
         {
           id: currentProject.id,

+ 1 - 0
dashboard/src/main/home/cluster-dashboard/expanded-chart/ValuesYaml.tsx

@@ -64,6 +64,7 @@ export default class ValuesYaml extends Component<PropsType, StateType> {
         "<token>",
         {
           values: valuesString,
+          latest_revision: this.props.currentChart.version,
         },
         {
           id: currentProject.id,

+ 1 - 0
dashboard/src/main/home/cluster-dashboard/expanded-chart/build-settings/BuildSettingsTab.tsx

@@ -122,6 +122,7 @@ const BuildSettingsTab: React.FC<Props> = ({
         "<token>",
         {
           values: valuesYaml,
+          latest_revision: chart.version,
         },
         {
           id: currentProject.id,

+ 1 - 0
dashboard/src/main/home/cluster-dashboard/expanded-chart/jobs/useJobs.ts

@@ -354,6 +354,7 @@ export const useJobs = (chart: ChartType) => {
         "<token>",
         {
           values: yamlValues,
+          latest_revision: chart.version,
         },
         {
           id: currentProject.id,

+ 2 - 0
dashboard/src/shared/hooks/useChart.ts

@@ -67,6 +67,7 @@ export const useChart = (oldChart: ChartType, closeChart: () => void) => {
         {
           values: valuesYaml,
           version: chart.latest_version,
+          latest_revision: chart.version,
         },
         {
           id: currentProject.id,
@@ -237,6 +238,7 @@ export const useChart = (oldChart: ChartType, closeChart: () => void) => {
         "<token>",
         {
           values,
+          latest_revision: chart.version,
         },
         {
           id: currentProject.id,

+ 5 - 5
internal/kubernetes/prometheus/metrics.go

@@ -145,15 +145,15 @@ func QueryPrometheus(
 		netPodSelector := fmt.Sprintf(`namespace="%s",pod=~"%s",container="POD"`, opts.Namespace, selectionRegex)
 		query = fmt.Sprintf("rate(container_network_receive_bytes_total{%s}[5m])", netPodSelector)
 	} else if opts.Metric == "nginx:errors" {
-		num := fmt.Sprintf(`sum(rate(nginx_ingress_controller_requests{status=~"5.*",namespace="%s",ingress=~"%s"}[5m]) OR on() vector(0))`, opts.Namespace, selectionRegex)
-		denom := fmt.Sprintf(`sum(rate(nginx_ingress_controller_requests{namespace="%s",ingress=~"%s"}[5m]) > 0)`, opts.Namespace, selectionRegex)
+		num := fmt.Sprintf(`sum(rate(nginx_ingress_controller_requests{status=~"5.*",exported_namespace="%s",ingress=~"%s"}[5m]) OR on() vector(0))`, opts.Namespace, selectionRegex)
+		denom := fmt.Sprintf(`sum(rate(nginx_ingress_controller_requests{exported_namespace="%s",ingress=~"%s"}[5m]) > 0)`, opts.Namespace, selectionRegex)
 		query = fmt.Sprintf(`%s / %s * 100 OR on() vector(0)`, num, denom)
 	} else if opts.Metric == "nginx:latency" {
-		num := fmt.Sprintf(`sum(rate(nginx_ingress_controller_request_duration_seconds_sum{namespace=~"%s",ingress=~"%s"}[5m]) OR on() vector(0))`, opts.Namespace, selectionRegex)
-		denom := fmt.Sprintf(`sum(rate(nginx_ingress_controller_request_duration_seconds_count{namespace=~"%s",ingress=~"%s"}[5m]))`, opts.Namespace, selectionRegex)
+		num := fmt.Sprintf(`sum(rate(nginx_ingress_controller_request_duration_seconds_sum{exported_namespace=~"%s",ingress=~"%s"}[5m]) OR on() vector(0))`, opts.Namespace, selectionRegex)
+		denom := fmt.Sprintf(`sum(rate(nginx_ingress_controller_request_duration_seconds_count{exported_namespace=~"%s",ingress=~"%s"}[5m]))`, opts.Namespace, selectionRegex)
 		query = fmt.Sprintf(`%s / %s OR on() vector(0)`, num, denom)
 	} else if opts.Metric == "nginx:latency-histogram" {
-		query = fmt.Sprintf(`histogram_quantile(%f, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{status!="404",status!="500",namespace=~"%s",ingress=~"%s"}[5m])) by (le, ingress))`, opts.Percentile, opts.Namespace, selectionRegex)
+		query = fmt.Sprintf(`histogram_quantile(%f, sum(rate(nginx_ingress_controller_request_duration_seconds_bucket{status!="404",status!="500",exported_namespace=~"%s",ingress=~"%s"}[5m])) by (le, ingress))`, opts.Percentile, opts.Namespace, selectionRegex)
 	} else if opts.Metric == "cpu_hpa_threshold" {
 		// get the name of the kube hpa metric
 		metricName, hpaMetricName := getKubeHPAMetricName(clientset, service, opts, "spec_target_metric")

+ 6 - 1
provisioner/server/handlers/state/create_resource.go

@@ -277,7 +277,12 @@ func createCluster(config *config.Config, infra *models.Infra, operation *models
 		}
 	}
 
-	cluster.Name = output["cluster_name"].(string)
+	// only update the cluster name if this is during creation - we don't want to overwrite the cluster name
+	// which may have been manually set
+	if isNotFound {
+		cluster.Name = output["cluster_name"].(string)
+	}
+
 	cluster.Server = output["cluster_endpoint"].(string)
 	cluster.CertificateAuthorityData = caData
 

+ 4 - 3
workers/jobs/helm_revisions_count_tracker.go

@@ -31,6 +31,7 @@ import (
 	"github.com/porter-dev/porter/api/types"
 	"github.com/porter-dev/porter/pkg/logger"
 	"github.com/porter-dev/porter/provisioner/integrations/storage/s3"
+	"github.com/porter-dev/porter/workers/utils"
 
 	"github.com/porter-dev/porter/ee/integrations/vault"
 	"github.com/porter-dev/porter/internal/helm"
@@ -191,13 +192,13 @@ func (t *helmRevisionsCountTracker) Run() error {
 				log.Printf("fetched %d namespaces for cluster ID %d", len(namespaces.Items), cluster.ID)
 
 				for _, ns := range namespaces.Items {
-					agent, err := helm.GetAgentOutOfClusterConfig(&helm.Form{
+					agent, err := utils.NewRetryHelmAgent(&helm.Form{
 						Cluster:                   cluster,
 						Namespace:                 ns.Name,
 						Repo:                      t.repo,
 						DigitalOceanOAuth:         t.doConf,
 						AllowInClusterConnections: false,
-					}, logger.New(true, os.Stdout))
+					}, logger.New(true, os.Stdout), 3, time.Second)
 
 					if err != nil {
 						log.Printf("error fetching helm client for namespace %s in cluster ID %d: %v. "+
@@ -219,7 +220,7 @@ func (t *helmRevisionsCountTracker) Run() error {
 
 					if err != nil {
 						log.Printf("error fetching releases for namespace %s in cluster ID %d: %v. skipping namespace ...",
-							len(releases), ns.Name, cluster.ID, err)
+							ns.Name, cluster.ID, err)
 						continue
 					}
 

+ 118 - 0
workers/utils/retry_helm_agent.go

@@ -0,0 +1,118 @@
+//go:build ee
+
+package utils
+
+import (
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/porter-dev/porter/api/types"
+	"github.com/porter-dev/porter/internal/helm"
+	"github.com/porter-dev/porter/pkg/logger"
+	"helm.sh/helm/v3/pkg/release"
+)
+
+// RetryHelmAgent wraps a *helm.Agent together with the inputs that were used
+// to build it, so that the agent can be rebuilt and the call retried when a
+// helm operation fails with an "Unauthorized" error.
+type RetryHelmAgent struct {
+	// form is passed to helm.GetAgentOutOfClusterConfig every time the agent
+	// is (re)built.
+	form          *helm.Form
+	// l is the logger handed to helm.GetAgentOutOfClusterConfig.
+	l             *logger.Logger
+	// agent is the helm agent that the retrying methods delegate to.
+	agent         *helm.Agent
+	// retryCount bounds the number of attempts each method makes; with a
+	// value of 0 the methods make no attempt at all and return an error.
+	retryCount    uint
+	// retryInterval is how long the methods sleep between attempts.
+	retryInterval time.Duration
+}
+
+// NewRetryHelmAgent builds a helm agent from form via
+// helm.GetAgentOutOfClusterConfig and wraps it in a RetryHelmAgent that
+// remembers form and l so the agent can be rebuilt later.
+//
+// If l is nil, a logger writing to os.Stdout is created and used instead.
+// retryCount and retryInterval are stored unchanged. Any error from building
+// the initial agent is returned as-is, with a nil *RetryHelmAgent.
+func NewRetryHelmAgent(
+	form *helm.Form,
+	l *logger.Logger,
+	retryCount uint,
+	retryInterval time.Duration,
+) (*RetryHelmAgent, error) {
+	if l == nil {
+		l = logger.New(true, os.Stdout)
+	}
+
+	inner, err := helm.GetAgentOutOfClusterConfig(form, l)
+	if err != nil {
+		return nil, err
+	}
+
+	wrapped := &RetryHelmAgent{
+		form:          form,
+		l:             l,
+		agent:         inner,
+		retryCount:    retryCount,
+		retryInterval: retryInterval,
+	}
+
+	return wrapped, nil
+}
+
+// ListReleases calls ListReleases on the wrapped helm agent, making at most
+// a.retryCount attempts.
+//
+// An attempt that fails with an error whose message contains "Unauthorized"
+// causes the helm agent to be rebuilt from the stored form and logger; the
+// call is then retried after sleeping for a.retryInterval. Any other error is
+// returned immediately without retrying. If the agent cannot be rebuilt, the
+// wrapped rebuild error is returned and the previous agent is kept, so the
+// receiver never ends up holding the result of a failed rebuild.
+//
+// When every attempt has failed (or a.retryCount is 0, in which case no
+// attempt is made) an error naming the retry limit is returned.
+func (a *RetryHelmAgent) ListReleases(
+	namespace string,
+	filter *types.ReleaseListFilter,
+) ([]*release.Release, error) {
+	for attempt := uint(0); attempt < a.retryCount; attempt++ {
+		// only wait between attempts, never after the final one
+		if attempt > 0 {
+			time.Sleep(a.retryInterval)
+		}
+
+		releases, err := a.agent.ListReleases(namespace, filter)
+
+		if err == nil {
+			return releases, nil
+		}
+
+		if !strings.Contains(err.Error(), "Unauthorized") {
+			return nil, err
+		}
+
+		// build into a temporary so that a failed rebuild does not clobber a.agent
+		refreshed, err := helm.GetAgentOutOfClusterConfig(a.form, a.l)
+
+		if err != nil {
+			return nil, fmt.Errorf("error recreating helm agent for retrying ListReleases: %w", err)
+		}
+
+		a.agent = refreshed
+	}
+
+	return nil, fmt.Errorf("maximum number of retries (%d) reached for ListReleases", a.retryCount)
+}
+
+// GetReleaseHistory calls GetReleaseHistory on the wrapped helm agent for the
+// release called name, making at most a.retryCount attempts.
+//
+// An attempt that fails with an error whose message contains "Unauthorized"
+// causes the helm agent to be rebuilt from the stored form and logger; the
+// call is then retried after sleeping for a.retryInterval. Any other error is
+// returned immediately without retrying. If the agent cannot be rebuilt, the
+// wrapped rebuild error is returned and the previous agent is kept, so the
+// receiver never ends up holding the result of a failed rebuild.
+//
+// When every attempt has failed (or a.retryCount is 0, in which case no
+// attempt is made) an error naming the retry limit is returned.
+func (a *RetryHelmAgent) GetReleaseHistory(
+	name string,
+) ([]*release.Release, error) {
+	for attempt := uint(0); attempt < a.retryCount; attempt++ {
+		// only wait between attempts, never after the final one
+		if attempt > 0 {
+			time.Sleep(a.retryInterval)
+		}
+
+		releases, err := a.agent.GetReleaseHistory(name)
+
+		if err == nil {
+			return releases, nil
+		}
+
+		if !strings.Contains(err.Error(), "Unauthorized") {
+			return nil, err
+		}
+
+		// build into a temporary so that a failed rebuild does not clobber a.agent
+		refreshed, err := helm.GetAgentOutOfClusterConfig(a.form, a.l)
+
+		if err != nil {
+			return nil, fmt.Errorf("error recreating helm agent for retrying GetReleaseHistory: %w", err)
+		}
+
+		a.agent = refreshed
+	}
+
+	return nil, fmt.Errorf("maximum number of retries (%d) reached for GetReleaseHistory", a.retryCount)
+}
+
+// DeleteReleaseRevision calls DeleteReleaseRevision on the wrapped helm agent
+// for the given release name and version, making at most a.retryCount
+// attempts.
+//
+// An attempt that fails with an error whose message contains "Unauthorized"
+// causes the helm agent to be rebuilt from the stored form and logger; the
+// call is then retried after sleeping for a.retryInterval. Any other error is
+// returned immediately without retrying. If the agent cannot be rebuilt, the
+// wrapped rebuild error is returned and the previous agent is kept, so the
+// receiver never ends up holding the result of a failed rebuild.
+//
+// When every attempt has failed (or a.retryCount is 0, in which case no
+// attempt is made) an error naming the retry limit is returned.
+func (a *RetryHelmAgent) DeleteReleaseRevision(
+	name string,
+	version int,
+) error {
+	for attempt := uint(0); attempt < a.retryCount; attempt++ {
+		// only wait between attempts, never after the final one
+		if attempt > 0 {
+			time.Sleep(a.retryInterval)
+		}
+
+		err := a.agent.DeleteReleaseRevision(name, version)
+
+		if err == nil {
+			return nil
+		}
+
+		if !strings.Contains(err.Error(), "Unauthorized") {
+			return err
+		}
+
+		// build into a temporary so that a failed rebuild does not clobber a.agent
+		refreshed, err := helm.GetAgentOutOfClusterConfig(a.form, a.l)
+
+		if err != nil {
+			return fmt.Errorf("error recreating helm agent for retrying DeleteReleaseRevision: %w", err)
+		}
+
+		a.agent = refreshed
+	}
+
+	return fmt.Errorf("maximum number of retries (%d) reached for DeleteReleaseRevision", a.retryCount)
+}