Explorar el Código

Add GPU support for GCP (#4054)

Co-authored-by: Stefan McShane <stefanmcshane@users.noreply.github.com>
sdess09 hace 2 años
padre
commit
35c49799a7

+ 117 - 44
dashboard/src/components/GCPProvisionerSettings.tsx

@@ -40,6 +40,8 @@ import PreflightChecks from "./PreflightChecks";
 import VerticalSteps from "./porter/VerticalSteps";
 import { useIntercom } from "lib/hooks/useIntercom";
 import { log } from "console";
+import InputSlider from "./porter/InputSlider";
+import Select from "./porter/Select";
 
 
 const locationOptions = [
@@ -78,6 +80,15 @@ const instanceTypes = [
   // { value: "n1-standard-16", label: "n1-standard-16" }, // Maximum of 1 GPU per node until further notice
 ];
 
+const gpuMachineTypeOptions = [ // Options passed to the "GPU Instance type" Select rendered when props.gpuModal is set.
+  { value: "n1-standard-1", label: "n1-standard-1" }, // First option; also the initial value of the gpuInstanceType state.
+  { value: "n1-standard-2", label: "n1-standard-2" },
+  { value: "n1-standard-4", label: "n1-standard-4" },
+  { value: "n1-standard-8", label: "n1-standard-8" },
+  { value: "n1-standard-16", label: "n1-standard-16" } // NOTE(review): this type is commented out of instanceTypes ("Maximum of 1 GPU per node until further notice") — confirm it should be offered here.
+];
+
+
 const clusterVersionOptions = [{ value: "1.27", label: "v1.27" }];
 
 type Props = RouteComponentProps & {
@@ -85,6 +96,7 @@ type Props = RouteComponentProps & {
   provisionerError?: string;
   credentialId: string;
   clusterId?: number;
+  gpuModal?: boolean;
 };
 
 const VALID_CIDR_RANGE_PATTERN = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\/(8|9|1\d|2[0-8])$/;
@@ -116,6 +128,9 @@ const GCPProvisionerSettings: React.FC<Props> = (props) => {
   const [isLoading, setIsLoading] = useState(false);
   const [isExpanded, setIsExpanded] = useState(false);
   const [preflightError, setPreflightError] = useState<string>("")
+  const [gpuMinInstances, setGpuMinInstances] = useState(1);
+  const [gpuMaxInstances, setGpuMaxInstances] = useState(5);
+  const [gpuInstanceType, setGpuInstanceType] = useState("n1-standard-1");
 
   const { showIntercomWithMessage } = useIntercom();
 
@@ -249,32 +264,40 @@ const GCPProvisionerSettings: React.FC<Props> = (props) => {
     return "";
   }
 
-  const createCluster = async () => {
-
-    const err = validateInputs();
-    if (err !== "") {
-      setErrorMessage(err)
-      setErrorDetails("")
-      return;
+  const createClusterObj = (): Contract => {
+    const nodePools = [
+      new GKENodePool({
+        instanceType: "custom-2-4096",
+        minInstances: 1,
+        maxInstances: 1,
+        nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_MONITORING
+      }),
+      new GKENodePool({
+        instanceType: "custom-2-4096",
+        minInstances: 1,
+        maxInstances: 2,
+        nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_SYSTEM
+      }),
+      new GKENodePool({
+        instanceType: instanceType,
+        minInstances: 1, // TODO: make these customizable before merging
+        maxInstances: 10, // TODO: make these customizable before merging
+        nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_APPLICATION
+      }),
+    ];
+
+    // Conditionally append a GPU node pool (a GKENodePool of type CUSTOM) when gpuModal is enabled
+    if (props.gpuModal) {
+      nodePools.push(new GKENodePool({
+        instanceType: gpuInstanceType,
+        minInstances: gpuMinInstances || 0,
+        maxInstances: gpuMaxInstances || 5,
+        nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_CUSTOM,
+      }));
     }
-    setIsLoading(true);
 
-    setIsClicked(true);
-
-
-    try {
-      window.dataLayer?.push({
-        event: 'provision-attempt',
-        data: {
-          cloud: 'gcp',
-          email: user?.email
-        }
-      });
-    } catch (err) {
-      console.log(err);
-    }
 
-    var data = new Contract({
+    const data = new Contract({
       cluster: new Cluster({
         projectId: currentProject.id,
         kind: EnumKubernetesKind.GKE,
@@ -292,32 +315,42 @@ const GCPProvisionerSettings: React.FC<Props> = (props) => {
               podCidr: defaultClusterNetworking.podCidr,
               serviceCidr: defaultClusterNetworking.serviceCidr,
             }),
-            nodePools: [
-              new GKENodePool({
-                instanceType: "custom-2-4096",
-                minInstances: 1,
-                maxInstances: 1,
-                nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_MONITORING
-              }),
-              new GKENodePool({
-                instanceType: "custom-2-4096",
-                minInstances: 1,
-                maxInstances: 2,
-                nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_SYSTEM
-              }),
-              new GKENodePool({
-                instanceType: instanceType,
-                minInstances: 1, // TODO: make these customizable before merging
-                maxInstances: 10, // TODO: make these customizable before merging
-                nodePoolType: GKENodePoolType.GKE_NODE_POOL_TYPE_APPLICATION
-              }),
-
-            ],
+            nodePools
           }),
         },
       }),
     });
 
+    return data
+  }
+
+
+  const createCluster = async () => {
+
+    const err = validateInputs();
+    if (err !== "") {
+      setErrorMessage(err)
+      setErrorDetails("")
+      return;
+    }
+    setIsLoading(true);
+
+    setIsClicked(true);
+
+
+    try {
+      window.dataLayer?.push({
+        event: 'provision-attempt',
+        data: {
+          cloud: 'gcp',
+          email: user?.email
+        }
+      });
+    } catch (err) {
+      console.log(err);
+    }
+
+    const data = createClusterObj();
 
     if (props.clusterId) {
       data["cluster"]["clusterId"] = props.clusterId;
@@ -541,6 +574,46 @@ const GCPProvisionerSettings: React.FC<Props> = (props) => {
       );
     }
 
+    if (props.gpuModal) {
+      return (
+        <>
+          <Select
+            options={gpuMachineTypeOptions}
+            width="350px"
+            disabled={isReadOnly}
+            value={gpuInstanceType}
+            setValue={(x: string) => {
+              setGpuInstanceType(x)
+            }
+            }
+            label="GPU Instance type"
+          />
+          <Spacer y={1} />
+          <InputSlider
+            label="Max Instances: "
+            unit="nodes"
+            min={0}
+            max={5}
+            step={1}
+            width="350px"
+            disabled={isReadOnly || isLoading}
+            value={gpuMaxInstances.toString()}
+            setValue={(x: number) => {
+              setGpuMaxInstances(x)
+            }}
+          />
+          <Button
+            disabled={isDisabled() || isLoading}
+            onClick={createCluster}
+            status={getStatus()}
+          >
+            Provision
+          </Button>
+
+          <Spacer y={.5} />
+        </>
+      )
+    }
     // If settings, update full form
     return (
       <>

+ 34 - 0
dashboard/src/lib/hooks/useClusterResourceLimits.ts

@@ -3,6 +3,7 @@ import {
   Contract,
   LoadBalancerType,
   NodeGroupType,
+  NodePoolType,
 } from "@porter-dev/api-contracts";
 import { useQuery } from "@tanstack/react-query";
 import convert from "convert";
@@ -55,6 +56,30 @@ export type EksKind = {
   };
 };
 
+export type GKEKind = { // Object shape describing a GKE cluster; NOTE(review): no consumer of this type is visible in this diff — confirm where it is used.
+  clusterName: string;
+  clusterVersion: string;
+  region: string;
+  nodePools: NodePools[]; // One entry per node pool; element shape is the NodePools type.
+  user: {
+    id: number;
+  };
+  network: { // NOTE(review): presumably CIDR-notation strings (e.g. "10.0.0.0/16") — confirm the expected format.
+    cidrRange: string;
+    controlPlaneCidr: string;
+    podCidr: string;
+    serviceCidr: string;
+  };
+};
+
+export type NodePools = { // Shape of a single node pool entry; used as the element type of GKEKind.nodePools.
+  instanceType: string;
+  minInstances: number;
+  maxInstances: number;
+  nodePoolType: string; // NOTE(review): typed as a plain string rather than the NodePoolType enum — confirm which values are expected.
+  isStateful?: boolean; // Optional; NOTE(review): meaning is not shown in this file — confirm with the contract definition.
+};
+
 const clusterNodesValidator = z
   .object({
     labels: z
@@ -289,6 +314,15 @@ export const useClusterResourceLimits = ({
                 ng.instanceType.includes("g4dn"))
           );
         })
+        .with({ kindValues: { case: "gkeKind" } }, (c) => {
+          return c.kindValues.value.nodePools.some(
+            (ng) =>
+              (ng.nodePoolType === NodePoolType.CUSTOM &&
+                ng.instanceType.includes("n1")) ||
+              (ng.nodePoolType === NodePoolType.APPLICATION &&
+                ng.instanceType.includes("n1"))
+          );
+        })
         .otherwise(() => false);
 
       const loadBalancerType: ClientLoadBalancerType = match(contract)

+ 2 - 1
dashboard/src/main/home/app-dashboard/validate-apply/services-settings/tabs/Resources.tsx

@@ -222,7 +222,7 @@ const Resources: React.FC<ResourcesProps> = ({
         )}
       />
 
-      {currentCluster?.cloud_provider === "AWS" &&
+      {(currentCluster?.cloud_provider === "AWS" || currentCluster?.cloud_provider === "GCP") &&
         currentProject?.gpu_enabled && (
           <>
             <Spacer y={1} />
@@ -288,6 +288,7 @@ const Resources: React.FC<ResourcesProps> = ({
                         setClusterModalVisible(false);
                       }}
                       gpuModal={true}
+                      gcp={currentCluster?.cloud_provider === "GCP"}
                     />
                   )}
                 </>

+ 22 - 7
dashboard/src/main/home/sidebar/ProvisionClusterModal.tsx

@@ -14,16 +14,19 @@ import ClusterRevisionSelector from "../cluster-dashboard/dashboard/ClusterRevis
 import AWSCredentialsList from "./AddCluster/AWSCredentialList";
 import { type InfraCredentials } from "shared/types";
 import { z } from "zod";
+import GCPProvisionerSettings from "components/GCPProvisionerSettings";
 
 type Props = RouteComponentProps & { // Props accepted by ProvisionClusterModal.
   closeModal: () => void; // Forwarded to the rendered provisioner settings component.
   gpuModal?: boolean; // Forwarded to the provisioner settings; when truthy, the current cluster's id is passed as clusterId.
+  gcp?: boolean; // When truthy, GCPProvisionerSettings is rendered instead of ProvisionerSettings.
 }
 
 
 const ProvisionClusterModal: React.FC<Props> = ({
   closeModal,
   gpuModal,
+  gcp,
 }) => {
   const {
     currentCluster,
@@ -70,13 +73,25 @@ const ProvisionClusterModal: React.FC<Props> = ({
                 gpuModal={true}
               />
 
-              <ProvisionerSettings
-                clusterId={gpuModal ? currentCluster?.id : null}
-                gpuModal={gpuModal}
-                credentialId={currentCluster.cloud_provider_credential_identifier}
-                selectedClusterVersion={selectedClusterVersion}
-                closeModal={closeModal}
-              />
+              {gcp ? (
+                <GCPProvisionerSettings
+                  clusterId={gpuModal ? currentCluster?.id : null}
+                  gpuModal={gpuModal}
+                  credentialId={currentCluster.cloud_provider_credential_identifier}
+                  selectedClusterVersion={selectedClusterVersion}
+                  closeModal={closeModal}
+                />
+              ) : (
+                <ProvisionerSettings
+                  clusterId={gpuModal ? currentCluster?.id : undefined}
+                  gpuModal={gpuModal}
+                  credentialId={currentCluster.cloud_provider_credential_identifier}
+                  selectedClusterVersion={selectedClusterVersion}
+                  closeModal={closeModal}
+                />
+              )}
+
+
             </>
           ) :
             (