소스 검색

Revert "Gpu refactor" (#4029)

sdess09 2 년 전
부모
커밋
fc9a5e6d32

+ 0 - 26
dashboard/src/lib/porter-apps/services.ts

@@ -82,10 +82,6 @@ export const serviceValidator = z.object({
   cpuCores: serviceNumberValidator,
   ramMegabytes: serviceNumberValidator,
   gpuCoresNvidia: serviceNumberValidator,
-  gpu: z.object({
-    enabled: serviceBooleanValidator,
-    gpuCoresNvidia: serviceNumberValidator,
-  }),
   smartOptimization: serviceBooleanValidator.optional(),
   terminationGracePeriodSeconds: serviceNumberValidator.optional(),
   config: z.discriminatedUnion("type", [
@@ -121,10 +117,6 @@ export type SerializedService = {
   ramMegabytes: number;
   smartOptimization?: boolean;
   gpuCoresNvidia: number;
-  gpu: {
-    enabled: boolean;
-    gpuCoresNvidia: number;
-  };
   terminationGracePeriodSeconds?: number;
   config:
     | {
@@ -204,10 +196,6 @@ export function defaultSerialized({
     cpuCores: defaultCPU,
     ramMegabytes: defaultRAM,
     gpuCoresNvidia: 0,
-    gpu: {
-      enabled: false,
-      gpuCoresNvidia: 0,
-    },
     smartOptimization: true,
   };
 
@@ -276,10 +264,6 @@ export function serializeService(service: ClientService): SerializedService {
     ramMegabytes: Math.round(service.ramMegabytes.value), // RAM must be an integer
     smartOptimization: service.smartOptimization?.value,
     gpuCoresNvidia: service.gpuCoresNvidia.value,
-    gpu: {
-      enabled: service.gpu.enabled.value,
-      gpuCoresNvidia: service.gpu.gpuCoresNvidia.value,
-    },
     terminationGracePeriodSeconds: service.terminationGracePeriodSeconds?.value,
     config: match(service.config)
       .with({ type: "web" }, (config) =>
@@ -352,16 +336,6 @@ export function deserializeService({
     instances: ServiceField.number(service.instances, override?.instances),
     port: ServiceField.number(service.port, override?.port),
     cpuCores: ServiceField.number(service.cpuCores, override?.cpuCores),
-    gpu: {
-      enabled: ServiceField.boolean(
-        service.gpu?.enabled,
-        override?.gpu.enabled
-      ),
-      gpuCoresNvidia: ServiceField.number(
-        service.gpu?.gpuCoresNvidia,
-        override?.gpu?.gpuCoresNvidia
-      ),
-    },
     gpuCoresNvidia: ServiceField.number(
       service.gpuCoresNvidia,
       override?.gpuCoresNvidia

+ 2 - 2
dashboard/src/main/home/app-dashboard/validate-apply/services-settings/ServiceContainer.tsx

@@ -154,7 +154,7 @@ const ServiceContainer: React.FC<ServiceProps> = ({
           {service.name.value.trim().length > 0
             ? service.name.value
             : "New Service"}
-          {service.gpu.enabled.value && (
+          {service.gpuCoresNvidia.value > 0 && (
             <>
               <Spacer inline x={1.5} />
               <TagContainer>
@@ -276,7 +276,7 @@ const ServiceHeader = styled.div<{
     border-radius: 20px;
     margin-left: -10px;
     transform: ${(props: { showExpanded?: boolean }) =>
-    props.showExpanded ? "" : "rotate(-90deg)"};
+      props.showExpanded ? "" : "rotate(-90deg)"};
   }
 `;
 

+ 31 - 31
dashboard/src/main/home/app-dashboard/validate-apply/services-settings/tabs/Resources.tsx

@@ -227,7 +227,7 @@ const Resources: React.FC<ResourcesProps> = ({
           <>
             <Spacer y={1} />
             <Controller
-              name={`app.services.${index}.gpu`}
+              name={`app.services.${index}.gpuCoresNvidia`}
               control={control}
               render={({ field: { value, onChange } }) => (
                 <>
@@ -235,20 +235,19 @@ const Resources: React.FC<ResourcesProps> = ({
                     <Switch
                       size="small"
                       color="primary"
-                      checked={value.enabled.value}
+                      checked={value.value > 0}
                       disabled={!clusterContainsGPUNodes}
                       onChange={() => {
-                        onChange({
-                          ...value,
-                          enabled: {
-                            ...value.enabled,
-                            value: !value.enabled.value,
-                          },
-                          gpuCoresNvidia: {
-                            ...value.gpuCoresNvidia,
-                            value: value.enabled.value ? 0 : 1,
-                          }
-                        });
+                        if (value.value > 0) {
+                          onChange({
+                            ...value,
+                            value: 0,
+                          });
+                        } else
+                          onChange({
+                            ...value,
+                            value: 1,
+                          });
                       }}
                       inputProps={{ "aria-label": "controlled" }}
                     />
@@ -265,7 +264,7 @@ const Resources: React.FC<ResourcesProps> = ({
                           You cluster has no GPU nodes available.
                         </Text>
                         <Spacer inline x={0.5} />
-                        {currentCluster.status !== "UPDATING" && <Tag>
+                        <Tag>
                           <Link
                             onClick={() => {
                               setClusterModalVisible(true);
@@ -274,7 +273,7 @@ const Resources: React.FC<ResourcesProps> = ({
                             <TagIcon src={addCircle} />
                             Add GPU nodes
                           </Link>
-                        </Tag>}
+                        </Tag>
                       </>
                     )}
                   </Container>
@@ -291,22 +290,23 @@ const Resources: React.FC<ResourcesProps> = ({
                 </>
               )}
             />
-            {(currentCluster.status === "UPDATING" && !clusterContainsGPUNodes) && (
-              <CheckItemContainer>
-                <CheckItemTop>
-                  <Loading offset="0px" width="20px" height="20px" />
-                  <Spacer inline x={1} />
-                  <Text>{"Cluster is updating..."}</Text>
-                  <Spacer inline x={1} />
-                  <Tag>
-                    <Link to={`/cluster-dashboard`}>
-                      <TagIcon src={infra} />
-                      View Status
-                    </Link>
-                  </Tag>
-                </CheckItemTop>
-              </CheckItemContainer>
-            )}
+            {currentCluster.status === "UPDATING" &&
+              clusterContainsGPUNodes && (
+                <CheckItemContainer>
+                  <CheckItemTop>
+                    <Loading offset="0px" width="20px" height="20px" />
+                    <Spacer inline x={1} />
+                    <Text>{"Creating GPU nodes..."}</Text>
+                    <Spacer inline x={1} />
+                    <Tag>
+                      <Link to={`/cluster-dashboard`}>
+                        <TagIcon src={infra} />
+                        View Status
+                      </Link>
+                    </Tag>
+                  </CheckItemTop>
+                </CheckItemContainer>
+              )}
           </>
         )}
       {match(service.config)

+ 0 - 28
internal/porter_app/test/parse_test.go

@@ -54,10 +54,6 @@ var result_nobuild = &porterv1.PorterApp{
 			Port:         8080,
 			CpuCores:     0.1,
 			RamMegabytes: 256,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_WebConfig{
 				WebConfig: &porterv1.WebServiceConfig{
 					Autoscaling: &porterv1.Autoscaling{
@@ -91,10 +87,6 @@ var result_nobuild = &porterv1.PorterApp{
 			CpuCores:          0.1,
 			RamMegabytes:      256,
 			GpuCoresNvidia:    0,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_WorkerConfig{
 				WorkerConfig: &porterv1.WorkerServiceConfig{
 					Autoscaling: nil,
@@ -108,10 +100,6 @@ var result_nobuild = &porterv1.PorterApp{
 			CpuCores:       0.1,
 			RamMegabytes:   256,
 			GpuCoresNvidia: 0,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_JobConfig{
 				JobConfig: &porterv1.JobServiceConfig{
 					AllowConcurrentOptional: pointer.Bool(true),
@@ -131,10 +119,6 @@ var result_nobuild = &porterv1.PorterApp{
 			CpuCores:       0.1,
 			RamMegabytes:   256,
 			GpuCoresNvidia: 0,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_WebConfig{
 				WebConfig: &porterv1.WebServiceConfig{
 					Autoscaling: &porterv1.Autoscaling{
@@ -168,10 +152,6 @@ var result_nobuild = &porterv1.PorterApp{
 			CpuCores:          0.1,
 			RamMegabytes:      256,
 			GpuCoresNvidia:    0,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_WorkerConfig{
 				WorkerConfig: &porterv1.WorkerServiceConfig{
 					Autoscaling: nil,
@@ -185,10 +165,6 @@ var result_nobuild = &porterv1.PorterApp{
 			CpuCores:       0.1,
 			RamMegabytes:   256,
 			GpuCoresNvidia: 0,
-			Gpu: &porterv1.GPU{
-				Enabled:        false,
-				GpuCoresNvidia: 0,
-			},
 			Config: &porterv1.Service_JobConfig{
 				JobConfig: &porterv1.JobServiceConfig{
 					AllowConcurrentOptional: pointer.Bool(true),
@@ -206,10 +182,6 @@ var result_nobuild = &porterv1.PorterApp{
 		CpuCores:       0,
 		RamMegabytes:   0,
 		GpuCoresNvidia: 0,
-		Gpu: &porterv1.GPU{
-			Enabled:        false,
-			GpuCoresNvidia: 0,
-		},
 		Config:         &porterv1.Service_JobConfig{},
 		Type:           3,
 	},

+ 31 - 34
internal/porter_app/testdata/v1_input_no_build_no_image.yaml

@@ -3,7 +3,6 @@ apps:
   example-job:
     type: job
     run: echo 'hello world'
-    gpu: {}
     config:
       allowConcurrent: true
       resources:
@@ -12,54 +11,52 @@ apps:
           memory: 256Mi
       schedule:
         enabled: true
-        value: "*/10 * * * *"
+        value: '*/10 * * * *'
       paused: true
       cloudsql:
         enabled: false
-        connectionName: ""
-        dbPort: "5432"
-        serviceAccountJSON: ""
+        connectionName: ''
+        dbPort: '5432'
+        serviceAccountJSON: ''
   example-wkr:
     type: worker
     run: "echo 'work'"
-    gpu: {}
     config:
-      replicaCount: "1"
+      replicaCount: '1'
       container:
-        port: "80"
+        port: '80'
       resources:
         requests:
           cpu: 100m
           memory: 256Mi
       autoscaling:
         enabled: false
-        minReplicas: "1"
-        maxReplicas: "10"
-        targetCPUUtilizationPercentage: "50"
-        targetMemoryUtilizationPercentage: "50"
+        minReplicas: '1'
+        maxReplicas: '10'
+        targetCPUUtilizationPercentage: '50'
+        targetMemoryUtilizationPercentage: '50'
       cloudsql:
         enabled: false
-        connectionName: ""
-        dbPort: "5432"
-        serviceAccountJSON: ""
+        connectionName: ''
+        dbPort: '5432'
+        serviceAccountJSON: ''
   example-web:
     type: web
     run: node index.js
-    gpu: {}
     config:
-      replicaCount: "0"
+      replicaCount: '0'
       resources:
         requests:
           cpu: 100m
           memory: 256Mi
       container:
-        port: "8080"
+        port: '8080'
       autoscaling:
         enabled: true
-        minReplicas: "1"
-        maxReplicas: "3"
-        targetCPUUtilizationPercentage: "60"
-        targetMemoryUtilizationPercentage: "60"
+        minReplicas: '1'
+        maxReplicas: '3'
+        targetCPUUtilizationPercentage: '60'
+        targetMemoryUtilizationPercentage: '60'
       ingress:
         enabled: true
         custom_domain: true
@@ -69,30 +66,30 @@ apps:
         porter_hosts: []
         annotations:
       service:
-        port: "8080"
+        port: '8080'
       health:
         startupProbe:
           enabled: false
-          failureThreshold: "3"
+          failureThreshold: '3'
           path: /startupz
-          periodSeconds: "5"
+          periodSeconds: '5'
         readinessProbe:
           enabled: true
-          failureThreshold: "3"
+          failureThreshold: '3'
           path: /healthz
-          initialDelaySeconds: "0"
+          initialDelaySeconds: '0'
         livenessProbe:
           enabled: true
-          failureThreshold: "3"
+          failureThreshold: '3'
           path: /healthz
-          periodSeconds: "5"
+          periodSeconds: '5'
       cloudsql:
         enabled: false
-        connectionName: ""
-        dbPort: "5432"
-        serviceAccountJSON: ""
+        connectionName: ''
+        dbPort: '5432'
+        serviceAccountJSON: ''
 release:
   run: ls
 env:
-  PORT: "8080"
-  NODE_ENV: "production"
+  PORT: '8080'
+  NODE_ENV: 'production'

+ 1 - 17
internal/porter_app/testdata/v2_input_no_build_no_env.yaml

@@ -10,10 +10,6 @@ services:
     port: 8080
     cpuCores: 0.1
     ramMegabytes: 256
-    gpu: {
-      enabled:        false,
-			gpuCoresNvidia: 0,
-    }
     autoscaling:
       enabled: true
       minInstances: 1
@@ -33,27 +29,15 @@ services:
     cpuCores: 0.1
     ramMegabytes: 256
     instances: 1
-    gpu: {
-      enabled:        false,
-			gpuCoresNvidia: 0,
-    }
   - name: example-job
     type: job
     run: echo 'hello world'
     allowConcurrent: true
     cpuCores: 0.1
     ramMegabytes: 256
-    cron: "*/10 * * * *"
+    cron: '*/10 * * * *'
     timeoutSeconds: 60
     suspendCron: false
-    gpu: {
-      enabled:        false,
-			gpuCoresNvidia: 0,
-    }
 predeploy:
   type: job
   run: ls
-  gpu: {
-      enabled:        false,
-			gpuCoresNvidia: 0,
-  }

+ 2 - 6
internal/porter_app/testdata/v2_input_nobuild.yaml

@@ -1,5 +1,5 @@
 version: v2
-name: "test-app"
+name: 'test-app'
 image:
   repository: nginx
   tag: latest
@@ -10,7 +10,6 @@ services:
     port: 8080
     cpuCores: 0.1
     ramMegabytes: 256
-    gpu: {}
     autoscaling:
       enabled: true
       minInstances: 1
@@ -29,7 +28,6 @@ services:
     port: 80
     cpuCores: 0.1
     ramMegabytes: 256
-    gpu: {}
     instances: 1
   - name: example-job
     type: job
@@ -37,14 +35,12 @@ services:
     allowConcurrent: true
     cpuCores: 0.1
     ramMegabytes: 256
-    gpu: {}
-    cron: "*/10 * * * *"
+    cron: '*/10 * * * *'
     timeoutSeconds: 60
     suspendCron: false
 predeploy:
   type: job
   run: ls
-  gpu: {}
 env:
   PORT: 8080
   NODE_ENV: production

+ 8 - 27
internal/porter_app/v2/yaml.go

@@ -169,7 +169,6 @@ type Service struct {
 	CpuCores                      float32           `yaml:"cpuCores,omitempty"`
 	RamMegabytes                  int               `yaml:"ramMegabytes,omitempty"`
 	GpuCoresNvidia                float32           `yaml:"gpuCoresNvidia,omitempty"`
-	GPU                           *GPU              `yaml:"gpu,omitempty"`
 	SmartOptimization             *bool             `yaml:"smartOptimization,omitempty"`
 	TerminationGracePeriodSeconds *int32            `yaml:"terminationGracePeriodSeconds,omitempty"`
 	Port                          int               `yaml:"port,omitempty"`
@@ -194,12 +193,6 @@ type AutoScaling struct {
 	MemoryThresholdPercent int  `yaml:"memoryThresholdPercent"`
 }
 
-// GPU represents GPU settings for a service
-type GPU struct {
-	Enabled        bool `yaml:"enabled"`
-	GpuCoresNvidia int  `yaml:"gpuCoresNvidia"`
-}
-
 // Domains are the custom domains for a web service
 type Domains struct {
 	Name string `yaml:"name"`
@@ -341,14 +334,6 @@ func serviceProtoFromConfig(service Service, serviceType porterv1.ServiceType) (
 		TerminationGracePeriodSeconds: service.TerminationGracePeriodSeconds,
 	}
 
-	if service.GPU != nil {
-		gpu := &porterv1.GPU{
-			Enabled:        service.GPU.Enabled,
-			GpuCoresNvidia: int32(service.GPU.GpuCoresNvidia),
-		}
-
-		serviceProto.Gpu = gpu
-	}
 	switch serviceType {
 	default:
 		return nil, fmt.Errorf("invalid service type '%s'", serviceType)
@@ -495,18 +480,14 @@ func AppFromProto(appProto *porterv1.PorterApp) (PorterApp, error) {
 
 func appServiceFromProto(service *porterv1.Service) (Service, error) {
 	appService := Service{
-		Name:              service.Name,
-		Run:               service.RunOptional,
-		Instances:         service.InstancesOptional,
-		CpuCores:          service.CpuCores,
-		RamMegabytes:      int(service.RamMegabytes),
-		GpuCoresNvidia:    service.GpuCoresNvidia, // nolint:staticcheck // https://linear.app/porter/issue/POR-2137/support-new-gpu-field-in-porteryaml
-		Port:              int(service.Port),
-		SmartOptimization: service.SmartOptimization,
-		GPU: &GPU{
-			Enabled:        service.Gpu.Enabled,
-			GpuCoresNvidia: int(service.Gpu.GpuCoresNvidia),
-		},
+		Name:                          service.Name,
+		Run:                           service.RunOptional,
+		Instances:                     service.InstancesOptional,
+		CpuCores:                      service.CpuCores,
+		RamMegabytes:                  int(service.RamMegabytes),
+		GpuCoresNvidia:                service.GpuCoresNvidia, // nolint:staticcheck // https://linear.app/porter/issue/POR-2137/support-new-gpu-field-in-porteryaml
+		Port:                          int(service.Port),
+		SmartOptimization:             service.SmartOptimization,
 		TerminationGracePeriodSeconds: service.TerminationGracePeriodSeconds,
 	}