Просмотр исходного кода

Merge branch 'belanger/fix-job-metrics-query' into dev

Alexander Belanger 3 года назад
Родитель
Коммит
37e2c78166

+ 20 - 0
api/server/handlers/infra/forms.go

@@ -408,6 +408,16 @@ tabs:
           value: c6i.xlarge
         - label: c6i.2xlarge
           value: c6i.2xlarge
+        - label: c6i.4xlarge
+          value: c6i.4xlarge
+        - label: m6i.large
+          value: m6i.large
+        - label: m6i.xlarge
+          value: m6i.xlarge
+        - label: m6i.2xlarge
+          value: m6i.2xlarge
+        - label: m6i.4xlarge
+          value: m6i.4xlarge
         - label: r5.large
           value: r5.large
         - value: r5.xlarge
@@ -506,6 +516,16 @@ tabs:
           value: c6i.xlarge
         - label: c6i.2xlarge
           value: c6i.2xlarge
+        - label: c6i.4xlarge
+          value: c6i.4xlarge
+        - label: m6i.large
+          value: m6i.large
+        - label: m6i.xlarge
+          value: m6i.xlarge
+        - label: m6i.2xlarge
+          value: m6i.2xlarge
+        - label: m6i.4xlarge
+          value: m6i.4xlarge
     - type: number-input
       label: Minimum number of EC2 instances to create in the application autoscaling group.
       variable: additional_nodegroup_min_instances

+ 0 - 2
dashboard/src/main/home/cluster-dashboard/expanded-chart/deploy-status-section/ControllerTab.tsx

@@ -166,8 +166,6 @@ const ControllerTabFC: React.FunctionComponent<Props> = ({
     rawList?: any[],
     userSelected?: boolean
   ) => {
-    console.log(rawPodList, rawList, !!userSelected);
-
     const rawPod = [...rawPodList, ...(rawList || [])].find(
       (rawPod) => rawPod?.metadata?.name === pod?.name
     );

+ 39 - 31
dashboard/src/main/home/cluster-dashboard/expanded-chart/logs-section/LogsSection.tsx

@@ -126,7 +126,7 @@ const LogsSection: React.FC<Props> = ({
 
     setTimeout(() => {
       setNotification(undefined);
-    }, 3000);
+    }, 5000);
   };
 
   const { loading, logs, refresh, moveCursor, paginationInfo } = useLogs(
@@ -284,22 +284,23 @@ const LogsSection: React.FC<Props> = ({
             )}
           </Flex>
         </FlexRow>
-        <StyledLogsSection isFullscreen={isFullscreen}>
-          {loading || !logs.length ? (
-            <Loading message="Waiting for logs..." />
-          ) : (
-            <>
-              <LoadMoreButton
-                active={
-                  logs.length !== 0 && paginationInfo.previousCursor !== null
-                }
-                role="button"
-                onClick={onLoadPrevious}
-              >
-                Load Previous
-              </LoadMoreButton>
-              {renderLogs()}
-              {/* <Message>
+        <LogsSectionWrapper>
+          <StyledLogsSection isFullscreen={isFullscreen}>
+            {loading || !logs.length ? (
+              <Loading message="Waiting for logs..." />
+            ) : (
+              <>
+                <LoadMoreButton
+                  active={
+                    logs.length !== 0 && paginationInfo.previousCursor !== null
+                  }
+                  role="button"
+                  onClick={onLoadPrevious}
+                >
+                  Load Previous
+                </LoadMoreButton>
+                {renderLogs()}
+                {/* <Message>
             
             No matching logs found.
             <Highlight onClick={() => {}}>
@@ -307,20 +308,24 @@ const LogsSection: React.FC<Props> = ({
               Refresh
             </Highlight>
           </Message> */}
-              <LoadMoreButton
-                active={selectedDate && logs.length !== 0}
-                role="button"
-                onClick={() => moveCursor(Direction.forward)}
-              >
-                Load more
-              </LoadMoreButton>
-            </>
-          )}
-          <div ref={scrollToBottomRef} />
-          <NotificationWrapper active={!!notification}>
+                <LoadMoreButton
+                  active={selectedDate && logs.length !== 0}
+                  role="button"
+                  onClick={() => moveCursor(Direction.forward)}
+                >
+                  Load more
+                </LoadMoreButton>
+              </>
+            )}
+            <div ref={scrollToBottomRef} />
+          </StyledLogsSection>
+          <NotificationWrapper
+            key={JSON.stringify(logs)}
+            active={!!notification}
+          >
             <Banner>{notification}</Banner>
           </NotificationWrapper>
-        </StyledLogsSection>
+        </LogsSectionWrapper>
       </>
     );
   };
@@ -668,9 +673,8 @@ const NotificationWrapper = styled.div<{ active?: boolean }>`
   left: 50%;
   transform: translateX(-50%);
   width: fit-content;
-  padding-inline: 10px;
   background: #101420;
-  animation: bounceIn 0.3s ease-out;
+  z-index: 9999;
 
   @keyframes bounceIn {
     0% {
@@ -683,3 +687,7 @@ const NotificationWrapper = styled.div<{ active?: boolean }>`
     }
   }
 `;
+
+const LogsSectionWrapper = styled.div`
+  position: relative;
+`; // Wraps StyledLogsSection and NotificationWrapper; presumably the positioning context for the notification banner (TODO confirm).

+ 15 - 4
dashboard/src/main/home/cluster-dashboard/expanded-chart/logs-section/useAgentLogs.ts

@@ -288,6 +288,12 @@ export const useLogs = (
 
     updateLogs(initialLogs);
 
+    if (!isLive && !initialLogs.length) {
+      notify(
+        "You have no logs for this time period. Try with a different time range."
+      );
+    }
+
     closeWebsocket(websocketKey);
 
     setLoading(false);
@@ -312,10 +318,15 @@ export const useLogs = (
         Direction.backward
       );
 
-      updateLogs(
-        paginationInfo.previousCursor ? newLogs.slice(0, -1) : newLogs,
-        direction
-      );
+      const logsToUpdate = paginationInfo.previousCursor
+        ? newLogs.slice(0, -1)
+        : newLogs;
+
+      updateLogs(logsToUpdate, direction);
+
+      if (!logsToUpdate.length) {
+        notify("You have reached the beginning of the logs");
+      }
 
       setPaginationInfo((paginationInfo) => ({
         ...paginationInfo,

+ 19 - 2
dashboard/src/main/home/cluster-dashboard/expanded-chart/metrics/JobMetricsSection.tsx

@@ -97,6 +97,23 @@ const JobMetricsSection: React.FunctionComponent<PropsType> = ({
       });
   }, [currentChart, currentCluster, currentProject]);
 
+  // Picks the Prometheus query resolution (step) for a job spanning `start`..`end`, so that one query stays
+  // under Prometheus' limit of 11,000 data points per metric; the first three tiers yield at most 3,600 points.
+  // NOTE(review): the thresholds assume `start` and `end` are timestamps in seconds (confirm against callers).
+  // Known limitation: with the coarsest "5h" step the limit is exceeded once a job runs for over ~6 years.
+  const getJobResolution = (start: number, end: number) => {
+    let duration = end - start;
+    if (duration <= 3600) {
+      return "1s"; // at most 3600 / 1 = 3,600 points
+    } else if (duration <= 54000) {
+      return "15s"; // at most 54000 / 15 = 3,600 points
+    } else if (duration <= 216000) {
+      return "60s"; // at most 216000 / 60 = 3,600 points
+    }
+
+    return "5h"; // fallback for every longer duration (also reached when duration is NaN)
+  };
+
   const getAutoscalingThreshold = async (
     metricType: "cpu_hpa_threshold" | "memory_hpa_threshold",
     shouldsum: boolean,
@@ -117,7 +134,7 @@ const JobMetricsSection: React.FunctionComponent<PropsType> = ({
           namespace: namespace,
           startrange: start,
           endrange: end,
-          resolution: resolutions[selectedRange],
+          resolution: getJobResolution(start, end),
           pods: [],
         },
         {
@@ -168,7 +185,7 @@ const JobMetricsSection: React.FunctionComponent<PropsType> = ({
           namespace: namespace,
           startrange: start,
           endrange: end,
-          resolution: resolutions[selectedRange],
+          resolution: getJobResolution(start, end),
           // pods: podNames,
         },
         {

+ 32 - 25
internal/opa/opa.go

@@ -98,35 +98,42 @@ func (runner *KubernetesOPARunner) GetRecommendations(categories []string) ([]*O
 
 	res := make([]*OPARecommenderQueryResult, 0)
 
-	for _, name := range collectionNames {
-		// look up to determine if the name is registered
-		queryCollection, exists := runner.Policies[name]
+	// ping the cluster with a version check to make sure it's reachable - if not, return an error
+	_, err := runner.k8sAgent.Clientset.Discovery().ServerVersion()
 
-		if !exists {
-			return nil, fmt.Errorf("No policies for %s found", name)
-		}
+	if err != nil {
+		fmt.Printf("discovery check failed: %v\n", err.Error())
+	} else {
+		for _, name := range collectionNames {
+			// look up to determine if the name is registered
+			queryCollection, exists := runner.Policies[name]
 
-		var currResults []*OPARecommenderQueryResult
-		var err error
-
-		switch queryCollection.Kind {
-		case HelmRelease:
-			currResults, err = runner.runHelmReleaseQueries(name, queryCollection)
-		case Pod:
-			currResults, err = runner.runPodQueries(name, queryCollection)
-		case CRDList:
-			currResults, err = runner.runCRDListQueries(name, queryCollection)
-		default:
-			fmt.Printf("%s is not a supported query kind", queryCollection.Kind)
-			continue
-		}
+			if !exists {
+				return nil, fmt.Errorf("No policies for %s found", name)
+			}
 
-		if err != nil {
-			fmt.Printf("%s", err.Error())
-			continue
-		}
+			var currResults []*OPARecommenderQueryResult
+			var err error
+
+			switch queryCollection.Kind {
+			case HelmRelease:
+				currResults, err = runner.runHelmReleaseQueries(name, queryCollection)
+			case Pod:
+				currResults, err = runner.runPodQueries(name, queryCollection)
+			case CRDList:
+				currResults, err = runner.runCRDListQueries(name, queryCollection)
+			default:
+				fmt.Printf("%s is not a supported query kind", queryCollection.Kind)
+				continue
+			}
+
+			if err != nil {
+				fmt.Printf("%s", err.Error())
+				continue
+			}
 
-		res = append(res, currResults...)
+			res = append(res, currResults...)
+		}
 	}
 
 	return res, nil

+ 1 - 0
workers/jobs/recommender.go

@@ -204,6 +204,7 @@ func (n *recommender) Run() error {
 			Repo:                      n.repo,
 			DigitalOceanOAuth:         n.doConf,
 			AllowInClusterConnections: false,
+			Timeout:                   5 * time.Second,
 		})
 
 		if err != nil {