Browse Source

Improve minion pool status management.

Nashwan Azhari 5 years ago
parent
commit
9cb8eddd42

+ 2 - 2
coriolis/api/v1/views/minion_pool_view.py

@@ -38,8 +38,8 @@ def _format_minion_pool(req, minion_pool, keys=None):
                 if 'connection_details' in machine[
                         'backup_writer_connection_info']:
                     _hide_minion_creds(
-                        machine['connection_details'][
-                            'backup_writer_connection_info'])
+                        machine['backup_writer_connection_info'][
+                            'connection_details'])
 
     return minion_pool_dict
 

+ 65 - 26
coriolis/conductor/rpc/server.py

@@ -1825,15 +1825,69 @@ class ConductorServerEndpoint(object):
                 "No new tasks were started for execution '%s' following "
                 "state advancement after cancellation.", execution.id)
 
-    @staticmethod
-    def _set_tasks_execution_status(ctxt, execution_id, execution_status):
+    def _set_tasks_execution_status(
+            self, ctxt, execution_id, execution_status):
+        """ Sets the status of the tasks execution with the given ID in the
+        DB and, if the execution is a minion pool execution, also updates
+        the status of its minion pool to match.
+
+        :param ctxt: request context; its trust is deleted if
+            `ctxt.delete_trust_id` is set.
+        :param execution_id: ID of the tasks execution to update.
+        :param execution_status: new status for the tasks execution.
+        """
         LOG.info(
             "Tasks execution %(id)s status updated to: %(status)s",
             {"id": execution_id, "status": execution_status})
-        db_api.set_execution_status(ctxt, execution_id, execution_status)
+        # NOTE: relies on `db_api.set_execution_status` returning the
+        # execution it updated so its type/action can be inspected below.
+        execution = db_api.set_execution_status(
+            ctxt, execution_id, execution_status)
         if ctxt.delete_trust_id:
             keystone.delete_trust(ctxt)
 
+        # minion pool statuses are derived from the status of the
+        # execution last run on them, so keep the two in sync:
+        if execution.type in constants.MINION_POOL_EXECUTION_TYPES:
+            self._update_minion_pool_status_for_finished_execution(
+                ctxt, execution, execution_status)
+
+    @staticmethod
+    def _update_minion_pool_status_for_finished_execution(
+            ctxt, execution, new_execution_status):
+        """ Updates the status of the minion pool the given execution was
+        run on so that it reflects the execution's new status.
+
+        :param ctxt: request context passed through to the DB API.
+        :param execution: the minion pool execution whose status changed.
+            Its `action_id` is used as the ID of the minion pool.
+        :param new_execution_status: the status the execution was moved to.
+
+        If no pool status can be determined for the type of the execution,
+        an error is logged and the pool is marked as error'd.
+        """
+        # Maps each execution type to the pool statuses to be set, in order:
+        # (execution still active, execution completed, execution ended in
+        # any other finalized status).
+        # NOTE(review): constants.MINION_POOL_EXECUTION_TYPES also lists the
+        # maintenance and update execution types, which have no entry here
+        # and will thus lead to the pool being marked as error'd -- confirm
+        # this is intended.
+        pool_statuses_map = {
+            constants.EXECUTION_TYPE_MINION_POOL_ALLOCATE_MINIONS: (
+                constants.MINION_POOL_STATUS_ALLOCATING,
+                constants.MINION_POOL_STATUS_ALLOCATED,
+                constants.MINION_POOL_STATUS_DEALLOCATED),
+            constants.EXECUTION_TYPE_MINION_POOL_DEALLOCATE_MINIONS: (
+                constants.MINION_POOL_STATUS_DEALLOCATING,
+                constants.MINION_POOL_STATUS_DEALLOCATED,
+                constants.MINION_POOL_STATUS_ALLOCATED),
+            constants.EXECUTION_TYPE_MINION_POOL_SET_UP_SHARED_RESOURCES: (
+                constants.MINION_POOL_STATUS_INITIALIZING,
+                constants.MINION_POOL_STATUS_DEALLOCATED,
+                constants.MINION_POOL_STATUS_UNINITIALIZED),
+            constants.EXECUTION_TYPE_MINION_POOL_TEAR_DOWN_SHARED_RESOURCES: (
+                constants.MINION_POOL_STATUS_UNINITIALIZING,
+                constants.MINION_POOL_STATUS_UNINITIALIZED,
+                constants.MINION_POOL_STATUS_UNINITIALIZED)}
+
+        # NOTE(review): an execution status is checked against the finalized
+        # *task* statuses below -- presumably the two sets share their
+        # values; TODO confirm.
+        if new_execution_status == constants.EXECUTION_STATUS_COMPLETED:
+            status_index = 1
+        elif new_execution_status in constants.FINALIZED_TASK_STATUSES:
+            status_index = 2
+        else:
+            status_index = 0
+
+        final_pool_status = None
+        pool_statuses = pool_statuses_map.get(execution.type)
+        if pool_statuses:
+            final_pool_status = pool_statuses[status_index]
+        if not final_pool_status:
+            LOG.error(
+                "Could not determine pool status following transition of "
+                "execution '%s' (type '%s') to status '%s'. Presuming error "
+                "has occurred. Marking pool as error'd.",
+                execution.id, execution.type, new_execution_status)
+            final_pool_status = constants.MINION_POOL_STATUS_ERROR
+
+        LOG.info(
+            "Marking minion pool '%s' status as '%s' in the DB following the "
+            "transition of execution '%s' (type '%s') to status '%s'.",
+            execution.action_id, final_pool_status, execution.id,
+            execution.type, new_execution_status)
+        db_api.set_minion_pool_lifecycle_status(
+            ctxt, execution.action_id, final_pool_status)
+
+
     @parent_tasks_execution_synchronized
     def set_task_host(self, ctxt, task_id, host):
         """ Saves the ID of the worker host which has accepted
@@ -2425,9 +2479,6 @@ class ConductorServerEndpoint(object):
                     ctxt, execution.action_id, {
                         "pool_shared_resources": task_info.get(
                             "pool_shared_resources", {})})
-                db_api.set_minion_pool_lifecycle_status(
-                    ctxt, execution.action_id,
-                    constants.MINION_POOL_STATUS_DEALLOCATED)
 
         elif task_type == constants.TASK_TYPE_TEAR_DOWN_SHARED_POOL_RESOURCES:
             still_running = _check_other_tasks_running(execution, task)
@@ -2439,9 +2490,6 @@ class ConductorServerEndpoint(object):
                 db_api.update_minion_pool_lifecycle(
                     ctxt, execution.action_id, {
                         "pool_shared_resources": {}})
-                db_api.set_minion_pool_lifecycle_status(
-                    ctxt, execution.action_id,
-                    constants.MINION_POOL_STATUS_UNINITIALIZED)
 
         elif task_type == constants.TASK_TYPE_CREATE_MINION:
             LOG.info(
@@ -2461,12 +2509,6 @@ class ConductorServerEndpoint(object):
                 "minion_backup_writer_connection_info"]
             db_api.add_minion_machine(ctxt, minion_machine)
 
-            still_running = _check_other_tasks_running(execution, task)
-            if not still_running:
-                db_api.set_minion_pool_lifecycle_status(
-                    ctxt, execution.action_id,
-                    constants.MINION_POOL_STATUS_ALLOCATED)
-
         elif task_type == constants.TASK_TYPE_DELETE_MINION:
             LOG.info(
                 "%s task for Minon Machine '%s' has completed successfully. "
@@ -2474,12 +2516,6 @@ class ConductorServerEndpoint(object):
                 constants.TASK_TYPE_DELETE_MINION, task.instance)
             db_api.delete_minion_machine(ctxt, task.instance)
 
-            still_running = _check_other_tasks_running(execution, task)
-            if not still_running:
-                db_api.set_minion_pool_lifecycle_status(
-                    ctxt, execution.action_id,
-                    constants.MINION_POOL_STATUS_DEALLOCATED)
-
         else:
             LOG.debug(
                 "No post-task actions required for task '%s' of type '%s'",
@@ -3414,14 +3450,17 @@ class ConductorServerEndpoint(object):
         minion_pool = self._get_minion_pool(
             ctxt, minion_pool_id, include_tasks_executions=False,
             include_machines=False)
-        if minion_pool.pool_status != constants.MINION_POOL_STATUS_UNINITIALIZED:
+        acceptable_deletion_statuses = [
+            constants.MINION_POOL_STATUS_UNINITIALIZED,
+            constants.MINION_POOL_STATUS_ERROR]
+        if minion_pool.pool_status not in acceptable_deletion_statuses:
             raise exception.InvalidMinionPoolState(
                 "Minion Pool '%s' cannot be deleted as it is in '%s' status "
-                "instead of the expected '%s'. Please ensure the pool machines"
-                "have been deallocated and the pool's supporting resources "
-                "have been torn down before deleting the pool." % (
+                "instead of one of the expected '%s'. Please ensure the pool "
+                "machines have been deallocated and the pool's supporting "
+                "resources have been torn down before deleting the pool." % (
                     minion_pool_id, minion_pool.pool_status,
-                    constants.MINION_POOL_STATUS_UNINITIALIZED))
+                    acceptable_deletion_statuses))
 
         LOG.info("Deleting minion pool with ID '%s'" % minion_pool_id)
         db_api.delete_minion_pool_lifecycle(ctxt, minion_pool_id)

+ 9 - 0
coriolis/constants.py

@@ -242,6 +242,14 @@ EXECUTION_TYPE_MINION_POOL_ALLOCATE_MINIONS = "minion_pool_allocate_minions"
 EXECUTION_TYPE_MINION_POOL_DEALLOCATE_MINIONS = (
     "minion_pool_deallocate_minions")
 
+# Types of executions which are run on minion pools. The conductor checks
+# membership in this list to decide whether a change in the status of an
+# execution must also be reflected in the status of its minion pool.
+# NOTE(review): the conductor's pool status mapping only covers the
+# set up/tear down shared resources and allocate/deallocate minions types,
+# so maintenance and update executions currently lead to their pool being
+# marked as error'd -- confirm this is intended.
+MINION_POOL_EXECUTION_TYPES = [
+    EXECUTION_TYPE_MINION_POOL_MAINTENANCE,
+    EXECUTION_TYPE_MINION_POOL_UPDATE,
+    EXECUTION_TYPE_MINION_POOL_SET_UP_SHARED_RESOURCES,
+    EXECUTION_TYPE_MINION_POOL_TEAR_DOWN_SHARED_RESOURCES,
+    EXECUTION_TYPE_MINION_POOL_ALLOCATE_MINIONS,
+    EXECUTION_TYPE_MINION_POOL_DEALLOCATE_MINIONS]
+
 TASK_LOCK_NAME_FORMAT = "task-%s"
 EXECUTION_LOCK_NAME_FORMAT = "execution-%s"
 ENDPOINT_LOCK_NAME_FORMAT = "endpoint-%s"
@@ -279,6 +287,7 @@ SCHEDULER_MAIN_MESSAGING_TOPIC = "coriolis_scheduler"
 REPLICA_CRON_MAIN_MESSAGING_TOPIC = "coriolis_replica_cron_worker"
 
 MINION_POOL_STATUS_UNKNOWN = "UNKNOWN"
+MINION_POOL_STATUS_ERROR = "ERROR"
 MINION_POOL_STATUS_UNINITIALIZED = "UNINITIALIZED"
 MINION_POOL_STATUS_UNINITIALIZING = "UNINITIALIZING"
 MINION_POOL_STATUS_INITIALIZING = "INITIALIZING"

+ 1 - 0
coriolis/db/api.py

@@ -574,6 +574,7 @@ def set_execution_status(
     if update_action_status:
         set_action_last_execution_status(
             context, execution.action_id, status)
+    return execution
 
 
 @enginefacade.reader