
This commit removes the hardcoded "RegionOne" region name and instead
retrieves the region name dynamically from the service configuration.
This change prepares for a future update where DC services will be
deployed on a standalone system that uses a UUID as the default region
name.

Test Plan:
01. PASS - Add a subcloud.
02. PASS - Manage and unmanage a subcloud.
03. PASS - List and show subcloud details using subcloud list and
    subcloud show --detail.
04. PASS - Delete a subcloud.
05. PASS - Run 'dcmanager strategy-config update' using different region
    names: "RegionOne", "SystemController", and without specifying a
    region name. Verify that the default options are modified accordingly.
06. PASS - Run the previous test but using 'dcmanager strategy-config
    show' instead.
07. PASS - Upload a patch using the dcorch proxy
    (--os-region-name SystemController).
08. PASS - Run prestage orchestration.
09. PASS - Apply a patch to the system controller and then to the
    subclouds.
10. PASS - Review all dcmanager and dcorch logs to ensure no exceptions
    are raised.

Story: 2011312
Task: 51861

Change-Id: I85c93c865c40418a351dab28aac56fc08464af72
Signed-off-by: Gustavo Herzmann <gustavo.herzmann@windriver.com>
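
A minimal sketch of the pattern this change adopts (illustrative only:
cutils.get_region_one_name() is the helper used in the code below; the
surrounding lines are assumed):

    # Before: region name hardcoded at the call site
    region = "RegionOne"

    # After: resolved dynamically, so a standalone system whose default
    # region name is a UUID still gets the right value
    from dccommon import utils as cutils
    region = cutils.get_region_one_name()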
# Copyright 2017 Ericsson AB.
# Copyright (c) 2017-2025 Wind River Systems, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import abc
import datetime
import threading
import time

from keystoneauth1 import exceptions as keystone_exceptions
from oslo_log import log as logging

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.keystone_v3 import KeystoneClient
from dccommon.drivers.openstack.patching_v1 import PatchingClient
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.software_v1 import SoftwareClient
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.drivers.openstack import vim
from dccommon import utils as cutils
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import scheduler
from dcmanager.common import utils
from dcmanager.db import api as db_api

LOG = logging.getLogger(__name__)

DEFAULT_SLEEP_TIME_IN_SECONDS = 10

class OrchThread(threading.Thread):
    """Abstract Orchestration Thread

    This thread is responsible for the orchestration strategy.
    Here is how it works:
    - The user creates an update strategy from the CLI (or REST API).
    - This is handled by the SwUpdateManager class, which runs under the
      main dcmanager thread. The strategy is created and stored in the
      database.
    - The user then applies the strategy from the CLI (or REST API). The
      SwUpdateManager code updates the state of the strategy in the database.
    - The OrchThread wakes up periodically and checks the database for
      a strategy of its expected type that is in an active state. If
      so, it executes the strategy, updating the strategy and steps in the
      database as it goes, with state and progress information.
    """

    # each subclass must provide the STATE_OPERATORS
    STATE_OPERATORS = {}
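    # Illustrative sketch of what a concrete subclass supplies (hypothetical
    # names; the real subclasses live elsewhere in dcmanager):
    #
    #     class ExampleOrchThread(OrchThread):
    #         STATE_OPERATORS = {
    #             consts.STRATEGY_STATE_EXAMPLE: ExampleState,
    #         }
    #
    #         def trigger_audit(self):
    #             # notify dcmanager-audit through the RPC client
    #             self.audit_rpc_client.trigger_example_audit(self.context)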
    def __init__(
        self,
        strategy_lock,
        audit_rpc_client,
        update_type,
        vim_strategy_name,
        starting_state,
    ):
        super(OrchThread, self).__init__()
        # Used to protect the strategy when an atomic read/update is required.
        self.strategy_lock = strategy_lock
        # Used to notify dcmanager-audit to trigger an audit
        self.audit_rpc_client = audit_rpc_client
        # The update type for the orch thread
        self.update_type = update_type
        # The vim strategy name for the orch thread
        self.vim_strategy_name = vim_strategy_name
        # When an apply is initiated, this is the first state
        self.starting_state = starting_state

        self.context = context.get_admin_context()
        self._stop = threading.Event()
        # Keeps track of greenthreads we create to do work.
        self.thread_group_manager = scheduler.ThreadGroupManager(thread_pool_size=5000)
        # Track the worker created for each subcloud.
        self.subcloud_workers = dict()
        # Track if the strategy setup function was executed
        self._setup = False
        # Initialize the main orch thread sleep time
        self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
    @abc.abstractmethod
    def trigger_audit(self):
        """Subclass MUST override this method"""
        LOG.warn(
            "(%s) OrchThread subclass must override trigger_audit" % self.update_type
        )

    def _pre_apply_setup(self):
        """Setup performed once before a strategy starts to apply"""
        if not self._setup:
            LOG.info("(%s) OrchThread Pre-Apply Setup" % self.update_type)
            self._setup = True
            self.pre_apply_setup()

    def pre_apply_setup(self):
        """Subclass can override this method"""

    def _post_delete_teardown(self):
        """Cleanup code executed once after deleting a strategy"""
        if self._setup:
            LOG.info("(%s) OrchThread Post-Delete Teardown" % self.update_type)
            self._setup = False
            self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
            self.post_delete_teardown()

    def post_delete_teardown(self):
        """Subclass can override this method"""
    def stopped(self):
        return self._stop.is_set()

    def stop(self):
        LOG.info("(%s) OrchThread Stopping" % self.update_type)
        self._stop.set()

    def run(self):
        LOG.info("(%s) OrchThread Starting" % self.update_type)
        self.run_orch()
        # Stop any greenthreads that are still running
        LOG.info("(%s) OrchThread Stopping" % self.update_type)
        self.thread_group_manager.stop()
    @staticmethod
    def get_ks_client(region_name: str = None) -> KeystoneClient:
        """Get a cached keystone client (and token).

        Throws an exception if the keystone client cannot be initialized.
        """
        os_client = OpenStackDriver(
            region_name=region_name,
            region_clients=None,
            fetch_subcloud_ips=utils.fetch_subcloud_mgmt_ips,
        )
        return os_client.keystone_client

    @staticmethod
    def get_vim_client(region_name: str = None) -> vim.VimClient:
        ks_client = OrchThread.get_ks_client(region_name)
        return vim.VimClient(ks_client.region_name, ks_client.session)

    @staticmethod
    def get_sysinv_client(region_name: str = None) -> SysinvClient:
        ks_client = OrchThread.get_ks_client(region_name)
        endpoint = ks_client.endpoint_cache.get_endpoint("sysinv")
        return SysinvClient(ks_client.region_name, ks_client.session, endpoint=endpoint)

    @staticmethod
    def get_software_client(region_name: str = None) -> SoftwareClient:
        ks_client = OrchThread.get_ks_client(region_name)
        return SoftwareClient(
            ks_client.session,
            endpoint=ks_client.endpoint_cache.get_endpoint("usm"),
        )

    @staticmethod
    def get_patching_client(region_name: str = None) -> PatchingClient:
        ks_client = OrchThread.get_ks_client(region_name)
        return PatchingClient(ks_client.region_name, ks_client.session)
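    # Illustrative use of the client helpers above (a sketch; the region and
    # strategy names are placeholders, not values from this module):
    #
    #     vim_client = OrchThread.get_vim_client("subcloud1")
    #     strategy = vim_client.get_strategy(strategy_name="sw-upgrade")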
    @staticmethod
    def get_region_name(strategy_step):
        """Get the region name for a strategy step"""
        if strategy_step.subcloud_id is None:
            # This is the SystemController.
            return cutils.get_region_one_name()
        return strategy_step.subcloud.region_name

    @staticmethod
    def get_subcloud_name(strategy_step):
        """Get the subcloud name for a strategy step"""
        if strategy_step.subcloud_id is None:
            # This is the SystemController.
            return cutils.get_region_one_name()
        return strategy_step.subcloud.name
    @staticmethod
    def format_update_details(last_state, info):
        # Optionally include the last state, since the current state is likely
        # 'failed'
        if last_state:
            details = "%s: %s" % (last_state, info)
        else:
            details = str(info)
        # Details cannot exceed 1000 characters; inform the user to check the
        # full logs instead
        if len(details) > 1000:
            details = (
                "Error message longer than 1000 characters, "
                "please check orchestrator logs for additional details."
            )
        return details
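    # Worked example (illustrative): format_update_details("pre check",
    # "failed to connect") returns "pre check: failed to connect"; any
    # result longer than 1000 characters is replaced wholesale by a short
    # message pointing at the orchestrator logs.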
    def determine_state_operator(self, strategy_step):
        """Return the state operator for the current state"""
        state_operator = self.STATE_OPERATORS.get(strategy_step.state)
        # Instantiate and return the state operator class
        return state_operator(region_name=OrchThread.get_region_name(strategy_step))
    def strategy_step_update(self, subcloud_id, state=None, details=None, stage=None):
        """Update the strategy step in the DB

        Sets the start and finished timestamp if necessary, based on state.
        """
        started_at = None
        finished_at = None
        if state == self.starting_state:
            started_at = datetime.datetime.now()
        elif state in [
            consts.STRATEGY_STATE_COMPLETE,
            consts.STRATEGY_STATE_ABORTED,
            consts.STRATEGY_STATE_FAILED,
        ]:
            finished_at = datetime.datetime.now()
        # Return the updated object, in case we need to use its updated values
        return db_api.strategy_step_update(
            self.context,
            subcloud_id,
            stage=stage,
            state=state,
            details=details,
            started_at=started_at,
            finished_at=finished_at,
        )
    def _update_subcloud_deploy_status(self, subcloud):
        # If an exception occurs during the create/apply of the VIM strategy,
        # the deploy_status will be set to 'apply-strategy-failed'. If we retry
        # the orchestration and the process completes successfully, we need to
        # update the deploy_status to 'complete'.
        if subcloud.deploy_status != consts.DEPLOY_STATE_DONE:
            # Update the subcloud deploy state to complete
            db_api.subcloud_update(
                self.context,
                subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DONE,
            )
    def _delete_subcloud_worker(self, region, subcloud_id):
        db_api.strategy_step_update(
            self.context,
            subcloud_id,
            stage=consts.STAGE_SUBCLOUD_ORCHESTRATION_PROCESSED,
        )
        if region in self.subcloud_workers:
            # The orchestration for this subcloud has either
            # completed/failed/aborted, so remove it from the dictionary.
            LOG.debug("Remove %s from subcloud_workers dict" % region)
            del self.subcloud_workers[region]
    def _adjust_sleep_time(self, number_of_subclouds):
        prev_sleep_time = self.sleep_time

        if number_of_subclouds <= 0:
            new_sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
        else:
            new_sleep_time = min(
                (DEFAULT_SLEEP_TIME_IN_SECONDS * 60)
                / min(number_of_subclouds, consts.MAX_PARALLEL_SUBCLOUDS_LIMIT),
                DEFAULT_SLEEP_TIME_IN_SECONDS,
            )

        if new_sleep_time != prev_sleep_time:
            self.sleep_time = new_sleep_time
            LOG.debug(
                f"Adjusted {self.update_type} orch thread sleep time from "
                f"{prev_sleep_time} to {self.sleep_time} "
                f"based on {number_of_subclouds} parallel subclouds."
            )
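    # Worked example (illustrative): with DEFAULT_SLEEP_TIME_IN_SECONDS = 10,
    # 10 subclouds give min(600 / 10, 10) = 10s, while 100 subclouds give
    # min(600 / 100, 10) = 6s, so larger parallel batches are polled more
    # often; consts.MAX_PARALLEL_SUBCLOUDS_LIMIT bounds the divisor.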
    def run_orch(self):
        while not self.stopped():
            try:
                LOG.debug("(%s) OrchThread Running" % self.update_type)

                sw_update_strategy = db_api.sw_update_strategy_get(
                    self.context, update_type=self.update_type
                )

                if sw_update_strategy.type == self.update_type:
                    if sw_update_strategy.state in [
                        consts.SW_UPDATE_STATE_APPLYING,
                        consts.SW_UPDATE_STATE_ABORTING,
                    ]:
                        self._pre_apply_setup()
                        self.apply(sw_update_strategy)
                    elif (
                        sw_update_strategy.state
                        == consts.SW_UPDATE_STATE_ABORT_REQUESTED
                    ):
                        self.abort(sw_update_strategy)
                    elif sw_update_strategy.state == consts.SW_UPDATE_STATE_DELETING:
                        self.delete(sw_update_strategy)
                        self._post_delete_teardown()

            except exceptions.NotFound:
                # Nothing to do if a strategy doesn't exist
                pass

            except Exception:
                # We catch all exceptions to avoid terminating the thread.
                LOG.exception("(%s) OrchThread unexpected exception" % self.update_type)

            # Wake up every so often to see if there is work to do.
            time.sleep(self.sleep_time)

        LOG.info("(%s) OrchThread ended main loop" % self.update_type)
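    # Illustrative lifecycle sketch (hypothetical caller; the concrete
    # subclasses and their constructor arguments live elsewhere in dcmanager):
    #
    #     thread = ExampleOrchThread(lock, audit_rpc_client, ...)
    #     thread.start()   # enters run(), which loops in run_orch()
    #     ...
    #     thread.stop()    # sets the stop event; the loop exits
    #     thread.join()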
    def apply(self, sw_update_strategy):
        """Apply a sw update strategy"""

        LOG.debug("(%s) Applying update strategy" % self.update_type)
        strategy_steps = db_api.strategy_step_get_all(self.context)
        # Adjust sleep time based on the number of subclouds being processed
        # in parallel
        self._adjust_sleep_time(len(strategy_steps))

        stop = False
        failure_detected = False
        abort_detected = False
        for strategy_step in strategy_steps:
            if strategy_step.state == consts.STRATEGY_STATE_COMPLETE:
                # Update deploy state for subclouds to complete
                self._update_subcloud_deploy_status(strategy_step.subcloud)
                # This step is complete
                self._delete_subcloud_worker(
                    strategy_step.subcloud.region_name, strategy_step.subcloud_id
                )
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_ABORTED:
                # This step was aborted
                self._delete_subcloud_worker(
                    strategy_step.subcloud.region_name, strategy_step.subcloud_id
                )
                abort_detected = True
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_FAILED:
                failure_detected = True
                self._delete_subcloud_worker(
                    strategy_step.subcloud.region_name, strategy_step.subcloud_id
                )
                # This step has failed and needs no further action
                if strategy_step.subcloud_id is None:
                    # Strategy on SystemController failed. We are done.
                    LOG.info(
                        "(%s) Stopping strategy due to failure while "
                        "processing update step on SystemController" % self.update_type
                    )
                    with self.strategy_lock:
                        db_api.sw_update_strategy_update(
                            self.context,
                            state=consts.SW_UPDATE_STATE_FAILED,
                            update_type=self.update_type,
                        )
                    self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
                    # Trigger audit to update the sync status for each subcloud.
                    self.trigger_audit()
                    return
                elif sw_update_strategy.stop_on_failure:
                    # We have been told to stop on failures
                    stop = True
                    break
                continue
            # We have found the first step that isn't complete or failed.
            break
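        # NOTE: this "else" belongs to the "for" loop above; it runs only when
        # the loop completed without hitting "break", i.e. every step is
        # already complete, failed, or aborted.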
        else:
            # The strategy application is complete
            if failure_detected:
                LOG.info("(%s) Strategy application has failed." % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_FAILED,
                        update_type=self.update_type,
                    )
            elif abort_detected:
                LOG.info("(%s) Strategy application was aborted." % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_ABORTED,
                        update_type=self.update_type,
                    )
            else:
                LOG.info("(%s) Strategy application is complete." % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_COMPLETE,
                        update_type=self.update_type,
                    )
            self.subcloud_workers.clear()
            self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS

            # Trigger audit to update the sync status for each subcloud.
            LOG.info(f"Trigger audit for {self.update_type}")
            self.trigger_audit()
            return
        if stop:
            work_remaining = False
            # We are going to stop after the steps that are in progress finish.
            if len(self.subcloud_workers) > 0:
                work_remaining = True

            if not work_remaining:
                # We have completed the remaining steps
                LOG.info("(%s) Stopping strategy due to failure" % self.update_type)
                with self.strategy_lock:
                    db_api.sw_update_strategy_update(
                        self.context,
                        state=consts.SW_UPDATE_STATE_FAILED,
                        update_type=self.update_type,
                    )
                self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
                # Trigger audit to update the sync status for each subcloud.
                self.trigger_audit()
                return
        for strategy_step in strategy_steps:
            region = self.get_region_name(strategy_step)
            if self.stopped():
                LOG.info("(%s) Exiting because task is stopped" % self.update_type)
                self.subcloud_workers.clear()
                self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS
                return
            if strategy_step.state == consts.STRATEGY_STATE_FAILED:
                LOG.debug("(%s) Intermediate step is failed" % self.update_type)
                self._delete_subcloud_worker(region, strategy_step.subcloud_id)
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_COMPLETE:
                LOG.debug("(%s) Intermediate step is complete" % self.update_type)
                self._delete_subcloud_worker(region, strategy_step.subcloud_id)
                continue
            elif strategy_step.state == consts.STRATEGY_STATE_ABORTED:
                LOG.debug("(%s) Intermediate step is aborted" % self.update_type)
                self._delete_subcloud_worker(region, strategy_step.subcloud_id)
            elif strategy_step.state == consts.STRATEGY_STATE_INITIAL:
                if (
                    sw_update_strategy.max_parallel_subclouds
                    > len(self.subcloud_workers)
                    and not stop
                ):
                    # Don't start upgrading this subcloud if it has been
                    # unmanaged by the user. If orchestration was already
                    # started, it will be allowed to complete.
                    if (
                        strategy_step.subcloud_id is not None
                        and strategy_step.subcloud.management_state
                        == dccommon_consts.MANAGEMENT_UNMANAGED
                    ):
                        message = (
                            "Subcloud %s is unmanaged." % strategy_step.subcloud.name
                        )
                        LOG.warn(message)
                        self.strategy_step_update(
                            strategy_step.subcloud_id,
                            state=consts.STRATEGY_STATE_FAILED,
                            details=message,
                        )
                        continue

                    # We are just getting started, enter the first state.
                    # Use the updated value for calling process_update_step
                    strategy_step = self.strategy_step_update(
                        strategy_step.subcloud_id,
                        stage=consts.STAGE_SUBCLOUD_ORCHESTRATION_STARTED,
                        state=self.starting_state,
                    )
                    # The starting state should log an error if the greenthread
                    # already exists
                    self.process_update_step(region, strategy_step, log_error=True)
            else:
                self.process_update_step(region, strategy_step, log_error=False)
    def abort(self, sw_update_strategy):
        """Abort an update strategy"""

        LOG.info("(%s) Aborting update strategy" % self.update_type)

        # Only strategy steps that did not start processing can be updated to
        # aborted
        filters = {"state": consts.STRATEGY_STATE_INITIAL}
        values = {"state": consts.STRATEGY_STATE_ABORTED, "details": ""}

        # Currently, the orchestrator only supports executing a single strategy
        # at a time and there isn't any database relationship between the steps
        # and the strategy, so we just update all the steps
        db_api.strategy_step_update_all(self.context, filters, values)

        with self.strategy_lock:
            db_api.sw_update_strategy_update(
                self.context,
                state=consts.SW_UPDATE_STATE_ABORTING,
                update_type=self.update_type,
            )
    def delete(self, sw_update_strategy):
        """Delete an update strategy"""

        LOG.info("(%s) Deleting update strategy" % self.update_type)
        strategy_steps = db_api.strategy_step_get_all(self.context)
        # Adjust sleep time based on the number of subclouds being processed
        # in parallel
        self._adjust_sleep_time(len(strategy_steps))

        for strategy_step in strategy_steps:
            region = self.get_region_name(strategy_step)
            if region in self.subcloud_workers:
                # A worker already exists. Let it finish whatever it was doing.
                LOG.debug("Worker already exists for %s." % region)
            else:
                # Create a greenthread to delete the subcloud strategy
                delete_thread = self.thread_group_manager.start(
                    self.delete_subcloud_strategy, strategy_step
                )
                if delete_thread:
                    self.subcloud_workers[region] = delete_thread

            if self.stopped():
                LOG.info("(%s) Exiting because task is stopped" % self.update_type)
                return

        # Wait up to 180 seconds (18 x 10s) for the remaining workers to
        # complete their execution
        counter = 0
        while len(self.subcloud_workers) > 0:
            time.sleep(10)
            counter = counter + 1
            if counter > 18:
                break

        # Remove the strategy from the database, whether or not all workers
        # have completed by now
        try:
            db_api.strategy_step_destroy_all(self.context)
            db_api.sw_update_strategy_destroy(self.context)
        except Exception as e:
            LOG.exception("(%s) exception during delete" % self.update_type)
            raise e
        finally:
            self.sleep_time = DEFAULT_SLEEP_TIME_IN_SECONDS

        LOG.info("(%s) Finished deleting update strategy" % self.update_type)
    def delete_subcloud_strategy(self, strategy_step):
        """Delete the update strategy in this subcloud

        Removes the worker reference after the operation is complete.
        """

        try:
            self.do_delete_subcloud_strategy(strategy_step)
        except Exception as e:
            LOG.exception(e)
        finally:
            # The worker is done.
            region = self.get_region_name(strategy_step)
            if region in self.subcloud_workers:
                del self.subcloud_workers[region]
    def do_delete_subcloud_strategy(self, strategy_step):
        """Delete the vim strategy in this subcloud"""

        if self.vim_strategy_name is None:
            return

        region = self.get_region_name(strategy_step)

        LOG.info(
            "(%s) Deleting vim strategy:(%s) for region:(%s)"
            % (self.update_type, self.vim_strategy_name, region)
        )

        # First check if the strategy has been created.
        try:
            vim_client = OrchThread.get_vim_client(region)
            subcloud_strategy = vim_client.get_strategy(
                strategy_name=self.vim_strategy_name
            )
        except (keystone_exceptions.EndpointNotFound, IndexError):
            message = "(%s) Endpoint for subcloud: %s not found." % (
                self.update_type,
                region,
            )
            LOG.warn(message)
            return
        except Exception:
            # The strategy doesn't exist, so there is nothing to do
            return

        if subcloud_strategy.state in [
            vim.STATE_BUILDING,
            vim.STATE_APPLYING,
            vim.STATE_ABORTING,
        ]:
            # Can't delete a vim strategy in these states
            message = (
                "(%s) Vim strategy:(%s) for region:(%s) in wrong state:(%s) for delete."
                % (
                    self.update_type,
                    self.vim_strategy_name,
                    region,
                    subcloud_strategy.state,
                )
            )
            LOG.warn(message)
            return

        # If we are here, we need to delete the strategy
        try:
            vim_client.delete_strategy(strategy_name=self.vim_strategy_name)
        except Exception:
            message = "(%s) Vim strategy:(%s) delete failed for region:(%s)" % (
                self.update_type,
                self.vim_strategy_name,
                region,
            )
            LOG.warn(message)
            return
    def process_update_step(self, region, strategy_step, log_error=False):
        """Manage the greenthread for calling perform_state_action"""
        if region in self.subcloud_workers:
            # Entries in subcloud_workers are (state, greenthread) tuples,
            # so element [0] is the state the worker was started for.
            if self.subcloud_workers[region][0] == strategy_step.state:
                # A worker already exists. Let it finish whatever it was doing.
                if log_error:
                    LOG.error(
                        "(%s) Worker should not exist for %s."
                        % (self.update_type, region)
                    )
                else:
                    LOG.debug(
                        "(%s) Update worker exists for %s." % (self.update_type, region)
                    )
            else:
                LOG.debug(
                    "Starting a new worker for region %s at state %s (update)"
                    % (region, strategy_step.state)
                )
                # Advance to the next state. The previous greenthread has
                # exited, so create a new one.
                self.subcloud_workers[region] = (
                    strategy_step.state,
                    self.thread_group_manager.start(
                        self.perform_state_action, strategy_step
                    ),
                )
        else:
            # This is the first state. Create a greenthread to start processing
            # the update for the subcloud and invoke the perform_state_action
            # method.
            LOG.debug(
                "Starting a new worker for region %s at state %s"
                % (region, strategy_step.state)
            )
            self.subcloud_workers[region] = (
                strategy_step.state,
                self.thread_group_manager.start(
                    self.perform_state_action, strategy_step
                ),
            )
    def perform_state_action(self, strategy_step):
        """Extensible state handler for processing and transitioning states"""
        try:
            LOG.info(
                "(%s) Stage: %s, State: %s, Subcloud: %s"
                % (
                    self.update_type,
                    strategy_step.stage,
                    strategy_step.state,
                    self.get_subcloud_name(strategy_step),
                )
            )
            # Instantiate the state operator and perform the state actions
            state_operator = self.determine_state_operator(strategy_step)
            state_operator.registerStopEvent(self._stop)
            next_state = state_operator.perform_state_action(strategy_step)
            self.strategy_step_update(
                strategy_step.subcloud_id, state=next_state, details=""
            )
        except exceptions.StrategySkippedException as ex:
            LOG.info(
                "(%s) Skipping subcloud, Stage: %s, State: %s, Subcloud: %s"
                % (
                    self.update_type,
                    strategy_step.stage,
                    strategy_step.state,
                    strategy_step.subcloud.name,
                )
            )
            # Transition immediately to complete. Update the details to show
            # that this subcloud has been skipped
            details = self.format_update_details(None, str(ex))
            self.strategy_step_update(
                strategy_step.subcloud_id,
                state=consts.STRATEGY_STATE_COMPLETE,
                details=details,
            )
        except Exception as ex:
            # Catch ALL exceptions and set the strategy step to failed
            LOG.exception(
                "(%s) Failed! Stage: %s, State: %s, Subcloud: %s"
                % (
                    self.update_type,
                    strategy_step.stage,
                    strategy_step.state,
                    strategy_step.subcloud.name,
                )
            )
            details = self.format_update_details(strategy_step.state, str(ex))
            self.strategy_step_update(
                strategy_step.subcloud_id,
                state=consts.STRATEGY_STATE_FAILED,
                details=details,
            )