Hugo Brito b3d206781b Improve DC VIM strategy create/apply error handling
This commit updates the subcloud's error_description with the error
returned by the software API during VIM strategy create and apply.

- Create two custom exceptions for handling these errors.
- Clean up error_description in strategy creation.

Note: This commit also updates the timeout values of the software API.

Test Plan:
PASS - Apply a sw-deploy-strategy and force an error in the
deploy precheck command.
  - Apply should fail in the `create VIM strategy` state
  - dcmanager subcloud errors should be updated
PASS - Apply a sw-deploy-strategy and force an error in the
deploy start command.
  - Apply should fail in `apply VIM strategy` state
  - dcmanager subcloud errors should be updated
PASS - Create a dcmanager sw-deploy-strategy with subcloud errors.
  - Strategy created and subcloud errors should be `No errors present`.

Story: 2010676
Task: 50644

Change-Id: Ib0b0b586d90093088a6af96e5d630e3fe04fd3f7
Signed-off-by: Hugo Brito <hugo.brito@windriver.com>
2024-07-30 13:59:53 -03:00

192 lines
8.1 KiB
Python

#
# Copyright (c) 2020-2021, 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import time
from dccommon.drivers.openstack import vim
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
# Default polling budget used while waiting for a VIM strategy to be built.
# Max time: 30 minutes = 180 queries x 10 seconds between queries
DEFAULT_MAX_QUERIES = 180
# Seconds to sleep before each query of the VIM strategy state
DEFAULT_SLEEP_DURATION = 10
class CreatingVIMStrategyState(BaseState):
    """State for creating the VIM strategy.

    Makes sure a VIM strategy named ``strategy_name`` exists for the
    subcloud region (deleting and re-creating a stale one when possible)
    and then polls the VIM until the strategy has finished building.
    """

    def __init__(self, next_state, region_name, strategy_name):
        """Initialize the state.

        :param next_state: state returned by perform_state_action on success
        :param region_name: region name forwarded to BaseState
        :param strategy_name: name of the VIM strategy handled by this state
        """
        super().__init__(next_state=next_state, region_name=region_name)
        self.strategy_name = strategy_name
        # max time to wait for the strategy to be built (in seconds)
        # is: sleep_duration * max_queries
        self.sleep_duration = DEFAULT_SLEEP_DURATION
        self.max_queries = DEFAULT_MAX_QUERIES

    def _create_vim_strategy(self, strategy_step, region):
        """Ask the VIM to create the strategy and return the new strategy.

        :param strategy_step: strategy step being processed; its subcloud_id
            selects the update options and its subcloud name is used in errors
        :param region: region whose VIM client is used for the API call
        :returns: the strategy object returned by the VIM create call
        :raises CreateVIMStrategyFailedException: if the created strategy is
            not in the 'building' state
        """
        self.info_log(strategy_step, "Creating (%s) VIM strategy" % self.strategy_name)

        # Get the update options
        opts_dict = utils.get_sw_update_opts(
            self.context, for_sw_update=True, subcloud_id=strategy_step.subcloud_id
        )

        # Get release parameter data for sw-deploy strategy
        if self.strategy_name == vim.STRATEGY_NAME_SW_USM:
            extra_args = utils.get_sw_update_strategy_extra_args(self.context)
            opts_dict["release_id"] = extra_args.get(consts.EXTRA_ARGS_RELEASE_ID)
            # Create rollback = False since DC orchestration does not support
            # rollback
            opts_dict["rollback"] = False

        # Call the API to build the VIM strategy
        # release and rollback will be sent as a **kwargs value for sw-deploy
        # strategy; for any other strategy both lookups yield None.
        subcloud_strategy = self.get_vim_client(region).create_strategy(
            self.strategy_name,
            opts_dict["storage-apply-type"],
            opts_dict["worker-apply-type"],
            opts_dict["max-parallel-workers"],
            opts_dict["default-instance-action"],
            opts_dict["alarm-restriction-type"],
            release=opts_dict.get("release_id"),
            rollback=opts_dict.get("rollback"),
        )

        # a successful API call to create MUST set the state to 'building'
        if subcloud_strategy.state != vim.STATE_BUILDING:
            message = "Unexpected VIM strategy build state."
            raise exceptions.CreateVIMStrategyFailedException(
                subcloud=strategy_step.subcloud.name,
                name=self.strategy_name,
                state=subcloud_strategy.state,
                details=message,
            )
        return subcloud_strategy

    def skip_check(self, strategy_step, subcloud_strategy):
        """Subclasses can override this to allow this state to skip ahead

        :returns: None in this base implementation, meaning "do not skip";
            any other value is used by perform_state_action as the state to
            skip forward to.
        """
        return None

    def _raise_build_failure(self, strategy_step, subcloud_strategy, error_message):
        """Record the VIM build error on the subcloud and fail the state.

        :param strategy_step: strategy step being processed
        :param subcloud_strategy: VIM strategy whose build did not succeed
        :param error_message: short description of the failure, without a
            trailing separator
        :raises CreateVIMStrategyFailedException: always
        """
        build_phase = subcloud_strategy.build_phase
        # Prefer the response; if it is empty, use the reason
        build_error = build_phase.response or build_phase.reason

        db_api.subcloud_update(
            self.context,
            strategy_step.subcloud_id,
            error_description=build_error,
        )

        # Both response and reason may be empty. Only append the error when
        # there is one, so the failure is never masked by a TypeError raised
        # while concatenating str and None.
        details = error_message
        if build_error:
            details = "%s: %s" % (error_message, build_error)

        raise exceptions.CreateVIMStrategyFailedException(
            subcloud=strategy_step.subcloud.name,
            name=self.strategy_name,
            state=subcloud_strategy.state,
            details=details,
        )

    def perform_state_action(self, strategy_step):
        """Create a VIM strategy using VIM REST API

        Any client (vim, sysinv, etc..) should be re-queried whenever used
        to ensure the keystone token is up to date.

        Any exceptions raised by this method set the strategy to FAILED

        Returns the next state for the state machine if successful.

        :raises StrategyStoppedException: if a stop was requested while
            waiting for the build
        :raises CreateVIMStrategyFailedException: if an existing strategy
            cannot be replaced, the build fails, or the build does not
            finish within max_queries queries
        """
        region = self.get_region_name(strategy_step)

        # Get the existing VIM strategy, which may be None
        subcloud_strategy = self.get_vim_client(region).get_strategy(
            strategy_name=self.strategy_name, raise_error_if_missing=False
        )

        if subcloud_strategy is None:
            subcloud_strategy = self._create_vim_strategy(strategy_step, region)
        else:
            self.info_log(
                strategy_step,
                "VIM strategy exists with state: %s" % subcloud_strategy.state,
            )
            # if a strategy exists in any type of failed state or aborted state
            # it should be deleted. Applied state should also be deleted from
            # previous success runs.
            if subcloud_strategy.state in (
                vim.STATE_BUILDING,
                vim.STATE_APPLYING,
                vim.STATE_ABORTING,
            ):
                # Can't delete a strategy in these states
                message = (
                    "Failed to create a VIM strategy. There already is an existing "
                    "strategy in this state."
                )
                self.warn_log(strategy_step, message)
                raise exceptions.CreateVIMStrategyFailedException(
                    subcloud=strategy_step.subcloud.name,
                    name=self.strategy_name,
                    state=subcloud_strategy.state,
                    details=message,
                )

            # if strategy exists in any other type of state, delete and create
            self.info_log(strategy_step, "Deleting existing VIM strategy")
            self.get_vim_client(region).delete_strategy(
                strategy_name=self.strategy_name
            )
            # re-create it
            subcloud_strategy = self._create_vim_strategy(strategy_step, region)

        # A strategy now exists and is being built.
        # Repeatedly query the API until the strategy is done building.
        counter = 0
        while True:
            # If event handler stop has been triggered, fail the state
            if self.stopped():
                raise exceptions.StrategyStoppedException()

            if counter >= self.max_queries:
                details = "Timeout building VIM strategy."
                raise exceptions.CreateVIMStrategyFailedException(
                    subcloud=strategy_step.subcloud.name,
                    name=self.strategy_name,
                    state=subcloud_strategy.state,
                    details=details,
                )
            counter += 1
            time.sleep(self.sleep_duration)

            # query the vim strategy to see if it is in the new state
            subcloud_strategy = self.get_vim_client(region).get_strategy(
                strategy_name=self.strategy_name, raise_error_if_missing=True
            )

            # Check for skip criteria where a failed 'build' might be expected
            # pylint: disable-next=assignment-from-none
            skip_state = self.skip_check(strategy_step, subcloud_strategy)
            if skip_state is not None:
                self.info_log(strategy_step, "Skip forward to state:(%s)" % skip_state)
                self.override_next_state(skip_state)
                # break out of loop. Let overridden 'next_state' take over
                break

            state = subcloud_strategy.state
            if state == vim.STATE_READY_TO_APPLY:
                self.info_log(strategy_step, "VIM strategy has been built")
                break
            if state == vim.STATE_BUILDING:
                # This is the expected state while creating the strategy
                continue

            # Any other state is a build failure
            if state == vim.STATE_BUILD_FAILED:
                error_message = "VIM strategy build failed"
            elif state == vim.STATE_BUILD_TIMEOUT:
                error_message = "VIM strategy build timed out"
            else:
                error_message = "VIM strategy unexpected build state"
            self._raise_build_failure(strategy_step, subcloud_strategy, error_message)

        # Success, state machine can proceed to the next state
        return self.next_state