# Copyright 2015 Huawei Technologies Co., Ltd. # Copyright (c) 2017-2025 Wind River Systems, Inc. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # import datetime import grp import itertools import json import os import pwd import re import resource as sys_resource import subprocess from typing import List from typing import Optional from typing import Union import uuid import xml.etree.ElementTree as ElementTree from keystoneauth1 import exceptions as keystone_exceptions from keystoneclient.v3.client import Client as KeystoneClient import netaddr from oslo_concurrency import lockutils from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import base64 import pecan import requests from retrying import retry import tsconfig.tsconfig as tsc import yaml from dccommon import consts as dccommon_consts from dccommon.drivers.openstack.sdk_platform import OpenStackDriver from dccommon.drivers.openstack import software_v1 from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.drivers.openstack import vim from dccommon import exceptions as dccommon_exceptions from dccommon import kubeoperator from dccommon import utils as cutils from dcmanager.audit import alarm_aggregation from dcmanager.common import consts from dcmanager.common import context from dcmanager.common import exceptions from dcmanager.common.i18n import _ from dcmanager.db import api as db_api LOG = logging.getLogger(__name__) DC_MANAGER_USERNAME = "root" DC_MANAGER_GRPNAME = "root" # Max lines output msg from logs MAX_LINES_MSG = 10 REGION_VALUE_CMD = "grep " + consts.OS_REGION_NAME + " /etc/platform/openrc" ABORT_UPDATE_STATUS = { consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_ABORTING_INSTALL, consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_ABORTING_BOOTSTRAP, consts.DEPLOY_STATE_CONFIGURING: consts.DEPLOY_STATE_ABORTING_CONFIG, consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_ABORTED, consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_ABORTED, consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_ABORTED, } ABORT_UPDATE_FAIL_STATUS = { consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_FAILED, consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_FAILED, consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_FAILED, } RESUME_PREP_UPDATE_STATUS = { consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL, consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP, consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG, } RESUME_PREP_UPDATE_FAIL_STATUS = { consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED, consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED, consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG_FAILED, } def get_import_path(cls): return cls.__module__ + "." + cls.__name__ # Returns a iterator of tuples containing batch_size number of objects in each def get_batch_projects(batch_size, project_list, fillvalue=None): args = [iter(project_list)] * batch_size return itertools.zip_longest(fillvalue=fillvalue, *args) def validate_address_str(ip_address_str, networks): """Determine whether an dual-stack address is valid.""" address_values = ip_address_str.split(",") if len(address_values) > 2: raise exceptions.ValidateFail("Invalid address - more than two IP addresses") ip_addresses = [] for address_value in address_values: try: ip_address = netaddr.IPAddress(address_value) ip_addresses.append(ip_address) except Exception: raise exceptions.ValidateFail("Invalid address - not a valid IP address") if len(address_values) == 2 and ip_addresses[0].version == ip_addresses[1].version: raise exceptions.ValidateFail("Invalid address - dual-stack of same IP family") if len(ip_addresses) != len(networks): raise exceptions.ValidateFail( "Invalid address - Not of same size (single or dual-stack) with subnet" ) for i, ip_address in enumerate(ip_addresses): if ip_address.version != networks[i].version: msg = ( "Invalid IP version - must match network version " + ip_version_to_string(networks[i].version) ) raise exceptions.ValidateFail(msg) elif ip_address == networks[i]: raise exceptions.ValidateFail("Cannot use network address") elif ip_address.version == 4 and ip_address == networks[i].broadcast: raise exceptions.ValidateFail("Cannot use broadcast address") elif ip_address not in networks[i]: raise exceptions.ValidateFail( "Address must be in subnet %s" % str(networks[i]) ) return ip_addresses def ip_version_to_string(ip_version): """Returns a string representation of ip_version.""" if ip_version == 4: return "IPv4" elif ip_version == 6: return "IPv6" else: return "IP" def validate_network_str( network_str, minimum_size, existing_networks=None, multicast=False, operation=None ): """Determine whether dual-stack network is valid.""" network_values = network_str.split(",") if len(network_values) > 2: raise exceptions.ValidateFail("Invalid subnet - more than two IP subnets") networks = [] for network_value in network_values: try: network = netaddr.IPNetwork(network_value) networks.append(network) except netaddr.AddrFormatError: raise exceptions.ValidateFail("Invalid subnet - not a valid IP subnet") if len(network_values) == 2 and networks[0].version == networks[1].version: raise exceptions.ValidateFail("Invalid subnet - dual-stack of same IP family") for network in networks: if network.size < minimum_size: raise exceptions.ValidateFail( "Subnet too small - must have at least %d addresses" % minimum_size ) elif network.version == 6 and network.prefixlen < 64: raise exceptions.ValidateFail("IPv6 minimum prefix length is 64") elif existing_networks and operation != "redeploy": if any(network.ip in subnet for subnet in existing_networks): raise exceptions.ValidateFail( "Subnet overlaps with another configured subnet" ) elif multicast and not network.is_multicast(): raise exceptions.ValidateFail("Invalid subnet - must be multicast") return networks def validate_certificate_subject(subject): """Validate a certificate subject Duplicate the get_subject validation logic defined in: sysinv/api/controllers/v1/kube_rootca_update.py Returns a tuple of True, "" if the input is None Returns a tuple of True, "" if the input is valid Returns a tuple of False, "" if the input is invalid """ if subject is None: return True, "" params_supported = ["C", "OU", "O", "ST", "CN", "L"] subject_pairs = re.findall(r"([^=]+=[^=]+)(?:\s|$)", subject) subject_dict = {} for pair_value in subject_pairs: key, value = pair_value.split("=") subject_dict[key] = value if not all([param in params_supported for param in subject_dict.keys()]): return False, ( "There are parameters not supported for the certificate subject " "specification. The subject parameter has to be in the format of " "'C= ST= L= O= " "OU= CN=" ) if "CN" not in list(subject_dict.keys()): return False, ( "The CN= parameter is required to be " "specified in subject argument" ) return True, "" def validate_expiry_date(expiry_date): """Validate a certificate expiry date Duplicate the expiry_date validation logic defined in: sysinv/api/controllers/v1/kube_rootca_update.py Returns a tuple of True, "" if the input is None Returns a tuple of True, "" if the input is valid Returns a tuple of False, "" if the input is invalid """ if expiry_date is None: return True, "" try: date = datetime.datetime.strptime(expiry_date, "%Y-%m-%d") except ValueError: return False, ("expiry_date %s doesn't match format YYYY-MM-DD" % expiry_date) delta = date - datetime.datetime.now() # we sum one day (24 hours) to accomplish the certificate expiry # during the day specified by the user duration = delta.days * 24 + 24 # Cert-manager manages certificates and renew them some time # before it expires. Along this procedure we set renewBefore # parameter for 24h, so we are checking if the duration sent # has at least this amount of time. This is needed to avoid # cert-manager to block the creation of the resources. if duration <= 24: return False, ( "New k8s rootCA should have at least 24 hours of validation before expiry." ) return True, "" # to do validate the quota limits def validate_quota_limits(payload): for resource in payload: # Check valid resource name if resource not in itertools.chain( dccommon_consts.CINDER_QUOTA_FIELDS, dccommon_consts.NOVA_QUOTA_FIELDS, dccommon_consts.NEUTRON_QUOTA_FIELDS, ): raise exceptions.InvalidInputError # Check valid quota limit value in case for put/post if isinstance(payload, dict) and ( not isinstance(payload[resource], int) or payload[resource] <= 0 ): raise exceptions.InvalidInputError def get_sw_update_strategy_extra_args(context, update_type=None): """Query an existing sw_update_strategy for its extra_args. :param context: request context object. :param update_type: filter the update strategy (defaults to None) :returns dict (returns an empty dictionary if no strategy exists) """ try: sw_update_strategy = db_api.sw_update_strategy_get( context, update_type=update_type ) return sw_update_strategy.extra_args except exceptions.NotFound: # return an empty dictionary if there is no strategy return {} def get_sw_update_opts(context, for_sw_update=False, subcloud_id=None): """Get sw update options for a subcloud :param context: request context object. :param for_sw_update: return the default options if subcloud options are empty. Useful for retrieving sw update options on application of patch strategy. :param subcloud_id: id of subcloud. """ if subcloud_id is None: # Requesting defaults. Return constants if no entry in db. sw_update_opts_ref = db_api.sw_update_opts_default_get(context) if not sw_update_opts_ref: sw_update_opts_dict = vim.SW_UPDATE_OPTS_CONST_DEFAULT return sw_update_opts_dict else: # requesting subcloud options sw_update_opts_ref = db_api.sw_update_opts_get(context, subcloud_id) if sw_update_opts_ref: subcloud_name = db_api.subcloud_get(context, subcloud_id).name return db_api.sw_update_opts_w_name_db_model_to_dict( sw_update_opts_ref, subcloud_name ) elif for_sw_update: sw_update_opts_ref = db_api.sw_update_opts_default_get(context) if not sw_update_opts_ref: sw_update_opts_dict = vim.SW_UPDATE_OPTS_CONST_DEFAULT return sw_update_opts_dict else: raise exceptions.SubcloudPatchOptsNotFound(subcloud_id=subcloud_id) return db_api.sw_update_opts_w_name_db_model_to_dict( sw_update_opts_ref, dccommon_consts.SW_UPDATE_DEFAULT_TITLE ) def ensure_lock_path(): # Determine the oslo_concurrency lock path: # 1) First, from the oslo_concurrency section of the config # a) If not set via an option default or config file, oslo_concurrency # sets it to the OSLO_LOCK_PATH env variable # 2) Then if not set, set it to a specific directory under # tsc.VOLATILE_PATH if cfg.CONF.oslo_concurrency.lock_path: lock_path = cfg.CONF.oslo_concurrency.lock_path else: lock_path = os.path.join(tsc.VOLATILE_PATH, "dcmanager") if not os.path.isdir(lock_path): try: uid = pwd.getpwnam(DC_MANAGER_USERNAME).pw_uid gid = grp.getgrnam(DC_MANAGER_GRPNAME).gr_gid os.makedirs(lock_path) os.chown(lock_path, uid, gid) LOG.info("Created directory=%s" % lock_path) except OSError as e: LOG.exception("makedir %s OSError=%s encountered" % (lock_path, e)) return None return lock_path def synchronized(name, external=True, fair=False): if external: prefix = "DCManager-" lock_path = ensure_lock_path() else: prefix = None lock_path = None return lockutils.synchronized( name, lock_file_prefix=prefix, external=external, lock_path=lock_path, semaphores=None, delay=0.01, fair=fair, ) def get_filename_by_prefix(dir_path, prefix): """Returns the first filename found matching 'prefix' within 'dir_path' Note: returns base filename only - result does not include dir_path """ for filename in os.listdir(dir_path): if filename.startswith(prefix): return filename return None def get_ansible_host_ip_from_inventory(subcloud_name: str): """Get ansible host ip from inventory file for the specified subcloud""" postfix = consts.INVENTORY_FILE_POSTFIX filename = get_ansible_filename(subcloud_name, postfix) content = load_yaml_file(filename) bootstrap_address = ( content["all"]["hosts"].get(subcloud_name, {}).get("ansible_host") ) return bootstrap_address def create_subcloud_inventory(subcloud, inventory_file, initial_deployment=False): """Create the ansible inventory file for the specified subcloud""" # Delete the file if it already exists delete_subcloud_inventory(inventory_file) with open(inventory_file, "w") as f_out_inventory: f_out_inventory.write( "---\n" "all:\n" " vars:\n" " ansible_ssh_user: sysadmin\n" ' ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n' " initial_deployment: " + str(initial_deployment) + "\n" " hosts:\n" " " + subcloud["name"] + ":\n" " ansible_host: " + subcloud["bootstrap-address"] + "\n" ) def create_subcloud_inventory_with_admin_creds( subcloud_name, inventory_file, subcloud_bootstrap_address, ansible_pass, initial_deployment=False, ): """Create the ansible inventory file for the specified subcloud. Includes ansible_become_pass attribute. """ # Delete the file if it already exists delete_subcloud_inventory(inventory_file) with open(inventory_file, "w") as f_out_inventory: f_out_inventory.write( ( "---\n" "all:\n" " vars:\n" " ansible_ssh_user: sysadmin\n" " ansible_ssh_pass: {0}\n" " ansible_become_pass: {0}\n" ' ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n' " initial_deployment: " + str(initial_deployment) + "\n" " hosts:\n" " {1}:\n" " ansible_host: {2}\n" ).format(ansible_pass, subcloud_name, subcloud_bootstrap_address) ) def delete_subcloud_inventory(inventory_file): """Delete the ansible inventory file for the specified subcloud""" # Delete the file if it exists if inventory_file and os.path.isfile(inventory_file): os.remove(inventory_file) def get_vault_load_files(target_version): """Return a tuple for the ISO and SIG for this load version from the vault. The files can be imported to the vault using any name, but must end in 'iso' or 'sig'. : param target_version: The software version to search under the vault """ vault_dir = "{}/{}/".format(dccommon_consts.SOFTWARE_VAULT_DIR, target_version) matching_iso = None matching_sig = None if os.path.isdir(vault_dir): for a_file in os.listdir(vault_dir): if a_file.lower().endswith(".iso"): matching_iso = os.path.join(vault_dir, a_file) continue elif a_file.lower().endswith(".sig"): matching_sig = os.path.join(vault_dir, a_file) continue # If no .iso or .sig is found, raise an exception if matching_iso is None: raise exceptions.VaultLoadMissingError( file_type=".iso", vault_dir=vault_dir ) if matching_sig is None: raise exceptions.VaultLoadMissingError( file_type=".sig", vault_dir=vault_dir ) # return the iso and sig for this load return (matching_iso, matching_sig) def get_active_kube_version(kube_versions): """Returns the active (target) kubernetes from a list of versions""" matching_kube_version = None for kube in kube_versions: kube_dict = kube.to_dict() if kube_dict.get("target") and kube_dict.get("state") == "active": matching_kube_version = kube_dict.get("version") break return matching_kube_version def select_available_kube_version(kube_versions, to_version): """Return selected kube version based on desired version If the desired "to_version" is higher than the highest "available" version then return the highest "available" version. If the desired "to_version" is "available", we want to select it. Otherwise we want to select the highest "available" kubernetes version. """ # Check if the desired version is higher than the highest "available" version. for kube in reversed(kube_versions): kube_dict = kube.to_dict() if kube_dict.get("state") == "available": version = kube_dict.get("version") if kube_version_compare(version, to_version) == -1: return version else: break # Check if the desired version is "available" for kube in reversed(kube_versions): kube_dict = kube.to_dict() version = kube_dict.get("version") if kube_version_compare(version, to_version) == 0: if kube_dict.get("state") == "available": return version else: break # Return the highest "available" version for kube in reversed(kube_versions): kube_dict = kube.to_dict() if kube_dict.get("state") == "available": return kube_dict.get("version") # There are no "available" versions return None def kube_version_compare(left, right): """Performs a cmp operation for two kubernetes versions Return -1, 0, or 1 if left is less, equal, or greater than right left and right are semver strings starting with the letter 'v' If either value is None, an exception is raised If the strings are not 'v'major.minor.micro, an exception is raised Note: This method supports shorter versions. ex: v1.22 When comparing different length tuples, additional fields are ignored. For example: v1.19 and v1.19.1 would be the same. """ if left is None or right is None or left[0] != "v" or right[0] != "v": raise Exception( "Invalid kube version(s), left: (%s), right: (%s)" % (left, right) ) # start the split at index 1 ('after' the 'v' character) l_val = tuple(map(int, (left[1:].split(".")))) r_val = tuple(map(int, (right[1:].split(".")))) # If the tuples are different length, convert both to the same length min_tuple = min(len(l_val), len(r_val)) l_val = l_val[0:min_tuple] r_val = r_val[0:min_tuple] # The following is the same as cmp. Verified in python2 and python3 # cmp does not exist in python3. return (l_val > r_val) - (l_val < r_val) def system_peer_get_by_ref(context, peer_ref): """Handle getting a system peer by either UUID, or ID, or Name :param context: The request context :param peer_ref: Reference to the system peer, either an UUID or an ID or a Name """ try: if peer_ref.isdigit(): return db_api.system_peer_get(context, peer_ref) try: uuid.UUID(peer_ref) return db_api.system_peer_get_by_uuid(context, peer_ref) except ValueError: return db_api.system_peer_get_by_name(context, peer_ref) except ( exceptions.SystemPeerNotFound, exceptions.SystemPeerUUIDNotFound, exceptions.SystemPeerNameNotFound, ): return None def subcloud_peer_group_db_list_to_dict(peer_groups): return { "subcloud_peer_groups": [ db_api.subcloud_peer_group_db_model_to_dict(peer_group) for peer_group in peer_groups ] } def subcloud_get_by_ref(context, subcloud_ref): """Handle getting a subcloud by either name, or ID :param context: The request context :param subcloud_ref: Reference to the subcloud, either a name or an ID """ try: return ( db_api.subcloud_get(context, subcloud_ref) if subcloud_ref.isdigit() else db_api.subcloud_get_by_name(context, subcloud_ref) ) except (exceptions.SubcloudNotFound, exceptions.SubcloudNameNotFound): return None def subcloud_group_get_by_ref(context, group_ref): # Handle getting a group by either name, or ID if group_ref.isdigit(): # Lookup subcloud group as an ID try: group = db_api.subcloud_group_get(context, group_ref) except exceptions.SubcloudGroupNotFound: return None else: # Lookup subcloud group as a name try: group = db_api.subcloud_group_get_by_name(context, group_ref) except exceptions.SubcloudGroupNameNotFound: return None return group def subcloud_peer_group_get_by_ref(context, group_ref): """Handle getting a peer group by either name, or ID""" try: if group_ref.isdigit(): # Lookup subcloud group as an ID group = db_api.subcloud_peer_group_get(context, group_ref) else: # Lookup subcloud group as a name group = db_api.subcloud_peer_group_get_by_name(context, group_ref) except ( exceptions.SubcloudPeerGroupNotFound, exceptions.SubcloudPeerGroupNameNotFound, ): return None return group def subcloud_db_list_to_dict(subclouds): return { "subclouds": [ db_api.subcloud_db_model_to_dict(subcloud) for subcloud in subclouds ] } def get_oam_floating_ip_primary(subcloud, sc_ks_client): """Get the subcloud's oam primary floating ip""" # First need to retrieve the Subcloud's Keystone session try: endpoint = sc_ks_client.endpoint_cache.get_endpoint("sysinv") sysinv_client = SysinvClient( subcloud.region_name, sc_ks_client.session, endpoint=endpoint ) # We don't want to call sysinv_client.get_oam_address_pools() # here, as the subcloud's software version could be < 24.09. # As we are interested only on primary IP-stack, get_oam_addresses # supports this in any software_version. oam_addresses = sysinv_client.get_oam_addresses() if oam_addresses is not None: return oam_addresses.oam_floating_ip return None except (keystone_exceptions.EndpointNotFound, IndexError) as e: message = "Identity endpoint for subcloud: %s not found. %s" % ( subcloud.name, e, ) LOG.error(message) except dccommon_exceptions.OAMAddressesNotFound: message = "OAM addresses for subcloud: %s not found." % subcloud.name LOG.error(message) return None def get_pool_by_ip_family(pools, ip_family): """Get the pool corresponding to ip family""" for pool in pools: if pool.family == ip_family: return pool raise exceptions.ValidateFail(f"IPv{ip_family} pool not found in pools {pools}") def get_ansible_filename(subcloud_name, postfix=".yml"): """Build ansible filename using subcloud and given postfix""" ansible_filename = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + postfix ) return ansible_filename def pre_check_management_affected_alarm(system_health): """Acceptable health conditions: a) subcloud is completely healthy (i.e. no failed checks) b) there is alarm but no management affecting alarm c) subcloud fails alarm check and it only has non-management affecting alarm(s) """ failed_alarm_check = re.findall(r"No alarms: \[Fail\]", system_health) no_mgmt_alarms = re.findall( r"\[0\] of which are management affecting", system_health ) if failed_alarm_check and not no_mgmt_alarms: return False return True def is_subcloud_name_format_valid(name): """Validates subcloud name format Regex based on RFC 1123 subdomain validation param: name = Subcloud name returns True if name is valid, otherwise it returns false. """ rex = r"[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*" pat = re.compile(rex) if re.fullmatch(pat, name): return True return False # TODO(glyraper): Replace get_region_from_subcloud_address() # with get_region_name once all the subclouds support # '/v1/isystems/region_id' API def get_region_from_subcloud_address(payload): """Retrieves the current region from the subcloud being migrated param: payload = Subcloud payload returns the OS_REGION_NAME param value from subcloud and error cause if occurs """ bootstrap_addr = None bootstrap_pwd = None subcloud_region = None err_cause = None if not payload: err_cause = "Unable to get subcloud connection data: payload is empty" return (subcloud_region, err_cause) try: bootstrap_addr = payload.get("bootstrap-address") bootstrap_pwd = payload.get("sysadmin_password") if not bootstrap_addr: err_cause = ( "Unable to get subcloud connection data: missing bootstrap-address" ) return (subcloud_region, err_cause) if not bootstrap_pwd: err_cause = ( "Unable to get subcloud connection data: missing sysadmin_password" ) return (subcloud_region, err_cause) ip_address = netaddr.IPAddress(bootstrap_addr) if ip_address.version not in [4, 6]: err_cause = "Invalid subcloud bootstrap address" return (subcloud_region, err_cause) cmd = [ "sshpass", "-p", str(bootstrap_pwd), "ssh", "-q", "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null", "sysadmin@" + str(bootstrap_addr), REGION_VALUE_CMD, ] task = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode("utf-8") if len(task) < 1: err_cause = "Malformed subcloud region" return subcloud_region, err_cause subcloud_region = str(task.split("=")[1]).strip() except Exception as e: # check_output() will raise CalledProcessError if the called # process returns a non-zero return code. # We are printing the exception name to avoid any sensitive # connection data err_cause = "exception %s occurred" % type(e).__name__ subcloud_region = None if subcloud_region in cutils.get_system_controller_region_names(): err_cause = f"region {subcloud_region} is not valid for a subcloud" subcloud_region = None if err_cause: LOG.error(err_cause) # Returns # subcloud_region value if subcloud is reachable, otherwise None # err_cause message if an exception occurs, otherwise None # For old systems the region value is the same as subcloud name: # export OS_REGION_NAME=[human readable based region value] # For new systems the region is uuid format based: # export OS_REGION_NAME=[uuid based region value] return (subcloud_region, err_cause) @retry( retry_on_exception=lambda x: isinstance(x, exceptions.ServiceUnavailable), wait_exponential_multiplier=1000, # Start with 1 second wait_exponential_max=10000, # Cap at 10 seconds stop_max_attempt_number=60, # Stop after 60 attempts ) def get_region_name( endpoint, timeout=dccommon_consts.SYSINV_CLIENT_REST_DEFAULT_TIMEOUT ): url = endpoint + "/v1/isystems/region_id" response = requests.get(url, timeout=timeout) if response.status_code == 200: data = response.json() if "region_name" not in data: raise exceptions.NotFound region_name = data["region_name"] return region_name else: msg = f"GET region_name from {url} FAILED WITH RC {response.status_code}" LOG.error(msg) raise exceptions.ServiceUnavailable def find_ansible_error_msg(subcloud_name, log_file, stage=None): """Find errors into ansible logs. It will search into ansible log for a fatal error expression. If fatal error is found, it will capture the message until the final expression. It will get always the more recent fatal error from the log files. If the error message is longer than N lines, it will be summarized. Also, the last task is provided. Returns the error message found Returns generic error message if not found or there is failures during search """ error_found = False error_msg = [] failed_task = "" cmd_1 = "awk" # awk command to get the information inside the last match found # starting with 'fatal: [' and ending with 'PLAY RECAP'. cmd_2 = r"""BEGIN {f=""} # initialize f /fatal: \[|ERROR/,/PLAY RECAP/ { # capture text between two delimiters if ($0 ~ /fatal: \[|ERROR/) f="" # reset f on a new match if ($0 ~ /PLAY RECAP/) next # exclude last delimiter if ($0 == "") next # exclude blank line f = f ? (f "\n" $0) : $0} # assign or append to f END {print f} """ try: # necessary check since is possible to have # the error in rotated ansible log files_for_search = add_latest_rotated_file(log_file) if len(files_for_search) < 2: cmd_list = [cmd_1, cmd_2, files_for_search[0]] else: cmd_list = [cmd_1, cmd_2, files_for_search[0], files_for_search[1]] error_msg_raw = subprocess.check_output( cmd_list, stderr=subprocess.STDOUT ).decode("utf-8") if len(error_msg_raw) > 1: error_found = True error_msg = [elem for elem in error_msg_raw.split("\n") if elem] failed_task = get_failed_task(files_for_search) except Exception as exc: LOG.error("Failed getting info from ansible log file :%s" % exc) if error_found and (len(error_msg) > MAX_LINES_MSG): error_msg = summarize_message(error_msg) error_msg = "\n".join(str(element) for element in error_msg) error_msg = error_msg.replace("'", '"') if error_found: msg = "FAILED %s playbook of (%s).\ndetail: %s \nFAILED TASK: %s" % ( stage, subcloud_name, error_msg, failed_task, ) else: msg = ( "FAILED %s playbook of (%s).\ncheck individual log at " "%s for detailed output" ) % (stage, subcloud_name, log_file) return msg def add_latest_rotated_file(log_file): """Find the latest rotated file for the given log file. Check the existence of the given log file with its latest rotated file. Returns the log file itself if it exists and the latest rotated file doesn't exist; or the log file and its latest rotated file if both exist; or the latest rotated file only if it exists but the log file itself doesn't exit. Raises exception if both of the log file and its latest rotated file don't exist. """ log_files = [] # the latest rotated log file log_file_temp = log_file + ".1" if os.path.exists(log_file_temp): log_files.append(log_file_temp) if os.path.exists(log_file): log_files.append(log_file) if len(log_files) == 0: raise Exception( "Log file %s and its latest rotated file don't exist." % log_file ) return log_files def get_failed_task(files): """Get last task failed It receives an ansible log file (or a couple of files) and search for the last failed task with its date Returns a string with the task and date """ cmd_1 = "awk" # awk command to get the information about last failed task. # Match expression starting with 'TASK [' and ending with # 'fatal: [' cmd_2 = r"""BEGIN {f=""} # initialize f /TASK \[/ {f=""} # reset f on first match /TASK \[/,/fatal: \[/ { # capture text between two delimiters if ($0 ~ /fatal: \[/) next # exclude last delimiter if ($0 == "") next # exclude blank line f = f ? (f "\n" $0) : $0} # assign or append to f END {print f} """ # necessary check since is possible to have # the error in rotated ansible log if len(files) < 2: awk_cmd = [cmd_1, cmd_2, files[0]] else: awk_cmd = [cmd_1, cmd_2, files[0], files[1]] try: failed_task = subprocess.check_output(awk_cmd, stderr=subprocess.STDOUT).decode( "utf-8" ) if len(failed_task) < 1: return None except Exception as exc: LOG.error("Failed getting failed task :%s" % exc) return None failed_task = failed_task.replace("*", "") failed_task = failed_task.replace("'", '"') failed_task = [elem for elem in failed_task.split("\n") if elem] failed_task = "%s %s" % (failed_task[0], failed_task[1]) return failed_task def summarize_message(error_msg): """Summarize message. This function receives a long error message and greps it using key words to return a summarized error message. Returns a brief message. """ list_of_strings_to_search_for = ["msg:", "fail", "error", "cmd", "stderr"] brief_message = [] for line in error_msg: for s in list_of_strings_to_search_for: if re.search(s, line, re.IGNORECASE): if len(brief_message) >= MAX_LINES_MSG: break # append avoiding duplicated items if line not in brief_message: brief_message.append(line) return brief_message def is_valid_for_backup_operation(operation, subcloud, bootstrap_address_dict=None): if operation == "create": return _is_valid_for_backup_create(subcloud) elif operation == "delete": return _is_valid_for_backup_delete(subcloud) elif operation == "restore": return _is_valid_for_backup_restore(subcloud, bootstrap_address_dict) else: msg = "Invalid operation %s" % operation LOG.error(msg) raise exceptions.ValidateFail(msg) def _is_valid_for_backup_create(subcloud): if ( subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE or subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED or subcloud.deploy_status != consts.DEPLOY_STATE_DONE or subcloud.prestage_status in consts.STATES_FOR_ONGOING_PRESTAGE ): msg = ( "Subcloud %s must be deployed, online, managed, and no ongoing prestage " "for the subcloud-backup create operation." % subcloud.name ) raise exceptions.ValidateFail(msg) return True def _is_valid_for_backup_delete(subcloud): if ( subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE or subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED ): msg = ( "Subcloud %s must be online and managed for the subcloud-backup " "delete operation with --local-only option." % subcloud.name ) raise exceptions.ValidateFail(msg) return True def _is_valid_for_backup_restore(subcloud, bootstrap_address_dict=None): msg = None ansible_subcloud_inventory_file = get_ansible_filename( subcloud.name, consts.INVENTORY_FILE_POSTFIX ) has_bootstrap_address = ( bootstrap_address_dict and subcloud.name in bootstrap_address_dict ) has_install_values = subcloud.data_install is not None has_inventory_file = os.path.exists(ansible_subcloud_inventory_file) if ( subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED or subcloud.deploy_status in consts.INVALID_DEPLOY_STATES_FOR_RESTORE ): msg = ( "Subcloud %s must be unmanaged and in a valid deploy state " "for the subcloud-backup restore operation." % subcloud.name ) elif not (has_bootstrap_address or has_install_values or has_inventory_file): msg = ( "Unable to obtain the subcloud %s bootstrap_address from either " "restore or install values. Please ensure bootstrap_address is " "specified in the restore-values.yml and try again." % subcloud.name ) elif has_bootstrap_address: try: netaddr.IPAddress(bootstrap_address_dict[subcloud.name]) except netaddr.AddrFormatError: msg = ( f"Subcloud {subcloud.name} must have a valid bootstrap address: " f"{bootstrap_address_dict[subcloud.name]}" ) if msg: raise exceptions.ValidateFail(msg) return True def get_matching_iso(software_version=None): try: if not software_version: software_version = tsc.SW_VERSION matching_iso, _ = get_vault_load_files(software_version) if not matching_iso: error_msg = ( f"Failed to get {software_version} load image. Provide active/inactive " "load image via 'software --os-region-name SystemController upload'" ) LOG.exception(error_msg) return None, error_msg return matching_iso, None except Exception as e: LOG.exception("Could not load vault files.") return None, str(e) def is_subcloud_healthy(subcloud_region, management_ip: str = None): system_health = "" try: os_client = OpenStackDriver( region_name=subcloud_region, region_clients=None, fetch_subcloud_ips=fetch_subcloud_mgmt_ips, subcloud_management_ip=management_ip, ) keystone_client = os_client.keystone_client endpoint = keystone_client.endpoint_cache.get_endpoint("sysinv") sysinv_client = SysinvClient( subcloud_region, keystone_client.session, endpoint=endpoint ) system_health = sysinv_client.get_system_health() except Exception as e: LOG.exception(e) raise fails = re.findall(r"\[Fail\]", system_health) failed_alarm_check = re.findall(r"No alarms: \[Fail\]", system_health) no_mgmt_alarms = re.findall( r"\[0\] of which are management affecting", system_health ) # Subcloud is considered healthy if there are no failures or # a single failure with only low severity alarms (not management affecting) if (len(fails) == 0) or (len(fails) == 1 and failed_alarm_check and no_mgmt_alarms): return True LOG.error( f"Subcloud {subcloud_region} failed health check. Health output: " f"\n{system_health}\n" ) return False def get_system_controller_software_list(region_name: str = None) -> list[dict]: """Get software list from USM API This function is responsible for querying the USM API for the list of releases present on the node through the USM endpoint. The node is determined from the parameter region_name, whose default value represents the region one. Args: region_name (str): The name of the region to be consulted. Default is RegionOne. Returns: list of dict: each dict item contains the parameters that identify the release from API response """ if not region_name: region_name = cutils.get_region_one_name() try: os_client = OpenStackDriver( region_name=region_name, region_clients=None, fetch_subcloud_ips=fetch_subcloud_mgmt_ips, ) ks_client = os_client.keystone_client software_client = software_v1.SoftwareClient( ks_client.session, endpoint=ks_client.endpoint_cache.get_endpoint("usm"), ) return software_client.list() except (requests.exceptions.ConnectionError, keystone_exceptions.ConnectionError): LOG.exception("Failed to get software list for %s", region_name) raise except Exception: LOG.exception("Failed to get keystone client for %s", region_name) raise def get_systemcontroller_deployed_releases( software_list: List[dict], key: str = "sw_version" ) -> List[str]: """Get a list of deployed software releases on the SystemController using the key. This function is used to get the deployed software releases on the SystemController using the key provided. For example, the key can be "sw_version" or "release_id", this will return ["10.0.0", "10.0.2", ...] or ["stx-10.0.0", "stx-10.0.1", ...] respectively. Other keys can be used as well, like "state", "description", etc. Args: software_list (list): The software list provided by USM API key (str): (default 'sw_version') The key to extract information from each deployed release. Returns: List[str]: Values of the given key from all deployed software releases. """ deployed_releases = [ release[key] for release in software_list if release["state"] == software_v1.DEPLOYED ] return deployed_releases def get_systemcontroller_installed_releases_ids() -> List[str]: software_list = get_system_controller_software_list() return get_systemcontroller_deployed_releases(software_list, key="release_id") def is_software_ready_to_be_prestaged_for_install(software_list, software_version): """Check if software is ready to be prestaged for install This function checks if a given release version is ready to be deployed. The criteria to consult is valid when a prestage is performed from the for_install parameter, since for for_sw_deploy this is not required. To determine if the release is valid for install, it is initially compared to the release of the system controller. If matches, it is assumed that the release is already deployed and it is not required to check the software list. Otherwise, the query will be made in the list of software for the available status, since those releases that have not yet been deployed must be considered. Args: software_list (list[dict]): The software list from USM API software_version (str): The requested software version Returns: bool: `True` if software version matches, otherwise `False` """ # It is assumed that if the requested release matches the release deployed # on the system controller, checking against the software list is not required. if software_version == tsc.SW_VERSION: return True # It is necessary to query the list for the requested release, # whose status must be available, unavailable or deployed to be # able to install it return any( is_base_release(release["sw_version"]) and get_major_release(release["sw_version"]) == software_version and release["state"] in (software_v1.AVAILABLE, software_v1.DEPLOYED, software_v1.UNAVAILABLE) for release in software_list ) def get_prestage_reason(payload): """Get the prestage reason from payload This function is used to get the prestage reason from payload checking the for_sw_deploy param. Args: payload (dict): payload from request Returns: str: for_sw_deploy if param is present on payload str: for_install if param is absent (default) """ if payload.get(consts.PRESTAGE_FOR_SW_DEPLOY): return consts.PRESTAGE_FOR_SW_DEPLOY return consts.PRESTAGE_FOR_INSTALL def get_validated_sw_version_for_prestage( payload, subcloud=None, system_controller_sw_list=None ): """Get the validated software version from payload This function is used to get the software version previously being validated to determine if the value is correct, that is, if it meets the requirements to be considered a valid release. It is validated if the release format meets the expected format (MM.mm). Where MM is the high part of the release, for example: 24. Where mm is the lower part of the release, for example: 09. Any value that does not respect the format is considered an invalid release, including formats such as: 24.09.1, 24.09.1.1, alphabetical characters and symbols. It is expected to receive only numbers that represent a valid release separated by "."; i.e: 22.12, 24.09. Args: payload (dict): payload from request subcloud (dict): subcloud params if present system_controller_sw_list (list): list of system controller releases, will get from software client if not passed Returns: tuple: The release validation result: - str: The first item represents the validated release, that is, the value of the release that was requested. If a validation error occurs, the value will be None. - str: The second element represents a message in case of error. """ for_sw_deploy = get_prestage_reason(payload) == consts.PRESTAGE_FOR_SW_DEPLOY for_install = not for_sw_deploy software_version = payload.get(consts.PRESTAGE_REQUEST_RELEASE) subcloud = {} if subcloud is None else subcloud # If the release parameter is present in the payload, it validates if # the format is MM.mm. Otherwise it will return error. if software_version and not is_major_release(software_version): return None, ( "Specified release format is not supported. Version format " "must be MM.mm." ) # Gets the release in validated MM.mm format. software_version = get_sw_version(software_version, for_install) # Query to the USM API to get the software list if system_controller_sw_list is None: system_controller_sw_list = get_system_controller_software_list() # Gets only the list of deployed major releases. deployed_releases = get_major_releases( get_systemcontroller_deployed_releases(system_controller_sw_list) ) # Check for deploy release param if for_sw_deploy: # 22.12 version is not supported for software deploy if software_version < consts.SOFTWARE_VERSION_24_09: return None, ( "The requested software version is not supported, " "cannot prestage for software deploy." ) # Ensures that the requested release version exists within the # list of deployed releases if software_version not in deployed_releases: return None, ( "The requested software version was not installed in the " "system controller, cannot prestage for software deploy." ) else: # Check for install release param if not is_software_ready_to_be_prestaged_for_install( system_controller_sw_list, software_version ): return None, ("The requested release is not ready to be installed.") return software_version, "" def get_certificate_from_secret(secret_name, secret_ns): """Get certificate from k8s secret :param secret_name: the name of the secret :param secret_ns: the namespace of the secret :return: tls_crt: the certificate. tls_key: the corresponding private key of the certificate. ca_crt: the CA certificate that issued tls_crt if available. raise Exception for kubernetes data errors """ kube = kubeoperator.KubeOperator() secret = kube.kube_get_secret(secret_name, secret_ns) if not hasattr(secret, "data"): raise Exception("Invalid secret %s\\%s" % (secret_ns, secret_name)) data = secret.data if "tls.crt" not in data or "tls.key" not in data: raise Exception( "Invalid certificate data from secret %s\\%s" % (secret_ns, secret_name) ) try: tls_crt = base64.decode_as_text(data["tls.crt"]) tls_key = base64.decode_as_text(data["tls.key"]) if "ca.crt" in data: ca_crt = base64.decode_as_text(data["ca.crt"]) else: LOG.warning( "Secret doesn't have required CA data stored: %s\\%s" % (secret_ns, secret_name) ) ca_crt = "" except TypeError: raise Exception( "Certificate secret data is invalid %s\\%s" % (secret_ns, secret_name) ) return tls_crt, tls_key, ca_crt def get_management_subnet(payload): """Get management subnet. Given a payload dict, prefer an admin subnet over a management subnet if it is present. Returns the management subnet. """ if payload.get("admin_subnet", None): return payload.get("admin_subnet") return payload.get("management_subnet", "") def get_primary_management_subnet(payload): """Get primary management subnet. Returns the primary management subnet. """ return get_management_subnet(payload).split(",")[0] def get_management_start_address(payload): """Get management start address. Given a payload dict, prefer an admin start address over a management start address if it is present. Returns the management start address. """ if payload.get("admin_start_address", None): return payload.get("admin_start_address") return payload.get("management_start_address", "") def get_primary_management_start_address(payload): """Get primary management start address. Returns the primary management start address. """ return get_management_start_address(payload).split(",")[0] def get_management_end_address(payload): """Get management end address. Given a payload dict, prefer an admin end address over a management end address if it is present. Returns the management end address. """ if payload.get("admin_end_address", None): return payload.get("admin_end_address") return payload.get("management_end_address", "") def get_primary_management_end_address(payload): """Get primary management end address. Returns the primary management end address. """ return get_management_end_address(payload).split(",")[0] def get_primary_management_gateway_address(payload): """Get primary management gateway address. Given a payload dict, prefer an admin gateway address over a management gateway address if it is present. Returns the primary management gateway address. """ if payload.get("admin_gateway_address", None): return payload.get("admin_gateway_address").split(",")[0] return payload.get("management_gateway_address", "").split(",")[0] def get_primary_systemcontroller_gateway_address(payload): """Get primary systemcontroller gateway address. Returns the primary systemcontroller gateway address. """ if payload.get("systemcontroller_gateway_address", None): return payload.get("systemcontroller_gateway_address").split(",")[0] return None def get_primary_management_gateway_address_ip_family(payload): """Get primary management gateway address family""" address_value = get_primary_management_gateway_address(payload) try: ip_address = netaddr.IPAddress(address_value) except Exception as e: raise exceptions.ValidateFail(f"Invalid address - not a valid IP address: {e}") return ip_address.version def get_primary_oam_address_ip_family(payload): """Get primary oam address's IP family :param payload: subcloud configuration """ # First check external_oam_subnet_ip_family, if it is # DB subcloud payload if payload.get("external_oam_subnet_ip_family", None): try: family = int(payload.get("external_oam_subnet_ip_family")) if family == 4 or family == 6: return family raise exceptions.ValidateFail( f"Invalid external_oam_subnet_ip_family: {family}" ) except Exception as e: raise exceptions.ValidateFail(f"Invalid external_oam_subnet_ip_family: {e}") network_value = payload.get("external_oam_subnet", "").split(",")[0] try: ip_network = netaddr.IPNetwork(network_value) except Exception as e: raise exceptions.ValidateFail( f"Invalid OAM network - not a valid IP Network: {e}" ) return ip_network.version def has_network_reconfig(payload, subcloud): """Check if network reconfiguration is needed :param payload: subcloud configuration :param subcloud: subcloud object """ management_subnet = get_primary_management_subnet(payload) start_address = get_primary_management_start_address(payload) end_address = get_primary_management_end_address(payload) gateway_address = get_primary_management_gateway_address(payload) sys_controller_gw_ip = get_primary_systemcontroller_gateway_address(payload) has_network_reconfig = any( [ management_subnet != subcloud.management_subnet, start_address != subcloud.management_start_ip, end_address != subcloud.management_end_ip, gateway_address != subcloud.management_gateway_ip, sys_controller_gw_ip != subcloud.systemcontroller_gateway_ip, ] ) return has_network_reconfig def set_open_file_limit(new_soft_limit: int): """Adjust the maximum number of open files for this process (soft limit)""" try: current_soft, current_hard = sys_resource.getrlimit(sys_resource.RLIMIT_NOFILE) if new_soft_limit > current_hard: LOG.error( f"New process open file soft limit [{new_soft_limit}] exceeds the " f"hard limit [{current_hard}]. Setting to hard limit instead." ) new_soft_limit = current_hard if new_soft_limit != current_soft: LOG.info( f"Setting process open file limit to {new_soft_limit} " f"(from {current_soft})" ) sys_resource.setrlimit( sys_resource.RLIMIT_NOFILE, (new_soft_limit, current_hard) ) except Exception as ex: LOG.exception(f"Failed to set NOFILE resource limit: {ex}") def get_playbook_for_software_version(playbook_filename, software_version=None): """Get the ansible playbook filename in corresponding software version. :param playbook_filename: ansible playbook filename :param software_version: software version :raises PlaybookNotFound: If the playbook is not found Returns the unchanged ansible playbook filename if the software version parameter is not provided or the same as active release, otherwise, returns the filename in corresponding software version. """ if software_version and software_version != tsc.SW_VERSION: software_version_path = os.path.join( consts.ANSIBLE_PREVIOUS_VERSION_BASE_PATH, software_version ) playbook_filename = playbook_filename.replace( consts.ANSIBLE_CURRENT_VERSION_BASE_PATH, software_version_path ) if not os.path.isfile(playbook_filename): raise exceptions.PlaybookNotFound(playbook_name=playbook_filename) return playbook_filename def get_value_from_yaml_file(filename, key): """Get corresponding value for a key in the given yaml file. :param filename: the yaml filename :param key: the path for the value Returns the value or None if not found. """ value = None if os.path.isfile(filename): with open(os.path.abspath(filename), "r") as f: data = f.read() data = yaml.load(data, Loader=yaml.SafeLoader) value = data.get(key) return value def update_values_on_yaml_file(filename, values, values_to_keep=None, yaml_dump=True): """Update all specified key values from the given yaml file. If values_to_keep is provided, all values other than specified will be deleted from the loaded file prior to update. :param filename: the yaml filename :param values: dict with yaml keys and values to replace :param values_to_keep: list of values to keep on original file :param yaml_dump: write file using yaml dump (default is True) returns True if the yaml file exists else False """ if values_to_keep is None: values_to_keep = [] update_file = False if not os.path.isfile(filename): return False with open(os.path.abspath(filename), "r") as f: data = f.read() data = yaml.load(data, Loader=yaml.SafeLoader) if values_to_keep: for key in data.copy(): if key not in values_to_keep: data.pop(key) update_file = True for key, value in values.items(): if key not in data or value != data.get(key): data.update({key: value}) update_file = True if update_file: with open(os.path.abspath(filename), "w") as f: if yaml_dump: yaml.dump(data, f, sort_keys=False) else: f.write("---\n") for k, v in data.items(): f.write("%s: %s\n" % (k, json.dumps(v))) return True def load_yaml_file(filename: str): with open(os.path.abspath(filename), "r") as f: data = yaml.load(f, Loader=yaml.loader.SafeLoader) return data def update_install_values_with_new_bootstrap_address(context, payload, subcloud): """Update install values with new bootstrap address provided on request This is necessary during deploy bootstrap if the user provided a new bootstrap_address, so future redeploy/upgrade is not affected :param context: request context object :param payload: subcloud payload :param subcloud: subcloud object """ if not subcloud.data_install: return bootstrap_address = payload.get(consts.BOOTSTRAP_ADDRESS) install_values = json.loads(subcloud.data_install) if bootstrap_address and bootstrap_address != install_values.get( "bootstrap_address" ): install_values["bootstrap_address"] = bootstrap_address db_api.subcloud_update( context, subcloud.id, data_install=json.dumps(install_values) ) def update_abort_status(context, subcloud_id, deploy_status, abort_failed=False): """Update the subcloud deploy status during deploy abort operation. :param context: request context object :param subcloud_id: subcloud id from db :param deploy_status: subcloud deploy status from db :param abort_failed: if abort process fails (default False) """ if abort_failed: abort_status_dict = ABORT_UPDATE_FAIL_STATUS else: abort_status_dict = ABORT_UPDATE_STATUS new_deploy_status = abort_status_dict[deploy_status] updated_subcloud = db_api.subcloud_update( context, subcloud_id, deploy_status=new_deploy_status ) return updated_subcloud def subcloud_is_secondary_state(deploy_state): if deploy_state in [ consts.DEPLOY_STATE_SECONDARY, consts.DEPLOY_STATE_SECONDARY_FAILED, ]: return True return False def create_subcloud_rehome_data_template(): """Create a subcloud rehome data template""" return {"saved_payload": {}} def get_sw_version(release=None, for_install=True): """Get the sw_version to be used. Return the sw_version by first validating a set release version. If a release is not specified then use the current system controller software_version. for_install = True (--for-install) then it will validate the requested release against the upgrade support file. for_install = False (--for-sw-deploy) then it will return the requested release in format MM.mm previously validated. """ if release: try: if for_install: validate_major_release_version_supported(release) else: release = get_major_release(release) return release except exceptions.ValidateFail as e: pecan.abort(400, _("Error: invalid release version parameter. %s" % e)) except Exception: pecan.abort(500, _("Error: unable to validate the release version.")) else: return tsc.SW_VERSION def validate_major_release_version_supported(release_version_to_check): """Check if a release version is supported by the current active version. :param release_version_to_check: version string to validate returns True to indicate that the version is valid raise ValidateFail for an invalid/unsupported release version """ current_version = tsc.SW_VERSION if current_version == release_version_to_check: return True supported_versions = get_current_supported_upgrade_versions() if release_version_to_check not in supported_versions: msg = "%s is not a supported release version (%s)" % ( release_version_to_check, ",".join(supported_versions), ) raise exceptions.ValidateFail(msg) return True def is_major_release(version): """Check if a given version is a valid major release This function is useful for determining whether a given version represents a major release. Both the MM part and the mm part must have two digits. Args: version (str): The requested version value Returns: bool: `True` if the version value meets the expected format. `False` if the version format is not valid. """ pattern = r"^\d{2}\.\d{2}$" if not re.match(pattern, version): return False MM, mm = version.split(".") MM = int(MM) mm = int(mm) return 0 <= MM <= 99 and 0 <= mm <= 99 def is_minor_release(version): """Check if a given version is a valid minor release The third value in a release format representation, for example MM.mm.pp, is considered a minor release. Both the MM part and the mm part must have two digits. The pp part can have one digit or two. The third part of the format starting from 1 is considered a minor release, since 0 represents the major release. Args: version (str): The requested version value Returns: bool: `True` if the version value meets the expected format. `False` if the version format is not valid. """ pattern = r"^\d{2}\.\d{2}\.\d{1,2}$" if not re.match(pattern, version): return False MM, mm, pp = version.split(".") MM = int(MM) mm = int(mm) pp = int(pp) return 0 <= MM <= 99 and 0 <= mm <= 99 and 1 <= pp <= 99 def is_base_release(version): """Check if a given version is a valid base release The third value in a release format representation, for example MM.mm.p, is considered a base release. Both the MM part and the mm part must have two digits. The p part represents the base release digit, which is always 0. Args: version (str): The requested version value Returns: bool: `True` if the version value is a valid base release. `False` if the version is a not valid base release. """ if version < consts.SOFTWARE_VERSION_24_09: return is_major_release(version) pattern = r"^\d{2}\.\d{2}\.\d{1}$" if not re.match(pattern, version): return False MM, mm, p = version.split(".") MM = int(MM) mm = int(mm) p = int(p) return 0 <= MM <= 99 and 0 <= mm <= 99 and 0 == p def extract_version(release_id: str) -> str: """Extract the MM.mm part of a release_id. Args: release_id: The release_id. Example: stx-10.0.1 Returns: str: The extracted major.minor version in the format MM.mm, or None if not found. """ # Regular expression to match the MM.mm part of the version pattern = r"(\d{1,2}\.\d{1,2})" # Search for the MM.mm pattern in the version_string match = re.search(pattern, release_id) # Return the MM.mm part if found, otherwise return None if match: return match.group(1) return None def get_major_release(version): """Returns the YY.MM portion of the given version string""" if "-" in version: version = version.split("-")[1] split_version = version.split(".") return ".".join(split_version[0:2]) def get_software_version(releases): """Returns the maximum YY.MM portion from the given release list""" versions = [] for release in releases: version = extract_version(release) if version: versions.append(version) return max(versions, default=None) def get_current_supported_upgrade_versions(): """Parse the upgrades metadata file to build a list of supported versions. returns a list of supported upgrade versions raise InternalError exception for a missing/invalid metadata file """ supported_versions = [] try: with open(consts.SUPPORTED_UPGRADES_METADATA_FILE_PATH) as file: root = ElementTree.fromstring(file.read()) except Exception: LOG.exception("Error reading the supported upgrades metadata file") raise exceptions.InternalError() supported_upgrades = root.find("supported_upgrades") if not supported_upgrades: LOG.error("Missing supported upgrades information") raise exceptions.InternalError() upgrades = supported_upgrades.findall("upgrade") for upgrade in upgrades: version = upgrade.findtext("version") supported_versions.append(version.strip()) return supported_versions def get_major_releases(releases): """Returns release list in format MM.mm""" major_releases = [] for release in releases: major_release = get_major_release(release) if major_release not in major_releases: major_releases.append(major_release) return major_releases # Feature: Subcloud Name Reconfiguration # This method is useful to determine the origin of the request # towards the api. The goal was to avoid any code changes in # the cert-monitor module, since it only needs the region reference. # When this method is called, the condition is applied to replace the # value of the "name" field with the value of the "region_name" field # in the response. In this way, the cert-monitor does not lose the # region reference in subcloud rename operation. def is_req_from_cert_mon_agent(request): ua = request.headers.get("User-Agent") if ua == consts.CERT_MON_HTTP_AGENT: return True else: return False def yaml_safe_load(contents, content_type): """Wrapper for yaml.safe_load with error logging and reporting. :param contents: decoded contents to load :param content_type: values being loaded :returns dict constructed from parsed contents """ error = False msg = "Error: Unable to load " + content_type + " file contents ({})." try: data = yaml.safe_load(contents) if data is None: error = True msg = msg.format("empty file provided") except yaml.YAMLError as e: error = True if hasattr(e, "problem_mark"): mark = e.problem_mark msg = msg.format("problem on line: " + str(mark.line)) else: msg = msg.format("please see logs for more details") LOG.exception(e) if error: LOG.error(msg) pecan.abort(400, _(msg)) return data # Feature: Subcloud Name Reconfiguration # This method is useful to determine the origin of the request # towards the api. def is_req_from_another_dc(request): ua = request.headers.get("User-Agent") if ua == consts.DCMANAGER_V1_HTTP_AGENT: return True else: return False def validate_name( name, prohibited_name_list=[], invalid_chars=".*+?|()[]{}^$", max_length=255, case_sensitive=False, ): """validate name string. :param name: name string :param prohibited_name_list: a list containing prohibited string :param max_length: max length of name :param invalid_chars: default is regular expression chars :param case_sensitive: case sensitive setting for prohibited_name_list :returns boolean value as result """ special_chars = set(invalid_chars) if not name: return False if name.isdigit(): LOG.warning("Invalid name [%s], can not be digit" % name) return False if len(name) > max_length: LOG.warning("Invalid name length") return False for char in name: if char in special_chars: LOG.warning( "Invalid name, Prohibited to use regular expression characters: %s" % name ) return False normalized_name = name if case_sensitive else name.lower() normalized_prohibited_list = ( prohibited_name_list if case_sensitive else [s.lower() for s in prohibited_name_list] ) if normalized_name in normalized_prohibited_list: LOG.warning("Invalid name, cannot use '%s' as name" % name) return False return True def get_local_system(): m_ks_client = OpenStackDriver( region_clients=None, fetch_subcloud_ips=fetch_subcloud_mgmt_ips, ).keystone_client endpoint = m_ks_client.endpoint_cache.get_endpoint("sysinv") sysinv_client = SysinvClient( m_ks_client.region_name, m_ks_client.session, endpoint=endpoint ) system = sysinv_client.get_system() return system def get_msg_output_info(log_file, target_task, target_str): """Get msg output by searching the target string from the given task. It receives an ansible log file and searches for the last msg output matching the target string from the given task. Returns the msg output """ # awk command to get the last occurrence string after 'msg: {target_str}' # between 'TASK \[{target_task}' and 'PLAY RECAP' delimiters. awk_script = rf""" /TASK \[{target_task}/,/PLAY RECAP/ {{ if ($0 ~ /msg: '{target_str}(.+)'/) {{ result = $0 }} }} END {{ if (result) {{ match(result, /msg: '{target_str}(.+)'/, arr) print arr[1] }} }} """ try: # necessary check since is possible to have # the message in rotated ansible log files_for_search = add_latest_rotated_file(log_file) awk_cmd = ["awk", awk_script] + files_for_search # Run the AWK script using subprocess result = subprocess.run(awk_cmd, capture_output=True, text=True, check=True) return result.stdout.strip() except Exception as e: LOG.error( "Failed getting msg output by searching '%s' from task '%s': %s" % (target_str, target_task, e) ) return None def get_subcloud_ansible_log_file(subcloud_name): return os.path.join( consts.DC_ANSIBLE_LOG_DIR, subcloud_name + "_playbook_output.log" ) def is_leader_on_local_site(peer_group): return peer_group.system_leader_id == get_local_system().uuid def generate_sync_info_message(association_ids): info_message = None if association_ids: info_message = ( "The operation has caused the SPG to be out-of-sync. " "Please run peer-group-association sync command to push " "the subcloud peer group changes to the peer site:\n" ) for association_id in association_ids: info_message += ( f"$ dcmanager peer-group-association sync {association_id}\n" ) return info_message def fetch_subcloud_mgmt_ips(region_name: str = None) -> Union[dict, str]: """Fetch the subcloud(s) primary management IP(s). :param region_name: The subcloud region name, defaults to None :return: A dictionary of region names to IPs (if no region provided) or a single IP string (for specific region). """ LOG.info(f"Fetching subcloud(s) management IP(s) ({region_name=})") ctx = context.get_admin_context() if region_name: subcloud = db_api.subcloud_get_by_region_name(ctx, region_name) return subcloud.management_start_ip ip_map = {} subclouds = db_api.subcloud_get_all(ctx) for subcloud in subclouds: ip_map[subcloud.region_name] = subcloud.management_start_ip return ip_map def format_address(ip_address: str) -> str: """Formats an IP address for use in a URL. IPv6 addresses are enclosed in square brackets. """ try: address = netaddr.IPAddress(ip_address) if address.version == 6: return f"[{address}]" return str(address) except netaddr.AddrFormatError as e: LOG.error(f"Failed to format the IP address: {ip_address}. Error: {e}") raise def validate_patch_strategy(payload: dict): patch_id = payload.get("patch_id") if not patch_id: message = ( "patch_id parameter is required for " f"{consts.SW_UPDATE_TYPE_PATCH} strategy." ) pecan.abort(400, _(message)) patch_file = ( f"{consts.PATCH_VAULT_DIR}/{consts.PATCHING_SW_VERSION}/{patch_id}.patch" ) if not os.path.isfile(patch_file): message = f"Patch file {patch_file} is missing in DC Vault patches." pecan.abort(400, _(message)) remove = payload.get("remove", "").lower() == "true" upload_only = payload.get("upload-only", "").lower() == "true" if remove and upload_only: message = "Both remove and upload-only parameters cannot be used together." pecan.abort(400, _(message)) def validate_software_strategy(release_id: str): if not release_id: message = ( "Release ID is required for strategy type: " f"{consts.SW_UPDATE_TYPE_SOFTWARE}." ) pecan.abort(400, _(message)) elif release_id not in get_systemcontroller_installed_releases_ids(): message = f"Release ID: {release_id} not deployed in the SystemController" pecan.abort(400, _(message)) def has_usm_service( subcloud_region: str, keystone_client: KeystoneClient = None ) -> bool: # Lookup keystone client session if not specified if not keystone_client: try: keystone_client = OpenStackDriver( region_name=subcloud_region, region_clients=None, fetch_subcloud_ips=fetch_subcloud_mgmt_ips, ).keystone_client.keystone_client except Exception as e: LOG.exception( f"Failed to get keystone client for subcloud_region: {subcloud_region}" ) raise exceptions.InternalError() from e try: # Try to get the USM service for the subcloud. keystone_client.services.find(name=dccommon_consts.ENDPOINT_NAME_USM) return True except keystone_exceptions.NotFound: LOG.warning("USM service not found for subcloud_region: %s", subcloud_region) return False def get_system_controller_deploy() -> Optional[dict]: # get a cached keystone client (and token) try: os_client = OpenStackDriver( region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME, region_clients=None ) except Exception: LOG.exception( "Failed to get keystone client for %s", dccommon_consts.SYSTEM_CONTROLLER_NAME, ) raise ks_client = os_client.keystone_client software_client = software_v1.SoftwareClient( ks_client.session, dccommon_consts.SYSTEM_CONTROLLER_NAME, endpoint=ks_client.endpoint_cache.get_endpoint( dccommon_consts.ENDPOINT_NAME_USM ), ) # Show deploy always returns either an empty list when there's no deploy # or a list with a single element when there's a deploy deploy_list = software_client.show_deploy() return deploy_list[0] if deploy_list else None def is_system_controller_deploying() -> bool: return get_system_controller_deploy() is not None def clear_subcloud_alarm_summary(context, subcloud_name: str): """Clears the alarm summary for a subcloud. :param context: request context object. :param subcloud_name: The subcloud name """ alarm_updates = { "critical_alarms": -1, "major_alarms": -1, "minor_alarms": -1, "warnings": -1, "cloud_status": consts.ALARMS_DISABLED, } alarm_aggr = alarm_aggregation.AlarmAggregation(context) alarm_aggr.update_alarm_summary(subcloud_name, alarm_updates) def verify_ongoing_subcloud_strategy(context, subcloud): strategy_steps = None try: strategy_steps = db_api.strategy_step_get(context, subcloud.id) except exceptions.StrategyStepNotFound: LOG.debug(f"No existing vim strategy steps on subcloud: {subcloud.name}") except Exception: LOG.exception(f"Failed to get strategy steps on subcloud: {subcloud.name}.") return True if strategy_steps and strategy_steps.state not in ( consts.STRATEGY_STATE_COMPLETE, consts.STRATEGY_STATE_ABORTED, consts.STRATEGY_STATE_FAILED, ): return True return False