
This commit creates a REST API to reinstall a subcloud.

In SubcloudController, the tasks include:
1. Read the subcloud info from the dcmanager database (subclouds table).
2. JSONify the data_install from the database and generate the new
   install values.

In SubcloudManager:
1. Check the subcloud availability and software version before the
   reinstall.
2. Check the image value; if no image is specified, update it with the
   ISO in dc-vault.
3. Run the install command and the apply command.

API format: PATCH /v1.0/subclouds/<subcloud>/reinstall

Tests:

Happy path:
1. dcmanager subcloud add: a new subcloud with bootstrap values, deploy
   config and install values.
2. dcmanager subcloud delete: an existing subcloud.
3. dcmanager subcloud reconfig: an existing subcloud with deploy config.
4. dcmanager subcloud reinstall: an existing offline subcloud that has
   an image path in data_install in the dcmanager database. After the
   reinstall and bootstrap tasks succeed, reconfig this subcloud with a
   proper deploy config; the subcloud comes online.
5. Upload an image to dc-vault using:
   system --os-region-name SystemController load-import -a
   <bootimage.iso> <bootimage.sig>
   Then run dcmanager subcloud reinstall on an existing offline subcloud
   that has no image path in data_install in the dcmanager database.
   After the reinstall and bootstrap tasks succeed, reconfig this
   subcloud with a proper deploy config; the subcloud comes online.

Unhappy path:
1. dcmanager subcloud reinstall: an existing online subcloud; reinstall
   fails.
2. dcmanager subcloud reinstall: an existing subcloud without a
   data_install value in the database; reinstall fails.
3. dcmanager subcloud reinstall: an existing subcloud with data_install
   in the database but missing a mandatory install value; reinstall
   fails.
4. dcmanager subcloud reinstall: an existing subcloud whose software
   version in the database does not match the system controller's;
   reinstall fails.
5. dcmanager subcloud reinstall: an existing subcloud with data_install
   in the database but no image path in data_install, and no correctly
   versioned image in dc-vault on the system controller; reinstall
   fails.

Story: 2007267
Task: 40732
Change-Id: I8be6d8d11e6b4ee02bbcca499ba8869ba76bffaa
Signed-off-by: Yuxing Jiang <yuxing.jiang@windriver.com>
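For illustration, a minimal sketch of exercising the new endpoint directly over HTTP (the dcmanager CLI normally wraps this call). The API host/port, the token handling and the empty request body below are assumptions made for the sketch, not details defined by this change:

    # Illustrative sketch only: call PATCH /v1.0/subclouds/<subcloud>/reinstall.
    # The dcmanager API address, the auth token and the (empty) JSON body are
    # assumed here; the controller derives the real install values from the
    # subcloud's data_install record.
    import json
    import requests

    DCMANAGER_API = "http://192.168.204.1:8119/v1.0"  # assumed dcmanager endpoint
    TOKEN = "<keystone-auth-token>"                    # obtained from keystone
    SUBCLOUD = "subcloud1"

    resp = requests.patch(
        "%s/subclouds/%s/reinstall" % (DCMANAGER_API, SUBCLOUD),
        headers={"X-Auth-Token": TOKEN, "Content-Type": "application/json"},
        data=json.dumps({}))  # any required body fields are validated server-side
    resp.raise_for_status()
    print(resp.json())  # the updated subcloud record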
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#

import datetime
from eventlet.green import subprocess
import filecmp
import json
import keyring
import netaddr
import os
import threading
import time

from oslo_log import log as logging
from oslo_messaging import RemoteError

from tsconfig.tsconfig import CONFIG_PATH
from tsconfig.tsconfig import SW_VERSION

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import kubeoperator
from dccommon.subcloud_install import SubcloudInstall

from dcorch.common import consts as dcorch_consts
from dcorch.rpc import client as dcorch_rpc_client

from dcmanager.common import consts
from dcmanager.common.consts import INVENTORY_FILE_POSTFIX
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.rpc import client as rpc_client

from dcmanager.db import api as db_api

from fm_api import constants as fm_const
from fm_api import fm_api

LOG = logging.getLogger(__name__)

# Name of our distributed cloud addn_hosts file for dnsmasq
# to read. This file is referenced in dnsmasq.conf
ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc'

# Subcloud configuration paths
ANSIBLE_SUBCLOUD_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml'
ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/install.yml'

USERS_TO_REPLICATE = [
    'sysinv',
    'patching',
    'vim',
    'mtce',
    'fm',
    'barbican',
    'dcmanager']

SC_INTERMEDIATE_CERT_DURATION = "8760h"  # 1 year = 24 hours x 365
SC_INTERMEDIATE_CERT_RENEW_BEFORE = "720h"  # 30 days
CERT_NAMESPACE = "dc-cert"


def sync_update_subcloud_endpoint_status(func):
    """Synchronized lock decorator for _update_subcloud_endpoint_status. """

    def _get_lock_and_call(*args, **kwargs):
        """Get a single fair lock per subcloud based on subcloud name. """

        # subcloud name is the 3rd argument to
        # _update_subcloud_endpoint_status()
        @utils.synchronized(args[2], external=False, fair=True)
        def _call_func(*args, **kwargs):
            return func(*args, **kwargs)

        return _call_func(*args, **kwargs)

    return _get_lock_and_call


class SubcloudManager(manager.Manager):
    """Manages tasks related to subclouds."""

    def __init__(self, *args, **kwargs):
        LOG.debug(_('SubcloudManager initialization...'))

        super(SubcloudManager, self).__init__(service_name="subcloud_manager",
                                              *args, **kwargs)
        self.context = context.get_admin_context()
        self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
        self.fm_api = fm_api.FaultAPIs()

    @staticmethod
    def _get_subcloud_cert_name(subcloud_name):
        cert_name = "%s-adminep-ca-certificate" % subcloud_name
        return cert_name

    @staticmethod
    def _get_subcloud_cert_secret_name(subcloud_name):
        secret_name = "%s-adminep-ca-certificate" % subcloud_name
        return secret_name

    @staticmethod
    def _create_intermediate_ca_cert(payload):
        subcloud_name = payload["name"]
        cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
        secret_name = SubcloudManager._get_subcloud_cert_secret_name(
            subcloud_name)

        cert = {
            "apiVersion": "cert-manager.io/v1alpha2",
            "kind": "Certificate",
            "metadata": {
                "namespace": CERT_NAMESPACE,
                "name": cert_name
            },
            "spec": {
                "secretName": secret_name,
                "duration": SC_INTERMEDIATE_CERT_DURATION,
                "renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE,
                "issuerRef": {
                    "kind": "Issuer",
                    "name": "dc-adminep-root-ca-issuer"
                },
                "commonName": cert_name,
                "isCA": True,
            },
        }

        kube = kubeoperator.KubeOperator()
        kube.apply_cert_manager_certificate(CERT_NAMESPACE, cert_name, cert)

        for count in range(1, 20):
            secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE)
            if not hasattr(secret, 'data'):
                time.sleep(1)
                LOG.debug('Wait for %s ... %s' % (secret_name, count))
                continue

            data = secret.data
            if ('ca.crt' not in data or
                    'tls.crt' not in data or 'tls.key' not in data) or \
                    not (data['ca.crt'] and data['tls.crt'] and data['tls.key']):
                # ca cert, certificate and key pair are needed and must exist
                # for creating an intermediate ca. If not, certificate is not
                # ready yet.
                time.sleep(1)
                LOG.debug('Wait for %s ... %s' % (secret_name, count))
                continue

            payload['dc_root_ca_cert'] = data['ca.crt']
            payload['sc_ca_cert'] = data['tls.crt']
            payload['sc_ca_key'] = data['tls.key']
            return

        raise Exception("Secret for certificate %s is not ready." % cert_name)

    def _get_ansible_filename(self, subcloud_name, postfix='.yml'):
        ansible_filename = os.path.join(
            consts.ANSIBLE_OVERRIDES_PATH,
            subcloud_name + postfix)
        return ansible_filename

    def compose_install_command(self, subcloud_name, ansible_subcloud_inventory_file):
        install_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK,
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name,
            "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + '/' + "install_values.yml"]
        return install_command

    def compose_apply_command(self, subcloud_name, ansible_subcloud_inventory_file):
        apply_command = [
            "ansible-playbook", ANSIBLE_SUBCLOUD_PLAYBOOK, "-i",
            ansible_subcloud_inventory_file,
            "--limit", subcloud_name
        ]
        # Add the overrides dir and region_name so the playbook knows
        # which overrides to load
        apply_command += [
            "-e", str("override_files_dir='%s' region_name=%s") % (
                consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)]
        return apply_command

    def compose_deploy_command(self, subcloud_name, ansible_subcloud_inventory_file, payload):
        deploy_command = [
            "ansible-playbook", payload[consts.DEPLOY_PLAYBOOK],
            "-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
                  subcloud_name + '_deploy_values.yml',
            "-i", ansible_subcloud_inventory_file,
            "--limit", subcloud_name
        ]
        return deploy_command

    def add_subcloud(self, context, payload):
        """Add subcloud and notify orchestrators.

        :param context: request context object
        :param payload: subcloud configuration
        """
        LOG.info("Adding subcloud %s." % payload['name'])
        subcloud = db_api.subcloud_get_by_name(context, payload['name'])

        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)

        # Populate the subcloud status table with all endpoints
        for endpoint in dcorch_consts.ENDPOINT_TYPES_LIST:
            db_api.subcloud_status_create(context,
                                          subcloud.id,
                                          endpoint)

        try:
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            # Create a new route to this subcloud on the management interface
            # on both controllers.
            m_ks_client = OpenStackDriver(
                region_name=consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            subcloud_subnet = netaddr.IPNetwork(payload['management_subnet'])
            sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME,
                                         m_ks_client.session)
            controllers = sysinv_client.get_controller_hosts()
            for controller in controllers:
                management_interface = sysinv_client.get_management_interface(
                    controller.hostname)
                if management_interface is not None:
                    sysinv_client.create_route(
                        management_interface.uuid,
                        str(subcloud_subnet.ip),
                        subcloud_subnet.prefixlen,
                        payload['systemcontroller_gateway_address'],
                        1)

            # Create endpoints to this subcloud on the
            # management-start-ip of the subcloud which will be allocated
            # as the floating Management IP of the Subcloud if the
            # Address Pool is not shared. In case the endpoint entries
            # are incorrect, or the management IP of the subcloud is changed
            # in the future, it will not go managed or will show up as
            # out of sync. To fix this use Openstack endpoint commands
            # on the SystemController to change the subcloud endpoints.
            # The non-identity endpoints are added to facilitate horizon access
            # from the System Controller to the subcloud.
            endpoint_config = []
            endpoint_ip = payload['management_start_address']
            if netaddr.IPAddress(endpoint_ip).version == 6:
                endpoint_ip = '[' + endpoint_ip + ']'

            for service in m_ks_client.services_list:
                if service.type == dcorch_consts.ENDPOINT_TYPE_PLATFORM:
                    admin_endpoint_url = "https://{}:6386/v1".format(endpoint_ip)
                    endpoint_config.append({"id": service.id,
                                            "admin_endpoint_url": admin_endpoint_url})
                elif service.type == dcorch_consts.ENDPOINT_TYPE_IDENTITY:
                    admin_endpoint_url = "https://{}:5001/v3".format(endpoint_ip)
                    endpoint_config.append({"id": service.id,
                                            "admin_endpoint_url": admin_endpoint_url})
                elif service.type == dcorch_consts.ENDPOINT_TYPE_PATCHING:
                    admin_endpoint_url = "https://{}:5492".format(endpoint_ip)
                    endpoint_config.append({"id": service.id,
                                            "admin_endpoint_url": admin_endpoint_url})
                elif service.type == dcorch_consts.ENDPOINT_TYPE_FM:
                    admin_endpoint_url = "https://{}:18003".format(endpoint_ip)
                    endpoint_config.append({"id": service.id,
                                            "admin_endpoint_url": admin_endpoint_url})
                elif service.type == dcorch_consts.ENDPOINT_TYPE_NFV:
                    admin_endpoint_url = "https://{}:4546".format(endpoint_ip)
                    endpoint_config.append({"id": service.id,
                                            "admin_endpoint_url": admin_endpoint_url})

            if len(endpoint_config) < 5:
                raise exceptions.BadRequest(
                    resource='subcloud',
                    msg='Missing service in SystemController')

            for endpoint in endpoint_config:
                m_ks_client.keystone_client.endpoints.create(
                    endpoint["id"],
                    endpoint['admin_endpoint_url'],
                    interface=dccommon_consts.KS_ENDPOINT_ADMIN,
                    region=subcloud.name)

            # Inform orchestrator that subcloud has been added
            self.dcorch_rpc_client.add_subcloud(
                context, subcloud.name, subcloud.software_version)

            # create entry into alarm summary table, will get real values later
            alarm_updates = {'critical_alarms': -1,
                             'major_alarms': -1,
                             'minor_alarms': -1,
                             'warnings': -1,
                             'cloud_status': consts.ALARMS_DISABLED}
            db_api.subcloud_alarms_create(context, subcloud.name,
                                          alarm_updates)

            # Regenerate the addn_hosts_dc file
            self._create_addn_hosts_dc(context)

            # Query system controller keystone admin user/project IDs,
            # services project id, sysinv and dcmanager user id and store in
            # payload so they get copied to the override file
            admin_user_id = None
            sysinv_user_id = None
            dcmanager_user_id = None
            admin_project_id = None
            services_project_id = None

            user_list = m_ks_client.get_enabled_users(id_only=False)
            for user in user_list:
                if user.name == dccommon_consts.ADMIN_USER_NAME:
                    admin_user_id = user.id
                elif user.name == dccommon_consts.SYSINV_USER_NAME:
                    sysinv_user_id = user.id
                elif user.name == dccommon_consts.DCMANAGER_USER_NAME:
                    dcmanager_user_id = user.id

            project_list = m_ks_client.get_enabled_projects(id_only=False)
            for project in project_list:
                if project.name == dccommon_consts.ADMIN_PROJECT_NAME:
                    admin_project_id = project.id
                elif project.name == dccommon_consts.SERVICES_USER_NAME:
                    services_project_id = project.id

            payload['system_controller_keystone_admin_user_id'] = \
                admin_user_id
            payload['system_controller_keystone_admin_project_id'] = \
                admin_project_id
            payload['system_controller_keystone_services_project_id'] = \
                services_project_id
            payload['system_controller_keystone_sysinv_user_id'] = \
                sysinv_user_id
            payload['system_controller_keystone_dcmanager_user_id'] = \
                dcmanager_user_id

            # Add the admin and service user passwords to the payload so they
            # get copied to the override file
            payload['ansible_become_pass'] = payload['sysadmin_password']
            payload['ansible_ssh_pass'] = payload['sysadmin_password']
            payload['admin_password'] = str(keyring.get_password('CGCS',
                                                                 'admin'))

            if "install_values" in payload:
                payload['install_values']['ansible_ssh_pass'] = \
                    payload['sysadmin_password']
                if 'image' not in payload['install_values']:
                    matching_iso, matching_sig = utils.get_vault_load_files(
                        SW_VERSION)
                    payload['install_values'].update({'image': matching_iso})

            deploy_command = None
            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name)
                deploy_command = self.compose_deploy_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file,
                    payload)

            del payload['sysadmin_password']
            payload['users'] = dict()
            for user in USERS_TO_REPLICATE:
                payload['users'][user] = \
                    str(keyring.get_password(
                        user, dccommon_consts.SERVICES_USER_NAME))

            # Create the ansible inventory for the new subcloud
            utils.create_subcloud_inventory(payload,
                                            ansible_subcloud_inventory_file)

            # create subcloud intermediate certificate and pass in keys
            self._create_intermediate_ca_cert(payload)

            # Write this subcloud's overrides to file
            # NOTE: This file should not be deleted if subcloud add fails
            # as it is used for debugging
            self._write_subcloud_ansible_config(context, payload)
            install_command = None
            if "install_values" in payload:
                install_command = self.compose_install_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file)
            apply_command = self.compose_apply_command(
                subcloud.name,
                ansible_subcloud_inventory_file)

            apply_thread = threading.Thread(
                target=self.run_deploy,
                args=(subcloud, payload, context,
                      install_command, apply_command, deploy_command))
            apply_thread.start()

            return db_api.subcloud_db_model_to_dict(subcloud)

        except Exception:
            LOG.exception("Failed to create subcloud %s" % payload['name'])
            # If we failed to create the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    def reconfigure_subcloud(self, context, subcloud_id, payload):
        """Reconfigure subcloud

        :param context: request context object
        :param payload: subcloud configuration
        """
        LOG.info("Reconfiguring subcloud %s." % subcloud_id)

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)
        try:
            # Ansible inventory filename for the specified subcloud
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            deploy_command = None
            if "deploy_playbook" in payload:
                self._prepare_for_deployment(payload, subcloud.name)
                deploy_command = self.compose_deploy_command(
                    subcloud.name,
                    ansible_subcloud_inventory_file,
                    payload)

            del payload['sysadmin_password']
            apply_thread = threading.Thread(
                target=self.run_deploy,
                args=(subcloud, payload, context, None, None, deploy_command))
            apply_thread.start()
            return db_api.subcloud_db_model_to_dict(subcloud)
        except Exception:
            LOG.exception("Failed to reconfigure subcloud %s" % subcloud.name)
            # If we failed to reconfigure the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    def reinstall_subcloud(self, context, subcloud_id, payload):
        """Reinstall subcloud

        :param context: request context object
        :param subcloud_id: subcloud id from db
        :param payload: subcloud reinstall
        """

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        # Semantic checking
        if subcloud.availability_status == \
                consts.AVAILABILITY_ONLINE:
            raise exceptions.SubcloudNotOffline()

        software_version = str(payload['install_values'].get('software_version'))
        LOG.info("Checking subcloud software version %s against system "
                 "controller version %s" % (software_version, SW_VERSION))

        if software_version != SW_VERSION:
            raise exceptions.BadRequest(
                resource='subcloud',
                msg='Software version should match the system controller')

        if 'image' not in payload['install_values']:
            matching_iso, matching_sig = utils.get_vault_load_files(
                SW_VERSION)
            payload['install_values'].update({'image': matching_iso})

        LOG.info("Reinstalling subcloud %s." % subcloud_id)

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            software_version=SW_VERSION,
            deploy_status=consts.DEPLOY_STATE_PRE_INSTALL)

        try:
            ansible_subcloud_inventory_file = self._get_ansible_filename(
                subcloud.name, INVENTORY_FILE_POSTFIX)

            payload['admin_password'] = str(
                keyring.get_password('CGCS', 'admin'))
            payload['ansible_become_pass'] = payload['admin_password']
            payload['ansible_ssh_pass'] = payload['admin_password']
            payload['install_values']['ansible_ssh_pass'] = \
                payload['admin_password']
            payload['install_values']['ansible_become_pass'] = \
                payload['admin_password']
            payload['bootstrap-address'] = \
                payload['install_values']['bootstrap_address']

            utils.create_subcloud_inventory(payload,
                                            ansible_subcloud_inventory_file)

            self._create_intermediate_ca_cert(payload)

            install_command = self.compose_install_command(
                subcloud.name,
                ansible_subcloud_inventory_file)
            apply_command = self.compose_apply_command(
                subcloud.name,
                ansible_subcloud_inventory_file)
            apply_thread = threading.Thread(
                target=self.run_deploy,
                args=(subcloud, payload, context,
                      install_command, apply_command, None))
            apply_thread.start()
            return db_api.subcloud_db_model_to_dict(subcloud)
        except Exception:
            LOG.exception("Failed to reinstall subcloud %s" % subcloud.name)
            # If we failed to reinstall the subcloud, update the
            # deployment status
            db_api.subcloud_update(
                context, subcloud_id,
                deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)

    @staticmethod
    def run_deploy(subcloud, payload, context,
                   install_command=None, apply_command=None,
                   deploy_command=None):

        if install_command:
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_PRE_INSTALL)
            try:
                install = SubcloudInstall(context, subcloud.name)
                install.prep(consts.ANSIBLE_OVERRIDES_PATH,
                             payload['install_values'])
            except Exception as e:
                LOG.exception(e)
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)
                LOG.error(str(e))
                install.cleanup()
                return

            # Run the remote install playbook
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_INSTALLING)
            try:
                install.install(consts.DC_LOG_DIR, install_command)
            except Exception as e:
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED)
                LOG.error(str(e))
                install.cleanup()
                return
            install.cleanup()
            LOG.info("Successfully installed subcloud %s" % subcloud.name)

        if apply_command:
            try:
                # Update the subcloud to bootstrapping
                db_api.subcloud_update(
                    context, subcloud.id,
                    deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING)
            except Exception as e:
                LOG.exception(e)
                raise e

            # Run the ansible bootstrap-subcloud playbook
            log_file = \
                consts.DC_LOG_DIR + subcloud.name + '_bootstrap_' + \
                str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) \
                + '.log'
            with open(log_file, "w") as f_out_log:
                try:
                    subprocess.check_call(apply_command,
                                          stdout=f_out_log,
                                          stderr=f_out_log)
                except subprocess.CalledProcessError as ex:
                    msg = "Failed to run the subcloud bootstrap playbook" \
                          " for subcloud %s, check individual log at " \
                          "%s for detailed output." % (
                              subcloud.name,
                              log_file)
                    ex.cmd = 'ansible-playbook'
                    LOG.error(msg)
                    db_api.subcloud_update(
                        context, subcloud.id,
                        deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED)
                    return
                LOG.info("Successfully bootstrapped subcloud %s" %
                         subcloud.name)

        if deploy_command:
            # Run the custom deploy playbook
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DEPLOYING)
            log_file = \
                consts.DC_LOG_DIR + subcloud.name + '_deploy_' + \
                str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) \
                + '.log'
            with open(log_file, "w") as f_out_log:
                try:
                    subprocess.check_call(deploy_command,
                                          stdout=f_out_log,
                                          stderr=f_out_log)
                except subprocess.CalledProcessError as ex:
                    msg = "Failed to run the subcloud deploy playbook" \
                          " for subcloud %s, check individual log at " \
                          "%s for detailed output." % (
                              subcloud.name,
                              log_file)
                    ex.cmd = 'deploy-playbook'
                    LOG.error(msg)
                    db_api.subcloud_update(
                        context, subcloud.id,
                        deploy_status=consts.DEPLOY_STATE_DEPLOY_FAILED)
                    return
                LOG.info("Successfully deployed subcloud %s" %
                         subcloud.name)

        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_DONE)

    def _create_addn_hosts_dc(self, context):
        """Generate the addn_hosts_dc file for hostname/ip translation"""

        addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC)
        addn_hosts_dc_temp = addn_hosts_dc + '.temp'

        subclouds = db_api.subcloud_get_all(context)
        with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp:
            for subcloud in subclouds:
                addn_dc_line = subcloud.management_start_ip + ' ' + \
                    subcloud.name + '\n'
                f_out_addn_dc_temp.write(addn_dc_line)

            # if no more subclouds, create empty file so dnsmasq does not
            # emit an error log.
            if not subclouds:
                f_out_addn_dc_temp.write(' ')

        if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc):
            os.rename(addn_hosts_dc_temp, addn_hosts_dc)
            # restart dnsmasq so it can re-read our addn_hosts file.
            os.system("pkill -HUP dnsmasq")

    def _write_subcloud_ansible_config(self, context, payload):
        """Create the override file for usage with the specified subcloud"""

        overrides_file = os.path.join(consts.ANSIBLE_OVERRIDES_PATH,
                                      payload['name'] + '.yml')

        m_ks_client = OpenStackDriver(
            region_name=consts.DEFAULT_REGION_NAME,
            region_clients=None).keystone_client
        sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME, m_ks_client.session)

        mgmt_pool = sysinv_client.get_management_address_pool()
        mgmt_floating_ip = mgmt_pool.floating_address
        mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix)

        oam_addresses = sysinv_client.get_oam_addresses()
        oam_floating_ip = oam_addresses.oam_floating_ip
        oam_subnet = oam_addresses.oam_subnet

        with open(overrides_file, 'w') as f_out_overrides_file:
            f_out_overrides_file.write(
                '---'
                '\nregion_config: yes'
                '\ndistributed_cloud_role: subcloud'
                '\nsystem_controller_subnet: ' + mgmt_subnet +
                '\nsystem_controller_floating_address: ' + mgmt_floating_ip +
                '\nsystem_controller_oam_subnet: ' + oam_subnet +
                '\nsystem_controller_oam_floating_address: ' + oam_floating_ip
                + '\n'
            )

            for k, v in payload.items():
                if k not in ['deploy_playbook', 'deploy_values',
                             'deploy_config', 'deploy_chart',
                             'deploy_overrides', 'install_values']:
                    f_out_overrides_file.write("%s: %s\n" % (k, json.dumps(v)))

    def _write_deploy_files(self, payload, subcloud_name):
        """Create the deploy value files for the subcloud"""

        deploy_values_file = os.path.join(
            consts.ANSIBLE_OVERRIDES_PATH, subcloud_name +
            '_deploy_values.yml')

        with open(deploy_values_file, 'w') as f_out_deploy_values_file:
            json.dump(payload['deploy_values'], f_out_deploy_values_file)

    def _prepare_for_deployment(self, payload, subcloud_name):
        payload['deploy_values'] = dict()
        payload['deploy_values']['ansible_become_pass'] = \
            payload['sysadmin_password']
        payload['deploy_values']['ansible_ssh_pass'] = \
            payload['sysadmin_password']
        payload['deploy_values']['admin_password'] = \
            str(keyring.get_password('CGCS', 'admin'))
        payload['deploy_values']['deployment_config'] = \
            payload[consts.DEPLOY_CONFIG]
        payload['deploy_values']['deployment_manager_chart'] = \
            payload[consts.DEPLOY_CHART]
        payload['deploy_values']['deployment_manager_overrides'] = \
            payload[consts.DEPLOY_OVERRIDES]
        self._write_deploy_files(payload, subcloud_name)

    def _delete_subcloud_routes(self, context, subcloud):
        """Delete the routes to this subcloud"""

        keystone_client = OpenStackDriver(
            region_name=consts.DEFAULT_REGION_NAME,
            region_clients=None).keystone_client

        # Delete the route to this subcloud on the management interface on
        # both controllers.
        management_subnet = netaddr.IPNetwork(subcloud.management_subnet)
        sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME, keystone_client.session)
        controllers = sysinv_client.get_controller_hosts()
        for controller in controllers:
            management_interface = sysinv_client.get_management_interface(
                controller.hostname)
            if management_interface is not None:
                sysinv_client.delete_route(
                    management_interface.uuid,
                    str(management_subnet.ip),
                    management_subnet.prefixlen,
                    str(netaddr.IPAddress(
                        subcloud.systemcontroller_gateway_ip)),
                    1)

    @staticmethod
    def _delete_subcloud_cert(subcloud_name):
        cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
        secret_name = SubcloudManager._get_subcloud_cert_secret_name(
            subcloud_name)

        kube = kubeoperator.KubeOperator()
        kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name)

        kube.kube_delete_secret(secret_name, CERT_NAMESPACE)
        LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name))

    def _remove_subcloud_details(self, context,
                                 subcloud,
                                 ansible_subcloud_inventory_file):
        """Remove subcloud details from database and inform orchestrators"""
        # Inform orchestrators that subcloud has been deleted
        try:
            self.dcorch_rpc_client.del_subcloud(context, subcloud.name)
        except RemoteError as e:
            if "SubcloudNotFound" in e:
                pass

        # delete the associated alarm entry
        try:
            db_api.subcloud_alarms_delete(context, subcloud.name)
        except RemoteError as e:
            if "SubcloudNotFound" in e:
                pass

        # We only delete subcloud endpoints, region and user information
        # in the Central Region. The subcloud is already unmanaged and powered
        # down so is not accessible. Therefore set up a session with the
        # Central Region Keystone ONLY.
        keystone_client = OpenStackDriver(
            region_name=consts.DEFAULT_REGION_NAME,
            region_clients=None).keystone_client

        # Delete keystone endpoints for subcloud
        keystone_client.delete_endpoints(subcloud.name)
        keystone_client.delete_region(subcloud.name)

        # Delete the routes to this subcloud
        self._delete_subcloud_routes(context, subcloud)

        # Remove the subcloud from the database
        try:
            db_api.subcloud_destroy(context, subcloud.id)
        except Exception as e:
            LOG.exception(e)
            raise e

        # Delete the ansible inventory for the new subcloud
        utils.delete_subcloud_inventory(ansible_subcloud_inventory_file)

        # Delete the subcloud intermediate certificate
        SubcloudManager._delete_subcloud_cert(subcloud.name)

        # Regenerate the addn_hosts_dc file
        self._create_addn_hosts_dc(context)

    def delete_subcloud(self, context, subcloud_id):
        """Delete subcloud and notify orchestrators.

        :param context: request context object.
        :param subcloud_id: id of subcloud to delete
        """
        LOG.info("Deleting subcloud %s." % subcloud_id)

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        # Semantic checking
        if subcloud.management_state != consts.MANAGEMENT_UNMANAGED:
            raise exceptions.SubcloudNotUnmanaged()

        if subcloud.availability_status == \
                consts.AVAILABILITY_ONLINE:
            raise exceptions.SubcloudNotOffline()

        # Ansible inventory filename for the specified subcloud
        ansible_subcloud_inventory_file = self._get_ansible_filename(
            subcloud.name, INVENTORY_FILE_POSTFIX)

        self._remove_subcloud_details(context,
                                      subcloud,
                                      ansible_subcloud_inventory_file)

        # Clear the offline fault associated with this subcloud as we
        # are deleting it. Note that endpoint out-of-sync alarms should
        # have been cleared when the subcloud was unmanaged and the endpoint
        # sync statuses were set to unknown.
        entity_instance_id = "subcloud=%s" % subcloud.name

        try:
            subcloud_offline = fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE
            fault = self.fm_api.get_fault(subcloud_offline,
                                          entity_instance_id)

            if fault:
                self.fm_api.clear_fault(subcloud_offline,
                                        entity_instance_id)
        except Exception as e:
            LOG.info("Problem clearing offline fault for "
                     "subcloud %s" % subcloud.name)
            LOG.exception(e)

    def update_subcloud(self,
                        context,
                        subcloud_id,
                        management_state=None,
                        description=None,
                        location=None,
                        group_id=None,
                        data_install=None,
                        force=None):
        """Update subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param management_state: new management state
        :param description: new description
        :param location: new location
        :param group_id: new subcloud group id
        :param data_install: subcloud install values
        :param force: force flag
        """

        LOG.info("Updating subcloud %s." % subcloud_id)

        # Get the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)
        original_management_state = subcloud.management_state

        # Semantic checking
        if management_state:
            if management_state == consts.MANAGEMENT_UNMANAGED:
                if subcloud.management_state == consts.MANAGEMENT_UNMANAGED:
                    LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already unmanaged')
            elif management_state == consts.MANAGEMENT_MANAGED:
                if subcloud.management_state == consts.MANAGEMENT_MANAGED:
                    LOG.warning("Subcloud %s already managed" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already managed')
                elif not force:
                    if subcloud.deploy_status != consts.DEPLOY_STATE_DONE:
                        LOG.warning("Subcloud %s can be managed only when "
                                    "deploy_status is complete" % subcloud_id)
                        raise exceptions.BadRequest(
                            resource='subcloud',
                            msg='Subcloud can be managed only if deploy status is complete')
                    if subcloud.availability_status != \
                            consts.AVAILABILITY_ONLINE:
                        LOG.warning("Subcloud %s is not online" % subcloud_id)
                        raise exceptions.SubcloudNotOnline()
            else:
                LOG.error("Invalid management_state %s" % management_state)
                raise exceptions.InternalError()

        subcloud = db_api.subcloud_update(context,
                                          subcloud_id,
                                          management_state=management_state,
                                          description=description,
                                          location=location,
                                          group_id=group_id,
                                          data_install=data_install)

        # Inform orchestrators that subcloud has been updated
        if management_state:

            try:
                # Inform orchestrator of state change
                self.dcorch_rpc_client.update_subcloud_states(
                    context,
                    subcloud.name,
                    management_state,
                    subcloud.availability_status)

                LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                         'availability:%s' % (subcloud.name,
                                              management_state,
                                              subcloud.availability_status))

            except Exception as e:
                LOG.exception(e)
                LOG.warn('Problem informing dcorch of subcloud '
                         'state change, resume to original state, subcloud: %s'
                         % subcloud.name)
                management_state = original_management_state
                subcloud = \
                    db_api.subcloud_update(context, subcloud_id,
                                           management_state=management_state,
                                           description=description,
                                           location=location)

            if management_state == consts.MANAGEMENT_UNMANAGED:

                # set all endpoint statuses to unknown, except the dc-cert
                # endpoint which continues to be audited for unmanaged
                # subclouds
                self.update_subcloud_endpoint_status(
                    context,
                    subcloud_name=subcloud.name,
                    endpoint_type=None,
                    sync_status=consts.SYNC_STATUS_UNKNOWN,
                    ignore_endpoints=[dcorch_consts.ENDPOINT_TYPE_DC_CERT])
            elif management_state == consts.MANAGEMENT_MANAGED:
                # Subcloud is managed
                # Tell cert-mon to audit endpoint certificate
                LOG.info('Request for managed audit for %s' % subcloud.name)
                dc_notification = rpc_client.DCManagerNotifications()
                dc_notification.subcloud_managed(context, subcloud.name)

        return db_api.subcloud_db_model_to_dict(subcloud)

    def _update_online_managed_subcloud(self, context, subcloud_id,
                                        endpoint_type, sync_status,
                                        alarmable, ignore_endpoints=None):
        """Update online/managed subcloud endpoint status

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param endpoint_type: endpoint type to update
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
               endpoint_type is None)
        """

        if ignore_endpoints is None:
            ignore_endpoints = []

        subcloud_status_list = []
        subcloud = None
        # retrieve the info from the db for this subcloud.
        # subcloud_id should not be None
        try:
            for subcloud, subcloud_status in db_api. \
                    subcloud_get_with_status(context, subcloud_id):
                if subcloud_status:
                    subcloud_status_list.append(
                        db_api.subcloud_endpoint_status_db_model_to_dict(
                            subcloud_status))
        except Exception as e:
            LOG.exception(e)
            raise e

        if subcloud:
            if endpoint_type:
                # updating a single endpoint on a single subcloud
                for subcloud_status in subcloud_status_list:
                    if subcloud_status['endpoint_type'] == endpoint_type:
                        if subcloud_status['sync_status'] == sync_status:
                            # No change in the sync_status
                            LOG.debug("Sync status (%s) for subcloud %s did "
                                      "not change - ignore update" %
                                      (sync_status, subcloud.name))
                            return
                        # We found the endpoint
                        break
                else:
                    # We did not find the endpoint
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Endpoint %s not found for subcloud' %
                            endpoint_type)

                LOG.info("Updating subcloud:%s endpoint:%s sync:%s" %
                         (subcloud.name, endpoint_type, sync_status))
                db_api.subcloud_status_update(context,
                                              subcloud_id,
                                              endpoint_type,
                                              sync_status)

                entity_instance_id = "subcloud=%s.resource=%s" % \
                                     (subcloud.name, endpoint_type)
                fault = self.fm_api.get_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
                    entity_instance_id)

                if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
                        and fault:
                    try:
                        self.fm_api.clear_fault(
                            fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                            entity_instance_id)
                    except Exception as e:
                        LOG.exception(e)

                elif not fault and alarmable and \
                        (sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
                    entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
                    try:
                        fault = fm_api.Fault(
                            alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                            alarm_state=fm_const.FM_ALARM_STATE_SET,
                            entity_type_id=entity_type_id,
                            entity_instance_id=entity_instance_id,
                            severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
                            reason_text=("%s %s sync_status is "
                                         "out-of-sync" %
                                         (subcloud.name, endpoint_type)),
                            alarm_type=fm_const.FM_ALARM_TYPE_0,
                            probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
                            proposed_repair_action="If problem persists "
                                                   "contact next level "
                                                   "of support",
                            service_affecting=False)

                        self.fm_api.set_fault(fault)

                    except Exception as e:
                        LOG.exception(e)

            else:
                # update all endpoints on this subcloud
                LOG.info("Updating all endpoints on subcloud: %s sync: %s "
                         "ignore_endpoints: %s" %
                         (subcloud.name, sync_status, ignore_endpoints))

                for entry in subcloud_status_list:
                    endpoint = entry[consts.ENDPOINT_TYPE]
                    if endpoint in ignore_endpoints:
                        # Do not update this endpoint
                        continue

                    db_api.subcloud_status_update(context,
                                                  subcloud_id,
                                                  endpoint,
                                                  sync_status)

                    entity_instance_id = "subcloud=%s.resource=%s" % \
                                         (subcloud.name, endpoint)

                    fault = self.fm_api.get_fault(
                        fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
                        entity_instance_id)

                    if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
                            and fault:
                        try:
                            self.fm_api.clear_fault(
                                fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                                entity_instance_id)
                        except Exception as e:
                            LOG.exception(e)

                    elif not fault and alarmable and \
                            (sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
                        entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
                        try:
                            fault = fm_api.Fault(
                                alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                                alarm_state=fm_const.FM_ALARM_STATE_SET,
                                entity_type_id=entity_type_id,
                                entity_instance_id=entity_instance_id,
                                severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
                                reason_text=("%s %s sync_status is "
                                             "out-of-sync" %
                                             (subcloud.name, endpoint)),
                                alarm_type=fm_const.FM_ALARM_TYPE_0,
                                probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
                                proposed_repair_action="If problem persists "
                                                       "contact next level "
                                                       "of support",
                                service_affecting=False)

                            self.fm_api.set_fault(fault)
                        except Exception as e:
                            LOG.exception(e)

        else:
            LOG.error("Subcloud not found:%s" % subcloud_id)

    @sync_update_subcloud_endpoint_status
    def _update_subcloud_endpoint_status(
            self, context,
            subcloud_name,
            endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            alarmable=True,
            ignore_endpoints=None):
        """Update subcloud endpoint status

        :param context: request context object
        :param subcloud_name: name of subcloud to update
        :param endpoint_type: endpoint type to update
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
               endpoint_type is None)
        """

        if ignore_endpoints is None:
            ignore_endpoints = []

        if not subcloud_name:
            raise exceptions.BadRequest(
                resource='subcloud',
                msg='Subcloud name not provided')

        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception as e:
            LOG.exception(e)
            raise e

        # Only allow updating the sync status if managed and online.
        # except dc-cert endpoint is audit only when subcloud is online
        # this could happen before subcloud is being managed.
        # This means if a subcloud is going offline or unmanaged, then
        # the sync status update must be done first.
        if (((subcloud.availability_status ==
              consts.AVAILABILITY_ONLINE)
             and (subcloud.management_state ==
                  consts.MANAGEMENT_MANAGED or
                  endpoint_type == dcorch_consts.ENDPOINT_TYPE_DC_CERT))
                or (sync_status != consts.SYNC_STATUS_IN_SYNC)):

            # update a single subcloud
            try:
                self._update_online_managed_subcloud(context,
                                                     subcloud.id,
                                                     endpoint_type,
                                                     sync_status,
                                                     alarmable,
                                                     ignore_endpoints)
            except Exception as e:
                LOG.exception(e)
                raise e
        else:
            LOG.info("Ignoring subcloud sync_status update for subcloud:%s "
                     "availability:%s management:%s endpoint:%s sync:%s" %
                     (subcloud_name, subcloud.availability_status,
                      subcloud.management_state, endpoint_type, sync_status))

    def update_subcloud_endpoint_status(
            self, context,
            subcloud_name=None,
            endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            alarmable=True,
            ignore_endpoints=None):
        """Update subcloud endpoint status

        :param context: request context object
        :param subcloud_name: name of subcloud to update
        :param endpoint_type: endpoint type to update
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
               endpoint_type is None)
        """

        if ignore_endpoints is None:
            ignore_endpoints = []

        if subcloud_name:
            self._update_subcloud_endpoint_status(
                context, subcloud_name, endpoint_type, sync_status, alarmable,
                ignore_endpoints)
        else:
            # update all subclouds
            for subcloud in db_api.subcloud_get_all(context):
                self._update_subcloud_endpoint_status(
                    context, subcloud.name, endpoint_type, sync_status,
                    alarmable, ignore_endpoints)

    def _update_subcloud_state(self, context, subcloud_name,
                               management_state, availability_status):
        try:
            self.dcorch_rpc_client.update_subcloud_states(
                context, subcloud_name, management_state, availability_status)

            LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                     'availability:%s' %
                     (subcloud_name,
                      management_state,
                      availability_status))
        except Exception:
            LOG.exception('Problem informing dcorch of subcloud state change,'
                          'subcloud: %s' % subcloud_name)

    def _raise_or_clear_subcloud_status_alarm(self, subcloud_name,
                                              availability_status):
        entity_instance_id = "subcloud=%s" % subcloud_name
        fault = self.fm_api.get_fault(
            fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
            entity_instance_id)

        if fault and (availability_status == consts.AVAILABILITY_ONLINE):
            try:
                self.fm_api.clear_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                    entity_instance_id)
            except Exception:
                LOG.exception("Failed to clear offline alarm for subcloud: %s",
                              subcloud_name)

        elif not fault and \
                (availability_status == consts.AVAILABILITY_OFFLINE):
            try:
                fault = fm_api.Fault(
                    alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                    alarm_state=fm_const.FM_ALARM_STATE_SET,
                    entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
                    entity_instance_id=entity_instance_id,

                    severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
                    reason_text=('%s is offline' % subcloud_name),
                    alarm_type=fm_const.FM_ALARM_TYPE_0,
                    probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
                    proposed_repair_action="Wait for subcloud to "
                                           "become online; if "
                                           "problem persists contact "
                                           "next level of support.",
                    service_affecting=True)

                self.fm_api.set_fault(fault)
            except Exception:
                LOG.exception("Failed to raise offline alarm for subcloud: %s",
                              subcloud_name)

    def update_subcloud_availability(self, context, subcloud_name,
                                     availability_status,
                                     update_state_only=False,
                                     audit_fail_count=None):
        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception:
            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)

        if update_state_only:
            # Nothing has changed, but we want to send a state update for this
            # subcloud as an audit. Get the most up-to-date data.
            self._update_subcloud_state(context, subcloud_name,
                                        subcloud.management_state,
                                        availability_status)
        elif availability_status is None:
            # only update the audit fail count
            try:
                db_api.subcloud_update(self.context, subcloud.id,
                                       audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting '
                         'audit_fail_count update: %s' % subcloud_name)
                return
        else:
            self._raise_or_clear_subcloud_status_alarm(subcloud_name,
                                                       availability_status)

            if availability_status == consts.AVAILABILITY_OFFLINE:
                # Subcloud is going offline, set all endpoint statuses to
                # unknown.
                self._update_subcloud_endpoint_status(
                    context, subcloud_name, endpoint_type=None,
                    sync_status=consts.SYNC_STATUS_UNKNOWN)

            try:
                updated_subcloud = db_api.subcloud_update(
                    context,
                    subcloud.id,
                    availability_status=availability_status,
                    audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting state'
                         ' update: %s' % subcloud_name)
                return

            if availability_status == consts.AVAILABILITY_ONLINE:
                # Subcloud is going online
                # Tell cert-mon to audit endpoint certificate.
                LOG.info('Request for online audit for %s' % subcloud_name)
                dc_notification = rpc_client.DCManagerNotifications()
                dc_notification.subcloud_online(context, subcloud_name)

            # Send dcorch a state update
            self._update_subcloud_state(context, subcloud_name,
                                        updated_subcloud.management_state,
                                        availability_status)

    def update_subcloud_sync_endpoint_type(self, context,
                                           subcloud_name,
                                           endpoint_type_list,
                                           openstack_installed):
        operation = 'add' if openstack_installed else 'remove'
        func_switcher = {
            'add': (
                self.dcorch_rpc_client.add_subcloud_sync_endpoint_type,
                db_api.subcloud_status_create
            ),
            'remove': (
                self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type,
                db_api.subcloud_status_delete
            )
        }

        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception:
            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)

        try:
            # Notify dcorch to add/remove sync endpoint type list
            func_switcher[operation][0](self.context, subcloud_name,
                                        endpoint_type_list)
            LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' %
                     (subcloud_name, endpoint_type_list))

            # Update subcloud status table by adding/removing openstack sync
            # endpoint types
            for endpoint_type in endpoint_type_list:
                func_switcher[operation][1](self.context, subcloud.id,
                                            endpoint_type)
            # Update openstack_installed of subcloud table
            db_api.subcloud_update(self.context, subcloud.id,
                                   openstack_installed=openstack_installed)
        except Exception:
            LOG.exception('Problem informing dcorch of subcloud sync endpoint'
                          ' type change, subcloud: %s' % subcloud_name)
|