
1. Refactor dcorch's generic_sync_manager.py and initial_sync_manager into a main process manager and a worker manager. The main manager will handle the allocation of eligible subclouds to each worker. 2. Rename the current EngineService to EngineWorkerService and introduce a new EngineService for the main process, similar to DCManagerAuditService and DCManagerAuditWorkerService. 3. Rename the current RPC EngineClient to EngineWorkerClient and introduce a new EngineClient. Adapt the RPC methods to accommodate the modifications in these main process managers and worker managers. 4. Move master resources data retrieval from each sync_thread to engine workers. 5. Implement 2 new db APIs for subcloud batch sync and state updates. 6. Remove code related to sync_lock and its associated db table schema. 7. Add ocf script for managing the start and stop of the dcorch engine-worker service, and make changes in packaging accordingly. 8. Bug fixes for the issues related to the usage of base64.urlsafe_b64encode and base64.urlsafe_b64decode in python3. 9. Update unit tests for the main process and worker managers. Test Plan: PASS: Verify that the dcorch audit runs properly every 5 minutes. PASS: Verify that the initial sync runs properly every 10 seconds. PASS: Verify that the sync subclouds operation runs properly every 5 seconds. PASS: Successfully start and stop the dcorch-engine and dcorch-engine-worker services using the sm commands. PASS: Change the admin password on the system controller using the command "openstack --os-region-name SystemController user password set". Verify that the admin password is synchronized to the subcloud and the dcorch receives the corresponding sync request, followed by successful execution of sync resources for the subcloud. PASS: Unmanage and then manage a subcloud, and verify that the initial sync is executed successfully for that subcloud. PASS: Verify the removal of the sync_lock table from the dcorch db. 
Story: 2011106 Task: 50013 Change-Id: I329847bd1107ec43e67ec59bdd1e3111b7b37cd3 Signed-off-by: lzhu1 <li.zhu@windriver.com>
144 lines
5.7 KiB
Python
# Copyright 2017 Ericsson AB.
|
|
# Copyright (c) 2020-2024 Wind River Systems, Inc.
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import eventlet
|
|
from oslo_config import cfg
|
|
from oslo_log import log as logging
|
|
|
|
from dccommon import consts as dccommon_consts
|
|
from dcorch.common import consts as dco_consts
|
|
from dcorch.common import context
|
|
from dcorch.db import api as db_api
|
|
from dcorch.rpc import client
|
|
|
|
CONF = cfg.CONF
LOG = logging.getLogger(__name__)

# How often (seconds) the main process wakes up to look for audit work.
CHECK_AUDIT_INTERVAL = 300  # frequency to check for audit work
# How often (seconds) the main process wakes up to look for sync work.
CHECK_SYNC_INTERVAL = 5  # frequency to check for sync work
# Default minimum time (seconds) between two audits of the same subcloud;
# passed to the db API which compares it against the last audit timestamp.
AUDIT_INTERVAL = 1200  # Default audit interval
|
class GenericSyncManager(object):
    """Main-process manager that distributes sync/audit work to workers.

    Periodically selects the subclouds eligible for a sync or an audit,
    marks them in-progress in the database, and hands them out in batches
    to the engine worker processes over RPC.
    """

    def __init__(self, *args, **kwargs):
        # Admin context used for db access and as the context of every
        # RPC message sent to the engine workers.
        self.context = context.get_admin_context()
        # RPC client targeting the dcorch engine-worker service.
        self.engine_worker_rpc_client = client.EngineWorkerClient()

    def sync_job_thread(self):
        """Perform sync request for subclouds as required."""

        while True:
            try:
                self.sync_subclouds()
                eventlet.greenthread.sleep(CHECK_SYNC_INTERVAL)
            except eventlet.greenlet.GreenletExit:
                # Shutdown requested; stop the loop.
                return
            except Exception as e:
                # Log and keep looping; a single failed pass must not
                # kill the sync thread.
                LOG.exception(e)

    def sync_audit_thread(self):
        """Perform sync audit for subclouds as required."""

        while True:
            try:
                self.run_sync_audit()
                eventlet.greenthread.sleep(CHECK_AUDIT_INTERVAL)
            except eventlet.greenlet.GreenletExit:
                # Shutdown requested; stop the loop.
                return
            except Exception as e:
                # Log and keep looping; a single failed pass must not
                # kill the audit thread.
                LOG.exception(e)

    def _process_subclouds(self, rpc_method, subcloud_sync_list):
        """Split the eligible subclouds into batches and dispatch each one.

        Adding the worker count before the integer division guarantees a
        batch size of at least 1.
        """
        total = len(subcloud_sync_list)
        batch_size = (total + CONF.workers) // (CONF.workers)

        # Hand each slice of (subcloud, endpoint_type) pairs to an engine
        # worker for processing.
        for start in range(0, total, batch_size):
            self._send_chunk(
                rpc_method, subcloud_sync_list[start:start + batch_size])
        LOG.debug(f"Done sending {rpc_method.__name__} request messages.")

    def sync_subclouds(self):
        """Dispatch sync work for all subclouds that requested it."""
        LOG.info("Start sync_subclouds")

        # Atomically fetch the eligible (region_name, endpoint_type) pairs
        # (managed, online, initial sync completed, with a requested or
        # failed sync) and flip their status to in-progress.
        subcloud_sync_list = db_api.subcloud_sync_update_all_to_in_progress(
            self.context,
            management_state=dccommon_consts.MANAGEMENT_MANAGED,
            availability_status=dccommon_consts.AVAILABILITY_ONLINE,
            initial_sync_state=dco_consts.INITIAL_SYNC_STATE_COMPLETED,
            sync_requests=[dco_consts.SYNC_STATUS_REQUESTED,
                           dco_consts.SYNC_STATUS_FAILED])

        if not subcloud_sync_list:
            LOG.debug("No eligible subclouds for sync.")
            return

        self._process_subclouds(
            self.engine_worker_rpc_client.sync_subclouds, subcloud_sync_list)

    def run_sync_audit(self):
        """Dispatch audit work for all subclouds that are due for one."""
        LOG.info("Start run_sync_audit")

        # Atomically fetch the eligible (region_name, endpoint_type) pairs
        # and mark them in-progress. The db API honours the audit interval
        # only when the previous audit completed or is still in progress
        # (covering a worker that died mid-audit); failed or never-audited
        # endpoints are picked up immediately.
        subcloud_sync_list = db_api.subcloud_audit_update_all_to_in_progress(
            self.context,
            management_state=dccommon_consts.MANAGEMENT_MANAGED,
            availability_status=dccommon_consts.AVAILABILITY_ONLINE,
            initial_sync_state=dco_consts.INITIAL_SYNC_STATE_COMPLETED,
            audit_interval=AUDIT_INTERVAL)

        if not subcloud_sync_list:
            LOG.debug("No eligible subclouds for audit.")
            return

        self._process_subclouds(
            self.engine_worker_rpc_client.run_sync_audit, subcloud_sync_list)

    def sync_request(self, ctxt, endpoint_type):
        """Flag every managed subcloud's endpoint as needing a sync.

        Called when someone enqueues a sync job for the given endpoint
        type; the next sync pass will pick the subclouds up.
        """
        db_api.subcloud_sync_update_all(
            ctxt, dccommon_consts.MANAGEMENT_MANAGED, endpoint_type,
            values={'sync_request': dco_consts.SYNC_STATUS_REQUESTED})

    def _send_chunk(self, rpc_method, subcloud_sync_chunk):
        """Send one batch to an engine worker, logging any RPC failure."""
        try:
            rpc_method(self.context, subcloud_sync_chunk)
            LOG.debug(f"Sent {rpc_method.__name__} request message for "
                      f"{len(subcloud_sync_chunk)} (subcloud, endpoint_type) "
                      f"pairs.")
        except Exception as e:
            # Best-effort dispatch: a failed batch is logged, not raised,
            # so the remaining batches still go out.
            LOG.error(f"Exception occurred in {rpc_method.__name__} for "
                      f"subclouds {subcloud_sync_chunk}: {e}")