
1. Refactor dcorch's generic_sync_manager.py and initial_sync_manager
   into a main process manager and a worker manager. The main manager
   will handle the allocation of eligible subclouds to each worker.
2. Rename the current EngineService to EngineWorkerService and
   introduce a new EngineService for the main process, similar to
   DCManagerAuditService and DCManagerAuditWorkerService.
3. Rename the current RPC EngineClient to EngineWorkerClient and
   introduce a new EngineClient. Adapt the RPC methods to accommodate
   the modifications in these main process managers and worker
   managers.
4. Move master resources data retrieval from each sync_thread to the
   engine workers.
5. Implement 2 new db APIs for subcloud batch sync and state updates.
6. Remove code related to sync_lock and its associated db table schema.
7. Add an OCF script for managing the start and stop of the dcorch
   engine-worker service, and make the packaging changes accordingly.
8. Fix bugs related to the usage of base64.urlsafe_b64encode and
   base64.urlsafe_b64decode in Python 3 (see the sketch after this
   message).
9. Update unit tests for the main process and worker managers.

Test Plan:
PASS: Verify that the dcorch audit runs properly every 5 minutes.
PASS: Verify that the initial sync runs properly every 10 seconds.
PASS: Verify that the sync subclouds operation runs properly every
      5 seconds.
PASS: Successfully start and stop the dcorch-engine and
      dcorch-engine-worker services using the sm commands.
PASS: Change the admin password on the system controller using the
      command "openstack --os-region-name SystemController user
      password set". Verify that the admin password is synchronized
      to the subcloud and that dcorch receives the corresponding sync
      request, followed by successful execution of sync resources for
      the subcloud.
PASS: Unmanage and then manage a subcloud, and verify that the
      initial sync is executed successfully for that subcloud.
PASS: Verify the removal of the sync_lock table from the dcorch db.

Story: 2011106
Task: 50013

Change-Id: I329847bd1107ec43e67ec59bdd1e3111b7b37cd3
Signed-off-by: lzhu1 <li.zhu@windriver.com>
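Note on item 8: in Python 3 the base64 urlsafe functions operate on
bytes rather than str, which is the usual source of such bugs. A
minimal sketch of the kind of encode/decode handling involved (the
helper names here are illustrative, not taken from the change):

    import base64

    def encode_payload(value: str) -> str:
        # urlsafe_b64encode requires bytes in Python 3, so encode the
        # input first, then decode the result back to str.
        return base64.urlsafe_b64encode(value.encode("utf-8")).decode("utf-8")

    def decode_payload(value: str) -> str:
        # urlsafe_b64decode returns bytes; an explicit decode is needed
        # to get a str back.
        return base64.urlsafe_b64decode(value.encode("utf-8")).decode("utf-8")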
# Copyright 2016 Ericsson AB
# Copyright (c) 2018-2022, 2024 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import copy
import re
import threading

from oslo_config import cfg
from oslo_log import log as logging
from six.moves.queue import Queue

from dccommon import consts as dccommon_consts
from dccommon import endpoint_cache
from dcorch.common import consts
from dcorch.common import context
from dcorch.common import exceptions
from dcorch.common.i18n import _
from dcorch.common import manager
from dcorch.common import utils
from dcorch.db import api as db_api
from dcorch.drivers.openstack import sdk

CONF = cfg.CONF
LOG = logging.getLogger(__name__)

# Projects are synced batch by batch. The configuration below defines
# the number of projects in each batch.
batch_opts = [
    cfg.IntOpt('batch_size',
               default=3,
               help='Number of projects to sync in each batch')
]

batch_opt_group = cfg.OptGroup('batch')
cfg.CONF.register_group(batch_opt_group)
cfg.CONF.register_opts(batch_opts, group=batch_opt_group)
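# A batch size other than the default can be set through the service
# configuration file; a minimal sketch of such an override (illustrative
# value, assuming the usual oslo.config file layout):
#
#     [batch]
#     batch_size = 5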
TASK_TYPE = 'quota_sync'


class QuotaManager(manager.Manager):
    """Manages tasks related to quota management"""

    # Static variables used to store cached usage information and the lock
    # that protects their access.
    # It's either this or pass references to the QuotaManager object
    # down into the guts of the SyncThread class.
    usage_lock = threading.Lock()
    # Each of the following is a dict where the keys are project-ID/user-ID
    # tuples. (The user-ID can be None for project-only usage.)
    total_project_usages = {}
    regions_usage_dict = {}

    def __init__(self, *args, **kwargs):
        LOG.debug(_('QuotaManager initialization...'))

        super(QuotaManager, self).__init__(service_name="quota_manager",
                                           *args, **kwargs)
        self.context = context.get_admin_context()
        self.endpoints = endpoint_cache.EndpointCache()

    @classmethod
    def calculate_subcloud_project_quotas(cls, project_id, user_id,
                                          new_global_quotas, subcloud):
        # Someone has changed the quotas for a project, so we need to
        # calculate the new quotas in each subcloud.

        # First, grab a copy of the usage from the last quota audit.
        with cls.usage_lock:
            regions_usage_dict = copy.deepcopy(
                cls.regions_usage_dict.get((project_id, user_id), {}))
            total_project_usages = copy.deepcopy(
                cls.total_project_usages.get((project_id, user_id), {}))

        # Calculate the remaining global project limit based on the new quotas
        # and the total usage for the project across all subclouds.
        unused_global_limits = collections.Counter(
            new_global_quotas) - collections.Counter(total_project_usages)
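        # Note: Counter subtraction keeps only positive counts, e.g.
        # Counter({'instances': 10, 'cores': 4}) -
        # Counter({'instances': 6, 'cores': 4}) == Counter({'instances': 4});
        # a fully-consumed limit drops out of the result entirely.
        # (Illustrative values, not from the source.)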

        # Now get the region-specific usage and trim it back to just the dict
        # keys present in the new quotas.
        try:
            region_usage = regions_usage_dict[subcloud]
            region_usage = dict([(k, region_usage[k])
                                 for k in new_global_quotas])
        except KeyError:
            # From startup until the quota audit runs we'll end up here.
            region_usage = {}

        # Now add the region-specific usage to the global remaining limits.
        region_new_limits = dict(unused_global_limits +
                                 collections.Counter(region_usage))

        return region_new_limits

    def get_projects_users_with_modified_quotas(self):
        # Get the list of project/user tuples that have modified quotas.
        project_user_list = set([])
        os_client = sdk.OpenStackDriver(dccommon_consts.VIRTUAL_MASTER_CLOUD)
        try:
            quotas = os_client.nova_client.nova_client.quotas.list()
            project_user_quotas = quotas['project_user_quotas']
            for project_user_quota in project_user_quotas:
                project_id = project_user_quota['project_id']
                user_quotas = project_user_quota['user_quotas']
                for user_quota in user_quotas:
                    user_id = user_quota['user_id']
                    project_user_list.add((project_id, user_id))
        except AttributeError:
            # Dealing with a novaclient that doesn't have quotas.list(),
            # so just ignore project/user quotas.
            pass
        return list(project_user_list)

    def periodic_balance_all(self):
        LOG.info("periodically balance quota for all keystone tenants")
        projects_thread_list = []

        # Generate a list of project_id/user_id tuples that need to have their
        # quotas updated. This is basically all projects, plus the
        # projects/users that have modified quotas.
        # Where user_id is None, this represents the project quotas. Where
        # user_id is specified, it represents the project/user quotas. (This
        # is only applicable to nova.)
        project_list = sdk.OpenStackDriver().get_enabled_projects()
        project_user_list = [(project, None) for project in project_list]
        project_user_mod_list = self.get_projects_users_with_modified_quotas()
        project_user_list.extend(project_user_mod_list)

        # Remove any global cache entries for project_id/user_id tuples that
        # aren't in the list to be updated. (They'll get updated on-demand.)
        with QuotaManager.usage_lock:
            # The same keys should be in QuotaManager.total_project_usages
            # so we only need to look at one of them.
            to_delete = [k for k in QuotaManager.regions_usage_dict
                         if k not in project_user_mod_list]
            for k in to_delete:
                del QuotaManager.regions_usage_dict[k]
                del QuotaManager.total_project_usages[k]

        # Iterate through the project list and call quota sync for each
        # project using threads.
        # Divide the list of projects into batches and perform quota sync
        # for one batch at a time.
        for current_batch_projects_users in utils.get_batch_projects(
                cfg.CONF.batch.batch_size, project_user_list):
            # "current_batch_projects_users" may have some None entries that
            # we don't want to iterate over.
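            # e.g. with batch_size 3 and four projects, the final batch
            # could look like [(p4, None), None, None]; the exact padding
            # is up to utils.get_batch_projects (illustrative shape).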
            current_batch_projects_users = [
                x for x in current_batch_projects_users if x is not None]
            LOG.info("Syncing quota for current batch with projects: %s",
                     current_batch_projects_users)
            for project_id, user_id in current_batch_projects_users:
                if project_id:
                    thread = threading.Thread(
                        target=self.quota_sync_for_project,
                        args=(project_id, user_id,))
                    projects_thread_list.append(thread)
                    thread.start()
            # Wait for all the threads to complete the job
            # (sync all projects' quota).
            for current_thread in projects_thread_list:
                current_thread.join()

    def read_quota_usage(self, project_id, user_id, region, usage_queue):
        # Writes a usage dict to the queue in the following format:
        # {'region_name': <merged nova, neutron & cinder usages>}
        LOG.info("Reading quota usage for project: %(project_id)s and user: "
                 "%(user_id)s in %(region)s",
                 {'project_id': project_id, 'user_id': user_id,
                  'region': region}
                 )
        os_client = sdk.OpenStackDriver(region)
        (nova_usage, neutron_usage, cinder_usage) = \
            os_client.get_resource_usages(project_id, user_id)
        total_region_usage = collections.defaultdict(dict)
        # Merge the nova, neutron & cinder usages into a single dict
        # for this region.
        if nova_usage:
            total_region_usage.update(nova_usage)
        if neutron_usage:
            total_region_usage.update(neutron_usage)
        if cinder_usage:
            total_region_usage.update(cinder_usage)
        usage_queue.put({region: total_region_usage})

    def get_summation(self, regions_dict):
        # Adds resource usages from different regions.
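        # e.g. {'r1': {'ram': 2048}, 'r2': {'ram': 1024}}
        #     -> Counter({'ram': 3072}) (illustrative values)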
        single_region = {}
        resultant_dict = collections.Counter()
        for current_region in regions_dict:
            single_region[current_region] = collections.Counter(
                regions_dict[current_region])
            resultant_dict += single_region[current_region]
        return resultant_dict

    def get_tenant_quota_limits_region(self, project_id, user_id, region):
        # Returns the quota limits for a region as a single dict merging
        # the nova, neutron & cinder limits.
        LOG.info("Reading quota limits for project: %(project_id)s and user: "
                 "%(user_id)s in %(region)s",
                 {'project_id': project_id, 'user_id': user_id,
                  'region': region}
                 )
        os_client = sdk.OpenStackDriver(region)
        (nova_limits, neutron_limits, cinder_limits) = \
            os_client.get_quota_limits(project_id, user_id)
        limits = {}
        limits.update(nova_limits)
        limits.update(neutron_limits)
        limits.update(cinder_limits)
        return limits

    def _get_dc_orch_project_limit(self, project_id):
        # Returns the DC Orchestrator limits for a project.
        dc_orch_limits_for_project = collections.defaultdict(dict)
        try:
            # Check if there are any quota limits in the DB for the project.
            limits_from_db = db_api.quota_get_all_by_project(self.context,
                                                             project_id)
        except exceptions.ProjectQuotaNotFound:
            limits_from_db = {}
        for current_resource in CONF.dc_orch_global_limit.items():
            resource = re.sub('quota_', '', current_resource[0])
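            # e.g. a conf option named 'quota_instances' maps to the
            # resource key 'instances' (illustrative option name).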
            # If the resource limit is in the DB, use it; otherwise use
            # the limit from the conf file.
            if resource in limits_from_db:
                dc_orch_limits_for_project[resource] = limits_from_db[
                    resource]
            else:
                dc_orch_limits_for_project[resource] = current_resource[1]
        return dc_orch_limits_for_project

    def _arrange_quotas_by_service_name(self, region_new_limit):
        # Returns a dict of resources with limits arranged by service name.
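        # e.g. {'instances': 10, 'volumes': 5, 'networks': 8} ->
        #     {'nova': {'instances': 10}, 'cinder': {'volumes': 5},
        #      'neutron': {'networks': 8}} (illustrative values)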
        resource_with_service = collections.defaultdict(dict)
        resource_with_service['nova'] = collections.defaultdict(dict)
        resource_with_service['cinder'] = collections.defaultdict(dict)
        resource_with_service['neutron'] = collections.defaultdict(dict)
        for limit in region_new_limit:
            if limit in dccommon_consts.NOVA_QUOTA_FIELDS:
                resource_with_service['nova'].update(
                    {limit: region_new_limit[limit]})
            elif limit in dccommon_consts.CINDER_QUOTA_FIELDS:
                resource_with_service['cinder'].update(
                    {limit: region_new_limit[limit]})
            elif limit in dccommon_consts.NEUTRON_QUOTA_FIELDS:
                resource_with_service['neutron'].update(
                    {limit: region_new_limit[limit]})
        return resource_with_service

    def update_quota_limits(self, project_id, user_id, region_new_limit,
                            current_region):
        # Updates the quota limits for a project with the newly calculated
        # limits.
        os_client = sdk.OpenStackDriver(current_region)
        os_client.write_quota_limits(project_id, user_id, region_new_limit)

    def quota_usage_update(self, project_id, user_id):
        # Update the quota usage for the specified project/user.
        regions_usage_dict = self.get_tenant_quota_usage_per_region(project_id,
                                                                    user_id)
        if not regions_usage_dict:
            # Skip syncing for the project if the regions usage could not
            # be read.
            LOG.error("Error reading regions usage for the project: "
                      "'%(project)s' and user: '%(user)s'. Skipping it and "
                      "continuing with the next project/user.",
                      {'project': project_id, 'user': user_id})
            return None, None

        # We want to return the original per-subcloud usage, so make a
        # copy for us to mangle.
        regions_usage_dict_copy = copy.deepcopy(regions_usage_dict)

        # We don't want to sum up the subcloud usage of resource types that
        # are managed by dcorch, so delete them from all regions except
        # the master one.
        for region in regions_usage_dict_copy:
            if region == dccommon_consts.VIRTUAL_MASTER_CLOUD:
                continue
            for quota in consts.QUOTAS_FOR_MANAGED_RESOURCES:
                regions_usage_dict_copy[region].pop(quota, None)

        # Add up the usage for this project/user across all subclouds.
        total_project_usages = dict(
            self.get_summation(regions_usage_dict_copy))

        # Save the global and per-region usage for use when
        # modifying quotas later.
        with QuotaManager.usage_lock:
            # Use the project/user tuple as the dict key.
            # 'user_id' will be None for the overall project usage.
            QuotaManager.total_project_usages[(project_id, user_id)] = \
                copy.deepcopy(total_project_usages)
            QuotaManager.regions_usage_dict[(project_id, user_id)] = \
                copy.deepcopy(regions_usage_dict)

        return total_project_usages, regions_usage_dict

    def quota_sync_for_project(self, project_id, user_id):
        # Sync quota limits for the project according to the formula below:
        #   Global remaining limit =
        #       DC Orchestrator global limit - summation of usages
        #       in all the regions
        #   New quota limit = global remaining limit + usage in that region
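        # e.g. with a global limit of 10 instances, a total usage of 6
        # across all regions, and a usage of 2 in a given region, that
        # region's new limit is (10 - 6) + 2 = 6. (Illustrative values.)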
        LOG.info("Quota sync called for project: %(project)s user: %(user)s",
                 {'project': project_id, 'user': user_id})
        regions_thread_list = []
        # Retrieve regions for the project. This is also done in
        # get_tenant_quota_usage_per_region() so we may be able to only do
        # it once. Would have to consider the failure modes though.
        os_driver = sdk.OpenStackDriver()
        region_lists = os_driver.get_all_regions_for_project(
            project_id)

        total_project_usages, regions_usage_dict = self.quota_usage_update(
            project_id, user_id)
        if (total_project_usages, regions_usage_dict) == (None, None):
            return

        # Get the global limit for this project from the master subcloud.
        dc_orch_global_limits = self.get_overall_tenant_quota_limits(
            project_id, user_id)
        # Calculate how much of the various limits have not yet been used.
        unused_global_limits = collections.Counter(
            dc_orch_global_limits) - collections.Counter(total_project_usages)

        # Remove the master region from the list. Its quotas should already
        # be up to date for managed resources.
        region_lists.remove(dccommon_consts.VIRTUAL_MASTER_CLOUD)

        # NOTE(knasim-wrs): The master cloud's project ID and user ID
        # don't mean anything for the subcloud, so we need to first resolve
        # the project name and username, and then determine the specific
        # IDs for that subcloud.
        qproject = os_driver.get_project_by_id(project_id)
        quser = os_driver.get_user_by_id(user_id)

        for current_region in region_lists:
            # Calculate the new limits for this region.
            region_new_limits = dict(
                unused_global_limits + collections.Counter(
                    regions_usage_dict[current_region]))
            # Reformat the limits.
            region_new_limits = self._arrange_quotas_by_service_name(
                region_new_limits)
            # Update the subcloud with the new limits.
            try:
                # First find this project and user in this subcloud.
                sc_user_id = None
                sc_os_driver = sdk.OpenStackDriver(current_region)
                sc_project = sc_os_driver.get_project_by_name(qproject.name)
                if not sc_project:
                    LOG.info("Cannot find project %s in subcloud %s. Skipping "
                             "quota sync for this project on subcloud",
                             qproject.name, current_region)
                    continue
                sc_project_id = sc_project.id
                if quser:
                    sc_user = sc_os_driver.get_user_by_name(quser.name)
                    sc_user_id = getattr(sc_user, 'id', None)
            except Exception as e:
                LOG.error("quota sync %s: %s", current_region, str(e))
                continue

            thread = threading.Thread(target=self.update_quota_limits,
                                      args=(sc_project_id, sc_user_id,
                                            region_new_limits,
                                            current_region,))
            regions_thread_list.append(thread)
            thread.start()

        # Wait for all the threads to update quota.
        for current_thread in regions_thread_list:
            current_thread.join()

    def get_overall_tenant_quota_limits(self, project_id, user_id):
        # Return quota limits in the master cloud. These are the overall
        # quota limits for the whole cloud.
        return self.get_tenant_quota_limits_region(
            project_id, user_id,
            dccommon_consts.VIRTUAL_MASTER_CLOUD)

    def get_tenant_quota_usage_per_region(self, project_id, user_id):
        # Return a quota usage dict with region names as keys and usages
        # as values.
        # Calculates the usage from each region concurrently using threads.
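        # Each worker thread puts a single {region: usage} entry on
        # usage_queue (see read_quota_usage()), so a complete result has
        # exactly one queue item per region.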
        os_driver = sdk.OpenStackDriver()
        # Retrieve regions for the project.
        region_lists = os_driver.get_all_regions_for_project(
            project_id)
        usage_queue = Queue()
        regions_usage_dict = collections.defaultdict(dict)
        regions_thread_list = []
        qproject = os_driver.get_project_by_id(project_id)
        quser = os_driver.get_user_by_id(user_id)

        for current_region in region_lists:
            # First find this project and user in this subcloud.
            try:
                sc_user_id = None
                sc_os_driver = sdk.OpenStackDriver(current_region)
                sc_project = sc_os_driver.get_project_by_name(qproject.name)
                if not sc_project:
                    LOG.info("Cannot find project %s in subcloud %s. Skipping "
                             "quota usage for this project on subcloud",
                             qproject.name, current_region)
                    continue
                sc_project_id = sc_project.id
                if quser:
                    sc_user = sc_os_driver.get_user_by_name(quser.name)
                    sc_user_id = getattr(sc_user, 'id', None)
            except Exception as e:
                LOG.error("quota usage %s: %s", current_region, str(e))
                continue

            thread = threading.Thread(target=self.read_quota_usage,
                                      args=(sc_project_id, sc_user_id,
                                            current_region, usage_queue))
            regions_thread_list.append(thread)
            thread.start()
        # Wait for all the threads to finish reading usages.
        for current_thread in regions_thread_list:
            current_thread.join()
        # Check if all the regions' usages were read.
        if len(region_lists) == usage_queue.qsize():
            for _ in range(usage_queue.qsize()):
                # Read the queue.
                current_region_data = usage_queue.get()
                regions_usage_dict.update(current_region_data)
        return regions_usage_dict

    def get_usage_for_project_and_user(self, endpoint_type,
                                       project_id, user_id):
        # Returns the cached quota usage for a project and user. If there
        # is no cached usage information, update the cache.

        with QuotaManager.usage_lock:
            # First, try to get a copy of the usage from the last quota audit.
            total_project_usages = copy.deepcopy(
                QuotaManager.total_project_usages.get((project_id, user_id),
                                                      None))
        if total_project_usages is None:
            # This project/user doesn't have any cached usage information,
            # so we need to query it.
            try:
                total_project_usages, regions_usage_dict = \
                    self.quota_usage_update(project_id, user_id)
            except exceptions.ProjectNotFound:
                total_project_usages = {}
            if total_project_usages is None:
                # quota_usage_update() returns (None, None) when the
                # regions usage could not be read; fall back to an empty
                # dict so the filtering below doesn't fail.
                total_project_usages = {}

        # "total_project_usages" includes fields from multiple
        # endpoint types, so we need to figure out which ones we want.
        desired_fields = consts.ENDPOINT_QUOTA_MAPPING[endpoint_type]
        usage_dict = {}
        for k, v in total_project_usages.items():
            if k in desired_fields:
                usage_dict[k] = v
        return usage_dict


def list_opts():
    yield batch_opt_group.name, batch_opts
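# Note: list_opts() follows the oslo.config opts-discovery convention;
# exposing the ('batch', batch_opts) pair lets tools such as
# oslo-config-generator include these options in a generated sample
# config (a general oslo pattern, noted here as context).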