
This makes 'nova-manage limits migrate_to_unified_limits' scan the API
database for flavors and detect if any resource classes are missing
registered limits in Keystone.

Related to blueprint unified-limits-nova-unset-limits

Change-Id: I431176fd4d09201c551d8f82c71515cd4616cfea

3842 lines | 170 KiB | Python
# Copyright (c) 2011 X.commerce, a business unit of eBay Inc.
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright 2013 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


"""
CLI interface for nova management.
"""

import collections
from contextlib import contextmanager
import functools
import os
import re
import sys
import textwrap
import time
import traceback
import typing as ty
from urllib import parse as urlparse

from dateutil import parser as dateutil_parser
from keystoneauth1 import exceptions as ks_exc
from neutronclient.common import exceptions as neutron_client_exc
from os_brick.initiator import connector
import os_resource_classes as orc
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from oslo_utils import encodeutils
from oslo_utils import uuidutils
import prettytable
from sqlalchemy.engine import url as sqla_url

from nova.cmd import common as cmd_common
from nova.compute import api
from nova.compute import instance_actions
from nova.compute import instance_list as list_instances
from nova.compute import rpcapi
import nova.conf
from nova.conf import utils as conf_utils
from nova import config
from nova import context
from nova.db import constants as db_const
from nova.db.main import api as db
from nova.db import migration
from nova import exception
from nova.i18n import _
from nova.limit import local as local_limit
from nova.limit import placement as placement_limit
from nova.network import constants
from nova.network import neutron as neutron_api
from nova import objects
from nova.objects import block_device as block_device_obj
from nova.objects import compute_node as compute_node_obj
from nova.objects import fields as obj_fields
from nova.objects import host_mapping as host_mapping_obj
from nova.objects import instance as instance_obj
from nova.objects import instance_mapping as instance_mapping_obj
from nova.objects import pci_device as pci_device_obj
from nova.objects import quotas as quotas_obj
from nova.objects import virtual_interface as virtual_interface_obj
import nova.quota
from nova import rpc
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova import version
from nova.virt.libvirt import machine_type_utils
from nova.volume import cinder

CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)

# Keep this list sorted and one entry per line for readability.
_EXTRA_DEFAULT_LOG_LEVELS = [
    'nova=ERROR',
    'oslo_concurrency=INFO',
    'oslo_db=INFO',
    'oslo_policy=INFO',
    'oslo.privsep=ERROR',
    'os_brick=ERROR',
]

# Consts indicating whether allocations need to be healed by creating them or
# by updating existing allocations.
_CREATE = 'create'
_UPDATE = 'update'

# Decorators for actions
args = cmd_common.args
action_description = cmd_common.action_description


def mask_passwd_in_url(url):
    parsed = urlparse.urlparse(url)
    safe_netloc = re.sub(':.*@', ':****@', parsed.netloc)
    new_parsed = urlparse.ParseResult(
        parsed.scheme, safe_netloc,
        parsed.path, parsed.params,
        parsed.query, parsed.fragment)
    return urlparse.urlunparse(new_parsed)
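
# Illustrative only (not part of the upstream module): given a made-up URL,
# mask_passwd_in_url() above hides the credential portion, e.g.
#
#   >>> mask_passwd_in_url('rabbit://nova:secret@10.0.0.1:5672/')
#   'rabbit://nova:****@10.0.0.1:5672/'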


def format_dict(dct, dict_property="Property", dict_value='Value',
                sort_key=None):
    """Print a `dict` as a table of two columns.

    :param dct: `dict` to print
    :param dict_property: name of the first column
    :param dict_value: header label for the value (second) column
    :param sort_key: key used for sorting the dict
    """
    pt = prettytable.PrettyTable([dict_property, dict_value])
    pt.align = 'l'
    # starting in PrettyTable 3.4.0 we need to also set the header
    # as align now only applies to the data.
    if hasattr(pt, 'header_align'):
        pt.header_align = 'l'
    for k, v in sorted(dct.items(), key=sort_key):
        # convert dict to str to check length
        if isinstance(v, dict):
            v = str(v)
        # if value has a newline, add in multiple rows
        # e.g. fault with stacktrace
        if v and isinstance(v, str) and r'\n' in v:
            lines = v.strip().split(r'\n')
            col1 = k
            for line in lines:
                pt.add_row([col1, line])
                col1 = ''
        else:
            pt.add_row([k, v])

    return encodeutils.safe_encode(pt.get_string()).decode()
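
# Illustrative only (not part of the upstream module): format_dict() renders
# a dict as a two-column PrettyTable, e.g.
#
#   print(format_dict({'vm_state': 'active', 'task_state': None}))
#
# prints a table with 'Property' and 'Value' columns and one row per key.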


@contextmanager
def locked_instance(cell_mapping, instance, reason):
    """Context manager to lock and unlock an instance.

    The initial lock state is restored regardless of the success or failure
    of the wrapped operation.

    :param cell_mapping: instance-cell-mapping
    :param instance: instance to be locked and unlocked
    :param reason: reason why the lock is required
    """

    compute_api = api.API()

    initial_state = 'locked' if instance.locked else 'unlocked'
    if not instance.locked:
        with context.target_cell(
                context.get_admin_context(), cell_mapping) as cctxt:
            compute_api.lock(cctxt, instance, reason=reason)
    try:
        yield
    finally:
        if initial_state == 'unlocked':
            with context.target_cell(
                    context.get_admin_context(), cell_mapping) as cctxt:
                compute_api.unlock(cctxt, instance)
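
# A hedged usage sketch (not part of the upstream module); 'cell_mapping' and
# 'instance' are assumed to have been fetched elsewhere:
#
#   with locked_instance(cell_mapping, instance, reason='maintenance'):
#       ...  # work that requires the instance to stay locked
#   # on exit, the instance's original locked/unlocked state is restored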


class DbCommands(object):
    """Class for managing the main database."""

    # NOTE(danms): These functions are called with a DB context and a
    # count, which is the maximum batch size requested by the
    # user. They must be idempotent. At most $count records should be
    # migrated. The function must return a tuple of (found, done). The
    # found value indicates how many unmigrated/candidate records existed in
    # the database prior to the migration (either total, or up to the
    # $count limit provided), and a nonzero found value may tell the user
    # that there is still work to do. The done value indicates whether
    # or not any records were actually migrated by the function. Thus
    # if both (found, done) are nonzero, work was done and some work
    # remains. If found is nonzero and done is zero, some records are
    # not migratable (or don't need migrating), but all migrations that can
    # complete have finished.
    # NOTE(stephenfin): These names must be unique
    online_migrations = (
        # Added in Pike
        quotas_obj.migrate_quota_limits_to_api_db,
        # Added in Pike
        quotas_obj.migrate_quota_classes_to_api_db,
        # Added in Queens
        db.migration_migrate_to_uuid,
        # Added in Queens
        block_device_obj.BlockDeviceMapping.populate_uuids,
        # Added in Rocky
        # NOTE(tssurya): This online migration is going to be backported to
        # Queens and Pike since instance.avz of instances before Pike
        # need to be populated if it was not specified during boot time.
        instance_obj.populate_missing_availability_zones,
        # Added in Rocky
        instance_mapping_obj.populate_queued_for_delete,
        # Added in Stein
        compute_node_obj.migrate_empty_ratio,
        # Added in Stein
        virtual_interface_obj.fill_virtual_interface_list,
        # Added in Stein
        instance_mapping_obj.populate_user_id,
        # Added in Victoria
        pci_device_obj.PciDevice.populate_dev_uuids,
        # Added in 2023.2
        instance_obj.populate_instance_compute_id,
    )
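
    # Illustrative only (not an upstream migration): a sketch of the
    # (found, done) contract described in the NOTE above. The helper names
    # are hypothetical; a real migration lives on the relevant object module
    # and is added to the online_migrations tuple.
    #
    #   def migrate_example_field(ctxt, count):
    #       candidates = _get_unmigrated_records(ctxt, limit=count)
    #       done = 0
    #       for record in candidates:
    #           _fill_in_missing_field(ctxt, record)  # idempotent update
    #           done += 1
    #       # found = candidates seen (up to count), done = records migrated
    #       return len(candidates), done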
@args('--local_cell', action='store_true',
|
|
help='Only sync db in the local cell: do not attempt to fan-out '
|
|
'to all cells')
|
|
@args('version', metavar='VERSION', nargs='?', help='Database version')
|
|
def sync(self, version=None, local_cell=False):
|
|
"""Sync the database up to the most recent version."""
|
|
if not local_cell:
|
|
ctxt = context.RequestContext()
|
|
# NOTE(mdoff): Multiple cells not yet implemented. Currently
|
|
# fanout only looks for cell0.
|
|
try:
|
|
cell_mapping = objects.CellMapping.get_by_uuid(
|
|
ctxt, objects.CellMapping.CELL0_UUID,
|
|
)
|
|
with context.target_cell(ctxt, cell_mapping) as cctxt:
|
|
migration.db_sync(version, context=cctxt)
|
|
except exception.CellMappingNotFound:
|
|
msg = _(
|
|
'WARNING: cell0 mapping not found - not syncing cell0.'
|
|
)
|
|
print(msg)
|
|
except Exception as e:
|
|
msg = _(
|
|
'ERROR: Could not access cell0.\n'
|
|
'Has the nova_api database been created?\n'
|
|
'Has the nova_cell0 database been created?\n'
|
|
'Has "nova-manage api_db sync" been run?\n'
|
|
'Has "nova-manage cell_v2 map_cell0" been run?\n'
|
|
'Is [api_database]/connection set in nova.conf?\n'
|
|
'Is the cell0 database connection URL correct?\n'
|
|
'Error: %s'
|
|
)
|
|
print(msg % str(e))
|
|
return 1
|
|
|
|
return migration.db_sync(version)
|
|
|
|
def version(self):
|
|
"""Print the current database version."""
|
|
print(migration.db_version())
|
|
|
|
@args('--max_rows', type=int, metavar='<number>', dest='max_rows',
|
|
help='Maximum number of deleted rows to archive per table. Defaults '
|
|
'to 1000. Note that this number is a soft limit and does not '
|
|
'include the corresponding rows, if any, that are removed '
|
|
'from the API database for deleted instances.')
|
|
@args('--before', metavar='<date>',
|
|
help=('Archive rows that have been deleted before this date. '
|
|
'Accepts date strings in the default format output by the '
|
|
'``date`` command, as well as ``YYYY-MM-DD [HH:mm:ss]``.'))
|
|
@args('--verbose', action='store_true', dest='verbose', default=False,
|
|
help='Print how many rows were archived per table.')
|
|
@args('--until-complete', action='store_true', dest='until_complete',
|
|
default=False,
|
|
help=('Run continuously until all deleted rows are archived. Use '
|
|
'max_rows as a batch size for each iteration.'))
|
|
@args('--purge', action='store_true', dest='purge', default=False,
|
|
help='Purge all data from shadow tables after archive completes')
|
|
@args('--all-cells', action='store_true', dest='all_cells',
|
|
default=False, help='Run command across all cells.')
|
|
@args('--task-log', action='store_true', dest='task_log', default=False,
|
|
help=('Also archive ``task_log`` table records. Note that '
|
|
'``task_log`` records are never deleted, so archiving them '
|
|
'will move all of the ``task_log`` records up to now into the '
|
|
'shadow tables. It is recommended to also specify the '
|
|
'``--before`` option to avoid races for those consuming '
|
|
'``task_log`` record data via the '
|
|
'``/os-instance_usage_audit_log`` API (example: Telemetry).'))
|
|
@args('--sleep', type=int, metavar='<seconds>', dest='sleep',
|
|
help='The amount of time in seconds to sleep between batches when '
|
|
'``--until-complete`` is used. Defaults to 0.')
|
|
def archive_deleted_rows(
|
|
self, max_rows=1000, verbose=False,
|
|
until_complete=False, purge=False,
|
|
before=None, all_cells=False, task_log=False, sleep=0,
|
|
):
|
|
"""Move deleted rows from production tables to shadow tables.
|
|
|
|
Returns 0 if nothing was archived, 1 if some number of rows were
|
|
archived, 2 if max_rows is invalid, 3 if no connection could be
|
|
established to the API DB, 4 if before date is invalid. If automating,
|
|
this should be run continuously while the result
|
|
is 1, stopping at 0.
|
|
"""
|
|
max_rows = int(max_rows)
|
|
if max_rows < 0:
|
|
print(_("Must supply a positive value for max_rows"))
|
|
return 2
|
|
if max_rows > db_const.MAX_INT:
|
|
print(_('max rows must be <= %(max_value)d') %
|
|
{'max_value': db_const.MAX_INT})
|
|
return 2
|
|
|
|
ctxt = context.get_admin_context()
|
|
try:
|
|
# NOTE(tssurya): This check has been added to validate if the API
|
|
# DB is reachable or not as this is essential for purging the
|
|
# related API database records of the deleted instances.
|
|
cell_mappings = objects.CellMappingList.get_all(ctxt)
|
|
except db_exc.CantStartEngineError:
|
|
print(_('Failed to connect to API DB so aborting this archival '
|
|
'attempt. Please check your config file to make sure that '
|
|
'[api_database]/connection is set and run this '
|
|
'command again.'))
|
|
return 3
|
|
|
|
if before:
|
|
try:
|
|
before_date = dateutil_parser.parse(before, fuzzy=True)
|
|
except ValueError as e:
|
|
print(_('Invalid value for --before: %s') % e)
|
|
return 4
|
|
else:
|
|
before_date = None
|
|
|
|
table_to_rows_archived = {}
|
|
if until_complete and verbose:
|
|
sys.stdout.write(_('Archiving') + '..') # noqa
|
|
|
|
interrupt = False
|
|
|
|
if all_cells:
|
|
# Sort first by cell name, then by table:
|
|
# +--------------------------------+-------------------------+
|
|
# | Table | Number of Rows Archived |
|
|
# +--------------------------------+-------------------------+
|
|
# | cell0.block_device_mapping | 1 |
|
|
# | cell1.block_device_mapping | 1 |
|
|
# | cell1.instance_actions | 2 |
|
|
# | cell1.instance_actions_events | 2 |
|
|
# | cell2.block_device_mapping | 1 |
|
|
# | cell2.instance_actions | 2 |
|
|
# | cell2.instance_actions_events | 2 |
|
|
# ...
|
|
def sort_func(item):
|
|
cell_name, table = item[0].split('.')
|
|
return cell_name, table
|
|
print_sort_func = sort_func
|
|
else:
|
|
cell_mappings = [None]
|
|
print_sort_func = None
|
|
total_rows_archived = 0
|
|
for cell_mapping in cell_mappings:
|
|
# NOTE(Kevin_Zheng): No need to calculate limit for each
|
|
# cell if until_complete=True.
|
|
# We need not adjust max rows to avoid exceeding a specified total
|
|
# limit because with until_complete=True, we have no total limit.
|
|
if until_complete:
|
|
max_rows_to_archive = max_rows
|
|
elif max_rows > total_rows_archived:
|
|
# We reduce the max rows to archive based on what we've
|
|
# archived so far to avoid potentially exceeding the specified
|
|
# total limit.
|
|
max_rows_to_archive = max_rows - total_rows_archived
|
|
else:
|
|
break
|
|
# If all_cells=False, cell_mapping is None
|
|
with context.target_cell(ctxt, cell_mapping) as cctxt:
|
|
cell_name = cell_mapping.name if cell_mapping else None
|
|
try:
|
|
rows_archived = self._do_archive(
|
|
table_to_rows_archived,
|
|
cctxt,
|
|
max_rows_to_archive,
|
|
until_complete,
|
|
verbose,
|
|
before_date,
|
|
cell_name,
|
|
task_log,
|
|
sleep)
|
|
except KeyboardInterrupt:
|
|
interrupt = True
|
|
break
|
|
# TODO(melwitt): Handle skip/warn for unreachable cells. Note
|
|
# that cell_mappings = [None] if not --all-cells
|
|
total_rows_archived += rows_archived
|
|
|
|
if until_complete and verbose:
|
|
if interrupt:
|
|
print('.' + _('stopped')) # noqa
|
|
else:
|
|
print('.' + _('complete')) # noqa
|
|
|
|
if verbose:
|
|
if table_to_rows_archived:
|
|
print(format_dict(
|
|
table_to_rows_archived,
|
|
dict_property=_('Table'),
|
|
dict_value=_('Number of Rows Archived'),
|
|
sort_key=print_sort_func,
|
|
))
|
|
else:
|
|
print(_('Nothing was archived.'))
|
|
|
|
if table_to_rows_archived and purge:
|
|
if verbose:
|
|
print(_('Rows were archived, running purge...'))
|
|
self.purge(purge_all=True, verbose=verbose, all_cells=all_cells)
|
|
|
|
# NOTE(danms): Return nonzero if we archived something
|
|
return int(bool(table_to_rows_archived))
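
# Illustrative automation sketch (not part of this module), driven by the
# return codes documented in archive_deleted_rows() above; the batch size is
# an arbitrary example value:
#
#   import subprocess
#
#   def archive_until_done():
#       while True:
#           rc = subprocess.call(
#               ['nova-manage', 'db', 'archive_deleted_rows',
#                '--max_rows', '1000'])
#           if rc != 1:
#               # 0 = nothing left to archive; 2/3/4 = error conditions
#               return rc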
|
|
|
|
def _do_archive(
|
|
self, table_to_rows_archived, cctxt, max_rows,
|
|
until_complete, verbose, before_date, cell_name, task_log, sleep,
|
|
):
|
|
"""Helper function for archiving deleted rows for a cell.
|
|
|
|
This will archive deleted rows for a cell database and remove the
|
|
associated API database records for deleted instances.
|
|
|
|
:param table_to_rows_archived: Dict tracking the number of rows
|
|
archived by <cell_name>.<table name>. Example:
|
|
{'cell0.instances': 2,
|
|
'cell1.instances': 5}
|
|
:param cctxt: Cell-targeted nova.context.RequestContext if archiving
|
|
across all cells
|
|
:param max_rows: Maximum number of deleted rows to archive per table.
|
|
Note that this number is a soft limit and does not include the
|
|
corresponding rows, if any, that are removed from the API database
|
|
for deleted instances.
|
|
:param until_complete: Whether to run continuously until all deleted
|
|
rows are archived
|
|
:param verbose: Whether to print how many rows were archived per table
|
|
:param before_date: Archive rows that were deleted before this date
|
|
:param cell_name: Name of the cell or None if not archiving across all
|
|
cells
|
|
:param task_log: Whether to archive task_log table rows
|
|
:param sleep: The amount of time in seconds to sleep between batches
|
|
when ``until_complete`` is True.
|
|
"""
|
|
ctxt = context.get_admin_context()
|
|
while True:
|
|
# table_to_rows = {table_name: number_of_rows_archived}
|
|
# deleted_instance_uuids = ['uuid1', 'uuid2', ...]
|
|
table_to_rows, deleted_instance_uuids, total_rows_archived = \
|
|
db.archive_deleted_rows(
|
|
cctxt, max_rows, before=before_date, task_log=task_log)
|
|
|
|
for table_name, rows_archived in table_to_rows.items():
|
|
if cell_name:
|
|
table_name = cell_name + '.' + table_name
|
|
table_to_rows_archived.setdefault(table_name, 0)
|
|
table_to_rows_archived[table_name] += rows_archived
|
|
|
|
# deleted_instance_uuids does not necessarily mean that any
|
|
# instances rows were archived because it is obtained by a query
|
|
# separate from the archive queries. For example, if a
|
|
# DBReferenceError was raised while processing the instances table,
|
|
# we would have skipped the table and had 0 rows archived even
|
|
# though deleted instances rows were found.
|
|
instances_archived = table_to_rows.get('instances', 0)
|
|
if deleted_instance_uuids and instances_archived:
|
|
table_to_rows_archived.setdefault(
|
|
'API_DB.instance_mappings', 0)
|
|
table_to_rows_archived.setdefault(
|
|
'API_DB.request_specs', 0)
|
|
table_to_rows_archived.setdefault(
|
|
'API_DB.instance_group_member', 0)
|
|
deleted_mappings = objects.InstanceMappingList.destroy_bulk(
|
|
ctxt, deleted_instance_uuids)
|
|
table_to_rows_archived[
|
|
'API_DB.instance_mappings'] += deleted_mappings
|
|
deleted_specs = objects.RequestSpec.destroy_bulk(
|
|
ctxt, deleted_instance_uuids)
|
|
table_to_rows_archived[
|
|
'API_DB.request_specs'] += deleted_specs
|
|
deleted_group_members = (
|
|
objects.InstanceGroup.destroy_members_bulk(
|
|
ctxt, deleted_instance_uuids))
|
|
table_to_rows_archived[
|
|
'API_DB.instance_group_member'] += deleted_group_members
|
|
|
|
# If we're not archiving until there is nothing more to archive, we
|
|
# have reached max_rows in this cell DB or there was nothing to
|
|
# archive. We check the values() in case we get something like
|
|
# table_to_rows = {'instances': 0} back somehow.
|
|
if not until_complete or not any(table_to_rows.values()):
|
|
break
|
|
if verbose:
|
|
sys.stdout.write('.')
|
|
# Optionally sleep between batches to throttle the archiving.
|
|
time.sleep(sleep)
|
|
return total_rows_archived
|
|
|
|
@args('--before', metavar='<before>', dest='before',
|
|
help='If specified, purge rows from shadow tables that are older '
|
|
'than this. Accepts date strings in the default format output '
|
|
'by the ``date`` command, as well as ``YYYY-MM-DD '
|
|
'[HH:mm:ss]``.')
|
|
@args('--all', dest='purge_all', action='store_true',
|
|
help='Purge all rows in the shadow tables')
|
|
@args('--verbose', dest='verbose', action='store_true', default=False,
|
|
help='Print information about purged records')
|
|
@args('--all-cells', dest='all_cells', action='store_true', default=False,
|
|
help='Run against all cell databases')
|
|
def purge(self, before=None, purge_all=False, verbose=False,
|
|
all_cells=False):
|
|
if before is None and purge_all is False:
|
|
print(_('Either --before or --all is required'))
|
|
return 1
|
|
if before:
|
|
try:
|
|
before_date = dateutil_parser.parse(before, fuzzy=True)
|
|
except ValueError as e:
|
|
print(_('Invalid value for --before: %s') % e)
|
|
return 2
|
|
else:
|
|
before_date = None
|
|
|
|
def status(msg):
|
|
if verbose:
|
|
print('%s: %s' % (identity, msg))
|
|
|
|
deleted = 0
|
|
admin_ctxt = context.get_admin_context()
|
|
|
|
if all_cells:
|
|
try:
|
|
cells = objects.CellMappingList.get_all(admin_ctxt)
|
|
except db_exc.DBError:
|
|
print(_('Unable to get cell list from API DB. '
|
|
'Is it configured?'))
|
|
return 4
|
|
for cell in cells:
|
|
identity = _('Cell %s') % cell.identity
|
|
with context.target_cell(admin_ctxt, cell) as cctxt:
|
|
deleted += db.purge_shadow_tables(
|
|
cctxt, before_date, status_fn=status)
|
|
else:
|
|
identity = _('DB')
|
|
deleted = db.purge_shadow_tables(
|
|
admin_ctxt, before_date, status_fn=status)
|
|
if deleted:
|
|
return 0
|
|
else:
|
|
return 3
|
|
|
|
def _run_migration(self, ctxt, max_count):
|
|
ran = 0
|
|
exceptions = False
|
|
migrations = {}
|
|
for migration_meth in self.online_migrations:
|
|
count = max_count - ran
|
|
try:
|
|
found, done = migration_meth(ctxt, count)
|
|
except Exception:
|
|
msg = (_("Error attempting to run %(method)s") % dict(
|
|
method=migration_meth))
|
|
print(msg)
|
|
LOG.exception(msg)
|
|
exceptions = True
|
|
found = done = 0
|
|
|
|
name = migration_meth.__name__
|
|
if found:
|
|
print(_('%(total)i rows matched query %(meth)s, %(done)i '
|
|
'migrated') % {'total': found,
|
|
'meth': name,
|
|
'done': done})
|
|
# This is the per-migration method result for this batch, and
|
|
# _run_migration will either continue on to the next migration,
|
|
# or stop if up to this point we've processed max_count of
|
|
# records across all migration methods.
|
|
migrations[name] = found, done
|
|
if max_count is not None:
|
|
ran += done
|
|
if ran >= max_count:
|
|
break
|
|
return migrations, exceptions
|
|
|
|
@args('--max-count', metavar='<number>', dest='max_count',
|
|
help='Maximum number of objects to consider')
|
|
def online_data_migrations(self, max_count=None):
|
|
ctxt = context.get_admin_context()
|
|
if max_count is not None:
|
|
try:
|
|
max_count = int(max_count)
|
|
except ValueError:
|
|
max_count = -1
|
|
unlimited = False
|
|
if max_count < 1:
|
|
print(_('Must supply a positive value for max_number'))
|
|
return 127
|
|
else:
|
|
unlimited = True
|
|
max_count = 50
|
|
print(_('Running batches of %i until complete') % max_count)
|
|
|
|
ran = None
|
|
migration_info = {}
|
|
exceptions = False
|
|
while ran is None or ran != 0:
|
|
migrations, exceptions = self._run_migration(ctxt, max_count)
|
|
ran = 0
|
|
# For each batch of migration method results, build the cumulative
|
|
# set of results.
|
|
for name in migrations:
|
|
migration_info.setdefault(name, (0, 0))
|
|
migration_info[name] = (
|
|
migration_info[name][0] + migrations[name][0],
|
|
migration_info[name][1] + migrations[name][1],
|
|
)
|
|
ran += migrations[name][1]
|
|
if not unlimited:
|
|
break
|
|
|
|
t = prettytable.PrettyTable([_('Migration'),
|
|
_('Total Needed'), # Really: Total Found
|
|
_('Completed')])
|
|
for name in sorted(migration_info.keys()):
|
|
info = migration_info[name]
|
|
t.add_row([name, info[0], info[1]])
|
|
print(t)
|
|
|
|
# NOTE(imacdonn): In the "unlimited" case, the loop above will only
|
|
# terminate when all possible migrations have been effected. If we're
|
|
# still getting exceptions, there's a problem that requires
|
|
# intervention. In the max-count case, exceptions are only considered
|
|
# fatal if no work was done by any other migrations ("not ran"),
|
|
# because otherwise work may still remain to be done, and that work
|
|
# may resolve dependencies for the failing migrations.
|
|
if exceptions and (unlimited or not ran):
|
|
print(_("Some migrations failed unexpectedly. Check log for "
|
|
"details."))
|
|
return 2
|
|
|
|
# TODO(mriedem): Potentially add another return code for
|
|
# "there are more migrations, but not completable right now"
|
|
return ran and 1 or 0
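
# Illustrative only: the exit codes above are intended to be consumed in a
# loop by an operator script (run_nova_manage here is hypothetical):
#
#   rc = 1
#   while rc == 1:  # 1 means more work remains, so run again
#       rc = run_nova_manage(
#           ['db', 'online_data_migrations', '--max-count', '50'])
#   # rc is now 0 (done), 2 (failures, check logs) or 127 (invalid input)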
|
|
|
|
@args('--ironic-node-uuid', metavar='<uuid>', dest='compute_node_uuid',
|
|
help='UUID of Ironic node to be moved between services')
|
|
@args('--destination-host', metavar='<host>',
|
|
dest='destination_service_host',
|
|
help='Destination ironic nova-compute service CONF.host')
|
|
def ironic_compute_node_move(self, compute_node_uuid,
|
|
destination_service_host):
|
|
ctxt = context.get_admin_context()
|
|
|
|
destination_service = objects.Service.get_by_compute_host(
|
|
ctxt, destination_service_host)
|
|
if destination_service.forced_down:
|
|
raise exception.NovaException(
|
|
"Destination compute is forced down!")
|
|
|
|
target_compute_node = objects.ComputeNode.get_by_uuid(
|
|
ctxt, compute_node_uuid)
|
|
source_service = objects.Service.get_by_id(
|
|
ctxt, target_compute_node.service_id)
|
|
if not source_service.forced_down:
|
|
raise exception.NovaException(
|
|
"Source service is not yet forced down!")
|
|
|
|
instances = objects.InstanceList.get_by_host_and_node(
|
|
ctxt, target_compute_node.host,
|
|
target_compute_node.hypervisor_hostname)
|
|
if len(instances) > 1:
|
|
raise exception.NovaException(
|
|
"Found an ironic host with more than one instance! "
|
|
"Please delete all Nova instances that do not match "
|
|
"the instance uuid recorded on the Ironic node.")
|
|
|
|
target_compute_node.service_id = destination_service.id
|
|
target_compute_node.host = destination_service.host
|
|
target_compute_node.save()
|
|
|
|
for instance in instances:
|
|
# this is a bit like evacuate, except no need to rebuild
|
|
instance.host = destination_service.host
|
|
instance.save()
|
|
|
|
|
|
class ApiDbCommands(object):
|
|
"""Class for managing the api database."""
|
|
|
|
def __init__(self):
|
|
pass
|
|
|
|
@args('version', metavar='VERSION', nargs='?', help='Database version')
|
|
def sync(self, version=None):
|
|
"""Sync the database up to the most recent version."""
|
|
return migration.db_sync(version, database='api')
|
|
|
|
def version(self):
|
|
"""Print the current database version."""
|
|
print(migration.db_version(database='api'))
|
|
|
|
|
|
class CellV2Commands(object):
|
|
"""Commands for managing cells v2."""
|
|
|
|
def _validate_transport_url(self, transport_url, warn_about_none=True):
|
|
if not transport_url:
|
|
if not CONF.transport_url:
|
|
if warn_about_none:
|
|
print(_(
|
|
'Must specify --transport-url if '
|
|
'[DEFAULT]/transport_url is not set in the '
|
|
'configuration file.'))
|
|
return None
|
|
print(_('--transport-url not provided in the command line, '
|
|
'using the value [DEFAULT]/transport_url from the '
|
|
'configuration file'))
|
|
transport_url = CONF.transport_url
|
|
|
|
try:
|
|
messaging.TransportURL.parse(conf=CONF,
|
|
url=objects.CellMapping.format_mq_url(
|
|
transport_url))
|
|
except (messaging.InvalidTransportURL, ValueError) as e:
|
|
print(_('Invalid transport URL: %s') % str(e))
|
|
return None
|
|
|
|
return transport_url
|
|
|
|
def _validate_database_connection(
|
|
self, database_connection, warn_about_none=True):
|
|
if not database_connection:
|
|
if not CONF.database.connection:
|
|
if warn_about_none:
|
|
print(_(
|
|
'Must specify --database_connection if '
|
|
'[database]/connection is not set in the '
|
|
'configuration file.'))
|
|
return None
|
|
print(_('--database_connection not provided in the command line, '
|
|
'using the value [database]/connection from the '
|
|
'configuration file'))
|
|
return CONF.database.connection
|
|
return database_connection
|
|
|
|
def _non_unique_transport_url_database_connection_checker(self, ctxt,
|
|
cell_mapping, transport_url, database_connection):
|
|
for cell in objects.CellMappingList.get_all(ctxt):
|
|
if cell_mapping and cell.uuid == cell_mapping.uuid:
|
|
# If we're looking for a specific cell, then don't check
|
|
# that one for same-ness to allow idempotent updates
|
|
continue
|
|
if (cell.database_connection == database_connection or
|
|
cell.transport_url == transport_url):
|
|
print(_('The specified transport_url and/or '
|
|
'database_connection combination already exists '
|
|
'for another cell with uuid %s.') % cell.uuid)
|
|
return True
|
|
return False
|
|
|
|
@args('--transport-url', metavar='<transport_url>', dest='transport_url',
|
|
help='The transport url for the cell message queue')
|
|
def simple_cell_setup(self, transport_url=None):
|
|
"""Simple cellsv2 setup.
|
|
|
|
This simplified command is for use by existing non-cells users to
|
|
configure the default environment. Returns 0 if setup is completed (or
|
|
has already been done) and 1 if no hosts are reporting (and this cannot
|
|
be mapped).
|
|
"""
|
|
transport_url = self._validate_transport_url(transport_url)
|
|
if not transport_url:
|
|
return 1
|
|
ctxt = context.RequestContext()
|
|
try:
|
|
cell0_mapping = self._map_cell0()
|
|
except db_exc.DBDuplicateEntry:
|
|
print(_('Cell0 is already setup'))
|
|
cell0_mapping = objects.CellMapping.get_by_uuid(
|
|
ctxt, objects.CellMapping.CELL0_UUID)
|
|
|
|
# Run migrations so cell0 is usable
|
|
with context.target_cell(ctxt, cell0_mapping) as cctxt:
|
|
try:
|
|
migration.db_sync(None, context=cctxt)
|
|
except db_exc.DBError as ex:
|
|
print(_('Unable to sync cell0 schema: %s') % ex)
|
|
|
|
cell_uuid = self._map_cell_and_hosts(transport_url)
|
|
if cell_uuid is None:
|
|
# There are no compute hosts which means no cell_mapping was
|
|
# created. This should also mean that there are no instances.
|
|
return 1
|
|
self.map_instances(cell_uuid)
|
|
return 0
|
|
|
|
@args('--database_connection',
|
|
metavar='<database_connection>',
|
|
help='The database connection url for cell0. '
|
|
'This is optional. If not provided, a standard database '
|
|
'connection will be used based on the main database connection '
|
|
'from the Nova configuration.'
|
|
)
|
|
def map_cell0(self, database_connection=None):
|
|
"""Create a cell mapping for cell0.
|
|
|
|
cell0 is used for instances that have not been scheduled to any cell.
|
|
This generally applies to instances that have encountered an error
|
|
before they have been scheduled.
|
|
|
|
This command creates a cell mapping for this special cell which
|
|
requires a database to store the instance data.
|
|
|
|
Returns 0 if cell0 created successfully or already setup.
|
|
"""
|
|
try:
|
|
self._map_cell0(database_connection=database_connection)
|
|
except db_exc.DBDuplicateEntry:
|
|
print(_('Cell0 is already setup'))
|
|
return 0
|
|
|
|
def _map_cell0(self, database_connection=None):
|
|
"""Facilitate creation of a cell mapping for cell0.
|
|
See map_cell0 for more.
|
|
"""
|
|
def cell0_default_connection():
|
|
# If no database connection is provided one is generated
|
|
# based on the database connection url.
|
|
# The cell0 database will use the same database scheme and
|
|
# netloc as the main database, with a related path.
|
|
# NOTE(sbauza): The URL has to be RFC1738 compliant in order to
|
|
# be usable by sqlalchemy.
|
|
connection = CONF.database.connection
|
|
# sqlalchemy has a nice utility for parsing database connection
|
|
# URLs so we use that here to get the db name so we don't have to
|
|
# worry about parsing and splitting a URL which could have special
|
|
# characters in the password, which makes parsing a nightmare.
|
|
url = sqla_url.make_url(connection)
|
|
url = url.set(database=url.database + '_cell0')
|
|
|
|
return urlparse.unquote(url.render_as_string(hide_password=False))
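
# Illustrative example of the derivation above, with made-up credentials: a
# main connection of 'mysql+pymysql://nova:pass@dbhost/nova' yields the cell0
# connection 'mysql+pymysql://nova:pass@dbhost/nova_cell0'.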
|
|
|
|
dbc = database_connection or cell0_default_connection()
|
|
ctxt = context.RequestContext()
|
|
# A transport url of 'none://' is provided for cell0. RPC should not
|
|
# be used to access cell0 objects. Cells transport switching will
|
|
# ignore any 'none' transport type.
|
|
cell_mapping = objects.CellMapping(
|
|
ctxt, uuid=objects.CellMapping.CELL0_UUID, name="cell0",
|
|
transport_url="none:///",
|
|
database_connection=dbc)
|
|
cell_mapping.create()
|
|
return cell_mapping
|
|
|
|
def _get_and_map_instances(self, ctxt, cell_mapping, limit, marker):
|
|
filters = {}
|
|
with context.target_cell(ctxt, cell_mapping) as cctxt:
|
|
instances = objects.InstanceList.get_by_filters(
|
|
cctxt.elevated(read_deleted='yes'), filters,
|
|
sort_key='created_at', sort_dir='asc', limit=limit,
|
|
marker=marker)
|
|
|
|
for instance in instances:
|
|
try:
|
|
mapping = objects.InstanceMapping(ctxt)
|
|
mapping.instance_uuid = instance.uuid
|
|
mapping.cell_mapping = cell_mapping
|
|
mapping.project_id = instance.project_id
|
|
mapping.user_id = instance.user_id
|
|
mapping.create()
|
|
except db_exc.DBDuplicateEntry:
|
|
continue
|
|
|
|
if len(instances) == 0 or len(instances) < limit:
|
|
# We've hit the end of the instances table
|
|
marker = None
|
|
else:
|
|
marker = instances[-1].uuid
|
|
return marker
|
|
|
|
@args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
|
|
required=True,
|
|
help='Unmigrated instances will be mapped to the cell with the '
|
|
'uuid provided.')
|
|
@args('--max-count', metavar='<max_count>', dest='max_count',
|
|
help='Maximum number of instances to map. If not set, all instances '
|
|
'in the cell will be mapped in batches of 50. If you have a '
|
|
'large number of instances, consider specifying a custom value '
|
|
'and run the command until it exits with 0.')
|
|
@args('--reset', action='store_true', dest='reset_marker',
|
|
help='The command will start from the beginning as opposed to the '
|
|
'default behavior of starting from where the last run '
|
|
'finished')
|
|
def map_instances(self, cell_uuid, max_count=None, reset_marker=None):
|
|
"""Map instances into the provided cell.
|
|
|
|
Instances in the nova database of the provided cell (nova database
|
|
info is obtained from the nova-api database) will be queried from
|
|
oldest to newest and if unmapped, will be mapped to the provided cell.
|
|
A max-count can be set on the number of instances to map in a single
|
|
run. Repeated runs of the command will start from where the last run
|
|
finished so it is not necessary to increase max-count to finish. A
|
|
reset option can be passed which will reset the marker, thus making the
|
|
command start from the beginning as opposed to the default behavior of
|
|
starting from where the last run finished. An exit code of 0 indicates
|
|
that all instances have been mapped.
|
|
"""
|
|
|
|
# NOTE(stephenfin): The support for batching in this command relies on
|
|
# a bit of a hack. We initially process N instance-cell mappings, where
|
|
# N is the value of '--max-count' if provided else 50. To ensure we
|
|
# can continue from N on the next iteration, we store an instance-cell
|
|
# mapping object with a special name and the UUID of the last
|
|
# instance-cell mapping processed (N - 1) in munged form. On the next
|
|
# iteration, we search for the special name and unmunge the UUID to
|
|
# pick up where we left off. This is done until all mappings are
|
|
# processed. The munging is necessary as there's a unique constraint on
|
|
# the UUID field and we need something reversible. For more
|
|
# information, see commit 9038738d0.
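
# For illustration: if the last mapped instance has UUID
# 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', the marker row stores
# 'aaaaaaaa bbbb cccc dddd eeeeeeeeeeee' (hyphens replaced by spaces) to
# avoid colliding with the real InstanceMapping row, and the original UUID
# is recovered later with .replace(' ', '-').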
|
|
|
|
if max_count is not None:
|
|
try:
|
|
max_count = int(max_count)
|
|
except ValueError:
|
|
max_count = -1
|
|
map_all = False
|
|
if max_count < 1:
|
|
print(_('Must supply a positive value for max-count'))
|
|
return 127
|
|
else:
|
|
map_all = True
|
|
max_count = 50
|
|
|
|
ctxt = context.RequestContext()
|
|
marker_project_id = 'INSTANCE_MIGRATION_MARKER'
|
|
|
|
# Validate the cell exists, this will raise if not
|
|
cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
|
|
|
|
# Check for a marker from a previous run
|
|
marker_mapping = objects.InstanceMappingList.get_by_project_id(ctxt,
|
|
marker_project_id)
|
|
if len(marker_mapping) == 0:
|
|
marker = None
|
|
else:
|
|
# There should be only one here
|
|
marker = marker_mapping[0].instance_uuid.replace(' ', '-')
|
|
if reset_marker:
|
|
marker = None
|
|
marker_mapping[0].destroy()
|
|
|
|
next_marker = True
|
|
while next_marker is not None:
|
|
next_marker = self._get_and_map_instances(ctxt, cell_mapping,
|
|
max_count, marker)
|
|
marker = next_marker
|
|
if not map_all:
|
|
break
|
|
|
|
if next_marker:
|
|
# Don't judge me. There's already an InstanceMapping with this UUID
|
|
# so the marker needs to be non destructively modified.
|
|
next_marker = next_marker.replace('-', ' ')
|
|
# This is just the marker record, so set user_id to the special
|
|
# marker name as well.
|
|
objects.InstanceMapping(ctxt, instance_uuid=next_marker,
|
|
project_id=marker_project_id,
|
|
user_id=marker_project_id).create()
|
|
return 1
|
|
return 0
|
|
|
|
def _map_cell_and_hosts(self, transport_url, name=None, verbose=False):
|
|
ctxt = context.RequestContext()
|
|
cell_mapping_uuid = cell_mapping = None
|
|
# First, try to detect if a CellMapping has already been created
|
|
compute_nodes = objects.ComputeNodeList.get_all(ctxt)
|
|
if not compute_nodes:
|
|
print(_('No hosts found to map to cell, exiting.'))
|
|
return None
|
|
missing_nodes = set()
|
|
for compute_node in compute_nodes:
|
|
try:
|
|
host_mapping = objects.HostMapping.get_by_host(
|
|
ctxt, compute_node.host)
|
|
except exception.HostMappingNotFound:
|
|
missing_nodes.add(compute_node.host)
|
|
else:
|
|
if verbose:
|
|
print(_(
|
|
'Host %(host)s is already mapped to cell %(uuid)s'
|
|
) % {'host': host_mapping.host,
|
|
'uuid': host_mapping.cell_mapping.uuid})
|
|
# Re-using the existing UUID in case there is already a mapping
|
|
# NOTE(sbauza): There could be possibly multiple CellMappings
|
|
# if the operator provides another configuration file and moves
|
|
# the hosts to another cell v2, but that's not really something
|
|
# we should support.
|
|
cell_mapping_uuid = host_mapping.cell_mapping.uuid
|
|
if not missing_nodes:
|
|
print(_('All hosts are already mapped to cell(s).'))
|
|
return cell_mapping_uuid
|
|
# Create the cell mapping in the API database
|
|
if cell_mapping_uuid is not None:
|
|
cell_mapping = objects.CellMapping.get_by_uuid(
|
|
ctxt, cell_mapping_uuid)
|
|
if cell_mapping is None:
|
|
cell_mapping_uuid = uuidutils.generate_uuid()
|
|
cell_mapping = objects.CellMapping(
|
|
ctxt, uuid=cell_mapping_uuid, name=name,
|
|
transport_url=transport_url,
|
|
database_connection=CONF.database.connection)
|
|
cell_mapping.create()
|
|
# Pull the hosts from the cell database and create the host mappings
|
|
for compute_host in missing_nodes:
|
|
host_mapping = objects.HostMapping(
|
|
ctxt, host=compute_host, cell_mapping=cell_mapping)
|
|
host_mapping.create()
|
|
if verbose:
|
|
print(cell_mapping_uuid)
|
|
return cell_mapping_uuid
|
|
|
|
@args('--transport-url', metavar='<transport_url>', dest='transport_url',
|
|
help='The transport url for the cell message queue')
|
|
@args('--name', metavar='<cell_name>', help='The name of the cell')
|
|
@args('--verbose', action='store_true',
|
|
help='Output the cell mapping uuid for any newly mapped hosts.')
|
|
def map_cell_and_hosts(self, transport_url=None, name=None, verbose=False):
|
|
"""EXPERIMENTAL. Create a cell mapping and host mappings for a cell.
|
|
|
|
Users not dividing their cloud into multiple cells will be a single
|
|
cell v2 deployment and should specify:
|
|
|
|
nova-manage cell_v2 map_cell_and_hosts --config-file <nova.conf>
|
|
|
|
Users running multiple cells can add a cell v2 by specifying:
|
|
|
|
nova-manage cell_v2 map_cell_and_hosts --config-file <cell nova.conf>
|
|
"""
|
|
transport_url = self._validate_transport_url(transport_url)
|
|
if not transport_url:
|
|
return 1
|
|
self._map_cell_and_hosts(transport_url, name, verbose)
|
|
# online_data_migrations established a pattern of 0 meaning everything
|
|
# is done, 1 means run again to do more work. This command doesn't do
|
|
# partial work so 0 is appropriate.
|
|
return 0
|
|
|
|
@args('--uuid', metavar='<instance_uuid>', dest='uuid', required=True,
|
|
help=_('The instance UUID to verify'))
|
|
@args('--quiet', action='store_true', dest='quiet',
|
|
help=_('Do not print anything'))
|
|
def verify_instance(self, uuid, quiet=False):
|
|
"""Verify instance mapping to a cell.
|
|
|
|
This command is useful to determine if the cellsv2 environment is
|
|
properly setup, specifically in terms of the cell, host, and instance
|
|
mapping records required.
|
|
|
|
This prints one of several strings (and exits with the corresponding code)
indicating whether the instance is successfully mapped to a cell (0), is
unmapped due to an incomplete upgrade (1), is unmapped due to a normally
transient state (2), is a deleted instance which still has an instance
mapping (3), or is an archived instance which still has an instance
mapping (4).
|
|
"""
|
|
def say(string):
|
|
if not quiet:
|
|
print(string)
|
|
|
|
ctxt = context.get_admin_context()
|
|
try:
|
|
mapping = objects.InstanceMapping.get_by_instance_uuid(
|
|
ctxt, uuid)
|
|
except exception.InstanceMappingNotFound:
|
|
say('Instance %s is not mapped to a cell '
|
|
'(upgrade is incomplete) or instance '
|
|
'does not exist' % uuid)
|
|
return 1
|
|
if mapping.cell_mapping is None:
|
|
say('Instance %s is not mapped to a cell' % uuid)
|
|
return 2
|
|
else:
|
|
with context.target_cell(ctxt, mapping.cell_mapping) as cctxt:
|
|
try:
|
|
instance = objects.Instance.get_by_uuid(cctxt, uuid)
|
|
except exception.InstanceNotFound:
|
|
try:
|
|
el_ctx = cctxt.elevated(read_deleted='yes')
|
|
instance = objects.Instance.get_by_uuid(el_ctx, uuid)
|
|
# instance is deleted
|
|
if instance:
|
|
say('The instance with uuid %s has been deleted.'
|
|
% uuid)
|
|
say('Execute '
|
|
'`nova-manage db archive_deleted_rows` '
|
|
'command to archive this deleted '
|
|
'instance and remove its instance_mapping.')
|
|
return 3
|
|
except exception.InstanceNotFound:
|
|
# instance is archived
|
|
say('The instance with uuid %s has been archived.'
|
|
% uuid)
|
|
say('However its instance_mapping remains.')
|
|
return 4
|
|
# instance is alive and mapped to a cell
|
|
say('Instance %s is in cell: %s (%s)' % (
|
|
uuid,
|
|
mapping.cell_mapping.name,
|
|
mapping.cell_mapping.uuid))
|
|
return 0
|
|
|
|
@args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
|
|
help='If provided only this cell will be searched for new hosts to '
|
|
'map.')
|
|
@args('--verbose', action='store_true',
|
|
help=_('Provide detailed output when discovering hosts.'))
|
|
@args('--strict', action='store_true',
|
|
help=_('Considered successful (exit code 0) only when an unmapped '
|
|
'host is discovered. Any other outcome will be considered a '
|
|
'failure (non-zero exit code).'))
|
|
@args('--by-service', action='store_true', default=False,
|
|
dest='by_service',
|
|
help=_('Discover hosts by service instead of compute node'))
|
|
def discover_hosts(self, cell_uuid=None, verbose=False, strict=False,
|
|
by_service=False):
|
|
"""Searches cells, or a single cell, and maps found hosts.
|
|
|
|
When a new host is added to a deployment it will add a service entry
|
|
to the db it's configured to use. This command will check the db for
|
|
each cell, or a single one if passed in, and map any hosts which are
|
|
not currently mapped. If a host is already mapped nothing will be done.
|
|
|
|
This command should be run once after all compute hosts have been
|
|
deployed and should not be run in parallel. When run in parallel,
|
|
the commands will collide with each other trying to map the same hosts
|
|
in the database at the same time.
|
|
"""
|
|
def status_fn(msg):
|
|
if verbose:
|
|
print(msg)
|
|
|
|
ctxt = context.RequestContext()
|
|
try:
|
|
hosts = host_mapping_obj.discover_hosts(ctxt, cell_uuid, status_fn,
|
|
by_service)
|
|
except exception.HostMappingExists as exp:
|
|
print(_('ERROR: Duplicate host mapping was encountered. This '
|
|
'command should be run once after all compute hosts have '
|
|
'been deployed and should not be run in parallel. When '
|
|
'run in parallel, the commands will collide with each '
|
|
'other trying to map the same hosts in the database at '
|
|
'the same time. Error: %s') % exp)
|
|
return 2
|
|
# discover_hosts will return an empty list if no hosts are discovered
|
|
if strict:
|
|
return int(not hosts)
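
# For example, with --strict: no hosts discovered gives int(not []) == 1
# (failure), while at least one newly mapped host gives 0 (success).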
|
|
|
|
@action_description(
|
|
_("Add a new cell to nova API database. "
|
|
"DB and MQ urls can be provided directly "
|
|
"or can be taken from config. The result is cell uuid."))
|
|
@args('--name', metavar='<cell_name>', help=_('The name of the cell'))
|
|
@args('--database_connection', metavar='<database_connection>',
|
|
dest='database_connection',
|
|
help=_('The database url for the cell database'))
|
|
@args('--transport-url', metavar='<transport_url>', dest='transport_url',
|
|
help=_('The transport url for the cell message queue'))
|
|
@args('--verbose', action='store_true',
|
|
help=_('Output the uuid of the created cell'))
|
|
@args('--disabled', action='store_true',
|
|
help=_('To create a pre-disabled cell.'))
|
|
def create_cell(self, name=None, database_connection=None,
|
|
transport_url=None, verbose=False, disabled=False):
|
|
ctxt = context.get_context()
|
|
transport_url = self._validate_transport_url(transport_url)
|
|
if not transport_url:
|
|
return 1
|
|
|
|
database_connection = self._validate_database_connection(
|
|
database_connection)
|
|
if not database_connection:
|
|
return 1
|
|
if (self._non_unique_transport_url_database_connection_checker(ctxt,
|
|
None, transport_url, database_connection)):
|
|
return 2
|
|
cell_mapping_uuid = uuidutils.generate_uuid()
|
|
cell_mapping = objects.CellMapping(
|
|
ctxt,
|
|
uuid=cell_mapping_uuid, name=name,
|
|
transport_url=transport_url,
|
|
database_connection=database_connection,
|
|
disabled=disabled)
|
|
cell_mapping.create()
|
|
if verbose:
|
|
print(cell_mapping_uuid)
|
|
return 0
|
|
|
|
@args('--verbose', action='store_true',
|
|
help=_('Show sensitive details, such as passwords'))
|
|
def list_cells(self, verbose=False):
|
|
"""Lists the v2 cells in the deployment.
|
|
|
|
By default the cell name, uuid, disabled state, masked transport
|
|
URL and database connection details are shown. Use the --verbose
|
|
option to see transport URL and database connection with their
|
|
sensitive details.
|
|
"""
|
|
cell_mappings = objects.CellMappingList.get_all(
|
|
context.get_admin_context())
|
|
|
|
field_names = [_('Name'), _('UUID'), _('Transport URL'),
|
|
_('Database Connection'), _('Disabled')]
|
|
|
|
t = prettytable.PrettyTable(field_names)
|
|
for cell in sorted(cell_mappings,
|
|
# CellMapping.name is optional
|
|
key=lambda _cell: _cell.name or ''):
|
|
fields = [cell.name or '', cell.uuid]
|
|
if verbose:
|
|
fields.extend([cell.transport_url, cell.database_connection])
|
|
else:
|
|
fields.extend([
|
|
mask_passwd_in_url(cell.transport_url),
|
|
mask_passwd_in_url(cell.database_connection)])
|
|
fields.extend([cell.disabled])
|
|
t.add_row(fields)
|
|
print(t)
|
|
return 0
|
|
|
|
@args('--force', action='store_true', default=False,
|
|
help=_('Delete hosts and instance_mappings that belong '
|
|
'to the cell as well.'))
|
|
@args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
|
|
required=True, help=_('The uuid of the cell to delete.'))
|
|
def delete_cell(self, cell_uuid, force=False):
|
|
"""Delete an empty cell by the given uuid.
|
|
|
|
This command will return a non-zero exit code in the following cases.
|
|
|
|
* The cell is not found by uuid.
|
|
* It has hosts and force is False.
|
|
* It has instance mappings and force is False.
|
|
|
|
If force is True and the cell has hosts and/or instance_mappings, they
|
|
are deleted as well (as long as there are no living instances).
|
|
|
|
Returns 0 in the following cases.
|
|
|
|
* The empty cell is found and deleted successfully.
|
|
* The cell has hosts and force is True then the cell, hosts and
|
|
instance_mappings are deleted successfully; if there are no
|
|
living instances.
|
|
"""
|
|
ctxt = context.get_admin_context()
|
|
# Find the CellMapping given the uuid.
|
|
try:
|
|
cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
|
|
except exception.CellMappingNotFound:
|
|
print(_('Cell with uuid %s was not found.') % cell_uuid)
|
|
return 1
|
|
|
|
# Check to see if there are any HostMappings for this cell.
|
|
host_mappings = objects.HostMappingList.get_by_cell_id(
|
|
ctxt, cell_mapping.id)
|
|
nodes = []
|
|
if host_mappings:
|
|
if not force:
|
|
print(_('There are existing hosts mapped to cell with uuid '
|
|
'%s.') % cell_uuid)
|
|
return 2
|
|
# We query for the compute nodes in the cell,
|
|
# so that they can be unmapped.
|
|
with context.target_cell(ctxt, cell_mapping) as cctxt:
|
|
nodes = objects.ComputeNodeList.get_all(cctxt)
|
|
|
|
# Check to see if there are any InstanceMappings for this cell.
|
|
instance_mappings = objects.InstanceMappingList.get_by_cell_id(
|
|
ctxt, cell_mapping.id)
|
|
if instance_mappings:
|
|
with context.target_cell(ctxt, cell_mapping) as cctxt:
|
|
instances = objects.InstanceList.get_all(cctxt)
|
|
if instances:
|
|
# There are instances in the cell.
|
|
print(_('There are existing instances mapped to cell with '
|
|
'uuid %s.') % cell_uuid)
|
|
return 3
|
|
else:
|
|
if not force:
|
|
# There are no instances in the cell but the records remain
|
|
# in the 'instance_mappings' table.
|
|
print(_("There are instance mappings to cell with uuid "
|
|
"%s, but all instances have been deleted "
|
|
"in the cell.") % cell_uuid)
|
|
print(_("So execute 'nova-manage db archive_deleted_rows' "
|
|
"to delete the instance mappings."))
|
|
return 4
|
|
|
|
# Delete instance_mappings of the deleted instances
|
|
for instance_mapping in instance_mappings:
|
|
instance_mapping.destroy()
|
|
|
|
# Unmap the compute nodes so that they can be discovered
|
|
# again in future, if needed.
|
|
for node in nodes:
|
|
node.mapped = 0
|
|
node.save()
|
|
|
|
# Delete hosts mapped to the cell.
|
|
for host_mapping in host_mappings:
|
|
host_mapping.destroy()
|
|
|
|
# There are no hosts or instances mapped to the cell so delete it.
|
|
cell_mapping.destroy()
|
|
return 0
|
|
|
|
@args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
|
|
required=True, help=_('The uuid of the cell to update.'))
|
|
@args('--name', metavar='<cell_name>', dest='name',
|
|
help=_('Set the cell name.'))
|
|
@args('--transport-url', metavar='<transport_url>', dest='transport_url',
|
|
help=_('Set the cell transport_url. NOTE that running nodes '
|
|
'will not see the change until restart!'))
|
|
@args('--database_connection', metavar='<database_connection>',
|
|
dest='db_connection',
|
|
help=_('Set the cell database_connection. NOTE that running nodes '
|
|
'will not see the change until restart!'))
|
|
@args('--disable', action='store_true', dest='disable',
|
|
help=_('Disables the cell. Note that scheduling to this cell will be '
'blocked until it is enabled and the nova-scheduler service is '
'sent a SIGHUP.'))
|
|
@args('--enable', action='store_true', dest='enable',
|
|
help=_('Enables the cell. Note that this makes a disabled cell '
|
|
'available for scheduling after a SIGHUP of the '
|
|
'nova-scheduler service'))
|
|
def update_cell(self, cell_uuid, name=None, transport_url=None,
|
|
db_connection=None, disable=False, enable=False):
|
|
"""Updates the properties of a cell by the given uuid.
|
|
|
|
If the cell is not found by uuid, this command will return an exit
|
|
code of 1. If the provided transport_url and/or database_connection
matches that of another cell, this command will return an exit code of 3.
If the properties cannot be set, this will return 2. If an
|
|
attempt is made to disable and enable a cell at the same time, this
|
|
command will exit with a return code of 4. If an attempt is made to
|
|
disable or enable cell0 this command will exit with a return code of 5.
|
|
Otherwise, the exit code will be 0.
|
|
|
|
NOTE: Updating the transport_url or database_connection fields on
|
|
a running system will NOT result in all nodes immediately using the
|
|
new values. Use caution when changing these values.
|
|
NOTE (tssurya): The scheduler will not notice that a cell has been
|
|
enabled/disabled until it is restarted or sent the SIGHUP signal.
|
|
"""
|
|
ctxt = context.get_admin_context()
|
|
try:
|
|
cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
|
|
except exception.CellMappingNotFound:
|
|
print(_('Cell with uuid %s was not found.') % cell_uuid)
|
|
return 1
|
|
|
|
if name:
|
|
cell_mapping.name = name
|
|
|
|
# Having empty transport_url and db_connection means leaving the
|
|
# existing values
|
|
transport_url = self._validate_transport_url(
|
|
transport_url, warn_about_none=False)
|
|
db_connection = self._validate_database_connection(
|
|
db_connection, warn_about_none=False)
|
|
|
|
if (self._non_unique_transport_url_database_connection_checker(ctxt,
|
|
cell_mapping, transport_url, db_connection)):
|
|
# We use the return code 3 before 2 to avoid changing the
|
|
# semantic meanings of return codes.
|
|
return 3
|
|
|
|
if transport_url:
|
|
cell_mapping.transport_url = transport_url
|
|
|
|
if db_connection:
|
|
cell_mapping.database_connection = db_connection
|
|
|
|
if disable and enable:
|
|
print(_('Cell cannot be disabled and enabled at the same time.'))
|
|
return 4
|
|
if disable or enable:
|
|
if cell_mapping.is_cell0():
|
|
print(_('Cell0 cannot be disabled.'))
|
|
return 5
|
|
elif disable and not cell_mapping.disabled:
|
|
cell_mapping.disabled = True
|
|
elif enable and cell_mapping.disabled:
|
|
cell_mapping.disabled = False
|
|
elif disable and cell_mapping.disabled:
|
|
print(_('Cell %s is already disabled') % cell_uuid)
|
|
elif enable and not cell_mapping.disabled:
|
|
print(_('Cell %s is already enabled') % cell_uuid)
|
|
|
|
try:
|
|
cell_mapping.save()
|
|
except Exception as e:
|
|
print(_('Unable to update CellMapping: %s') % e)
|
|
return 2
|
|
|
|
return 0
|
|
|
|
@args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
|
|
help=_('The uuid of the cell.'))
|
|
def list_hosts(self, cell_uuid=None):
|
|
"""Lists the hosts in one or all v2 cells."""
|
|
ctxt = context.get_admin_context()
|
|
if cell_uuid:
|
|
# Find the CellMapping given the uuid.
|
|
try:
|
|
cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
|
|
except exception.CellMappingNotFound:
|
|
print(_('Cell with uuid %s was not found.') % cell_uuid)
|
|
return 1
|
|
|
|
host_mappings = objects.HostMappingList.get_by_cell_id(
|
|
ctxt, cell_mapping.id)
|
|
else:
|
|
host_mappings = objects.HostMappingList.get_all(ctxt)
|
|
|
|
field_names = [_('Cell Name'), _('Cell UUID'), _('Hostname')]
|
|
|
|
t = prettytable.PrettyTable(field_names)
|
|
for host in sorted(host_mappings, key=lambda _host: _host.host):
|
|
fields = [host.cell_mapping.name, host.cell_mapping.uuid,
|
|
host.host]
|
|
t.add_row(fields)
|
|
print(t)
|
|
return 0

    @args('--cell_uuid', metavar='<cell_uuid>', dest='cell_uuid',
          required=True, help=_('The uuid of the cell.'))
    @args('--host', metavar='<host>', dest='host',
          required=True, help=_('The host to delete.'))
    def delete_host(self, cell_uuid, host):
        """Delete a host in a cell (host mappings) by the given host name

        This command will return a non-zero exit code in the following cases.

        * The cell is not found by uuid.
        * The host is not found by host name.
        * The host is not in the cell.
        * The host has instances.

        Returns 0 if the host is deleted successfully.

        NOTE: The scheduler caches host-to-cell mapping information so when
        deleting a host the scheduler may need to be restarted or sent the
        SIGHUP signal.
        """
        ctxt = context.get_admin_context()
        # Find the CellMapping given the uuid.
        try:
            cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
        except exception.CellMappingNotFound:
            print(_('Cell with uuid %s was not found.') % cell_uuid)
            return 1

        try:
            host_mapping = objects.HostMapping.get_by_host(ctxt, host)
        except exception.HostMappingNotFound:
            print(_('The host %s was not found.') % host)
            return 2

        if host_mapping.cell_mapping.uuid != cell_mapping.uuid:
            print(_('The host %(host)s was not found '
                    'in the cell %(cell_uuid)s.') % {'host': host,
                                                     'cell_uuid': cell_uuid})
            return 3

        with context.target_cell(ctxt, cell_mapping) as cctxt:
            instances = objects.InstanceList.get_by_host(cctxt, host)
            try:
                nodes = objects.ComputeNodeList.get_all_by_host(cctxt, host)
            except exception.ComputeHostNotFound:
                nodes = []

        if instances:
            print(_('There are instances on the host %s.') % host)
            return 4

        for node in nodes:
            node.mapped = 0
            node.save()

        host_mapping.destroy()
        return 0
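
    # A minimal usage sketch, assuming the usual 'cell_v2' command group used
    # for this class; <cell_uuid> and <hostname> are placeholders:
    #
    #   nova-manage cell_v2 delete_host --cell_uuid <cell_uuid> \
    #       --host <hostname>
    #
    # An exit code of 0 means the host mapping was removed; the non-zero
    # codes map to the failure cases listed in the docstring above.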


class PlacementCommands(object):
    """Commands for managing placement resources."""

    @staticmethod
    def _get_compute_node_uuid(ctxt, instance, node_cache):
        """Find the ComputeNode.uuid for the given Instance

        :param ctxt: cell-targeted nova.context.RequestContext
        :param instance: the instance to lookup a compute node
        :param node_cache: dict of Instance.node keys to ComputeNode.uuid
            values; this cache is updated if a new node is processed.
        :returns: ComputeNode.uuid for the given instance
        :raises: nova.exception.ComputeHostNotFound
        """
        if instance.node in node_cache:
            return node_cache[instance.node]

        compute_node = objects.ComputeNode.get_by_host_and_nodename(
            ctxt, instance.host, instance.node)
        node_uuid = compute_node.uuid
        node_cache[instance.node] = node_uuid
        return node_uuid

    @staticmethod
    def _get_ports(ctxt, instance, neutron):
        """Return the ports that are bound to the instance

        :param ctxt: nova.context.RequestContext
        :param instance: the instance to return the ports for
        :param neutron: nova.network.neutron.ClientWrapper to
            communicate with Neutron
        :return: a list of neutron port dict objects
        :raise UnableToQueryPorts: If the neutron list ports query fails.
        """
        try:
            return neutron.list_ports(
                ctxt, device_id=instance.uuid,
                fields=['id', constants.RESOURCE_REQUEST,
                        constants.BINDING_PROFILE]
            )['ports']
        except neutron_client_exc.NeutronClientException as e:
            raise exception.UnableToQueryPorts(
                instance_uuid=instance.uuid, error=str(e))

    @staticmethod
    def _has_request_but_no_allocation(port, neutron):
        has_res_req = neutron_api.API()._has_resource_request(
            context.get_admin_context(), port, neutron)

        binding_profile = neutron_api.get_binding_profile(port)
        allocation = binding_profile.get(constants.ALLOCATION)
        return has_res_req and not allocation

    @staticmethod
    def _merge_allocations(alloc1, alloc2):
        """Return a new allocation dict that contains the sum of alloc1 and
        alloc2.

        :param alloc1: a dict in the form of
            {
                <rp_uuid>: {'resources': {<resource class>: amount,
                                          <resource class>: amount},
                <rp_uuid>: {'resources': {<resource class>: amount},
            }
        :param alloc2: a dict in the same form as alloc1
        :return: the merged allocation of alloc1 and alloc2 in the same format
        """

        allocations = collections.defaultdict(
            lambda: {'resources': collections.defaultdict(int)})

        for alloc in [alloc1, alloc2]:
            for rp_uuid in alloc:
                for rc, amount in alloc[rp_uuid]['resources'].items():
                    allocations[rp_uuid]['resources'][rc] += amount
        return allocations
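
    # For example (values are illustrative), merging
    #   {'rp1': {'resources': {'VCPU': 2}}}
    # with
    #   {'rp1': {'resources': {'VCPU': 1, 'MEMORY_MB': 512}},
    #    'rp2': {'resources': {'DISK_GB': 10}}}
    # yields
    #   {'rp1': {'resources': {'VCPU': 3, 'MEMORY_MB': 512}},
    #    'rp2': {'resources': {'DISK_GB': 10}}}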

    @staticmethod
    def _get_resource_request_from_ports(
        ctxt: context.RequestContext,
        ports: ty.List[ty.Dict[str, ty.Any]]
    ) -> ty.Tuple[
        ty.Dict[str, ty.List['objects.RequestGroup']],
        'objects.RequestLevelParams']:
        """Collect RequestGroups and RequestLevelParams for all ports

        :param ctxt: the request context
        :param ports: a list of port dicts
        :returns: A two tuple where the first item is a dict mapping port
            uuids to a list of request groups coming from that port, the
            second item is a combined RequestLevelParams object from all ports.
        """
        groups = {}
        request_level_params = objects.RequestLevelParams()
        extended_res_req = (
            neutron_api.API().has_extended_resource_request_extension(
                ctxt)
        )

        for port in ports:
            resource_request = port.get(constants.RESOURCE_REQUEST)
            if extended_res_req:
                groups[port['id']] = (
                    objects.RequestGroup.from_extended_port_request(
                        ctxt, resource_request
                    )
                )
                request_level_params.extend_with(
                    objects.RequestLevelParams.from_port_request(
                        resource_request
                    )
                )
            else:
                # This is the legacy format, only one group per port and no
                # request level param support
                # TODO(gibi): remove this path once the extended resource
                # request extension is mandatory in neutron
                groups[port['id']] = [
                    objects.RequestGroup.from_port_request(
                        ctxt, port['id'], resource_request
                    )
                ]

        return groups, request_level_params

    @staticmethod
    def _get_port_binding_profile_allocation(
        ctxt: context.RequestContext,
        neutron: neutron_api.ClientWrapper,
        port: ty.Dict[str, ty.Any],
        request_groups: ty.List['objects.RequestGroup'],
        resource_provider_mapping: ty.Dict[str, ty.List[str]],
    ) -> ty.Dict[str, str]:
        """Generate the value of the allocation key of the port binding profile
        based on the provider mapping returned from placement

        :param ctxt: the request context
        :param neutron: the neutron client
        :param port: the port dict from neutron
        :param request_groups: the list of RequestGroups object generated from
            the port resource request
        :param resource_provider_mapping: The dict of request group to resource
            provider mapping returned by the Placement allocation candidate
            query
        :returns: a dict mapping request group ids to resource provider uuids
            in the form as Neutron expects in the port binding profile.
        """
        if neutron_api.API().has_extended_resource_request_extension(
            ctxt, neutron
        ):
            # The extended resource request format also means that a
            # port has more than one request group.
            # Each request group id from the port needs to be mapped to
            # a single provider id from the provider mappings. Each
            # group from the port is mapped to a numbered request group
            # in placement so we can assume that they are mapped to
            # a single provider and therefore the provider mapping list
            # has a single provider id.
            allocation = {
                group.requester_id: resource_provider_mapping[
                    group.requester_id][0]
                for group in request_groups
            }
        else:
            # This is the legacy resource request format where a port
            # is mapped to a single request group
            # NOTE(gibi): In the resource provider mapping there can be
            # more than one RP fulfilling a request group. But the resource
            # request of a Neutron port is always mapped to a
            # numbered request group that is always fulfilled by one
            # resource provider. So we only pass that single RP UUID
            # here.
            allocation = resource_provider_mapping[
                port['id']][0]

        return allocation
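
    # Shape of the returned allocation value (illustrative placeholders):
    # with the extended resource request extension it is a dict keyed by
    # request group id, e.g. {'<group_id>': '<rp_uuid>'}, while in the
    # legacy format it is the single resource provider UUID fulfilling the
    # port's one request group, e.g. '<rp_uuid>'.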

    def _get_port_allocations_to_heal(
            self, ctxt, instance, node_cache, placement, neutron, output):
        """Return the needed extra allocation for the ports of the instance.

        :param ctxt: nova.context.RequestContext
        :param instance: instance to get the port allocations for
        :param node_cache: dict of Instance.node keys to ComputeNode.uuid
            values; this cache is updated if a new node is processed.
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param neutron: nova.network.neutron.ClientWrapper to
            communicate with Neutron
        :param output: function that takes a single message for verbose output
        :raise UnableToQueryPorts: If the neutron list ports query fails.
        :raise nova.exception.ComputeHostNotFound: if compute node of the
            instance not found in the db.
        :raise PlacementAPIConnectFailure: if placement API cannot be reached
        :raise AllocationUpdateFailed: if there is either no allocation
            candidate returned from placement for the missing port allocations
            or there is more than one candidate making the healing
            ambiguous.
        :return: A two tuple where the first item is a dict of resources keyed
            by RP uuid to be included in the instance allocation dict. The
            second item is a list of port dicts to be updated in Neutron.
        """
        # We need to heal port allocations for ports that have resource_request
        # but do not have an RP uuid in the binding:profile.allocation field.
        # We cannot use the instance info_cache to check the binding profile
        # as this code needs to be able to handle ports that were attached
        # before nova in stein started updating the allocation key in the
        # binding:profile.
        # In theory a port can be assigned to an instance without it being
        # bound to any host (e.g. in case of shelve offload) but
        # _heal_allocations_for_instance() already filters out instances that
        # are not on any host.
        ports_to_heal = [
            port for port in self._get_ports(ctxt, instance, neutron)
            if self._has_request_but_no_allocation(port, neutron)]

        if not ports_to_heal:
            # nothing to do, return early
            return {}, []

        node_uuid = self._get_compute_node_uuid(
            ctxt, instance, node_cache)

        # NOTE(gibi): We need to handle both legacy and extended resource
        # request. So we need to handle ports with multiple request groups
        # allocating from multiple providers.
        # The logic we follow here is pretty similar to the logic
        # implemented in ComputeManager._allocate_port_resource_for_instance
        # for the interface attach case. We just apply it to more than one
        # port here.
        request_groups_per_port, req_lvl_params = (
            self._get_resource_request_from_ports(ctxt, ports_to_heal)
        )
        # flatten the list of list of groups
        request_groups = [
            group
            for groups in request_groups_per_port.values()
            for group in groups
        ]

        # we can have multiple request groups, it would be enough to restrict
        # only one of them to the compute tree but for symmetry we restrict
        # all of them
        for request_group in request_groups:
            request_group.in_tree = node_uuid

        # If there are multiple groups then the group_policy is mandatory in
        # the allocation candidate query. We can assume that if this instance
        # booted successfully then we have the policy in the flavor. If there
        # is only one group and therefore no policy then the value of the
        # policy in the allocation candidate query is ignored, so we simply
        # default it here.
        group_policy = instance.flavor.extra_specs.get("group_policy", "none")

        rr = scheduler_utils.ResourceRequest.from_request_groups(
            request_groups, req_lvl_params, group_policy)
        res = placement.get_allocation_candidates(ctxt, rr)
        # NOTE(gibi): the get_allocation_candidates method has the
        # @safe_connect decorator applied. Such a decorator will return None
        # if the connection to Placement fails. So we raise an exception
        # here. In the case when Placement successfully returns a response,
        # even if it is a negative or empty response, the method will return
        # a three-tuple. That case is handled a couple of lines below.
        if not res:
            raise exception.PlacementAPIConnectFailure()
        alloc_reqs, __, __ = res

        if not alloc_reqs:
            port_ids = [port['id'] for port in ports_to_heal]
            raise exception.AllocationUpdateFailed(
                consumer_uuid=instance.uuid,
                error=f'Placement returned no allocation candidate to fulfill '
                      f'the resource request of the port(s) {port_ids}'
            )
        if len(alloc_reqs) > 1:
            # If there is more than one candidate then it is an ambiguous
            # situation that we cannot handle here because selecting the right
            # one might need extra information from the compute node. For
            # example which PCI PF the VF is allocated from and which RP
            # represents that PCI PF in placement.
            # TODO(gibi): One way to get that missing information to resolve
            # ambiguity would be to load up the InstancePciRequest objects and
            # try to use the parent_if_name in their spec to find the proper
            # candidate that allocates for the same port from the PF RP that
            # has the same name.
            port_ids = [port['id'] for port in ports_to_heal]
            raise exception.AllocationUpdateFailed(
                consumer_uuid=instance.uuid,
                error=f'Placement returned more than one possible allocation '
                      f'candidate to fulfill the resource request of the '
                      f'port(s) {port_ids}. This script does not have enough '
                      f'information to select the proper candidate to heal '
                      f'the missing allocations. A possible way to heal the '
                      f'allocation of this instance is to migrate it to '
                      f'another compute as the migration process re-creates '
                      f'the full allocation on the target host.'
            )

        # so we have one candidate, lets use that to get the needed allocations
        # and the provider mapping for the ports' binding profile
        alloc_req = alloc_reqs[0]
        allocations = alloc_req["allocations"]
        provider_mappings = alloc_req["mappings"]

        for port in ports_to_heal:
            # We also need to record the RPs we are allocated from in the
            # port. This will be sent back to Neutron before the allocation
            # is updated in placement
            profile_allocation = self._get_port_binding_profile_allocation(
                ctxt, neutron, port, request_groups_per_port[port['id']],
                provider_mappings
            )
            binding_profile = neutron_api.get_binding_profile(port)
            binding_profile[constants.ALLOCATION] = profile_allocation
            port[constants.BINDING_PROFILE] = binding_profile

            output(_(
                "Found a request group : resource provider mapping "
                "%(mapping)s for the port %(port_uuid)s with resource request "
                "%(request)s attached to the instance %(instance_uuid)s") %
                {"mapping": profile_allocation, "port_uuid": port['id'],
                 "request": port.get(constants.RESOURCE_REQUEST),
                 "instance_uuid": instance.uuid}
            )

        return allocations, ports_to_heal

    def _update_ports(self, neutron, ports_to_update, output):
        succeeded = []
        try:
            for port in ports_to_update:
                profile = neutron_api.get_binding_profile(port)
                body = {
                    'port': {
                        constants.BINDING_PROFILE: profile
                    }
                }
                output(
                    _('Updating port %(port_uuid)s with attributes '
                      '%(attributes)s') %
                    {'port_uuid': port['id'], 'attributes': body['port']})
                neutron.update_port(port['id'], body=body)
                succeeded.append(port)
        except neutron_client_exc.NeutronClientException as e:
            output(
                _('Updating port %(port_uuid)s failed: %(error)s') %
                {'port_uuid': port['id'], 'error': str(e)})
            # one of the port updates failed. We need to roll back the updates
            # that succeeded before
            self._rollback_port_updates(neutron, succeeded, output)
            # we failed to heal so we need to stop but we successfully rolled
            # back the partial updates so the admin can retry the healing.
            raise exception.UnableToUpdatePorts(error=str(e))

    @staticmethod
    def _rollback_port_updates(neutron, ports_to_rollback, output):
        # _update_ports() added the allocation key to these ports, so we need
        # to remove it during the rollback.
        manual_rollback_needed = []
        last_exc = None
        for port in ports_to_rollback:
            profile = neutron_api.get_binding_profile(port)
            profile.pop(constants.ALLOCATION)
            body = {
                'port': {
                    constants.BINDING_PROFILE: profile
                }
            }
            try:
                output(_('Rolling back port update for %(port_uuid)s') %
                       {'port_uuid': port['id']})
                neutron.update_port(port['id'], body=body)
            except neutron_client_exc.NeutronClientException as e:
                output(
                    _('Rolling back update for port %(port_uuid)s failed: '
                      '%(error)s') % {'port_uuid': port['id'],
                                      'error': str(e)})
                # TODO(gibi): We could implement a retry mechanism with
                # back off.
                manual_rollback_needed.append(port['id'])
                last_exc = e

        if manual_rollback_needed:
            # At least one of the port operations failed so we failed to roll
            # back. There are partial updates in neutron. Human intervention
            # needed.
            raise exception.UnableToRollbackPortUpdates(
                error=str(last_exc),
                port_uuids=manual_rollback_needed)

    def _heal_missing_alloc(self, ctxt, instance, node_cache):
        node_uuid = self._get_compute_node_uuid(
            ctxt, instance, node_cache)

        # Now get the resource allocations for the instance based
        # on its embedded flavor.
        resources = scheduler_utils.resources_from_flavor(
            instance, instance.flavor)

        payload = {
            'allocations': {
                node_uuid: {'resources': resources},
            },
            'project_id': instance.project_id,
            'user_id': instance.user_id,
            'consumer_generation': None
        }
        return payload
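
    # The returned payload follows the body shape that Placement expects for
    # PUT /allocations/{consumer_uuid} (values are illustrative):
    #
    #   {'allocations': {'<node_uuid>': {'resources': {'VCPU': 1,
    #                                                  'MEMORY_MB': 512}}},
    #    'project_id': '<project_id>',
    #    'user_id': '<user_id>',
    #    'consumer_generation': None}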

    def _heal_missing_project_and_user_id(self, allocations, instance):
        allocations['project_id'] = instance.project_id
        allocations['user_id'] = instance.user_id
        return allocations

    @staticmethod
    def ensure_instance_has_no_vgpu_request(instance):
        if instance.flavor.extra_specs.get("resources:VGPU"):
            raise exception.HealvGPUAllocationNotSupported(
                instance_uuid=instance.uuid)

    @staticmethod
    def ensure_instance_has_no_cyborg_device_profile_request(instance):
        if instance.flavor.extra_specs.get("accel:device_profile"):
            raise exception.HealDeviceProfileAllocationNotSupported(
                instance_uuid=instance.uuid)

    def _heal_allocations_for_instance(self, ctxt, instance, node_cache,
                                       output, placement, dry_run,
                                       heal_port_allocations, neutron,
                                       force):
        """Checks the given instance to see if it needs allocation healing

        :param ctxt: cell-targeted nova.context.RequestContext
        :param instance: the instance to check for allocation healing
        :param node_cache: dict of Instance.node keys to ComputeNode.uuid
            values; this cache is updated if a new node is processed.
        :param output: function that takes a single message for verbose output
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param dry_run: Process instances and print output but do not commit
            any changes.
        :param heal_port_allocations: True if healing port allocation is
            requested, False otherwise.
        :param neutron: nova.network.neutron.ClientWrapper to
            communicate with Neutron
        :param force: True if force healing is requested for particular
            instance, False otherwise.
        :return: True if allocations were created or updated for the instance,
            None if nothing needed to be done
        :raises: nova.exception.ComputeHostNotFound if a compute node for a
            given instance cannot be found
        :raises: AllocationCreateFailed if unable to create allocations for
            a given instance against a given compute node resource provider
        :raises: AllocationUpdateFailed if unable to update allocations for
            a given instance with consumer project/user information
        :raise UnableToQueryPorts: If the neutron list ports query fails.
        :raise PlacementAPIConnectFailure: if placement API cannot be reached
        :raise UnableToUpdatePorts: if a port update failed in neutron but any
            partial update was rolled back successfully.
        :raise UnableToRollbackPortUpdates: if a port update failed in neutron
            and the rollback of the partial updates also failed.
        """
        if instance.task_state is not None:
            output(_('Instance %(instance)s is undergoing a task '
                     'state transition: %(task_state)s') %
                   {'instance': instance.uuid,
                    'task_state': instance.task_state})
            return

        if instance.node is None:
            output(_('Instance %s is not on a host.') % instance.uuid)
            return

        self.ensure_instance_has_no_vgpu_request(instance)
        self.ensure_instance_has_no_cyborg_device_profile_request(instance)

        try:
            allocations = placement.get_allocs_for_consumer(
                ctxt, instance.uuid)
        except (ks_exc.ClientException,
                exception.ConsumerAllocationRetrievalFailed) as e:
            raise exception.AllocationUpdateFailed(
                consumer_uuid=instance.uuid,
                error=_("Allocation retrieval failed: %s") % e)

        need_healing = False

        # Placement response can have an empty {'allocations': {}} in it if
        # there are no allocations for the instance
        if not allocations.get('allocations'):
            # This instance doesn't have allocations
            need_healing = _CREATE
            allocations = self._heal_missing_alloc(ctxt, instance, node_cache)

        if (allocations.get('project_id') != instance.project_id or
                allocations.get('user_id') != instance.user_id):
            # We have an instance with allocations but not the correct
            # project_id/user_id, so we want to update the allocations
            # and re-put them. We don't use put_allocations here
            # because we don't want to mess up shared or nested
            # provider allocations.
            need_healing = _UPDATE
            allocations = self._heal_missing_project_and_user_id(
                allocations, instance)

        if force:
            output(_('Force flag passed for instance %s') % instance.uuid)
            need_healing = _UPDATE
            # get default allocations
            alloc = self._heal_missing_alloc(ctxt, instance, node_cache)
            # set consumer generation of existing allocations
            alloc["consumer_generation"] = allocations["consumer_generation"]
            # set allocations
            allocations = alloc

        if heal_port_allocations:
            to_heal = self._get_port_allocations_to_heal(
                ctxt, instance, node_cache, placement, neutron, output)
            port_allocations, ports_to_update = to_heal
        else:
            port_allocations, ports_to_update = {}, []

        if port_allocations:
            need_healing = need_healing or _UPDATE
            # Merge in any missing port allocations
            allocations['allocations'] = self._merge_allocations(
                allocations['allocations'], port_allocations)

        if need_healing:
            if dry_run:
                # json dump the allocation dict as it contains nested default
                # dicts that are pretty hard to read in the verbose output
                alloc = jsonutils.dumps(allocations)
                if need_healing == _CREATE:
                    output(_('[dry-run] Create allocations for instance '
                             '%(instance)s: %(allocations)s') %
                           {'instance': instance.uuid,
                            'allocations': alloc})
                elif need_healing == _UPDATE:
                    output(_('[dry-run] Update allocations for instance '
                             '%(instance)s: %(allocations)s') %
                           {'instance': instance.uuid,
                            'allocations': alloc})
            else:
                # First update ports in neutron. If any of those operations
                # fail, then roll back the successful part of it and fail the
                # healing. We do this first because rolling back the port
                # updates is more straightforward than rolling back allocation
                # changes.
                self._update_ports(neutron, ports_to_update, output)

                # Now that the neutron update succeeded we can try to update
                # placement. If it fails we need to roll back every neutron
                # port update done before.
                resp = placement.put_allocations(ctxt, instance.uuid,
                                                 allocations)
                if resp:
                    if need_healing == _CREATE:
                        output(_('Successfully created allocations for '
                                 'instance %(instance)s.') %
                               {'instance': instance.uuid})
                    elif need_healing == _UPDATE:
                        output(_('Successfully updated allocations for '
                                 'instance %(instance)s.') %
                               {'instance': instance.uuid})
                    return True
                else:
                    # Roll back every neutron update. If we manage to
                    # roll back then it is safe to stop here and let the admin
                    # retry. If the rollback fails then
                    # _rollback_port_updates() will raise another exception
                    # that instructs the operator how to clean up manually
                    # before the healing can be retried
                    self._rollback_port_updates(
                        neutron, ports_to_update, output)
                    raise exception.AllocationUpdateFailed(
                        consumer_uuid=instance.uuid, error='')
        else:
            output(_('The allocation of instance %s is up-to-date. '
                     'Nothing to be healed.') % instance.uuid)
            return

    def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output,
                                placement, dry_run, instance_uuid,
                                heal_port_allocations, neutron,
                                force):
        """Checks for instances to heal in a given cell.

        :param ctxt: cell-targeted nova.context.RequestContext
        :param max_count: batch size (limit per instance query)
        :param unlimited: True if all instances in the cell should be
            processed, else False to just process $max_count instances
        :param output: function that takes a single message for verbose output
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param dry_run: Process instances and print output but do not commit
            any changes.
        :param instance_uuid: UUID of a specific instance to process.
        :param heal_port_allocations: True if healing port allocation is
            requested, False otherwise.
        :param neutron: nova.network.neutron.ClientWrapper to
            communicate with Neutron
        :param force: True if force healing is requested for particular
            instance, False otherwise.
        :return: Number of instances that had allocations created.
        :raises: nova.exception.ComputeHostNotFound if a compute node for a
            given instance cannot be found
        :raises: AllocationCreateFailed if unable to create allocations for
            a given instance against a given compute node resource provider
        :raises: AllocationUpdateFailed if unable to update allocations for
            a given instance with consumer project/user information
        :raise UnableToQueryPorts: If the neutron list ports query fails.
        :raise PlacementAPIConnectFailure: if placement API cannot be reached
        :raise UnableToUpdatePorts: if a port update failed in neutron but any
            partial update was rolled back successfully.
        :raise UnableToRollbackPortUpdates: if a port update failed in neutron
            and the rollback of the partial updates also failed.
        """
        # Keep a cache of instance.node to compute node resource provider UUID.
        # This will save some queries for non-ironic instances to the
        # compute_nodes table.
        node_cache = {}
        # Track the total number of instances that have allocations created
        # for them in this cell. We return when num_processed equals max_count
        # and unlimited=True or we exhaust the number of instances to process
        # in this cell.
        num_processed = 0
        # Get all instances from this cell which have a host and are not
        # undergoing a task state transition. Go from oldest to newest.
        # NOTE(mriedem): Unfortunately we don't have a marker to use
        # between runs where the user is specifying --max-count.
        # TODO(mriedem): Store a marker in system_metadata so we can
        # automatically pick up where we left off without the user having
        # to pass it in (if unlimited is False).
        filters = {'deleted': False}
        if instance_uuid:
            filters['uuid'] = instance_uuid
        instances = objects.InstanceList.get_by_filters(
            ctxt, filters=filters, sort_key='created_at', sort_dir='asc',
            limit=max_count, expected_attrs=['flavor'])
        while instances:
            output(_('Found %s candidate instances.') % len(instances))
            # For each instance in this list, we need to see if it has
            # allocations in placement and if so, assume it's correct and
            # continue.
            for instance in instances:
                if self._heal_allocations_for_instance(
                        ctxt, instance, node_cache, output, placement,
                        dry_run, heal_port_allocations, neutron, force):
                    num_processed += 1

                # Make sure we don't go over the max count. Note that we
                # don't include instances that already have allocations in the
                # max_count number, only the number of instances that have
                # successfully created allocations.
                # If a specific instance was requested we return here as well.
                if (not unlimited and num_processed == max_count) or instance_uuid:
                    return num_processed

            # Use a marker to get the next page of instances in this cell.
            # Note that InstanceList doesn't support slice notation.
            marker = instances[len(instances) - 1].uuid
            instances = objects.InstanceList.get_by_filters(
                ctxt, filters=filters, sort_key='created_at', sort_dir='asc',
                limit=max_count, marker=marker, expected_attrs=['flavor'])

        return num_processed

    @action_description(
        _("Iterates over non-cell0 cells looking for instances which do "
          "not have allocations in the Placement service, or have incomplete "
          "consumer project_id/user_id values in existing allocations or "
          "missing allocations for ports having resource request, and "
          "which are not undergoing a task state transition. For each "
          "instance found, allocations are created (or updated) against the "
          "compute node resource provider for that instance based on the "
          "flavor associated with the instance. This command requires that "
          "the [api_database]/connection and [placement] configuration "
          "options are set."))
    @args('--max-count', metavar='<max_count>', dest='max_count',
          help='Maximum number of instances to process. If not specified, all '
               'instances in each cell will be mapped in batches of 50. '
               'If you have a large number of instances, consider specifying '
               'a custom value and run the command until it exits with '
               '0 or 4.')
    @args('--verbose', action='store_true', dest='verbose', default=False,
          help='Provide verbose output during execution.')
    @args('--dry-run', action='store_true', dest='dry_run', default=False,
          help='Runs the command and prints output but does not commit any '
               'changes. The return code should be 4.')
    @args('--instance', metavar='<instance_uuid>', dest='instance_uuid',
          help='UUID of a specific instance to process. If specified '
               '--max-count has no effect. '
               'The --cell and --instance options are mutually exclusive.')
    @args('--skip-port-allocations', action='store_true',
          dest='skip_port_allocations', default=False,
          help='Skip the healing of the resource allocations of bound ports. '
               'E.g. healing bandwidth resource allocation for ports having '
               'minimum QoS policy rules attached. If your deployment does '
               'not use such a feature then the performance impact of '
               'querying neutron ports for each instance can be avoided with '
               'this flag.')
    @args('--cell', metavar='<cell_uuid>', dest='cell_uuid',
          help='Heal allocations within a specific cell. '
               'The --cell and --instance options are mutually exclusive.')
    @args('--force', action='store_true', dest='force', default=False,
          help='Force heal allocations. Requires the --instance argument.')
    def heal_allocations(self, max_count=None, verbose=False, dry_run=False,
                         instance_uuid=None, skip_port_allocations=False,
                         cell_uuid=None, force=False):
        """Heals instance allocations in the Placement service

        Return codes:

        * 0: Command completed successfully and allocations were created.
        * 1: --max-count was reached and there are more instances to process.
        * 2: Unable to find a compute node record for a given instance.
        * 3: Unable to create (or update) allocations for an instance against
          its compute node resource provider.
        * 4: Command completed successfully but no allocations were created.
        * 5: Unable to query ports from neutron
        * 6: Unable to update ports in neutron
        * 7: Cannot roll back neutron port updates. Manual steps needed.
        * 8: Cannot heal instance with vGPU or Cyborg resource request
        * 127: Invalid input.
        """
        # NOTE(mriedem): Thoughts on ways to expand this:
        # - allow filtering on enabled/disabled cells
        # - add a force option to force allocations for instances whose
        #   task_state is not None (would get complicated during a migration);
        #   for example, this could clean up ironic instances that have
        #   allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom
        #   resource class
        # - deal with nested resource providers?

        heal_port_allocations = not skip_port_allocations

        output = lambda msg: None
        if verbose:
            output = lambda msg: print(msg)

        # If the user has provided both --cell and --instance,
        # throw an error.
        if instance_uuid and cell_uuid:
            print(_('The --cell and --instance options '
                    'are mutually exclusive.'))
            return 127

        if force and not instance_uuid:
            print(_('The --instance flag is required '
                    'when using --force flag.'))
            return 127

        # TODO(mriedem): Rather than --max-count being both a total and batch
        # count, should we have separate options to be specific, i.e. --total
        # and --batch-size? Then --batch-size defaults to 50 and --total
        # defaults to None to mean unlimited.
        if instance_uuid:
            max_count = 1
            unlimited = False
        elif max_count is not None:
            try:
                max_count = int(max_count)
            except ValueError:
                max_count = -1
            unlimited = False
            if max_count < 1:
                print(_('Must supply a positive integer for --max-count.'))
                return 127
        else:
            max_count = 50
            unlimited = True
            output(_('Running batches of %i until complete') % max_count)

        ctxt = context.get_admin_context()
        # If we are going to process a specific instance, just get the cell
        # it is in up front.
        if instance_uuid:
            try:
                im = objects.InstanceMapping.get_by_instance_uuid(
                    ctxt, instance_uuid)
                cells = objects.CellMappingList(objects=[im.cell_mapping])
            except exception.InstanceMappingNotFound:
                print('Unable to find cell for instance %s, is it mapped? Try '
                      'running "nova-manage cell_v2 verify_instance" or '
                      '"nova-manage cell_v2 map_instances".' %
                      instance_uuid)
                return 127
        elif cell_uuid:
            try:
                # validate cell_uuid
                cell = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
                # create CellMappingList
                cells = objects.CellMappingList(objects=[cell])
            except exception.CellMappingNotFound:
                print(_('Cell with uuid %s was not found.') % cell_uuid)
                return 127
        else:
            cells = objects.CellMappingList.get_all(ctxt)
            if not cells:
                output(_('No cells to process.'))
                return 4

        placement = report.report_client_singleton()

        neutron = None
        if heal_port_allocations:
            neutron = neutron_api.get_client(ctxt, admin=True)

        num_processed = 0
        # TODO(mriedem): Use context.scatter_gather_skip_cell0.
        for cell in cells:
            # Skip cell0 since that is where instances go that do not get
            # scheduled and hence would not have allocations against a host.
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            output(_('Looking for instances in cell: %s') % cell.identity)

            limit_per_cell = max_count
            if not unlimited:
                # Adjust the limit for the next cell. For example, if the user
                # only wants to process a total of 100 instances and we did
                # 75 in cell1, then we only need 25 more from cell2 and so on.
                limit_per_cell = max_count - num_processed

            with context.target_cell(ctxt, cell) as cctxt:
                try:
                    num_processed += self._heal_instances_in_cell(
                        cctxt, limit_per_cell, unlimited, output, placement,
                        dry_run, instance_uuid, heal_port_allocations, neutron,
                        force)
                except exception.ComputeHostNotFound as e:
                    print(e.format_message())
                    return 2
                except (
                    exception.AllocationCreateFailed,
                    exception.AllocationUpdateFailed,
                    exception.PlacementAPIConnectFailure
                ) as e:
                    print(e.format_message())
                    return 3
                except exception.UnableToQueryPorts as e:
                    print(e.format_message())
                    return 5
                except exception.UnableToUpdatePorts as e:
                    print(e.format_message())
                    return 6
                except exception.UnableToRollbackPortUpdates as e:
                    print(e.format_message())
                    return 7
                except (
                    exception.HealvGPUAllocationNotSupported,
                    exception.HealDeviceProfileAllocationNotSupported,
                ) as e:
                    print(e.format_message())
                    return 8

            # Make sure we don't go over the max count. Note that we
            # don't include instances that already have allocations in the
            # max_count number, only the number of instances that have
            # successfully created allocations.
            # If a specific instance was provided then we'll just exit
            # the loop and process it below (either return 4 or 0).
            if num_processed == max_count and not instance_uuid:
                output(_('Max count reached. Processed %s instances.')
                       % num_processed)
                return 1

        output(_('Processed %s instances.') % num_processed)
        if not num_processed:
            return 4
        return 0
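
    # Typical invocations, assuming the usual 'placement' command group used
    # for this class (values are placeholders):
    #
    #   # dry run over all non-cell0 cells with verbose output
    #   nova-manage placement heal_allocations --dry-run --verbose
    #
    #   # force re-creation of the allocation of a single instance
    #   nova-manage placement heal_allocations --instance <instance_uuid> \
    #       --force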

    @staticmethod
    def _get_rp_uuid_for_host(ctxt, host):
        """Finds the resource provider (compute node) UUID for the given host.

        :param ctxt: cell-targeted nova RequestContext
        :param host: name of the compute host
        :returns: The UUID of the resource provider (compute node) for the host
        :raises: nova.exception.HostMappingNotFound if no host_mappings record
            is found for the host; indicates
            "nova-manage cell_v2 discover_hosts" needs to be run on the cell.
        :raises: nova.exception.ComputeHostNotFound if no compute_nodes record
            is found in the cell database for the host; indicates the
            nova-compute service on that host might need to be restarted.
        :raises: nova.exception.TooManyComputesForHost if there are more than
            one compute_nodes records in the cell database for the host which
            is only possible (under normal circumstances) for ironic hosts but
            ironic hosts are not currently supported with host aggregates so
            if more than one compute node is found for the host, it is
            considered an error which the operator will need to resolve
            manually.
        """
        # Get the host mapping to determine which cell it's in.
        hm = objects.HostMapping.get_by_host(ctxt, host)
        # Now get the compute node record for the host from the cell.
        with context.target_cell(ctxt, hm.cell_mapping) as cctxt:
            # There should really only be one, since only ironic
            # hosts can have multiple nodes, and you can't have
            # ironic hosts in aggregates for that reason. If we
            # find more than one, it's an error.
            nodes = objects.ComputeNodeList.get_all_by_host(
                cctxt, host)

            if len(nodes) > 1:
                # This shouldn't happen, so we need to bail since we
                # won't know which node to use.
                raise exception.TooManyComputesForHost(
                    num_computes=len(nodes), host=host)
            return nodes[0].uuid

    @action_description(
        _("Mirrors compute host aggregates to resource provider aggregates "
          "in the Placement service. Requires the [api_database] and "
          "[placement] sections of the nova configuration file to be "
          "populated."))
    @args('--verbose', action='store_true', dest='verbose', default=False,
          help='Provide verbose output during execution.')
    # TODO(mriedem): Add an option for the 'remove aggregate' behavior.
    # We know that we want to mirror hosts aggregate membership to
    # placement, but regarding removal, what if the operator or some external
    # tool added the resource provider to an aggregate but there is no matching
    # host aggregate, e.g. ironic nodes or shared storage provider
    # relationships?
    # TODO(mriedem): Probably want an option to pass a specific host instead of
    # doing all of them.
    def sync_aggregates(self, verbose=False):
        """Synchronizes nova host aggregates with resource provider aggregates

        Adds nodes to missing provider aggregates in Placement.

        NOTE: Depending on the size of your deployment and the number of
        compute hosts in aggregates, this command could cause a non-negligible
        amount of traffic to the placement service and therefore is
        recommended to be run during maintenance windows.

        Return codes:

        * 0: Successful run
        * 1: A host was found with more than one matching compute node record
        * 2: An unexpected error occurred while working with the placement API
        * 3: Failed updating provider aggregates in placement
        * 4: Host mappings not found for one or more host aggregate members
        * 5: Compute node records not found for one or more hosts
        * 6: Resource provider not found by uuid for a given host
        """
        # Start by getting all host aggregates.
        ctxt = context.get_admin_context()
        aggregate_api = api.AggregateAPI()
        placement = aggregate_api.placement_client
        aggregates = aggregate_api.get_aggregate_list(ctxt)
        # Now we're going to loop over the existing compute hosts in aggregates
        # and check to see if their corresponding resource provider, found via
        # the host's compute node uuid, is in the same aggregate. If not, we
        # add the resource provider to the aggregate in Placement.
        output = lambda msg: None
        if verbose:
            output = lambda msg: print(msg)
        output(_('Filling in missing placement aggregates'))
        # Since hosts can be in more than one aggregate, keep track of the host
        # to its corresponding resource provider uuid to avoid redundant
        # lookups.
        host_to_rp_uuid = {}
        unmapped_hosts = set()  # keep track of any missing host mappings
        computes_not_found = set()  # keep track of missing nodes
        providers_not_found = {}  # map of hostname to missing provider uuid
        for aggregate in aggregates:
            output(_('Processing aggregate: %s') % aggregate.name)
            for host in aggregate.hosts:
                output(_('Processing host: %s') % host)
                rp_uuid = host_to_rp_uuid.get(host)
                if not rp_uuid:
                    try:
                        rp_uuid = self._get_rp_uuid_for_host(ctxt, host)
                        host_to_rp_uuid[host] = rp_uuid
                    except exception.HostMappingNotFound:
                        # Don't fail on this now, we can dump it at the end.
                        unmapped_hosts.add(host)
                        continue
                    except exception.ComputeHostNotFound:
                        # Don't fail on this now, we can dump it at the end.
                        computes_not_found.add(host)
                        continue
                    except exception.TooManyComputesForHost as e:
                        # TODO(mriedem): Should we treat this like the other
                        # errors and not fail immediately but dump at the end?
                        print(e.format_message())
                        return 1

                # We've got our compute node record, so now we can ensure that
                # the matching resource provider, found via compute node uuid,
                # is in the same aggregate in placement, found via aggregate
                # uuid.
                try:
                    placement.aggregate_add_host(ctxt, aggregate.uuid,
                                                 rp_uuid=rp_uuid)
                    output(_('Successfully added host (%(host)s) and '
                             'provider (%(provider)s) to aggregate '
                             '(%(aggregate)s).') %
                           {'host': host, 'provider': rp_uuid,
                            'aggregate': aggregate.uuid})
                except exception.ResourceProviderNotFound:
                    # The resource provider wasn't found. Store this for later.
                    providers_not_found[host] = rp_uuid
                except exception.ResourceProviderAggregateRetrievalFailed as e:
                    print(e.message)
                    return 2
                except exception.NovaException as e:
                    # The exception message is too generic in this case
                    print(_('Failed updating provider aggregates for '
                            'host (%(host)s), provider (%(provider)s) '
                            'and aggregate (%(aggregate)s). Error: '
                            '%(error)s') %
                          {'host': host, 'provider': rp_uuid,
                           'aggregate': aggregate.uuid,
                           'error': e.message})
                    return 3

        # Now do our error handling. Note that there is no real priority on
        # the error code we return. We want to dump all of the issues we hit
        # so the operator can fix them before re-running the command, but
        # whether we return 4 or 5 or 6 doesn't matter.
        return_code = 0
        if unmapped_hosts:
            print(_('The following hosts were found in nova host aggregates '
                    'but no host mappings were found in the nova API DB. Run '
                    '"nova-manage cell_v2 discover_hosts" and then retry. '
                    'Missing: %s') % ','.join(unmapped_hosts))
            return_code = 4

        if computes_not_found:
            print(_('Unable to find matching compute_nodes record entries in '
                    'the cell database for the following hosts; does the '
                    'nova-compute service on each host need to be restarted? '
                    'Missing: %s') % ','.join(computes_not_found))
            return_code = 5

        if providers_not_found:
            print(_('Unable to find matching resource provider record in '
                    'placement with uuid for the following hosts: %s. Try '
                    'restarting the nova-compute service on each host and '
                    'then retry.') %
                  ','.join('(%s=%s)' % (host, providers_not_found[host])
                           for host in sorted(providers_not_found.keys())))
            return_code = 6

        return return_code
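
    # Example invocation, assuming the usual 'placement' command group used
    # for this class:
    #
    #   nova-manage placement sync_aggregates --verbose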

    def _get_instances_and_current_migrations(self, ctxt, cn_uuid):
        if self.cn_uuid_mapping.get(cn_uuid):
            cell_uuid, cn_host, cn_node = self.cn_uuid_mapping[cn_uuid]
        else:
            # We need to find the compute node record from all cells.
            results = context.scatter_gather_skip_cell0(
                ctxt, objects.ComputeNode.get_by_uuid, cn_uuid)
            for result_cell_uuid, result in results.items():
                if not context.is_cell_failure_sentinel(result):
                    cn = result
                    cell_uuid = result_cell_uuid
                    break
            else:
                return False
            cn_host, cn_node = (cn.host, cn.hypervisor_hostname)
            self.cn_uuid_mapping[cn_uuid] = (cell_uuid, cn_host, cn_node)
        cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)

        # Get all the active instances from this compute node
        if self.instances_mapping.get(cn_uuid):
            inst_uuids = self.instances_mapping[cn_uuid]
        else:
            # Get the instance list record from the cell.
            with context.target_cell(ctxt, cell_mapping) as cctxt:
                instances = objects.InstanceList.get_by_host_and_node(
                    cctxt, cn_host, cn_node, expected_attrs=[])
            inst_uuids = [instance.uuid for instance in instances]
            self.instances_mapping[cn_uuid] = inst_uuids

        # Get all *active* migrations for this compute node
        # NOTE(sbauza): Since migrations are transient, it's better to not
        # cache the results as they could be stale
        with context.target_cell(ctxt, cell_mapping) as cctxt:
            migs = objects.MigrationList.get_in_progress_by_host_and_node(
                cctxt, cn_host, cn_node)
        mig_uuids = [migration.uuid for migration in migs]

        return (inst_uuids, mig_uuids)

    def _delete_allocations_from_consumer(self, ctxt, placement, provider,
                                          consumer_uuid, consumer_type):
        """Deletes allocations from a resource provider with consumer UUID.

        :param ctxt: nova.context.RequestContext
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param provider: Resource Provider to look at.
        :param consumer_uuid: the consumer UUID having allocations.
        :param consumer_type: the type of consumer,
            either 'instance' or 'migration'
        :returns: bool whether the allocations were deleted.
        """
        # We need to be careful and only remove the allocations
        # against this specific RP or we would delete the
        # whole instance usage and then it would require some
        # healing.
        # TODO(sbauza): Remove this extra check once placement
        # supports querying allocation delete on both
        # consumer and resource provider parameters.
        allocations = placement.get_allocs_for_consumer(
            ctxt, consumer_uuid)
        if len(allocations['allocations']) > 1:
            # This consumer has resources spread among multiple RPs (think
            # nested or shared for example)
            # We then need to just update the usage to remove
            # the orphaned resources on the specific RP
            del allocations['allocations'][provider['uuid']]
            try:
                placement.put_allocations(
                    ctxt, consumer_uuid, allocations)
            except exception.AllocationUpdateFailed:
                return False

        else:
            try:
                placement.delete_allocation_for_instance(
                    ctxt, consumer_uuid, consumer_type, force=True)
            except exception.AllocationDeleteFailed:
                return False
        return True

    def _check_orphaned_allocations_for_provider(self, ctxt, placement,
                                                 output, provider,
                                                 delete):
        """Finds orphaned allocations for a specific resource provider.

        :param ctxt: nova.context.RequestContext
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param output: function that takes a single message for verbose output
        :param provider: Resource Provider to look at.
        :param delete: deletes the found orphaned allocations.
        :return: a tuple (<number of orphaned allocs>, <number of faults>)
        """
        num_processed = 0
        faults = 0

        # TODO(sbauza): Are we sure we have all Nova RCs ?
        # FIXME(sbauza): Possibly use consumer types once Placement API
        # supports them.
        # NOTE(sbauza): We check allocations having *any* below RC, not having
        # *all* of them.
        NOVA_RCS = [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB, orc.VGPU,
                    orc.NET_BW_EGR_KILOBIT_PER_SEC,
                    orc.NET_BW_IGR_KILOBIT_PER_SEC,
                    orc.PCPU, orc.MEM_ENCRYPTION_CONTEXT]

        # Since the RP can be a child RP, we need to get the root RP as it's
        # the compute node UUID
        # NOTE(sbauza): In case Placement doesn't support 1.14 microversion,
        # that means we don't have nested RPs.
        # Since we ask for microversion 1.14, all RPs have a root RP UUID.
        cn_uuid = provider.get("root_provider_uuid")
        # Now get all the existing instances and active migrations for this
        # compute node
        result = self._get_instances_and_current_migrations(ctxt, cn_uuid)
        if result is False:
            # We don't want to hard stop here because the compute service
            # could have disappeared while we still have orphaned allocations.
            output(_('The compute node for UUID %s can not be '
                     'found') % cn_uuid)
        inst_uuids, mig_uuids = result or ([], [])
        try:
            pallocs = placement.get_allocations_for_resource_provider(
                ctxt, provider['uuid'])
        except exception.ResourceProviderAllocationRetrievalFailed:
            print(_('Not able to find allocations for resource '
                    'provider %s.') % provider['uuid'])
            raise

        # Verify all allocations for each consumer UUID
        for consumer_uuid, consumer_resources in pallocs.allocations.items():
            consumer_allocs = consumer_resources['resources']
            if any(rc in NOVA_RCS
                   for rc in consumer_allocs):
                # We reset the consumer type for each allocation
                consumer_type = None
                # This is an allocation for Nova resources
                # We need to guess whether the instance was deleted
                # or if the instance is currently migrating
                if not (consumer_uuid in inst_uuids or
                        consumer_uuid in mig_uuids):
                    # By default we suspect the orphaned allocation was for a
                    # migration...
                    consumer_type = 'migration'
                    if consumer_uuid not in inst_uuids:
                        # ... but if we can't find it either for an instance,
                        # that means it was for this.
                        consumer_type = 'instance'
                if consumer_type is not None:
                    output(_('Allocations were set against consumer UUID '
                             '%(consumer_uuid)s but no existing instances or '
                             'active migrations are related. ')
                           % {'consumer_uuid': consumer_uuid})
                    if delete:
                        deleted = self._delete_allocations_from_consumer(
                            ctxt, placement, provider, consumer_uuid,
                            consumer_type)
                        if not deleted:
                            print(_('Not able to delete allocations '
                                    'for consumer UUID %s')
                                  % consumer_uuid)
                            faults += 1
                            continue
                        output(_('Deleted allocations for consumer UUID '
                                 '%(consumer_uuid)s on Resource Provider '
                                 '%(rp)s: %(allocations)s')
                               % {'consumer_uuid': consumer_uuid,
                                  'rp': provider['uuid'],
                                  'allocations': consumer_allocs})
                    else:
                        output(_('Allocations for consumer UUID '
                                 '%(consumer_uuid)s on Resource Provider '
                                 '%(rp)s can be deleted: '
                                 '%(allocations)s')
                               % {'consumer_uuid': consumer_uuid,
                                  'rp': provider['uuid'],
                                  'allocations': consumer_allocs})
                    num_processed += 1
        return (num_processed, faults)

    # TODO(sbauza): Move this to the scheduler report client ?
    def _get_resource_provider(self, context, placement, uuid):
        """Returns a single Resource Provider by its UUID.

        :param context: The nova.context.RequestContext auth context
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param uuid: A specific Resource Provider UUID
        :return: the existing resource provider.
        :raises: keystoneauth1.exceptions.base.ClientException on failure to
            communicate with the placement API
        """

        resource_providers = self._get_resource_providers(context, placement,
                                                          uuid=uuid)
        if not resource_providers:
            # The endpoint never returns a 404, it rather returns an empty list
            raise exception.ResourceProviderNotFound(name_or_uuid=uuid)
        return resource_providers[0]

    def _get_resource_providers(self, context, placement, **kwargs):
        """Returns all resource providers regardless of their relationships.

        :param context: The nova.context.RequestContext auth context
        :param placement: nova.scheduler.client.report.SchedulerReportClient
            to communicate with the Placement service API.
        :param kwargs: extra attributes for the query string
        :return: list of resource providers.
        :raises: keystoneauth1.exceptions.base.ClientException on failure to
            communicate with the placement API
        """
        url = '/resource_providers'
        if 'uuid' in kwargs:
            url += '?uuid=%s' % kwargs['uuid']

        resp = placement.get(url, global_request_id=context.global_id,
                             version='1.14')
        if resp is None:
            raise exception.PlacementAPIConnectFailure()

        data = resp.json()
        resource_providers = data.get('resource_providers')

        return resource_providers

    @action_description(
        _("Audits orphaned allocations that are no longer corresponding to "
          "existing instance resources. This command requires that "
          "the [api_database]/connection and [placement] configuration "
          "options are set."))
    @args('--verbose', action='store_true', dest='verbose', default=False,
          help='Provide verbose output during execution.')
    @args('--resource_provider', metavar='<provider_uuid>',
          dest='provider_uuid',
          help='UUID of a specific resource provider to verify.')
    @args('--delete', action='store_true', dest='delete', default=False,
          help='Deletes orphaned allocations that were found.')
    def audit(self, verbose=False, provider_uuid=None, delete=False):
        """Provides information about orphaned allocations that can be removed

        Return codes:

        * 0: Command completed successfully and no orphaned allocations exist.
        * 1: An unexpected error happened during run.
        * 3: Orphaned allocations were detected.
        * 4: Orphaned allocations were detected and deleted.
        * 127: Invalid input.
        """

        ctxt = context.get_admin_context()
        output = lambda msg: None
        if verbose:
            output = lambda msg: print(msg)

        placement = report.report_client_singleton()
        # Resets two in-memory dicts for knowing instances per compute node
        self.cn_uuid_mapping = collections.defaultdict(tuple)
        self.instances_mapping = collections.defaultdict(list)

        num_processed = 0
        faults = 0

        if provider_uuid:
            try:
                resource_provider = self._get_resource_provider(
                    ctxt, placement, provider_uuid)
            except exception.ResourceProviderNotFound:
                print(_('Resource provider with UUID %s does not exist.') %
                      provider_uuid)
                return 127
            resource_providers = [resource_provider]
        else:
            resource_providers = self._get_resource_providers(ctxt, placement)

        for provider in resource_providers:
            nb_p, faults = self._check_orphaned_allocations_for_provider(
                ctxt, placement, output, provider, delete)
            num_processed += nb_p
            if faults > 0:
                print(_('The Resource Provider %s had problems when '
                        'deleting allocations. Stopping now. Please fix the '
                        'problem by hand and run again.') %
                      provider['uuid'])
                return 1
        if num_processed > 0:
            suffix = 's.' if num_processed > 1 else '.'
            output(_('Processed %(num)s allocation%(suffix)s')
                   % {'num': num_processed,
                      'suffix': suffix})
            return 4 if delete else 3
        return 0
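
    # Example invocations, assuming the usual 'placement' command group used
    # for this class (<rp_uuid> is a placeholder):
    #
    #   # only report orphaned allocations
    #   nova-manage placement audit --verbose
    #
    #   # delete orphaned allocations for one resource provider
    #   nova-manage placement audit --delete --resource_provider <rp_uuid>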
|
|
|
|
|
|
class LibvirtCommands(object):
|
|
"""Commands for managing libvirt instances"""
|
|
|
|
@action_description(
|
|
_("Fetch the stored machine type of the instance from the database."))
|
|
@args('instance_uuid', metavar='<instance_uuid>',
|
|
help='UUID of instance to fetch the machine type for')
|
|
def get_machine_type(self, instance_uuid=None):
|
|
"""Fetch the stored machine type of the instance from the database.
|
|
|
|
Return codes:
|
|
|
|
* 0: Command completed successfully.
|
|
* 1: An unexpected error happened.
|
|
* 2: Unable to find instance or instance mapping.
|
|
* 3: No machine type found for the instance.
|
|
|
|
"""
        try:
            ctxt = context.get_admin_context()
            mtype = machine_type_utils.get_machine_type(ctxt, instance_uuid)
            if mtype:
                print(mtype)
                return 0
            else:
                print(_('No machine type registered for instance %s') %
                      instance_uuid)
                return 3
        except (exception.InstanceNotFound,
                exception.InstanceMappingNotFound) as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1

    @action_description(
        _("Set or update the stored machine type of the instance in the "
          "database. This is only allowed for instances with a STOPPED, "
          "SHELVED or SHELVED_OFFLOADED vm_state."))
    @args('instance_uuid', metavar='<instance_uuid>',
          help='UUID of instance to update')
    @args('machine_type', metavar='<machine_type>',
          help='Machine type to set')
    @args('--force', action='store_true', default=False, dest='force',
          help='Force the update of the stored machine type')
    def update_machine_type(
        self,
        instance_uuid=None,
        machine_type=None,
        force=False
    ):
        """Set or update the machine type of a given instance.

        Return codes:

        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        * 2: Unable to find the instance or instance cell mapping.
        * 3: Invalid instance vm_state.
        * 4: Unable to move between underlying machine types (pc to q35 etc)
             or to older versions.
        * 5: Unsupported machine type.
        """
        ctxt = context.get_admin_context()
        if force:
            print(_("Forcing update of machine type."))

        try:
            rtype, ptype = machine_type_utils.update_machine_type(
                ctxt, instance_uuid, machine_type, force=force)
        except exception.UnsupportedMachineType as e:
            print(str(e))
            return 5
        except exception.InvalidMachineTypeUpdate as e:
            print(str(e))
            return 4
        except exception.InstanceInvalidState as e:
            print(str(e))
            return 3
        except (
            exception.InstanceNotFound,
            exception.InstanceMappingNotFound,
        ) as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1

        print(_("Updated instance %(instance_uuid)s machine type to "
                "%(machine_type)s (previously %(previous_type)s)") %
              {'instance_uuid': instance_uuid,
               'machine_type': rtype,
               'previous_type': ptype})
        return 0

    @action_description(
        _("List the UUIDs of instances that do not have hw_machine_type set "
          "in their image metadata"))
    @args('--cell-uuid', metavar='<cell_uuid>', dest='cell_uuid',
          required=False, help='UUID of cell from which to list instances')
    def list_unset_machine_type(self, cell_uuid=None):
        """List the UUIDs of instances without image_hw_machine_type set

        Return codes:
        * 0: Command completed successfully, no instances found.
        * 1: An unexpected error happened.
        * 2: Unable to find cell mapping.
        * 3: Instances found without hw_machine_type set.
        """
        try:
            instance_list = machine_type_utils.get_instances_without_type(
                context.get_admin_context(), cell_uuid)
        except exception.CellMappingNotFound as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1

        if instance_list:
            print('\n'.join(i.uuid for i in instance_list))
            return 3
        else:
            print(_("No instances found without hw_machine_type set."))
            return 0


class VolumeAttachmentCommands(object):

    @action_description(_("Show the details of a given volume attachment."))
    @args(
        'instance_uuid', metavar='<instance_uuid>',
        help='UUID of the instance')
    @args(
        'volume_id', metavar='<volume_id>',
        help='UUID of the volume')
    @args(
        '--connection_info', action='store_true',
        default=False, dest='connection_info', required=False,
        help='Only display the connection_info of the volume attachment.')
    @args(
        '--json', action='store_true',
        default=False, dest='json', required=False,
        help='Display output as json without a table.')
    def show(
        self,
        instance_uuid=None,
        volume_id=None,
        connection_info=False,
        json=False
    ):
        """Show attributes of a given volume attachment.

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        * 2: Instance not found.
        * 3: Volume is not attached to instance.
        """
        try:
            ctxt = context.get_admin_context()
            im = objects.InstanceMapping.get_by_instance_uuid(
                ctxt, instance_uuid)
            with context.target_cell(ctxt, im.cell_mapping) as cctxt:
                bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
                    cctxt, volume_id, instance_uuid)
            if connection_info and json:
                print(bdm.connection_info)
            elif connection_info:
                print(format_dict(jsonutils.loads(bdm.connection_info)))
            elif json:
                print(jsonutils.dumps(bdm))
            else:
                print(format_dict(bdm))
            return 0
        except exception.VolumeBDMNotFound as e:
            print(str(e))
            return 3
        except (
            exception.InstanceNotFound,
            exception.InstanceMappingNotFound,
        ) as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1

    @action_description(_('Show the host connector for this host'))
    @args(
        '--json', action='store_true',
        default=False, dest='json', required=False,
        help='Display output as json without a table.')
    def get_connector(self, json=False):
        """Show the host connector for this host.

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        """
        try:
            root_helper = utils.get_root_helper()
            host_connector = connector.get_connector_properties(
                root_helper, CONF.my_block_storage_ip,
                CONF.libvirt.volume_use_multipath,
                enforce_multipath=True,
                host=CONF.host)
            if json:
                print(jsonutils.dumps(host_connector))
            else:
                print(format_dict(host_connector))
            return 0
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1

    def _refresh(self, instance_uuid, volume_id, connector):
        """Refresh the bdm.connection_info associated with a volume attachment

        Unlike the current driver BDM implementation under
        nova.virt.block_device.DriverVolumeBlockDevice.refresh_connection_info
        that simply GETs an existing volume attachment from cinder, this
        method cleans up any existing volume connections from the host before
        creating a fresh attachment in cinder and populates the underlying BDM
        with connection_info from the new attachment.

        We can do that here as the command requires that the instance is
        stopped, something that isn't always the case with the current driver
        BDM approach, and thus the two are kept separate for the time being.

        :param instance_uuid: UUID of instance
        :param volume_id: ID of volume attached to the instance
        :param connector: Connector with which to create the new attachment
        :return status_code: volume-refresh status_code 0 on success
        """

        ctxt = context.get_admin_context()
        im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid)
        with context.target_cell(ctxt, im.cell_mapping) as cctxt:

            instance = objects.Instance.get_by_uuid(cctxt, instance_uuid)
            bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
                cctxt, volume_id, instance_uuid)

            if instance.vm_state != obj_fields.InstanceState.STOPPED:
                raise exception.InstanceInvalidState(
                    instance_uuid=instance_uuid, attr='vm_state',
                    state=instance.vm_state,
                    method='refresh connection_info (must be stopped)')

            locking_reason = (
                f'Refreshing connection_info for BDM {bdm.uuid} '
                f'associated with instance {instance_uuid} and volume '
                f'{volume_id}.')

            with locked_instance(im.cell_mapping, instance, locking_reason):
                return self._do_refresh(
                    cctxt, instance, volume_id, bdm, connector)

    def _do_refresh(self, cctxt, instance,
                    volume_id, bdm, connector):
        volume_api = cinder.API()
        compute_rpcapi = rpcapi.ComputeAPI()

        new_attachment_id = None
        instance_action = None
        try:
            # Log this as an instance action so operators and users are
            # aware that this has happened.
            instance_action = objects.InstanceAction.action_start(
                cctxt, instance.uuid,
                instance_actions.NOVA_MANAGE_REFRESH_VOLUME_ATTACHMENT)

            # Create a blank attachment to keep the volume reserved
            new_attachment_id = volume_api.attachment_create(
                cctxt, volume_id, instance.uuid)['id']

            # RPC call to the compute to cleanup the connections, which
            # will in turn unmap the volume from the compute host
            if instance.host == connector['host']:
                compute_rpcapi.remove_volume_connection(
                    cctxt, instance, volume_id, instance.host,
                    delete_attachment=True)
            else:
                msg = (
                    f"The compute host '{connector['host']}' in the "
                    f"connector does not match the instance host "
                    f"'{instance.host}'.")
                raise exception.HostConflict(_(msg))

            # Update the attachment with host connector, this regenerates
            # the connection_info that we can now stash in the bdm.
            new_connection_info = volume_api.attachment_update(
                cctxt, new_attachment_id, connector,
                bdm.device_name)['connection_info']

            # Before we save it to the BDM ensure the serial is stashed as
            # is done in various other codepaths when attaching volumes.
            if 'serial' not in new_connection_info:
                new_connection_info['serial'] = bdm.volume_id

            # Save the new attachment id and connection_info to the DB
            bdm.attachment_id = new_attachment_id
            bdm.connection_info = jsonutils.dumps(new_connection_info)
            bdm.save()

            # Finally mark the attachment as complete, moving the volume
            # status from attaching to in-use ahead of the instance
            # restarting
            volume_api.attachment_complete(cctxt, new_attachment_id)
            return 0

        finally:
            # If the bdm.attachment_id wasn't updated make sure we clean
            # up any attachments created during the run.
            bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
                cctxt, volume_id, instance.uuid)
            if (
                new_attachment_id and
                bdm.attachment_id != new_attachment_id
            ):
                volume_api.attachment_delete(cctxt, new_attachment_id)

            # If we failed during attachment_update the bdm.attachment_id
            # has already been deleted so recreate it now to ensure the
            # volume is still associated with the instance and clear the
            # now stale connection_info.
            try:
                volume_api.attachment_get(cctxt, bdm.attachment_id)
            except exception.VolumeAttachmentNotFound:
                bdm.attachment_id = volume_api.attachment_create(
                    cctxt, volume_id, instance.uuid)['id']
                bdm.connection_info = None
                bdm.save()

            # Finish the instance action if it was created and started
            # TODO(lyarwood): While not really required we should store
            # the exec and traceback in here on failure.
            if instance_action:
                instance_action.finish()

    @action_description(
        _("Refresh the connection info for a given volume attachment"))
    @args(
        'instance_uuid', metavar='<instance_uuid>',
        help='UUID of the instance')
    @args(
        'volume_id', metavar='<volume_id>',
        help='UUID of the volume')
    @args(
        'connector_path', metavar='<connector_path>',
        help='Path to file containing the host connector in json format.')
    def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
        """Refresh the connection_info associated with a volume attachment

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        * 2: Connector path does not exist.
        * 3: Failed to open connector path.
        * 4: Instance does not exist.
        * 5: Instance state invalid.
        * 6: Volume is not attached to instance.
        * 7: Connector host is not correct.
        """
        try:
            # TODO(lyarwood): Make this optional and provide a rpcapi capable
            # of pulling this down from the target compute during this flow.
            if not os.path.exists(connector_path):
                raise exception.InvalidInput(
                    reason=f'Connector file not found at {connector_path}')

            # Read in the json connector file
            with open(connector_path, 'rb') as connector_file:
                connector = jsonutils.load(connector_file)

            # Refresh the volume attachment
            return self._refresh(instance_uuid, volume_id, connector)

        except exception.HostConflict as e:
            print(
                f"The command 'nova-manage volume_attachment get_connector' "
                f"may have been run on the wrong compute host. Or the "
                f"instance host may be wrong and in need of repair.\n{e}")
            return 7
        except exception.VolumeBDMNotFound as e:
            print(str(e))
            return 6
        except exception.InstanceInvalidState as e:
            print(str(e))
            return 5
        except (
            exception.InstanceNotFound,
            exception.InstanceMappingNotFound,
        ) as e:
            print(str(e))
            return 4
        except ValueError as e:
            print(
                f'Failed to open {connector_path}. Does it contain valid '
                f'connector_info data?\nError: {str(e)}'
            )
            return 3
        except OSError as e:
            print(str(e))
            return 3
        except exception.InvalidInput as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1


class ImagePropertyCommands:

    @action_description(_("Show the value of an instance image property."))
    @args(
        'instance_uuid', metavar='<instance_uuid>',
        help='UUID of the instance')
    @args(
        'image_property', metavar='<image_property>',
        help='Image property to show')
    def show(self, instance_uuid=None, image_property=None):
        """Show value of a given instance image property.

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        * 2: Instance not found.
        * 3: Image property not found.
        """
        try:
            ctxt = context.get_admin_context()
            im = objects.InstanceMapping.get_by_instance_uuid(
                ctxt, instance_uuid)
            with context.target_cell(ctxt, im.cell_mapping) as cctxt:
                instance = objects.Instance.get_by_uuid(
                    cctxt, instance_uuid, expected_attrs=['system_metadata'])
                property_value = instance.system_metadata.get(
                    f'image_{image_property}')
            if property_value:
                print(property_value)
                return 0
            else:
                print(f'Image property {image_property} not found '
                      f'for instance {instance_uuid}.')
                return 3
        except (
            exception.InstanceNotFound,
            exception.InstanceMappingNotFound,
        ) as e:
            print(str(e))
            return 2
        except Exception as e:
            print(f'Unexpected error, see nova-manage.log for the full '
                  f'trace: {str(e)}')
            LOG.exception('Unexpected error')
            return 1

    def _validate_image_properties(self, image_properties):
        """Validate the provided image property names and values

        :param image_properties: List of image property names and values
        """
        # Sanity check the format of the provided properties, this should be
        # in the format of name=value.
        if any(x for x in image_properties if '=' not in x):
            raise exception.InvalidInput(
                "--property should use the format key=value")

        # Transform the list of delimited properties to a dict
        image_properties = dict(prop.split('=') for prop in image_properties)

        # Validate the names of each property by checking against the o.vo
        # fields currently listed by ImageMetaProps. We can't use from_dict to
        # do this as it silently ignores invalid property keys.
        for image_property_name in image_properties.keys():
            if image_property_name not in objects.ImageMetaProps.fields:
                raise exception.InvalidImagePropertyName(
                    image_property_name=image_property_name)

        # Validate the values by creating an object from the provided dict.
        objects.ImageMetaProps.from_dict(image_properties)

        # Return the dict so we can update the instance system_metadata
        return image_properties

    def _update_image_properties(self, ctxt, instance, image_properties):
        """Update instance image properties

        :param ctxt: nova.context.RequestContext
        :param instance: The instance to update
        :param image_properties: List of image properties and values to update
        """
        # Check the state of the instance
        allowed_states = [
            obj_fields.InstanceState.STOPPED,
            obj_fields.InstanceState.SHELVED,
            obj_fields.InstanceState.SHELVED_OFFLOADED,
        ]
        if instance.vm_state not in allowed_states:
            raise exception.InstanceInvalidState(
                instance_uuid=instance.uuid, attr='vm_state',
                state=instance.vm_state,
                method='image_property set (must be STOPPED, SHELVED, OR '
                       'SHELVED_OFFLOADED).')

        # Validate the property names and values
        image_properties = self._validate_image_properties(image_properties)

        # Update the image properties and save the instance record
        for image_property, value in image_properties.items():
            instance.system_metadata[f'image_{image_property}'] = value

        request_spec = objects.RequestSpec.get_by_instance_uuid(
            ctxt, instance.uuid)
        request_spec.image = instance.image_meta

        # Save and return 0
        instance.save()
        request_spec.save()
        return 0

    @action_description(_(
        "Set the values of instance image properties stored in the database. "
        "This is only allowed for instances with a STOPPED, SHELVED or "
        "SHELVED_OFFLOADED vm_state."))
    @args(
        'instance_uuid', metavar='<instance_uuid>',
        help='UUID of the instance')
    @args(
        '--property', metavar='<image_property>', action='append',
        dest='image_properties',
        help='Image property to set using the format name=value. For example: '
             '--property hw_disk_bus=virtio --property hw_cdrom_bus=sata')
    def set(self, instance_uuid=None, image_properties=None):
        """Set instance image property values

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error happened.
        * 2: Unable to find instance.
        * 3: Instance is in an invalid state.
        * 4: Invalid input format.
        * 5: Invalid image property name.
        * 6: Invalid image property value.
        """
        try:
            ctxt = context.get_admin_context()
            im = objects.InstanceMapping.get_by_instance_uuid(
                ctxt, instance_uuid)
            with context.target_cell(ctxt, im.cell_mapping) as cctxt:
                instance = objects.Instance.get_by_uuid(
                    cctxt, instance_uuid, expected_attrs=['system_metadata'])
                return self._update_image_properties(
                    ctxt, instance, image_properties)
        except ValueError as e:
            print(str(e))
            return 6
        except exception.InvalidImagePropertyName as e:
            print(str(e))
            return 5
        except exception.InvalidInput as e:
            print(str(e))
            return 4
        except exception.InstanceInvalidState as e:
            print(str(e))
            return 3
        except (
            exception.InstanceNotFound,
            exception.InstanceMappingNotFound,
        ) as e:
            print(str(e))
            return 2
        except Exception as e:
            print('Unexpected error, see nova-manage.log for the full '
                  'trace: %s ' % str(e))
            LOG.exception('Unexpected error')
            return 1


class LimitsCommands():

    def _create_unified_limits(self, ctxt, keystone_api, service_id,
                               legacy_defaults, project_id, region_id, output,
                               dry_run):
        return_code = 0

        # Create registered (default) limits first.
        unified_to_legacy_names = dict(
            **local_limit.LEGACY_LIMITS, **placement_limit.LEGACY_LIMITS)

        legacy_to_unified_names = dict(
            zip(unified_to_legacy_names.values(),
                unified_to_legacy_names.keys()))

        # Handle the special case of PCPU. With legacy quotas, there is no
        # dedicated quota limit for PCPUs, so they share the quota limit for
        # VCPUs: 'cores'. With unified limits, class:PCPU has its own dedicated
        # quota limit, so we will just mirror the limit for class:VCPU and
        # create a limit with the same value for class:PCPU.
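        # For example, with a legacy default of cores=20 this would result in
        # registered limits of class:VCPU = 20 and class:PCPU = 20, assuming
        # neither already exists in Keystone.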
        if 'cores' in legacy_defaults:
            # Just make up a dummy legacy resource 'pcores' for this.
            legacy_defaults['pcores'] = legacy_defaults['cores']
            unified_to_legacy_names['class:PCPU'] = 'pcores'
            legacy_to_unified_names['pcores'] = 'class:PCPU'

        # Retrieve the existing resource limits from Keystone.
        registered_limits = keystone_api.registered_limits(region_id=region_id)

        unified_defaults = {
            rl.resource_name: rl.default_limit for rl in registered_limits}

        # f-strings don't seem to work well with the _() translation function.
        msg = f'Found default limits in Keystone: {unified_defaults} ...'
        output(_(msg))

        # Determine which resource limits are missing in Keystone so that we
        # can create them.
        output(_('Creating default limits in Keystone ...'))
        for resource, rlimit in legacy_defaults.items():
            resource_name = legacy_to_unified_names[resource]
            if resource_name not in unified_defaults:
                msg = f'Creating default limit: {resource_name} = {rlimit}'
                if region_id:
                    msg += f' in region {region_id}'
                output(_(msg))
                if not dry_run:
                    try:
                        keystone_api.create_registered_limit(
                            resource_name=resource_name,
                            default_limit=rlimit, region_id=region_id,
                            service_id=service_id)
                    except Exception as e:
                        msg = f'Failed to create default limit: {str(e)}'
                        print(_(msg))
                        return_code = 1
            else:
                existing_rlimit = unified_defaults[resource_name]
                msg = (f'A default limit: {resource_name} = {existing_rlimit} '
                       'already exists in Keystone, skipping ...')
                output(_(msg))

        # Create project limits if there are any.
        if not project_id:
            return return_code

        output(_('Reading project limits from the Nova API database ...'))
        legacy_projects = objects.Quotas.get_all_by_project(ctxt, project_id)
        legacy_projects.pop('project_id', None)
        msg = f'Found project limits in the database: {legacy_projects} ...'
        output(_(msg))

        # Handle the special case of PCPU again for project limits.
        if 'cores' in legacy_projects:
            # Just make up a dummy legacy resource 'pcores' for this.
            legacy_projects['pcores'] = legacy_projects['cores']

        # Retrieve existing limits from Keystone.
        project_limits = keystone_api.limits(
            project_id=project_id, region_id=region_id)
        unified_projects = {
            pl.resource_name: pl.resource_limit for pl in project_limits}
        msg = f'Found project limits in Keystone: {unified_projects} ...'
        output(_(msg))

        output(_('Creating project limits in Keystone ...'))
        for resource, plimit in legacy_projects.items():
            resource_name = legacy_to_unified_names[resource]
            if resource_name not in unified_projects:
                msg = (
                    f'Creating project limit: {resource_name} = {plimit} '
                    f'for project {project_id}')
                if region_id:
                    msg += f' in region {region_id}'
                output(_(msg))
                if not dry_run:
                    try:
                        keystone_api.create_limit(
                            resource_name=resource_name,
                            resource_limit=plimit, project_id=project_id,
                            region_id=region_id, service_id=service_id)
                    except Exception as e:
                        msg = f'Failed to create project limit: {str(e)}'
                        print(_(msg))
                        return_code = 1
            else:
                existing_plimit = unified_projects[resource_name]
                msg = (f'A project limit: {resource_name} = {existing_plimit} '
                       'already exists in Keystone, skipping ...')
                output(_(msg))

        return return_code

    @staticmethod
    def _get_resources_from_flavor(flavor, warn_output):
        resources = set()
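        # Illustrative note (hypothetical extra spec shown): an extra spec key
        # such as 'resources:CUSTOM_FOO' contributes 'class:CUSTOM_FOO', while
        # resources_for_limits() below reports the resource classes implied by
        # the flavor itself.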
        for spec in [
                s for s in flavor.extra_specs if s.startswith('resources:')]:
            resources.add('class:' + spec.lstrip('resources:'))
        try:
            for resource in scheduler_utils.resources_for_limits(
                    flavor, is_bfv=False):
                resources.add('class:' + resource)
        except Exception as e:
            # This is to be resilient about potential extra spec translation
            # bugs like https://bugs.launchpad.net/nova/+bug/2088831
            msg = _('An exception was raised: %s, skipping flavor %s'
                    % (str(e), flavor.flavorid))
            warn_output(msg)
        return resources

    def _get_resources_from_api_flavors(self, ctxt, output, warn_output):
        msg = _('Scanning flavors in API database for resource classes ...')
        output(msg)
        resources = set()
        marker = None
        while True:
            flavors = objects.FlavorList.get_all(ctxt, limit=500,
                                                 marker=marker)
            for flavor in flavors:
                resources |= self._get_resources_from_flavor(
                    flavor, warn_output)
            if not flavors:
                break
            marker = flavors[-1].flavorid
        return resources

    def _get_resources_from_embedded_flavors(self, ctxt, project_id, output,
                                             warn_output):
        project_str = f' project {project_id}' if project_id else ''
        msg = _('Scanning%s non-deleted instances\' embedded flavors for '
                'resource classes ...' % project_str)
        output(msg)
        resources = set()
        down_cell_uuids = set()
        marker = None
        while True:
            filters = {'deleted': False}
            if project_id:
                filters['project_id'] = project_id
            instances, cells = list_instances.get_instance_objects_sorted(
                ctxt, filters=filters, limit=500, marker=marker,
                expected_attrs=['flavor'], sort_keys=None, sort_dirs=None)
            down_cell_uuids |= set(cells)
            for instance in instances:
                resources |= self._get_resources_from_flavor(
                    instance.flavor, warn_output)
            if not instances:
                break
            marker = instances[-1].uuid
        return resources, down_cell_uuids

    def _scan_flavors(self, ctxt, keystone_api, service_id, project_id,
                      region_id, output, warn_output, verbose,
                      no_embedded_flavor_scan):
        return_code = 0

        # We already know we need to check class:DISK_GB because it is not a
        # legacy resource from a quota perspective.
        flavor_resources = set(['class:DISK_GB'])

        # Scan existing flavors to check whether any requestable resources are
        # missing registered limits in Keystone.
        flavor_resources |= self._get_resources_from_api_flavors(
            ctxt, output, warn_output)

        down_cell_uuids = None
        if not no_embedded_flavor_scan:
            # Scan the embedded flavors of non-deleted instances.
            resources, down_cell_uuids = (
                self._get_resources_from_embedded_flavors(
                    ctxt, project_id, output, warn_output))
            flavor_resources |= resources

        # Retrieve the existing resource limits from Keystone (we may have
        # added new ones above).
        registered_limits = keystone_api.registered_limits(
            service_id=service_id, region_id=region_id)
        existing_limits = {
            li.resource_name: li.default_limit for li in registered_limits}

        table = prettytable.PrettyTable()
        table.align = 'l'
        table.field_names = ['Resource', 'Registered Limit']
        table.sortby = 'Resource'
        found_missing = False
        for resource in flavor_resources:
            if resource in existing_limits:
                if verbose:
                    table.add_row([resource, existing_limits[resource]])
            else:
                found_missing = True
                table.add_row([resource, 'missing'])

        if table.rows:
            msg = _(
                'The following resource classes were found during the scan:\n')
            warn_output(msg)
            warn_output(table)

            if down_cell_uuids:
                msg = _(
                    'NOTE: Cells %s did not respond and their data is not '
                    'included in this table.' % down_cell_uuids)
                warn_output('\n' + textwrap.fill(msg, width=80))

        if found_missing:
            msg = _(
                'WARNING: It is strongly recommended to create registered '
                'limits for resource classes missing limits in Keystone '
                'before proceeding.')
            warn_output('\n' + textwrap.fill(msg, width=80))
            return_code = 3
        else:
            msg = _(
                'SUCCESS: All resource classes have registered limits set.')
            warn_output(msg)

        return return_code

    @action_description(
        _("Copy quota limits from the Nova API database to Keystone."))
    @args('--project-id', metavar='<project-id>', dest='project_id',
          help='Project ID for which to migrate quota limits')
    @args('--region-id', metavar='<region-id>', dest='region_id',
          help='Region ID for which to migrate quota limits')
    @args('--verbose', action='store_true', dest='verbose', default=False,
          help='Provide verbose output during execution.')
    @args('--dry-run', action='store_true', dest='dry_run', default=False,
          help='Show what limits would be created without actually '
               'creating them. Flavors will still be scanned for resource '
               'classes missing limits.')
    @args('--quiet', action='store_true', dest='quiet', default=False,
          help='Do not output anything during execution.')
    @args('--no-embedded-flavor-scan', action='store_true',
          dest='no_embedded_flavor_scan', default=False,
          help='Do not scan instances\' embedded flavors for resource classes '
               'missing limits.')
    def migrate_to_unified_limits(self, project_id=None, region_id=None,
                                  verbose=False, dry_run=False, quiet=False,
                                  no_embedded_flavor_scan=False):
        """Migrate quota limits from legacy quotas to unified limits.

        Return codes:
        * 0: Command completed successfully.
        * 1: An unexpected error occurred.
        * 2: Failed to connect to the database.
        * 3: Missing registered limits were identified.
        """
        if verbose and quiet:
            print('--verbose and --quiet are mutually exclusive')
            return 1

        ctxt = context.get_admin_context()

        # Verbose output is optional details.
        output = lambda msg: print(msg) if verbose else None
        # In general, we always want to show important warning output (for
        # example, warning about missing registered limits). Only suppress
        # warning output if --quiet was specified by the caller.
        warn_output = lambda msg: None if quiet else print(msg)

        output(_('Reading default limits from the Nova API database ...'))

        try:
            # This will look for limits in the 'default' quota class first and
            # then fall back to the [quota] config options.
            legacy_defaults = nova.quota.QUOTAS.get_defaults(ctxt)
        except db_exc.CantStartEngineError:
            print(_('Failed to connect to the database so aborting this '
                    'migration attempt. Please check your config file to make '
                    'sure that [api_database]/connection and '
                    '[database]/connection are set and run this '
                    'command again.'))
            return 2

        # Remove obsolete resource limits.
        for resource in ('fixed_ips', 'floating_ips', 'security_groups',
                         'security_group_rules'):
            if resource in legacy_defaults:
                msg = f'Skipping obsolete limit for {resource} ...'
                output(_(msg))
                legacy_defaults.pop(resource)

        msg = (
            f'Found default limits in the database: {legacy_defaults} ...')
        output(_(msg))

        # For auth, reuse the [keystone_authtoken] section.
        if not hasattr(CONF, 'keystone_authtoken'):
            conf_utils.register_ksa_opts(
                CONF, 'keystone_authtoken', 'identity')
        keystone_api = utils.get_sdk_adapter(
            'identity', conf_group='keystone_authtoken')
        # Service ID is required in unified limits APIs.
        service_id = keystone_api.find_service('nova').id

        try:
            result = self._create_unified_limits(
                ctxt, keystone_api, service_id, legacy_defaults, project_id,
                region_id, output, dry_run)
            if result:
                # If there was an error, just return now.
                return result
            result = self._scan_flavors(
                ctxt, keystone_api, service_id, project_id, region_id,
                output, warn_output, verbose, no_embedded_flavor_scan)
            return result
        except db_exc.CantStartEngineError:
            print(_('Failed to connect to the database so aborting this '
                    'migration attempt. Please check your config file to make '
                    'sure that [api_database]/connection and '
                    '[database]/connection are set and run this '
                    'command again.'))
            return 2
        except Exception as e:
            msg = (f'Unexpected error, see nova-manage.log for the full '
                   f'trace: {str(e)}')
            print(_(msg))
            LOG.exception('Unexpected error')
            return 1


CATEGORIES = {
    'api_db': ApiDbCommands,
    'cell_v2': CellV2Commands,
    'db': DbCommands,
    'placement': PlacementCommands,
    'libvirt': LibvirtCommands,
    'volume_attachment': VolumeAttachmentCommands,
    'image_property': ImagePropertyCommands,
    'limits': LimitsCommands,
}


add_command_parsers = functools.partial(cmd_common.add_command_parsers,
                                        categories=CATEGORIES)


category_opt = cfg.SubCommandOpt('category',
                                 title='Command categories',
                                 help='Available categories',
                                 handler=add_command_parsers)

post_mortem_opt = cfg.BoolOpt('post-mortem',
                              default=False,
                              help='Allow post-mortem debugging')


def main():
    """Parse options and call the appropriate class/method."""
    CONF.register_cli_opts([category_opt, post_mortem_opt])
    config.parse_args(sys.argv)
    logging.set_defaults(
        default_log_levels=logging.get_default_log_levels() +
        _EXTRA_DEFAULT_LOG_LEVELS)
    logging.setup(CONF, "nova")
    objects.register_all()

    if CONF.category.name == "version":
        print(version.version_string_with_package())
        return 0

    if CONF.category.name == "bash-completion":
        cmd_common.print_bash_completion(CATEGORIES)
        return 0

    try:
        fn, fn_args, fn_kwargs = cmd_common.get_action_fn()
        ret = fn(*fn_args, **fn_kwargs)
        rpc.cleanup()
        return ret
    except Exception:
        if CONF.post_mortem:
            import pdb
            pdb.post_mortem()
        else:
            print(_("An error has occurred:\n%s") % traceback.format_exc())
        return 255