Merge "Cyborg evacuate support"

Zuul 2020-09-02 04:30:10 +00:00 committed by Gerrit Code Review
commit 99781d6fa9
17 changed files with 414 additions and 191 deletions

View File

@ -29,9 +29,8 @@ appropriate privileges) must do the following:
openstack server create --flavor $myflavor --image $myimage $servername
As of 21.0.0 (Ussuri), nova supports only specific operations for instances
with accelerators. The lists of supported and unsupported operations are as
below:
Nova supports only specific operations for instances with accelerators.
The lists of supported and unsupported operations are as below:
* Supported operations.
@ -42,17 +41,21 @@ below:
* Take a snapshot.
* Backup.
* Rescue and unrescue.
* Rebuild.
* Evacuate.
* Unsupported operations.
* Rebuild.
* Resize.
* Evacuate.
* Suspend and resume.
* Shelve and unshelve.
* Cold migration.
* Live migration.
.. versionchanged:: 22.0.0 (Victoria)
Added support for rebuild and evacuate operations.
Some operations, such as lock and unlock, work because they are
effectively no-ops for accelerators.
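As a hedged illustration (the endpoint, token, and server ID below are placeholders, not values from this change), one of the newly supported operations can be requested through the compute API's action endpoint, mirroring the request bodies used by the functional tests in this change:

import requests

def evacuate_server(compute_endpoint, token, server_id, target_host=None):
    # POST /servers/{id}/action with an 'evacuate' body; the target
    # host is optional and the scheduler picks one when omitted.
    body = {'evacuate': {}}
    if target_host:
        body['evacuate']['host'] = target_host
    resp = requests.post(
        '%s/servers/%s/action' % (compute_endpoint, server_id),
        headers={'X-Auth-Token': token}, json=body)
    resp.raise_for_status()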

View File

@ -302,3 +302,12 @@ class _CyborgClient(object):
if err_msg:
# No point raising an exception.
LOG.error('Failed to delete ARQs %s', arq_uuid_str)
def get_arq_uuids_for_instance(self, instance):
"""Get ARQ UUIDs for the instance.
:param instance: Instance Object
:return: ARQ UUIDs.
"""
return [arq['uuid']
for arq in self.get_arqs_for_instance(instance.uuid)]
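A minimal usage sketch for the new helper, assuming a request context and an instance whose flavor carries a device profile (get_client is the existing factory that the conductor code below also uses):

from nova.accelerator import cyborg

def bound_arq_uuids(context, instance):
    # One UUID string per ARQ bound to the instance; [] when none.
    client = cyborg.get_client(context)
    return client.get_arq_uuids_for_instance(instance)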

View File

@ -109,6 +109,8 @@ MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
# TODO(huaqiang): Remove in Wallaby
MIN_VER_NOVA_COMPUTE_MIXED_POLICY = 52
SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD = 53
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
@ -307,14 +309,27 @@ def _get_image_meta_obj(image_meta_dict):
return image_meta
def block_accelerators(func):
def block_accelerators(until_service=None):
def inner(func):
@functools.wraps(func)
def wrapper(self, context, instance, *args, **kwargs):
# NOTE(brinzhang): Reject the operation if the instance has
# accelerators and any nova-compute service has not yet been
# upgraded to a version that supports it.
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if dp_name:
service_support = False
if not dp_name:
service_support = True
elif until_service:
min_version = objects.service.get_minimum_version_all_cells(
nova_context.get_admin_context(), ['nova-compute'])
if min_version >= until_service:
service_support = True
if not service_support:
raise exception.ForbiddenWithAccelerators()
return func(self, context, instance, *args, **kwargs)
return wrapper
return inner
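A self-contained sketch of the decorator-factory pattern introduced here; ForbiddenWithAccelerators and min_compute_version are toy stand-ins for nova's exception class and the all-cells minimum service-version lookup:

import functools

class ForbiddenWithAccelerators(Exception):
    pass

def block_accelerators(until_service=None):
    def inner(func):
        @functools.wraps(func)
        def wrapper(self, context, instance, *args, **kwargs):
            dp_name = instance.flavor.extra_specs.get(
                'accel:device_profile')
            # Allowed if no accelerators are requested, or if every
            # compute service is new enough for this operation.
            allowed = not dp_name or (
                until_service is not None and
                self.min_compute_version(context) >= until_service)
            if not allowed:
                raise ForbiddenWithAccelerators()
            return func(self, context, instance, *args, **kwargs)
        return wrapper
    return inner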
@profiler.trace_cls("compute_api")
@ -3393,7 +3408,7 @@ class API(base.Base):
fields_obj.Architecture.canonicalize(img_arch)
@reject_vtpm_instances(instance_actions.REBUILD)
@block_accelerators
@block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
# TODO(stephenfin): We should expand kwargs out to named args
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
@ -3930,7 +3945,7 @@ class API(base.Base):
# finally split resize and cold migration into separate code paths
# TODO(stephenfin): The 'block_accelerators' decorator doesn't take into
# account the accelerators requested in the new flavor
@block_accelerators
@block_accelerators()
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
@check_instance_host(check_is_up=True)
@ -4159,7 +4174,7 @@ class API(base.Base):
return allow_same_host
@reject_vtpm_instances(instance_actions.SHELVE)
@block_accelerators
@block_accelerators()
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.PAUSED, vm_states.SUSPENDED])
@ -4324,7 +4339,7 @@ class API(base.Base):
return self.compute_rpcapi.get_instance_diagnostics(context,
instance=instance)
@block_accelerators
@block_accelerators()
@reject_sev_instances(instance_actions.SUSPEND)
@check_instance_lock
@check_instance_state(vm_state=[vm_states.ACTIVE])
@ -5028,7 +5043,7 @@ class API(base.Base):
diff=diff)
return _metadata
@block_accelerators
@block_accelerators()
@reject_vtpm_instances(instance_actions.LIVE_MIGRATION)
@reject_sev_instances(instance_actions.LIVE_MIGRATION)
@check_instance_lock
@ -5160,7 +5175,7 @@ class API(base.Base):
instance, migration.id)
@reject_vtpm_instances(instance_actions.EVACUATE)
@block_accelerators
@block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
vm_states.ERROR])
def evacuate(self, context, instance, host, on_shared_storage,

View File

@ -530,7 +530,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.11')
target = messaging.Target(version='5.12')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -3256,18 +3256,29 @@ class ComputeManager(manager.Manager):
migration.status = status
migration.save()
def _rebuild_default_impl(self, context, instance, image_meta,
injected_files, admin_password, allocations,
bdms, detach_block_devices, attach_block_devices,
network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
def _rebuild_default_impl(
self, context, instance, image_meta, injected_files,
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None, evacuate=False,
block_device_info=None, preserve_ephemeral=False,
accel_uuids=None):
if preserve_ephemeral:
# The default code path does not support preserving ephemeral
# partitions.
raise exception.PreserveEphemeralNotSupported()
accel_info = []
if evacuate:
if instance.flavor.extra_specs.get('accel:device_profile'):
try:
accel_info = self._get_bound_arq_resources(
context, instance, accel_uuids or [])
except (Exception, eventlet.timeout.Timeout) as exc:
LOG.exception(exc)
self._build_resources_cleanup(instance, network_info)
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
detach_block_devices(context, bdms)
else:
self._power_off_instance(instance, clean_shutdown=True)
@ -3275,6 +3286,14 @@ class ComputeManager(manager.Manager):
self.driver.destroy(context, instance,
network_info=network_info,
block_device_info=block_device_info)
try:
accel_info = self._get_accel_info(context, instance)
except Exception as exc:
LOG.exception(exc)
self._build_resources_cleanup(instance, network_info)
msg = _('Failure getting accelerator resources.')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
instance.task_state = task_states.REBUILD_BLOCK_DEVICE_MAPPING
instance.save(expected_task_state=[task_states.REBUILDING])
@ -3289,7 +3308,8 @@ class ComputeManager(manager.Manager):
self.driver.spawn(context, instance, image_meta, injected_files,
admin_password, allocations,
network_info=network_info,
block_device_info=new_block_device_info)
block_device_info=new_block_device_info,
accel_info=accel_info)
def _notify_instance_rebuild_error(self, context, instance, error, bdms):
self._notify_about_instance_usage(context, instance,
@ -3298,7 +3318,8 @@ class ComputeManager(manager.Manager):
context, instance, self.host,
phase=fields.NotificationPhase.ERROR, exception=error, bdms=bdms)
@messaging.expected_exceptions(exception.PreserveEphemeralNotSupported)
@messaging.expected_exceptions(exception.PreserveEphemeralNotSupported,
exception.BuildAbortException)
@wrap_exception()
@reverts_task_state
@wrap_instance_event(prefix='compute')
@ -3307,7 +3328,7 @@ class ComputeManager(manager.Manager):
injected_files, new_pass, orig_sys_metadata,
bdms, recreate, on_shared_storage,
preserve_ephemeral, migration,
scheduled_node, limits, request_spec):
scheduled_node, limits, request_spec, accel_uuids):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -3338,6 +3359,7 @@ class ComputeManager(manager.Manager):
:param limits: Overcommit limits set by the scheduler. If a host was
specified by the user, this will be None
:param request_spec: a RequestSpec object used to schedule the instance
:param accel_uuids: a list of cyborg ARQ uuids.
"""
# recreate=True means the instance is being evacuated from a failed
@ -3402,7 +3424,7 @@ class ComputeManager(manager.Manager):
image_meta, injected_files, new_pass, orig_sys_metadata,
bdms, evacuate, on_shared_storage, preserve_ephemeral,
migration, request_spec, allocs, rebuild_claim,
scheduled_node, limits)
scheduled_node, limits, accel_uuids)
except (exception.ComputeResourcesUnavailable,
exception.RescheduledException) as e:
if isinstance(e, exception.ComputeResourcesUnavailable):
@ -3469,7 +3491,7 @@ class ComputeManager(manager.Manager):
self, context, instance, orig_image_ref, image_meta,
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, rebuild_claim, scheduled_node, limits):
allocations, rebuild_claim, scheduled_node, limits, accel_uuids):
"""Helper to avoid deep nesting in the top-level method."""
provider_mapping = None
@ -3490,7 +3512,7 @@ class ComputeManager(manager.Manager):
context, instance, orig_image_ref, image_meta, injected_files,
new_pass, orig_sys_metadata, bdms, evacuate, on_shared_storage,
preserve_ephemeral, migration, request_spec, allocations,
provider_mapping)
provider_mapping, accel_uuids)
@staticmethod
def _get_image_name(image_meta):
@ -3499,12 +3521,12 @@ class ComputeManager(manager.Manager):
else:
return ''
def _do_rebuild_instance(self, context, instance, orig_image_ref,
image_meta, injected_files, new_pass,
orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral,
migration, request_spec, allocations,
request_group_resource_providers_mapping):
def _do_rebuild_instance(
self, context, instance, orig_image_ref, image_meta,
injected_files, new_pass, orig_sys_metadata, bdms, evacuate,
on_shared_storage, preserve_ephemeral, migration, request_spec,
allocations, request_group_resource_providers_mapping,
accel_uuids):
orig_vm_state = instance.vm_state
if evacuate:
@ -3645,7 +3667,8 @@ class ComputeManager(manager.Manager):
block_device_info=block_device_info,
network_info=network_info,
preserve_ephemeral=preserve_ephemeral,
evacuate=evacuate)
evacuate=evacuate,
accel_uuids=accel_uuids)
try:
with instance.mutated_migration_context():
self.driver.rebuild(**kwargs)
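The two accelerator branches added above, condensed into a sketch with the error handling elided (a restatement, not nova's exact code; mgr stands for the ComputeManager instance):

def accel_info_for_rebuild(mgr, context, instance, accel_uuids, evacuate):
    if evacuate:
        # The conductor re-created and bound ARQs against the target
        # host, so wait for those bindings to resolve.
        if instance.flavor.extra_specs.get('accel:device_profile'):
            return mgr._get_bound_arq_resources(
                context, instance, accel_uuids or [])
        return []
    # Same-host rebuild: the existing ARQs stay valid; re-read them
    # after the old guest is destroyed.
    return mgr._get_accel_info(context, instance)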

View File

@ -378,6 +378,8 @@ class ComputeAPI(object):
* 5.10 - Add finish_revert_snapshot_based_resize_at_source()
* 5.11 - Add accel_uuids (accelerator requests) parameter to
build_and_run_instance()
* 5.12 - Add accel_uuids (accelerator requests) parameter to
rebuild_instance()
'''
VERSION_ALIASES = {
@ -1056,20 +1058,29 @@ class ComputeAPI(object):
block_device_info=block_device_info,
reboot_type=reboot_type)
def rebuild_instance(self, ctxt, instance, new_pass, injected_files,
def rebuild_instance(
self, ctxt, instance, new_pass, injected_files,
image_ref, orig_image_ref, orig_sys_metadata, bdms,
recreate, on_shared_storage, host, node,
preserve_ephemeral, migration, limits, request_spec):
preserve_ephemeral, migration, limits, request_spec, accel_uuids):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
limits = limits.to_dict()
msg_args = {'preserve_ephemeral': preserve_ephemeral,
msg_args = {
'preserve_ephemeral': preserve_ephemeral,
'migration': migration,
'scheduled_node': node,
'limits': limits,
'request_spec': request_spec}
version = '5.0'
'request_spec': request_spec,
'accel_uuids': accel_uuids
}
version = '5.12'
client = self.router.client(ctxt)
if not client.can_send_version(version):
del msg_args['accel_uuids']
version = '5.0'
cctxt = client.prepare(server=_compute_host(host, instance),
version=version)
cctxt.cast(ctxt, 'rebuild_instance',
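The version negotiation above, reduced to a sketch (client is the oslo.messaging RPC client this file already obtains from the router; the rest of the plumbing is elided):

def cast_rebuild(client, ctxt, host, **msg_args):
    # Prefer the 5.12 signature; a 5.0-only peer never sees the new
    # accel_uuids argument.
    version = '5.12'
    if not client.can_send_version(version):
        msg_args.pop('accel_uuids', None)
        version = '5.0'
    cctxt = client.prepare(server=host, version=version)
    cctxt.cast(ctxt, 'rebuild_instance', **msg_args)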

View File

@ -1147,14 +1147,21 @@ class ComputeTaskManager(base.Base):
# is not forced to be the original host
request_spec.reset_forced_destinations()
port_res_req = (
external_resources = []
external_resources += (
self.network_api.get_requested_resource_for_instance(
context, instance.uuid))
# NOTE(gibi): When cyborg or other module wants to handle
# similar non-nova resources then here we have to collect
# all the external resource requests in a single list and
extra_specs = request_spec.flavor.extra_specs
device_profile = extra_specs.get('accel:device_profile')
external_resources.extend(
cyborg.get_device_profile_request_groups(
context, device_profile)
if device_profile else [])
# NOTE(gibi): When other modules want to handle similar
# non-nova resources then here we have to collect all
# the external resource requests in a single list and
# add them to the RequestSpec.
request_spec.requested_resources = port_res_req
request_spec.requested_resources = external_resources
try:
# if this is a rebuild of instance on the same host with
@ -1219,8 +1226,17 @@ class ComputeTaskManager(base.Base):
instance.availability_zone = (
availability_zones.get_host_availability_zone(
context, host))
try:
accel_uuids = self._rebuild_cyborg_arq(
context, instance, host, request_spec, evacuate)
except exception.AcceleratorRequestBindingFailed as exc:
cyclient = cyborg.get_client(context)
cyclient.delete_arqs_by_uuid(exc.arqs)
LOG.exception('Failed to rebuild. Reason: %s', exc)
raise exc
self.compute_rpcapi.rebuild_instance(context,
self.compute_rpcapi.rebuild_instance(
context,
instance=instance,
new_pass=new_pass,
injected_files=injected_files,
@ -1232,8 +1248,27 @@ class ComputeTaskManager(base.Base):
on_shared_storage=on_shared_storage,
preserve_ephemeral=preserve_ephemeral,
migration=migration,
host=host, node=node, limits=limits,
request_spec=request_spec)
host=host,
node=node,
limits=limits,
request_spec=request_spec,
accel_uuids=accel_uuids)
def _rebuild_cyborg_arq(
self, context, instance, host, request_spec, evacuate):
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
if not dp_name:
return []
cyclient = cyborg.get_client(context)
if not evacuate:
return cyclient.get_arq_uuids_for_instance(instance)
cyclient.delete_arqs_for_instance(instance.uuid)
resource_provider_mapping = request_spec.get_request_group_mapping()
return self._create_and_bind_arqs(
context, instance.uuid, instance.flavor.extra_specs,
host, resource_provider_mapping)
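The same decision, summarized as a hedged sketch (create_and_bind stands in for self._create_and_bind_arqs with its host and provider-mapping arguments):

def arqs_for_rebuild(cyclient, instance, evacuate, create_and_bind):
    if not instance.flavor.extra_specs.get('accel:device_profile'):
        return []  # no device profile, nothing to manage
    if not evacuate:
        # Same-host rebuild: the ARQs already bound there stay valid.
        return cyclient.get_arq_uuids_for_instance(instance)
    # Evacuate: bindings point at the failed host; delete them and
    # re-create/bind against the target host.
    cyclient.delete_arqs_for_instance(instance.uuid)
    return create_and_bind(instance)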
def _validate_image_traits_for_rebuild(self, context, instance, image_ref):
"""Validates that the traits specified in the image can be satisfied

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 52
SERVICE_VERSION = 53
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -187,6 +187,9 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '5.11'},
# Version 52: Add support for the 'mixed' CPU allocation policy
{'compute_rpc': '5.11'},
# Version 53: Compute RPC v5.12:
# Add accel_uuids (accelerator requests) param to rebuild_instance
{'compute_rpc': '5.12'},
)
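A toy illustration of how the bump gates the feature (the real history is a tuple of dicts, and the constant lives in nova/compute/api.py as shown earlier):

SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD = 53

def accel_rebuild_allowed(min_service_version_all_cells):
    # Only when every nova-compute reports at least version 53 is it
    # safe to cast rebuild_instance with accel_uuids (RPC 5.12).
    return (min_service_version_all_cells >=
            SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD)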

View File

@ -477,6 +477,13 @@ class InstanceHelperMixin:
self.api.post_server_action(server['id'], {'unshelve': {}})
return self._wait_for_state_change(server, expected_state)
def _evacuate_server(self, server, host, expected_state='ACTIVE'):
"""Evacuate a server."""
self.api.post_server_action(server['id'], {'evacuate': {}})
self._wait_for_server_parameter(
server, {'OS-EXT-SRV-ATTR:host': host,
'status': expected_state})
class PlacementHelperMixin:
"""A helper mixin for interacting with placement."""

View File

@ -8086,6 +8086,20 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
networks='none', expected_state='ACTIVE')
def _test_evacuate(self, server, num_hosts):
"""Force down the server's host; return (stopped, target) services."""
server_hostname = server['OS-EXT-SRV-ATTR:host']
for i in range(num_hosts):
if self.compute_services[i].host == server_hostname:
compute_to_stop = self.compute_services[i]
else:
compute_to_evacuate = self.compute_services[i]
# Stop and force down the compute service.
compute_id = self.admin_api.get_services(
host=server_hostname, binary='nova-compute')[0]['id']
compute_to_stop.stop()
self.admin_api.put_service(compute_id, {'forced_down': 'true'})
return compute_to_stop, compute_to_evacuate
def test_soft_reboot_ok(self):
self._reboot_server(self.server)
self._check_allocations_usage(self.server)
@ -8160,6 +8174,30 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self.api.post_server_action(self.server['id'], {'unrescue': {}})
self._check_allocations_usage(self.server)
def test_evacuate_ok(self):
server_hostname = self.server['OS-EXT-SRV-ATTR:host']
arqs = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
compute_to_stop, compute_to_evacuate = self._test_evacuate(
self.server, self.NUM_HOSTS)
self._evacuate_server(self.server, compute_to_evacuate.host)
compute_to_stop.start()
self.server = self.api.get_server(self.server['id'])
arqs_new = self.cyborg.fake_get_arqs_for_instance(self.server['id'])
evac_hostname = self.server['OS-EXT-SRV-ATTR:host']
self.assertNotEqual(server_hostname, evac_hostname)
self.assertEqual(server_hostname, arqs[0]['hostname'])
self.assertEqual(evac_hostname, arqs_new[0]['hostname'])
def test_rebuild_ok(self):
rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID
self.api.post_server_action(self.server['id'],
{'rebuild': {
'imageRef': rebuild_image_ref,
'OS-DCF:diskConfig': 'AUTO'}})
fake_notifier.wait_for_versioned_notifications('instance.rebuild.end')
self._wait_for_state_change(self.server, 'ACTIVE')
self._check_allocations_usage(self.server)
def test_resize_fails(self):
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
@ -8186,30 +8224,29 @@ class AcceleratorServerOpsTest(AcceleratorServerBase):
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_evacuate_fails(self):
server_hostname = self.server['OS-EXT-SRV-ATTR:host']
for i in range(self.NUM_HOSTS):
hostname = 'accel_host' + str(i)
if hostname != server_hostname:
other_hostname = hostname
if self.compute_services[i].host == server_hostname:
compute_to_stop = self.compute_services[i]
# Stop and force down the compute service.
compute_id = self.admin_api.get_services(
host=server_hostname, binary='nova-compute')[0]['id']
compute_to_stop.stop()
self.admin_api.put_service(compute_id, {'forced_down': 'true'})
@mock.patch.object(objects.service, 'get_minimum_version_all_cells')
def test_evacuate_old_compute(self, old_compute_version):
"""Tests when the source compute service is too old to call
evacuate so OpenStackApiException is raised.
"""
old_compute_version.return_value = 52
_, compute_to_evacuate = self._test_evacuate(
self.server, self.NUM_HOSTS)
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],
{'evacuate': {
'host': other_hostname,
'host': compute_to_evacuate.host,
'adminPass': 'MySecretPass'}})
self.assertEqual(403, ex.response.status_code)
self._check_allocations_usage(self.server)
def test_rebuild_fails(self):
@mock.patch.object(objects.service, 'get_minimum_version_all_cells')
def test_rebuild_old_compute(self, old_compute_version):
"""Tests when the source compute service is too old to call
rebuild so OpenStackApiException is raised.
"""
old_compute_version.return_value = 52
rebuild_image_ref = fake_image.AUTO_DISK_CONFIG_ENABLED_IMAGE_UUID
ex = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action, self.server['id'],

View File

@ -23,6 +23,7 @@ from oslo_serialization import jsonutils
from nova.accelerator import cyborg
from nova import context
from nova import exception
from nova import objects
from nova.objects import request_spec
from nova import test
from nova.tests.unit import fake_requests
@ -394,3 +395,25 @@ class CyborgTestCase(test.NoDBTestCase):
self.client.ARQ_URL, params={'arqs': arq_uuid_str})
mock_log.assert_called_once_with('Failed to delete ARQs %s',
arq_uuid_str)
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arq_uuids_for_instance(self, mock_cyborg_get):
# Happy path, without only_resolved=True
_, bound_arqs = self._get_bound_arqs()
instance_uuid = bound_arqs[0]['instance_uuid']
flavor = objects.Flavor(extra_specs={'accel:device_profile': 'dp1'})
instance = objects.Instance(flavor=flavor,
uuid=instance_uuid)
query = {"instance": instance_uuid}
content = jsonutils.dumps({'arqs': bound_arqs})
resp = fake_requests.FakeResponse(200, content)
mock_cyborg_get.return_value = resp
ret_arqs = self.client.get_arq_uuids_for_instance(instance)
mock_cyborg_get.assert_called_once_with(
self.client.ARQ_URL, params=query)
bound_arqs = [bound_arq['uuid'] for bound_arq in bound_arqs]
bound_arqs.sort()
ret_arqs.sort()
self.assertEqual(bound_arqs, ret_arqs)

View File

@ -2122,7 +2122,7 @@ class _ComputeAPIUnitTestMixIn(object):
fake_inst = self._create_instance_obj()
fake_inst.flavor = cur_flavor
new_flavor = objects.Flavor(id=2, name='bar', vcpus=1, memory_mb=2048,
root_gb=10, disabled=False)
root_gb=10, disabled=False, extra_specs={})
mock_get.return_value = new_flavor
mock_check.side_effect = exception.OverQuota(
overs=['ram'], quotas={'cores': 1, 'ram': 2048},
@ -7569,8 +7569,9 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
mock_get_min_ver.assert_called_once_with(
self.context, ['nova-compute'])
def _test_block_accelerators(self, instance, args_info):
@compute_api.block_accelerators
def _test_block_accelerators(self, instance, args_info,
until_service=None):
@compute_api.block_accelerators(until_service=until_service)
def myfunc(self, context, instance, *args, **kwargs):
args_info['args'] = (context, instance, *args)
args_info['kwargs'] = dict(**kwargs)
@ -7604,6 +7605,37 @@ class ComputeAPIUnitTestCase(_ComputeAPIUnitTestMixIn, test.NoDBTestCase):
# myfunc was not called
self.assertEqual({}, args_info)
@mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=54)
def test_block_accelerators_until_service(self, mock_get_min):
"""Support operating server with acclerators until compute service
more than the version of 53.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
args_info = {}
expected_args, kwargs = self._test_block_accelerators(
instance, args_info, until_service=53)
self.assertEqual(expected_args, args_info['args'])
self.assertEqual(kwargs, args_info['kwargs'])
@mock.patch('nova.objects.service.get_minimum_version_all_cells',
return_value=52)
def test_block_accelerators_until_service_forbidden(self, mock_get_min):
"""Ensure a 'ForbiddenWithAccelerators' exception raises if any
compute service less than the version of 53.
"""
extra_specs = {'accel:device_profile': 'mydp'}
flavor = self._create_flavor(extra_specs=extra_specs)
instance = self._create_instance_obj(flavor=flavor)
args_info = {}
self.assertRaisesRegex(exception.ForbiddenWithAccelerators,
'Forbidden with instances that have accelerators.',
self._test_block_accelerators, instance, args_info, 53)
# myfunc was not called
self.assertEqual({}, args_info)
# TODO(huaqiang): Remove in Wallaby
@mock.patch('nova.objects.service.get_minimum_version_all_cells')
def test__check_compute_service_for_mixed_instance(self, mock_ver):

View File

@ -2743,16 +2743,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False,
migration=None, scheduled_node=None,
limits={}, request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_driver(self):
@ -2777,16 +2773,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=[], recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None, scheduled_node=None,
limits={}, request_spec=None, accel_uuids=[])
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2833,16 +2825,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=bdms, recreate=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
on_shared_storage=False,
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata,
bdms=bdms, recreate=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={},
on_shared_storage=False, request_spec=None, accel_uuids=[])
self.assertTrue(called['rebuild'])
self.compute.terminate_instance(self.context, instance, [])
@ -2856,14 +2844,12 @@ class ComputeTestCase(BaseTestCase,
block_device_mapping=[])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
'', '', injected_files=[],
new_pass="new_password",
orig_sys_metadata=sys_metadata, bdms=[],
recreate=False, on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits=None,
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, '', '', injected_files=[],
new_pass="new_password", orig_sys_metadata=sys_metadata, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits=None,
request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
def test_rebuild_launched_at_time(self):
@ -2879,16 +2865,12 @@ class ComputeTestCase(BaseTestCase,
time_fixture.advance_time_delta(cur_time - old_time)
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=[],
new_pass="new_password",
orig_sys_metadata={},
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref, injected_files=[],
new_pass="new_password", orig_sys_metadata={}, bdms=[],
recreate=False, on_shared_storage=False, preserve_ephemeral=False,
migration=None, scheduled_node=None, limits={}, request_spec=None,
accel_uuids=[])
instance.refresh()
self.assertEqual(cur_time,
instance['launched_at'].replace(tzinfo=None))
@ -2916,16 +2898,12 @@ class ComputeTestCase(BaseTestCase,
instance['uuid'])
db.instance_update(self.context, instance['uuid'],
{"task_state": task_states.REBUILDING})
self.compute.rebuild_instance(self.context, instance,
image_ref, image_ref,
injected_files=injected_files,
new_pass="new_password",
orig_sys_metadata=sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False,
migration=None, scheduled_node=None,
limits={}, request_spec=None)
self.compute.rebuild_instance(
self.context, instance, image_ref, image_ref,
injected_files=injected_files, new_pass="new_password",
orig_sys_metadata=sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
self.compute.terminate_instance(self.context, instance, [])
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
@ -4654,7 +4632,8 @@ class ComputeTestCase(BaseTestCase,
'scheduled_node': None,
'limits': {},
'request_spec': None,
'on_shared_storage': False}),
'on_shared_storage': False,
'accel_uuids': ()}),
("set_admin_password", task_states.UPDATING_PASSWORD,
{'new_pass': None}),
("rescue_instance", task_states.RESCUING,
@ -5166,17 +5145,12 @@ class ComputeTestCase(BaseTestCase,
inst_ref.task_state = task_states.REBUILDING
inst_ref.save()
self.compute.rebuild_instance(self.context,
inst_ref,
image_ref, new_image_ref,
injected_files=[],
new_pass=password,
orig_sys_metadata=orig_sys_metadata,
bdms=[], recreate=False,
on_shared_storage=False,
preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={},
request_spec=None)
self.compute.rebuild_instance(
self.context, inst_ref, image_ref, new_image_ref,
injected_files=[], new_pass=password,
orig_sys_metadata=orig_sys_metadata, bdms=[], recreate=False,
on_shared_storage=False, preserve_ephemeral=False, migration=None,
scheduled_node=None, limits={}, request_spec=None, accel_uuids=[])
inst_ref.refresh()
@ -12830,7 +12804,7 @@ class EvacuateHostTestCase(BaseTestCase):
image_ref, injected_files, 'newpass', {}, bdms, recreate=True,
on_shared_storage=on_shared_storage, migration=migration,
preserve_ephemeral=False, scheduled_node=node, limits=limits,
request_spec=None)
request_spec=None, accel_uuids=[])
if vm_states_is_stopped:
mock_notify_rebuild.assert_has_calls([
mock.call(ctxt, self.inst, self.inst.host, phase='start',
@ -13019,7 +12993,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
@mock.patch.object(fake.FakeDriver, 'spawn')
def test_rebuild_on_host_without_shared_storage(self, mock_spawn):
@ -13037,7 +13011,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
def test_rebuild_on_host_instance_exists(self):
"""Rebuild if instance exists raises an exception."""
@ -13081,7 +13055,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchObjPrims(self.inst.image_meta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
@mock.patch.object(fake.FakeDriver, 'spawn')
def test_on_shared_storage_not_provided_host_with_shared_storage(self,
@ -13097,7 +13071,7 @@ class EvacuateHostTestCase(BaseTestCase):
test.MatchType(objects.ImageMeta),
mock.ANY, 'newpass', mock.ANY,
network_info=mock.ANY,
block_device_info=mock.ANY)
block_device_info=mock.ANY, accel_info=mock.ANY)
def test_rebuild_migration_passed_in(self):
migration = mock.Mock(spec=objects.Migration)
@ -13140,7 +13114,7 @@ class EvacuateHostTestCase(BaseTestCase):
'nova.scheduler.client.report.SchedulerReportClient.'
'remove_provider_tree_from_instance_allocation')
with patch_spawn, patch_on_disk, patch_claim, patch_remove_allocs:
self.assertRaises(exception.BuildAbortException,
self.assertRaises(messaging.ExpectedException,
self._rebuild, migration=migration,
send_node=True)
self.assertEqual("failed", migration.status)
@ -13174,7 +13148,7 @@ class EvacuateHostTestCase(BaseTestCase):
# NOTE(ndipanov): Make sure that we pass the topology from the context
def fake_spawn(context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
block_device_info=None, accel_info=None):
self.assertIsNone(instance.numa_topology)
self.inst.numa_topology = numa_topology

View File

@ -5142,10 +5142,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_rebuild.side_effect = exc
self.compute.rebuild_instance(self.context, instance, None, None, None,
None, None, None, recreate,
False, False, None, scheduled_node, {},
None)
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None, None,
recreate, False, False, None, scheduled_node, {}, None, [])
mock_set.assert_called_once_with(None, 'failed')
mock_notify_about_instance_usage.assert_called_once_with(
mock.ANY, instance, 'rebuild.error', fault=mock_rebuild.side_effect
@ -5209,7 +5208,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
instance = fake_instance.fake_instance_obj(self.context)
instance.vm_state = vm_states.ACTIVE
ex = exception.ComputeResourcesUnavailable(reason='out of foo')
self.assertRaises(exception.BuildAbortException,
self.assertRaises(messaging.ExpectedException,
self._test_rebuild_ex, instance, ex)
# Make sure the instance vm_state did not change.
self.assertEqual(vm_states.ACTIVE, instance.vm_state)
@ -5256,7 +5255,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
None, recreate=True, on_shared_storage=None,
preserve_ephemeral=False, migration=None,
scheduled_node='fake-node',
limits={}, request_spec=request_spec)
limits={}, request_spec=request_spec, accel_uuids=[])
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5291,11 +5290,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_validate_policy.side_effect = exc
self.assertRaises(
exception.BuildAbortException, self.compute.rebuild_instance,
messaging.ExpectedException, self.compute.rebuild_instance,
self.context, instance, None, None, None, None, None, None,
recreate=True, on_shared_storage=None, preserve_ephemeral=False,
migration=None, scheduled_node='fake-node', limits={},
request_spec=request_spec)
request_spec=request_spec, accel_uuids=[])
mock_validate_policy.assert_called_once_with(
elevated_context, instance, {'group': [uuids.group]})
@ -5317,9 +5316,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.compute, '_set_migration_status'),
) as (mock_get, mock_rebuild, mock_save, mock_set):
self.compute.rebuild_instance(self.context, instance, None, None,
self.compute.rebuild_instance(
self.context, instance, None, None,
None, None, None, None, False,
False, False, None, None, {}, None)
False, False, None, None, {}, None, [])
self.assertFalse(mock_get.called)
self.assertEqual(node, instance.node)
mock_set.assert_called_once_with(None, 'done')
@ -5339,9 +5339,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(self.compute, '_set_migration_status'),
) as (mock_get, mock_rebuild, mock_save, mock_set):
mock_get.return_value.hypervisor_hostname = 'new-node'
self.compute.rebuild_instance(self.context, instance, None, None,
None, None, None, None, True,
False, False, None, None, {}, None)
self.compute.rebuild_instance(
self.context, instance, None, None, None, None, None,
None, True, False, False, None, None, {}, None, [])
mock_get.assert_called_once_with(mock.ANY, self.compute.host)
self.assertEqual('new-node', instance.node)
mock_set.assert_called_once_with(None, 'done')
@ -5423,7 +5423,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
recreate, on_shared_storage,
preserve_ephemeral, {}, {},
self.allocations,
mock.sentinel.mapping)
mock.sentinel.mapping, [])
mock_notify_usage.assert_has_calls(
[mock.call(self.context, instance, "rebuild.start",
@ -5454,7 +5454,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
def _spawn(context, instance, image_meta, injected_files,
admin_password, allocations, network_info=None,
block_device_info=None):
block_device_info=None, accel_info=None):
self.assertEqual(block_device_info['block_device_mapping'],
'shared_block_storage')
@ -5466,12 +5466,15 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.patch.object(objects.Instance, 'save',
return_value=None),
mock.patch.object(self.compute, '_power_off_instance',
return_value=None)
return_value=None),
mock.patch.object(self.compute, '_get_accel_info',
return_value=[])
) as(
mock_destroy,
mock_spawn,
mock_save,
mock_power_off
mock_power_off,
mock_accel_info
):
instance = fake_instance.fake_instance_obj(self.context)
instance.migration_context = None
@ -5522,7 +5525,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
preserve_ephemeral=False, migration=objects.Migration(),
request_spec=objects.RequestSpec(),
allocations=self.allocations,
request_group_resource_providers_mapping=mock.sentinel.mapping)
request_group_resource_providers_mapping=mock.sentinel.mapping,
accel_uuids=[])
self.assertIn('Trusted image certificates provided on host',
six.text_type(ex))

View File

@ -693,12 +693,55 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
reboot_type='type')
def test_rebuild_instance(self):
self._test_compute_api('rebuild_instance', 'cast', new_pass='None',
# With rpcapi 5.12, an accel_uuids list passed as a parameter must be
# forwarded to the client. _test_compute_api verifies this via its
# rpc_mock assertions, whose expected_kwargs must include accel_uuids.
self._test_compute_api(
'rebuild_instance', 'cast', new_pass='None',
injected_files='None', image_ref='None', orig_image_ref='None',
bdms=[], instance=self.fake_instance_obj, host='new_host',
orig_sys_metadata=None, recreate=True, on_shared_storage=True,
preserve_ephemeral=True, migration=None, node=None,
limits=None, request_spec=None, version='5.0')
limits=None, request_spec=None, accel_uuids=[], version='5.12')
def test_rebuild_instance_old_rpcapi(self):
# With rpcapi < 5.12, accel_uuids must be dropped in the client call.
ctxt = context.RequestContext('fake_user', 'fake_project')
compute_api = compute_rpcapi.ComputeAPI()
compute_api.router.client = mock.Mock()
mock_client = mock.MagicMock()
compute_api.router.client.return_value = mock_client
# Force can_send_version to False, so that 5.0 version is used.
mock_client.can_send_version.return_value = False
mock_cctx = mock.MagicMock()
mock_client.prepare.return_value = mock_cctx
rebuild_args = {
'new_pass': 'admin_password',
'injected_files': 'files_to_inject',
'image_ref': uuids.image_ref,
'orig_image_ref': uuids.orig_image_ref,
'orig_sys_metadata': 'orig_sys_meta',
'bdms': {},
'recreate': False,
'on_shared_storage': False,
'preserve_ephemeral': False,
'request_spec': None,
'migration': None,
'limits': None
}
compute_api.rebuild_instance(
ctxt, instance=self.fake_instance_obj,
accel_uuids=['938af7f9-f136-4e5a-bdbe-3b6feab54311'],
node=None, host=None, **rebuild_args)
mock_client.can_send_version.assert_called_once_with('5.12')
mock_client.prepare.assert_called_with(
server=self.fake_instance_obj.host, version='5.0')
mock_cctx.cast.assert_called_with( # No accel_uuids
ctxt, 'rebuild_instance',
instance=self.fake_instance_obj,
scheduled_node=None, **rebuild_args)
def test_reserve_block_device_name(self):
self.flags(long_rpc_timeout=1234)

View File

@ -390,6 +390,7 @@ class _BaseTaskTestCase(object):
compute_rebuild_args['migration'] = migration
compute_rebuild_args['node'] = node
compute_rebuild_args['limits'] = limits
compute_rebuild_args['accel_uuids'] = []
return rebuild_args, compute_rebuild_args
@ -1780,6 +1781,7 @@ class _BaseTaskTestCase(object):
rebuild_args, _ = self._prepare_rebuild_args(
{'host': None, 'recreate': True})
fake_spec = objects.RequestSpec()
fake_spec.flavor = inst_obj.flavor
rebuild_args['request_spec'] = fake_spec
with test.nested(
@ -1847,6 +1849,7 @@ class _BaseTaskTestCase(object):
fake_selection = objects.Selection(service_host=expected_host,
nodename=expected_node, limits=None)
fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host])
fake_spec.flavor = inst_obj.flavor
rebuild_args, compute_args = self._prepare_rebuild_args(
{'host': None, 'node': expected_node, 'limits': expected_limits,
'request_spec': fake_spec, 'recreate': True})

View File

@ -286,7 +286,7 @@ class ComputeDriver(object):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
preserve_ephemeral=False, accel_uuids=None):
"""Destroy and re-make this instance.
A 'rebuild' effectively purges all existing data from the system and
@ -323,6 +323,7 @@ class ComputeDriver(object):
attached to the instance.
:param preserve_ephemeral: True if the default ephemeral storage
partition must be preserved on rebuild
:param accel_uuids: Accelerator UUIDs.
"""
raise NotImplementedError()
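A hypothetical out-of-tree driver illustrating the widened contract; drivers without accelerator support simply accept and ignore the new keyword, as the Ironic driver below does:

class ExampleDriver(ComputeDriver):  # hypothetical, for illustration
    def rebuild(self, context, instance, image_meta, injected_files,
                admin_password, allocations, bdms, detach_block_devices,
                attach_block_devices, network_info=None, evacuate=False,
                block_device_info=None, preserve_ephemeral=False,
                accel_uuids=None):
        # accel_uuids is accepted but unused here; a driver with
        # accelerator support would thread it through to its own
        # rebuild/spawn plumbing.
        pass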

View File

@ -1670,7 +1670,7 @@ class IronicDriver(virt_driver.ComputeDriver):
admin_password, allocations, bdms, detach_block_devices,
attach_block_devices, network_info=None,
evacuate=False, block_device_info=None,
preserve_ephemeral=False):
preserve_ephemeral=False, accel_uuids=None):
"""Rebuild/redeploy an instance.
This version of rebuild() allows for supporting the option to
@ -1710,7 +1710,7 @@ class IronicDriver(virt_driver.ComputeDriver):
information. Ignored by this driver.
:param preserve_ephemeral: Boolean value; if True the ephemeral
must be preserved on rebuild.
:param accel_uuids: Accelerator UUIDs. Ignored by this driver.
"""
LOG.debug('Rebuild called for instance', instance=instance)