Track error migrations in resource tracker
If rollback_live_migration fails, the migration status is set to 'error', and some resources, such as vpmems, may not have been cleaned up because the rollback did not complete. We therefore track these 'error' migrations in the resource tracker until they are cleaned up by the periodic task '_cleanup_incomplete_migrations'.

If rollback_live_migration succeeds, we set the migration status to 'failed', which is not tracked by the resource tracker. The 'failed' status is already used by resize to indicate that a migration has finished its cleanup. '_cleanup_incomplete_migrations' now also handles cleanup after a failed rollback_live_migration, in addition to failed resize/revert-resize.

We also introduce a new virt driver interface, 'cleanup_lingering_instance_resources', to handle cleanup of lingering instance resources, including vpmem cleanup and whatever we add in the future.

Change-Id: I422a907056543f9bf95acbffdd2658438febf801
Partially-Implements: blueprint vpmem-enhancement
commit 255b3f2f91 (parent b0c529a73d)
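For orientation before the diff, here is a condensed, standalone sketch of the new driver hook. The class and method names follow the diff below, but the bodies are simplified placeholders written for this sketch, not the real nova implementations.

class ComputeDriver(object):
    """Base driver: the new hook defaults to a no-op."""

    def cleanup_lingering_instance_resources(self, instance):
        # Drivers with nothing extra to clean up simply report success.
        return True


class LibvirtDriver(ComputeDriver):
    """Libvirt driver: wipe vpmem devices left behind by the instance."""

    def _get_vpmems(self, instance):
        # Placeholder for the real lookup of vpmems assigned to the instance.
        return getattr(instance, 'vpmems', [])

    def _cleanup_vpmems(self, vpmems):
        # Placeholder for zeroing the backing pmem devices; the real helper
        # raises if wiping fails.
        for vpmem in vpmems:
            print('wiping %s' % vpmem)

    def cleanup_lingering_instance_resources(self, instance):
        # Zero the data on the backend pmem devices, if any are assigned.
        vpmems = self._get_vpmems(instance)
        if vpmems:
            self._cleanup_vpmems(vpmems)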
@@ -8716,7 +8716,7 @@ class ComputeManager(manager.Manager):
     @wrap_instance_fault
     def _rollback_live_migration(self, context, instance,
                                  dest, migrate_data=None,
-                                 migration_status='error',
+                                 migration_status='failed',
                                  source_bdms=None):
         """Recovers Instance/volume state from migrating -> running.

@@ -8857,9 +8857,8 @@ class ComputeManager(manager.Manager):
                 phase=fields.NotificationPhase.END,
                 bdms=bdms)

-        # TODO(luyao): set migration status to 'failed' but not 'error'
-        # which means rollback_live_migration is done, we have successfully
-        # cleaned up and returned instance back to normal status.
+        # NOTE(luyao): we have cleanup everything and get instance
+        # back to normal status, now set migration status to 'failed'
         self._set_migration_status(migration, migration_status)

     @wrap_exception()
@@ -10377,11 +10376,14 @@ class ComputeManager(manager.Manager):

     @periodic_task.periodic_task(spacing=CONF.instance_delete_interval)
     def _cleanup_incomplete_migrations(self, context):
-        """Delete instance files on failed resize/revert-resize operation
+        """Cleanup on failed resize/revert-resize operation and
+        failed rollback live migration operation.

-        During resize/revert-resize operation, if that instance gets deleted
-        in-between then instance files might remain either on source or
-        destination compute node because of race condition.
+        During resize/revert-resize operation, or after a failed rollback
+        live migration operation, if that instance gets deleted then instance
+        files might remain either on source or destination compute node and
+        other specific resources might not be cleaned up because of the race
+        condition.
         """
         LOG.debug('Cleaning up deleted instances with incomplete migration ')
         migration_filters = {'host': CONF.host,
@@ -10403,21 +10405,29 @@
             context, inst_filters, expected_attrs=attrs, use_slave=True)

         for instance in instances:
-            if instance.host != CONF.host:
-                for migration in migrations:
-                    if instance.uuid == migration.instance_uuid:
-                        # Delete instance files if not cleanup properly either
-                        # from the source or destination compute nodes when
-                        # the instance is deleted during resizing.
-                        self.driver.delete_instance_files(instance)
-                        try:
-                            migration.status = 'failed'
-                            migration.save()
-                        except exception.MigrationNotFound:
-                            LOG.warning("Migration %s is not found.",
-                                        migration.id,
-                                        instance=instance)
-                        break
+            if instance.host == CONF.host:
+                continue
+            for migration in migrations:
+                if instance.uuid != migration.instance_uuid:
+                    continue
+                self.driver.delete_instance_files(instance)
+                # we are not sure whether the migration_context is applied
+                # during incompleted migrating, we need to apply/revert
+                # migration_context to get instance object content matching
+                # current host.
+                revert = (True if migration.source_compute == CONF.host
+                          else False)
+                with instance.mutated_migration_context(revert=revert):
+                    self.driver.cleanup_lingering_instance_resources(instance)
+
+                try:
+                    migration.status = 'failed'
+                    migration.save()
+                except exception.MigrationNotFound:
+                    LOG.warning("Migration %s is not found.",
+                                migration.id,
+                                instance=instance)
+                break

     @messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
                                    exception.QemuGuestAgentNotEnabled,
@@ -897,9 +897,9 @@ class ResourceTracker(object):
         instance_by_uuid = self._update_usage_from_instances(
             context, instances, nodename)

-        # Grab all in-progress migrations:
-        migrations = objects.MigrationList.get_in_progress_by_host_and_node(
-            context, self.host, nodename)
+        # Grab all in-progress migrations and error migrations:
+        migrations = objects.MigrationList.get_in_progress_and_error(
+            context, self.host, nodename)

         self._pair_instances_to_migrations(migrations, instance_by_uuid)
         self._update_usage_from_migrations(context, migrations, nodename)
@@ -1338,6 +1338,11 @@ class ResourceTracker(object):

             try:
                 if uuid not in instances:
+                    # Track migrating instance even if it is deleted but still
+                    # has database record. This kind of instance might be
+                    # deleted during unfinished migrating but exist in the
+                    # hypervisor.
+                    migration._context = context.elevated(read_deleted='yes')
                     instances[uuid] = migration.instance
             except exception.InstanceNotFound as e:
                 # migration referencing deleted instance
@@ -470,6 +470,14 @@ def migration_get_by_sort_filters(context, sort_keys, sort_dirs, values):
                                           values)


+def migration_get_in_progress_and_error_by_host_and_node(context, host, node):
+    """Finds all in progress migrations and error migrations for the given
+    host + node.
+    """
+    return IMPL.migration_get_in_progress_and_error_by_host_and_node(
+        context, host, node)
+
+
 ####################

@@ -3204,6 +3204,10 @@ def migration_get_all_by_filters(context, filters,
     elif "source_compute" in filters:
         host = filters['source_compute']
         query = query.filter(models.Migration.source_compute == host)
+    if "node" in filters:
+        node = filters['node']
+        query = query.filter(or_(models.Migration.source_node == node,
+                                 models.Migration.dest_node == node))
     if "migration_type" in filters:
         migtype = filters["migration_type"]
         query = query.filter(models.Migration.migration_type == migtype)
@@ -3272,6 +3276,20 @@ def migration_migrate_to_uuid(context, count):
     return done, done


+@pick_context_manager_reader
+def migration_get_in_progress_and_error_by_host_and_node(context, host, node):
+    return model_query(context, models.Migration).\
+            filter(or_(and_(models.Migration.source_compute == host,
+                            models.Migration.source_node == node),
+                       and_(models.Migration.dest_compute == host,
+                            models.Migration.dest_node == node))).\
+            filter(~models.Migration.status.in_(['confirmed', 'reverted',
+                                                 'failed', 'completed',
+                                                 'cancelled', 'done'])).\
+            options(_joinedload_all('instance.system_metadata')).\
+            all()
+
+
 ########################
 # User-provided metadata

@@ -1047,8 +1047,8 @@ class Instance(base.NovaPersistentObject, base.NovaObject,
             setattr(self, inst_attr_name, attr_value)

     @contextlib.contextmanager
-    def mutated_migration_context(self):
-        """Context manager to temporarily apply the migration context.
+    def mutated_migration_context(self, revert=False):
+        """Context manager to temporarily apply/revert the migration context.

         Calling .save() from within the context manager means that the mutated
         context will be saved which can cause incorrect resource tracking, and
@@ -1063,7 +1063,10 @@ class Instance(base.NovaPersistentObject, base.NovaObject,
         current_values = {}
         for attr_name in _MIGRATION_CONTEXT_ATTRS:
             current_values[attr_name] = getattr(self, attr_name)
-        self.apply_migration_context()
+        if revert:
+            self.revert_migration_context()
+        else:
+            self.apply_migration_context()
         try:
             yield
         finally:
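To make the semantics of the new revert flag concrete, here is a minimal, standalone illustration; FakeInstance and its attributes are invented for this sketch and only mimic the real context manager shown in the hunk above. Reverting applies the old-side resource values while the block is active, which is what the source-host cleanup path in _cleanup_incomplete_migrations relies on.

import contextlib


class FakeInstance(object):
    """Stand-in with a single migration-context-tracked attribute."""

    def __init__(self):
        self.numa_topology = 'new-topology'
        self._old_numa_topology = 'old-topology'
        self._new_numa_topology = 'new-topology'

    @contextlib.contextmanager
    def mutated_migration_context(self, revert=False):
        # Temporarily swap in the old or new side, then restore on exit.
        saved = self.numa_topology
        self.numa_topology = (self._old_numa_topology if revert
                              else self._new_numa_topology)
        try:
            yield
        finally:
            self.numa_topology = saved


inst = FakeInstance()
with inst.mutated_migration_context(revert=True):
    print(inst.numa_topology)   # -> old-topology while cleaning up the source
print(inst.numa_topology)       # -> new-topology, restored afterwards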
@@ -222,7 +222,9 @@ class MigrationList(base.ObjectListBase, base.NovaObject):
     #              for an instance.
     # Version 1.4: Added sort_keys, sort_dirs, limit, marker kwargs to
     #              get_by_filters for migrations pagination support.
-    VERSION = '1.4'
+    # Version 1.5: Added a new function to get in progress migrations
+    #              and error migrations for a given host + node.
+    VERSION = '1.5'

     fields = {
         'objects': fields.ListOfObjectsField('Migration'),
@@ -266,3 +268,11 @@ class MigrationList(base.ObjectListBase, base.NovaObject):
             context, instance_uuid, migration_type)
         return base.obj_make_list(context, cls(context), objects.Migration,
                                   db_migrations)
+
+    @base.remotable_classmethod
+    def get_in_progress_and_error(cls, context, host, node):
+        db_migrations = \
+            db.migration_get_in_progress_and_error_by_host_and_node(
+                context, host, node)
+        return base.obj_make_list(context, cls(context), objects.Migration,
+                                  db_migrations)
@@ -351,7 +351,7 @@ class NUMALiveMigrationRollbackTests(NUMALiveMigrationPositiveBase):
         # rollback test, so server_a is expected to remain on host_a.
         if pin_dest:
             self._rpc_pin_host('host_b')
-        self._live_migrate(self.server_a, 'error')
+        self._live_migrate(self.server_a, 'failed')
         self.assertEqual('host_a', self.get_host(self.server_a['id']))
         self.assertIsNone(self._get_migration_context(self.server_a['id']))

@@ -442,7 +442,7 @@ class NUMALiveMigrationLegacyBase(NUMALiveMigrationPositiveBase):
                                             ['completed'])
         else:
             final_host = 'source'
-            self._wait_for_migration_status(self.migrating_server, ['error'])
+            self._wait_for_migration_status(self.migrating_server, ['failed'])
         self.assertEqual(final_host,
                          self.get_host(self.migrating_server['id']))
         self.assertTrue(self.migrate_stub_ran)
@@ -82,7 +82,7 @@ class TestVolAttachmentsDuringPreLiveMigration(
         # Migrate the instance and wait until the migration errors out thanks
         # to our mocked version of pre_live_migration raising
         # test.TestingException
-        self._live_migrate(server, 'error')
+        self._live_migrate(server, 'failed')

         # Assert that we called the fake pre_live_migration method
         mock_plm.assert_called_once()
@@ -2870,8 +2870,10 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
         }
         self.api.post_server_action(server['id'], post)
         # The compute manager will put the migration record into error status
-        # when pre_live_migration fails, so wait for that to happen.
-        migration = self._wait_for_migration_status(server, ['error'])
+        # when start _cleanup_pre_live_migration, then set it to failed status
+        # at the end of _rollback_live_migration
+        migration = self._wait_for_migration_status(
+            server, ['error', 'failed'])
         # The _rollback_live_migration method in the compute manager will reset
         # the task_state on the instance, so wait for that to happen.
         server = self._wait_for_server_parameter(
@@ -6251,7 +6251,7 @@ class ComputeTestCase(BaseTestCase,
         self.assertEqual('src_host', instance.host)
         self.assertEqual(vm_states.ACTIVE, instance.vm_state)
         self.assertIsNone(instance.task_state)
-        self.assertEqual('error', migration.status)
+        self.assertEqual('failed', migration.status)
         mock_get_disk.assert_called()
         mock_pre.assert_called_once_with(c,
             instance, True, mock_get_disk.return_value, dest_host,
@@ -6651,7 +6651,7 @@ class ComputeTestCase(BaseTestCase,

         _test()

-        self.assertEqual(migration_status or 'error', migration.status)
+        self.assertEqual(migration_status or 'failed', migration.status)
         self.assertEqual(0, instance.progress)

     def test_rollback_live_migration_set_migration_status(self):
@@ -2463,13 +2463,14 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         mock_delete.assert_has_calls([mock.call(mock.ANY),
                                       mock.call(mock.ANY)])

+    @mock.patch.object(objects.Instance, 'mutated_migration_context')
     @mock.patch.object(objects.Migration, 'save')
     @mock.patch.object(objects.MigrationList, 'get_by_filters')
     @mock.patch.object(objects.InstanceList, 'get_by_filters')
     def _test_cleanup_incomplete_migrations(self, inst_host,
                                             mock_inst_get_by_filters,
                                             mock_migration_get_by_filters,
-                                            mock_save):
+                                            mock_save, mock_mutated_mgr_ctxt):
         def fake_inst(context, uuid, host):
             inst = objects.Instance(context)
             inst.uuid = uuid
@@ -2498,7 +2499,11 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         mock_migration_get_by_filters.return_value = fake_migrations
         mock_inst_get_by_filters.return_value = fake_instances

-        with mock.patch.object(self.compute.driver, 'delete_instance_files'):
+        with test.nested(
+            mock.patch.object(self.compute.driver, 'delete_instance_files'),
+            mock.patch.object(self.compute.driver,
+                              'cleanup_lingering_instance_resources')
+        ) as (mock_delete_files, mock_cleanup_resources):
             self.compute._cleanup_incomplete_migrations(self.context)

         # Ensure that migration status is set to 'failed' after instance
@@ -585,7 +585,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_disabled(self, get_mock, migr_mock, get_cn_mock, pci_mock,
                       instance_pci_mock):
@@ -618,7 +618,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_no_instances_no_migrations_no_reserved(self, get_mock, migr_mock,
                                                     get_cn_mock, pci_mock,
@@ -670,7 +670,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_remove_deleted_instances_allocations')
@@ -699,7 +699,7 @@ class TestUpdateAvailableResources(BaseTestCase):
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_init_compute_node', return_value=True)
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_remove_deleted_instances_allocations')
@@ -729,7 +729,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_no_instances_no_migrations_reserved_disk_ram_and_cpu(
             self, get_mock, migr_mock, get_cn_mock, pci_mock,
@@ -771,7 +771,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_some_instances_no_migrations(self, get_mock, migr_mock,
                                           get_cn_mock, pci_mock,
@@ -820,7 +820,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_orphaned_instances_no_migrations(self, get_mock, migr_mock,
                                               get_cn_mock, pci_mock,
@@ -886,7 +886,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_no_instances_source_migration(self, get_mock, get_inst_mock,
@@ -952,7 +952,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_no_instances_dest_migration(self, get_mock, get_inst_mock,
@@ -1015,7 +1015,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_no_instances_dest_evacuation(self, get_mock, get_inst_mock,
@@ -1076,7 +1076,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
                 return_value=None)
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_some_instances_source_and_dest_migration(self, get_mock,
@@ -1140,6 +1140,69 @@ class TestUpdateAvailableResources(BaseTestCase):
                                 actual_resources))
         update_mock.assert_called_once()

+    @mock.patch('nova.compute.utils.is_volume_backed_instance',
+                return_value=False)
+    @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
+                return_value=objects.InstancePCIRequests(requests=[]))
+    @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
+                return_value=objects.PciDeviceList())
+    @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
+    @mock.patch('nova.objects.Instance.get_by_uuid')
+    @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
+    def test_no_instances_err_migration(
+        self, get_mock, get_inst_mock, migr_mock, get_cn_mock, pci_mock,
+        instance_pci_mock, mock_is_volume_backed_instance
+    ):
+        # We test the behavior of update_available_resource() when
+        # there is an error migration that involves this compute node
+        # as the destination host not the source host, and the resource
+        # tracker does not yet have any instances assigned to it. This is
+        # the case when a migration to this compute host from another host
+        # is in error, the resources claimed on destination host might not
+        # be cleaned up, so the resource tracker must reserve the resources
+        # for the error migration.
+
+        # Setup virt resources to match used resources to number
+        # of defined instances on the hypervisor
+        virt_resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES)
+        virt_resources.update(
+            vcpus_used=2, memory_mb_used=256, local_gb_used=5)
+        self._setup_rt(virt_resources=virt_resources)
+
+        get_mock.return_value = []
+        migr_obj = _MIGRATION_FIXTURES['dest-only']
+        migr_obj.status = 'error'
+        # in-process and error migrations
+        migr_mock.return_value = [migr_obj]
+        inst_uuid = migr_obj.instance_uuid
+        instance = _MIGRATION_INSTANCE_FIXTURES[inst_uuid].obj_clone()
+        get_inst_mock.return_value = instance
+        get_cn_mock.return_value = _COMPUTE_NODE_FIXTURES[0]
+        instance.migration_context = _MIGRATION_CONTEXT_FIXTURES[inst_uuid]
+
+        update_mock = self._update_available_resources()
+
+        get_cn_mock.assert_called_once_with(mock.ANY, _HOSTNAME, _NODENAME)
+        expected_resources = copy.deepcopy(_COMPUTE_NODE_FIXTURES[0])
+        vals = {
+            'free_disk_gb': 1,
+            'local_gb': 6,
+            'free_ram_mb': 256,  # 512 total - 256 for possible confirm of new
+            'memory_mb_used': 256,  # 256 possible confirmed amount
+            'vcpus_used': 2,
+            'local_gb_used': 5,
+            'memory_mb': 512,
+            'current_workload': 0,
+            'vcpus': 4,
+            'running_vms': 0
+        }
+        _update_compute_node(expected_resources, **vals)
+        actual_resources = update_mock.call_args[0][1]
+        self.assertTrue(
+            obj_base.obj_equal_prims(expected_resources, actual_resources))
+        update_mock.assert_called_once()
+
     @mock.patch('nova.compute.utils.is_volume_backed_instance',
                 new=mock.Mock(return_value=False))
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -1147,7 +1210,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
                 new=mock.Mock(return_value=None))
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_populate_assigned_resources(self, mock_get_instances,
@@ -1199,7 +1262,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.MigrationContext.get_by_instance_uuid',
                 new=mock.Mock(return_value=None))
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.Instance.get_by_uuid')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_check_resources_startup_success(self, mock_get_instances,
@@ -1240,7 +1303,7 @@ class TestUpdateAvailableResources(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 new=mock.Mock(return_value=objects.PciDeviceList()))
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     def test_check_resources_startup_fail(self, mock_get_instances,
                                           mock_get_migrations,
@@ -1949,7 +2012,7 @@ class TestInstanceClaim(BaseTestCase):
         self.assertIsInstance(claim, claims.NopClaim)

     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     def test_update_usage_with_claim(self, migr_mock, check_bfv_mock):
         # Test that RT.update_usage() only changes the compute node
         # resources if there has been a claim first.
@@ -1989,7 +2052,7 @@ class TestInstanceClaim(BaseTestCase):
         self.assertTrue(obj_base.obj_equal_prims(expected, cn))

     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     def test_update_usage_removed(self, migr_mock, check_bfv_mock):
         # Test that RT.update_usage() removes the instance when update is
         # called in a removed state
@@ -2065,7 +2128,7 @@ class TestInstanceClaim(BaseTestCase):
         self.assertEqual(0, len(self.rt.assigned_resources[cn.uuid][rc]))

     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     def test_claim(self, migr_mock, check_bfv_mock):
         self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
         check_bfv_mock.return_value = False
@@ -2105,7 +2168,7 @@ class TestInstanceClaim(BaseTestCase):
     @mock.patch('nova.compute.utils.is_volume_backed_instance')
     @mock.patch('nova.pci.stats.PciDeviceStats.support_requests',
                 return_value=True)
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     def test_claim_with_pci(self, migr_mock, pci_stats_mock,
                             check_bfv_mock):
         # Test that a claim involving PCI requests correctly claims
@@ -2238,7 +2301,7 @@ class TestInstanceClaim(BaseTestCase):
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_sync_compute_service_disabled_trait', new=mock.Mock())
     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_claim_abort_context_manager(self, save_mock, migr_mock,
                                          check_bfv_mock):
@@ -2282,7 +2345,7 @@ class TestInstanceClaim(BaseTestCase):
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
                 '_sync_compute_service_disabled_trait', new=mock.Mock())
     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_claim_abort(self, save_mock, migr_mock, check_bfv_mock):
         self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
@@ -2324,7 +2387,7 @@ class TestInstanceClaim(BaseTestCase):
         self.assertEqual(0, cn.running_vms)

     @mock.patch('nova.compute.utils.is_volume_backed_instance')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_claim_numa(self, save_mock, migr_mock, check_bfv_mock):
         self.instance.pci_requests = objects.InstancePCIRequests(requests=[])
@@ -2360,7 +2423,7 @@ class TestResize(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_same_host(self, save_mock, get_mock, migr_mock,
@@ -2465,7 +2528,7 @@ class TestResize(BaseTestCase):
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
                 return_value=_COMPUTE_NODE_FIXTURES[0])
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node',
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error',
                 return_value=[])
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node',
                 return_value=[])
@@ -2473,7 +2536,7 @@
     def _test_instance_build_resize(self,
                                     save_mock,
                                     get_by_host_and_node_mock,
-                                    get_in_progress_by_host_and_node_mock,
+                                    get_in_progress_and_error_mock,
                                     get_by_host_and_nodename_mock,
                                     pci_get_by_compute_node_mock,
                                     pci_get_by_instance_mock,
@@ -2634,7 +2697,7 @@
     @mock.patch('nova.pci.request.get_pci_requests_from_flavor')
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node')
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_dest_host_with_pci(self, save_mock, get_mock,
@@ -2790,7 +2853,7 @@
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_two_instances(self, save_mock, get_mock, migr_mock,
@@ -2922,7 +2985,7 @@ class TestRebuild(BaseTestCase):
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
                 return_value=objects.PciDeviceList())
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename')
-    @mock.patch('nova.objects.MigrationList.get_in_progress_by_host_and_node')
+    @mock.patch('nova.objects.MigrationList.get_in_progress_and_error')
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_rebuild_claim(self, save_mock, get_mock, migr_mock, get_cn_mock,
@@ -3095,13 +3158,13 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
             instance_uuid='some_uuid',
         )
         self._setup_rt()
-        self.rt._update_usage_from_migrations(mock.sentinel.ctx, [migration],
+        ctx = mock.MagicMock()
+        self.rt._update_usage_from_migrations(ctx, [migration],
                                               _NODENAME)
-        mock_get_instance.assert_called_once_with(mock.sentinel.ctx,
-                                                  'some_uuid',
-                                                  expected_attrs=[
-                                                      'migration_context',
-                                                      'flavor'])
+        mock_get_instance.assert_called_once_with(
+            ctx.elevated.return_value, 'some_uuid',
+            expected_attrs=['migration_context', 'flavor'])
+        ctx.elevated.assert_called_once_with(read_deleted='yes')
         self.assertFalse(mock_update_usage.called)

     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
@@ -3149,17 +3212,16 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
         mig_list = objects.MigrationList(objects=migrations)
         instance.migration_context = objects.MigrationContext(
             migration_id=mig2.id)
-        self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
-                                              _NODENAME)
-        upd_mock.assert_called_once_with(mock.sentinel.ctx, instance, mig2,
-                                         _NODENAME)
+        ctxt = mock.MagicMock()
+        self.rt._update_usage_from_migrations(ctxt, mig_list, _NODENAME)
+        upd_mock.assert_called_once_with(ctxt, instance, mig2, _NODENAME)

         upd_mock.reset_mock()
         mig2.updated_at = None
         instance.migration_context.migration_id = mig1.id
-        self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
+        self.rt._update_usage_from_migrations(ctxt, mig_list,
                                               _NODENAME)
-        upd_mock.assert_called_once_with(mock.sentinel.ctx, instance, mig1,
+        upd_mock.assert_called_once_with(ctxt, instance, mig1,
                                          _NODENAME)

     @mock.patch('nova.objects.migration.Migration.save')
@@ -3188,8 +3250,8 @@ class TestUpdateUsageFromMigrations(BaseTestCase):
             id=1,
             instance=instance)
         mig_list = objects.MigrationList(objects=[mig1])
-        self.rt._update_usage_from_migrations(mock.sentinel.ctx, mig_list,
-                                              _NODENAME)
+        ctxt = mock.MagicMock()
+        self.rt._update_usage_from_migrations(ctxt, mig_list, _NODENAME)
         self.assertFalse(upd_mock.called)
         self.assertEqual(mig1.status, "error")

@@ -3208,10 +3270,11 @@
             instance_uuid=uuids.instance, id=1, instance=instance)
         for state in task_states.rebuild_states + task_states.resizing_states:
             instance.task_state = state
+            ctxt = mock.MagicMock()
             self.rt._update_usage_from_migrations(
-                mock.sentinel.ctx, [migration], _NODENAME)
+                ctxt, [migration], _NODENAME)
             mock_update_usage.assert_called_once_with(
-                mock.sentinel.ctx, instance, migration, _NODENAME)
+                ctxt, instance, migration, _NODENAME)
             mock_update_usage.reset_mock()

     @mock.patch('nova.objects.migration.Migration.save')
@@ -3228,10 +3291,11 @@
             dest_compute=_HOSTNAME, dest_node=_NODENAME,
             instance_uuid=uuids.instance, id=1, instance=instance,
             migration_type='live-migration')
+        ctxt = mock.MagicMock()
         self.rt._update_usage_from_migrations(
-            mock.sentinel.ctx, [migration], _NODENAME)
+            ctxt, [migration], _NODENAME)
         mock_update_usage.assert_called_once_with(
-            mock.sentinel.ctx, instance, migration, _NODENAME)
+            ctxt, instance, migration, _NODENAME)


 class TestUpdateUsageFromInstance(BaseTestCase):
@@ -1441,6 +1441,13 @@ class _TestInstanceObject(object):
                     getattr(inst.migration_context, 'new_' + attr_name))
                 self.assertIs(inst_value, migration_context_value)

+        with inst.mutated_migration_context(revert=True):
+            for attr_name in instance._MIGRATION_CONTEXT_ATTRS:
+                inst_value = getattr(inst, attr_name)
+                migration_context_value = (
+                    getattr(inst.migration_context, 'old_' + attr_name))
+                self.assertIs(inst_value, migration_context_value)
+
         for attr_name in instance._MIGRATION_CONTEXT_ATTRS:
             inst_value = getattr(inst, attr_name)
             self.assertIs(expected_objs[attr_name], inst_value)
@@ -1105,7 +1105,7 @@ object_data = {
     'MemoryDiagnostics': '1.0-2c995ae0f2223bb0f8e523c5cc0b83da',
     'Migration': '1.7-bd45b232fd7c95cd79ae9187e10ef582',
     'MigrationContext': '1.2-89f10a83999f852a489962ae37d8a026',
-    'MigrationList': '1.4-983a9c29d4f1e747ce719dc9063b729b',
+    'MigrationList': '1.5-36793f8d65bae421bd5564d09a4de7be',
     'MonitorMetric': '1.1-53b1db7c4ae2c531db79761e7acc52ba',
     'MonitorMetricList': '1.1-15ecf022a68ddbb8c2a6739cfc9f8f5e',
     'NUMACell': '1.4-7695303e820fa855d76954be2eb2680e',
@@ -1856,6 +1856,17 @@ class ComputeDriver(object):
         """
         return False

+    def cleanup_lingering_instance_resources(self, instance):
+        """Cleanup resources occupied by lingering instance.
+
+        For example, cleanup other specific resources or whatever we
+        add in the future.
+
+        :param instance: nova.objects.instance.Instance
+        :returns: True if the cleanup is successful, else false.
+        """
+        return True
+

 def load_compute_driver(virtapi, compute_driver=None):
     """Load a compute driver module.
@@ -1482,6 +1482,13 @@ class LibvirtDriver(driver.ComputeDriver):

         self._undefine_domain(instance)

+    def cleanup_lingering_instance_resources(self, instance):
+        # zero the data on backend pmem device, if fails
+        # it will raise an exception
+        vpmems = self._get_vpmems(instance)
+        if vpmems:
+            self._cleanup_vpmems(vpmems)
+
     def _cleanup_vpmems(self, vpmems):
         for vpmem in vpmems:
             try: