Delete dangling bdms
On reboot, check the instance volume status on the cinder side. Verify if volume exists and cinder has an attachment ID, else delete its BDMS data from nova DB and vice versa. Updated existing test cases to use CinderFixture while rebooting as reboot calls get_all_attachments Implements: blueprint https://blueprints.launchpad.net/nova/+spec/cleanup-dangling-volume-attachments Closes-Bug: 2019078 Change-Id: Ieb619d4bfe0a6472aefb118b58283d7ad8d24c29
This commit is contained in:
parent
c33a9ccf4c
commit
9d5935d007
@ -81,6 +81,18 @@ hosting the instance. This could even include refreshing different elements of
|
||||
the attachment to ensure the latest configuration changes within the
|
||||
environment have been applied.
|
||||
|
||||
.. note::
|
||||
|
||||
If you encounter any dangling volume attachments in either the Nova or
|
||||
Cinder databases, a ``hard reboot`` of the affected instance can help
|
||||
resolve the issue. During the instance reboot process, Nova performs
|
||||
a synchronization mechanism that verifies the availability of volume
|
||||
attachments in the Cinder database. Any missing or dangling/stale
|
||||
attachments are detected and deleted from both Nova and Cinder during
|
||||
``hard reboot`` process.
|
||||
|
||||
|
||||
|
||||
Checking an existing attachment
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -4174,6 +4174,56 @@ class ComputeManager(manager.Manager):
|
||||
accel_info = []
|
||||
return accel_info
|
||||
|
||||
def _delete_dangling_bdms(self, context, instance, bdms):
|
||||
"""Deletes dangling or stale attachments for volume from
|
||||
Nova and Cinder DB so both service DBs can be in sync.
|
||||
|
||||
Retrieves volume attachments from the Nova block_device_mapping
|
||||
table and verifies them with the Cinder volume_attachment table.
|
||||
If attachment is not present in any one of the DBs, delete
|
||||
attachments from the other DB.
|
||||
|
||||
:param context: The nova request context.
|
||||
:param instance: instance object.
|
||||
:param instance: BlockDeviceMappingList list object.
|
||||
"""
|
||||
|
||||
# attachments present in nova DB, ones nova knows about
|
||||
nova_attachments = []
|
||||
bdms_to_delete = []
|
||||
for bdm in bdms.objects:
|
||||
if bdm.volume_id and bdm.source_type == 'volume' and \
|
||||
bdm.destination_type == 'volume':
|
||||
try:
|
||||
self.volume_api.attachment_get(context, bdm.attachment_id)
|
||||
except exception.VolumeAttachmentNotFound:
|
||||
LOG.info(
|
||||
f"Removing stale volume attachment "
|
||||
f"'{bdm.attachment_id}' from instance for "
|
||||
f"volume '{bdm.volume_id}'.", instance=instance)
|
||||
bdm.destroy()
|
||||
bdms_to_delete.append(bdm)
|
||||
else:
|
||||
nova_attachments.append(bdm.attachment_id)
|
||||
|
||||
cinder_attachments = self.volume_api.attachment_get_all(
|
||||
context, instance.uuid)
|
||||
cinder_attachments = [each['id'] for each in cinder_attachments]
|
||||
|
||||
if len(set(cinder_attachments) - set(nova_attachments)):
|
||||
LOG.info(
|
||||
"Removing stale volume attachments of instance from "
|
||||
"Cinder", instance=instance)
|
||||
for each_attach in set(cinder_attachments) - set(nova_attachments):
|
||||
# delete only cinder known attachments, from cinder DB.
|
||||
LOG.debug(
|
||||
f"Removing attachment '{each_attach}'", instance=instance)
|
||||
self.volume_api.attachment_delete(context, each_attach)
|
||||
|
||||
# refresh bdms object
|
||||
for bdm in bdms_to_delete:
|
||||
bdms.objects.remove(bdm)
|
||||
|
||||
@wrap_exception()
|
||||
@reverts_task_state
|
||||
@wrap_instance_event(prefix='compute')
|
||||
@ -4203,6 +4253,9 @@ class ComputeManager(manager.Manager):
|
||||
|
||||
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
|
||||
context, instance.uuid)
|
||||
|
||||
self._delete_dangling_bdms(context, instance, bdms)
|
||||
|
||||
block_device_info = self._get_instance_block_device_info(
|
||||
context, instance, bdms=bdms)
|
||||
|
||||
@ -5636,7 +5689,7 @@ class ComputeManager(manager.Manager):
|
||||
# in case _prep_resize fails.
|
||||
instance_state = instance.vm_state
|
||||
with self._error_out_instance_on_exception(
|
||||
context, instance, instance_state=instance_state),\
|
||||
context, instance, instance_state=instance_state), \
|
||||
errors_out_migration_ctxt(migration):
|
||||
|
||||
self._send_prep_resize_notifications(
|
||||
|
@ -66,10 +66,8 @@ class TestAttachedVolumes(
|
||||
# verify if volume attachment is present at nova
|
||||
bdm_list = self._get_bdm_list(server)
|
||||
|
||||
# TODO(auniyal): Reboot should remove stale bdms
|
||||
# after fix bdm should not have any volume
|
||||
self.assertEqual(1, len(bdm_list))
|
||||
self.assertEqual(volume_id, bdm_list[0][0])
|
||||
self.assertEqual(0, len(bdm_list))
|
||||
|
||||
def test_delete_multiple_stale_attachment_from_nova(self):
|
||||
volumes = [
|
||||
@ -109,13 +107,12 @@ class TestAttachedVolumes(
|
||||
bdm_list_2 = self._get_bdm_list(server)
|
||||
bdm_attc_vols = [bdm[0] for bdm in bdm_list_2]
|
||||
|
||||
# after fix only 2 volumes should be attached instead 4
|
||||
self.assertEqual(4, len(bdm_list_2))
|
||||
self.assertEqual(2, len(bdm_list_2))
|
||||
|
||||
self.assertIn(bdm_list[0][0], bdm_attc_vols)
|
||||
self.assertIn(bdm_list[1][0], bdm_attc_vols)
|
||||
self.assertNotIn(bdm_list[1][0], bdm_attc_vols)
|
||||
self.assertIn(bdm_list[2][0], bdm_attc_vols)
|
||||
self.assertIn(bdm_list[3][0], bdm_attc_vols)
|
||||
self.assertNotIn(bdm_list[3][0], bdm_attc_vols)
|
||||
|
||||
def test_delete_multiple_stale_attachment_from_cinder(self):
|
||||
volume_id = 'aeb9b5f4-1fe9-4964-ab65-5e168be4de8e'
|
||||
@ -149,9 +146,7 @@ class TestAttachedVolumes(
|
||||
# rebooting server should remove stale attachments
|
||||
server = self._reboot_server(server, hard=True)
|
||||
|
||||
# TODO(auniyal): Reboot should remove only stale attachments
|
||||
# from cinder too
|
||||
# verify if how many volume attachments are present at cinder
|
||||
attached_volume_ids = self.cinder.attachment_ids_for_instance(
|
||||
server['id'])
|
||||
self.assertEqual(attachments + 1, len(attached_volume_ids))
|
||||
self.assertEqual(1, len(attached_volume_ids))
|
||||
|
@ -1326,6 +1326,7 @@ class ProviderUsageBaseTestCase(test.TestCase, PlacementInstanceHelperMixin):
|
||||
self.policy = self.useFixture(nova_fixtures.RealPolicyFixture())
|
||||
self.neutron = self.useFixture(nova_fixtures.NeutronFixture(self))
|
||||
self.glance = self.useFixture(nova_fixtures.GlanceFixture(self))
|
||||
self.useFixture(nova_fixtures.CinderFixture(self))
|
||||
self.placement = self.useFixture(func_fixtures.PlacementFixture()).api
|
||||
self.useFixture(nova_fixtures.AllServicesCurrent())
|
||||
|
||||
|
@ -27,6 +27,7 @@ class RegressionTest1835822(
|
||||
self.useFixture(nova_fixtures.GlanceFixture(self))
|
||||
self.useFixture(nova_fixtures.NeutronFixture(self))
|
||||
self.useFixture(func_fixtures.PlacementFixture())
|
||||
self.useFixture(nova_fixtures.CinderFixture(self))
|
||||
|
||||
self.api = self.useFixture(nova_fixtures.OSAPIFixture(
|
||||
api_version='v2.1')).api
|
||||
|
@ -85,6 +85,7 @@ class InstanceActionEventFaultsTestCase(
|
||||
self.useFixture(nova_fixtures.NeutronFixture(self))
|
||||
self.useFixture(func_fixtures.PlacementFixture())
|
||||
self.useFixture(nova_fixtures.RealPolicyFixture())
|
||||
self.useFixture(nova_fixtures.CinderFixture(self))
|
||||
|
||||
# Start the compute services.
|
||||
self.start_service('conductor')
|
||||
|
@ -33,6 +33,7 @@ class ServerFaultTestCase(test.TestCase,
|
||||
self.useFixture(func_fixtures.PlacementFixture())
|
||||
self.useFixture(nova_fixtures.GlanceFixture(self))
|
||||
self.useFixture(nova_fixtures.RealPolicyFixture())
|
||||
self.useFixture(nova_fixtures.CinderFixture(self))
|
||||
|
||||
# Start the compute services.
|
||||
self.start_service('conductor')
|
||||
|
@ -1471,6 +1471,132 @@ class ComputeVolumeTestCase(BaseTestCase):
|
||||
self.assertEqual(1, attach_block_devices.call_count)
|
||||
get_swap.assert_called_once_with([])
|
||||
|
||||
def _test__delete_dangling_bdms(
|
||||
self, instance, nova_bdms, cinder_attachments, valid=False):
|
||||
with test.nested(
|
||||
mock.patch.object(objects.BlockDeviceMappingList,
|
||||
'get_by_instance_uuid'),
|
||||
mock.patch.object(objects.BlockDeviceMapping, 'destroy'),
|
||||
mock.patch.object(cinder.API, 'attachment_get'),
|
||||
mock.patch.object(cinder.API, 'attachment_get_all'),
|
||||
mock.patch.object(cinder.API, 'attachment_delete')
|
||||
) as (mock_get_bdms, mock_destroy, mock_attach_get,
|
||||
mock_all_attachments, mock_attachment_delete):
|
||||
mock_get_bdms.return_value = nova_bdms
|
||||
mock_all_attachments.return_value = cinder_attachments
|
||||
|
||||
if not valid:
|
||||
err = exception.VolumeAttachmentNotFound(
|
||||
attachment_id=None)
|
||||
mock_attach_get.side_effect = err
|
||||
|
||||
self.compute._delete_dangling_bdms(
|
||||
self.context, instance, nova_bdms)
|
||||
|
||||
return mock_destroy, mock_attachment_delete
|
||||
|
||||
def test_dangling_bdms_nothing_to_delete(self):
|
||||
"""no bdm, no attachments"""
|
||||
instance = self._create_fake_instance_obj()
|
||||
bdms = objects.BlockDeviceMappingList(objects=[])
|
||||
mock_dstr, mock_atach_del = self._test__delete_dangling_bdms(
|
||||
instance, bdms, [])
|
||||
self.assertTrue(mock_dstr.assert_not_called)
|
||||
self.assertTrue(mock_atach_del.assert_not_called)
|
||||
|
||||
def test_dangling_bdms_delete_from_bdm(self):
|
||||
"""valid source type:
|
||||
able to retrieve from valid target attachmnet from cinder
|
||||
bdm should not get deleted.
|
||||
there is a 'valid' flag passed while
|
||||
calling _test__delete_dangling_bdms
|
||||
|
||||
invalid source type:
|
||||
attachment is of image type, bdm should not get deleted
|
||||
"""
|
||||
instance = self._create_fake_instance_obj()
|
||||
bdms = objects.BlockDeviceMappingList(objects=[
|
||||
objects.BlockDeviceMapping(
|
||||
**fake_block_device.AnonFakeDbBlockDeviceDict(
|
||||
{
|
||||
'instance_uuid': instance.uuid,
|
||||
'volume_id': uuids.fake_vol1,
|
||||
'attachment_id': uuids.fake_attachment_1,
|
||||
'source_type': 'volume',
|
||||
'destination_type': 'volume'})),
|
||||
objects.BlockDeviceMapping(
|
||||
**fake_block_device.AnonFakeDbBlockDeviceDict(
|
||||
{
|
||||
'instance_uuid': instance.uuid,
|
||||
'volume_id': uuids.fake_vol2,
|
||||
'attachment_id': uuids.fake_attachment_2,
|
||||
'source_type': 'image',
|
||||
'destination_type': 'volume'}))
|
||||
])
|
||||
|
||||
mock_destroy, _ = self._test__delete_dangling_bdms(
|
||||
instance, bdms, [], True)
|
||||
# bdm.destroy never gets called
|
||||
self.assertFalse(mock_destroy.called)
|
||||
self.assertEqual(mock_destroy.call_count, 0)
|
||||
# u_bdms are valid bdms, both image and volume are valid bdm
|
||||
self.assertEqual(len(bdms), 2)
|
||||
|
||||
def test_dangling_bdms_delete_from_multi_bdm(self):
|
||||
"""nova has bdms but they are not present at cinder side
|
||||
both bdms should be deleted
|
||||
"""
|
||||
instance = self._create_fake_instance_obj()
|
||||
bdms = objects.BlockDeviceMappingList(objects=[
|
||||
objects.BlockDeviceMapping(
|
||||
**fake_block_device.AnonFakeDbBlockDeviceDict(
|
||||
{
|
||||
'instance_uuid': instance.uuid,
|
||||
'volume_id': uuids.fake_vol1,
|
||||
'attachment_id': uuids.fake_attachment_1,
|
||||
'source_type': 'volume',
|
||||
'destination_type': 'volume'})),
|
||||
objects.BlockDeviceMapping(
|
||||
**fake_block_device.AnonFakeDbBlockDeviceDict(
|
||||
{
|
||||
'instance_uuid': instance.uuid,
|
||||
'volume_id': uuids.fake_vol2,
|
||||
'attachment_id': uuids.fake_attachment_2,
|
||||
'source_type': 'volume',
|
||||
'destination_type': 'volume'}))
|
||||
])
|
||||
|
||||
mock_destroy, _ = self._test__delete_dangling_bdms(
|
||||
instance, bdms, [])
|
||||
self.assertTrue(mock_destroy.called)
|
||||
self.assertEqual(mock_destroy.call_count, 2)
|
||||
self.assertEqual(len(bdms), 0)
|
||||
|
||||
def test_dangling_bdms_delete_cinder_attachments(self):
|
||||
"""out of 2 one cinder attachment is present in nova side"""
|
||||
instance = self._create_fake_instance_obj()
|
||||
bdms = objects.BlockDeviceMappingList(objects=[
|
||||
objects.BlockDeviceMapping(
|
||||
**fake_block_device.AnonFakeDbBlockDeviceDict(
|
||||
{
|
||||
'instance_uuid': instance.uuid,
|
||||
'volume_id': uuids.fake_vol1,
|
||||
'attachment_id': uuids.fake_attachment_1,
|
||||
'source_type': 'volume',
|
||||
'destination_type': 'volume'})),
|
||||
])
|
||||
|
||||
cinder_attachments = [
|
||||
{'id': uuids.fake_attachment_1},
|
||||
{'id': 2},
|
||||
]
|
||||
_, mock_attachment_delete = self._test__delete_dangling_bdms(
|
||||
instance, bdms, cinder_attachments, True)
|
||||
|
||||
self.assertTrue(mock_attachment_delete.call_count, 1)
|
||||
self.assertEqual(mock_attachment_delete.call_args_list[0][0][1], 2)
|
||||
self.assertEqual(len(bdms), 1)
|
||||
|
||||
|
||||
class ComputeTestCase(BaseTestCase,
|
||||
test_diagnostics.DiagnosticsComparisonMixin,
|
||||
@ -2897,6 +3023,8 @@ class ComputeTestCase(BaseTestCase,
|
||||
reimage_boot_volume=False, target_state=None)
|
||||
self.compute.terminate_instance(self.context, instance, [])
|
||||
|
||||
@mock.patch.object(compute_manager.ComputeManager,
|
||||
'_delete_dangling_bdms')
|
||||
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
|
||||
@mock.patch.object(compute_manager.ComputeManager,
|
||||
'_get_instance_block_device_info')
|
||||
@ -2908,9 +3036,9 @@ class ComputeTestCase(BaseTestCase,
|
||||
@mock.patch('nova.compute.utils.notify_about_instance_action')
|
||||
def _test_reboot(self, soft, mock_notify_action, mock_get_power,
|
||||
mock_get_orig, mock_update, mock_notify_usage,
|
||||
mock_get_blk, mock_get_bdms, test_delete=False,
|
||||
test_unrescue=False, fail_reboot=False,
|
||||
fail_running=False):
|
||||
mock_get_blk, mock_get_bdms, mock_del_stale_bdms,
|
||||
test_delete=False, test_unrescue=False,
|
||||
fail_reboot=False, fail_running=False):
|
||||
reboot_type = soft and 'SOFT' or 'HARD'
|
||||
task_pending = (soft and task_states.REBOOT_PENDING or
|
||||
task_states.REBOOT_PENDING_HARD)
|
||||
@ -3127,6 +3255,8 @@ class ComputeTestCase(BaseTestCase,
|
||||
def test_reboot_hard_and_delete_and_rescued(self):
|
||||
self._test_reboot(False, test_delete=True, test_unrescue=True)
|
||||
|
||||
@mock.patch.object(compute_manager.ComputeManager,
|
||||
'_delete_dangling_bdms')
|
||||
@mock.patch('nova.virt.fake.FakeDriver.reboot')
|
||||
@mock.patch('nova.objects.instance.Instance.save')
|
||||
@mock.patch.object(objects.BlockDeviceMappingList, 'get_by_instance_uuid')
|
||||
@ -3141,7 +3271,7 @@ class ComputeTestCase(BaseTestCase,
|
||||
def _test_reboot_with_accels(self, mock_notify_action, mock_get_power,
|
||||
mock_get_orig, mock_update, mock_notify_usage,
|
||||
mock_get_blk, mock_get_bdms, mock_inst_save, mock_reboot,
|
||||
extra_specs=None, accel_info=None):
|
||||
mock_del_stale_bdms, extra_specs=None, accel_info=None):
|
||||
|
||||
self.compute.network_api.get_instance_nw_info = mock.Mock()
|
||||
|
||||
|
@ -0,0 +1,24 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
This change ensures the synchronization of volume attachments
|
||||
between Nova and Cinder, by deleting any dangling volume attachments
|
||||
and maintaining consistency between two databases.
|
||||
|
||||
Block device mapping (BDM) table in the Nova database, stores
|
||||
information about volume attachments, image attachments
|
||||
and swap attachments. Similarly, each volume attachment had a
|
||||
corresponding entry in the Cinder database volume attachment table.
|
||||
|
||||
With this change, on instance reboot, Nova will checks for all volume
|
||||
attachments associated with the instance and verifies their availability
|
||||
in the Cinder database. If attachments are not found they will get deleted
|
||||
from Nova database too.
|
||||
|
||||
After Nova database cleanup, similarly Cinder database is checked for
|
||||
attachments related to instance. If attachments found in Cinder DB that
|
||||
are not present in Nova DB, they will get deleted from Cinder databse.
|
||||
|
||||
See `spec`__ for more details.
|
||||
|
||||
__ https://specs.openstack.org/openstack/nova-specs/specs/2023.2/approved/cleanup-dangling-volume-attachments.html
|
Loading…
x
Reference in New Issue
Block a user