Merge "Allow claiming PCI PF if child VF is unavailable"
This commit is contained in:
commit
7520711a0e
@ -346,10 +346,40 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject):
|
||||
# Update PF status to CLAIMED if all of it dependants are free
|
||||
# and set their status to UNCLAIMABLE
|
||||
vfs_list = self.child_devices
|
||||
if not all([vf.is_available() for vf in vfs_list]):
|
||||
raise exception.PciDeviceVFInvalidStatus(
|
||||
compute_node_id=self.compute_node_id,
|
||||
address=self.address)
|
||||
non_free_dependants = [
|
||||
vf for vf in vfs_list if not vf.is_available()]
|
||||
if non_free_dependants:
|
||||
# NOTE(gibi): There should not be any dependent devices that
|
||||
# are UNCLAIMABLE or UNAVAILABLE as the parent is AVAILABLE,
|
||||
# but we got reports in bug 1969496 that this inconsistency
|
||||
# can happen. So check if the only non-free devices are in
|
||||
# state UNCLAIMABLE or UNAVAILABLE then we log a warning but
|
||||
# allow to claim the parent.
|
||||
actual_statuses = {
|
||||
child.status for child in non_free_dependants}
|
||||
allowed_non_free_statues = {
|
||||
fields.PciDeviceStatus.UNCLAIMABLE,
|
||||
fields.PciDeviceStatus.UNAVAILABLE,
|
||||
}
|
||||
if actual_statuses - allowed_non_free_statues == set():
|
||||
LOG.warning(
|
||||
"Some child device of parent %s is in an inconsistent "
|
||||
"state. If you can reproduce this warning then please "
|
||||
"report a bug at "
|
||||
"https://bugs.launchpad.net/nova/+filebug with "
|
||||
"reproduction steps. Inconsistent children with "
|
||||
"state: %s",
|
||||
self.address,
|
||||
",".join(
|
||||
"%s - %s" % (child.address, child.status)
|
||||
for child in non_free_dependants
|
||||
),
|
||||
)
|
||||
|
||||
else:
|
||||
raise exception.PciDeviceVFInvalidStatus(
|
||||
compute_node_id=self.compute_node_id,
|
||||
address=self.address)
|
||||
self._bulk_update_status(vfs_list,
|
||||
fields.PciDeviceStatus.UNCLAIMABLE)
|
||||
|
||||
|
@ -279,8 +279,12 @@ class PciDeviceStats(object):
|
||||
if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF:
|
||||
vfs_list = pci_dev.child_devices
|
||||
if vfs_list:
|
||||
free_devs = self.get_free_devs()
|
||||
for vf in vfs_list:
|
||||
self.remove_device(vf)
|
||||
# NOTE(gibi): do not try to remove a device that are
|
||||
# already removed
|
||||
if vf in free_devs:
|
||||
self.remove_device(vf)
|
||||
elif pci_dev.dev_type in (
|
||||
fields.PciDeviceType.SRIOV_VF,
|
||||
fields.PciDeviceType.VDPA,
|
||||
|
@ -507,32 +507,134 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
|
||||
)
|
||||
# now try to claim and allocate the PF. It should work as it is
|
||||
# available
|
||||
# This is bug 1969496 as the claim fails with exception
|
||||
ex = self.assertRaises(
|
||||
exception.PciDevicePoolEmpty,
|
||||
self.tracker.claim_instance,
|
||||
mock.sentinel.context,
|
||||
pci_requests_obj,
|
||||
None
|
||||
)
|
||||
self.assertIn(
|
||||
'Attempt to consume PCI device 1:0000:00:02.1 from empty pool',
|
||||
str(ex)
|
||||
)
|
||||
self.tracker.claim_instance(
|
||||
mock.sentinel.context, pci_requests_obj, None)
|
||||
self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
self.assertEqual('allocated', pf_dev.status)
|
||||
vf_dev = self._get_device_by_address(vf['address'])
|
||||
self.assertEqual('unavailable', vf_dev.status)
|
||||
|
||||
# This should work when the bug is fixed
|
||||
# self.tracker.claim_instance(
|
||||
# mock.sentinel.context, pci_requests_obj, None)
|
||||
# self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
||||
self.assertIn(
|
||||
'Some child device of parent 0000:00:01.1 is in an inconsistent '
|
||||
'state. If you can reproduce this warning then please report a '
|
||||
'bug at https://bugs.launchpad.net/nova/+filebug with '
|
||||
'reproduction steps. Inconsistent children with state: '
|
||||
'0000:00:02.1 - unavailable',
|
||||
self.stdlog.logger.output
|
||||
)
|
||||
|
||||
# pf_dev = self._get_device_by_address(pf['address'])
|
||||
# self.assertEqual('allocated', pf_dev.status)
|
||||
# vf_dev = self._get_device_by_address(vf['address'])
|
||||
# self.assertEqual('unavailable', vf_dev.status)
|
||||
# Ensure that the claim actually fixes the inconsistency so when the
|
||||
# parent if freed the children become available too.
|
||||
self.tracker.free_instance(
|
||||
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
vf_dev = self._get_device_by_address(vf['address'])
|
||||
self.assertEqual('available', vf_dev.status)
|
||||
|
||||
def test_claim_available_pf_while_children_vfs_are_in_mixed_state(self):
|
||||
# We start with a PF parent and two VF children. The PF is available
|
||||
# and one of the VF is unavailable while the other is available.
|
||||
pf = copy.deepcopy(fake_db_dev_3)
|
||||
vf1 = copy.deepcopy(fake_db_dev_4)
|
||||
vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE
|
||||
vf2 = copy.deepcopy(fake_db_dev_5)
|
||||
vf2['status'] = fields.PciDeviceStatus.AVAILABLE
|
||||
self._create_tracker([pf, vf1, vf2])
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||
self.assertEqual('unavailable', vf1_dev.status)
|
||||
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||
self.assertEqual('available', vf2_dev.status)
|
||||
|
||||
pci_requests_obj = self._create_pci_requests_object(
|
||||
[
|
||||
{
|
||||
'count': 1,
|
||||
'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
|
||||
}
|
||||
],
|
||||
instance_uuid=uuidsentinel.instance1,
|
||||
)
|
||||
# now try to claim and allocate the PF. It should work as it is
|
||||
# available
|
||||
self.tracker.claim_instance(
|
||||
mock.sentinel.context, pci_requests_obj, None)
|
||||
self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('allocated', pf_dev.status)
|
||||
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||
self.assertEqual('unavailable', vf1_dev.status)
|
||||
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||
self.assertEqual('unavailable', vf2_dev.status)
|
||||
|
||||
self.assertIn(
|
||||
'Some child device of parent 0000:00:01.1 is in an inconsistent '
|
||||
'state. If you can reproduce this warning then please report a '
|
||||
'bug at https://bugs.launchpad.net/nova/+filebug with '
|
||||
'reproduction steps. Inconsistent children with state: '
|
||||
'0000:00:02.1 - unavailable',
|
||||
self.stdlog.logger.output
|
||||
)
|
||||
|
||||
# Ensure that the claim actually fixes the inconsistency so when the
|
||||
# parent if freed the children become available too.
|
||||
self.tracker.free_instance(
|
||||
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||
self.assertEqual('available', vf1_dev.status)
|
||||
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||
self.assertEqual('available', vf2_dev.status)
|
||||
|
||||
def test_claim_available_pf_while_a_child_is_used(self):
|
||||
pf = copy.deepcopy(fake_db_dev_3)
|
||||
vf1 = copy.deepcopy(fake_db_dev_4)
|
||||
vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE
|
||||
vf2 = copy.deepcopy(fake_db_dev_5)
|
||||
vf2['status'] = fields.PciDeviceStatus.CLAIMED
|
||||
self._create_tracker([pf, vf1, vf2])
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||
self.assertEqual('unavailable', vf1_dev.status)
|
||||
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||
self.assertEqual('claimed', vf2_dev.status)
|
||||
|
||||
pci_requests_obj = self._create_pci_requests_object(
|
||||
[
|
||||
{
|
||||
'count': 1,
|
||||
'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
|
||||
}
|
||||
],
|
||||
instance_uuid=uuidsentinel.instance1,
|
||||
)
|
||||
# now try to claim and allocate the PF. The claim should fail as on of
|
||||
# the child is used.
|
||||
self.assertRaises(
|
||||
exception.PciDeviceVFInvalidStatus,
|
||||
self.tracker.claim_instance,
|
||||
mock.sentinel.context,
|
||||
pci_requests_obj,
|
||||
None,
|
||||
)
|
||||
|
||||
pf_dev = self._get_device_by_address(pf['address'])
|
||||
self.assertEqual('available', pf_dev.status)
|
||||
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||
self.assertEqual('unavailable', vf1_dev.status)
|
||||
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||
self.assertEqual('claimed', vf2_dev.status)
|
||||
|
||||
def test_update_pci_for_instance_active(self):
|
||||
pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)
|
||||
|
Loading…
x
Reference in New Issue
Block a user