diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst index a26c3be65123..83683589c80d 100644 --- a/doc/source/admin/virtual-gpu.rst +++ b/doc/source/admin/virtual-gpu.rst @@ -31,6 +31,61 @@ To enable virtual GPUs, follow the steps below: Enable GPU types (Compute) -------------------------- +#. Enable virtual functions on NVIDIA GPUs. + + .. code-block:: bash + + $ /usr/lib/nvidia/sriov-manage -e domain:bus:slot.function + + For example, to enable the virtual functions for the GPU with + domain ``0000``, bus ``41``, slot ``00``, and function ``0``: + + .. code-block:: bash + + $ /usr/lib/nvidia/sriov-manage -e 0000:41:00.0 + + You may want to automate this process as it has to be done on each boot of + the host. + + Given an example ``systemd`` template unit file named + ``nvidia-sriov-manage@.service``: + + .. code-block:: text + + [Unit] + After = nvidia-vgpu-mgr.service + After = nvidia-vgpud.service + Description = Enable Nvidia GPU virtual functions + + [Service] + Type = oneshot + User = root + Group = root + ExecStart = /usr/lib/nvidia/sriov-manage -e %i + # Give a reasonable amount of time for the server to start up/shut down + TimeoutSec = 120 + # This creates a specific slice which all services will operate from + # The accounting options give us the ability to see resource usage + # through the `systemd-cgtop` command. + Slice = system.slice + # Set Accounting + CPUAccounting = True + BlockIOAccounting = True + MemoryAccounting = True + TasksAccounting = True + RemainAfterExit = True + ExecStartPre = /usr/bin/sleep 30 + + [Install] + WantedBy = multi-user.target + + To enable the virtual functions for the GPU with domain ``0000``, bus ``41``, + slot ``00``, and function ``0``: + + .. code-block:: bash + + $ systemctl enable nvidia-sriov-manage@0000:41:00.0.service + #. Specify which specific GPU type(s) the instances would get. 
Edit :oslo.config:option:`devices.enabled_mdev_types`: diff --git a/nova/tests/fixtures/libvirt.py b/nova/tests/fixtures/libvirt.py index 76840ef52c58..ac44f2beb927 100644 --- a/nova/tests/fixtures/libvirt.py +++ b/nova/tests/fixtures/libvirt.py @@ -188,6 +188,8 @@ VIR_CONNECT_LIST_DOMAINS_INACTIVE = 2 VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV = 2 VIR_CONNECT_LIST_NODE_DEVICES_CAP_NET = 1 << 4 VIR_CONNECT_LIST_NODE_DEVICES_CAP_VDPA = 1 << 17 +VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV = 1 << 5 +VIR_CONNECT_LIST_NODE_DEVICES_INACTIVE = 1 << 8 # secret type VIR_SECRET_USAGE_TYPE_NONE = 0 @@ -200,6 +202,12 @@ VIR_DOMAIN_METADATA_DESCRIPTION = 0 VIR_DOMAIN_METADATA_TITLE = 1 VIR_DOMAIN_METADATA_ELEMENT = 2 +# virNodeDeviceCreateXML flags +VIR_NODE_DEVICE_CREATE_XML_VALIDATE = 4 + +# virNodeDeviceDefineXML flags +VIR_NODE_DEVICE_DEFINE_XML_VALIDATE = 5 + # Libvirt version to match MIN_LIBVIRT_VERSION in driver.py FAKE_LIBVIRT_VERSION = versionutils.convert_version_to_int( libvirt_driver.MIN_LIBVIRT_VERSION) @@ -2126,6 +2134,12 @@ class Connection(object): error_code=VIR_ERR_NO_NODE_DEVICE, error_domain=VIR_FROM_NODEDEV) + def nodeDeviceCreateXML(self, xml, flags): + pass + + def nodeDeviceDefineXML(self, xml, flags): + pass + def listDevices(self, cap, flags): if cap == 'pci': return self.pci_info.get_all_devices() diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 8f108d216b89..95145c591837 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -19,6 +19,7 @@ import os_resource_classes as orc from oslo_config import cfg from oslo_log import log as logging from oslo_utils import uuidutils +from oslo_utils import versionutils from nova.compute import instance_actions import nova.conf @@ -55,9 +56,16 @@ class VGPUTestBase(base.ServersTestBase): 'used': 44, 'free': 84, } - self.useFixture(fixtures.MockPatch( - 'nova.privsep.libvirt.create_mdev', - 
+ side_effect=self._create_mdev)) + # Persistent mdevs in libvirt >= 7.3.0 + if self.FAKE_LIBVIRT_VERSION < versionutils.convert_version_to_int( + libvirt_driver.MIN_LIBVIRT_PERSISTENT_MDEV): + create_mdev_str = 'nova.privsep.libvirt.create_mdev' + else: + create_mdev_str = ( + 'nova.virt.libvirt.driver.LibvirtDriver._create_mdev') + self._create_mdev = self._create_mdev_7_3 + self.useFixture( + fixtures.MockPatch(create_mdev_str, side_effect=self._create_mdev)) # for the sake of resizing, we need to patch the two methods below self.useFixture(fixtures.MockPatch( @@ -114,6 +122,23 @@ class VGPUTestBase(base.ServersTestBase): parent=libvirt_parent)}) return uuid + def _create_mdev_7_3(self, dev_name, mdev_type, uuid=None): + # We need to fake the newly created sysfs object by adding a new + # FakeMdevDevice in the existing persisted Connection object so + # when asking to get the existing mdevs, we would see it. + if not uuid: + uuid = uuidutils.generate_uuid() + mdev_name = libvirt_utils.mdev_uuid2name(uuid) + # Here, we get the right compute thanks to self._current_host, which + # was modified just before + connection = self.computes[ + self._current_host].driver._host.get_connection() + connection.mdev_info.devices.update( + {mdev_name: fakelibvirt.FakeMdevDevice(dev_name=mdev_name, + type_id=mdev_type, + parent=dev_name)}) + return uuid + def start_compute_with_vgpu(self, hostname, pci_info=None): if not pci_info: pci_info = fakelibvirt.HostPCIDevicesInfo( @@ -751,3 +776,10 @@ class DifferentMdevClassesTests(VGPUTestBase): expected_rc='CUSTOM_NOTVGPU') self.assert_mdev_usage(self.compute2, expected_amount=1, expected_rc='CUSTOM_NOTVGPU') + + +class VGPUTestsLibvirt7_3(VGPUTests): + + # Minimum version supporting persistent mdevs is 7.3.0. 
+ # https://libvirt.org/drvnodedev.html#mediated-devices-mdevs + FAKE_LIBVIRT_VERSION = 7003000 diff --git a/nova/tests/functional/regressions/test_bug_1951656.py b/nova/tests/functional/regressions/test_bug_1951656.py index d705ff6fe315..ef4646180710 100644 --- a/nova/tests/functional/regressions/test_bug_1951656.py +++ b/nova/tests/functional/regressions/test_bug_1951656.py @@ -11,37 +11,13 @@ # License for the specific language governing permissions and limitations # under the License. -from oslo_utils import uuidutils - - from nova.tests.fixtures import libvirt as fakelibvirt from nova.tests.functional.libvirt import test_vgpu -from nova.virt.libvirt import utils as libvirt_utils class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase): - def _create_mdev(self, physical_device, mdev_type, uuid=None): - # We need to fake the newly created sysfs object by adding a new - # FakeMdevDevice in the existing persisted Connection object so - # when asking to get the existing mdevs, we would see it. - if not uuid: - uuid = uuidutils.generate_uuid() - mdev_name = libvirt_utils.mdev_uuid2name(uuid) - libvirt_parent = self.pci2libvirt_address(physical_device) - - # Libvirt 7.7 now creates mdevs with a parent_addr suffix. 
- new_mdev_name = '_'.join([mdev_name, libvirt_parent]) - - # Here, we get the right compute thanks by the self.current_host that - # was modified just before - connection = self.computes[ - self._current_host].driver._host.get_connection() - connection.mdev_info.devices.update( - {mdev_name: fakelibvirt.FakeMdevDevice(dev_name=new_mdev_name, - type_id=mdev_type, - parent=libvirt_parent)}) - return uuid + FAKE_LIBVIRT_VERSION = 7007000 def setUp(self): super(VGPUTestsLibvirt7_7, self).setUp() diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py index b91c68557123..c37dcbe2162e 100644 --- a/nova/tests/unit/virt/libvirt/test_config.py +++ b/nova/tests/unit/virt/libvirt/test_config.py @@ -3350,6 +3350,9 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest): self.assertEqual(12, obj.mdev_information.iommu_group) self.assertIsNone(obj.mdev_information.uuid) + xmlout = obj.to_xml() + self.assertXmlEqual(xmlin, xmlout) + def test_config_mdev_device_uuid(self): xmlin = """ @@ -3375,6 +3378,19 @@ class LibvirtConfigNodeDeviceTest(LibvirtConfigBaseTest): self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a", obj.mdev_information.uuid) + expected_xml = """ + + mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0 + pci_0000_41_00_0 + + + b2107403-110c-45b0-af87-32cc91597b8a + + + """ + xmlout = obj.to_xml() + self.assertXmlEqual(expected_xml, xmlout) + def test_config_vdpa_device(self): xmlin = """ diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index b43860f5b9f7..0726c51f4d9a 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -27596,6 +27596,142 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): self.assertRaises(exception.InvalidLibvirtMdevConfig, drvr.init_host, host='foo') + @mock.patch('oslo_utils.uuidutils.generate_uuid') + def test_create_mdev(self, mock_generate_uuid, 
uuid=None, drvr=None): + if drvr is None: + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._host = mock.Mock() + + r = drvr._create_mdev( + mock.sentinel.dev_name, mock.sentinel.mdev_type, uuid=uuid) + + drvr._host.device_create.assert_called_once() + dev_conf = drvr._host.device_create.call_args.args[0] + self.assertIsInstance(dev_conf, vconfig.LibvirtConfigNodeDevice) + self.assertEqual(mock.sentinel.dev_name, dev_conf.parent) + self.assertEqual( + mock.sentinel.mdev_type, dev_conf.mdev_information.type) + expected_uuid = uuid or mock_generate_uuid.return_value + self.assertEqual(expected_uuid, dev_conf.mdev_information.uuid) + drvr._host.device_define.assert_called_once_with(dev_conf) + drvr._host.device_set_autostart.assert_called_once_with( + drvr._host.device_define.return_value, autostart=True) + self.assertEqual(expected_uuid, r) + + def test_create_mdev_with_uuid(self): + self.test_create_mdev(uuid=uuids.mdev) + + @mock.patch('nova.virt.libvirt.driver.LOG.info') + def test_create_mdev_autostart_error(self, mock_log_info): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._host = mock.Mock() + drvr._host.device_set_autostart.side_effect = test.TestingException( + 'error') + + self.test_create_mdev(uuid=uuids.mdev, drvr=drvr) + + mock_log_info.assert_called_once_with( + 'Failed to set autostart to True for mdev ' + f'{drvr._host.device_define.return_value.name.return_value} with ' + f'UUID {uuids.mdev}: error.') + + @mock.patch.object( + libvirt_driver.LibvirtDriver, + '_register_all_undefined_instance_details', new=mock.Mock()) + def test_start_inactive_mediated_devices_on_init_host(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + device1 = mock.MagicMock() + device2 = mock.MagicMock() + drvr._host = mock.Mock() + drvr._host.list_all_devices.return_value = [device1, device2] + + drvr.init_host(host='foo') + + flags = ( + fakelibvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV | + 
fakelibvirt.VIR_CONNECT_LIST_NODE_DEVICES_INACTIVE) + drvr._host.list_all_devices.assert_called_once_with(flags) + self.assertEqual( + [mock.call(device1), mock.call(device2)], + drvr._host.device_start.mock_calls) + + @mock.patch.object( + libvirt_driver.LibvirtDriver, '_get_all_assigned_mediated_devices', + new=mock.Mock(return_value={})) + @mock.patch.object( + libvirt_driver.LibvirtDriver, 'destroy', new=mock.Mock()) + @mock.patch('oslo_utils.fileutils.ensure_tree', new=mock.Mock()) + @mock.patch( + 'nova.virt.libvirt.blockinfo.get_disk_info', + new=mock.Mock(return_value=mock.sentinel.disk_info)) + @mock.patch.object(libvirt_driver.LibvirtDriver, '_allocate_mdevs') + @mock.patch('nova.objects.Instance.image_meta') + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_guest_xml') + @mock.patch.object( + libvirt_driver.LibvirtDriver, '_create_images_and_backing', + new=mock.Mock()) + @mock.patch( + 'oslo_service.loopingcall.FixedIntervalLoopingCall', new=mock.Mock()) + def _test_hard_reboot_allocate_missing_mdevs( + self, mock_get_xml, mock_image_meta, mock_allocate_mdevs): + mock_compute = mock.Mock() + mock_compute.reportclient.get_allocations_for_consumer.return_value = ( + mock.sentinel.allocations) + virtapi = manager.ComputeVirtAPI(mock_compute) + drvr = libvirt_driver.LibvirtDriver(virtapi, True) + ctxt = context.get_admin_context() + instance = objects.Instance( + uuid=uuids.instance, + system_metadata={}, + image_ref=uuids.image, + flavor=objects.Flavor(extra_specs={'resources:VGPU': 1})) + + drvr._hard_reboot(ctxt, instance, mock.sentinel.network_info) + + (mock_compute.reportclient.get_allocations_for_consumer. 
+ assert_called_once_with(ctxt, instance.uuid)) + mock_allocate_mdevs.assert_called_once_with(mock.sentinel.allocations) + mock_get_xml.assert_called_once_with( + ctxt, instance, mock.sentinel.network_info, + mock.sentinel.disk_info, mock_image_meta, block_device_info=None, + mdevs=mock_allocate_mdevs.return_value, accel_info=None) + + return ctxt, mock_get_xml, instance + + @mock.patch.object( + libvirt_driver.LibvirtDriver, '_create_guest_with_network', + new=mock.Mock()) + def test_hard_reboot_allocate_missing_mdevs(self): + # Test a scenario where the instance's flavor requests VGPU but for + # whatever reason (example: libvirt error raised after the domain was + # undefined) it is missing assigned mdevs. + self._test_hard_reboot_allocate_missing_mdevs() + + @mock.patch.object( + libvirt_driver.LibvirtDriver, '_create_guest_with_network') + def test_hard_reboot_allocate_missing_mdevs_fail(self, mock_create_guest): + # Test the scenario where a libvirt error is raised the first time we + # try to create the guest after allocating missing mdevs. + err_msg = ( + 'error getting device from group 0: Input/output error ' + 'Verify all devices in group 0 are bound to vfio- or pci-stub' + 'and not already in use') + error = fakelibvirt.make_libvirtError( + fakelibvirt.libvirtError, err_msg, error_message=err_msg, + error_code=fakelibvirt.VIR_ERR_INTERNAL_ERROR) + # First attempt to create the guest fails and the second succeeds. + mock_create_guest.side_effect = [error, None] + + ctxt, mock_get_xml, instance = ( + self._test_hard_reboot_allocate_missing_mdevs()) + + call = mock.call( + ctxt, mock_get_xml.return_value, instance, + mock.sentinel.network_info, None, vifs_already_plugged=True, + external_events=[]) + # We should have tried to create the guest twice. 
+ self.assertEqual([call, call], mock_create_guest.mock_calls) + @mock.patch.object(libvirt_guest.Guest, 'detach_device') def _test_detach_mediated_devices(self, side_effect, detach_device): diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py index ff1eb925b2b3..a04ce0134b67 100644 --- a/nova/virt/libvirt/config.py +++ b/nova/virt/libvirt/config.py @@ -3367,6 +3367,16 @@ class LibvirtConfigNodeDevice(LibvirtConfigObject): self.vdpa_capability = None self.vpd_capability = None + def format_dom(self): + dev = super().format_dom() + if self.name: + dev.append(self._text_node('name', str(self.name))) + if self.parent: + dev.append(self._text_node('parent', str(self.parent))) + if self.mdev_information: + dev.append(self.mdev_information.format_dom()) + return dev + def parse_dom(self, xmldoc): super(LibvirtConfigNodeDevice, self).parse_dom(xmldoc) @@ -3520,6 +3530,21 @@ class LibvirtConfigNodeDeviceMdevInformation(LibvirtConfigObject): self.iommu_group = None self.uuid = None + def format_dom(self): + dev = super().format_dom() + dev.set('type', 'mdev') + if self.type: + mdev_type = self._new_node('type') + mdev_type.set('id', self.type) + dev.append(mdev_type) + if self.uuid: + dev.append(self._text_node('uuid', self.uuid)) + if self.iommu_group: + iommu_group = self._new_node('iommuGroup') + iommu_group.set('number', str(self.iommu_group)) + dev.append(iommu_group) + return dev + def parse_dom(self, xmldoc): super(LibvirtConfigNodeDeviceMdevInformation, self).parse_dom(xmldoc) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 88969306a3b0..6aad2ce5d055 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -243,6 +243,14 @@ VGPU_RESOURCE_SEMAPHORE = 'vgpu_resources' MIN_MDEV_LIVEMIG_LIBVIRT_VERSION = (8, 6, 0) MIN_MDEV_LIVEMIG_QEMU_VERSION = (8, 1, 0) +# Minimum version supporting persistent mdevs. 
+# https://libvirt.org/drvnodedev.html#mediated-devices-mdevs +MIN_LIBVIRT_PERSISTENT_MDEV = (7, 3, 0) + +# Autostart appears to be available starting in 7.8.0 +# https://github.com/libvirt/libvirt/commit/c6607a25b93bd6b0188405785d6608fdf71c8e0a +MIN_LIBVIRT_NODEDEV_AUTOSTART = (7, 8, 0) + LIBVIRT_PERF_EVENT_PREFIX = 'VIR_PERF_PARAM_' # Maxphysaddr minimal support version. @@ -837,9 +845,17 @@ class LibvirtDriver(driver.ComputeDriver): # wrongly modified. libvirt_cpu.power_down_all_dedicated_cpus() - # TODO(sbauza): Remove this code once mediated devices are persisted - # across reboots. - self._recreate_assigned_mediated_devices() + if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV): + # TODO(sbauza): Remove this code once mediated devices are + # persisted across reboots. + self._recreate_assigned_mediated_devices() + else: + # NOTE(melwitt): We shouldn't need to do this with libvirt 7.8.0 + # and newer because we're setting autostart=True on the devices -- + # but if that fails for whatever reason and any devices become + # inactive, we can start them here. With libvirt version < 7.8.0, + # this is needed because autostart is not available. + self._start_inactive_mediated_devices() self._check_cpu_compatibility() @@ -1088,6 +1104,25 @@ class LibvirtDriver(driver.ComputeDriver): LOG.debug('Enabling emulated TPM support') + def _start_inactive_mediated_devices(self): + # Get a list of inactive mdevs so we can start them and make them + # active. We need to start inactive mdevs even if they are not + # currently assigned to instances because attempting to use an inactive + # mdev when booting a new instance, for example, will raise an error: + # libvirt.libvirtError: device not found: mediated device '' not + # found. + # An inactive mdev is an mdev that is defined but not created. 
+ flags = ( + libvirt.VIR_CONNECT_LIST_NODE_DEVICES_CAP_MDEV | + libvirt.VIR_CONNECT_LIST_NODE_DEVICES_INACTIVE) + inactive_mdevs = self._host.list_all_devices(flags) + if inactive_mdevs: + names = [mdev.name() for mdev in inactive_mdevs] + LOG.info(f'Found inactive mdevs: {names}') + for mdev in inactive_mdevs: + LOG.info(f'Starting inactive mdev: {mdev.name()}') + self._host.device_start(mdev) + @staticmethod def _is_existing_mdev(uuid): # FIXME(sbauza): Some kernel can have a uevent race meaning that the @@ -4059,6 +4094,19 @@ class LibvirtDriver(driver.ComputeDriver): instance, instance.image_meta, block_device_info) + # NOTE(melwitt): It's possible that we lost track of the allocated + # mdevs of an instance if, for example, a libvirt error was encountered + # after the domain XML was undefined in a previous hard reboot. + # Try to get existing mdevs that are created but not assigned so they + # will be added into the generated domain XML. + if instance.flavor.extra_specs.get('resources:VGPU') and not mdevs: + LOG.info( + 'The instance flavor requests VGPU but no mdevs are assigned ' + 'to the instance. Attempting to re-assign mdevs.', + instance=instance) + allocs = self.virtapi.reportclient.get_allocations_for_consumer( + context, instance.uuid) + mdevs = self._allocate_mdevs(allocs) # NOTE(vish): This could generate the wrong device_format if we are # using the raw backend and the images don't exist yet. # The create_images_and_backing below doesn't properly @@ -4110,10 +4158,32 @@ class LibvirtDriver(driver.ComputeDriver): # NOTE(efried): The instance should already have a vtpm_secret_uuid # registered if appropriate. 
- self._create_guest_with_network( - context, xml, instance, network_info, block_device_info, - vifs_already_plugged=vifs_already_plugged, - external_events=external_events) + try: + self._create_guest_with_network( + context, xml, instance, network_info, block_device_info, + vifs_already_plugged=vifs_already_plugged, + external_events=external_events) + except libvirt.libvirtError as e: + errcode = e.get_error_code() + errmsg = e.get_error_message() + # NOTE(melwitt): If we are reassigning mdevs, we might hit the + # following error on the first attempt to create the guest: + # error getting device from group : Input/output error + # Verify all devices in group are bound to vfio- or + # pci-stub and not already in use + # Retry the guest creation once in this case as it usually succeeds + # on the second try. + if (mdevs and errcode == libvirt.VIR_ERR_INTERNAL_ERROR and + 'error getting device from group' in errmsg): + LOG.info( + f'Encountered error {errmsg}, reattempting creation of ' + 'the guest.', instance=instance) + self._create_guest_with_network( + context, xml, instance, network_info, block_device_info, + vifs_already_plugged=vifs_already_plugged, + external_events=external_events) + else: + raise def _wait_for_reboot(): """Called at an interval until the VM is running again.""" @@ -8726,6 +8796,33 @@ class LibvirtDriver(driver.ComputeDriver): LOG.info('Available mdevs at: %s.', available_mdevs) return available_mdevs + def _create_mdev(self, dev_name, mdev_type, uuid=None): + if uuid is None: + uuid = uuidutils.generate_uuid() + conf = vconfig.LibvirtConfigNodeDevice() + conf.parent = dev_name + conf.mdev_information = ( + vconfig.LibvirtConfigNodeDeviceMdevInformation()) + conf.mdev_information.type = mdev_type + conf.mdev_information.uuid = uuid + # Create the transient device. + self._host.device_create(conf) + # Define it to make it persistent. 
+ mdev_dev = self._host.device_define(conf) + if self._host.has_min_version(MIN_LIBVIRT_NODEDEV_AUTOSTART): + # Set it to automatically start when the compute host boots or the + # parent device becomes available. + # NOTE(melwitt): Make this not fatal because we can try to manually + # start mdevs in init_host() if they didn't start automatically + # after a host reboot. + try: + self._host.device_set_autostart(mdev_dev, autostart=True) + except Exception as e: + LOG.info( + 'Failed to set autostart to True for mdev ' + f'{mdev_dev.name()} with UUID {uuid}: {str(e)}.') + return uuid + def _create_new_mediated_device(self, parent, uuid=None): """Find a physical device that can support a new mediated device and create it. @@ -8755,8 +8852,12 @@ class LibvirtDriver(driver.ComputeDriver): # We need the PCI address, not the libvirt name # The libvirt name is like 'pci_0000_84_00_0' pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_')) - chosen_mdev = nova.privsep.libvirt.create_mdev( - pci_addr, dev_supported_type, uuid=uuid) + if not self._host.has_min_version(MIN_LIBVIRT_PERSISTENT_MDEV): + chosen_mdev = nova.privsep.libvirt.create_mdev( + pci_addr, dev_supported_type, uuid=uuid) + else: + chosen_mdev = self._create_mdev( + dev_name, dev_supported_type, uuid=uuid) LOG.info('Created mdev: %s on pGPU: %s.', chosen_mdev, pci_addr) return chosen_mdev diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 23c9ec01f7a2..a0511c764929 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -1254,6 +1254,65 @@ class Host(object): """ return self.get_connection().nodeDeviceLookupByName(name) + def device_create(self, conf, validate=False): + """Create a node device from specified device XML + + This creates the device as transient. 
+ + :param conf: A LibvirtConfigObject of the device to create + :param validate: whether to validate the XML document against schema + + :returns: a virNodeDevice instance if successful, else None + """ + flag = libvirt.VIR_NODE_DEVICE_CREATE_XML_VALIDATE + flags = validate and flag or 0 + device_xml = conf.to_xml() + return self.get_connection().nodeDeviceCreateXML(device_xml, flags) + + def device_define(self, conf, validate=False): + """Define a node device from specified device XML + + This defines the device to make it persistent. + + :param conf: A LibvirtConfigObject of the device to create + :param validate: whether to validate the XML document against schema + + :returns: a virNodeDevice instance if successful, else None + """ + flag = libvirt.VIR_NODE_DEVICE_DEFINE_XML_VALIDATE + flags = validate and flag or 0 + device_xml = conf.to_xml() + return self.get_connection().nodeDeviceDefineXML(device_xml, flags) + + def device_start(self, dev): + """Start a defined node device + + :param dev: The virNodeDevice instance to start + """ + # extra flags; not used yet, so callers should always pass 0 + # https://libvirt.org/html/libvirt-libvirt-nodedev.html + flags = 0 + result = dev.create(flags) + if result == -1: + msg = f'Failed to start node device {dev.name()}' + raise exception.InternalError(_(msg)) + + def device_set_autostart(self, dev, autostart=True): + """Set a node device to automatically start when the host boots + + This can set whether the node device should automatically start when + the host machine boots or when the parent device becomes available. 
+ + :param dev: The virNodeDevice instance to set the autostart value + :param autostart: Whether to set the device to automatically start + """ + result = dev.setAutostart(autostart=autostart) + if result == -1: + msg = ( + f'Failed to set autostart to {autostart} for node device ' + f'{dev.name()}') + raise exception.InternalError(_(msg)) + def _get_pcinet_info( self, dev: 'libvirt.virNodeDevice', @@ -1611,7 +1670,7 @@ class Host(object): """Lookup devices. :param flags: a bitmask of flags to filter the returned devices. - :returns: a list of virNodeDevice xml strings. + :returns: a list of virNodeDevice instances. """ try: return self.get_connection().listAllDevices(flags) or [] diff --git a/releasenotes/notes/persistent-mdevs-2a69e44baae9d5ca.yaml b/releasenotes/notes/persistent-mdevs-2a69e44baae9d5ca.yaml new file mode 100644 index 000000000000..50f82e0ec2d7 --- /dev/null +++ b/releasenotes/notes/persistent-mdevs-2a69e44baae9d5ca.yaml @@ -0,0 +1,11 @@ +features: + - | + With the libvirt driver and libvirt version 7.3.0 or newer, mediated + devices for vGPUs are now persisted across reboots of a compute host. + + .. note:: + + After a compute host reboots, the virtual functions for the GPU must be + enabled again before instances will be able to use their vGPUs. + Please see https://docs.openstack.org/nova/latest/admin/virtual-gpu.html + for more information.