diff --git a/doc/source/admin/virtual-gpu.rst b/doc/source/admin/virtual-gpu.rst
index 680f48b5a66e..b4de23da8a47 100644
--- a/doc/source/admin/virtual-gpu.rst
+++ b/doc/source/admin/virtual-gpu.rst
@@ -35,15 +35,33 @@ Enable GPU types (Compute)
       [devices]
       enabled_vgpu_types = nvidia-35
 
-   .. note::
+   If you want to support more than a single GPU type, you need to provide a
+   separate configuration section for each GPU type. For example:
 
-      As of the Queens release, Nova only supports a single type. If more
-      than one vGPU type is specified (as a comma-separated list), only the
-      first one will be used.
+   .. code-block:: ini
+
+      [devices]
+      enabled_vgpu_types = nvidia-35, nvidia-36
+
+      [vgpu_nvidia-35]
+      device_addresses = 0000:84:00.0,0000:85:00.0
+
+      [vgpu_nvidia-36]
+      device_addresses = 0000:86:00.0
+
+   Each ``[vgpu_$(VGPU_TYPE)]`` section defines which physical GPUs are
+   assigned to that GPU type.
+
+   If the same PCI address is provided for two different types, nova-compute
+   will refuse to start and issue a specific error in the logs.
 
    To know which specific type(s) to mention, please refer to `How to discover
    a GPU type`_.
 
+   .. versionchanged:: 21.0.0
+
+      Support for multiple GPU types is only available in the Ussuri (21.0.0)
+      release and later.
+
 #. Restart the ``nova-compute`` service.
 
@@ -269,6 +287,59 @@ OpenStackClient. For details on specific commands, see its documentation.
    physical GPU having the PCI ID ``0000:85:00.0``.
 
+(Optional) Provide custom traits for multiple GPU types
+-------------------------------------------------------
+
+Operators who support different GPU types per compute node may want flavors
+that request a specific GPU type. This is possible by decorating the child
+Resource Providers that correspond to physical GPUs with custom traits.
+
+.. note::
+
+   A future release may automatically tag Resource Providers with standard
+   traits corresponding to a versioned mapping of public GPU types. For the
+   moment, this has to be done manually.
+
+#. Get the list of resource providers
+
+   See `Checking allocations and inventories for virtual GPUs`_ first to get
+   the list of Resource Providers that support a ``VGPU`` resource class.
+
+#. Define one custom trait per GPU type
+
+   .. code-block:: console
+
+      $ openstack --os-placement-api-version 1.6 trait create CUSTOM_NVIDIA_11
+
+   In this example, we create a custom trait named ``CUSTOM_NVIDIA_11``.
+
+#. Add the corresponding trait to the Resource Provider matching the GPU
+
+   .. code-block:: console
+
+      $ openstack --os-placement-api-version 1.6 resource provider trait set \
+          --trait CUSTOM_NVIDIA_11 e2f8607b-0683-4141-a8af-f5e20682e28c
+
+   In this case, the trait ``CUSTOM_NVIDIA_11`` will be added to the Resource
+   Provider with the UUID ``e2f8607b-0683-4141-a8af-f5e20682e28c`` that
+   corresponds to the PCI address ``0000:85:00.0`` as shown above.
+
+#. Amend the flavor to add a requested trait
+
+   .. code-block:: console
+
+      $ openstack flavor set --property trait:CUSTOM_NVIDIA_11=required vgpu_1
+
+   In this example, we add the ``CUSTOM_NVIDIA_11`` trait as a requirement for
+   the ``vgpu_1`` flavor we created earlier.
+
+   This allows the Placement service to return only the Resource Providers
+   matching this trait, so only the GPUs decorated with it will be considered
+   for this flavor.
+
+
 Caveats
 -------
 
@@ -324,6 +395,8 @@ For XenServer:
   resize.
  If you want to migrate an instance, make sure to rebuild it after the
  migration.
 
+* Multiple GPU types per compute node are not supported by the XenServer
+  driver.
+
 .. [#] https://bugs.launchpad.net/nova/+bug/1762688
 
 .. Links
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 8be0f2db288d..d8380c763c05 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1384,6 +1384,8 @@ class ComputeManager(manager.Manager):
             whitelist.Whitelist(CONF.pci.passthrough_whitelist)
 
         nova.conf.neutron.register_dynamic_opts(CONF)
+        # Even if only libvirt uses them, make them available for all drivers
+        nova.conf.devices.register_dynamic_opts(CONF)
 
         # Override the number of concurrent disk operations allowed if the
         # user has specified a limit.
diff --git a/nova/conf/devices.py b/nova/conf/devices.py
index 9dea1fdf3a8c..cae434541cfb 100644
--- a/nova/conf/devices.py
+++ b/nova/conf/devices.py
@@ -24,12 +24,33 @@
 The vGPU types enabled in the compute node.
 
 Some pGPUs (e.g. NVIDIA GRID K1) support different vGPU types. User can use
 this option to specify a list of enabled vGPU types that may be assigned to a
-guest instance. But please note that Nova only supports a single type in the
-Queens release. If more than one vGPU type is specified (as a comma-separated
-list), only the first one will be used. An example is as the following::
+guest instance.
+
+If more than one vGPU type is provided, then for each *vGPU type* an
+additional section, ``[vgpu_$(VGPU_TYPE)]``, must be added to the configuration
+file. Each section then **must** be configured with a single configuration
+option, ``device_addresses``, which should be a list of PCI addresses
+corresponding to the physical GPU(s) to assign to this type.
+
+If a section is missing for one of the enabled types (meaning that this type is
+not wanted for any physical GPU) or if no device addresses are provided, then
+Nova will only use the first type that was provided by
+``[devices]/enabled_vgpu_types``.
+
+If the same PCI address is provided for two different types, nova-compute will
+refuse to start and raise an InvalidLibvirtGPUConfig exception.
+
+For example::
 
     [devices]
-    enabled_vgpu_types = GRID K100,Intel GVT-g,MxGPU.2,nvidia-11
+    enabled_vgpu_types = nvidia-35, nvidia-36
+
+    [vgpu_nvidia-35]
+    device_addresses = 0000:84:00.0,0000:85:00.0
+
+    [vgpu_nvidia-36]
+    device_addresses = 0000:86:00.0
+
 """)
 ]
 
@@ -39,5 +60,20 @@ def register_opts(conf):
     conf.register_opts(vgpu_opts, group=devices_group)
 
+
+def register_dynamic_opts(conf):
+    """Register dynamically-generated options and groups.
+
+    This must be called by the service that wishes to use the options **after**
+    the initial configuration has been loaded.
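+
+    For example, with ``enabled_vgpu_types = nvidia-35`` this registers a
+    ``[vgpu_nvidia-35]`` group exposing a ``device_addresses`` list option.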
+ """ + opt = cfg.ListOpt('device_addresses', default=[], + item_type=cfg.types.String()) + + # Register the '[vgpu_$(VGPU_TYPE)]/device_addresses' opts, implicitly + # registering the '[vgpu_$(VGPU_TYPE)]' groups in the process + for vgpu_type in conf.devices.enabled_vgpu_types: + conf.register_opt(opt, group='vgpu_%s' % vgpu_type) + + def list_opts(): return {devices_group: vgpu_opts} diff --git a/nova/exception.py b/nova/exception.py index a5f16b85fc85..5e8669bae18b 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2297,3 +2297,7 @@ class DeviceProfileError(NovaException): class AcceleratorRequestOpFailed(NovaException): msg_fmt = _("Failed to %(op)s accelerator requests: %(msg)s") + + +class InvalidLibvirtGPUConfig(NovaException): + msg_fmt = _('Invalid configuration for GPU devices: %(reason)s') diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index c27d9a8460f2..f495c1fcd6cb 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -87,6 +87,16 @@ class VGPUReshapeTests(base.ServersTestBase): '/resource_providers/%s/inventories' % compute_rp_uuid, inventories) + # enabled vgpu support + self.flags( + enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + group='devices') + # We don't want to restart the compute service or it would call for + # a reshape but we still want to accept some vGPU types so we call + # directly the needed method + self.compute.driver.supported_vgpu_types = ( + self.compute.driver._get_supported_vgpu_types()) + # now we boot two servers with vgpu extra_spec = {"resources:VGPU": 1} flavor_id = self._create_flavor(extra_spec=extra_spec) @@ -139,10 +149,6 @@ class VGPUReshapeTests(base.ServersTestBase): {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2, 'VGPU': 1}, allocations[compute_rp_uuid]['resources']) - # enabled vgpu support - self.flags( - enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, - group='devices') # restart compute which will trigger a reshape self.compute = self.restart_compute_service(self.compute) diff --git a/nova/tests/unit/conf/test_devices.py b/nova/tests/unit/conf/test_devices.py new file mode 100644 index 000000000000..6de8a97a94b3 --- /dev/null +++ b/nova/tests/unit/conf/test_devices.py @@ -0,0 +1,34 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import nova.conf +from nova import test + + +CONF = nova.conf.CONF + + +class DevicesConfTestCase(test.NoDBTestCase): + + def test_register_dynamic_opts(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + + self.assertNotIn('vgpu_nvidia-11', CONF) + self.assertNotIn('vgpu_nvidia-12', CONF) + + nova.conf.devices.register_dynamic_opts(CONF) + + self.assertIn('vgpu_nvidia-11', CONF) + self.assertIn('vgpu_nvidia-12', CONF) + self.assertEqual([], getattr(CONF, 'vgpu_nvidia-11').device_addresses) + self.assertEqual([], getattr(CONF, 'vgpu_nvidia-12').device_addresses) diff --git a/nova/tests/unit/conf_fixture.py b/nova/tests/unit/conf_fixture.py index b02d73b39e77..ac14c135e657 100644 --- a/nova/tests/unit/conf_fixture.py +++ b/nova/tests/unit/conf_fixture.py @@ -17,6 +17,7 @@ from oslo_config import fixture as config_fixture from oslo_policy import opts as policy_opts +from nova.conf import devices from nova.conf import neutron from nova.conf import paths from nova import config @@ -64,3 +65,4 @@ class ConfFixture(config_fixture.Config): init_rpc=False) policy_opts.set_defaults(self.conf) neutron.register_dynamic_opts(self.conf) + devices.register_dynamic_opts(self.conf) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index eeac3468e96c..e2902fc899a1 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -23264,7 +23264,8 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): '._get_mediated_devices') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver' '._get_mdev_capable_devices') - def test_get_gpu_inventories(self, get_mdev_capable_devs, + def _test_get_gpu_inventories(self, drvr, expected, expected_types, + get_mdev_capable_devs, get_mediated_devices): get_mdev_capable_devs.return_value = [ {"dev_id": "pci_0000_06_00_0", @@ -23279,6 +23280,9 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): "types": {'nvidia-11': {'availableInstances': 7, 'name': 'GRID M60-0B', 'deviceAPI': 'vfio-pci'}, + 'nvidia-12': {'availableInstances': 10, + 'name': 'GRID M60-8Q', + 'deviceAPI': 'vfio-pci'}, } }, ] @@ -23292,13 +23296,19 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'parent': "pci_0000_07_00_0", 'type': 'nvidia-11', 'iommu_group': 1}] - drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual(expected, drvr._get_gpu_inventories()) + get_mdev_capable_devs.assert_called_once_with(types=expected_types) + get_mediated_devices.assert_called_once_with(types=expected_types) + + def test_get_gpu_inventories_with_a_single_type(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) # If the operator doesn't provide GPU types self.assertEqual({}, drvr._get_gpu_inventories()) - # Now, set a specific GPU type + # Now, set a specific GPU type and restart the driver self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) expected = { # the first GPU also has one mdev allocated against it 'pci_0000_06_00_0': {'total': 15 + 1, @@ -23317,9 +23327,158 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'allocation_ratio': 1.0, }, } - self.assertEqual(expected, drvr._get_gpu_inventories()) - get_mdev_capable_devs.assert_called_once_with(types=['nvidia-11']) - get_mediated_devices.assert_called_once_with(types=['nvidia-11']) + self._test_get_gpu_inventories(drvr, expected, 
['nvidia-11']) + + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '._get_mdev_capable_devices') + def test_get_gpu_inventories_with_two_types(self, get_mdev_capable_devs): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + expected = { + # the first GPU supports nvidia-11 and has one mdev with this type + 'pci_0000_06_00_0': {'total': 15 + 1, + 'max_unit': 15 + 1, + 'min_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + }, + # the second GPU supports nvidia-12 but the existing mdev is not + # using this type, so we only count the availableInstances value + # for nvidia-12. + 'pci_0000_07_00_0': {'total': 10, + 'max_unit': 10, + 'min_unit': 1, + 'step_size': 1, + 'reserved': 0, + 'allocation_ratio': 1.0, + }, + } + self._test_get_gpu_inventories(drvr, expected, ['nvidia-11', + 'nvidia-12']) + + @mock.patch.object(libvirt_driver.LOG, 'warning') + def test_get_supported_vgpu_types(self, mock_warning): + # Verify that by default we don't support vGPU types + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual([], drvr._get_supported_vgpu_types()) + + # Now, provide only one supported vGPU type + self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) + # Given we only support one vGPU type, we don't have any map for PCI + # devices *yet* + self.assertEqual({}, drvr.pgpu_type_mapping) + # Since the operator wanted to only support one type, it's fine to not + # provide config groups + mock_warning.assert_not_called() + # For further checking + mock_warning.reset_mock() + + # Now two types without forgetting to provide the pGPU addresses + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.assertEqual(['nvidia-11'], drvr._get_supported_vgpu_types()) + self.assertEqual({}, drvr.pgpu_type_mapping) + msg = ("The vGPU type '%(type)s' was listed in '[devices] " + "enabled_vgpu_types' but no corresponding " + "'[vgpu_%(type)s]' group or " + "'[vgpu_%(type)s] device_addresses' " + "option was defined. Only the first type '%(ftype)s' " + "will be used." % {'type': 'nvidia-12', + 'ftype': 'nvidia-11'}) + mock_warning.assert_called_once_with(msg) + # For further checking + mock_warning.reset_mock() + + # And now do it correctly ! 
+ self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + self.assertEqual(['nvidia-11', 'nvidia-12'], + drvr._get_supported_vgpu_types()) + self.assertEqual({'0000:84:00.0': 'nvidia-11', + '0000:85:00.0': 'nvidia-12'}, drvr.pgpu_type_mapping) + mock_warning.assert_not_called() + + def test_get_supported_vgpu_types_with_duplicate_types(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + # Provide the same pGPU PCI ID for two different types + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-12') + self.assertRaises(exception.InvalidLibvirtGPUConfig, + libvirt_driver.LibvirtDriver, + fake.FakeVirtAPI(), False) + + def test_get_supported_vgpu_types_with_invalid_pci_address(self): + self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + # Fat-finger the PCI address + self.flags(device_addresses=['whoops'], group='vgpu_nvidia-11') + self.assertRaises(exception.InvalidLibvirtGPUConfig, + libvirt_driver.LibvirtDriver, + fake.FakeVirtAPI(), False) + + def test_get_vgpu_type_per_pgpu(self): + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + device = 'pci_0000_84_00_0' + self.assertIsNone(drvr._get_vgpu_type_per_pgpu(device)) + + # BY default, we return the first type if we only support one. 
+ self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) + + # Now, make sure we provide the right vGPU type for the device + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # the libvirt name pci_0000_84_00_0 matches 0000:84:00.0 + self.assertEqual('nvidia-11', drvr._get_vgpu_type_per_pgpu(device)) + + def test_get_vgpu_type_per_pgpu_with_incorrect_pci_addr(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # 'whoops' is not a correct libvirt name corresponding to a PCI address + self.assertIsNone(drvr._get_vgpu_type_per_pgpu('whoops')) + + def test_get_vgpu_type_per_pgpu_with_unconfigured_pgpu(self): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:84:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:85:00.0'], group='vgpu_nvidia-12') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + # 0000:86:00.0 wasn't configured + self.assertIsNone(drvr._get_vgpu_type_per_pgpu('pci_0000_86_00_0')) @mock.patch.object(host.Host, 'device_lookup_by_name') @mock.patch.object(host.Host, 'list_mdev_capable_devices') @@ -23517,7 +23676,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): unallocated_mdevs, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') allocations = { uuids.rp1: { 'resources': { @@ -23529,7 +23694,12 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): get_mdev_capable_devs.return_value = [ {"dev_id": "pci_0000_06_00_0", "vendor_id": 0x10de, - "types": {'nvidia-11': {'availableInstances': 16, + # This pGPU can support both types but the operator only wanted + # to use nvidia-11 for it. 
+ "types": {'nvidia-10': {'availableInstances': 16, + 'name': 'GRID M60-8Q', + 'deviceAPI': 'vfio-pci'}, + 'nvidia-11': {'availableInstances': 16, 'name': 'GRID M60-0B', 'deviceAPI': 'vfio-pci'}, } @@ -23610,11 +23780,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): "pGPU device name %(name)s can't be guessed from the ProviderTree " "roots %(roots)s", {'name': 'oops_I_did_it_again', 'roots': 'cn'}) + @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_vgpu_type_per_pgpu') @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_mediated_devices') @mock.patch.object(libvirt_driver.LibvirtDriver, '_get_all_assigned_mediated_devices') def test_get_existing_mdevs_not_assigned(self, get_all_assigned_mdevs, - get_mediated_devices): + get_mediated_devices, + get_vgpu_type_per_pgpu): # mdev2 is assigned to instance1 get_all_assigned_mdevs.return_value = {uuids.mdev2: uuids.inst1} # there is a total of 2 mdevs, mdev1 and mdev2 @@ -23627,10 +23799,22 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): 'uuid': uuids.mdev2, 'parent': "pci_some", 'type': 'nvidia-11', - 'iommu_group': 1}] + 'iommu_group': 1}, + {'dev_id': 'mdev_some_uuid3', + 'uuid': uuids.mdev3, + 'parent': "pci_some", + 'type': 'nvidia-12', + 'iommu_group': 1}, + ] + + def _fake_get_vgpu_type_per_pgpu(parent_addr): + # Always return the same vGPU type so we avoid mdev3 + return 'nvidia-11' + get_vgpu_type_per_pgpu.side_effect = _fake_get_vgpu_type_per_pgpu drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - # Since mdev2 is assigned to inst1, only mdev1 is available + # Since mdev3 type is not supported and mdev2 is assigned to inst1, + # only mdev1 is available self.assertEqual(set([uuids.mdev1]), drvr._get_existing_mdevs_not_assigned(parent=None)) @@ -23647,7 +23831,13 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): def test_recreate_mediated_device_on_init_host( self, get_all_assigned_mdevs, exists, mock_get_mdev_info, get_mdev_capable_devs, privsep_create_mdev): - self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + # we need to call the below again to ensure the updated + # 'device_addresses' value is read and the new groups created + nova.conf.devices.register_dynamic_opts(CONF) + self.flags(device_addresses=['0000:06:00.0'], group='vgpu_nvidia-11') + self.flags(device_addresses=['0000:07:00.0'], group='vgpu_nvidia-12') get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1, uuids.mdev2: uuids.inst2} @@ -23664,7 +23854,7 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): exists.side_effect = _exists mock_get_mdev_info.side_effect = [ {"dev_id": "mdev_fake", - "uuid": uuids.mdev1, + "uuid": uuids.mdev2, "parent": "pci_0000_06_00_0", "type": "nvidia-11", "iommu_group": 12 @@ -23680,9 +23870,35 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) drvr.init_host(host='foo') + # Only mdev2 will be recreated as mdev1 already exists. privsep_create_mdev.assert_called_once_with( "0000:06:00.0", 'nvidia-11', uuid=uuids.mdev2) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' 
+ '_get_mediated_device_information') + @mock.patch.object(os.path, 'exists') + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_get_all_assigned_mediated_devices') + def test_recreate_mediated_device_on_init_host_with_wrong_config( + self, get_all_assigned_mdevs, exists, mock_get_mdev_info): + self.flags(enabled_vgpu_types=['nvidia-11', 'nvidia-12'], + group='devices') + get_all_assigned_mdevs.return_value = {uuids.mdev1: uuids.inst1} + # We pretend this mdev doesn't exist hence it needs recreation + exists.return_value = False + mock_get_mdev_info.side_effect = [ + {"dev_id": "mdev_fake", + "uuid": uuids.mdev1, + "parent": "pci_0000_06_00_0", + "type": "nvidia-99", + "iommu_group": 12 + }] + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + # mdev1 was originally created for nvidia-99 but the operator messed up + # the configuration by removing this type, we want to hardstop. + self.assertRaises(exception.InvalidLibvirtGPUConfig, + drvr.init_host, host='foo') + @mock.patch.object(libvirt_guest.Guest, 'detach_device') def _test_detach_mediated_devices(self, side_effect, detach_device): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 6ded3249f56e..3ff2b2dae5a1 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -429,6 +429,10 @@ class LibvirtDriver(driver.ComputeDriver): self._vpmems_by_name, self._vpmems_by_rc = self._discover_vpmems( vpmem_conf=CONF.libvirt.pmem_namespaces) + # We default to not support vGPUs unless the configuration is set. + self.pgpu_type_mapping = collections.defaultdict(str) + self.supported_vgpu_types = self._get_supported_vgpu_types() + def _discover_vpmems(self, vpmem_conf=None): """Discover vpmems on host and configuration. @@ -795,6 +799,21 @@ class LibvirtDriver(driver.ComputeDriver): dev_name = libvirt_utils.mdev_uuid2name(mdev_uuid) dev_info = self._get_mediated_device_information(dev_name) parent = dev_info['parent'] + parent_type = self._get_vgpu_type_per_pgpu(parent) + if dev_info['type'] != parent_type: + # NOTE(sbauza): The mdev was created by using a different + # vGPU type. We can't recreate the mdev until the operator + # modifies the configuration. + parent = "{}:{}:{}.{}".format(*parent[4:].split('_')) + msg = ("The instance UUID %(inst)s uses a VGPU that " + "its parent pGPU %(parent)s no longer " + "supports as the instance vGPU type %(type)s " + "is not accepted for the pGPU. Please correct " + "the configuration accordingly." 
% + {'inst': instance_uuid, + 'parent': parent, + 'type': dev_info['type']}) + raise exception.InvalidLibvirtGPUConfig(reason=msg) self._create_new_mediated_device(parent, uuid=mdev_uuid) def _set_multiattach_support(self): @@ -6497,12 +6516,80 @@ class LibvirtDriver(driver.ComputeDriver): def _get_supported_vgpu_types(self): if not CONF.devices.enabled_vgpu_types: return [] - # TODO(sbauza): Move this check up to compute_manager.init_host - if len(CONF.devices.enabled_vgpu_types) > 1: - LOG.warning('libvirt only supports one GPU type per compute node,' - ' only first type will be used.') - requested_types = CONF.devices.enabled_vgpu_types[:1] - return requested_types + + for vgpu_type in CONF.devices.enabled_vgpu_types: + group = getattr(CONF, 'vgpu_%s' % vgpu_type, None) + if group is None or not group.device_addresses: + first_type = CONF.devices.enabled_vgpu_types[0] + if len(CONF.devices.enabled_vgpu_types) > 1: + # Only provide the warning if the operator provided more + # than one type as it's not needed to provide groups + # if you only use one vGPU type. + msg = ("The vGPU type '%(type)s' was listed in '[devices] " + "enabled_vgpu_types' but no corresponding " + "'[vgpu_%(type)s]' group or " + "'[vgpu_%(type)s] device_addresses' " + "option was defined. Only the first type " + "'%(ftype)s' will be used." % {'type': vgpu_type, + 'ftype': first_type}) + LOG.warning(msg) + # We need to reset the mapping table that we started to provide + # keys and values from previously processed vGPUs but since + # there is a problem for this vGPU type, we only want to + # support only the first type. + self.pgpu_type_mapping.clear() + return [first_type] + for device_address in group.device_addresses: + if device_address in self.pgpu_type_mapping: + raise exception.InvalidLibvirtGPUConfig( + reason="duplicate types for PCI ID %s" % device_address + ) + # Just checking whether the operator fat-fingered the address. + # If it's wrong, it will return an exception + try: + pci_utils.parse_address(device_address) + except exception.PciDeviceWrongAddressFormat: + raise exception.InvalidLibvirtGPUConfig( + reason="incorrect PCI address: %s" % device_address + ) + self.pgpu_type_mapping[device_address] = vgpu_type + return CONF.devices.enabled_vgpu_types + + def _get_vgpu_type_per_pgpu(self, device_address): + """Provides the vGPU type the pGPU supports. + + :param device_address: the libvirt PCI device name, + eg.'pci_0000_84_00_0' + """ + # Bail out quickly if we don't support vGPUs + if not self.supported_vgpu_types: + return + + if len(self.supported_vgpu_types) == 1: + # The operator wanted to only support one single type so we can + # blindly return it for every single pGPU + return self.supported_vgpu_types[0] + # The libvirt name is like 'pci_0000_84_00_0' + try: + device_address = "{}:{}:{}.{}".format( + *device_address[4:].split('_')) + # Validates whether it's a PCI ID... 
+ pci_utils.parse_address(device_address) + # .format() can return IndexError + except (exception.PciDeviceWrongAddressFormat, IndexError): + # this is not a valid PCI address + LOG.warning("The PCI address %s was invalid for getting the" + "related vGPU type", device_address) + return + try: + return self.pgpu_type_mapping.get(device_address) + except KeyError: + LOG.warning("No vGPU type was configured for PCI address: %s", + device_address) + # We accept to return None instead of raising an exception + # because we prefer the callers to return the existing exceptions + # in case we can't find a specific pGPU + return def _count_mediated_devices(self, enabled_vgpu_types): """Counts the sysfs objects (handles) that represent a mediated device @@ -6518,6 +6605,12 @@ class LibvirtDriver(driver.ComputeDriver): counts_per_parent = collections.defaultdict(int) mediated_devices = self._get_mediated_devices(types=enabled_vgpu_types) for mdev in mediated_devices: + parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent']) + if mdev['type'] != parent_vgpu_type: + # Even if some mdev was created for another vGPU type, just + # verify all the mdevs related to the type that their pGPU + # has + continue counts_per_parent[mdev['parent']] += 1 return counts_per_parent @@ -6536,10 +6629,13 @@ class LibvirtDriver(driver.ComputeDriver): # dev_id is the libvirt name for the PCI device, # eg. pci_0000_84_00_0 which matches a PCI address of 0000:84:00.0 dev_name = dev['dev_id'] + dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name) for _type in dev['types']: + if _type != dev_supported_type: + # This is not the type the operator wanted to support for + # this physical GPU + continue available = dev['types'][_type]['availableInstances'] - # TODO(sbauza): Once we support multiple types, check which - # PCI devices are set for this type # NOTE(sbauza): Even if we support multiple types, Nova will # only use one per physical GPU. counts_per_dev[dev_name] += available @@ -6562,7 +6658,7 @@ class LibvirtDriver(driver.ComputeDriver): """ # Bail out early if operator doesn't care about providing vGPUs - enabled_vgpu_types = self._get_supported_vgpu_types() + enabled_vgpu_types = self.supported_vgpu_types if not enabled_vgpu_types: return {} inventories = {} @@ -6923,6 +7019,11 @@ class LibvirtDriver(driver.ComputeDriver): mdevs = self._get_mediated_devices(requested_types) available_mdevs = set() for mdev in mdevs: + parent_vgpu_type = self._get_vgpu_type_per_pgpu(mdev['parent']) + if mdev['type'] != parent_vgpu_type: + # This mdev is using a vGPU type that is not supported by the + # configuration option for its pGPU parent, so we can't use it. + continue # FIXME(sbauza): No longer accept the parent value to be nullable # once we fix the reshape functional test if parent is None or mdev['parent'] == parent: @@ -6940,7 +7041,7 @@ class LibvirtDriver(driver.ComputeDriver): :returns: the newly created mdev UUID or None if not possible """ - supported_types = self._get_supported_vgpu_types() + supported_types = self.supported_vgpu_types # Try to see if we can still create a new mediated device devices = self._get_mdev_capable_devices(supported_types) for device in devices: @@ -6951,19 +7052,15 @@ class LibvirtDriver(driver.ComputeDriver): # The device is not the one that was called, not creating # the mdev continue - # For the moment, the libvirt driver only supports one - # type per host - # TODO(sbauza): Once we support more than one type, make - # sure we lookup which type the parent pGPU supports. 
- asked_type = supported_types[0] - if device['types'][asked_type]['availableInstances'] > 0: + dev_supported_type = self._get_vgpu_type_per_pgpu(dev_name) + if dev_supported_type and device['types'][ + dev_supported_type]['availableInstances'] > 0: # That physical GPU has enough room for a new mdev # We need the PCI address, not the libvirt name # The libvirt name is like 'pci_0000_84_00_0' pci_addr = "{}:{}:{}.{}".format(*dev_name[4:].split('_')) - chosen_mdev = nova.privsep.libvirt.create_mdev(pci_addr, - asked_type, - uuid=uuid) + chosen_mdev = nova.privsep.libvirt.create_mdev( + pci_addr, dev_supported_type, uuid=uuid) return chosen_mdev @utils.synchronized(VGPU_RESOURCE_SEMAPHORE) @@ -6984,12 +7081,8 @@ class LibvirtDriver(driver.ComputeDriver): vgpu_allocations = self._vgpu_allocations(allocations) if not vgpu_allocations: return - # TODO(sbauza): Once we have nested resource providers, find which one - # is having the related allocation for the specific VGPU type. - # For the moment, we should only have one allocation for - # ResourceProvider. - # TODO(sbauza): Iterate over all the allocations once we have - # nested Resource Providers. For the moment, just take the first. + # TODO(sbauza): For the moment, we only support allocations for only + # one pGPU. if len(vgpu_allocations) > 1: LOG.warning('More than one allocation was passed over to libvirt ' 'while at the moment libvirt only supports one. Only ' @@ -7038,7 +7131,7 @@ class LibvirtDriver(driver.ComputeDriver): raise exception.ComputeResourcesUnavailable( reason='vGPU resource is not available') - supported_types = self._get_supported_vgpu_types() + supported_types = self.supported_vgpu_types # Which mediated devices are created but not assigned to a guest ? mdevs_available = self._get_existing_mdevs_not_assigned( parent_device, supported_types) @@ -7399,12 +7492,9 @@ class LibvirtDriver(driver.ComputeDriver): 'reserved': self._get_reserved_host_disk_gb_from_config(), } - # NOTE(sbauza): For the moment, the libvirt driver only supports - # providing the total number of virtual GPUs for a single GPU type. If - # you have multiple physical GPUs, each of them providing multiple GPU - # types, only one type will be used for each of the physical GPUs. - # If one of the pGPUs doesn't support this type, it won't be used. - # TODO(sbauza): Use traits to make a better world. + # TODO(sbauza): Use traits to providing vGPU types. For the moment, + # it will be only documentation support by explaining to use + # osc-placement to create custom traits for each of the pGPU RPs. self._update_provider_tree_for_vgpu( provider_tree, nodename, allocations=allocations) @@ -7543,13 +7633,6 @@ class LibvirtDriver(driver.ComputeDriver): representing that resource provider in the tree """ # Create the VGPU child providers if they do not already exist. - # TODO(mriedem): For the moment, _get_supported_vgpu_types() only - # returns one single type but that will be changed once we support - # multiple types. - # Note that we can't support multiple vgpu types until a reshape has - # been performed on the vgpu resources provided by the root provider, - # if any. 
-
         # Dict of PGPU RPs keyed by their libvirt PCI name
         pgpu_rps = {}
         for pgpu_dev_id, inventory in inventories_dict.items():
diff --git a/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml b/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml
new file mode 100644
index 000000000000..421e1b50450b
--- /dev/null
+++ b/releasenotes/notes/vgpu-multiple-types-2b1ded7d1cc28880.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    The libvirt driver now supports defining different virtual GPU types for
+    each physical GPU. See the ``[devices]/enabled_vgpu_types`` configuration
+    option to learn how to configure this. Please refer to
+    https://docs.openstack.org/nova/latest/admin/virtual-gpu.html for further
+    documentation.
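
Below is a minimal, self-contained sketch of the two mechanisms this change
relies on: the dynamic registration of one ``[vgpu_$(VGPU_TYPE)]`` group per
enabled type, and the conversion of a libvirt device name such as
``pci_0000_84_00_0`` back to the PCI address used as a key in
``pgpu_type_mapping``. It only reuses the option names and example values
shown above; the direct use of ``cfg.ConfigOpts()`` and ``set_override`` is an
assumption standing in for nova's normal configuration loading and is not part
of the patch.

.. code-block:: python

   from oslo_config import cfg

   CONF = cfg.ConfigOpts()
   CONF.register_opts([cfg.ListOpt('enabled_vgpu_types', default=[])],
                      group='devices')

   # Stand-in for [devices]/enabled_vgpu_types = nvidia-35, nvidia-36
   CONF.set_override('enabled_vgpu_types', ['nvidia-35', 'nvidia-36'],
                     group='devices')

   # Equivalent of nova.conf.devices.register_dynamic_opts(CONF): register a
   # '[vgpu_<type>]/device_addresses' option (and group) per enabled type.
   for vgpu_type in CONF.devices.enabled_vgpu_types:
       CONF.register_opts(
           [cfg.ListOpt('device_addresses', default=[],
                        item_type=cfg.types.String())],
           group='vgpu_%s' % vgpu_type)

   # Stand-in for [vgpu_nvidia-35]/device_addresses = 0000:84:00.0,0000:85:00.0
   CONF.set_override('device_addresses', ['0000:84:00.0', '0000:85:00.0'],
                     group='vgpu_nvidia-35')

   # Build the pGPU address -> vGPU type mapping the driver keeps in
   # pgpu_type_mapping, rejecting duplicate PCI addresses.
   pgpu_type_mapping = {}
   for vgpu_type in CONF.devices.enabled_vgpu_types:
       for address in getattr(CONF, 'vgpu_%s' % vgpu_type).device_addresses:
           assert address not in pgpu_type_mapping, 'duplicate PCI address'
           pgpu_type_mapping[address] = vgpu_type

   # A libvirt nodedev name like 'pci_0000_84_00_0' maps to '0000:84:00.0'.
   dev_name = 'pci_0000_84_00_0'
   pci_address = '{}:{}:{}.{}'.format(*dev_name[4:].split('_'))
   print(pgpu_type_mapping.get(pci_address))  # -> 'nvidia-35'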