From fad0332cb4d46cd146bb0b6fdff5adb655422a45 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Tue, 3 Sep 2019 09:42:43 +0100 Subject: [PATCH] libvirt: Start reporting PCPU inventory to placement This requires a lot of hand holding for legacy configuration options (hence the plethora of tests) but this is all required to not break upgrades. A new '[compute] cpu_dedicated_set' config option is added, including a wall of text necessary to explain the upgrade implications of this option. The 'vcpu_pin_set' option is deprecated, while the help text for the 'reserved_host_cpus' option is updated to specify that it will be ignored for the libvirt driver. Part of blueprint cpu-resources Change-Id: I491eb7abe1405b27c552cdef142c688a46e914f2 Signed-off-by: Stephen Finucane --- nova/conf/compute.py | 146 +++++++-- nova/tests/unit/virt/libvirt/test_driver.py | 289 +++++++++++++++--- nova/tests/unit/virt/test_hardware.py | 36 ++- nova/virt/hardware.py | 38 ++- nova/virt/libvirt/driver.py | 177 +++++++++-- .../notes/cpu-resources-d4e6a0c12681fa87.yaml | 27 ++ 6 files changed, 594 insertions(+), 119 deletions(-) create mode 100644 releasenotes/notes/cpu-resources-d4e6a0c12681fa87.yaml diff --git a/nova/conf/compute.py b/nova/conf/compute.py index f2ca30d58a4c..fccebabb3700 100644 --- a/nova/conf/compute.py +++ b/nova/conf/compute.py @@ -307,18 +307,41 @@ Related options: resource_tracker_opts = [ cfg.StrOpt('vcpu_pin_set', + deprecated_for_removal=True, + deprecated_since='20.0.0', + deprecated_reason=""" +This option has been superseded by the ``[compute] cpu_dedicated_set`` and +``[compute] cpu_shared_set`` options, which allow things like the co-existence +of pinned and unpinned instances on the same host (for the libvirt driver). +""", help=""" -Defines which physical CPUs (pCPUs) can be used by instance -virtual CPUs (vCPUs). +Mask of host CPUs that can be used for ``VCPU`` resources. 
+ +The behavior of this option depends on the definition of the ``[compute] +cpu_dedicated_set`` option and affects the behavior of the ``[compute] +cpu_shared_set`` option. + +* If ``[compute] cpu_dedicated_set`` is defined, defining this option will + result in an error. + +* If ``[compute] cpu_dedicated_set`` is not defined, this option will be used + to determine inventory for ``VCPU`` resources and to limit the host CPUs + that both pinned and unpinned instances can be scheduled to, overriding the + ``[compute] cpu_shared_set`` option. Possible values: * A comma-separated list of physical CPU numbers that virtual CPUs can be - allocated to by default. Each element should be either a single CPU number, - a range of CPU numbers, or a caret followed by a CPU number to be - excluded from a previous range. For example:: + allocated from. Each element should be either a single CPU number, a range of + CPU numbers, or a caret followed by a CPU number to be excluded from a + previous range. For example:: vcpu_pin_set = "4-12,^8,15" + +Related options: + +* ``[compute] cpu_dedicated_set`` +* ``[compute] cpu_shared_set`` """), cfg.MultiOpt('reserved_huge_pages', item_type=types.Dict(), @@ -369,15 +392,26 @@ Possible values: default=0, min=0, help=""" -Number of physical CPUs to reserve for the host. The host resources usage is -reported back to the scheduler continuously from nova-compute running on the -compute node. To prevent the host CPU from being considered as available, -this option is used to reserve random pCPU(s) for the host. +Number of host CPUs to reserve for host processes. + +The host resources usage is reported back to the scheduler continuously from +nova-compute running on the compute node. This value is used to determine the +``reserved`` value reported to placement. + +This option cannot be set if the ``[compute] cpu_shared_set`` or ``[compute] +cpu_dedicated_set`` config options have been defined. 
When these options are +defined, any host CPUs not included in these values are considered reserved for +the host. Possible values: * Any positive integer representing number of physical CPUs to reserve for the host. + +Related options: + +* ``[compute] cpu_shared_set`` +* ``[compute] cpu_dedicated_set`` """), ] @@ -388,9 +422,15 @@ allocation_ratio_opts = [ help=""" Virtual CPU to physical CPU allocation ratio. -This option is used to influence the hosts selected by the Placement API. In -addition, the ``AggregateCoreFilter`` will fall back to this configuration -value if no per-aggregate setting is found. +This option is used to influence the hosts selected by the Placement API by +configuring the allocation ratio for ``VCPU`` inventory. In addition, the +``AggregateCoreFilter`` (deprecated) will fall back to this configuration value +if no per-aggregate setting is found. + +.. note:: + + This option does not affect ``PCPU`` inventory, which cannot be + overcommitted. .. note:: @@ -415,8 +455,9 @@ Related options: help=""" Virtual RAM to physical RAM allocation ratio. -This option is used to influence the hosts selected by the Placement API. In -addition, the ``AggregateRamFilter`` will fall back to this configuration value +This option is used to influence the hosts selected by the Placement API by +configuring the allocation ratio for ``MEMORY_MB`` inventory. In addition, the +``AggregateRamFilter`` (deprecated) will fall back to this configuration value if no per-aggregate setting is found. .. note:: @@ -442,9 +483,10 @@ Related options: help=""" Virtual disk to physical disk allocation ratio. -This option is used to influence the hosts selected by the Placement API. In -addition, the ``AggregateDiskFilter`` will fall back to this configuration -value if no per-aggregate setting is found. +This option is used to influence the hosts selected by the Placement API by +configuring the allocation ratio for ``DISK_GB`` inventory. 
In addition, the +``AggregateDiskFilter`` (deprecated) will fall back to this configuration value +if no per-aggregate setting is found. When configured, a ratio greater than 1.0 will result in over-subscription of the available physical disk, which can be useful for more efficiently packing @@ -741,15 +783,75 @@ Possible values: """), cfg.StrOpt('cpu_shared_set', help=""" -Defines which physical CPUs (pCPUs) will be used for best-effort guest vCPU -resources. +Mask of host CPUs that can be used for ``VCPU`` resources and offloaded +emulator threads. -Currently only used by libvirt driver to place guest emulator threads when -the flavor extra spec is set to ``hw:emulator_threads_policy=share``. +The behavior of this option depends on the definition of the deprecated +``vcpu_pin_set`` option. -For example:: +* If ``vcpu_pin_set`` is not defined, ``[compute] cpu_shared_set`` will be + used to provide ``VCPU`` inventory and to determine the host CPUs that + unpinned instances can be scheduled to. It will also be used to determine the + host CPUs that instance emulator threads should be offloaded to for instances + configured with the ``share`` emulator thread policy + (``hw:emulator_threads_policy=share``). + +* If ``vcpu_pin_set`` is defined, ``[compute] cpu_shared_set`` will only be + used to determine the host CPUs that instance emulator threads should be + offloaded to for instances configured with the ``share`` emulator thread + policy (``hw:emulator_threads_policy=share``). ``vcpu_pin_set`` will be used + to provide ``VCPU`` inventory and to determine the host CPUs that both pinned + and unpinned instances can be scheduled to. + +This behavior will be simplified in a future release when ``vcpu_pin_set`` is +removed. + +Possible values: + +* A comma-separated list of physical CPU numbers that instance VCPUs can be + allocated from. 
Each element should be either a single CPU number, a range of + CPU numbers, or a caret followed by a CPU number to be excluded from a + previous range. For example:: cpu_shared_set = "4-12,^8,15" + +Related options: + +* ``[compute] cpu_dedicated_set``: This is the counterpart option for defining + where ``PCPU`` resources should be allocated from. +* ``vcpu_pin_set``: A legacy option whose definition may change the behavior of + this option. +"""), + cfg.StrOpt('cpu_dedicated_set', + help=""" +Mask of host CPUs that can be used for ``PCPU`` resources. + +The behavior of this option affects the behavior of the deprecated +``vcpu_pin_set`` option. + +* If this option is defined, defining ``vcpu_pin_set`` will result in an error. + +* If this option is not defined, ``vcpu_pin_set`` will be used to determine + inventory for ``VCPU`` resources and to limit the host CPUs that both pinned + and unpinned instances can be scheduled to. + +This behavior will be simplified in a future release when ``vcpu_pin_set`` is +removed. + +Possible values: + +* A comma-separated list of physical CPU numbers that instance VCPUs can be + allocated from. Each element should be either a single CPU number, a range of + CPU numbers, or a caret followed by a CPU number to be excluded from a + previous range. For example:: + + cpu_dedicated_set = "4-12,^8,15" + +Related options: + +* ``[compute] cpu_shared_set``: This is the counterpart option for defining + where ``VCPU`` resources should be allocated from. +* ``vcpu_pin_set``: A legacy option that this option partially replaces. 
"""), cfg.BoolOpt('live_migration_wait_for_vif_plug', default=True, diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index b9872a3f6506..b5598e367e04 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -1357,6 +1357,106 @@ class LibvirtConnTestCase(test.NoDBTestCase, drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) self.assertRaises(exception.Invalid, drvr.init_host, "dummyhost") + @mock.patch.object(libvirt_driver.LOG, 'warning') + def test_check_cpu_set_configuration__no_configuration(self, mock_log): + """Test that configuring no CPU option results no errors or logs. + """ + self.flags(vcpu_pin_set=None, reserved_host_cpus=None) + self.flags(cpu_shared_set=None, cpu_dedicated_set=None, + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._check_cpu_set_configuration() + + mock_log.assert_not_called() + + def test_check_cpu_set_configuration__cpu_shared_set_cpu_dedicated_set( + self): + """Test that configuring 'cpu_shared_set' and 'cpu_dedicated_set' such + that they overlap (are not disjoint) results in an error stating that + this is not allowed. + """ + self.flags(vcpu_pin_set=None, reserved_host_cpus=None) + self.flags(cpu_shared_set='0-3', cpu_dedicated_set='3-5', + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + + self.assertRaises(exception.InvalidConfiguration, + drvr._check_cpu_set_configuration) + + def test_check_cpu_set_configuration__reserved_host_cpus_cpu_shared_set( + self): + """Test that configuring 'reserved_host_cpus' with one of the new + options, in this case '[compute] cpu_shared_set', results in an error + stating that this is not allowed. 
+ """ + self.flags(vcpu_pin_set=None, reserved_host_cpus=1) + self.flags(cpu_shared_set='1-10', cpu_dedicated_set=None, + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + + ex = self.assertRaises(exception.InvalidConfiguration, + drvr._check_cpu_set_configuration) + self.assertIn("The 'reserved_host_cpus' config option cannot be " + "defined alongside ", six.text_type(ex)) + + @mock.patch.object(libvirt_driver.LOG, 'warning') + def test_check_cpu_set_configuration__vcpu_pin_set(self, mock_log): + """Test that configuring only 'vcpu_pin_set' results in a warning that + the option is being used for VCPU inventory but this is deprecated + behavior. + """ + self.flags(vcpu_pin_set='0-3', reserved_host_cpus=None) + self.flags(cpu_shared_set=None, cpu_dedicated_set=None, + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._check_cpu_set_configuration() + + mock_log.assert_called_once() + self.assertIn("When defined, 'vcpu_pin_set' will be used to calculate " + "'VCPU' inventory and schedule instances that have " + "'VCPU' allocations.", + six.text_type(mock_log.call_args[0])) + + @mock.patch.object(libvirt_driver.LOG, 'warning') + def test_check_cpu_set_configuration__vcpu_pin_set_cpu_shared_set( + self, mock_log): + """Test that configuring both 'vcpu_pin_set' and 'cpu_shared_set' + results in a warning that 'cpu_shared_set' is being ignored for + calculating VCPU inventory. 
+ """ + self.flags(vcpu_pin_set='0-3', reserved_host_cpus=None) + self.flags(cpu_shared_set='4-5', cpu_dedicated_set=None, + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + drvr._check_cpu_set_configuration() + + mock_log.assert_called_once() + self.assertIn("The '[compute] cpu_shared_set' and 'vcpu_pin_set' " + "config options have both been defined.", + six.text_type(mock_log.call_args[0])) + + def test_check_cpu_set_configuration__vcpu_pin_set_cpu_dedicated_set( + self): + """Test that configuring both 'vcpu_pin_set' and 'cpu_dedicated_set' + results in an error stating that the two options cannot co-exist. + """ + self.flags(vcpu_pin_set='0-3', reserved_host_cpus=None) + self.flags(cpu_shared_set=None, cpu_dedicated_set='4-5', + group='compute') + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) + + ex = self.assertRaises(exception.InvalidConfiguration, + drvr._check_cpu_set_configuration) + self.assertIn("The 'vcpu_pin_set' config option has been deprecated " + "and cannot be defined alongside '[compute] " + "cpu_dedicated_set'.", six.text_type(ex)) + def _do_test_parse_migration_flags(self, lm_expected=None, bm_expected=None): drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), True) @@ -8084,42 +8184,118 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_list.assert_called_with(only_guests=True, only_running=False) @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', - return_value=None) + return_value=set([4, 5, 6])) + def test_get_pcpu_total(self, get_online_cpus): + """Test what happens when the '[compute] cpu_dedicated_set' config + option is set. 
+ """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_dedicated_set='4-5', cpu_shared_set=None, + group='compute') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + pcpus = drvr._get_pcpu_total() + self.assertEqual(2, pcpus) + + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5, 6])) + def test_get_pcpu_total__cpu_dedicated_set_unset(self, get_online_cpus): + """Test what happens when the '[compute] cpu_dedicated_set' config + option is not set. + """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_dedicated_set=None, cpu_shared_set=None, + group='compute') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + pcpus = drvr._get_pcpu_total() + self.assertEqual(0, pcpus) + + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5])) + def test_get_pcpu_total__cpu_dedicated_set_invalid(self, get_online_cpus): + """Test what happens when the '[compute] cpu_dedicated_set' config + option is set but it's invalid. + """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_dedicated_set='4-6', cpu_shared_set=None, + group='compute') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + self.assertRaises(exception.Invalid, drvr._get_pcpu_total) + @mock.patch('nova.virt.libvirt.host.Host.get_cpu_count', return_value=4) - def test_get_host_vcpus_is_empty(self, get_cpu_count, get_online_cpus): + def test_get_vcpu_total(self, get_cpu_count): + """Test what happens when neither the '[compute] cpu_shared_set' or + legacy 'vcpu_pin_set' config options are set. 
+ """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_shared_set=None, cpu_dedicated_set=None, + group='compute') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - self.flags(vcpu_pin_set="") vcpus = drvr._get_vcpu_total() self.assertEqual(4, vcpus) - @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus') - def test_get_host_vcpus(self, get_online_cpus): + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5, 6])) + def test_get_vcpu_total__with_cpu_shared_set(self, get_online_cpus): + """Test what happens when the '[compute] cpu_shared_set' config option + is set. + """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_shared_set='4-5', cpu_dedicated_set=None, + group='compute') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - self.flags(vcpu_pin_set="4-5") - get_online_cpus.return_value = set([4, 5, 6]) - expected_vcpus = 2 vcpus = drvr._get_vcpu_total() - self.assertEqual(expected_vcpus, vcpus) + self.assertEqual(2, vcpus) - @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus') - def test_get_host_vcpus_out_of_range(self, get_online_cpus): + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5, 6])) + def test_get_vcpu_total__with_vcpu_pin_set(self, get_online_cpus): + """Test what happens when the legacy 'vcpu_pin_set' config option is + set. + """ + self.flags(vcpu_pin_set='4-5') + self.flags(cpu_shared_set=None, cpu_dedicated_set=None, + group='compute') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + vcpus = drvr._get_vcpu_total() + self.assertEqual(2, vcpus) + + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5, 6])) + def test_get_vcpu_total__with_cpu_dedicated_set(self, get_online_cpus): + """Test what happens when the '[compute] cpu_dedicated_set' config + option is set. 
+ """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_shared_set=None, cpu_dedicated_set='4-5', + group='compute') + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + vcpus = drvr._get_vcpu_total() + self.assertEqual(0, vcpus) + + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5])) + def test_get_vcpu_total__cpu_shared_set_invalid(self, get_online_cpus): + """Test what happens when the '[compute] cpu_shared_set' config option + is set but it's invalid. + """ + self.flags(vcpu_pin_set=None) + self.flags(cpu_shared_set='4-6', cpu_dedicated_set=None, + group='compute') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - self.flags(vcpu_pin_set="4-6") - get_online_cpus.return_value = set([4, 5]) self.assertRaises(exception.Invalid, drvr._get_vcpu_total) - @mock.patch('nova.virt.libvirt.host.Host.get_cpu_count') - def test_get_host_vcpus_after_hotplug(self, get_cpu_count): + @mock.patch('nova.virt.libvirt.host.Host.get_online_cpus', + return_value=set([4, 5])) + def test_get_vcpu_total__vcpu_pin_set_invalid(self, get_online_cpus): + """Test what happens when the legacy 'vcpu_pin_set' config option is + set but it's invalid. 
+ """ + self.flags(vcpu_pin_set='4-6') + self.flags(cpu_shared_set=None, cpu_dedicated_set=None, + group='compute') drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) - get_cpu_count.return_value = 2 - expected_vcpus = 2 - vcpus = drvr._get_vcpu_total() - self.assertEqual(expected_vcpus, vcpus) - get_cpu_count.return_value = 3 - expected_vcpus = 3 - vcpus = drvr._get_vcpu_total() - self.assertEqual(expected_vcpus, vcpus) + self.assertRaises(exception.Invalid, drvr._get_vcpu_total) @mock.patch.object(host.Host, "has_min_version", return_value=True) def test_quiesce(self, mock_has_min_version): @@ -19269,6 +19445,9 @@ class HostStateTestCase(test.NoDBTestCase): self._host.get_memory_mb_total = _get_memory_mb_total self._host.get_memory_mb_used = _get_memory_mb_used + def _get_pcpu_total(self): + return 0 + def _get_vcpu_total(self): return 1 @@ -19346,6 +19525,7 @@ class HostStateTestCase(test.NoDBTestCase): class TestUpdateProviderTree(test.NoDBTestCase): vcpus = 24 + pcpus = 12 memory_mb = 1024 disk_gb = 200 cpu_traits = {t: False for t in libvirt_utils.CPU_TRAITS_MAPPING.values()} @@ -19383,6 +19563,14 @@ class TestUpdateProviderTree(test.NoDBTestCase): 'allocation_ratio': 16.0, 'reserved': 0, }, + orc.PCPU: { + 'total': self.pcpus, + 'min_unit': 1, + 'max_unit': self.pcpus, + 'step_size': 1, + 'allocation_ratio': 1.0, + 'reserved': 0, + }, orc.MEMORY_MB: { 'total': self.memory_mb, 'min_unit': 1, @@ -19401,18 +19589,20 @@ class TestUpdateProviderTree(test.NoDBTestCase): }, } + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' 
'_get_cpu_feature_traits', new=mock.Mock(return_value=cpu_traits)) - @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', - return_value={'total': disk_gb}) + new=mock.Mock(return_value={'total': disk_gb})) @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', - return_value=memory_mb) + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_total', + new=mock.Mock(return_value=pcpus)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total', - return_value=vcpus) - def _test_update_provider_tree(self, mock_vcpu, mock_mem, mock_disk, - mock_gpu_invs, gpu_invs=None, vpmems=None): + new=mock.Mock(return_value=vcpus)) + def _test_update_provider_tree( + self, mock_gpu_invs, gpu_invs=None, vpmems=None): if gpu_invs: self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') mock_gpu_invs.return_value = gpu_invs @@ -19535,15 +19725,16 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.pt.data(self.cn_rp['uuid']).resources) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', - return_value={'total': disk_gb}) + new=mock.Mock(return_value={'total': disk_gb})) @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', - return_value=memory_mb) + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_total', + new=mock.Mock(return_value=pcpus)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total', - return_value=vcpus) + new=mock.Mock(return_value=vcpus)) # TODO(efried): Bug #1784020 @unittest.expectedFailure - def test_update_provider_tree_for_shared_disk_gb_resource( - self, mock_vcpu, mock_mem, mock_disk): + def test_update_provider_tree_for_shared_disk_gb_resource(self): """Test to check DISK_GB is reported from shared resource provider. 
""" @@ -19591,23 +19782,24 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.assertEqual(set(['HW_CPU_X86_AVX512F', 'HW_CPU_X86_BMI']), self.pt.data(self.cn_rp['uuid']).traits) - @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' - '_get_cpu_feature_traits', - new=mock.Mock(return_value=cpu_traits)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' '_get_mediated_device_information') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' '_get_all_assigned_mediated_devices') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories') + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' + '_get_cpu_feature_traits', + new=mock.Mock(return_value=cpu_traits)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', - return_value={'total': disk_gb}) + new=mock.Mock(return_value={'total': disk_gb})) @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', - return_value=memory_mb) + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_total', + new=mock.Mock(return_value=pcpus)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total', - return_value=vcpus) + new=mock.Mock(return_value=vcpus)) def test_update_provider_tree_for_vgpu_reshape( - self, mock_vcpu, mock_mem, mock_disk, mock_gpus, mock_get_devs, - mock_get_mdev_info): + self, mock_gpus, mock_get_devs, mock_get_mdev_info): """Tests the VGPU reshape scenario.""" self.flags(enabled_vgpu_types=['nvidia-11'], group='devices') # Let's assume we have two PCI devices each having 4 pGPUs for this @@ -19724,18 +19916,19 @@ class TestUpdateProviderTree(test.NoDBTestCase): self.assertEqual(original_allocations[uuids.consumer2], allocations[uuids.consumer2]) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.' 
'_get_cpu_feature_traits', new=mock.Mock(return_value=cpu_traits)) - @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories') @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info', - return_value={'total': disk_gb}) + new=mock.Mock(return_value={'total': disk_gb})) @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total', - return_value=memory_mb) + new=mock.Mock(return_value=memory_mb)) + @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_pcpu_total', + new=mock.Mock(return_value=pcpus)) @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total', - return_value=vcpus) - def test_update_provider_tree_for_vgpu_reshape_fails( - self, mock_vcpu, mock_mem, mock_disk, mock_gpus): + new=mock.Mock(return_value=vcpus)) + def test_update_provider_tree_for_vgpu_reshape_fails(self, mock_gpus): """Tests the VGPU reshape failure scenario where VGPU allocations are not on the root compute node provider as expected. """ diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py index c625c58bfb0f..1deb4e2def11 100644 --- a/nova/tests/unit/virt/test_hardware.py +++ b/nova/tests/unit/virt/test_hardware.py @@ -51,31 +51,49 @@ class InstanceInfoTests(test.NoDBTestCase): self.assertNotEqual(ii1, ii3) -class CpuSetTestCase(test.NoDBTestCase): +class CPUSetTestCase(test.NoDBTestCase): def test_get_vcpu_pin_set(self): self.flags(vcpu_pin_set="1-3,5,^2") cpuset_ids = hw.get_vcpu_pin_set() self.assertEqual(set([1, 3, 5]), cpuset_ids) - def test_get_cpu_shared_set(self): - self.flags(cpu_shared_set="0-5,6,^2", group='compute') - cpuset_ids = hw.get_cpu_shared_set() - self.assertEqual(set([0, 1, 3, 4, 5, 6]), cpuset_ids) - - def test_parse_cpu_spec_none_returns_none(self): + def test_get_vcpu_pin_set__unset(self): self.flags(vcpu_pin_set=None) cpuset_ids = hw.get_vcpu_pin_set() self.assertIsNone(cpuset_ids) - def test_parse_cpu_shared_set_returns_none(self): + def test_get_vcpu_pin_set__invalid(self): + 
self.flags(vcpu_pin_set='0-1,^0,^1') + self.assertRaises(exception.Invalid, hw.get_vcpu_pin_set) + + def test_get_cpu_shared_set(self): + self.flags(cpu_shared_set='0-5,6,^2', group='compute') + cpuset_ids = hw.get_cpu_shared_set() + self.assertEqual(set([0, 1, 3, 4, 5, 6]), cpuset_ids) + + def test_get_cpu_shared_set__unset(self): self.flags(cpu_shared_set=None, group='compute') cpuset_ids = hw.get_cpu_shared_set() self.assertIsNone(cpuset_ids) - def test_parse_cpu_shared_set_error(self): + def test_get_cpu_shared_set__error(self): self.flags(cpu_shared_set="0-1,^0,^1", group='compute') self.assertRaises(exception.Invalid, hw.get_cpu_shared_set) + def test_get_cpu_dedicated_set(self): + self.flags(cpu_dedicated_set='0-5,6,^2', group='compute') + cpuset_ids = hw.get_cpu_dedicated_set() + self.assertEqual(set([0, 1, 3, 4, 5, 6]), cpuset_ids) + + def test_get_cpu_dedicated_set__unset(self): + self.flags(cpu_dedicated_set=None, group='compute') + cpuset_ids = hw.get_cpu_dedicated_set() + self.assertIsNone(cpuset_ids) + + def test_get_cpu_dedicated_set__error(self): + self.flags(cpu_dedicated_set="0-1,^0,^1", group='compute') + self.assertRaises(exception.Invalid, hw.get_cpu_dedicated_set) + def test_parse_cpu_spec_valid_syntax_works(self): cpuset_ids = hw.parse_cpu_spec("1") self.assertEqual(set([1]), cpuset_ids) diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index 1f8d2ca42b4a..0a7829204b58 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -38,33 +38,51 @@ MEMPAGES_ANY = -3 def get_vcpu_pin_set(): - """Parse vcpu_pin_set config. + """Parse ``vcpu_pin_set`` config. - :returns: a set of pcpu ids can be used by instances + :returns: A set of host CPU IDs that can be used for VCPU and PCPU + allocations. 
""" if not CONF.vcpu_pin_set: return None cpuset_ids = parse_cpu_spec(CONF.vcpu_pin_set) if not cpuset_ids: - raise exception.Invalid(_("No CPUs available after parsing %r") % - CONF.vcpu_pin_set) + msg = _("No CPUs available after parsing 'vcpu_pin_set' config, %r") + raise exception.Invalid(msg % CONF.vcpu_pin_set) return cpuset_ids -def get_cpu_shared_set(): - """Parse cpu_shared_set config. +def get_cpu_dedicated_set(): + """Parse ``[compute] cpu_dedicated_set`` config. - :returns: a set of pcpu ids can be used for best effort workloads + :returns: A set of host CPU IDs that can be used for PCPU allocations. + """ + if not CONF.compute.cpu_dedicated_set: + return None + + cpu_ids = parse_cpu_spec(CONF.compute.cpu_dedicated_set) + if not cpu_ids: + msg = _("No CPUs available after parsing '[compute] " + "cpu_dedicated_set' config, %r") + raise exception.Invalid(msg % CONF.compute.cpu_dedicated_set) + return cpu_ids + + +def get_cpu_shared_set(): + """Parse ``[compute] cpu_shared_set`` config. + + :returns: A set of host CPU IDs that can be used for emulator threads and, + optionally, for VCPU allocations. """ if not CONF.compute.cpu_shared_set: return None shared_ids = parse_cpu_spec(CONF.compute.cpu_shared_set) if not shared_ids: - raise exception.Invalid(_("No CPUs available after parsing " - "cpu_shared_set config. 
%r ") % - CONF.compute.cpu_shared_set) + msg = _("No CPUs available after parsing '[compute] cpu_shared_set' " + "config, %r") + raise exception.Invalid(msg % CONF.compute.cpu_shared_set) return shared_ids diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 70199c7e25e7..4b6a48b9c9a3 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -631,6 +631,8 @@ class LibvirtDriver(driver.ComputeDriver): def init_host(self, host): self._host.initialize() + self._check_cpu_set_configuration() + self._do_quality_warnings() self._parse_migration_flags() @@ -872,6 +874,70 @@ class LibvirtDriver(driver.ComputeDriver): 'any of the interfaces: %(ifaces)s', {'my_ip': CONF.my_ip, 'ifaces': ", ".join(ips)}) + def _check_cpu_set_configuration(self): + # evaluate these now to force a quick fail if they're invalid + vcpu_pin_set = hardware.get_vcpu_pin_set() or set() + cpu_shared_set = hardware.get_cpu_shared_set() or set() + cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set() + + # TODO(stephenfin): Remove this in U once we remove the 'vcpu_pin_set' + # option + if not vcpu_pin_set: + if not (cpu_shared_set or cpu_dedicated_set): + return + + if not cpu_dedicated_set.isdisjoint(cpu_shared_set): + msg = _( + "The '[compute] cpu_dedicated_set' and '[compute] " + "cpu_shared_set' configuration options must be " + "disjoint.") + raise exception.InvalidConfiguration(msg) + + if CONF.reserved_host_cpus: + msg = _( + "The 'reserved_host_cpus' config option cannot be defined " + "alongside the '[compute] cpu_shared_set' or '[compute] " + "cpu_dedicated_set' options. Unset 'reserved_host_cpus'.") + raise exception.InvalidConfiguration(msg) + + return + + if cpu_dedicated_set: + # NOTE(stephenfin): This is a new option in Train so it can be + # an error + msg = _( + "The 'vcpu_pin_set' config option has been deprecated and " + "cannot be defined alongside '[compute] cpu_dedicated_set'. 
" + "Unset 'vcpu_pin_set'.") + raise exception.InvalidConfiguration(msg) + + if cpu_shared_set: + LOG.warning( + "The '[compute] cpu_shared_set' and 'vcpu_pin_set' config " + "options have both been defined. While 'vcpu_pin_set' is " + "defined, it will continue to be used to configure the " + "specific host CPUs used for 'VCPU' inventory, while " + "'[compute] cpu_shared_set' will only be used for guest " + "emulator threads when 'hw:emulator_threads_policy=share' " + "is defined in the flavor. This is legacy behavior and will " + "not be supported in a future release. " + "If you wish to define specific host CPUs to be used for " + "'VCPU' or 'PCPU' inventory, you must migrate the " + "'vcpu_pin_set' config option value to '[compute] " + "cpu_shared_set' and '[compute] cpu_dedicated_set', " + "respectively, and undefine 'vcpu_pin_set'.") + else: + LOG.warning( + "The 'vcpu_pin_set' config option has been deprecated and " + "will be removed in a future release. When defined, " + "'vcpu_pin_set' will be used to calculate 'VCPU' inventory " + "and schedule instances that have 'VCPU' allocations. " + "If you wish to define specific host CPUs to be used for " + "'VCPU' or 'PCPU' inventory, you must migrate the " + "'vcpu_pin_set' config option value to '[compute] " + "cpu_shared_set' and '[compute] cpu_dedicated_set', " + "respectively, and undefine 'vcpu_pin_set'.") + def _prepare_migration_flags(self): migration_flags = 0 @@ -6182,33 +6248,69 @@ class LibvirtDriver(driver.ComputeDriver): guest.resume() return guest - def _get_vcpu_total(self): - """Get available vcpu number of physical computer. - - :returns: the number of cpu core instances can be used. + def _get_pcpu_total(self): + """Get number of host cores to be used for PCPUs. + :returns: The number of host cores to be used for PCPUs. 
""" - try: - total_pcpus = self._host.get_cpu_count() - except libvirt.libvirtError: - LOG.warning("Cannot get the number of cpu, because this " - "function is not implemented for this platform.") + if not CONF.compute.cpu_dedicated_set: return 0 - if not CONF.vcpu_pin_set: - return total_pcpus + online_cpus = self._host.get_online_cpus() + dedicated_cpus = hardware.get_cpu_dedicated_set() - available_ids = hardware.get_vcpu_pin_set() - online_pcpus = self._host.get_online_cpus() - if not (available_ids <= online_pcpus): - msg = _("Invalid 'vcpu_pin_set' config: one or more of the " - "requested CPUs is not online. Online cpuset(s): " - "%(online)s, requested cpuset(s): %(req)s") + if not dedicated_cpus.issubset(online_cpus): + msg = _("Invalid '[compute] cpu_dedicated_set' config: one or " + "more of the configured CPUs is not online. Online " + "cpuset(s): %(online)s, configured cpuset(s): %(req)s") raise exception.Invalid(msg % { - 'online': sorted(online_pcpus), - 'req': sorted(available_ids)}) + 'online': sorted(online_cpus), + 'req': sorted(dedicated_cpus)}) - return len(available_ids) + return len(dedicated_cpus) + + def _get_vcpu_total(self): + """Get number of host cores to be used for VCPUs. + + :returns: the number of cpu core instances can be used. + """ + # NOTE(stephenfin): The use of the legacy 'vcpu_pin_set' option happens + # if it's defined, regardless of whether '[compute] cpu_shared_set' is + # also configured. This is legacy behavior required for upgrades that + # should be removed in the future, when we can rely exclusively on + # '[compute] cpu_shared_set'. 
+ if CONF.vcpu_pin_set: + # TODO(stephenfin): Remove this in U + shared_cpus = hardware.get_vcpu_pin_set() + elif CONF.compute.cpu_shared_set: + shared_cpus = hardware.get_cpu_shared_set() + elif CONF.compute.cpu_dedicated_set: + return 0 + else: + try: + return self._host.get_cpu_count() + except libvirt.libvirtError: + LOG.warning("Cannot get the number of host CPUs because this " + "function is not implemented for this platform.") + return 0 + + online_cpus = self._host.get_online_cpus() + if not shared_cpus.issubset(online_cpus): + msg = _("Invalid '%(config_opt)s' config: one or " + "more of the configured CPUs is not online. Online " + "cpuset(s): %(online)s, configured cpuset(s): %(req)s") + + if CONF.vcpu_pin_set: + config_opt = 'vcpu_pin_set' + else: # CONF.compute.cpu_shared_set + config_opt = '[compute] cpu_shared_set' + + raise exception.Invalid(msg % { + 'config_opt': config_opt, + 'online': sorted(online_cpus), + 'req': sorted(shared_cpus)}) + + return len(shared_cpus) @staticmethod def _get_local_gb_info(): @@ -7096,6 +7198,8 @@ class LibvirtDriver(driver.ComputeDriver): disk_gb = int(self._get_local_gb_info()['total']) memory_mb = int(self._host.get_memory_mb_total()) vcpus = self._get_vcpu_total() + pcpus = self._get_pcpu_total() + memory_enc_slots = self._get_memory_encrypted_slots() # NOTE(yikun): If the inv record does not exists, the allocation_ratio # will use the CONF.xxx_allocation_ratio value if xxx_allocation_ratio @@ -7105,14 +7209,6 @@ class LibvirtDriver(driver.ComputeDriver): ratios = self._get_allocation_ratios(inv) resources = collections.defaultdict(set) result = { - orc.VCPU: { - 'total': vcpus, - 'min_unit': 1, - 'max_unit': vcpus, - 'step_size': 1, - 'allocation_ratio': ratios[orc.VCPU], - 'reserved': CONF.reserved_host_cpus, - }, orc.MEMORY_MB: { 'total': memory_mb, 'min_unit': 1, @@ -7123,8 +7219,29 @@ class LibvirtDriver(driver.ComputeDriver): }, } - memory_enc_slots = self._get_memory_encrypted_slots() - if memory_enc_slots > 
0: + # NOTE(stephenfin): We have to optionally report these since placement + # forbids reporting inventory with total=0 + if vcpus: + result[orc.VCPU] = { + 'total': vcpus, + 'min_unit': 1, + 'max_unit': vcpus, + 'step_size': 1, + 'allocation_ratio': ratios[orc.VCPU], + 'reserved': CONF.reserved_host_cpus, + } + + if pcpus: + result[orc.PCPU] = { + 'total': pcpus, + 'min_unit': 1, + 'max_unit': pcpus, + 'step_size': 1, + 'allocation_ratio': 1, + 'reserved': 0, + } + + if memory_enc_slots: result[orc.MEM_ENCRYPTION_CONTEXT] = { 'total': memory_enc_slots, 'min_unit': 1, diff --git a/releasenotes/notes/cpu-resources-d4e6a0c12681fa87.yaml b/releasenotes/notes/cpu-resources-d4e6a0c12681fa87.yaml new file mode 100644 index 000000000000..d38b00b9c811 --- /dev/null +++ b/releasenotes/notes/cpu-resources-d4e6a0c12681fa87.yaml @@ -0,0 +1,27 @@ +--- +features: + - | + Compute nodes using the libvirt driver can now report ``PCPU`` inventory. + This is consumed by instances with dedicated (pinned) CPUs. This can be + configured using the ``[compute] cpu_dedicated_set`` config option. A + legacy path using the now deprecated ``vcpu_pin_set`` config option is + provided to assist with upgrades. Refer to the help text of the ``[compute] + cpu_dedicated_set``, ``[compute] cpu_shared_set`` and ``vcpu_pin_set`` + config options for more information. + - | + A new configuration option, ``[compute] cpu_dedicated_set``, has been + added. This can be used to configure the host CPUs that should be used for + ``PCPU`` inventory. +deprecations: + - | + The ``vcpu_pin_set`` configuration option has been deprecated. You should + migrate host CPU configuration to the ``[compute] cpu_dedicated_set`` or + ``[compute] cpu_shared_set`` config options, or both. Refer to the help + text of these config options for more information. 
+upgrade: + - | + Previously, if ``vcpu_pin_set`` was not defined, the libvirt driver would + count all available host CPUs when calculating ``VCPU`` inventory, + regardless of whether those CPUs were online or not. The driver will now + only report the total number of online CPUs. This should result in fewer + build failures on hosts with offlined CPUs.