diff --git a/nova/conf/libvirt.py b/nova/conf/libvirt.py index e32452c4cea6..204fe5c4b867 100644 --- a/nova/conf/libvirt.py +++ b/nova/conf/libvirt.py @@ -1482,6 +1482,11 @@ libvirt_cpu_mgmt_opts = [ cfg.BoolOpt('cpu_power_management', default=False, help='Use libvirt to manage CPU cores performance.'), + cfg.StrOpt('cpu_power_management_strategy', + choices=['cpu_state', 'governor'], + default='cpu_state', + help='Tuning strategy to reduce CPU power consumption when ' + 'unused'), cfg.StrOpt('cpu_power_governor_low', default='powersave', help='Governor to use in order ' diff --git a/nova/tests/fixtures/__init__.py b/nova/tests/fixtures/__init__.py index df254608fdb0..9ff4a2a601de 100644 --- a/nova/tests/fixtures/__init__.py +++ b/nova/tests/fixtures/__init__.py @@ -16,6 +16,8 @@ from .cast_as_call import CastAsCallFixture # noqa: F401 from .cinder import CinderFixture # noqa: F401 from .conf import ConfFixture # noqa: F401, F403 from .cyborg import CyborgFixture # noqa: F401 +from .filesystem import SysFileSystemFixture # noqa: F401 +from .filesystem import TempFileSystemFixture # noqa: F401 from .glance import GlanceFixture # noqa: F401 from .libvirt import LibvirtFixture # noqa: F401 from .libvirt_imagebackend import LibvirtImageBackendFixture # noqa: F401 diff --git a/nova/tests/fixtures/filesystem.py b/nova/tests/fixtures/filesystem.py new file mode 100644 index 000000000000..932d42fe27ed --- /dev/null +++ b/nova/tests/fixtures/filesystem.py @@ -0,0 +1,81 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import shutil +import tempfile +from unittest import mock + +import fixtures + +from nova import filesystem +from nova.virt.libvirt.cpu import core + + +SYS = 'sys' + + +class TempFileSystemFixture(fixtures.Fixture): + """Creates a fake / filesystem""" + + def _setUp(self): + self.temp_dir = tempfile.TemporaryDirectory(prefix='fake_fs') + # NOTE(sbauza): I/O disk errors may raise an exception here, as we + # don't ignore them. 
If that's causing a problem in our CI jobs, the + # recommended solution is to use shutil.rmtree instead of cleanup() + # with ignore_errors parameter set to True (or wait for the minimum + # python version to be 3.10 as TemporaryDirectory will provide + # ignore_cleanup_errors parameter) + self.addCleanup(self.temp_dir.cleanup) + + +class SysFileSystemFixture(TempFileSystemFixture): + """Creates a fake /sys filesystem""" + + def __init__(self, cpus_supported=None): + self.cpus_supported = cpus_supported or 10 + + def _setUp(self): + super()._setUp() + self.sys_path = os.path.join(self.temp_dir.name, SYS) + self.addCleanup(shutil.rmtree, self.sys_path, ignore_errors=True) + + sys_patcher = mock.patch( + 'nova.filesystem.SYS', + new_callable=mock.PropertyMock(return_value=self.sys_path)) + self.sys_mock = sys_patcher.start() + self.addCleanup(sys_patcher.stop) + + avail_path_patcher = mock.patch( + 'nova.virt.libvirt.cpu.core.AVAILABLE_PATH', + new_callable=mock.PropertyMock( + return_value=os.path.join(self.sys_path, + 'devices/system/cpu/present'))) + self.avail_path_mock = avail_path_patcher.start() + self.addCleanup(avail_path_patcher.stop) + + cpu_path_patcher = mock.patch( + 'nova.virt.libvirt.cpu.core.CPU_PATH_TEMPLATE', + new_callable=mock.PropertyMock( + return_value=os.path.join(self.sys_path, + 'devices/system/cpu/cpu%(core)s'))) + self.cpu_path_mock = cpu_path_patcher.start() + self.addCleanup(cpu_path_patcher.stop) + + for cpu_nr in range(self.cpus_supported): + cpu_dir = os.path.join(self.cpu_path_mock % {'core': cpu_nr}) + os.makedirs(os.path.join(cpu_dir, 'cpufreq')) + filesystem.write_sys( + os.path.join(cpu_dir, 'cpufreq/scaling_governor'), + data='powersave') + filesystem.write_sys(core.AVAILABLE_PATH, + f'0-{self.cpus_supported - 1}') diff --git a/nova/tests/functional/libvirt/test_power_manage.py b/nova/tests/functional/libvirt/test_power_manage.py new file mode 100644 index 000000000000..fb1ac7d0cde1 --- /dev/null +++ b/nova/tests/functional/libvirt/test_power_manage.py @@ -0,0 +1,270 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from unittest import mock + +import fixtures + +from nova import context as nova_context +from nova import exception +from nova import objects +from nova.tests import fixtures as nova_fixtures +from nova.tests.fixtures import libvirt as fakelibvirt +from nova.tests.functional.libvirt import base +from nova.virt import hardware +from nova.virt.libvirt import cpu + + +class PowerManagementTestsBase(base.ServersTestBase): + + ADDITIONAL_FILTERS = ['NUMATopologyFilter'] + + ADMIN_API = True + + def setUp(self): + super(PowerManagementTestsBase, self).setUp() + + self.ctxt = nova_context.get_admin_context() + + # Mock the 'NUMATopologyFilter' filter, as most tests need to inspect + # this + host_manager = self.scheduler.manager.host_manager + numa_filter_class = host_manager.filter_cls_map['NUMATopologyFilter'] + host_pass_mock = mock.Mock(wraps=numa_filter_class().host_passes) + _p = mock.patch('nova.scheduler.filters' + '.numa_topology_filter.NUMATopologyFilter.host_passes', + side_effect=host_pass_mock) + self.mock_filter = _p.start() + self.addCleanup(_p.stop) + + # for the sake of resizing, we need to patch the two methods below + self.useFixture(fixtures.MockPatch( + 'nova.virt.libvirt.LibvirtDriver._get_instance_disk_info', + return_value=[])) + self.useFixture(fixtures.MockPatch('os.rename')) + + self.useFixture(nova_fixtures.PrivsepFixture()) + + # Defining the main flavor for 4 vCPUs all pinned + self.extra_spec = { + 'hw:cpu_policy': 'dedicated', + 'hw:cpu_thread_policy': 'prefer', + } + self.pcpu_flavor_id = self._create_flavor( + vcpu=4, extra_spec=self.extra_spec) + + def _assert_server_cpus_state(self, server, expected='online'): + inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) + if not inst.numa_topology: + self.fail('Instance should have a NUMA topology in order to know ' + 'its physical CPUs') + instance_pcpus = inst.numa_topology.cpu_pinning + self._assert_cpu_set_state(instance_pcpus, expected=expected) + return instance_pcpus + + def _assert_cpu_set_state(self, cpu_set, expected='online'): + for i in cpu_set: + core = cpu.Core(i) + if expected == 'online': + self.assertTrue(core.online, f'{i} is not online') + elif expected == 'offline': + self.assertFalse(core.online, f'{i} is online') + elif expected == 'powersave': + self.assertEqual('powersave', core.governor) + elif expected == 'performance': + self.assertEqual('performance', core.governor) + + +class PowerManagementTests(PowerManagementTestsBase): + """Test suite for a single host with 9 dedicated cores and 1 used for OS""" + + def setUp(self): + super(PowerManagementTests, self).setUp() + + self.useFixture(nova_fixtures.SysFileSystemFixture()) + + # Defining the CPUs to be pinned. + self.flags(cpu_dedicated_set='1-9', cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + self.flags(cpu_power_management=True, group='libvirt') + + self.flags(allow_resize_to_same_host=True) + self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, + cpu_cores=5, cpu_threads=2) + self.compute1 = self.start_compute(host_info=self.host_info, + hostname='compute1') + + # All cores are shut down at startup, let's check.
+ cpu_dedicated_set = hardware.get_cpu_dedicated_set() + self._assert_cpu_set_state(cpu_dedicated_set, expected='offline') + + def test_hardstop_compute_service_if_wrong_opt(self): + self.flags(cpu_dedicated_set=None, cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + self.flags(cpu_power_management=True, group='libvirt') + self.assertRaises(exception.InvalidConfiguration, + self.start_compute, host_info=self.host_info, + hostname='compute2') + + def test_create_server(self): + server = self._create_server( + flavor_id=self.pcpu_flavor_id, + expected_state='ACTIVE') + # Let's verify that the pinned CPUs are now online + self._assert_server_cpus_state(server, expected='online') + + # Verify that the unused CPUs are still offline + inst = objects.Instance.get_by_uuid(self.ctxt, server['id']) + instance_pcpus = inst.numa_topology.cpu_pinning + cpu_dedicated_set = hardware.get_cpu_dedicated_set() + unused_cpus = cpu_dedicated_set - instance_pcpus + self._assert_cpu_set_state(unused_cpus, expected='offline') + + def test_stop_start_server(self): + server = self._create_server( + flavor_id=self.pcpu_flavor_id, + expected_state='ACTIVE') + + server = self._stop_server(server) + # Let's verify that the pinned CPUs are now stopped... + self._assert_server_cpus_state(server, expected='offline') + + server = self._start_server(server) + # ...and now, they should be back. + self._assert_server_cpus_state(server, expected='online') + + def test_resize(self): + server = self._create_server( + flavor_id=self.pcpu_flavor_id, + expected_state='ACTIVE') + server_pcpus = self._assert_server_cpus_state(server, + expected='online') + + new_flavor_id = self._create_flavor( + vcpu=5, extra_spec=self.extra_spec) + self._resize_server(server, new_flavor_id) + server2_pcpus = self._assert_server_cpus_state(server, + expected='online') + # Even if the resize is not confirmed yet, the original guest is now + # destroyed, so its cores are now offline. + self._assert_cpu_set_state(server_pcpus, expected='offline') + + # let's revert the resize + self._revert_resize(server) + # So now the original CPUs will be online again, while the cores + # used by the resized guest should be back offline. + self._assert_cpu_set_state(server_pcpus, expected='online') + self._assert_cpu_set_state(server2_pcpus, expected='offline') + + def test_changing_strategy_fails(self): + # As a reminder, all cores have been shut down before. + # Now we want to change the strategy and then restart the service. + self.flags(cpu_power_management_strategy='governor', group='libvirt') + # This is not possible, as we would end up with offline CPUs. + self.assertRaises(exception.InvalidConfiguration, + self.restart_compute_service, hostname='compute1') + + +class PowerManagementTestsGovernor(PowerManagementTestsBase): + """Test suite for specific governor usage (same 10-core host)""" + + def setUp(self): + super(PowerManagementTestsGovernor, self).setUp() + + self.useFixture(nova_fixtures.SysFileSystemFixture()) + + # Defining the CPUs to be pinned.
+ self.flags(cpu_dedicated_set='1-9', cpu_shared_set=None, + group='compute') + self.flags(vcpu_pin_set=None) + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_power_management_strategy='governor', group='libvirt') + + self.flags(allow_resize_to_same_host=True) + self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, + cpu_cores=5, cpu_threads=2) + self.compute1 = self.start_compute(host_info=self.host_info, + hostname='compute1') + + def test_create(self): + cpu_dedicated_set = hardware.get_cpu_dedicated_set() + # With the governor strategy, cores are still online but run with a + # powersave governor. + self._assert_cpu_set_state(cpu_dedicated_set, expected='powersave') + + # Now, start an instance + server = self._create_server( + flavor_id=self.pcpu_flavor_id, + expected_state='ACTIVE') + # Once the pinned cores are used, their governor is now performance + self._assert_server_cpus_state(server, expected='performance') + + def test_changing_strategy_fails(self): + # Arbitrarily set one core's governor to performance + cpu.Core(1).set_high_governor() + # and then forget about it while changing the strategy. + self.flags(cpu_power_management_strategy='cpu_state', group='libvirt') + # This time, this wouldn't be acceptable as some core would keep a + # different governor while Nova would only online/offline it. + self.assertRaises(exception.InvalidConfiguration, + self.restart_compute_service, hostname='compute1') + + +class PowerManagementMixedInstances(PowerManagementTestsBase): + """Test suite for a single host with 6 dedicated cores, 3 shared and one + OS-restricted. + """ + + def setUp(self): + super(PowerManagementMixedInstances, self).setUp() + + self.useFixture(nova_fixtures.SysFileSystemFixture()) + + # Defining 6 CPUs to be dedicated, not all of them in a series. + self.flags(cpu_dedicated_set='1-3,5-7', cpu_shared_set='4,8-9', + group='compute') + self.flags(vcpu_pin_set=None) + self.flags(cpu_power_management=True, group='libvirt') + + self.host_info = fakelibvirt.HostInfo(cpu_nodes=1, cpu_sockets=1, + cpu_cores=5, cpu_threads=2) + self.compute1 = self.start_compute(host_info=self.host_info, + hostname='compute1') + + # Make sure only 6 are offline now + cpu_dedicated_set = hardware.get_cpu_dedicated_set() + self._assert_cpu_set_state(cpu_dedicated_set, expected='offline') + + # cores 4 and 8-9 should be online + self._assert_cpu_set_state({4, 8, 9}, expected='online') + + def test_standard_server_works_and_passes(self): + + std_flavor_id = self._create_flavor(vcpu=2) + self._create_server(flavor_id=std_flavor_id, expected_state='ACTIVE') + + # Since this is an instance with floating vCPUs on the shared set, we + # can only look up the host CPUs and see they haven't changed state.
+ cpu_dedicated_set = hardware.get_cpu_dedicated_set() + self._assert_cpu_set_state(cpu_dedicated_set, expected='offline') + self._assert_cpu_set_state({4, 8, 9}, expected='online') + + # We can now try to boot an instance with pinned CPUs to test the mix + pinned_server = self._create_server( + flavor_id=self.pcpu_flavor_id, + expected_state='ACTIVE') + # We'll see that its CPUs are now online + self._assert_server_cpus_state(pinned_server, expected='online') + # but it doesn't change the shared set + self._assert_cpu_set_state({4, 8, 9}, expected='online') diff --git a/nova/tests/unit/virt/libvirt/cpu/test_api.py b/nova/tests/unit/virt/libvirt/cpu/test_api.py index d47b3690a38e..b5bcb762f3f7 100644 --- a/nova/tests/unit/virt/libvirt/cpu/test_api.py +++ b/nova/tests/unit/virt/libvirt/cpu/test_api.py @@ -12,6 +12,8 @@ from unittest import mock +from nova import exception +from nova import objects from nova import test from nova.virt.libvirt.cpu import api from nova.virt.libvirt.cpu import core @@ -23,6 +25,13 @@ class TestAPI(test.NoDBTestCase): super(TestAPI, self).setUp() self.core_1 = api.Core(1) + # Create a fake instance with two pinned CPUs but only one is on the + # dedicated set + numa_topology = objects.InstanceNUMATopology(cells=[ + objects.InstanceNUMACell(cpu_pinning_raw={'0': '0', '2': '2'}), + ]) + self.fake_inst = objects.Instance(numa_topology=numa_topology) + @mock.patch.object(core, 'get_online') def test_online(self, mock_get_online): mock_get_online.return_value = True @@ -61,3 +70,125 @@ class TestAPI(test.NoDBTestCase): self.core_1.set_high_governor() mock_set_governor.assert_called_once_with(self.core_1.ident, 'fake_high_gov') + + @mock.patch.object(core, 'set_online') + def test_power_up_online(self, mock_online): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_up(self.fake_inst) + # only core #0 can be set as core #2 is not on the dedicated set + # As a reminder, core(i).online calls set_online(i) + mock_online.assert_called_once_with(0) + + @mock.patch.object(core, 'set_governor') + def test_power_up_governor(self, mock_set_governor): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_power_management_strategy='governor', group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_up(self.fake_inst) + # only core #0 can be set as core #2 is not on the dedicated set + # As a reminder, core(i).set_high_governor calls set_governor(i) + mock_set_governor.assert_called_once_with(0, 'performance') + + @mock.patch.object(core, 'set_online') + def test_power_up_skipped(self, mock_online): + self.flags(cpu_power_management=False, group='libvirt') + api.power_up(self.fake_inst) + mock_online.assert_not_called() + + @mock.patch.object(core, 'set_online') + def test_power_up_skipped_if_standard_instance(self, mock_online): + self.flags(cpu_power_management=True, group='libvirt') + api.power_up(objects.Instance(numa_topology=None)) + mock_online.assert_not_called() + + @mock.patch.object(core, 'set_offline') + def test_power_down_offline(self, mock_offline): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_down(self.fake_inst) + # only core #0 can be set as core #2 is not on the dedicated set + # As a reminder, core(i).online calls set_online(i) + mock_offline.assert_called_once_with(0) + + @mock.patch.object(core, 'set_governor') + def test_power_down_governor(self, 
mock_set_governor): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_power_management_strategy='governor', group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_down(self.fake_inst) + # only core #0 can be set as core #2 is not on the dedicated set + # As a reminder, core(i).set_high_governor calls set_governor(i) + mock_set_governor.assert_called_once_with(0, 'powersave') + + @mock.patch.object(core, 'set_offline') + def test_power_down_skipped(self, mock_offline): + self.flags(cpu_power_management=False, group='libvirt') + api.power_down(self.fake_inst) + mock_offline.assert_not_called() + + @mock.patch.object(core, 'set_offline') + def test_power_down_skipped_if_standard_instance(self, mock_offline): + self.flags(cpu_power_management=True, group='libvirt') + api.power_down(objects.Instance(numa_topology=None)) + mock_offline.assert_not_called() + + @mock.patch.object(core, 'set_offline') + def test_power_down_all_dedicated_cpus_offline(self, mock_offline): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_down_all_dedicated_cpus() + # All dedicated CPUs are turned offline + mock_offline.assert_has_calls([mock.call(0), mock.call(1)]) + + @mock.patch.object(core, 'set_governor') + def test_power_down_all_dedicated_cpus_governor(self, mock_set_governor): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_power_management_strategy='governor', group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + + api.power_down_all_dedicated_cpus() + # All dedicated CPUs are turned offline + mock_set_governor.assert_has_calls([mock.call(0, 'powersave'), + mock.call(1, 'powersave')]) + + @mock.patch.object(core, 'set_offline') + def test_power_down_all_dedicated_cpus_skipped(self, mock_offline): + self.flags(cpu_power_management=False, group='libvirt') + api.power_down_all_dedicated_cpus() + mock_offline.assert_not_called() + + def test_power_down_all_dedicated_cpus_wrong_config(self): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set=None, group='compute') + self.assertRaises(exception.InvalidConfiguration, + api.power_down_all_dedicated_cpus) + + @mock.patch.object(core, 'get_governor') + @mock.patch.object(core, 'get_online') + def test_validate_all_dedicated_cpus_for_governor(self, mock_get_online, + mock_get_governor): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + self.flags(cpu_power_management_strategy='governor', group='libvirt') + mock_get_governor.return_value = 'performance' + mock_get_online.side_effect = (True, False) + self.assertRaises(exception.InvalidConfiguration, + api.validate_all_dedicated_cpus) + + @mock.patch.object(core, 'get_governor') + @mock.patch.object(core, 'get_online') + def test_validate_all_dedicated_cpus_for_cpu_state(self, mock_get_online, + mock_get_governor): + self.flags(cpu_power_management=True, group='libvirt') + self.flags(cpu_dedicated_set='0-1', group='compute') + self.flags(cpu_power_management_strategy='cpu_state', group='libvirt') + mock_get_online.return_value = True + mock_get_governor.side_effect = ('powersave', 'performance') + self.assertRaises(exception.InvalidConfiguration, + api.validate_all_dedicated_cpus) diff --git a/nova/virt/libvirt/cpu/__init__.py b/nova/virt/libvirt/cpu/__init__.py index 962c9469a0dc..4410a4e579bb 100644 --- a/nova/virt/libvirt/cpu/__init__.py +++ 
b/nova/virt/libvirt/cpu/__init__.py @@ -14,3 +14,9 @@ from nova.virt.libvirt.cpu import api Core = api.Core + + +power_up = api.power_up +power_down = api.power_down +validate_all_dedicated_cpus = api.validate_all_dedicated_cpus +power_down_all_dedicated_cpus = api.power_down_all_dedicated_cpus diff --git a/nova/virt/libvirt/cpu/api.py b/nova/virt/libvirt/cpu/api.py index e0b0a277d18f..1c17458d6b2f 100644 --- a/nova/virt/libvirt/cpu/api.py +++ b/nova/virt/libvirt/cpu/api.py @@ -15,6 +15,10 @@ from dataclasses import dataclass from oslo_log import log as logging import nova.conf +from nova import exception +from nova.i18n import _ +from nova import objects +from nova.virt import hardware from nova.virt.libvirt.cpu import core LOG = logging.getLogger(__name__) @@ -51,6 +55,9 @@ class Core: def __eq__(self, other): return self.ident == other.ident + def __str__(self): + return str(self.ident) + @property def governor(self) -> str: return core.get_governor(self.ident) @@ -60,3 +67,91 @@ class Core: def set_low_governor(self) -> None: core.set_governor(self.ident, CONF.libvirt.cpu_power_governor_low) + + +def power_up(instance: objects.Instance) -> None: + if not CONF.libvirt.cpu_power_management: + return + if instance.numa_topology is None: + return + + cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set() + pcpus = instance.numa_topology.cpu_pinning + powered_up = set() + for pcpu in pcpus: + if pcpu in cpu_dedicated_set: + pcpu = Core(pcpu) + if CONF.libvirt.cpu_power_management_strategy == 'cpu_state': + pcpu.online = True + else: + pcpu.set_high_governor() + powered_up.add(str(pcpu)) + LOG.debug("Cores powered up : %s", powered_up) + + +def power_down(instance: objects.Instance) -> None: + if not CONF.libvirt.cpu_power_management: + return + if instance.numa_topology is None: + return + + cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set() + pcpus = instance.numa_topology.cpu_pinning + powered_down = set() + for pcpu in pcpus: + if pcpu in cpu_dedicated_set: + pcpu = Core(pcpu) + if CONF.libvirt.cpu_power_management_strategy == 'cpu_state': + pcpu.online = False + else: + pcpu.set_low_governor() + powered_down.add(str(pcpu)) + LOG.debug("Cores powered down : %s", powered_down) + + +def power_down_all_dedicated_cpus() -> None: + if not CONF.libvirt.cpu_power_management: + return + if (CONF.libvirt.cpu_power_management and + not CONF.compute.cpu_dedicated_set + ): + msg = _("'[compute]/cpu_dedicated_set' is mandatory to be set if " + "'[libvirt]/cpu_power_management' is set." 
+ "Please provide the CPUs that can be pinned or don't use the " + "power management if you only use shared CPUs.") + raise exception.InvalidConfiguration(msg) + + cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set() + for pcpu in cpu_dedicated_set: + pcpu = Core(pcpu) + if CONF.libvirt.cpu_power_management_strategy == 'cpu_state': + pcpu.online = False + else: + pcpu.set_low_governor() + LOG.debug("Cores powered down : %s", cpu_dedicated_set) + + +def validate_all_dedicated_cpus() -> None: + if not CONF.libvirt.cpu_power_management: + return + cpu_dedicated_set = hardware.get_cpu_dedicated_set() or set() + governors = set() + cpu_states = set() + for pcpu in cpu_dedicated_set: + pcpu = Core(pcpu) + # we need to collect the governors strategy and the CPU states + governors.add(pcpu.governor) + cpu_states.add(pcpu.online) + if CONF.libvirt.cpu_power_management_strategy == 'cpu_state': + # all the cores need to have the same governor strategy + if len(governors) > 1: + msg = _("All the cores need to have the same governor strategy" + "before modifying the CPU states. You can reboot the " + "compute node if you prefer.") + raise exception.InvalidConfiguration(msg) + elif CONF.libvirt.cpu_power_management_strategy == 'governor': + # all the cores need to be online + if False in cpu_states: + msg = _("All the cores need to be online before modifying the " + "governor strategy.") + raise exception.InvalidConfiguration(msg) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 905ec0f306a2..869996f615fa 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -114,6 +114,7 @@ from nova.virt.image import model as imgmodel from nova.virt import images from nova.virt.libvirt import blockinfo from nova.virt.libvirt import config as vconfig +from nova.virt.libvirt import cpu as libvirt_cpu from nova.virt.libvirt import designer from nova.virt.libvirt import event as libvirtevent from nova.virt.libvirt import guest as libvirt_guest @@ -817,6 +818,18 @@ class LibvirtDriver(driver.ComputeDriver): "force_raw_images to True.") raise exception.InvalidConfiguration(msg) + # NOTE(sbauza): We verify first if the dedicated CPU performances were + # modified by Nova before. Note that it can provide an exception if + # either the governor strategies are different between the cores or if + # the cores are offline. + libvirt_cpu.validate_all_dedicated_cpus() + # NOTE(sbauza): We powerdown all dedicated CPUs but if some instances + # exist that are pinned for some CPUs, then we'll later powerup those + # CPUs when rebooting the instance in _init_instance() + # Note that it can provide an exception if the config options are + # wrongly modified. + libvirt_cpu.power_down_all_dedicated_cpus() + # TODO(sbauza): Remove this code once mediated devices are persisted # across reboots. self._recreate_assigned_mediated_devices() @@ -1512,6 +1525,8 @@ class LibvirtDriver(driver.ComputeDriver): # NOTE(GuanQiang): teardown container to avoid resource leak if CONF.libvirt.virt_type == 'lxc': self._teardown_container(instance) + # We're sure the instance is gone, we can shutdown the core if so + libvirt_cpu.power_down(instance) def destroy(self, context, instance, network_info, block_device_info=None, destroy_disks=True, destroy_secrets=True): @@ -3164,6 +3179,7 @@ class LibvirtDriver(driver.ComputeDriver): current_power_state = guest.get_power_state(self._host) + libvirt_cpu.power_up(instance) # TODO(stephenfin): Any reason we couldn't use 'self.resume' here? 
guest.launch(pause=current_power_state == power_state.PAUSED) @@ -7646,6 +7662,7 @@ class LibvirtDriver(driver.ComputeDriver): post_xml_callback() if power_on or pause: + libvirt_cpu.power_up(instance) guest.launch(pause=pause) return guest diff --git a/releasenotes/notes/bp-libvirt-cpu-state-mgmt-fbc9c1f9f473003c.yaml b/releasenotes/notes/bp-libvirt-cpu-state-mgmt-fbc9c1f9f473003c.yaml new file mode 100644 index 000000000000..95422fce6795 --- /dev/null +++ b/releasenotes/notes/bp-libvirt-cpu-state-mgmt-fbc9c1f9f473003c.yaml @@ -0,0 +1,18 @@ +--- +features: + - | + It is now possible to configure nova-compute services using the libvirt + driver by setting ``[libvirt]cpu_power_management`` to ``True``, letting + the service power physical CPUs down or up depending on whether those + CPUs are pinned to instances or not. In order to support this feature, + the compute service needs to be set with ``[compute]cpu_dedicated_set``. + If so, all the related CPUs will be powered down until they are used by + an instance, and the related pinned CPUs will be powered up just before + starting the guest. If ``[compute]cpu_dedicated_set`` isn't set, then the + compute service will refuse to start. + By default the power strategy will offline CPUs when powering down and + online them when powering up, but another strategy is possible by using + ``[libvirt]cpu_power_management_strategy=governor``, which will rather + modify the related CPU governor using ``[libvirt]cpu_power_governor_low`` + and ``[libvirt]cpu_power_governor_high`` configuration values (respective + defaults being ``powersave`` and ``performance``).
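
As a usage sketch only (not part of the patch itself), the options introduced above would be enabled through nova.conf roughly as follows; the dedicated CPU range mirrors the functional tests and the two governor values are simply the documented defaults, so adapt them to the actual host:

    [compute]
    # physical CPUs that Nova may pin, and therefore power manage
    cpu_dedicated_set = 1-9

    [libvirt]
    # let Nova power dedicated cores up/down as instances need them
    cpu_power_management = True
    # 'cpu_state' (default) offlines unused cores; 'governor' changes their
    # scaling governor instead
    cpu_power_management_strategy = governor
    cpu_power_governor_low = powersave
    cpu_power_governor_high = performance

With the default ``cpu_state`` strategy the last three options can be omitted; unused dedicated cores are then simply set offline at service startup and brought back online when a pinned guest starts.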