From cf33be68713e30b7e4a4ca8dd3c4138329914503 Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 1 Feb 2023 08:27:08 -0800 Subject: [PATCH] Abort startup if nodename conflict is detected We do run update_available_resource() synchronously during service startup, but we only allow certain exceptions to abort startup. This makes us abort for InvalidConfiguration, and makes the resource tracker raise that for the case where the compute node create failed due to a duplicate entry. This also modifies the object to raise a nova-specific error for that condition to avoid the compute node needing to import oslo_db stuff just to be able to catch it. Change-Id: I5de98e6fe52e45996bc2e1014fa8a09a2de53682 --- nova/compute/manager.py | 8 ++++++++ nova/compute/resource_tracker.py | 8 +++++++- nova/exception.py | 4 ++++ nova/objects/compute_node.py | 8 +++++++- nova/tests/unit/compute/test_resource_tracker.py | 14 ++++++++++++++ nova/tests/unit/objects/test_compute_node.py | 9 +++++++++ 6 files changed, 49 insertions(+), 2 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index c4537cd9a2f7..952ab3e19941 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -10480,6 +10480,14 @@ class ComputeManager(manager.Manager): LOG.exception( "Error updating PCI resources for node %(node)s.", {'node': nodename}) + except exception.InvalidConfiguration as e: + if startup: + # If this happens during startup, we need to let it raise to + # abort our service startup. + raise + else: + LOG.error("Error updating resources for node %s: %s", + nodename, e) except Exception: LOG.exception("Error updating resources for node %(node)s.", {'node': nodename}) diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 70c56fd2e3bd..3f911f37084f 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -728,7 +728,13 @@ class ResourceTracker(object): cn = objects.ComputeNode(context) cn.host = self.host self._copy_resources(cn, resources, initial=True) - cn.create() + try: + cn.create() + except exception.DuplicateRecord: + raise exception.InvalidConfiguration( + 'Duplicate compute node record found for host %s node %s' % ( + cn.host, cn.hypervisor_hostname)) + # Only map the ComputeNode into compute_nodes if create() was OK # because if create() fails, on the next run through here nodename # would be in compute_nodes and we won't try to create again (because diff --git a/nova/exception.py b/nova/exception.py index 20c112b62845..f5993e79f8f9 100644 --- a/nova/exception.py +++ b/nova/exception.py @@ -2512,6 +2512,10 @@ class InvalidNodeConfiguration(NovaException): msg_fmt = _('Invalid node identity configuration: %(reason)s') +class DuplicateRecord(NovaException): + msg_fmt = _('Unable to create duplicate record for %(target)s') + + class NotSupportedComputeForEvacuateV295(NotSupported): msg_fmt = _("Starting to microversion 2.95, evacuate API will stop " "instance on destination. To evacuate before upgrades are " diff --git a/nova/objects/compute_node.py b/nova/objects/compute_node.py index 528cfc077698..dfc1b2ae284f 100644 --- a/nova/objects/compute_node.py +++ b/nova/objects/compute_node.py @@ -12,6 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. +from oslo_db import exception as db_exc from oslo_serialization import jsonutils from oslo_utils import uuidutils from oslo_utils import versionutils @@ -339,7 +340,12 @@ class ComputeNode(base.NovaPersistentObject, base.NovaObject): self._convert_supported_instances_to_db_format(updates) self._convert_pci_stats_to_db_format(updates) - db_compute = db.compute_node_create(self._context, updates) + try: + db_compute = db.compute_node_create(self._context, updates) + except db_exc.DBDuplicateEntry: + target = 'compute node %s:%s' % (updates['hypervisor_hostname'], + updates['uuid']) + raise exception.DuplicateRecord(target=target) self._from_db_object(self._context, self, db_compute) @base.remotable diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index dfea323a9a82..cd36b8987f18 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -1552,6 +1552,20 @@ class TestInitComputeNode(BaseTestCase): self.assertEqual('fake-host', node.host) mock_update.assert_called() + @mock.patch.object(resource_tracker.ResourceTracker, + '_get_compute_node', + return_value=None) + @mock.patch('nova.objects.compute_node.ComputeNode.create') + def test_create_failed_conflict(self, mock_create, mock_getcn): + self._setup_rt() + resources = {'hypervisor_hostname': 'node1', + 'uuid': uuids.node1} + mock_create.side_effect = exc.DuplicateRecord(target='foo') + self.assertRaises(exc.InvalidConfiguration, + self.rt._init_compute_node, + mock.MagicMock, + resources) + @ddt.ddt class TestUpdateComputeNode(BaseTestCase): diff --git a/nova/tests/unit/objects/test_compute_node.py b/nova/tests/unit/objects/test_compute_node.py index 63b070c5438b..84c4e8778567 100644 --- a/nova/tests/unit/objects/test_compute_node.py +++ b/nova/tests/unit/objects/test_compute_node.py @@ -16,6 +16,7 @@ import copy from unittest import mock import netaddr +from oslo_db import exception as db_exc from oslo_serialization import jsonutils from oslo_utils.fixture import uuidsentinel from oslo_utils import timeutils @@ -341,6 +342,14 @@ class _TestComputeNodeObject(object): 'uuid': uuidsentinel.fake_compute_node} mock_create.assert_called_once_with(self.context, param_dict) + @mock.patch('nova.db.main.api.compute_node_create') + def test_create_duplicate(self, mock_create): + mock_create.side_effect = db_exc.DBDuplicateEntry + compute = compute_node.ComputeNode(context=self.context) + compute.service_id = 456 + compute.hypervisor_hostname = 'node1' + self.assertRaises(exception.DuplicateRecord, compute.create) + @mock.patch.object(db, 'compute_node_update') @mock.patch( 'nova.db.main.api.compute_node_get', return_value=fake_compute_node)