Merge "Detect host renames and abort startup"
commit e9d716f555
@@ -1536,6 +1536,22 @@ class ComputeManager(manager.Manager):
             nova.virt.node.write_local_node_uuid(db_nodes[0].uuid)
 
+    def _check_for_host_rename(self, nodes_by_uuid):
+        if 'ironic' in CONF.compute_driver.lower():
+            # Ironic (currently) rebalances nodes at various times, and as
+            # such, nodes being discovered as assigned to this host with a
+            # different hostname is not surprising. Skip this check for
+            # ironic.
+            return
+        for node in nodes_by_uuid.values():
+            if node.host != self.host:
+                raise exception.InvalidConfiguration(
+                    'My node %s has host %r but my host is %r; '
+                    'Possible rename detected, refusing to start!' % (
+                        node.uuid, node.host, self.host))
+            LOG.debug('Verified node %s matches my host %s',
+                      node.uuid, self.host)
+
     def init_host(self, service_ref):
         """Initialization for a standalone compute service."""
 
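
A minimal standalone sketch of the rename check added above, for illustration.
The Node dataclass, the free-standing check_for_host_rename() function, and the
sample hostnames are hypothetical stand-ins for the real ComputeNode objects
and manager state, not nova API:

    from dataclasses import dataclass

    @dataclass
    class Node:
        uuid: str
        host: str

    def check_for_host_rename(my_host, nodes_by_uuid):
        # A node recorded in the database under a different hostname
        # suggests this service's host was renamed; refuse to proceed.
        for node in nodes_by_uuid.values():
            if node.host != my_host:
                raise RuntimeError(
                    'My node %s has host %r but my host is %r; '
                    'Possible rename detected, refusing to start!'
                    % (node.uuid, node.host, my_host))

    # A node persisted under the old hostname trips the check:
    nodes = {'uuid-1': Node(uuid='uuid-1', host='old-host')}
    check_for_host_rename('new-host', nodes)  # raises RuntimeError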
@@ -1574,15 +1590,6 @@ class ComputeManager(manager.Manager):
             raise exception.InvalidConfiguration(msg)
 
         self.driver.init_host(host=self.host)
-        context = nova.context.get_admin_context()
-        instances = objects.InstanceList.get_by_host(
-            context, self.host,
-            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
-
-        self.init_virt_events()
-
-        self._validate_pinning_configuration(instances)
-        self._validate_vtpm_configuration(instances)
 
         # NOTE(gibi): At this point the compute_nodes of the resource tracker
         # has not been populated yet so we cannot rely on the resource tracker
@@ -1593,8 +1600,23 @@ class ComputeManager(manager.Manager):
         # _destroy_evacuated_instances and
         # _error_out_instances_whose_build_was_interrupted out in the
         # background on startup
+        context = nova.context.get_admin_context()
         nodes_by_uuid = self._get_nodes(context)
+
+        # NOTE(danms): Check for a possible host rename and abort
+        # startup before we start mucking with instances we think are
+        # ours.
+        self._check_for_host_rename(nodes_by_uuid)
+
+        instances = objects.InstanceList.get_by_host(
+            context, self.host,
+            expected_attrs=['info_cache', 'metadata', 'numa_topology'])
+
+        self.init_virt_events()
+
+        self._validate_pinning_configuration(instances)
+        self._validate_vtpm_configuration(instances)
 
         try:
             # checking that instance was not already evacuated to other host
             evacuated_instances = self._destroy_evacuated_instances(
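
Note that the %r conversions in the exception message quote the hostnames,
which makes an accidental rename easy to spot in the log. A quick sketch of
the resulting text, using the exact format string from the hunk above with
made-up values:

    # Reproduces the message format used by the rename check.
    msg = ('My node %s has host %r but my host is %r; '
           'Possible rename detected, refusing to start!'
           % ('node-uuid-1', 'old-host', 'new-host'))
    print(msg)
    # My node node-uuid-1 has host 'old-host' but my host is 'new-host';
    # Possible rename detected, refusing to start!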
@@ -934,7 +934,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         inst_list = _make_instance_list(startup_instances)
         mock_host_get.return_value = inst_list
         our_node = objects.ComputeNode(
-            host='fake-host', uuid=uuids.our_node_uuid,
+            host=self.compute.host, uuid=uuids.our_node_uuid,
             hypervisor_hostname='fake-node')
         mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
 
@@ -983,7 +983,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
         """
         mock_get_nodes.return_value = {
             uuids.cn_uuid1: objects.ComputeNode(
-                uuid=uuids.cn_uuid1, hypervisor_hostname='node1')}
+                uuid=uuids.cn_uuid1, hypervisor_hostname='node1',
+                host=self.compute.host)}
         self.compute.init_host(None)
 
         mock_error_interrupted.assert_called_once_with(
@@ -1148,7 +1149,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             uuids.evac_instance: evacuating_instance
         }
         our_node = objects.ComputeNode(
-            host='fake-host', uuid=uuids.our_node_uuid,
+            host=self.compute.host, uuid=uuids.our_node_uuid,
             hypervisor_hostname='fake-node')
         mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
 
@@ -1227,6 +1228,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
 
     @mock.patch.object(objects.InstanceList, 'get_by_host',
                        new=mock.Mock())
+    @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+                new=mock.Mock(return_value=[mock.MagicMock()]))
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_pinning_configuration')
     def test_init_host_pinning_configuration_validation_failure(self,
@@ -1244,6 +1247,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
     @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_pinning_configuration',
                 new=mock.Mock())
+    @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+                new=mock.Mock(return_value=[mock.MagicMock()]))
    @mock.patch('nova.compute.manager.ComputeManager.'
                 '_validate_vtpm_configuration')
     def test_init_host_vtpm_configuration_validation_failure(self,
@@ -6434,6 +6439,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
             mock.sentinel.service_ref)
         m.assert_called_once_with(mock.sentinel.service_ref)
 
+    def test_check_for_host_rename_ironic(self):
+        self.flags(compute_driver='ironic')
+        # A mismatched host would normally trip the check, but the
+        # ironic driver takes the early exit before it runs
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host='not-this-host')}
+        self.compute._check_for_host_rename(nodes)
+
+    def test_check_for_host_rename_renamed_only(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host='not-this-host')}
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.compute._check_for_host_rename, nodes)
+
+    def test_check_for_host_rename_renamed_one(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host=self.compute.host),
+                 uuids.node2: mock.MagicMock(uuid=uuids.node2,
+                                             host='not-this-host')}
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.compute._check_for_host_rename, nodes)
+
+    def test_check_for_host_rename_not_renamed(self):
+        nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+                                             host=self.compute.host)}
+        with mock.patch.object(manager.LOG, 'debug') as mock_debug:
+            self.compute._check_for_host_rename(nodes)
+        mock_debug.assert_called_once_with(
+            'Verified node %s matches my host %s',
+            uuids.node1, self.compute.host)
+
+    @mock.patch('nova.compute.manager.ComputeManager._get_nodes')
+    def test_check_for_host_rename_called_by_init_host(self, mock_nodes):
+        # Since testing init_host() requires a billion mocks, this
+        # tests that we do call it when expected, but make it raise
+        # to avoid running the rest of init_host().
+        with mock.patch.object(self.compute,
+                               '_check_for_host_rename') as m:
+            m.side_effect = test.TestingException
+            self.assertRaises(test.TestingException,
+                              self.compute.init_host, None)
+        m.assert_called_once_with(mock_nodes.return_value)
+
 
 class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
     def setUp(self):
releasenotes/notes/stable-compute-uuid-08663a0955616728.yaml (new file, 19 lines)
@@ -0,0 +1,19 @@
+---
+features:
+  - |
+    The compute manager now uses a local file to provide node uuid persistence
+    to guard against problems with renamed services, among other things.
+    Deployers wishing to ensure that *new* compute services get a predictable
+    uuid before initial startup may provision that file and nova will use it,
+    otherwise nova will generate and write one to a `compute_id` file in
+    `CONF.state_path` the first time it starts up. Accidental renames of a
+    compute node's hostname will be detected and the manager will exit to avoid
+    database corruption. Note that none of this applies to Ironic computes, as
+    they manage nodes and uuids differently.
+upgrade:
+  - |
+    Existing compute nodes will, upon upgrade, persist the uuid of the compute
+    node assigned to their hostname at first startup. Since this must match
+    what is currently in the database, it is important to let nova provision
+    this file from its database. Nova will only persist to a `compute_id` file
+    in the `CONF.state_path` directory, which should already be writable.
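
For deployers who want to pre-provision a uuid for a *new* compute service, a
minimal sketch of writing the compute_id file follows. The /var/lib/nova path
is an assumption standing in for the deployment's actual CONF.state_path:

    import os
    import uuid

    state_path = '/var/lib/nova'  # assumed value of CONF.state_path
    node_uuid = str(uuid.uuid4())  # fresh uuid, only for a brand-new service
    with open(os.path.join(state_path, 'compute_id'), 'w') as f:
        f.write(node_uuid)

For an existing compute node, do not write this file by hand; per the upgrade
note, nova must populate it from the database on first startup so that the
persisted uuid matches the existing compute node record.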