Merge "Block servers with vGPU and device profile in heal_allocations"
commit cca1bb1bae
@@ -180,8 +180,13 @@ things:
 * `Migration-based allocations`_ would be lost if manually deleted during a
   resize. These are allocations tracked by the migration resource record
   on the source compute service during a migration.
-* Healing allocations does not supported nested resource allocations before the
-  20.0.0 (Train) release.
+* Healing allocations only partially support nested allocations. Nested
+  allocations due to Neutron ports having QoS policies are supported since
+  20.0.0 (Train) release. But nested allocations due to vGPU or Cyborg device
+  profile requests in the flavor are not supported. Also if you are using
+  provider.yaml files on compute hosts to define additional resources, if those
+  resources are defined on child resource providers then instances using such
+  resources are not supported.
 
 If you do use the ``heal_allocations`` command to cleanup allocations for a
 specific trouble instance, it is recommended to take note of what the
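
Reviewer note: as the code hunks below show, the two newly blocked cases (vGPU and Cyborg) are detected purely from the flavor's extra specs; placement is never queried to discover nested providers. A minimal sketch of that detection, using a plain dict in place of nova's Flavor object (the function name is illustrative, not nova's API; the extra-spec keys match the diff):

    def has_unsupported_nested_request(extra_specs):
        """Return a label for a blocked request type, or None."""
        # A vGPU request shows up as a "resources:VGPU" extra spec.
        if extra_specs.get("resources:VGPU"):
            return "vGPU"
        # A Cyborg accelerator request shows up as "accel:device_profile".
        if extra_specs.get("accel:device_profile"):
            return "Cyborg device profile"
        return None

    assert has_unsupported_nested_request({"resources:VGPU": "1"}) == "vGPU"
    assert has_unsupported_nested_request({}) is None
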
@@ -1160,6 +1160,15 @@ state transition. For each instance found, allocations are created against
 the compute node resource provider for that instance based on the flavor
 associated with the instance.
 
+.. note::
+   Nested allocations are only partially supported. Nested allocations due to
+   Neutron ports having QoS policies are supported since 20.0.0 (Train)
+   release. But nested allocations due to vGPU or Cyborg device profile
+   requests in the flavor are not supported. Also if you are using
+   provider.yaml files on compute hosts to define additional resources, if
+   those resources are defined on child resource providers then instances
+   using such resources are not supported.
+
 Also if the instance has any port attached that has resource request
 (e.g. :neutron-doc:`Quality of Service (QoS): Guaranteed Bandwidth
 <admin/config-qos-min-bw.html>`) but the corresponding
@@ -1721,6 +1721,18 @@ class PlacementCommands(object):
             allocations['user_id'] = instance.user_id
         return allocations
 
+    @staticmethod
+    def ensure_instance_has_no_vgpu_request(instance):
+        if instance.flavor.extra_specs.get("resources:VGPU"):
+            raise exception.HealvGPUAllocationNotSupported(
+                instance_uuid=instance.uuid)
+
+    @staticmethod
+    def ensure_instance_has_no_cyborg_device_profile_request(instance):
+        if instance.flavor.extra_specs.get("accel:device_profile"):
+            raise exception.HealDeviceProfileAllocationNotSupported(
+                instance_uuid=instance.uuid)
+
     def _heal_allocations_for_instance(self, ctxt, instance, node_cache,
                                        output, placement, dry_run,
                                        heal_port_allocations, neutron,
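
The two guards above are pure functions of flavor.extra_specs, so their behavior is easy to exercise in isolation. A hedged standalone sketch (SimpleNamespace and the local exception class are stand-ins for nova's Instance/Flavor objects and nova.exception, not the real API):

    from types import SimpleNamespace

    class HealNotSupported(Exception):
        pass

    def ensure_instance_has_no_vgpu_request(instance):
        # Mirrors the guard above: only flavor.extra_specs is consulted.
        if instance.flavor.extra_specs.get("resources:VGPU"):
            raise HealNotSupported(
                "instance %s requests vGPU" % instance.uuid)

    inst = SimpleNamespace(
        uuid="11111111-2222-3333-4444-555555555555",
        flavor=SimpleNamespace(extra_specs={"resources:VGPU": "1"}))
    try:
        ensure_instance_has_no_vgpu_request(inst)
    except HealNotSupported as exc:
        print(exc)  # instance 11111111-... requests vGPU
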
@@ -1777,6 +1789,9 @@ class PlacementCommands(object):
             output(_('Instance %s is not on a host.') % instance.uuid)
             return
 
+        self.ensure_instance_has_no_vgpu_request(instance)
+        self.ensure_instance_has_no_cyborg_device_profile_request(instance)
+
         try:
             allocations = placement.get_allocs_for_consumer(
                 ctxt, instance.uuid)
@@ -1893,7 +1908,7 @@ class PlacementCommands(object):
         :param max_count: batch size (limit per instance query)
         :param unlimited: True if all instances in the cell should be
             processed, else False to just process $max_count instances
-        :param outout: function that takes a single message for verbose output
+        :param output: function that takes a single message for verbose output
         :param placement: nova.scheduler.client.report.SchedulerReportClient
             to communicate with the Placement service API.
         :param dry_run: Process instances and print output but do not commit
@@ -2033,6 +2048,7 @@ class PlacementCommands(object):
         * 5: Unable to query ports from neutron
         * 6: Unable to update ports in neutron
         * 7: Cannot roll back neutron port updates. Manual steps needed.
+        * 8: Cannot heal instance with vGPU or Cyborg resource request
         * 127: Invalid input.
         """
         # NOTE(mriedem): Thoughts on ways to expand this:
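
With the new return code, scripts driving the command can single out blocked instances instead of treating every non-zero exit as a hard failure. A sketch of such a wrapper; the "placement heal_allocations --instance <uuid> --verbose" invocation reflects the nova-manage CLI as I understand it and should be verified against your nova release:

    import subprocess
    import sys

    def heal_one(instance_uuid):
        # Exit code 8 now means the instance has a vGPU or Cyborg device
        # profile request and cannot be healed by this command.
        proc = subprocess.run(
            ["nova-manage", "placement", "heal_allocations",
             "--instance", instance_uuid, "--verbose"])
        if proc.returncode == 8:
            print("%s: vGPU/Cyborg request, skipping" % instance_uuid,
                  file=sys.stderr)
        return proc.returncode

    if __name__ == "__main__":
        sys.exit(heal_one(sys.argv[1]))
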
@@ -2161,6 +2177,12 @@ class PlacementCommands(object):
                 except exception.UnableToRollbackPortUpdates as e:
                     print(e.format_message())
                     return 7
+                except (
+                    exception.HealvGPUAllocationNotSupported,
+                    exception.HealDeviceProfileAllocationNotSupported,
+                ) as e:
+                    print(e.format_message())
+                    return 8
 
                 # Make sure we don't go over the max count. Note that we
                 # don't include instances that already have allocations in the
@@ -2240,6 +2240,24 @@ class MissingDomainCapabilityFeatureException(NovaException):
                 "including <%(feature)s> feature.")
 
 
+class HealAllocationException(NovaException):
+    msg_fmt = _("Healing instance allocation failed.")
+
+
+class HealvGPUAllocationNotSupported(HealAllocationException):
+    msg_fmt = _(
+        "Healing allocation for instance %(instance_uuid)s with vGPU resource "
+        "request is not supported."
+    )
+
+
+class HealDeviceProfileAllocationNotSupported(HealAllocationException):
+    msg_fmt = _(
+        "Healing allocation for instance %(instance_uuid)s with Cyborg device "
+        "profile request is not supported."
+    )
+
+
 class HealPortAllocationException(NovaException):
     msg_fmt = _("Healing port allocation failed.")
 
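
The new exception classes only declare a msg_fmt because the base class interpolates it with the constructor's keyword arguments, which is why the guards above pass just instance_uuid=.... A minimal stand-in showing that pattern (not the real NovaException, which also logs and falls back to a generic message if interpolation fails):

    class FakeNovaException(Exception):
        """Stand-in for NovaException's msg_fmt interpolation."""
        msg_fmt = "An unknown exception occurred."

        def __init__(self, **kwargs):
            super().__init__(self.msg_fmt % kwargs)

    class HealvGPUAllocationNotSupported(FakeNovaException):
        msg_fmt = ("Healing allocation for instance %(instance_uuid)s "
                   "with vGPU resource request is not supported.")

    print(HealvGPUAllocationNotSupported(instance_uuid="abc-123"))
    # Healing allocation for instance abc-123 with vGPU resource
    # request is not supported.
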
@@ -780,6 +780,50 @@ class TestNovaManagePlacementHealAllocations(
         )
         self.assertEqual(4, result, self.output.getvalue())
 
+    def test_instance_with_vgpu_is_blocked(self):
+        # we cannot boot with VGPU in these tests so manipulate the
+        # instance.flavor directly after the boot to simulate an instance with
+        # VGPU request
+        server, _ = self._boot_and_remove_allocations(self.flavor, 'cell1')
+        instance = objects.Instance.get_by_uuid(
+            context.get_admin_context(), server['id'])
+        instance.flavor.extra_specs["resources:VGPU"] = 1
+        instance.save()
+
+        result = self.cli.heal_allocations(
+            verbose=True, instance_uuid=server['id'],
+            force=True
+        )
+
+        self.assertIn(
+            f"Healing allocation for instance {server['id']} with vGPU "
+            f"resource request is not supported.",
+            self.output.getvalue()
+        )
+        self.assertEqual(8, result, self.output.getvalue())
+
+    def test_instance_with_cyborg_dev_profile_is_blocked(self):
+        # we cannot boot with cyborg device in these tests so manipulate the
+        # instance.flavor directly after the boot to simulate an instance with
+        # cyborg request
+        server, _ = self._boot_and_remove_allocations(self.flavor, 'cell1')
+        instance = objects.Instance.get_by_uuid(
+            context.get_admin_context(), server['id'])
+        instance.flavor.extra_specs["accel:device_profile"] = "foo"
+        instance.save()
+
+        result = self.cli.heal_allocations(
+            verbose=True, instance_uuid=server['id'],
+            force=True
+        )
+
+        self.assertIn(
+            f"Healing allocation for instance {server['id']} with Cyborg "
+            f"device profile request is not supported.",
+            self.output.getvalue()
+        )
+        self.assertEqual(8, result, self.output.getvalue())
+
 
 class TestNovaManagePlacementHealPortAllocations(
         test_servers.PortResourceRequestBasedSchedulingTestBase):
@@ -2399,7 +2399,8 @@ class TestNovaManagePlacement(test.NoDBTestCase):
                 return_value=objects.InstanceList(objects=[
                     objects.Instance(
                         uuid=uuidsentinel.instance, host='fake', node='fake',
-                        task_state=None)]))
+                        task_state=None,
+                        flavor=objects.Flavor(extra_specs={}))]))
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocs_for_consumer', return_value={})
     @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
@@ -2419,7 +2420,7 @@ class TestNovaManagePlacement(test.NoDBTestCase):
                 return_value=objects.InstanceList(objects=[
                     objects.Instance(
                         uuid=uuidsentinel.instance, host='fake', node='fake',
-                        task_state=None, flavor=objects.Flavor(),
+                        task_state=None, flavor=objects.Flavor(extra_specs={}),
                         project_id='fake-project', user_id='fake-user')]))
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocs_for_consumer', return_value={})
@@ -2463,7 +2464,7 @@ class TestNovaManagePlacement(test.NoDBTestCase):
         new=mock.Mock(return_value=objects.InstanceList(objects=[
             objects.Instance(
                 uuid=uuidsentinel.instance, host='fake', node='fake',
-                task_state=None, flavor=objects.Flavor(),
+                task_state=None, flavor=objects.Flavor(extra_specs={}),
                 project_id='fake-project', user_id='fake-user')])))
     def test_heal_allocations_get_allocs_placement_fails(self):
         self.assertEqual(3, self.cli.heal_allocations())
@@ -2480,7 +2481,7 @@ class TestNovaManagePlacement(test.NoDBTestCase):
                 side_effect=[
                     objects.InstanceList(objects=[objects.Instance(
                         uuid=uuidsentinel.instance, host='fake', node='fake',
-                        task_state=None, flavor=objects.Flavor(),
+                        task_state=None, flavor=objects.Flavor(extra_specs={}),
                         project_id='fake-project', user_id='fake-user')]),
                     objects.InstanceList(objects=[])])
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -2505,7 +2506,8 @@ class TestNovaManagePlacement(test.NoDBTestCase):
                     objects.Instance(
                         uuid=uuidsentinel.instance, host='fake',
                         node='fake', task_state=None,
-                        project_id='fake-project', user_id='fake-user')]),
+                        project_id='fake-project', user_id='fake-user',
+                        flavor=objects.Flavor(extra_specs={}))]),
                 objects.InstanceList()))
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocs_for_consumer')
@@ -2559,7 +2561,8 @@ class TestNovaManagePlacement(test.NoDBTestCase):
                 objects.Instance(
                     uuid=uuidsentinel.instance, host='fake', node='fake',
                     task_state=None, project_id='fake-project',
-                    user_id='fake-user')]))
+                    user_id='fake-user',
+                    flavor=objects.Flavor(extra_specs={}))]))
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
                 'get_allocs_for_consumer')
     @mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',