diff --git a/nova/api/openstack/compute/migrate_server.py b/nova/api/openstack/compute/migrate_server.py index 2f93450cf8e9..5c7ef1def454 100644 --- a/nova/api/openstack/compute/migrate_server.py +++ b/nova/api/openstack/compute/migrate_server.py @@ -76,15 +76,14 @@ class MigrateServerController(wsgi.Controller): host_name=host_name) except (exception.TooManyInstances, exception.QuotaError) as e: raise exc.HTTPForbidden(explanation=e.format_message()) - except (exception.InstanceIsLocked, - exception.AllocationMoveFailed) as e: + except exception.InstanceIsLocked as e: raise exc.HTTPConflict(explanation=e.format_message()) except exception.InstanceInvalidState as state_error: common.raise_http_conflict_for_instance_invalid_state(state_error, 'migrate', id) except exception.InstanceNotFound as e: raise exc.HTTPNotFound(explanation=e.format_message()) - except (exception.NoValidHost, exception.ComputeHostNotFound, + except (exception.ComputeHostNotFound, exception.CannotMigrateToSameHost) as e: raise exc.HTTPBadRequest(explanation=e.format_message()) diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index 7ff7cb6dd4cb..95e32cc545bc 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -958,8 +958,7 @@ class ServersController(wsgi.Controller): except exception.QuotaError as error: raise exc.HTTPForbidden( explanation=error.format_message()) - except (exception.InstanceIsLocked, - exception.AllocationMoveFailed) as e: + except exception.InstanceIsLocked as e: raise exc.HTTPConflict(explanation=e.format_message()) except exception.InstanceInvalidState as state_error: common.raise_http_conflict_for_instance_invalid_state(state_error, @@ -975,8 +974,7 @@ class ServersController(wsgi.Controller): except (exception.AutoDiskConfigDisabledByImage, exception.CannotResizeDisk, exception.CannotResizeToSameFlavor, - exception.FlavorNotFound, - exception.NoValidHost) as e: + exception.FlavorNotFound) as e: raise exc.HTTPBadRequest(explanation=e.format_message()) except INVALID_FLAVOR_IMAGE_EXCEPTIONS as e: raise exc.HTTPBadRequest(explanation=e.format_message()) diff --git a/nova/compute/api.py b/nova/compute/api.py index 9d22ec3e281b..b248cac1c35b 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -3837,17 +3837,15 @@ class API(base.Base): host=node.host, node=node.hypervisor_hostname, allow_cross_cell_move=allow_cross_cell_resize) - # This is by default a synchronous RPC call to conductor which does not - # return until the MigrationTask in conductor RPC casts to the - # prep_resize method on the selected destination nova-compute service. - # However, if we are allowed to do a cross-cell resize, then we - # asynchronously RPC cast since the CrossCellMigrationTask is blocking. + # Asynchronously RPC cast to conductor so the response is not blocked + # during scheduling. If something fails the user can find out via + # instance actions. self.compute_task_api.resize_instance(context, instance, scheduler_hint=scheduler_hint, flavor=new_instance_type, clean_shutdown=clean_shutdown, request_spec=request_spec, - do_cast=allow_cross_cell_resize) + do_cast=True) @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, diff --git a/nova/objects/instance_action.py b/nova/objects/instance_action.py index e8b89f62b8a3..0e798839259e 100644 --- a/nova/objects/instance_action.py +++ b/nova/objects/instance_action.py @@ -184,6 +184,17 @@ class InstanceActionEvent(base.NovaPersistentObject, base.NovaObject, values['result'] = 'Success' else: values['result'] = 'Error' + # FIXME(mriedem): message is not used. The instance_actions_events + # table has a "details" column but not a "message" column which + # means the exc_val is never stored in the record. So far it does + # not matter because the exc_val is not exposed out of the API, + # but we should consider storing at least the exception type so + # we could expose that to non-admin owners of a server in the API + # since then they could see something like NoValidHost to know why + # the operation failed. Note by default policy non-admins are not + # allowed to see the traceback field. If we expose exc_val somehow + # we might consider re-using logic from exception_to_dict which + # is used to store an instance fault message. values['message'] = exc_val values['traceback'] = exc_tb return values diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index e73c416406c2..dbe1ee2c972f 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -347,6 +347,19 @@ class InstanceHelperMixin(object): 'actions: %s. Events in the last matching action: %s' % (event_name, actions, events)) + def _assert_resize_migrate_action_fail(self, server, action, error_in_tb): + """Waits for the conductor_migrate_server action event to fail for + the given action and asserts the error is in the event traceback. + + :param server: API response dict of the server being resized/migrated + :param action: Either "resize" or "migrate" instance action. + :param error_in_tb: Some expected part of the error event traceback. + """ + api = self.admin_api if hasattr(self, 'admin_api') else self.api + event = self._wait_for_action_fail_completion( + server, action, 'conductor_migrate_server', api=api) + self.assertIn(error_in_tb, event['traceback']) + def _wait_for_migration_status(self, server, expected_statuses): """Waits for a migration record with the given statuses to be found for the given server, else the test fails. The migration record, if diff --git a/nova/tests/functional/notification_sample_tests/test_compute_task.py b/nova/tests/functional/notification_sample_tests/test_compute_task.py index b57d9259021a..7c76a34e3373 100644 --- a/nova/tests/functional/notification_sample_tests/test_compute_task.py +++ b/nova/tests/functional/notification_sample_tests/test_compute_task.py @@ -11,7 +11,6 @@ # under the License. from nova.tests import fixtures -from nova.tests.functional.api import client as api_client from nova.tests.functional.notification_sample_tests \ import notification_sample_base from nova.tests.unit import fake_notifier @@ -107,9 +106,9 @@ class TestComputeTaskNotificationSample( fake_notifier.reset() - self.assertRaises(api_client.OpenStackApiException, - self.admin_api.post_server_action, - server['id'], {'migrate': None}) + # Note that the operation will return a 202 response but fail with + # NoValidHost asynchronously. + self.admin_api.post_server_action(server['id'], {'migrate': None}) self._wait_for_notification('compute_task.migrate_server.error') # 0. scheduler.select_destinations.start # 1. compute_task.migrate_server.error diff --git a/nova/tests/functional/regressions/test_bug_1790204.py b/nova/tests/functional/regressions/test_bug_1790204.py index 7747a68bac62..2b59b7e5a73e 100644 --- a/nova/tests/functional/regressions/test_bug_1790204.py +++ b/nova/tests/functional/regressions/test_bug_1790204.py @@ -11,9 +11,8 @@ # under the License. from oslo_utils.fixture import uuidsentinel as uuids -import six -from nova.tests.functional.api import client as api_client +from nova.compute import instance_actions from nova.tests.functional import integrated_helpers @@ -70,8 +69,8 @@ class ResizeSameHostDoubledAllocations( # fails because DISK_GB inventory allocation ratio is 1.0 and when # resizing to the same host we'll be trying to claim 2048 but the # provider only has a total of 1024. - ex = self.assertRaises(api_client.OpenStackApiException, - self.api.post_server_action, - server['id'], resize_req) - self.assertEqual(400, ex.response.status_code) - self.assertIn('No valid host was found', six.text_type(ex)) + self.api.post_server_action(server['id'], resize_req, + check_response_status=[202]) + # The instance action should have failed with details. + self._assert_resize_migrate_action_fail( + server, instance_actions.RESIZE, 'NoValidHost') diff --git a/nova/tests/functional/regressions/test_bug_1815153.py b/nova/tests/functional/regressions/test_bug_1815153.py index f4f42c38ddfb..8e461322c0b9 100644 --- a/nova/tests/functional/regressions/test_bug_1815153.py +++ b/nova/tests/functional/regressions/test_bug_1815153.py @@ -12,13 +12,11 @@ # License for the specific language governing permissions and limitations # under the License. -import six - +from nova.compute import instance_actions from nova import context from nova import objects from nova import test from nova.tests import fixtures as nova_fixtures -from nova.tests.functional.api import client from nova.tests.functional import fixtures as func_fixtures from nova.tests.functional import integrated_helpers from nova.tests.unit.image import fake as image_fake @@ -118,18 +116,15 @@ class NonPersistentFieldNotResetTest( source_compute_id, {'forced_down': 'true'}) # Cold migrate a server with a target host. - # If requested_destination is reset, the server is moved to a host - # that is not a requested target host. - # In that case, the response code is 202. - # If requested_destination is not reset, no valid host error (400) is - # returned because the target host is down. - ex = self.assertRaises(client.OpenStackApiException, - self.api.post_server_action, - server['id'], - {'migrate': {'host': target_host}}) - self.assertEqual(400, ex.response.status_code) - self.assertIn('No valid host was found. No valid host found ' - 'for cold migrate', six.text_type(ex)) + # The response status code is 202 even though the operation will + # fail because the requested target host is down which will result + # in a NoValidHost error. + self.api.post_server_action( + server['id'], {'migrate': {'host': target_host}}, + check_response_status=[202]) + # The instance action should have failed with details. + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'NoValidHost') # Make sure 'is_bfv' is set. reqspec = objects.RequestSpec.get_by_instance_uuid(self.ctxt, diff --git a/nova/tests/functional/regressions/test_bug_1848343.py b/nova/tests/functional/regressions/test_bug_1848343.py index 9841883ee5b8..63dc38166c0d 100644 --- a/nova/tests/functional/regressions/test_bug_1848343.py +++ b/nova/tests/functional/regressions/test_bug_1848343.py @@ -96,16 +96,17 @@ class DeletedServerAllocationRevertTest( self._stub_delete_server_during_scheduling(server) # Now start the cold migration which will fail due to NoValidHost. - # Note that we get a 404 back because this is a blocking call until - # conductor RPC casts to prep_resize on the selected dest host but - # we never get that far. self.api.post_server_action(server['id'], {'migrate': None}, - check_response_status=[404]) + check_response_status=[202]) # We cannot monitor the migration from the API since it is deleted # when the instance is deleted so just wait for the failed instance # action event after the task rollback happens. - self._wait_for_action_fail_completion( - server, instance_actions.MIGRATE, 'cold_migrate', api=self.api) + # Note that we get InstanceNotFound rather than NoValidHost because + # the NoValidHost handler in ComputeTaskManager._cold_migrate calls + # _set_vm_state_and_notify which raises InstanceNotFound and masks + # the NoValidHost error. + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'InstanceNotFound') self._assert_no_allocations(server) def test_live_migration_task_rollback(self): diff --git a/nova/tests/functional/test_scheduler.py b/nova/tests/functional/test_scheduler.py index bb36e399cfc3..9b50bc5809e1 100644 --- a/nova/tests/functional/test_scheduler.py +++ b/nova/tests/functional/test_scheduler.py @@ -10,6 +10,7 @@ # License for the specific language governing permissions and limitations # under the License. +from nova.compute import instance_actions from nova import test from nova.tests import fixtures as nova_fixtures from nova.tests.functional import fixtures as func_fixtures @@ -63,7 +64,7 @@ class MultiCellSchedulerTestCase(test.TestCase, return self.admin_api.api_post( '/servers/%s/action' % found_server['id'], {'migrate': None}, - check_response_status=[expected_status]) + check_response_status=[expected_status]), found_server def test_migrate_between_cells(self): """Verify that migrating between cells is not allowed. @@ -75,8 +76,10 @@ class MultiCellSchedulerTestCase(test.TestCase, self.start_service('compute', host='compute1', cell=CELL1_NAME) self.start_service('compute', host='compute2', cell=CELL2_NAME) - result = self._test_create_and_migrate(expected_status=400) - self.assertIn('No valid host', result.body['badRequest']['message']) + _, server = self._test_create_and_migrate(expected_status=202) + # The instance action should have failed with details. + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'NoValidHost') def test_migrate_within_cell(self): """Verify that migrating within cells is allowed. diff --git a/nova/tests/functional/test_server_group.py b/nova/tests/functional/test_server_group.py index 9cee6ce6473a..9560c6d590e4 100644 --- a/nova/tests/functional/test_server_group.py +++ b/nova/tests/functional/test_server_group.py @@ -17,6 +17,7 @@ import mock from oslo_config import cfg import six +from nova.compute import instance_actions from nova import context from nova.db import api as db from nova.db.sqlalchemy import api as db_api @@ -346,11 +347,13 @@ class ServerGroupTestV21(ServerGroupTestBase): servers = self._boot_servers_to_group(created_group) post = {'migrate': {}} - ex = self.assertRaises(client.OpenStackApiException, - self.admin_api.post_server_action, - servers[1]['id'], post) - self.assertEqual(400, ex.response.status_code) - self.assertIn('No valid host found for cold migrate', ex.response.text) + # NoValidHost errors are handled in conductor after the API has cast + # off and returned a 202 response to the user. + self.admin_api.post_server_action(servers[1]['id'], post, + check_response_status=[202]) + # The instance action should have failed with details. + self._assert_resize_migrate_action_fail( + servers[1], instance_actions.MIGRATE, 'NoValidHost') def test_migrate_with_group_no_valid_host(self): for group in [self.affinity, self.anti_affinity]: diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index c4c8b3dc5873..c6a9fdb59ece 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -2006,11 +2006,10 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): } } - resp = self.api.post_server_action( - server['id'], resize_req, check_response_status=[400]) - self.assertEqual( - resp['badRequest']['message'], - "No valid host was found. No valid host found for resize") + self.api.post_server_action( + server['id'], resize_req, check_response_status=[202]) + self._assert_resize_migrate_action_fail( + server, instance_actions.RESIZE, 'NoValidHost') server = self.admin_api.get_server(server['id']) self.assertEqual(source_hostname, server['OS-EXT-SRV-ATTR:host']) @@ -2210,10 +2209,9 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): # migrate the server post = {'migrate': None} - ex = self.assertRaises(client.OpenStackApiException, - self.api.post_server_action, - server['id'], post) - self.assertIn('No valid host', six.text_type(ex)) + self.api.post_server_action(server['id'], post) + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'NoValidHost') expected_params = {'OS-EXT-SRV-ATTR:host': source_hostname, 'status': 'ACTIVE'} self._wait_for_server_parameter(self.api, server, expected_params) @@ -3300,15 +3298,6 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): data = {"resize": {"flavorRef": self.flavor2["id"]}} self.api.post_server_action(server_uuid, data) - # fill_provider_mapping should have been called once for the initial - # build, once for the resize scheduling to the primary host and then - # once per reschedule. - expected_fill_count = 2 - if num_alts > 1: - expected_fill_count += self.num_fails - 1 - self.assertGreaterEqual(mock_fill_provider_map.call_count, - expected_fill_count) - if num_alts < fails: # We will run out of alternates before populate_retry will # raise a MaxRetriesExceeded exception, so the migration will @@ -3349,6 +3338,15 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): migrations = objects.MigrationList.get_by_filters(ctxt, filters) self.assertEqual(1, len(migrations.objects)) + # fill_provider_mapping should have been called once for the initial + # build, once for the resize scheduling to the primary host and then + # once per reschedule. + expected_fill_count = 2 + if num_alts > 1: + expected_fill_count += self.num_fails - 1 + self.assertGreaterEqual(mock_fill_provider_map.call_count, + expected_fill_count) + def test_resize_reschedule_uses_host_lists_1_fail(self): self._test_resize_reschedule_uses_host_lists(fails=1) @@ -4701,14 +4699,19 @@ class ConsumerGenerationConflictTest( mock_put.return_value = rsp request = {'migrate': None} - exception = self.assertRaises(client.OpenStackApiException, - self.api.post_server_action, - server['id'], request) + self.api.post_server_action(server['id'], request, + check_response_status=[202]) + self._wait_for_server_parameter(self.admin_api, server, + {'OS-EXT-STS:task_state': None}) - # I know that HTTP 500 is harsh code but I think this conflict case - # signals either a serious db inconsistency or a bug in nova's - # claim code. - self.assertEqual(500, exception.response.status_code) + # The instance action should have failed with details. + # save_and_reraise_exception gets different results between py2 and py3 + # for the traceback but we want to use the more specific + # "claim_resources" for py3. We can remove this when we drop support + # for py2. + error_in_tb = 'claim_resources' if six.PY3 else 'select_destinations' + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, error_in_tb) # The migration is aborted so the instance is ACTIVE on the source # host instead of being in VERIFY_RESIZE state. @@ -4741,15 +4744,16 @@ class ConsumerGenerationConflictTest( mock_post.return_value = rsp request = {'migrate': None} - exception = self.assertRaises(client.OpenStackApiException, - self.api.post_server_action, - server['id'], request) + self.api.post_server_action(server['id'], request, + check_response_status=[202]) + self._wait_for_server_parameter(self.admin_api, server, + {'OS-EXT-STS:task_state': None}) + + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'move_allocations') self.assertEqual(1, mock_post.call_count) - self.assertEqual(409, exception.response.status_code) - self.assertIn('Failed to move allocations', exception.response.text) - migrations = self.api.get_migrations() self.assertEqual(1, len(migrations)) self.assertEqual('migration', migrations[0]['migration_type']) @@ -6542,12 +6546,13 @@ class ServerMoveWithPortResourceRequestTest( 'nova.objects.Service.get_by_host_and_binary', side_effect=fake_get_service): - ex = self.assertRaises( - client.OpenStackApiException, - self.api.post_server_action, server['id'], {'migrate': None}) + self.api.post_server_action(server['id'], {'migrate': None}, + check_response_status=[202]) + self._wait_for_server_parameter(self.admin_api, server, + {'OS-EXT-STS:task_state': None}) - self.assertEqual(400, ex.response.status_code) - self.assertIn('No valid host was found.', six.text_type(ex)) + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'NoValidHost') # check that the server still allocates from the original host self._check_allocation( @@ -6607,8 +6612,7 @@ class ServerMoveWithPortResourceRequestTest( side_effect=fake_get_service): self.api.post_server_action(server['id'], {'migrate': None}) - - self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE') + self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE') migration_uuid = self.get_migration_uuid_for_instance(server['id']) @@ -6954,13 +6958,11 @@ class ServerMoveWithPortResourceRequestTest( # enough information to do a proper port binding on the target host. # The MigrationTask in the conductor checks that the RPC is new enough # for this request for each possible destination provided by the - # scheduler and skips the old hosts. - ex = self.assertRaises( - client.OpenStackApiException, self.api.post_server_action, - server['id'], {'migrate': None}) - - self.assertEqual(400, ex.response.status_code) - self.assertIn('No valid host was found.', six.text_type(ex)) + # scheduler and skips the old hosts. The actual response will be a 202 + # so we have to wait for the failed instance action event. + self.api.post_server_action(server['id'], {'migrate': None}) + self._assert_resize_migrate_action_fail( + server, instance_actions.MIGRATE, 'NoValidHost') # The migration is put into error self._wait_for_migration_status(server, ['error']) diff --git a/nova/tests/unit/api/openstack/compute/test_server_actions.py b/nova/tests/unit/api/openstack/compute/test_server_actions.py index e84aae44aa3b..96931f92ef9a 100644 --- a/nova/tests/unit/api/openstack/compute/test_server_actions.py +++ b/nova/tests/unit/api/openstack/compute/test_server_actions.py @@ -721,8 +721,6 @@ class ServerActionsControllerTestV21(test.TestCase): (exception.ImageNotFound(image_id=image_id), webob.exc.HTTPBadRequest), (exception.Invalid, webob.exc.HTTPBadRequest), - (exception.NoValidHost(reason='Bad host'), - webob.exc.HTTPBadRequest), (exception.AutoDiskConfigDisabledByImage(image=image_id), webob.exc.HTTPBadRequest), ] @@ -795,15 +793,6 @@ class ServerActionsControllerTestV21(test.TestCase): self.controller._action_resize, self.req, FAKE_UUID, body=body) - @mock.patch('nova.compute.api.API.resize', - side_effect=exception.NoValidHost(reason='')) - def test_resize_raises_no_valid_host(self, mock_resize): - body = dict(resize=dict(flavorRef="http://localhost/3")) - - self.assertRaises(webob.exc.HTTPBadRequest, - self.controller._action_resize, - self.req, FAKE_UUID, body=body) - @mock.patch.object(compute_api.API, 'resize') def test_resize_instance_raise_auto_disk_config_exc(self, mock_resize): mock_resize.side_effect = exception.AutoDiskConfigDisabledByImage( diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 85ed941ec6ac..51d5a751c6d5 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -1941,7 +1941,7 @@ class _ComputeAPIUnitTestMixIn(object): scheduler_hint=scheduler_hint, flavor=test.MatchType(objects.Flavor), clean_shutdown=clean_shutdown, - request_spec=fake_spec, do_cast=allow_cross_cell_resize) + request_spec=fake_spec, do_cast=True) else: mock_resize.assert_not_called() diff --git a/releasenotes/notes/resize-api-cast-always-7eb1dbef8f7fe228.yaml b/releasenotes/notes/resize-api-cast-always-7eb1dbef8f7fe228.yaml new file mode 100644 index 000000000000..ed07e2c5835f --- /dev/null +++ b/releasenotes/notes/resize-api-cast-always-7eb1dbef8f7fe228.yaml @@ -0,0 +1,12 @@ +--- +other: + - | + The ``resize`` and ``migrate`` server action APIs used to synchronously + block until a destination host is selected by the scheduler. Those APIs + now asynchronously return a response to the user before scheduling. + The response code remains 202 and users can monitor the operation via the + ``status`` and ``OS-EXT-STS:task_state`` fields on the server resource and + also by using the ``os-instance-actions`` API. The most notable + change is ``NoValidHost`` will not be returned in a 400 error response + from the API if scheduling fails but that information is available via the + instance actions API interface.