diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 2b9a9a521b2e..a836a8f17cbd 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -198,7 +198,7 @@ def wrap_instance_fault(function):
             raise
         except Exception, e:
             with excutils.save_and_reraise_exception():
-                self._add_instance_fault_from_exc(context,
+                compute_utils.add_instance_fault_from_exc(context,
                         kwargs['instance']['uuid'], e, sys.exc_info())

     return decorated_function
@@ -1099,8 +1099,8 @@ class ComputeManager(manager.SchedulerDependentManager):
         except Exception, exc:
             LOG.error(_('Cannot reboot instance: %(exc)s'), locals(),
                       context=context, instance=instance)
-            self._add_instance_fault_from_exc(context, instance['uuid'], exc,
-                                              sys.exc_info())
+            compute_utils.add_instance_fault_from_exc(context,
+                    instance['uuid'], exc, sys.exc_info())
             # Fall through and reset task_state to None

         current_power_state = self._get_power_state(context, instance)
@@ -2717,27 +2717,6 @@ class ComputeManager(manager.SchedulerDependentManager):
         """
         self.resource_tracker.update_available_resource(context)

-    def _add_instance_fault_from_exc(self, context, instance_uuid, fault,
-                                     exc_info=None):
-        """Adds the specified fault to the database."""
-
-        code = 500
-        if hasattr(fault, "kwargs"):
-            code = fault.kwargs.get('code', 500)
-
-        details = unicode(fault)
-        if exc_info and code == 500:
-            tb = exc_info[2]
-            details += '\n' + ''.join(traceback.format_tb(tb))
-
-        values = {
-            'instance_uuid': instance_uuid,
-            'code': code,
-            'message': fault.__class__.__name__,
-            'details': unicode(details),
-        }
-        self.db.instance_fault_create(context, values)
-
     @manager.periodic_task(
         ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
     def _cleanup_running_deleted_instances(self, context):
diff --git a/nova/compute/utils.py b/nova/compute/utils.py
index 1eb595e80602..1f46d8019669 100644
--- a/nova/compute/utils.py
+++ b/nova/compute/utils.py
@@ -18,6 +18,7 @@

 import re
 import string
+import traceback

 from nova import block_device
 from nova import db
@@ -33,6 +34,27 @@
 FLAGS = flags.FLAGS
 LOG = log.getLogger(__name__)


+def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None):
+    """Adds the specified fault to the database."""
+
+    code = 500
+    if hasattr(fault, "kwargs"):
+        code = fault.kwargs.get('code', 500)
+
+    details = unicode(fault)
+    if exc_info and code == 500:
+        tb = exc_info[2]
+        details += '\n' + ''.join(traceback.format_tb(tb))
+
+    values = {
+        'instance_uuid': instance_uuid,
+        'code': code,
+        'message': fault.__class__.__name__,
+        'details': unicode(details),
+    }
+    db.instance_fault_create(context, values)
+
+
 def get_device_name_for_instance(context, instance, device):
     """Validates (or generates) a device name for instance.
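A minimal usage sketch of the relocated helper follows; the wrapper function and the instance dict here are hypothetical, only the compute_utils call mirrors the patch:

```python
# Sketch, not part of the patch: calling the module-level helper that replaces
# ComputeManager._add_instance_fault_from_exc. do_risky_work and the instance
# dict are invented for illustration.
import sys

from nova.compute import utils as compute_utils


def do_risky_work(context, instance):
    try:
        raise NotImplementedError('not implemented yet')
    except Exception, exc:
        # The helper derives code/message/details from the exception itself;
        # exc_info is optional and only appended for 500-level faults.
        compute_utils.add_instance_fault_from_exc(context,
                instance['uuid'], exc, sys.exc_info())
```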
diff --git a/nova/scheduler/chance.py b/nova/scheduler/chance.py
index 00042f43ad47..e44e9d7d2c1e 100644
--- a/nova/scheduler/chance.py
+++ b/nova/scheduler/chance.py
@@ -61,23 +61,28 @@ class ChanceScheduler(driver.Scheduler):
                               requested_networks, is_first_time,
                               filter_properties):
         """Create and run an instance or instances"""
-        instances = []
         instance_uuids = request_spec.get('instance_uuids')
         for num, instance_uuid in enumerate(instance_uuids):
-            host = self._schedule(context, 'compute', request_spec,
-                                  filter_properties)
             request_spec['instance_properties']['launch_index'] = num
-            updated_instance = driver.instance_update_db(context,
-                    instance_uuid, host)
-            self.compute_rpcapi.run_instance(context,
-                    instance=updated_instance, host=host,
-                    requested_networks=requested_networks,
-                    injected_files=injected_files,
-                    admin_password=admin_password, is_first_time=is_first_time,
-                    request_spec=request_spec,
-                    filter_properties=filter_properties)
-            instances.append(driver.encode_instance(updated_instance))
-        return instances
+            try:
+                host = self._schedule(context, 'compute', request_spec,
+                                      filter_properties)
+                updated_instance = driver.instance_update_db(context,
+                        instance_uuid, host)
+                self.compute_rpcapi.run_instance(context,
+                        instance=updated_instance, host=host,
+                        requested_networks=requested_networks,
+                        injected_files=injected_files,
+                        admin_password=admin_password,
+                        is_first_time=is_first_time,
+                        request_spec=request_spec,
+                        filter_properties=filter_properties)
+            except Exception as ex:
+                # NOTE(vish): we don't reraise the exception here to make sure
+                #             that all instances in the request get set to
+                #             error properly
+                driver.handle_schedule_error(context, ex, instance_uuid,
+                                             request_spec)

     def schedule_prep_resize(self, context, image, request_spec,
                              filter_properties, instance, instance_type,
diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index 705cb8636214..ee7db02b94f3 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -21,10 +21,13 @@
 Scheduler base class that all Schedulers should inherit from
 """

+import sys
+
 from nova.compute import api as compute_api
 from nova.compute import power_state
 from nova.compute import rpcapi as compute_rpcapi
-from nova.compute import task_states
+from nova.compute import utils as compute_utils
+from nova.compute import vm_states
 from nova import db
 from nova import exception
 from nova import flags
@@ -32,9 +35,9 @@
 from nova import notifications
 from nova.openstack.common import cfg
 from nova.openstack.common import importutils
 from nova.openstack.common import log as logging
+from nova.openstack.common.notifier import api as notifier
 from nova.openstack.common import rpc
 from nova.openstack.common import timeutils
-from nova import quota
 from nova import utils

@@ -56,6 +59,34 @@
 flags.DECLARE('instances_path', 'nova.compute.manager')
 flags.DECLARE('libvirt_type', 'nova.virt.libvirt.driver')


+def handle_schedule_error(context, ex, instance_uuid, request_spec):
+    if not isinstance(ex, exception.NoValidHost):
+        LOG.exception(_("Exception during scheduler.run_instance"))
+    compute_utils.add_instance_fault_from_exc(context,
+            instance_uuid, ex, sys.exc_info())
+    state = vm_states.ERROR.upper()
+    LOG.warning(_('Setting instance to %(state)s state.'),
+                locals(), instance_uuid=instance_uuid)
+
+    # update instance state and notify on the transition
+    (old_ref, new_ref) = db.instance_update_and_get_original(context,
+            instance_uuid, {'vm_state': vm_states.ERROR,
+                            'task_state': None})
+    notifications.send_update(context, old_ref, new_ref,
+            service="scheduler")
+
+    properties = request_spec.get('instance_properties', {})
+    payload = dict(request_spec=request_spec,
+                   instance_properties=properties,
+                   instance_id=instance_uuid,
+                   state=vm_states.ERROR,
+                   method='run_instance',
+                   reason=ex)
+
+    notifier.notify(context, notifier.publisher_id("scheduler"),
+                    'scheduler.run_instance', notifier.ERROR, payload)
+
+
 def cast_to_volume_host(context, host, method, **kwargs):
     """Cast request to a volume host queue"""
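Both scheduler drivers now share the per-instance pattern sketched below; `_schedule_one` is a hypothetical stand-in for each driver's host selection and RPC cast, while `handle_schedule_error` is the new function added to nova/scheduler/driver.py above:

```python
# Sketch of the per-instance error handling introduced by this patch.
# _schedule_one is hypothetical; handle_schedule_error records an instance
# fault, sets the instance to ERROR and emits a scheduler.run_instance
# error notification.
from nova.scheduler import driver


def run_all_instances(scheduler, context, request_spec, filter_properties):
    for num, instance_uuid in enumerate(request_spec['instance_uuids']):
        request_spec['instance_properties']['launch_index'] = num
        try:
            scheduler._schedule_one(context, instance_uuid, request_spec,
                                    filter_properties)
        except Exception as ex:
            # Deliberately not re-raised, so the remaining instances in the
            # request are still scheduled and each failure is recorded.
            driver.handle_schedule_error(context, ex, instance_uuid,
                                         request_spec)
```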
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index da4c4c0ddb51..a3e3e1354a9b 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -72,27 +72,32 @@ class FilterScheduler(driver.Scheduler):
         weighted_hosts = self._schedule(context, "compute", request_spec,
                                         filter_properties, instance_uuids)
-        if not weighted_hosts:
-            raise exception.NoValidHost(reason="")
-
         # NOTE(comstud): Make sure we do not pass this through.  It
         # contains an instance of RpcContext that cannot be serialized.
         filter_properties.pop('context', None)

         for num, instance_uuid in enumerate(instance_uuids):
-            if not weighted_hosts:
-                break
-            weighted_host = weighted_hosts.pop(0)
-
             request_spec['instance_properties']['launch_index'] = num

-            self._provision_resource(elevated, weighted_host,
-                                     request_spec,
-                                     filter_properties,
-                                     requested_networks,
-                                     injected_files, admin_password,
-                                     is_first_time,
-                                     instance_uuid=instance_uuid)
+            try:
+                try:
+                    weighted_host = weighted_hosts.pop(0)
+                except IndexError:
+                    raise exception.NoValidHost(reason="")
+
+                self._provision_resource(elevated, weighted_host,
+                                         request_spec,
+                                         filter_properties,
+                                         requested_networks,
+                                         injected_files, admin_password,
+                                         is_first_time,
+                                         instance_uuid=instance_uuid)
+            except Exception as ex:
+                # NOTE(vish): we don't reraise the exception here to make sure
+                #             that all instances in the request get set to
+                #             error properly
+                driver.handle_schedule_error(context, ex, instance_uuid,
+                                             request_spec)

         # scrub retry host list in case we're scheduling multiple
         # instances:
         retry = filter_properties.get('retry', {})
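The nested try above replaces the old `if not weighted_hosts: break`; in isolation the conversion is just the following (the `hosts` argument stands in for `weighted_hosts`):

```python
# Illustration only: running out of candidate hosts now surfaces as
# NoValidHost, which the outer try/except turns into an instance fault and
# an ERROR vm_state instead of silently skipping the remaining instances.
from nova import exception


def pop_next_host(hosts):
    try:
        return hosts.pop(0)
    except IndexError:
        raise exception.NoValidHost(reason="")
```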
diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py
index 45fffd9c41f0..de07d4d68995 100644
--- a/nova/tests/compute/test_compute.py
+++ b/nova/tests/compute/test_compute.py
@@ -35,6 +35,7 @@ from nova.compute import manager as compute_manager
 from nova.compute import power_state
 from nova.compute import rpcapi as compute_rpcapi
 from nova.compute import task_states
+from nova.compute import utils as compute_utils
 from nova.compute import vm_states
 from nova import context
 from nova import db
@@ -212,7 +213,7 @@ class ComputeTestCase(BaseTestCase):
         def did_it_add_fault(*args):
             called['fault_added'] = True

-        self.stubs.Set(self.compute, '_add_instance_fault_from_exc',
+        self.stubs.Set(compute_utils, 'add_instance_fault_from_exc',
                        did_it_add_fault)

         @nova.compute.manager.wrap_instance_fault
@@ -232,7 +233,7 @@ class ComputeTestCase(BaseTestCase):
         def did_it_add_fault(*args):
             called['fault_added'] = True

-        self.stubs.Set(self.compute, '_add_instance_fault_from_exc',
+        self.stubs.Set(compute_utils, 'add_instance_fault_from_exc',
                        did_it_add_fault)

         @nova.compute.manager.wrap_instance_fault
@@ -2131,7 +2132,7 @@ class ComputeTestCase(BaseTestCase):
         self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)

         ctxt = context.get_admin_context()
-        self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
+        compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
                                                   NotImplementedError('test'),
                                                   exc_info)

@@ -2159,7 +2160,7 @@ class ComputeTestCase(BaseTestCase):
         self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)

         ctxt = context.get_admin_context()
-        self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
+        compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
                                                   user_exc, exc_info)

     def test_add_instance_fault_no_exc_info(self):
@@ -2177,7 +2178,7 @@ class ComputeTestCase(BaseTestCase):
         self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)

         ctxt = context.get_admin_context()
-        self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
+        compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
                                                   NotImplementedError('test'))

     def test_cleanup_running_deleted_instances(self):
diff --git a/nova/tests/scheduler/test_chance_scheduler.py b/nova/tests/scheduler/test_chance_scheduler.py
index 7560f72f4c26..4fb9ab617cea 100644
--- a/nova/tests/scheduler/test_chance_scheduler.py
+++ b/nova/tests/scheduler/test_chance_scheduler.py
@@ -23,7 +23,10 @@ import random
 import mox

 from nova.compute import rpcapi as compute_rpcapi
+from nova.compute import utils as compute_utils
+from nova.compute import vm_states
 from nova import context
+from nova import db
 from nova import exception
 from nova.scheduler import chance
 from nova.scheduler import driver
@@ -79,7 +82,6 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
         self.mox.StubOutWithMock(ctxt, 'elevated')
         self.mox.StubOutWithMock(self.driver, 'hosts_up')
         self.mox.StubOutWithMock(random, 'random')
-        self.mox.StubOutWithMock(driver, 'encode_instance')
         self.mox.StubOutWithMock(driver, 'instance_update_db')
         self.mox.StubOutWithMock(compute_rpcapi.ComputeAPI, 'run_instance')

@@ -94,7 +96,6 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 instance=instance1, requested_networks=None,
                 injected_files=None, admin_password=None, is_first_time=None,
                 request_spec=request_spec, filter_properties={})
-        driver.encode_instance(instance1).AndReturn(instance1_encoded)

         # instance 2
         ctxt.elevated().AndReturn(ctxt_elevated)
@@ -107,33 +108,37 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
                 instance=instance2, requested_networks=None,
                 injected_files=None, admin_password=None, is_first_time=None,
                 request_spec=request_spec, filter_properties={})
-        driver.encode_instance(instance2).AndReturn(instance2_encoded)

         self.mox.ReplayAll()
-        result = self.driver.schedule_run_instance(ctxt, request_spec,
+        self.driver.schedule_run_instance(ctxt, request_spec,
                 None, None, None, None, {})
-        expected = [instance1_encoded, instance2_encoded]
-        self.assertEqual(result, expected)

     def test_basic_schedule_run_instance_no_hosts(self):
         ctxt = context.RequestContext('fake', 'fake', False)
         ctxt_elevated = 'fake-context-elevated'
         fake_args = (1, 2, 3)
+        uuid = 'fake-uuid1'
         instance_opts = {'fake_opt1': 'meow', 'launch_index': -1}
-        request_spec = {'instance_uuids': ['fake-uuid1'],
+        request_spec = {'instance_uuids': [uuid],
                         'instance_properties': instance_opts}

         self.mox.StubOutWithMock(ctxt, 'elevated')
         self.mox.StubOutWithMock(self.driver, 'hosts_up')
+        self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
+        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')

         # instance 1
         ctxt.elevated().AndReturn(ctxt_elevated)
         self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn([])
+        compute_utils.add_instance_fault_from_exc(ctxt,
+                uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
+        db.instance_update_and_get_original(ctxt, uuid,
+                {'vm_state': vm_states.ERROR,
+                 'task_state': None}).AndReturn(({}, {}))

         self.mox.ReplayAll()
-        self.assertRaises(exception.NoValidHost,
-                          self.driver.schedule_run_instance, ctxt,
-                          request_spec, None, None, None, None, {})
+        self.driver.schedule_run_instance(
+                ctxt, request_spec, None, None, None, None, {})

     def test_schedule_prep_resize_doesnt_update_host(self):
         fake_context = context.RequestContext('user', 'project',
diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py
index 909bad5fc2b7..130251a0f34a 100644
--- a/nova/tests/scheduler/test_filter_scheduler.py
+++ b/nova/tests/scheduler/test_filter_scheduler.py
@@ -18,7 +18,10 @@ Tests For Filter Scheduler.

 import mox

+from nova.compute import utils as compute_utils
+from nova.compute import vm_states
 from nova import context
+from nova import db
 from nova import exception
 from nova.scheduler import driver
 from nova.scheduler import filter_scheduler
@@ -38,26 +41,31 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
     driver_cls = filter_scheduler.FilterScheduler

     def test_run_instance_no_hosts(self):
-        """
-        Ensure empty hosts & child_zones result in NoValidHosts exception.
-        """
+
         def _fake_empty_call_zone_method(*args, **kwargs):
             return []

         sched = fakes.FakeFilterScheduler()
+        uuid = 'fake-uuid1'
         fake_context = context.RequestContext('user', 'project')
         request_spec = {'instance_type': {'memory_mb': 1, 'root_gb': 1,
                                           'ephemeral_gb': 0},
                         'instance_properties': {'project_id': 1},
-                        'instance_uuids': ['fake-uuid1']}
-        self.assertRaises(exception.NoValidHost, sched.schedule_run_instance,
-                          fake_context, request_spec, None, None, None,
-                          None, {})
+                        'instance_uuids': [uuid]}
+
+        self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
+        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
+        compute_utils.add_instance_fault_from_exc(fake_context,
+                uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
+        db.instance_update_and_get_original(fake_context, uuid,
+                {'vm_state': vm_states.ERROR,
+                 'task_state': None}).AndReturn(({}, {}))
+        self.mox.ReplayAll()
+        sched.schedule_run_instance(
+                fake_context, request_spec, None, None, None, None, {})

     def test_run_instance_non_admin(self):
-        """Test creating an instance locally using run_instance, passing
-        a non-admin context. DB actions should work."""
         self.was_admin = False

         def fake_get(context, *args, **kwargs):
@@ -71,12 +79,20 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):

         fake_context = context.RequestContext('user', 'project')

+        uuid = 'fake-uuid1'
         request_spec = {'instance_type': {'memory_mb': 1, 'local_gb': 1},
                         'instance_properties': {'project_id': 1},
-                        'instance_uuids': ['fake-uuid1']}
-        self.assertRaises(exception.NoValidHost, sched.schedule_run_instance,
-                          fake_context, request_spec, None, None, None,
-                          None, {})
+                        'instance_uuids': [uuid]}
+        self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
+        self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
+        compute_utils.add_instance_fault_from_exc(fake_context,
+                uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
+        db.instance_update_and_get_original(fake_context, uuid,
+                {'vm_state': vm_states.ERROR,
+                 'task_state': None}).AndReturn(({}, {}))
+        self.mox.ReplayAll()
+        sched.schedule_run_instance(
+                fake_context, request_spec, None, None, None, None, {})
         self.assertTrue(self.was_admin)

     def test_schedule_bad_topic(self):
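The reworked tests all set up the same mox expectations before calling schedule_run_instance; a trimmed sketch of that arrangement, outside any particular test class, looks roughly like this (`m` is a mox.Mox instance, and `uuid`/`ctxt` mirror the fixtures above):

```python
# Condensed sketch of the expectations the updated scheduler tests record for
# a failed schedule; the helper name and its arguments are illustrative.
import mox

from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import db
from nova import exception


def expect_schedule_failure(m, ctxt, uuid):
    """Record the calls a failed schedule_run_instance should make."""
    m.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
    m.StubOutWithMock(db, 'instance_update_and_get_original')
    compute_utils.add_instance_fault_from_exc(ctxt,
            uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
    db.instance_update_and_get_original(ctxt, uuid,
            {'vm_state': vm_states.ERROR,
             'task_state': None}).AndReturn(({}, {}))
    # The caller then does m.ReplayAll(), invokes schedule_run_instance and
    # finally verifies/unsets the stubs in teardown.
```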