Merge "Fixes error handling during schedule_run_instance"

This commit is contained in:
Jenkins 2012-09-18 07:55:02 +00:00 committed by Gerrit Code Review
commit 9f5f7dae75
8 changed files with 146 additions and 82 deletions

View File

@ -198,7 +198,7 @@ def wrap_instance_fault(function):
raise
except Exception, e:
with excutils.save_and_reraise_exception():
self._add_instance_fault_from_exc(context,
compute_utils.add_instance_fault_from_exc(context,
kwargs['instance']['uuid'], e, sys.exc_info())
return decorated_function
@ -1099,8 +1099,8 @@ class ComputeManager(manager.SchedulerDependentManager):
except Exception, exc:
LOG.error(_('Cannot reboot instance: %(exc)s'), locals(),
context=context, instance=instance)
self._add_instance_fault_from_exc(context, instance['uuid'], exc,
sys.exc_info())
compute_utils.add_instance_fault_from_exc(context,
instance['uuid'], exc, sys.exc_info())
# Fall through and reset task_state to None
current_power_state = self._get_power_state(context, instance)
@ -2717,27 +2717,6 @@ class ComputeManager(manager.SchedulerDependentManager):
"""
self.resource_tracker.update_available_resource(context)
def _add_instance_fault_from_exc(self, context, instance_uuid, fault,
exc_info=None):
"""Adds the specified fault to the database."""
code = 500
if hasattr(fault, "kwargs"):
code = fault.kwargs.get('code', 500)
details = unicode(fault)
if exc_info and code == 500:
tb = exc_info[2]
details += '\n' + ''.join(traceback.format_tb(tb))
values = {
'instance_uuid': instance_uuid,
'code': code,
'message': fault.__class__.__name__,
'details': unicode(details),
}
self.db.instance_fault_create(context, values)
@manager.periodic_task(
ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
def _cleanup_running_deleted_instances(self, context):

View File

@ -18,6 +18,7 @@
import re
import string
import traceback
from nova import block_device
from nova import db
@ -33,6 +34,27 @@ FLAGS = flags.FLAGS
LOG = log.getLogger(__name__)
def add_instance_fault_from_exc(context, instance_uuid, fault, exc_info=None):
"""Adds the specified fault to the database."""
code = 500
if hasattr(fault, "kwargs"):
code = fault.kwargs.get('code', 500)
details = unicode(fault)
if exc_info and code == 500:
tb = exc_info[2]
details += '\n' + ''.join(traceback.format_tb(tb))
values = {
'instance_uuid': instance_uuid,
'code': code,
'message': fault.__class__.__name__,
'details': unicode(details),
}
db.instance_fault_create(context, values)
def get_device_name_for_instance(context, instance, device):
"""Validates (or generates) a device name for instance.

View File

@ -61,23 +61,28 @@ class ChanceScheduler(driver.Scheduler):
requested_networks, is_first_time,
filter_properties):
"""Create and run an instance or instances"""
instances = []
instance_uuids = request_spec.get('instance_uuids')
for num, instance_uuid in enumerate(instance_uuids):
host = self._schedule(context, 'compute', request_spec,
filter_properties)
request_spec['instance_properties']['launch_index'] = num
updated_instance = driver.instance_update_db(context,
instance_uuid, host)
self.compute_rpcapi.run_instance(context,
instance=updated_instance, host=host,
requested_networks=requested_networks,
injected_files=injected_files,
admin_password=admin_password, is_first_time=is_first_time,
request_spec=request_spec,
filter_properties=filter_properties)
instances.append(driver.encode_instance(updated_instance))
return instances
try:
host = self._schedule(context, 'compute', request_spec,
filter_properties)
updated_instance = driver.instance_update_db(context,
instance_uuid, host)
self.compute_rpcapi.run_instance(context,
instance=updated_instance, host=host,
requested_networks=requested_networks,
injected_files=injected_files,
admin_password=admin_password,
is_first_time=is_first_time,
request_spec=request_spec,
filter_properties=filter_properties)
except Exception as ex:
# NOTE(vish): we don't reraise the exception here to make sure
# that all instances in the request get set to
# error properly
driver.handle_schedule_error(context, ex, instance_uuid,
request_spec)
def schedule_prep_resize(self, context, image, request_spec,
filter_properties, instance, instance_type,

View File

@ -21,10 +21,13 @@
Scheduler base class that all Schedulers should inherit from
"""
import sys
from nova.compute import api as compute_api
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import db
from nova import exception
from nova import flags
@ -32,9 +35,9 @@ from nova import notifications
from nova.openstack.common import cfg
from nova.openstack.common import importutils
from nova.openstack.common import log as logging
from nova.openstack.common.notifier import api as notifier
from nova.openstack.common import rpc
from nova.openstack.common import timeutils
from nova import quota
from nova import utils
@ -56,6 +59,34 @@ flags.DECLARE('instances_path', 'nova.compute.manager')
flags.DECLARE('libvirt_type', 'nova.virt.libvirt.driver')
def handle_schedule_error(context, ex, instance_uuid, request_spec):
if not isinstance(ex, exception.NoValidHost):
LOG.exception(_("Exception during scheduler.run_instance"))
compute_utils.add_instance_fault_from_exc(context,
instance_uuid, ex, sys.exc_info())
state = vm_states.ERROR.upper()
LOG.warning(_('Setting instance to %(state)s state.'),
locals(), instance_uuid=instance_uuid)
# update instance state and notify on the transition
(old_ref, new_ref) = db.instance_update_and_get_original(context,
instance_uuid, {'vm_state': vm_states.ERROR,
'task_state': None})
notifications.send_update(context, old_ref, new_ref,
service="scheduler")
properties = request_spec.get('instance_properties', {})
payload = dict(request_spec=request_spec,
instance_properties=properties,
instance_id=instance_uuid,
state=vm_states.ERROR,
method='run_instance',
reason=ex)
notifier.notify(context, notifier.publisher_id("scheduler"),
'scheduler.run_instance', notifier.ERROR, payload)
def cast_to_volume_host(context, host, method, **kwargs):
"""Cast request to a volume host queue"""

View File

@ -72,27 +72,32 @@ class FilterScheduler(driver.Scheduler):
weighted_hosts = self._schedule(context, "compute", request_spec,
filter_properties, instance_uuids)
if not weighted_hosts:
raise exception.NoValidHost(reason="")
# NOTE(comstud): Make sure we do not pass this through. It
# contains an instance of RpcContext that cannot be serialized.
filter_properties.pop('context', None)
for num, instance_uuid in enumerate(instance_uuids):
if not weighted_hosts:
break
weighted_host = weighted_hosts.pop(0)
request_spec['instance_properties']['launch_index'] = num
self._provision_resource(elevated, weighted_host,
request_spec,
filter_properties,
requested_networks,
injected_files, admin_password,
is_first_time,
instance_uuid=instance_uuid)
try:
try:
weighted_host = weighted_hosts.pop(0)
except IndexError:
raise exception.NoValidHost(reason="")
self._provision_resource(elevated, weighted_host,
request_spec,
filter_properties,
requested_networks,
injected_files, admin_password,
is_first_time,
instance_uuid=instance_uuid)
except Exception as ex:
# NOTE(vish): we don't reraise the exception here to make sure
# that all instances in the request get set to
# error properly
driver.handle_schedule_error(context, ex, instance_uuid,
request_spec)
# scrub retry host list in case we're scheduling multiple
# instances:
retry = filter_properties.get('retry', {})

View File

@ -35,6 +35,7 @@ from nova.compute import manager as compute_manager
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import context
from nova import db
@ -212,7 +213,7 @@ class ComputeTestCase(BaseTestCase):
def did_it_add_fault(*args):
called['fault_added'] = True
self.stubs.Set(self.compute, '_add_instance_fault_from_exc',
self.stubs.Set(compute_utils, 'add_instance_fault_from_exc',
did_it_add_fault)
@nova.compute.manager.wrap_instance_fault
@ -232,7 +233,7 @@ class ComputeTestCase(BaseTestCase):
def did_it_add_fault(*args):
called['fault_added'] = True
self.stubs.Set(self.compute, '_add_instance_fault_from_exc',
self.stubs.Set(compute_utils, 'add_instance_fault_from_exc',
did_it_add_fault)
@nova.compute.manager.wrap_instance_fault
@ -2131,7 +2132,7 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
NotImplementedError('test'),
exc_info)
@ -2159,7 +2160,7 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
user_exc, exc_info)
def test_add_instance_fault_no_exc_info(self):
@ -2177,7 +2178,7 @@ class ComputeTestCase(BaseTestCase):
self.stubs.Set(nova.db, 'instance_fault_create', fake_db_fault_create)
ctxt = context.get_admin_context()
self.compute._add_instance_fault_from_exc(ctxt, instance_uuid,
compute_utils.add_instance_fault_from_exc(ctxt, instance_uuid,
NotImplementedError('test'))
def test_cleanup_running_deleted_instances(self):

View File

@ -23,7 +23,10 @@ import random
import mox
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
from nova.scheduler import chance
from nova.scheduler import driver
@ -79,7 +82,6 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
self.mox.StubOutWithMock(ctxt, 'elevated')
self.mox.StubOutWithMock(self.driver, 'hosts_up')
self.mox.StubOutWithMock(random, 'random')
self.mox.StubOutWithMock(driver, 'encode_instance')
self.mox.StubOutWithMock(driver, 'instance_update_db')
self.mox.StubOutWithMock(compute_rpcapi.ComputeAPI, 'run_instance')
@ -94,7 +96,6 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
instance=instance1, requested_networks=None,
injected_files=None, admin_password=None, is_first_time=None,
request_spec=request_spec, filter_properties={})
driver.encode_instance(instance1).AndReturn(instance1_encoded)
# instance 2
ctxt.elevated().AndReturn(ctxt_elevated)
@ -107,33 +108,37 @@ class ChanceSchedulerTestCase(test_scheduler.SchedulerTestCase):
instance=instance2, requested_networks=None,
injected_files=None, admin_password=None, is_first_time=None,
request_spec=request_spec, filter_properties={})
driver.encode_instance(instance2).AndReturn(instance2_encoded)
self.mox.ReplayAll()
result = self.driver.schedule_run_instance(ctxt, request_spec,
self.driver.schedule_run_instance(ctxt, request_spec,
None, None, None, None, {})
expected = [instance1_encoded, instance2_encoded]
self.assertEqual(result, expected)
def test_basic_schedule_run_instance_no_hosts(self):
ctxt = context.RequestContext('fake', 'fake', False)
ctxt_elevated = 'fake-context-elevated'
fake_args = (1, 2, 3)
uuid = 'fake-uuid1'
instance_opts = {'fake_opt1': 'meow', 'launch_index': -1}
request_spec = {'instance_uuids': ['fake-uuid1'],
request_spec = {'instance_uuids': [uuid],
'instance_properties': instance_opts}
self.mox.StubOutWithMock(ctxt, 'elevated')
self.mox.StubOutWithMock(self.driver, 'hosts_up')
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
# instance 1
ctxt.elevated().AndReturn(ctxt_elevated)
self.driver.hosts_up(ctxt_elevated, 'compute').AndReturn([])
compute_utils.add_instance_fault_from_exc(ctxt,
uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
db.instance_update_and_get_original(ctxt, uuid,
{'vm_state': vm_states.ERROR,
'task_state': None}).AndReturn(({}, {}))
self.mox.ReplayAll()
self.assertRaises(exception.NoValidHost,
self.driver.schedule_run_instance, ctxt, request_spec,
None, None, None, None, {})
self.driver.schedule_run_instance(
ctxt, request_spec, None, None, None, None, {})
def test_schedule_prep_resize_doesnt_update_host(self):
fake_context = context.RequestContext('user', 'project',

View File

@ -18,7 +18,10 @@ Tests For Filter Scheduler.
import mox
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
from nova.scheduler import driver
from nova.scheduler import filter_scheduler
@ -38,26 +41,31 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
driver_cls = filter_scheduler.FilterScheduler
def test_run_instance_no_hosts(self):
"""
Ensure empty hosts & child_zones result in NoValidHosts exception.
"""
def _fake_empty_call_zone_method(*args, **kwargs):
return []
sched = fakes.FakeFilterScheduler()
uuid = 'fake-uuid1'
fake_context = context.RequestContext('user', 'project')
request_spec = {'instance_type': {'memory_mb': 1, 'root_gb': 1,
'ephemeral_gb': 0},
'instance_properties': {'project_id': 1},
'instance_uuids': ['fake-uuid1']}
self.assertRaises(exception.NoValidHost, sched.schedule_run_instance,
fake_context, request_spec, None, None, None,
None, {})
'instance_uuids': [uuid]}
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
compute_utils.add_instance_fault_from_exc(fake_context,
uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
db.instance_update_and_get_original(fake_context, uuid,
{'vm_state': vm_states.ERROR,
'task_state': None}).AndReturn(({}, {}))
self.mox.ReplayAll()
sched.schedule_run_instance(
fake_context, request_spec, None, None, None, None, {})
def test_run_instance_non_admin(self):
"""Test creating an instance locally using run_instance, passing
a non-admin context. DB actions should work."""
self.was_admin = False
def fake_get(context, *args, **kwargs):
@ -71,12 +79,20 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
fake_context = context.RequestContext('user', 'project')
uuid = 'fake-uuid1'
request_spec = {'instance_type': {'memory_mb': 1, 'local_gb': 1},
'instance_properties': {'project_id': 1},
'instance_uuids': ['fake-uuid1']}
self.assertRaises(exception.NoValidHost, sched.schedule_run_instance,
fake_context, request_spec, None, None, None,
None, {})
'instance_uuids': [uuid]}
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
compute_utils.add_instance_fault_from_exc(fake_context,
uuid, mox.IsA(exception.NoValidHost), mox.IgnoreArg())
db.instance_update_and_get_original(fake_context, uuid,
{'vm_state': vm_states.ERROR,
'task_state': None}).AndReturn(({}, {}))
self.mox.ReplayAll()
sched.schedule_run_instance(
fake_context, request_spec, None, None, None, None, {})
self.assertTrue(self.was_admin)
def test_schedule_bad_topic(self):