Fix poll_rescued_instances periodic task

The poll_rescued_instances periodic task now checks the amount of time
that an instance has been in the RESCUED stated before timing out the
rescue.  It also now performs the unrescue through the compute api in
order to make sure the database is left in a consistent state.

The poll_rescued_instances method is no longer necessary in the virt
driver interface and has been removed.  And also removed from the
different virt drivers, since it was just doing a 'pass' in each of
them.

bug 1088625
bug 1088627

Change-Id: I75f7dc188cc49e5f6e5c8a3cb256d1c42ff7d882
This commit is contained in:
Andrew Laski 2012-12-11 13:48:11 -05:00
parent dc48ce7fb1
commit c40fc8a4db
9 changed files with 51 additions and 64 deletions

View File

@ -1592,6 +1592,7 @@ class ComputeManager(manager.SchedulerDependentManager):
vm_state=vm_states.RESCUED,
task_state=None,
power_state=current_power_state,
launched_at=timeutils.utcnow(),
expected_task_state=task_states.RESCUING)
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@ -2818,7 +2819,22 @@ class ComputeManager(manager.SchedulerDependentManager):
@manager.periodic_task
def _poll_rescued_instances(self, context):
if CONF.rescue_timeout > 0:
self.driver.poll_rescued_instances(CONF.rescue_timeout)
instances = self.conductor_api.instance_get_all_by_host(context,
self.host)
rescued_instances = []
for instance in instances:
if instance['vm_state'] == vm_states.RESCUED:
rescued_instances.append(instance)
to_unrescue = []
for instance in rescued_instances:
if timeutils.is_older_than(instance['launched_at'],
CONF.rescue_timeout):
to_unrescue.append(instance)
for instance in to_unrescue:
self.compute_api.unrescue(context, instance)
@manager.periodic_task
def _poll_unconfirmed_resizes(self, context):

View File

@ -2788,6 +2788,40 @@ class ComputeTestCase(BaseTestCase):
self.assertEqual(call_info['get_by_uuid'], 3)
self.assertEqual(call_info['get_nw_info'], 4)
def test_poll_rescued_instances(self):
timed_out_time = timeutils.utcnow() - datetime.timedelta(minutes=5)
not_timed_out_time = timeutils.utcnow()
instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESCUED,
'launched_at': timed_out_time},
{'uuid': 'fake_uuid2', 'vm_state': vm_states.ACTIVE,
'launched_at': timed_out_time},
{'uuid': 'fake_uuid3', 'vm_state': vm_states.ACTIVE,
'launched_at': not_timed_out_time},
{'uuid': 'fake_uuid4', 'vm_state': vm_states.RESCUED,
'launched_at': timed_out_time},
{'uuid': 'fake_uuid5', 'vm_state': vm_states.RESCUED,
'launched_at': not_timed_out_time}]
unrescued_instances = {'fake_uuid1': False, 'fake_uuid4': False}
def fake_instance_get_all_by_host(context, host):
return instances
def fake_unrescue(self, context, instance):
unrescued_instances[instance['uuid']] = True
self.stubs.Set(self.compute.conductor_api, 'instance_get_all_by_host',
fake_instance_get_all_by_host)
self.stubs.Set(compute_api.API, 'unrescue', fake_unrescue)
self.flags(rescue_timeout=60)
ctxt = context.get_admin_context()
self.compute._poll_rescued_instances(ctxt)
for instance in unrescued_instances.values():
self.assertTrue(instance)
def test_poll_unconfirmed_resizes(self):
instances = [{'uuid': 'fake_uuid1', 'vm_state': vm_states.RESIZED,
'task_state': None},

View File

@ -282,10 +282,6 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
instances = [self._get_running_instance()]
self.connection.poll_rebooting_instances(10, instances)
@catch_notimplementederror
def test_poll_rescued_instances(self):
self.connection.poll_rescued_instances(10)
@catch_notimplementederror
def test_migrate_disk_and_power_off(self):
instance_ref, network_info = self._get_running_instance()

View File

@ -635,11 +635,6 @@ class ComputeDriver(object):
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()
def poll_rescued_instances(self, timeout):
"""Poll for rescued instances"""
# TODO(Vek): Need to pass context in for access to auth_token
raise NotImplementedError()
def host_power_action(self, host, action):
"""Reboots, shuts down or powers up the host."""
raise NotImplementedError()

View File

@ -154,9 +154,6 @@ class FakeDriver(driver.ComputeDriver):
def poll_rebooting_instances(self, timeout, instances):
pass
def poll_rescued_instances(self, timeout):
pass
def migrate_disk_and_power_off(self, context, instance, dest,
instance_type, network_info,
block_device_info=None):

View File

@ -119,9 +119,6 @@ class HyperVDriver(driver.ComputeDriver):
def get_volume_connector(self, instance):
return self._volumeops.get_volume_connector(instance)
def poll_rescued_instances(self, timeout):
pass
def get_available_resource(self, nodename):
return self._hostops.get_available_resource()

View File

@ -1083,10 +1083,6 @@ class LibvirtDriver(driver.ComputeDriver):
def poll_rebooting_instances(self, timeout, instances):
pass
@exception.wrap_exception()
def poll_rescued_instances(self, timeout):
pass
def _enable_hairpin(self, xml):
interfaces = self.get_interfaces(xml)
for interface in interfaces:

View File

@ -290,10 +290,6 @@ class XenAPIDriver(driver.ComputeDriver):
"""Poll for rebooting instances"""
self._vmops.poll_rebooting_instances(timeout, instances)
def poll_rescued_instances(self, timeout):
"""Poll for rescued instances"""
self._vmops.poll_rescued_instances(timeout)
def reset_network(self, instance):
"""reset networking for specified instance"""
self._vmops.reset_network(instance)

View File

@ -147,7 +147,6 @@ class VMOps(object):
self.compute_api = compute.API()
self._session = session
self._virtapi = virtapi
self.poll_rescue_last_ran = None
self.firewall_driver = firewall.load_driver(
DEFAULT_FIREWALL_DRIVER,
self._virtapi,
@ -1217,45 +1216,6 @@ class VMOps(object):
LOG.info(_("Automatically hard rebooting"), instance=instance)
self.compute_api.reboot(ctxt, instance, "HARD")
def poll_rescued_instances(self, timeout):
"""Look for expirable rescued instances.
- forcibly exit rescue mode for any instances that have been
in rescue mode for >= the provided timeout
"""
last_ran = self.poll_rescue_last_ran
if not last_ran:
# We need a base time to start tracking.
self.poll_rescue_last_ran = timeutils.utcnow()
return
if not timeutils.is_older_than(last_ran, timeout):
# Do not run. Let's bail.
return
# Update the time tracker and proceed.
self.poll_rescue_last_ran = timeutils.utcnow()
rescue_vms = []
for instance in self.list_instances():
if instance.endswith("-rescue"):
rescue_vms.append(dict(name=instance,
vm_ref=vm_utils.lookup(self._session,
instance)))
for vm in rescue_vms:
rescue_vm_ref = vm["vm_ref"]
original_name = vm["name"].split("-rescue", 1)[0]
original_vm_ref = vm_utils.lookup(self._session, original_name)
self._destroy_rescue_instance(rescue_vm_ref, original_vm_ref)
self._release_bootlock(original_vm_ref)
self._session.call_xenapi("VM.start", original_vm_ref, False,
False)
def get_info(self, instance, vm_ref=None):
"""Return data about VM instance."""
vm_ref = vm_ref or self._get_vm_opaque_ref(instance)