xenapi: increase timeout for resetnetwork agent request

Windows can take longer than the default 30 seconds for resetnetwork
requests. Double the timeout for the command to 60 seconds, but add
a flag so it can be changed without code changes in the future.

At the same time, add a flag for all other agent requests too.

Change-Id: Iba91c37fd5596ea0dd63c20f74925972df1ca715
This commit is contained in:
Johannes Erdfelt 2012-09-26 15:33:52 +00:00
parent c367fa5e4a
commit fe478bd49f
2 changed files with 34 additions and 16 deletions

View File

@ -31,26 +31,37 @@ from nova import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
xenapi_agent_opts = [ xenapi_agent_opts = [
cfg.IntOpt('agent_timeout',
default=30,
help='number of seconds to wait for agent reply'),
cfg.IntOpt('agent_version_timeout', cfg.IntOpt('agent_version_timeout',
default=300, default=300,
help='number of seconds to wait for agent ' help='number of seconds to wait for agent '
'to be fully operational'), 'to be fully operational'),
cfg.IntOpt('agent_resetnetwork_timeout',
default=60,
help='number of seconds to wait for agent reply '
'to resetnetwork request'),
] ]
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
FLAGS.register_opts(xenapi_agent_opts) FLAGS.register_opts(xenapi_agent_opts)
def _call_agent(session, instance, vm_ref, method, addl_args=None): def _call_agent(session, instance, vm_ref, method, addl_args=None,
timeout=None):
"""Abstracts out the interaction with the agent xenapi plugin.""" """Abstracts out the interaction with the agent xenapi plugin."""
if addl_args is None: if addl_args is None:
addl_args = {} addl_args = {}
if timeout is None:
timeout = FLAGS.agent_timeout
vm_rec = session.call_xenapi("VM.get_record", vm_ref) vm_rec = session.call_xenapi("VM.get_record", vm_ref)
args = { args = {
'id': str(uuid.uuid4()), 'id': str(uuid.uuid4()),
'dom_id': vm_rec['domid'], 'dom_id': vm_rec['domid'],
'timeout': str(timeout),
} }
args.update(addl_args) args.update(addl_args)
@ -204,7 +215,8 @@ def inject_file(session, instance, vm_ref, path, contents):
def resetnetwork(session, instance, vm_ref): def resetnetwork(session, instance, vm_ref):
LOG.debug(_('Resetting network'), instance=instance) LOG.debug(_('Resetting network'), instance=instance)
resp = _call_agent(session, instance, vm_ref, 'resetnetwork') resp = _call_agent(session, instance, vm_ref, 'resetnetwork',
timeout=FLAGS.agent_resetnetwork_timeout)
if resp['returncode'] != '0': if resp['returncode'] != '0':
LOG.error(_('Failed to reset network: %(resp)r'), locals(), LOG.error(_('Failed to reset network: %(resp)r'), locals(),
instance=instance) instance=instance)

View File

@ -40,7 +40,7 @@ from pluginlib_nova import *
configure_logging("agent") configure_logging("agent")
import xenstore import xenstore
AGENT_TIMEOUT = 30 DEFAULT_TIMEOUT = 30
class TimeoutError(StandardError): class TimeoutError(StandardError):
@ -49,12 +49,13 @@ class TimeoutError(StandardError):
def version(self, arg_dict): def version(self, arg_dict):
"""Get version of agent.""" """Get version of agent."""
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
arg_dict["value"] = json.dumps({"name": "version", "value": "agent"}) arg_dict["value"] = json.dumps({"name": "version", "value": "agent"})
request_id = arg_dict["id"] request_id = arg_dict["id"]
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -66,6 +67,7 @@ def key_init(self, arg_dict):
info to be passed, such as passwords. Returns the shared info to be passed, such as passwords. Returns the shared
secret key value. secret key value.
""" """
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
# WARNING: Some older Windows agents will crash if the public key isn't # WARNING: Some older Windows agents will crash if the public key isn't
# a string # a string
pub = arg_dict["pub"] pub = arg_dict["pub"]
@ -74,7 +76,7 @@ def key_init(self, arg_dict):
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -87,13 +89,14 @@ def password(self, arg_dict):
previous call to key_init. The encrypted password value should previous call to key_init. The encrypted password value should
be passed as the value for the 'enc_pass' key in arg_dict. be passed as the value for the 'enc_pass' key in arg_dict.
""" """
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
enc_pass = arg_dict["enc_pass"] enc_pass = arg_dict["enc_pass"]
arg_dict["value"] = json.dumps({"name": "password", "value": enc_pass}) arg_dict["value"] = json.dumps({"name": "password", "value": enc_pass})
request_id = arg_dict["id"] request_id = arg_dict["id"]
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -103,12 +106,13 @@ def resetnetwork(self, arg_dict):
"""Writes a request to xenstore that tells the agent """Writes a request to xenstore that tells the agent
to reset networking. to reset networking.
""" """
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
arg_dict['value'] = json.dumps({'name': 'resetnetwork', 'value': ''}) arg_dict['value'] = json.dumps({'name': 'resetnetwork', 'value': ''})
request_id = arg_dict['id'] request_id = arg_dict['id']
arg_dict['path'] = "data/host/%s" % request_id arg_dict['path'] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -125,6 +129,7 @@ def inject_file(self, arg_dict):
need to test to determine if the file injection method on the agent has need to test to determine if the file injection method on the agent has
been disabled, and raise a NotImplemented error if that is the case. been disabled, and raise a NotImplemented error if that is the case.
""" """
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
b64_path = arg_dict["b64_path"] b64_path = arg_dict["b64_path"]
b64_file = arg_dict["b64_contents"] b64_file = arg_dict["b64_contents"]
request_id = arg_dict["id"] request_id = arg_dict["id"]
@ -151,7 +156,7 @@ def inject_file(self, arg_dict):
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -160,6 +165,7 @@ def inject_file(self, arg_dict):
def agent_update(self, arg_dict): def agent_update(self, arg_dict):
"""Expects an URL and md5sum of the contents, then directs the agent to """Expects an URL and md5sum of the contents, then directs the agent to
update itself.""" update itself."""
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
request_id = arg_dict["id"] request_id = arg_dict["id"]
url = arg_dict["url"] url = arg_dict["url"]
md5sum = arg_dict["md5sum"] md5sum = arg_dict["md5sum"]
@ -168,7 +174,7 @@ def agent_update(self, arg_dict):
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.write_record(self, arg_dict) xenstore.write_record(self, arg_dict)
try: try:
resp = _wait_for_agent(self, request_id, arg_dict) resp = _wait_for_agent(self, request_id, arg_dict, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
return resp return resp
@ -176,6 +182,7 @@ def agent_update(self, arg_dict):
def _get_agent_features(self, arg_dict): def _get_agent_features(self, arg_dict):
"""Return an array of features that an agent supports.""" """Return an array of features that an agent supports."""
timeout = int(arg_dict.pop('timeout', DEFAULT_TIMEOUT))
tmp_id = commands.getoutput("uuidgen") tmp_id = commands.getoutput("uuidgen")
dct = {} dct = {}
dct.update(arg_dict) dct.update(arg_dict)
@ -183,7 +190,7 @@ def _get_agent_features(self, arg_dict):
dct["path"] = "data/host/%s" % tmp_id dct["path"] = "data/host/%s" % tmp_id
xenstore.write_record(self, dct) xenstore.write_record(self, dct)
try: try:
resp = _wait_for_agent(self, tmp_id, dct) resp = _wait_for_agent(self, tmp_id, dct, timeout)
except TimeoutError, e: except TimeoutError, e:
raise PluginError(e) raise PluginError(e)
response = json.loads(resp) response = json.loads(resp)
@ -193,18 +200,17 @@ def _get_agent_features(self, arg_dict):
return {} return {}
def _wait_for_agent(self, request_id, arg_dict): def _wait_for_agent(self, request_id, arg_dict, timeout):
"""Periodically checks xenstore for a response from the agent. """Periodically checks xenstore for a response from the agent.
The request is always written to 'data/host/{id}', and The request is always written to 'data/host/{id}', and
the agent's response for that request will be in 'data/guest/{id}'. the agent's response for that request will be in 'data/guest/{id}'.
If no value appears from the agent within the time specified by If no value appears from the agent within the timeout specified,
AGENT_TIMEOUT, the original request is deleted and a TimeoutError the original request is deleted and a TimeoutError is raised.
is returned.
""" """
arg_dict["path"] = "data/guest/%s" % request_id arg_dict["path"] = "data/guest/%s" % request_id
arg_dict["ignore_missing_path"] = True arg_dict["ignore_missing_path"] = True
start = time.time() start = time.time()
while time.time() - start < AGENT_TIMEOUT: while time.time() - start < timeout:
ret = xenstore.read_record(self, arg_dict) ret = xenstore.read_record(self, arg_dict)
# Note: the response for None will be a string that includes # Note: the response for None will be a string that includes
# double quotes. # double quotes.
@ -219,7 +225,7 @@ def _wait_for_agent(self, request_id, arg_dict):
arg_dict["path"] = "data/host/%s" % request_id arg_dict["path"] = "data/host/%s" % request_id
xenstore.delete_record(self, arg_dict) xenstore.delete_record(self, arg_dict)
raise TimeoutError(_("TIMEOUT: No response from agent within" raise TimeoutError(_("TIMEOUT: No response from agent within"
" %s seconds.") % AGENT_TIMEOUT) " %s seconds.") % timeout)
if __name__ == "__main__": if __name__ == "__main__":