Improve wait-for-realization

- Use tenacity retry method with random wait
- Add config for sleep time between retries

Change-Id: Iec470d5019b554435d7797a4e94ae73ef6179aa4
This commit is contained in:
Adit Sarfaty 2019-03-28 11:32:39 +02:00
parent a754908731
commit 6616254449
3 changed files with 28 additions and 25 deletions

View File

@ -84,7 +84,8 @@ class NsxLibConfig(object):
not supported by the policy manager api. not supported by the policy manager api.
:param realization_max_attempts: Maximum number of times to retry while :param realization_max_attempts: Maximum number of times to retry while
waiting for a resource to be realized. waiting for a resource to be realized.
. :param realization_wait_sec: Number of seconds to wait between attempts
for a resource to be realized.
""" """
def __init__(self, def __init__(self,
@ -111,8 +112,8 @@ class NsxLibConfig(object):
rate_limit_retry=True, rate_limit_retry=True,
cluster_unavailable_retry=False, cluster_unavailable_retry=False,
allow_passthrough=False, allow_passthrough=False,
# TODO(asarfaty): reduce the default once plugin is stable realization_max_attempts=50,
realization_max_attempts=50): realization_wait_sec=1.0):
self.nsx_api_managers = nsx_api_managers self.nsx_api_managers = nsx_api_managers
self._username = username self._username = username
@ -137,6 +138,7 @@ class NsxLibConfig(object):
self.cluster_unavailable_retry = cluster_unavailable_retry self.cluster_unavailable_retry = cluster_unavailable_retry
self.allow_passthrough = allow_passthrough self.allow_passthrough = allow_passthrough
self.realization_max_attempts = realization_max_attempts self.realization_max_attempts = realization_max_attempts
self.realization_wait_sec = realization_wait_sec
if dhcp_profile_uuid: if dhcp_profile_uuid:
# this is deprecated, and never used. # this is deprecated, and never used.

View File

@ -215,33 +215,29 @@ class NsxPolicyResourceBase(object):
Return the realization info, or raise an error Return the realization info, or raise an error
""" """
if sleep is None: if sleep is None:
sleep = 0.5 sleep = self.nsxlib_config.realization_wait_sec
if max_attempts is None: if max_attempts is None:
max_attempts = self.nsxlib_config.realization_max_attempts max_attempts = self.nsxlib_config.realization_max_attempts
test_num = 0 @utils.retry_upon_none_result(max_attempts, delay=sleep, random=True)
while test_num < max_attempts: def get_info():
info = self._get_realization_info( info = self._get_realization_info(
resource_def, entity_type=entity_type) resource_def, entity_type=entity_type)
if info and info['state'] == constants.STATE_REALIZED: if info and info['state'] == constants.STATE_REALIZED:
# TODO(asarfaty): why sometimes realization takes so long?
if test_num > 5:
LOG.warning("Waited %(time)s seconds for realization of "
"%(type)s %(id)s",
{'time': test_num * sleep,
'type': resource_def.resource_type(),
'id': resource_def.get_id()})
return info return info
eventlet.sleep(sleep)
test_num += 1
err_msg = (_("%(type)s ID %(id)s was not realized after %(attempts)s " try:
"attempts with %(sleep)s seconds sleep") % return get_info()
{'type': resource_def.resource_type(), except Exception:
'id': resource_def.get_id(), # max retries reached
'attempts': max_attempts, err_msg = (_("%(type)s ID %(id)s was not realized after "
'sleep': sleep}) "%(attempts)s attempts with %(sleep)s seconds "
raise exceptions.ManagerError(details=err_msg) "sleep") %
{'type': resource_def.resource_type(),
'id': resource_def.get_id(),
'attempts': max_attempts,
'sleep': sleep})
raise exceptions.ManagerError(details=err_msg)
def _get_extended_attr_from_realized_info(self, realization_info, def _get_extended_attr_from_realized_info(self, realization_info,
requested_attr): requested_attr):

View File

@ -184,11 +184,16 @@ def retry_random_upon_exception(exc, delay=0.5, max_delay=5,
before=_log_before_retry, after=_log_after_retry) before=_log_before_retry, after=_log_after_retry)
def retry_upon_none_result(max_attempts, delay=0.5, max_delay=2, random=False):
    """Build a tenacity decorator that retries while the call returns None.

    :param max_attempts: number of attempts after which retrying stops.
    :param delay: multiplier passed to the exponential wait strategy.
    :param max_delay: ``max`` value passed to the exponential wait strategy.
    :param random: when True, wait with ``tenacity.wait_random_exponential``
                   (randomized back-off); when False (the default), wait
                   with the deterministic ``tenacity.wait_exponential``.
    :returns: the decorator produced by ``tenacity.retry``.
    """
    # The randomized strategy must be selected only when explicitly
    # requested, so that existing callers relying on the default keep the
    # deterministic exponential back-off.
    if random:
        wait_func = tenacity.wait_random_exponential(
            multiplier=delay, max=max_delay)
    else:
        wait_func = tenacity.wait_exponential(
            multiplier=delay, max=max_delay)
    return tenacity.retry(reraise=True,
                          retry=tenacity.retry_if_result(lambda x: x is None),
                          wait=wait_func,
                          stop=tenacity.stop_after_attempt(max_attempts),
                          before=_log_before_retry, after=_log_after_retry)