From 3481739598a73ce95edbcedbb8988a62d13dee18 Mon Sep 17 00:00:00 2001 From: rmiao106 Date: Wed, 9 Dec 2020 23:26:58 +0000 Subject: [PATCH] Added debug for Retry Urllib3 Retry class's increment method has been used and we don't understand why. Since it's impossible to add logs directly in Retry class, this patch subclasses Retry and adds logging capabilities to log server response and relevant cause if we hit this bug again Signed-off-by: rmiao106 Change-Id: I2bd13ee635879a343c7a05886b397b3ffda5006a --- vmware_nsxlib/tests/unit/v3/test_cluster.py | 15 ++- vmware_nsxlib/v3/cluster.py | 3 +- vmware_nsxlib/v3/debug_retry.py | 112 ++++++++++++++++++++ 3 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 vmware_nsxlib/v3/debug_retry.py diff --git a/vmware_nsxlib/tests/unit/v3/test_cluster.py b/vmware_nsxlib/tests/unit/v3/test_cluster.py index b3ab25be..e906f008 100644 --- a/vmware_nsxlib/tests/unit/v3/test_cluster.py +++ b/vmware_nsxlib/tests/unit/v3/test_cluster.py @@ -133,13 +133,15 @@ class RequestsHTTPProviderTestCase(unittest.TestCase): mock_get_def_headers.assert_called_once_with( mock.ANY, cluster_provider, False, token_provider_inst) + @mock.patch("vmware_nsxlib.v3.debug_retry.RetryDebug.from_int") @mock.patch("vmware_nsxlib.v3.cluster.NSXHTTPAdapter.__init__") - def test_new_connection_with_ca_file(self, mock_adaptor_init): + def test_new_connection_with_ca_file(self, mock_adaptor_init, mock_retry): mock_api = mock.Mock() mock_api.nsxlib_config = mock.Mock() mock_api.nsxlib_config.retries = 100 mock_api.nsxlib_config.insecure = False mock_adaptor_init.return_value = None + mock_retry.return_value = 100 provider = cluster.NSXRequestsHTTPProvider() with mock.patch.object(cluster.TimeoutSession, 'request', return_value=get_sess_create_resp()): @@ -150,15 +152,19 @@ class RequestsHTTPProviderTestCase(unittest.TestCase): self.assertEqual("ca_file", session.verify) mock_adaptor_init.assert_called_once_with( pool_connections=1, pool_maxsize=1, - max_retries=100, pool_block=False, thumbprint=None) + max_retries=100, pool_block=False, + thumbprint=None) + @mock.patch("vmware_nsxlib.v3.debug_retry.RetryDebug.from_int") @mock.patch("vmware_nsxlib.v3.cluster.NSXHTTPAdapter.__init__") - def test_new_connection_with_thumbprint(self, mock_adaptor_init): + def test_new_connection_with_thumbprint(self, mock_adaptor_init, + mock_retry): mock_api = mock.Mock() mock_api.nsxlib_config = mock.Mock() mock_api.nsxlib_config.retries = 100 mock_api.nsxlib_config.insecure = False mock_adaptor_init.return_value = None + mock_retry.return_value = 100 provider = cluster.NSXRequestsHTTPProvider() with mock.patch.object(cluster.TimeoutSession, 'request', return_value=get_sess_create_resp()): @@ -169,7 +175,8 @@ class RequestsHTTPProviderTestCase(unittest.TestCase): self.assertIsNone(session.verify) mock_adaptor_init.assert_called_once_with( pool_connections=1, pool_maxsize=1, - max_retries=100, pool_block=False, thumbprint="thumbprint") + max_retries=100, pool_block=False, + thumbprint="thumbprint") def test_validate_connection_keep_alive(self): mock_conn = mocks.MockRequestSessionApi() diff --git a/vmware_nsxlib/v3/cluster.py b/vmware_nsxlib/v3/cluster.py index 874351d3..f150d872 100644 --- a/vmware_nsxlib/v3/cluster.py +++ b/vmware_nsxlib/v3/cluster.py @@ -39,6 +39,7 @@ from requests import adapters from vmware_nsxlib._i18n import _ from vmware_nsxlib.v3 import client as nsx_client from vmware_nsxlib.v3 import constants +from vmware_nsxlib.v3.debug_retry import RetryDebug from vmware_nsxlib.v3 import exceptions from vmware_nsxlib.v3 import utils @@ -224,7 +225,7 @@ class NSXRequestsHTTPProvider(AbstractHTTPProvider): # we are pooling with eventlet in the cluster class adapter = NSXHTTPAdapter( pool_connections=1, pool_maxsize=1, - max_retries=config.retries, + max_retries=RetryDebug.from_int(config.retries), pool_block=False, thumbprint=thumbprint) session.mount('http://', adapter) session.mount('https://', adapter) diff --git a/vmware_nsxlib/v3/debug_retry.py b/vmware_nsxlib/v3/debug_retry.py new file mode 100644 index 00000000..f739d120 --- /dev/null +++ b/vmware_nsxlib/v3/debug_retry.py @@ -0,0 +1,112 @@ +# Copyright 2020 VMware, Inc. +# All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +from collections import namedtuple +import logging +import traceback + +import six +from urllib3.exceptions import MaxRetryError +from urllib3.exceptions import ResponseError +from urllib3.util.retry import Retry + +log = logging.getLogger(__name__) + +RequestHistory = namedtuple( + "RequestHistory", ["method", "url", "error", "status", "redirect_location"] +) + + +class RetryDebug(Retry): + """Class that adds debugging capabilities of Retry""" + def __init__(self, *args, **kw): + super(RetryDebug, self).__init__(*args, **kw) + + def increment(self, method=None, url=None, response=None, error=None, + _pool=None, _stacktrace=None, ): + log.debug("Retry Increment %s", traceback.format_stack()) + if self.total is False and error: + # Disabled, indicate to re-raise the error. + raise six.reraise(type(error), error, _stacktrace) + + total = self.total + if total is not None: + total -= 1 + + connect = self.connect + read = self.read + redirect = self.redirect + status_count = self.status + cause = "unknown" + status = None + redirect_location = None + + if error and self._is_connection_error(error): + # Connect retry? + if connect is False: + raise six.reraise(type(error), error, _stacktrace) + elif connect is not None: + connect -= 1 + + elif error and self._is_read_error(error): + # Read retry? + if read is False or not self._is_method_retryable(method): + raise six.reraise(type(error), error, _stacktrace) + elif read is not None: + read -= 1 + + elif response and response.get_redirect_location(): + # Redirect retry? + if redirect is not None: + redirect -= 1 + cause = "too many redirects" + redirect_location = response.get_redirect_location() + status = response.status + + else: + # Incrementing because of a server error like a 500 in + # status_forcelist and a the given method is in the whitelist + cause = ResponseError.GENERIC_ERROR + if response and response.status: + if status_count is not None: + status_count -= 1 + cause = ResponseError.SPECIFIC_ERROR.format( + status_code=response.status) + status = response.status + + history = self.history + ( + RequestHistory(method, url, error, status, redirect_location), + ) + + new_retry = self.new( + total=total, + connect=connect, + read=read, + redirect=redirect, + status=status_count, + history=history, + ) + + if new_retry.is_exhausted(): + raise MaxRetryError(_pool, url, error or ResponseError(cause)) + + # log the cause for this retry + log.debug("Cause for retry: %s", cause) + # log the server response for this retry + log.debug("Response: %s", response) + log.debug("Incremented Retry for (url='%s'): %r", url, new_retry) + + return new_retry