From b510abce176e0f9a6ec1cb98e2e1426e13318c45 Mon Sep 17 00:00:00 2001 From: Allen Sanabria Date: Tue, 13 Sep 2016 13:46:59 -0700 Subject: [PATCH] CloudRetry/AWSRetry backoff decorator with unit tests (#17039) * Added aws_retry decorator function with unit tests * Restructured the code to be used with a base class. This base class CloudRetry can be reused by any other cloud provider. This decorator should be used in situations, where you need to implement a backoff algorithm and want to retry based on the status code from the exception. * updated documentation * fixed tabs * added botocore and boto3 to requirements.txt * removed cloud.py from py24 tests, as it depends on boto3 * fix relative imports * updated test to be 2.6 compat * updated method name from retry to backoff * readded lxd * Updated default backoff from 2 seconds to 1.1s. This will be about a total of 48 seconds in 10 tries. This is configurable. --- lib/ansible/module_utils/cloud.py | 108 ++++++++++++++++++++++++ lib/ansible/module_utils/ec2.py | 26 ++++++ test/units/module_utils/ec2/__init__.py | 0 test/units/module_utils/ec2/test_aws.py | 87 +++++++++++++++++++ test/utils/shippable/sanity.sh | 2 +- test/utils/tox/requirements-py3.txt | 2 + test/utils/tox/requirements.txt | 2 + 7 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 lib/ansible/module_utils/cloud.py create mode 100644 test/units/module_utils/ec2/__init__.py create mode 100644 test/units/module_utils/ec2/test_aws.py diff --git a/lib/ansible/module_utils/cloud.py b/lib/ansible/module_utils/cloud.py new file mode 100644 index 00000000000..c0bb7b6cd07 --- /dev/null +++ b/lib/ansible/module_utils/cloud.py @@ -0,0 +1,108 @@ +# +# (c) 2016 Allen Sanabria, +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later 
"""
This module adds shared support for generic cloud modules

In order to use this module, include it as part of a custom
module as shown below.

from ansible.module_utils.cloud import *

The 'cloud' module provides the following common classes:

    * CloudRetry
        - The base class to be used by other cloud providers, in order to
          provide a backoff/retry decorator based on status codes.

        - Example using the AWSRetry class which inherits from CloudRetry.
          @AWSRetry.backoff(tries=20, delay=2, backoff=2)
          get_ec2_security_group_ids_from_names()

"""
from functools import wraps
import syslog
import time


class CloudRetry(object):
    """ CloudRetry can be used by any cloud provider, in order to implement a
        backoff algorithm/retry effect based on Status Code from Exceptions.

    Subclasses set ``base_class`` and override ``status_code_from_exception``
    and ``found``; ``backoff`` then builds the retrying decorator.
    """
    # This is the base class of the exception.
    # AWS Example botocore.exceptions.ClientError
    # While it is None, no exception is ever considered retryable.
    base_class = None

    @staticmethod
    def status_code_from_exception(error):
        """ Return the status code from the exception object
        Args:
            error (object): The exception itself.

        The base implementation returns None; subclasses override it.
        """
        pass

    @staticmethod
    def found(response_code):
        """ Return True if the Response Code to retry on was found.
        Args:
            response_code (str): This is the Response Code that is being matched against.

        The base implementation returns None (falsy), so nothing is retried
        until a subclass overrides it.
        """
        pass

    @classmethod
    def backoff(cls, tries=10, delay=3, backoff=1.1):
        """ Retry calling the Cloud decorated function using an exponential backoff.

        Kwargs:
            tries (int): Number of times to try (not retry) before giving up.
                Values of 1 or less result in a single call with no retry.
                default=10
            delay (int or float): Initial delay between retries in seconds
                default=3
            backoff (int or float): backoff multiplier e.g. value of 2 will
                double the delay each retry
                default=1.1

        Returns:
            A decorator. The decorated function returns whatever the wrapped
            function returns on its first successful call.

        Raises:
            Whatever the wrapped function raises, unchanged, when the
            exception is not an instance of ``base_class``, when ``found``
            rejects its status code, or when the final attempt fails.
        """
        def deco(f):
            @wraps(f)
            def retry_func(*args, **kwargs):
                attempts_left, wait = tries, delay
                # The last attempt is made outside the loop so that its
                # exception propagates to the caller untouched.
                while attempts_left > 1:
                    try:
                        return f(*args, **kwargs)
                    except Exception as e:
                        retryable = (
                            cls.base_class is not None
                            and isinstance(e, cls.base_class)
                            and cls.found(cls.status_code_from_exception(e))
                        )
                        if not retryable:
                            # Either not a base_class error, or its status
                            # code is not one we retry on. A bare raise keeps
                            # the original traceback.
                            raise
                        msg = "{0}: Retrying in {1} seconds...".format(str(e), wait)
                        syslog.syslog(syslog.LOG_INFO, msg)
                        time.sleep(wait)
                        attempts_left -= 1
                        wait *= backoff
                return f(*args, **kwargs)

            return retry_func  # true decorator

        return deco
import re

try:
    from botocore.exceptions import ClientError as _BotocoreClientError
except ImportError:
    # NOTE(review): the file's own botocore import is outside this hunk;
    # presumably it is optional there. Falling back to NoneType keeps this
    # module importable without botocore, and no exception will ever match.
    _BotocoreClientError = type(None)

# This list of failures is based on this API Reference
# http://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
_AWS_RETRY_CODES = (
    'RequestLimitExceeded', 'Unavailable', 'ServiceUnavailable',
    'InternalFailure', 'InternalError',
)

# Matches "<Something>.NotFound" codes; the dot is a literal separator.
_AWS_NOT_FOUND_RE = re.compile(r'^\w+\.NotFound')


class AWSRetry(CloudRetry):
    """ CloudRetry specialisation that retries on selected AWS error codes.

    Retries happen for botocore ClientError exceptions whose error code is
    listed in _AWS_RETRY_CODES or looks like "<Something>.NotFound".
    """
    base_class = _BotocoreClientError

    @staticmethod
    def status_code_from_exception(error):
        """ Return the AWS error code stored in the exception's response.
        Args:
            error (object): Exception exposing a ``response`` mapping with
                an ``['Error']['Code']`` entry.
        """
        return error.response['Error']['Code']

    @staticmethod
    def found(response_code):
        """ Return True if the call should be retried for this error code.
        Args:
            response_code (str): The error code to match against.
        """
        if response_code in _AWS_RETRY_CODES:
            return True
        return _AWS_NOT_FOUND_RE.search(response_code) is not None
import unittest
import botocore
import boto3

from ansible.module_utils.ec2 import AWSRetry


class RetryTestCase(unittest.TestCase):
    """Unit tests for the AWSRetry.backoff decorator."""

    def test_no_failures(self):
        """A function that never raises is called exactly once."""
        self.counter = 0

        @AWSRetry.backoff(tries=2, delay=0.1)
        def no_failures():
            self.counter += 1

        no_failures()
        self.assertEqual(self.counter, 1)

    def test_retry_once(self):
        """A retryable error on the first call is followed by one retry."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'InstanceId.NotFound'}}

        @AWSRetry.backoff(tries=2, delay=0.1)
        def retry_once():
            self.counter += 1
            if self.counter < 2:
                raise botocore.exceptions.ClientError(err_msg, 'Could not find you')
            else:
                return 'success'

        r = retry_once()
        self.assertEqual(r, 'success')
        self.assertEqual(self.counter, 2)

    def test_reached_limit(self):
        """After `tries` failing calls the last error reaches the caller."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'RequestLimitExceeded'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def fail():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'toooo fast!!')

        # try/except/else rather than assertRaises as a context manager,
        # which Python 2.6 does not have. The else branch stops the test
        # passing when nothing is raised.
        try:
            fail()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'RequestLimitExceeded')
        else:
            self.fail('ClientError was not raised after exhausting all tries')
        self.assertEqual(self.counter, 4)

    def test_unexpected_exception_does_not_retry(self):
        """A ClientError with a non-retryable code is raised straight away."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'AuthFailure'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def raise_unexpected_error():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'unexpected error')

        try:
            raise_unexpected_error()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'AuthFailure')
        else:
            self.fail('ClientError was not raised for a non-retryable code')

        self.assertEqual(self.counter, 1)


if __name__ == '__main__':
    unittest.main()
335418d6c32..dfa641c92f4 100755 --- a/test/utils/shippable/sanity.sh +++ b/test/utils/shippable/sanity.sh @@ -12,7 +12,7 @@ if [ "${TOXENV}" = 'py24' ]; then fi python2.4 -V - python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils + python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|cloud|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils else if [ "${install_deps}" != "" ]; then pip install -r "${source_root}/test/utils/shippable/sanity-requirements.txt" --upgrade diff --git a/test/utils/tox/requirements-py3.txt b/test/utils/tox/requirements-py3.txt index 1ff4fb0cb26..dd5fbe901e5 100644 --- a/test/utils/tox/requirements-py3.txt +++ b/test/utils/tox/requirements-py3.txt @@ -11,3 +11,5 @@ unittest2 redis python3-memcached python-systemd +botocore +boto3 diff --git a/test/utils/tox/requirements.txt b/test/utils/tox/requirements.txt index e4f4f03e3c9..ffed65293d5 100644 --- a/test/utils/tox/requirements.txt +++ b/test/utils/tox/requirements.txt @@ -12,3 +12,5 @@ redis python-memcached python-systemd pycrypto +botocore +boto3