CloudRetry/AWSRetry backoff decorator with unit tests (#17039)
* Added aws_retry decorator function with unit tests * Restructured the code to be used with a base class. This base class CloudRetry can be reused by any other cloud provider. This decorator should be used in situations, where you need to implement a backoff algorithm and want to retry based on the status code from the exception. * updated documentation * fixed tabs * added botocore and boto3 to requirements.txt * removed cloud.py from py24 tests, as it depends on boto3 * fix relative imports * updated test to be 2.6 compat * updated method name from retry to backoff * readded lxd * Updated default backoff from 2 seconds to 1.1s. This will be about a total of 48 seconds in 10 tries. This is configurable.
This commit is contained in:
parent
60706cc12e
commit
b510abce17
7 changed files with 226 additions and 1 deletions
108
lib/ansible/module_utils/cloud.py
Normal file
108
lib/ansible/module_utils/cloud.py
Normal file
|
@ -0,0 +1,108 @@
|
|||
#
|
||||
# (c) 2016 Allen Sanabria, <asanabria@linuxdynasty.org>
|
||||
#
|
||||
# This file is part of Ansible
|
||||
#
|
||||
# Ansible is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Ansible is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
"""
|
||||
This module adds shared support for generic cloud modules
|
||||
|
||||
In order to use this module, include it as part of a custom
|
||||
module as shown below.
|
||||
|
||||
from ansible.module_utils.cloud import *
|
||||
|
||||
The 'cloud' module provides the following common classes:
|
||||
|
||||
* CloudRetry
|
||||
- The base class to be used by other cloud providers, in order to
|
||||
provide a backoff/retry decorator based on status codes.
|
||||
|
||||
- Example using the AWSRetry class which inherits from CloudRetry.
|
||||
@AWSRetry.backoff(tries=20, delay=2, backoff=2)
|
||||
get_ec2_security_group_ids_from_names()
|
||||
|
||||
"""
|
||||
from functools import wraps
|
||||
import syslog
|
||||
import time
|
||||
|
||||
from ansible.module_utils.pycompat24 import get_exception
|
||||
|
||||
|
||||
class CloudRetry(object):
    """ Base class for cloud-provider retry decorators.

    A subclass sets ``base_class`` to the provider's exception type and
    overrides ``status_code_from_exception`` and ``found``. The ``backoff``
    decorator then retries the wrapped call, sleeping for a growing delay,
    whenever the raised exception carries a status code that ``found``
    accepts.
    """
    # The exception type that carries a status code.
    # AWS Example botocore.exceptions.ClientError
    base_class = None

    @staticmethod
    def status_code_from_exception(error):
        """ Return the status code from the exception object

        The base implementation returns None; subclasses override it.

        Args:
            error (object): The exception itself.
        """
        pass

    @staticmethod
    def found(response_code):
        """ Return True if the Response Code to retry on was found.

        The base implementation returns None (falsy), so nothing is retried
        until a subclass overrides it.

        Args:
            response_code (str): This is the Response Code that is being matched against.
        """
        pass

    @classmethod
    def backoff(cls, tries=10, delay=3, backoff=1.1):
        """ Retry calling the Cloud decorated function using an exponential backoff.

        Kwargs:
            tries (int): Number of times to try (not retry) before giving up.
                The function is always called at least once.
                default=10
            delay (int or float): Initial delay between retries in seconds
                default=3
            backoff (int or float): backoff multiplier applied to the delay
                after every retry, e.g. value of 2 will double the delay
                each retry
                default=1.1

        Returns:
            A decorator. The decorated function returns whatever the wrapped
            function returns, and re-raises the wrapped function's exception
            when it is not retryable or when the last attempt fails.
        """
        def deco(f):
            @wraps(f)
            def retry_func(*args, **kwargs):
                attempts_left, current_delay = tries, delay
                while attempts_left > 1:
                    try:
                        return f(*args, **kwargs)
                    except Exception as e:
                        # Only exceptions of the provider's type whose status
                        # code is accepted by found() are retried. The None
                        # check keeps isinstance() from raising TypeError
                        # (and hiding the real error) when no base_class has
                        # been configured.
                        retryable = (
                            cls.base_class is not None and
                            isinstance(e, cls.base_class) and
                            cls.found(cls.status_code_from_exception(e))
                        )
                        if not retryable:
                            # Bare raise keeps the original traceback.
                            raise

                        msg = "{0}: Retrying in {1} seconds...".format(str(e), current_delay)
                        syslog.syslog(syslog.LOG_INFO, msg)
                        time.sleep(current_delay)
                        attempts_left -= 1
                        current_delay *= backoff

                # Final attempt: any exception raised here reaches the caller.
                return f(*args, **kwargs)

            return retry_func  # true decorator

        return deco
|
|
@ -27,8 +27,11 @@
|
|||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import os
|
||||
import re
|
||||
from time import sleep
|
||||
|
||||
from ansible.module_utils.cloud import CloudRetry
|
||||
|
||||
try:
|
||||
import boto
|
||||
import boto.ec2 #boto does weird import stuff
|
||||
|
@ -55,6 +58,29 @@ class AnsibleAWSError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
class AWSRetry(CloudRetry):
    """ CloudRetry specialisation that retries on botocore ClientError codes. """
    base_class = botocore.exceptions.ClientError

    @staticmethod
    def status_code_from_exception(error):
        """ Return the AWS error code stored in the exception's response.

        Args:
            error (object): The exception; its ``response['Error']['Code']``
                entry is read.
        """
        return error.response['Error']['Code']

    @staticmethod
    def found(response_code):
        """ Return True if the AWS error code is one that should be retried.

        Args:
            response_code (str): The error code taken from the exception.
        """
        # This list of failures is based on this API Reference
        # http://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
        retry_on = (
            'RequestLimitExceeded', 'Unavailable', 'ServiceUnavailable',
            'InternalFailure', 'InternalError',
        )
        if response_code in retry_on:
            return True

        # "<Something>.NotFound" codes are retried as well. The dot is
        # escaped so that it only matches a literal '.', not any character.
        not_found = re.compile(r'^\w+\.NotFound')
        return not_found.search(response_code) is not None
|
||||
|
||||
|
||||
def boto3_conn(module, conn_type=None, resource=None, region=None, endpoint=None, **params):
|
||||
try:
|
||||
return _boto3_conn(conn_type=conn_type, resource=resource, region=region, endpoint=endpoint, **params)
|
||||
|
|
0
test/units/module_utils/ec2/__init__.py
Normal file
0
test/units/module_utils/ec2/__init__.py
Normal file
87
test/units/module_utils/ec2/test_aws.py
Normal file
87
test/units/module_utils/ec2/test_aws.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# (c) 2015, Allen Sanabria <asanabria@linuxdynasty.org>
|
||||
#
|
||||
# This file is part of Ansible
|
||||
#
|
||||
# Ansible is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Ansible is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import unittest
|
||||
import botocore
|
||||
import boto3
|
||||
|
||||
from ansible.module_utils.ec2 import AWSRetry
|
||||
|
||||
class RetryTestCase(unittest.TestCase):
    """ Unit tests for the AWSRetry.backoff decorator. """

    def test_no_failures(self):
        """ A function that never raises is called exactly once. """
        self.counter = 0

        @AWSRetry.backoff(tries=2, delay=0.1)
        def no_failures():
            self.counter += 1

        no_failures()
        self.assertEqual(self.counter, 1)

    def test_retry_once(self):
        """ A retryable error on the first call is retried and then succeeds. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'InstanceId.NotFound'}}

        @AWSRetry.backoff(tries=2, delay=0.1)
        def retry_once():
            self.counter += 1
            if self.counter < 2:
                raise botocore.exceptions.ClientError(err_msg, 'Could not find you')
            else:
                return 'success'

        r = retry_once()
        self.assertEqual(r, 'success')
        self.assertEqual(self.counter, 2)

    def test_reached_limit(self):
        """ A retryable error is raised to the caller once all tries are used. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'RequestLimitExceeded'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def fail():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'toooo fast!!')

        # try/except/else instead of "with self.assertRaises(...)" to stay
        # compatible with Python 2.6; the else branch makes the test fail
        # when no exception is raised at all.
        try:
            fail()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'RequestLimitExceeded')
        else:
            self.fail('ClientError was not raised')
        self.assertEqual(self.counter, 4)

    def test_unexpected_exception_does_not_retry(self):
        """ An error code that is not retryable is raised after a single call. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'AuthFailure'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def raise_unexpected_error():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'unexpected error')

        # See test_reached_limit for why assertRaises is not used here.
        try:
            raise_unexpected_error()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'AuthFailure')
        else:
            self.fail('ClientError was not raised')

        self.assertEqual(self.counter, 1)
|
||||
|
||||
# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
|
@ -12,7 +12,7 @@ if [ "${TOXENV}" = 'py24' ]; then
|
|||
fi
|
||||
|
||||
python2.4 -V
|
||||
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
|
||||
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|cloud|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
|
||||
else
|
||||
if [ "${install_deps}" != "" ]; then
|
||||
pip install -r "${source_root}/test/utils/shippable/sanity-requirements.txt" --upgrade
|
||||
|
|
|
@ -11,3 +11,5 @@ unittest2
|
|||
redis
|
||||
python3-memcached
|
||||
python-systemd
|
||||
botocore
|
||||
boto3
|
||||
|
|
|
@ -12,3 +12,5 @@ redis
|
|||
python-memcached
|
||||
python-systemd
|
||||
pycrypto
|
||||
botocore
|
||||
boto3
|
||||
|
|
Loading…
Reference in a new issue