CloudRetry/AWSRetry backoff decorator with unit tests (#17039)
* Added aws_retry decorator function with unit tests * Restructured the code to be used with a base class. This base class, CloudRetry, can be reused by any other cloud provider. This decorator should be used in situations where you need to implement a backoff algorithm and want to retry based on the status code from the exception. * Updated documentation * Fixed tabs * Added botocore and boto3 to requirements.txt * Removed cloud.py from py24 tests, as it depends on boto3 * Fixed relative imports * Updated test to be 2.6 compatible * Renamed the method from retry to backoff * Re-added lxd * Updated the default backoff multiplier from 2 to 1.1. This will be about a total of 48 seconds in 10 tries. This is configurable.
This commit is contained in:
parent
60706cc12e
commit
b510abce17
7 changed files with 226 additions and 1 deletions
108
lib/ansible/module_utils/cloud.py
Normal file
108
lib/ansible/module_utils/cloud.py
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
#
|
||||||
|
# (c) 2016 Allen Sanabria, <asanabria@linuxdynasty.org>
|
||||||
|
#
|
||||||
|
# This file is part of Ansible
|
||||||
|
#
|
||||||
|
# Ansible is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Ansible is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
"""
|
||||||
|
This module adds shared support for generic cloud modules
|
||||||
|
|
||||||
|
In order to use this module, include it as part of a custom
|
||||||
|
module as shown below.
|
||||||
|
|
||||||
|
from ansible.module_utils.cloud import *
|
||||||
|
|
||||||
|
The 'cloud' module provides the following common classes:
|
||||||
|
|
||||||
|
* CloudRetry
|
||||||
|
- The base class to be used by other cloud providers, in order to
|
||||||
|
provide a backoff/retry decorator based on status codes.
|
||||||
|
|
||||||
|
- Example using the AWSRetry class which inherits from CloudRetry.
|
||||||
|
@AWSRetry.backoff(tries=20, delay=2, backoff=2)
|
||||||
|
get_ec2_security_group_ids_from_names()
|
||||||
|
|
||||||
|
"""
|
||||||
|
from functools import wraps
|
||||||
|
import syslog
|
||||||
|
import time
|
||||||
|
|
||||||
|
from ansible.module_utils.pycompat24 import get_exception
|
||||||
|
|
||||||
|
|
||||||
|
class CloudRetry(object):
    """ Base class for a retry/backoff decorator driven by exception status codes.

    A cloud provider subclasses CloudRetry, sets ``base_class`` to the
    provider's exception type and overrides ``status_code_from_exception``
    and ``found``. The ``backoff`` classmethod then produces a decorator that
    retries the wrapped call whenever ``found`` accepts the status code.
    """
    # The exception class whose instances are candidates for a retry.
    # AWS Example botocore.exceptions.ClientError
    # While this is None (no subclass override) nothing is ever retried.
    base_class = None

    @staticmethod
    def status_code_from_exception(error):
        """ Return the status code from the exception object.

        The base implementation is a placeholder that returns None;
        subclasses are expected to override it.

        Args:
            error (object): The exception itself.
        """
        pass

    @staticmethod
    def found(response_code):
        """ Return True if the Response Code to retry on was found.

        The base implementation is a placeholder that returns None (falsy);
        subclasses are expected to override it.

        Args:
            response_code (str): This is the Response Code that is being matched against.
        """
        pass

    @classmethod
    def backoff(cls, tries=10, delay=3, backoff=1.1):
        """ Retry calling the Cloud decorated function using an exponential backoff.

        Kwargs:
            tries (int): Number of times to try (not retry) before giving up
                default=10
            delay (int): Initial delay between retries in seconds
                default=3
            backoff (int): backoff multiplier e.g. value of 2 will double the delay each retry
                default=1.1

        Returns:
            A decorator. The decorated function returns whatever the wrapped
            function returns, and re-raises the wrapped function's exception
            when it is not retryable or when the final attempt fails.
        """
        def deco(f):
            @wraps(f)
            def retry_func(*args, **kwargs):
                attempts_left, pause = tries, delay
                # Every attempt except the last one runs inside the loop so
                # that a retryable failure can be swallowed and retried.
                while attempts_left > 1:
                    try:
                        return f(*args, **kwargs)
                    except Exception as e:
                        # Not an exception type this class knows how to
                        # inspect (or no base_class configured at all):
                        # re-raise it untouched, traceback included.
                        if cls.base_class is None or not isinstance(e, cls.base_class):
                            raise
                        # Known exception type, but its status code is not
                        # one we retry on: re-raise it untouched as well.
                        if not cls.found(cls.status_code_from_exception(e)):
                            raise
                        msg = "{0}: Retrying in {1} seconds...".format(str(e), pause)
                        syslog.syslog(syslog.LOG_INFO, msg)
                        time.sleep(pause)
                        attempts_left -= 1
                        pause *= backoff
                # Final attempt: any exception now propagates to the caller.
                return f(*args, **kwargs)

            return retry_func  # true decorator

        return deco
|
|
@ -27,8 +27,11 @@
|
||||||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
|
from ansible.module_utils.cloud import CloudRetry
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import boto
|
import boto
|
||||||
import boto.ec2 #boto does weird import stuff
|
import boto.ec2 #boto does weird import stuff
|
||||||
|
@ -55,6 +58,29 @@ class AnsibleAWSError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class AWSRetry(CloudRetry):
    """ CloudRetry specialisation that retries on botocore ClientError codes. """
    base_class = botocore.exceptions.ClientError

    @staticmethod
    def status_code_from_exception(error):
        """ Return the error code stored under response['Error']['Code'].

        Args:
            error (object): The exception; it is read via its ``response`` attribute.
        """
        return error.response['Error']['Code']

    @staticmethod
    def found(response_code):
        """ Return True if response_code is one that should be retried.

        Args:
            response_code (str): The error code to match against.
        """
        # This list of failures is based on this API Reference
        # http://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
        retry_on = [
            'RequestLimitExceeded', 'Unavailable', 'ServiceUnavailable',
            'InternalFailure', 'InternalError'
        ]

        # Codes of the form "<Something>.NotFound" are retried as well. The
        # dot is escaped so that only a literal '.' separator matches; the
        # unescaped form also matched codes such as
        # "ResourceNotFoundException".
        not_found = re.compile(r'^\w+\.NotFound')
        return response_code in retry_on or bool(not_found.search(response_code))
|
||||||
|
|
||||||
|
|
||||||
def boto3_conn(module, conn_type=None, resource=None, region=None, endpoint=None, **params):
|
def boto3_conn(module, conn_type=None, resource=None, region=None, endpoint=None, **params):
|
||||||
try:
|
try:
|
||||||
return _boto3_conn(conn_type=conn_type, resource=resource, region=region, endpoint=endpoint, **params)
|
return _boto3_conn(conn_type=conn_type, resource=resource, region=region, endpoint=endpoint, **params)
|
||||||
|
|
0
test/units/module_utils/ec2/__init__.py
Normal file
0
test/units/module_utils/ec2/__init__.py
Normal file
87
test/units/module_utils/ec2/test_aws.py
Normal file
87
test/units/module_utils/ec2/test_aws.py
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# (c) 2015, Allen Sanabria <asanabria@linuxdynasty.org>
|
||||||
|
#
|
||||||
|
# This file is part of Ansible
|
||||||
|
#
|
||||||
|
# Ansible is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Ansible is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import botocore
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
from ansible.module_utils.ec2 import AWSRetry
|
||||||
|
|
||||||
|
class RetryTestCase(unittest.TestCase):
    """ Unit tests for the AWSRetry.backoff decorator. """

    def test_no_failures(self):
        """ A function that never raises is called exactly once. """
        self.counter = 0

        @AWSRetry.backoff(tries=2, delay=0.1)
        def no_failures():
            self.counter += 1

        no_failures()
        self.assertEqual(self.counter, 1)

    def test_retry_once(self):
        """ A single retryable failure is retried and the result returned. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'InstanceId.NotFound'}}

        @AWSRetry.backoff(tries=2, delay=0.1)
        def retry_once():
            self.counter += 1
            if self.counter < 2:
                raise botocore.exceptions.ClientError(err_msg, 'Could not find you')
            else:
                return 'success'

        r = retry_once()
        self.assertEqual(r, 'success')
        self.assertEqual(self.counter, 2)

    def test_reached_limit(self):
        """ A persistently retryable failure is raised after `tries` calls. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'RequestLimitExceeded'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def fail():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'toooo fast!!')

        # try/except/else instead of the assertRaises context manager, which
        # unittest only gained in Python 2.7. The else branch makes the test
        # fail when no exception is raised at all.
        try:
            fail()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'RequestLimitExceeded')
        else:
            self.fail('ClientError was not raised after exhausting all tries')
        self.assertEqual(self.counter, 4)

    def test_unexpected_exception_does_not_retry(self):
        """ A ClientError with a non-retryable code is raised on the first call. """
        self.counter = 0
        err_msg = {'Error': {'Code': 'AuthFailure'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def raise_unexpected_error():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'unexpected error')

        # Same try/except/else pattern as above so a missing exception fails
        # the test instead of passing silently.
        try:
            raise_unexpected_error()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'AuthFailure')
        else:
            self.fail('ClientError was not raised for a non-retryable code')

        self.assertEqual(self.counter, 1)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
|
@ -12,7 +12,7 @@ if [ "${TOXENV}" = 'py24' ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
python2.4 -V
|
python2.4 -V
|
||||||
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
|
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|cloud|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
|
||||||
else
|
else
|
||||||
if [ "${install_deps}" != "" ]; then
|
if [ "${install_deps}" != "" ]; then
|
||||||
pip install -r "${source_root}/test/utils/shippable/sanity-requirements.txt" --upgrade
|
pip install -r "${source_root}/test/utils/shippable/sanity-requirements.txt" --upgrade
|
||||||
|
|
|
@ -11,3 +11,5 @@ unittest2
|
||||||
redis
|
redis
|
||||||
python3-memcached
|
python3-memcached
|
||||||
python-systemd
|
python-systemd
|
||||||
|
botocore
|
||||||
|
boto3
|
||||||
|
|
|
@ -12,3 +12,5 @@ redis
|
||||||
python-memcached
|
python-memcached
|
||||||
python-systemd
|
python-systemd
|
||||||
pycrypto
|
pycrypto
|
||||||
|
botocore
|
||||||
|
boto3
|
||||||
|
|
Loading…
Reference in a new issue