Add exponential backoff retries to ec2_elb_lb (#3379)

The ec2_elb_lb module doesn't react well to AWS API throttling errors. This
commit wraps some of the AWS API calls in exponential backoff retries (with
random jitter, in line with AWS recommendations) to make the module more
resilient.
Commit 1db444cdc2 (parent d4d86b4bc6)
Authored by Joel Thompson, 2016-04-16 15:43:03 -04:00; committed by Matt Clay

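As a rough illustration of the pattern the commit message describes, here is a simplified, Python 3-style paraphrase of the _throttleable_operation decorator added in the diff below. The name throttleable and the functools.wraps usage are illustrative choices, not the committed code; the error-code check mirrors the e.code comparison in the diff.

    import random
    import time
    from functools import wraps

    import boto.exception


    def throttleable(max_retries=5):
        # Retry the wrapped boto call when AWS reports throttling, using
        # jittered exponential backoff; otherwise (or once retries are
        # exhausted) re-raise the original error.
        def decorator(op):
            @wraps(op)
            def wrapper(*args, **kwargs):
                retry = 0
                while True:
                    try:
                        return op(*args, **kwargs)
                    except boto.exception.BotoServerError as e:
                        # Same two AWS error codes the diff checks via e.code.
                        if retry >= max_retries or e.code not in ("Throttling", "RequestLimitExceeded"):
                            raise
                        retry += 1
                        # Sleep a random fraction of 2**retry seconds, capped at 300s.
                        time.sleep(min(random.random() * (2 ** retry), 300))
            return wrapper
        return decorator

Applied to methods such as ensure_ok (as the diff does), a throttled AWS call is retried a handful of times instead of failing the module run outright.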

@@ -358,6 +358,29 @@ try:
 except ImportError:
     HAS_BOTO = False
 
+import time
+import random
+
+
+def _throttleable_operation(max_retries):
+    def _operation_wrapper(op):
+        def _do_op(*args, **kwargs):
+            retry = 0
+            while True:
+                try:
+                    return op(*args, **kwargs)
+                except boto.exception.BotoServerError, e:
+                    if retry < max_retries and e.code in \
+                            ("Throttling", "RequestLimitExceeded"):
+                        retry = retry + 1
+                        time.sleep(min(random.random() * (2 ** retry), 300))
+                        continue
+                    else:
+                        raise
+        return _do_op
+    return _operation_wrapper
+
+_THROTTLING_RETRIES = 5
 
 class ElbManager(object):
     """Handles ELB creation and destruction"""
@@ -401,6 +424,7 @@ class ElbManager(object):
         self.elb = self._get_elb()
         self.ec2_conn = self._get_ec2_connection()
 
+    @_throttleable_operation(_THROTTLING_RETRIES)
     def ensure_ok(self):
         """Create the ELB"""
         if not self.elb:
@@ -544,6 +568,7 @@ class ElbManager(object):
         return info
 
+    @_throttleable_operation(_THROTTLING_RETRIES)
     def _wait_for_elb_removed(self):
         polling_increment_secs = 15
         max_retries = (self.wait_timeout / polling_increment_secs)
@@ -561,6 +586,7 @@ class ElbManager(object):
         return status_achieved
 
+    @_throttleable_operation(_THROTTLING_RETRIES)
     def _wait_for_elb_interface_removed(self):
         polling_increment_secs = 15
         max_retries = (self.wait_timeout / polling_increment_secs)
@@ -588,6 +614,7 @@ class ElbManager(object):
         return status_achieved
 
+    @_throttleable_operation(_THROTTLING_RETRIES)
     def _get_elb(self):
         elbs = self.elb_conn.get_all_load_balancers()
         for elb in elbs:
@@ -609,6 +636,7 @@ class ElbManager(object):
         except (boto.exception.NoAuthHandlerFound, StandardError), e:
             self.module.fail_json(msg=str(e))
 
+    @_throttleable_operation(_THROTTLING_RETRIES)
     def _delete_elb(self):
         # True if succeeds, exception raised if not
         result = self.elb_conn.delete_load_balancer(name=self.name)
@@ -625,6 +653,16 @@ class ElbManager(object):
                                               subnets=self.subnets,
                                               scheme=self.scheme)
         if self.elb:
+            # HACK: Work around a boto bug in which the listeners attribute is
+            # always set to the listeners argument to create_load_balancer, and
+            # not the complex_listeners
+            # We're not doing a self.elb = self._get_elb here because there
+            # might be eventual consistency issues and it doesn't necessarily
+            # make sense to wait until the ELB gets returned from the EC2 API.
+            # This is necessary in the event we hit the throttling errors and
+            # need to retry ensure_ok
+            # See https://github.com/boto/boto/issues/3526
+            self.elb.listeners = self.listeners
             self.changed = True
             self.status = 'created'
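
For a sense of the latency this adds (a back-of-the-envelope reading of the sleep call above, not figures from the commit): with _THROTTLING_RETRIES = 5 and random.random() always below 1, each sleep is under 2 ** retry seconds, so a call that is throttled on every attempt sleeps less than 2 + 4 + 8 + 16 + 32 = 62 seconds in total before the BotoServerError is finally re-raised; the 300-second cap would only matter with a much larger retry count. The helper below is purely illustrative:

    import random

    MAX_DELAY = 300  # cap used by the sleep in _throttleable_operation

    def backoff_delay(retry):
        # Jittered exponential backoff: a uniformly random fraction of
        # 2**retry seconds, never more than MAX_DELAY.
        return min(random.random() * (2 ** retry), MAX_DELAY)

    # Worst-case (jitter close to 1.0) delays for retries 1..5:
    #   2s, 4s, 8s, 16s, 32s  ->  under 62 seconds total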