Add exponential backoff retries to ec2_elb_lb (#3379)
The ec2_elb_lb module doesn't react well to AWS API throttling errors. This commit wraps some of the AWS API calls in a retry loop with exponential backoff (with random jitter, in line with AWS recommendations) to make the module more resilient.
This commit is contained in:
parent
d4d86b4bc6
commit
1db444cdc2
1 changed files with 38 additions and 0 deletions
|
@ -358,6 +358,29 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
HAS_BOTO = False
|
HAS_BOTO = False
|
||||||
|
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
|
def _throttleable_operation(max_retries):
|
||||||
|
def _operation_wrapper(op):
|
||||||
|
def _do_op(*args, **kwargs):
|
||||||
|
retry = 0
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
return op(*args, **kwargs)
|
||||||
|
except boto.exception.BotoServerError, e:
|
||||||
|
if retry < max_retries and e.code in \
|
||||||
|
("Throttling", "RequestLimitExceeded"):
|
||||||
|
retry = retry + 1
|
||||||
|
time.sleep(min(random.random() * (2 ** retry), 300))
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
return _do_op
|
||||||
|
return _operation_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# Retry budget passed as ``max_retries`` to the @_throttleable_operation
# decorators: a throttled call is retried at most this many times before the
# throttling error is re-raised.
_THROTTLING_RETRIES = 5
|
||||||
|
|
||||||
class ElbManager(object):
|
class ElbManager(object):
|
||||||
"""Handles ELB creation and destruction"""
|
"""Handles ELB creation and destruction"""
|
||||||
|
@ -401,6 +424,7 @@ class ElbManager(object):
|
||||||
self.elb = self._get_elb()
|
self.elb = self._get_elb()
|
||||||
self.ec2_conn = self._get_ec2_connection()
|
self.ec2_conn = self._get_ec2_connection()
|
||||||
|
|
||||||
|
@_throttleable_operation(_THROTTLING_RETRIES)
|
||||||
def ensure_ok(self):
|
def ensure_ok(self):
|
||||||
"""Create the ELB"""
|
"""Create the ELB"""
|
||||||
if not self.elb:
|
if not self.elb:
|
||||||
|
@ -544,6 +568,7 @@ class ElbManager(object):
|
||||||
|
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
@_throttleable_operation(_THROTTLING_RETRIES)
|
||||||
def _wait_for_elb_removed(self):
|
def _wait_for_elb_removed(self):
|
||||||
polling_increment_secs = 15
|
polling_increment_secs = 15
|
||||||
max_retries = (self.wait_timeout / polling_increment_secs)
|
max_retries = (self.wait_timeout / polling_increment_secs)
|
||||||
|
@ -561,6 +586,7 @@ class ElbManager(object):
|
||||||
|
|
||||||
return status_achieved
|
return status_achieved
|
||||||
|
|
||||||
|
@_throttleable_operation(_THROTTLING_RETRIES)
|
||||||
def _wait_for_elb_interface_removed(self):
|
def _wait_for_elb_interface_removed(self):
|
||||||
polling_increment_secs = 15
|
polling_increment_secs = 15
|
||||||
max_retries = (self.wait_timeout / polling_increment_secs)
|
max_retries = (self.wait_timeout / polling_increment_secs)
|
||||||
|
@ -588,6 +614,7 @@ class ElbManager(object):
|
||||||
|
|
||||||
return status_achieved
|
return status_achieved
|
||||||
|
|
||||||
|
@_throttleable_operation(_THROTTLING_RETRIES)
|
||||||
def _get_elb(self):
|
def _get_elb(self):
|
||||||
elbs = self.elb_conn.get_all_load_balancers()
|
elbs = self.elb_conn.get_all_load_balancers()
|
||||||
for elb in elbs:
|
for elb in elbs:
|
||||||
|
@ -609,6 +636,7 @@ class ElbManager(object):
|
||||||
except (boto.exception.NoAuthHandlerFound, StandardError), e:
|
except (boto.exception.NoAuthHandlerFound, StandardError), e:
|
||||||
self.module.fail_json(msg=str(e))
|
self.module.fail_json(msg=str(e))
|
||||||
|
|
||||||
|
@_throttleable_operation(_THROTTLING_RETRIES)
|
||||||
def _delete_elb(self):
|
def _delete_elb(self):
|
||||||
# True if succeeds, exception raised if not
|
# True if succeeds, exception raised if not
|
||||||
result = self.elb_conn.delete_load_balancer(name=self.name)
|
result = self.elb_conn.delete_load_balancer(name=self.name)
|
||||||
|
@ -625,6 +653,16 @@ class ElbManager(object):
|
||||||
subnets=self.subnets,
|
subnets=self.subnets,
|
||||||
scheme=self.scheme)
|
scheme=self.scheme)
|
||||||
if self.elb:
|
if self.elb:
|
||||||
|
# HACK: Work around a boto bug in which the listeners attribute is
|
||||||
|
# always set to the listeners argument to create_load_balancer, and
|
||||||
|
# not the complex_listeners
|
||||||
|
# We're not doing a self.elb = self._get_elb here because there
|
||||||
|
# might be eventual consistency issues and it doesn't necessarily
|
||||||
|
# make sense to wait until the ELB gets returned from the EC2 API.
|
||||||
|
# This is necessary in the event we hit the throttling errors and
|
||||||
|
# need to retry ensure_ok
|
||||||
|
# See https://github.com/boto/boto/issues/3526
|
||||||
|
self.elb.listeners = self.listeners
|
||||||
self.changed = True
|
self.changed = True
|
||||||
self.status = 'created'
|
self.status = 'created'
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue