feature pull request: catch and retry recoverable errors

boto can throw SSLError when timeouts occur (among other SSL errors). Catch these so proper JSON can be returned, and also add the ability to retry the operation.

There's an open issue in boto for this: https://github.com/boto/boto/issues/2409

Here's a sample stack trace that inspired me to work on this. I'm on 1.7, but there are no meaningful differences in the 1.8 release that would affect this. I've added line breaks to the trace for readability.

    failed to parse: Traceback (most recent call last):
      File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 2031, in <module> main()
      File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 353, in main download_s3file(module, s3, bucket, obj, dest)
      File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 234, in download_s3file key.get_contents_to_filename(dest)
      File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1665, in get_contents_to_filename response_headers=response_headers)
      File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1603, in get_contents_to_file response_headers=response_headers)
      File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1435, in get_file query_args=None)
      File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1488, in _get_file_internal for bytes in self:
      File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 368, in next data = self.resp.read(self.BufferSize)
      File "/usr/local/lib/python2.7/dist-packages/boto/connection.py", line 416, in read return httplib.HTTPResponse.read(self, amt)
      File "/usr/lib/python2.7/httplib.py", line 567, in read s = self.fp.read(amt)
      File "/usr/lib/python2.7/socket.py", line 380, in read data = self._sock.recv(left)
      File "/usr/lib/python2.7/ssl.py", line 341, in recv return self.read(buflen)
      File "/usr/lib/python2.7/ssl.py", line 260, in read return self._sslobj.read(len) ssl.SSLError: The read operation timed out
This commit is contained in:
tedder 2014-12-29 16:38:08 -08:00
parent 19b328c4df
commit c5fe40661d

View file

@ -95,6 +95,13 @@ options:
required: false required: false
default: null default: null
version_added: "1.8" version_added: "1.8"
retries:
description:
- On recoverable failure, how many times to retry before actually failing.
required: false
default: 0
version_added: "1.9"
requirements: [ "boto" ] requirements: [ "boto" ]
author: Lester Wade, Ralph Tice author: Lester Wade, Ralph Tice
@ -133,6 +140,7 @@ import sys
import os import os
import urlparse import urlparse
import hashlib import hashlib
from ssl import SSLError
try: try:
import boto import boto
@ -237,14 +245,23 @@ def upload_s3file(module, s3, bucket, obj, src, expiry, metadata):
except s3.provider.storage_copy_error, e: except s3.provider.storage_copy_error, e:
module.fail_json(msg= str(e)) module.fail_json(msg= str(e))
def download_s3file(module, s3, bucket, obj, dest): def download_s3file(module, s3, bucket, obj, dest, retries):
try: # retries is the number of loops; range/xrange needs to be one
bucket = s3.lookup(bucket) # more to get that count of loops.
key = bucket.lookup(obj) bucket = s3.lookup(bucket)
key.get_contents_to_filename(dest) key = bucket.lookup(obj)
module.exit_json(msg="GET operation complete", changed=True) for x in xrange(0, retries + 1):
except s3.provider.storage_copy_error, e: try:
module.fail_json(msg= str(e)) key.get_contents_to_filename(dest)
module.exit_json(msg="GET operation complete", changed=True)
except s3.provider.storage_copy_error, e:
module.fail_json(msg= str(e))
except SSLError as e:
# actually fail on last pass through the loop.
if x == retries:
module.fail_json(msg="s3 download failed; %s" % e)
# otherwise, try again, this may be a transient timeout.
pass
def download_s3str(module, s3, bucket, obj): def download_s3str(module, s3, bucket, obj):
try: try:
@ -292,7 +309,8 @@ def main():
expiry = dict(default=600, aliases=['expiration']), expiry = dict(default=600, aliases=['expiration']),
s3_url = dict(aliases=['S3_URL']), s3_url = dict(aliases=['S3_URL']),
overwrite = dict(aliases=['force'], default=True, type='bool'), overwrite = dict(aliases=['force'], default=True, type='bool'),
metadata = dict(type='dict'), metadata = dict(type='dict'),
retries = dict(aliases=['retry'], type='str', default=0),
), ),
) )
module = AnsibleModule(argument_spec=argument_spec) module = AnsibleModule(argument_spec=argument_spec)
@ -307,6 +325,7 @@ def main():
s3_url = module.params.get('s3_url') s3_url = module.params.get('s3_url')
overwrite = module.params.get('overwrite') overwrite = module.params.get('overwrite')
metadata = module.params.get('metadata') metadata = module.params.get('metadata')
retries = int(module.params.get('retries'))
ec2_url, aws_access_key, aws_secret_key, region = get_ec2_creds(module) ec2_url, aws_access_key, aws_secret_key, region = get_ec2_creds(module)
@ -368,7 +387,7 @@ def main():
# If the destination path doesn't exist, no need to md5um etag check, so just download. # If the destination path doesn't exist, no need to md5um etag check, so just download.
pathrtn = path_check(dest) pathrtn = path_check(dest)
if pathrtn is False: if pathrtn is False:
download_s3file(module, s3, bucket, obj, dest) download_s3file(module, s3, bucket, obj, dest, retries)
# Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists. # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
if pathrtn is True: if pathrtn is True:
@ -377,13 +396,13 @@ def main():
if md5_local == md5_remote: if md5_local == md5_remote:
sum_matches = True sum_matches = True
if overwrite is True: if overwrite is True:
download_s3file(module, s3, bucket, obj, dest) download_s3file(module, s3, bucket, obj, dest, retries)
else: else:
module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False) module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)
else: else:
sum_matches = False sum_matches = False
if overwrite is True: if overwrite is True:
download_s3file(module, s3, bucket, obj, dest) download_s3file(module, s3, bucket, obj, dest, retries)
else: else:
module.fail_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.", failed=True) module.fail_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.", failed=True)
@ -393,7 +412,7 @@ def main():
# At this point explicitly define the overwrite condition. # At this point explicitly define the overwrite condition.
if sum_matches is True and pathrtn is True and overwrite is True: if sum_matches is True and pathrtn is True and overwrite is True:
download_s3file(module, s3, bucket, obj, dest) download_s3file(module, s3, bucket, obj, dest, retries)
# If sum does not match but the destination exists, we # If sum does not match but the destination exists, we