From ed5ea7e921c0a8a776b2cf42ef86b3e871d643f8 Mon Sep 17 00:00:00 2001
From: "Ching Yi, Chan" <chingyichan.tw@gmail.com>
Date: Thu, 4 Jun 2015 14:28:57 +0800
Subject: [PATCH 1/2] Prevent memory-error from a large file

---
 cloud/amazon/s3.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/cloud/amazon/s3.py b/cloud/amazon/s3.py
index 6f8e447397d..545955e90cd 100644
--- a/cloud/amazon/s3.py
+++ b/cloud/amazon/s3.py
@@ -296,6 +296,15 @@ def is_walrus(s3_url):
     else:
         return False
 
+def get_md5_digest(local_file):
+    md5 = hashlib.md5()
+    with open(local_file, 'rb') as f:
+        while True:
+            data = f.read(1024 ** 2)
+            if not data: break
+            md5.update(data)
+    return md5.hexdigest()
+
 
 def main():
     argument_spec = ec2_argument_spec()
@@ -410,7 +419,7 @@ def main():
         # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
         if pathrtn is True:
             md5_remote = keysum(module, s3, bucket, obj)
-            md5_local = hashlib.md5(open(dest, 'rb').read()).hexdigest()
+            md5_local = get_md5_digest(dest)
             if md5_local == md5_remote:
                 sum_matches = True
                 if overwrite == 'always':
@@ -454,7 +463,8 @@ def main():
         # Lets check key state. Does it exist and if it does, compute the etag md5sum.
         if bucketrtn is True and keyrtn is True:
                 md5_remote = keysum(module, s3, bucket, obj)
-                md5_local = hashlib.md5(open(src, 'rb').read()).hexdigest()
+                md5_local = get_md5_digest(src)
+
                 if md5_local == md5_remote:
                     sum_matches = True
                     if overwrite == 'always':

From 1517ae8ab27d2493a51fb1eff7cf0c30b5c54f0a Mon Sep 17 00:00:00 2001
From: "Ching Yi, Chan" <chingyichan.tw@gmail.com>
Date: Fri, 5 Jun 2015 23:29:11 +0800
Subject: [PATCH 2/2] Refactor for readability

---
 cloud/amazon/s3.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cloud/amazon/s3.py b/cloud/amazon/s3.py
index 545955e90cd..d08d1b6a46c 100644
--- a/cloud/amazon/s3.py
+++ b/cloud/amazon/s3.py
@@ -299,9 +299,7 @@ def is_walrus(s3_url):
 def get_md5_digest(local_file):
     md5 = hashlib.md5()
     with open(local_file, 'rb') as f:
-        while True:
-            data = f.read(1024 ** 2)
-            if not data: break
+        for data in iter(lambda: f.read(1024 ** 2), b''):  # 1 MiB chunks until EOF
             md5.update(data)
     return md5.hexdigest()