Ensure unicode characters in zip-compressed filenames work correctly (#4702)

* Ensure unicode characters in zip-compressed filenames work correctly

Another corner case we are fixing, hoping it doesn't break anything else.

This fixes:
- The correct encoding of unicode paths internally (so the filenames we scrape from the output and those returned by zipfile match)
- Disabling LANG=C for the unzip command (because it breaks the unicode output, unlike with gtar)

* Fix for python3 and other suggestions from @abadger
This commit is contained in:
Dag Wieers 2016-09-09 18:26:19 +02:00 committed by Matt Clay
parent 53bcf18eab
commit 976d876e55

View file

@ -133,6 +133,7 @@ import time
import binascii import binascii
import codecs import codecs
from zipfile import ZipFile, BadZipfile from zipfile import ZipFile, BadZipfile
from ansible.module_utils._text import to_text
try: # python 3.3+ try: # python 3.3+
from shlex import quote from shlex import quote
@ -352,7 +353,7 @@ class ZipArchive(object):
version = pcs[1] version = pcs[1]
ostype = pcs[2] ostype = pcs[2]
size = int(pcs[3]) size = int(pcs[3])
path = pcs[7] path = to_text(pcs[7], errors='surrogate_or_strict')
# Skip excluded files # Skip excluded files
if path in self.excludes: if path in self.excludes:
@ -597,7 +598,7 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ])
cmd.extend([ '-f', self.src ]) cmd.extend([ '-f', self.src ])
rc, out, err = self.module.run_command(cmd) rc, out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
if rc != 0: if rc != 0:
raise UnarchiveError('Unable to list files in the archive') raise UnarchiveError('Unable to list files in the archive')
@ -626,7 +627,7 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ])
cmd.extend([ '-f', self.src ]) cmd.extend([ '-f', self.src ])
rc, out, err = self.module.run_command(cmd) rc, out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
# Check whether the differences are in something that we're # Check whether the differences are in something that we're
# setting anyway # setting anyway
@ -675,7 +676,7 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ])
cmd.extend([ '-f', self.src ]) cmd.extend([ '-f', self.src ])
rc, out, err = self.module.run_command(cmd, cwd=self.dest) rc, out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
return dict(cmd=cmd, rc=rc, out=out, err=err) return dict(cmd=cmd, rc=rc, out=out, err=err)
def can_handle_archive(self): def can_handle_archive(self):
@ -746,9 +747,6 @@ def main():
supports_check_mode = True, supports_check_mode = True,
) )
# We screenscrape a huge amount of commands so use C locale anytime we do
module.run_command_environ_update = dict(LANG='C', LC_ALL='C', LC_MESSAGES='C', LC_CTYPE='C')
src = os.path.expanduser(module.params['src']) src = os.path.expanduser(module.params['src'])
dest = os.path.expanduser(module.params['dest']) dest = os.path.expanduser(module.params['dest'])
copy = module.params['copy'] copy = module.params['copy']