unarchive: use Python's tarfile module for tar listing (#3575)
* unarchive: use Python's tarfile module for tar listing fixes https://github.com/ansible/ansible/issues/11348 Depending on the current active locale, `tar`'s file listing can end up spitting backslash-escaped characters. Unfortunately, when that happens, we end up with double-escaped backslashes, giving us a wrong path, making our action fail. We could try un-double-escaping our paths, but that would be complicated and, I think, error-prone. The easiest way forward seemed to simply use the `tarfile` module. Why use it only for listing? Because the `unarchive` option also supports the `extra_opts` option, and that supporting this would require us to mimick `tar`'s interface. For listing files, however, I don't think that the loss of `extra_opts` support causes problems (well, I hope so). * unarchive: re-add xz decompression support Following previous change to use Python's `tarfile` module for tar file listing, we lost `xz` decompression support. This commits re-add it by adding a special case in `TarXzArchive` that pre-decompresses the source file.
This commit is contained in:
parent
2b36edd55c
commit
82aeaed45d
1 changed files with 26 additions and 9 deletions
|
@ -5,6 +5,7 @@
|
|||
# (c) 2013, Dylan Martin <dmartin@seattlecentral.edu>
|
||||
# (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com>
|
||||
# (c) 2016, Dag Wieers <dag@wieers.com>
|
||||
# (c) 2016, Virgil Dupras <hsoft@hardcoded.net>
|
||||
#
|
||||
# This file is part of Ansible
|
||||
#
|
||||
|
@ -114,6 +115,8 @@ import datetime
|
|||
import time
|
||||
import binascii
|
||||
from zipfile import ZipFile
|
||||
import tarfile
|
||||
import subprocess
|
||||
|
||||
# String from tar that shows the tar contents are different from the
|
||||
# filesystem
|
||||
|
@ -492,22 +495,23 @@ class TgzArchive(object):
|
|||
self.zipflag = 'z'
|
||||
self._files_in_archive = []
|
||||
|
||||
def _get_tar_fileobj(self):
|
||||
"""Returns a file object that can be read by ``tarfile.open()``."""
|
||||
return open(self.src, 'rb')
|
||||
|
||||
@property
|
||||
def files_in_archive(self, force_refresh=False):
|
||||
if self._files_in_archive and not force_refresh:
|
||||
return self._files_in_archive
|
||||
|
||||
cmd = '%s -t%s' % (self.cmd_path, self.zipflag)
|
||||
if self.opts:
|
||||
cmd += ' ' + ' '.join(self.opts)
|
||||
if self.excludes:
|
||||
cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"'
|
||||
cmd += ' -f "%s"' % self.src
|
||||
rc, out, err = self.module.run_command(cmd)
|
||||
if rc != 0:
|
||||
# The use of Python's tarfile module here allows us to easily avoid tricky file encoding
|
||||
# problems. Ref #11348
|
||||
try:
|
||||
tf = tarfile.open(fileobj=self._get_tar_fileobj())
|
||||
except Exception:
|
||||
raise UnarchiveError('Unable to list files in the archive')
|
||||
|
||||
for filename in out.splitlines():
|
||||
for filename in tf.getnames():
|
||||
if filename and filename not in self.excludes:
|
||||
self._files_in_archive.append(filename)
|
||||
return self._files_in_archive
|
||||
|
@ -605,6 +609,19 @@ class TarXzArchive(TgzArchive):
|
|||
super(TarXzArchive, self).__init__(src, dest, file_args, module)
|
||||
self.zipflag = 'J'
|
||||
|
||||
def _get_tar_fileobj(self):
|
||||
# Python's tarfile module doesn't support xz compression so we have to manually uncompress
|
||||
# it first.
|
||||
xz_bin_path = self.module.get_bin_path('xz')
|
||||
xz_stdout = tempfile.TemporaryFile()
|
||||
# we don't use self.module.run_command() to avoid loading the whole archive in memory.
|
||||
cmd = subprocess.Popen([xz_bin_path, '-dc', self.src], stdout=xz_stdout)
|
||||
rc = cmd.wait()
|
||||
if rc != 0:
|
||||
raise UnarchiveError("Could not uncompress with xz")
|
||||
xz_stdout.seek(0)
|
||||
return xz_stdout
|
||||
|
||||
|
||||
# try handlers in order and return the one that works or bail if none work
|
||||
def pick_handler(src, dest, file_args, module):
|
||||
|
|
Loading…
Reference in a new issue