#!/usr/bin/python # -*- coding: utf-8 -*- # (c) 2012, Michael DeHaan # (c) 2013, Dylan Martin # (c) 2015, Toshio Kuratomi # (c) 2016, Dag Wieers # # This file is part of Ansible # # Ansible is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Ansible is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Ansible. If not, see . DOCUMENTATION = ''' --- module: unarchive version_added: 1.4 short_description: Unpacks an archive after (optionally) copying it from the local machine. extends_documentation_fragment: files description: - The M(unarchive) module unpacks an archive. By default, it will copy the source file from the local system to the target before unpacking - set copy=no to unpack an archive which already exists on the target.. options: src: description: - If copy=yes (default), local path to archive file to copy to the target server; can be absolute or relative. If copy=no, path on the target server to existing archive file to unpack. - If copy=no and src contains ://, the remote machine will download the file from the url first. (version_added 2.0) required: true default: null dest: description: - Remote absolute path where the archive should be unpacked required: true default: null copy: description: - "If true, the file is copied from local 'master' to the target machine, otherwise, the plugin will look for src archive at the target machine." required: false choices: [ "yes", "no" ] default: "yes" creates: description: - a filename, when it already exists, this step will B(not) be run. required: no default: null version_added: "1.6" list_files: description: - If set to True, return the list of files that are contained in the tarball. required: false choices: [ "yes", "no" ] default: "no" version_added: "2.0" exclude: description: - List the directory and file entries that you would like to exclude from the unarchive action. required: false default: [] version_added: "2.1" keep_newer: description: - Do not replace existing files that are newer than files from the archive. required: false default: no version_added: "2.1" extra_opts: description: - Specify additional options by passing in an array. default: required: false version_added: "2.1" validate_certs: description: - This only applies if using a https url as the source of the file. - This should only set to C(no) used on personally controlled sites using self-signed cer - Prior to 2.2 the code worked as if this was set to C(yes). required: false default: "yes" choices: ["yes", "no"] version_added: "2.2" author: "Dag Wieers (@dagwieers)" todo: - re-implement tar support using native tarfile module - re-implement zip support using native zipfile module notes: - requires C(gtar)/C(unzip) command on target host - can handle I(gzip), I(bzip2) and I(xz) compressed as well as uncompressed tar files - detects type of archive automatically - uses gtar's C(--diff arg) to calculate if changed or not. If this C(arg) is not supported, it will always unpack the archive - existing files/directories in the destination which are not in the archive are not touched. This is the same behavior as a normal archive extraction - existing files/directories in the destination which are not in the archive are ignored for purposes of deciding if the archive should be unpacked or not ''' EXAMPLES = ''' # Example from Ansible Playbooks - unarchive: src=foo.tgz dest=/var/lib/foo # Unarchive a file that is already on the remote machine - unarchive: src=/tmp/foo.zip dest=/usr/local/bin copy=no # Unarchive a file that needs to be downloaded (added in 2.0) - unarchive: src=https://example.com/example.zip dest=/usr/local/bin copy=no ''' import re import os import stat import pwd import grp import datetime import time import binascii import codecs from zipfile import ZipFile, BadZipfile # String from tar that shows the tar contents are different from the # filesystem OWNER_DIFF_RE = re.compile(r': Uid differs$') GROUP_DIFF_RE = re.compile(r': Gid differs$') MODE_DIFF_RE = re.compile(r': Mode differs$') MOD_TIME_DIFF_RE = re.compile(r': Mod time differs$') #NEWER_DIFF_RE = re.compile(r' is newer or same age.$') MISSING_FILE_RE = re.compile(r': Warning: Cannot stat: No such file or directory$') ZIP_FILE_MODE_RE = re.compile(r'([r-][w-][stx-]){3}') # When downloading an archive, how much of the archive to download before # saving to a tempfile (64k) BUFSIZE = 65536 # Return a CRC32 checksum of a file def crc32(path): return binascii.crc32(open(path).read()) & 0xffffffff class UnarchiveError(Exception): pass # class to handle .zip files class ZipArchive(object): def __init__(self, src, dest, file_args, module): self.src = src self.dest = dest self.file_args = file_args self.opts = module.params['extra_opts'] self.module = module self.excludes = module.params['exclude'] self.includes = [] self.cmd_path = self.module.get_bin_path('unzip') self._files_in_archive = [] self._infodict = dict() def _permstr_to_octal(self, modestr, umask): ''' Convert a Unix permission string (rw-r--r--) into a mode (0644) ''' revstr = modestr[::-1] mode = 0 for j in range(0, 3): for i in range(0, 3): if revstr[i+3*j] in ['r', 'w', 'x', 's', 't']: mode += 2**(i+3*j) # The unzip utility does not support setting the stST bits # if revstr[i+3*j] in ['s', 't', 'S', 'T' ]: # mode += 2**(9+j) return ( mode & ~umask ) def _legacy_file_list(self, force_refresh=False): unzip_bin = self.module.get_bin_path('unzip') if not unzip_bin: raise UnarchiveError('Python Zipfile cannot read %s and unzip not found' % self.src) rc, out, err = self.module.run_command([unzip_bin, '-v', self.src]) if rc: raise UnarchiveError('Neither python zipfile nor unzip can read %s' % self.src) for line in out.splitlines()[3:-2]: fields = line.split(None, 7) self._files_in_archive.append(fields[7]) self._infodict[fields[7]] = long(fields[6]) def _crc32(self, path): if self._infodict: return self._infodict[path] try: archive = ZipFile(self.src) except BadZipfile: e = get_exception() if e.args[0].lower().startswith('bad magic number'): # Python2.4 can't handle zipfiles with > 64K files. Try using # /usr/bin/unzip instead self._legacy_file_list() else: raise else: try: for item in archive.infolist(): self._infodict[item.filename] = long(item.CRC) except: archive.close() raise UnarchiveError('Unable to list files in the archive') return self._infodict[path] @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive self._files_in_archive = [] try: archive = ZipFile(self.src) except BadZipfile: e = get_exception() if e.args[0].lower().startswith('bad magic number'): # Python2.4 can't handle zipfiles with > 64K files. Try using # /usr/bin/unzip instead self._legacy_file_list(force_refresh) else: raise else: try: for member in archive.namelist(): if member not in self.excludes: self._files_in_archive.append(member) except: archive.close() raise UnarchiveError('Unable to list files in the archive') archive.close() return self._files_in_archive def is_unarchived(self): cmd = '%s -ZT -s "%s"' % (self.cmd_path, self.src) if self.excludes: cmd += ' -x "' + '" "'.join(self.excludes) + '"' rc, out, err = self.module.run_command(cmd) old_out = out diff = '' out = '' if rc == 0: unarchived = True else: unarchived = False # Get some information related to user/group ownership umask = os.umask(0) os.umask(umask) # Get current user and group information groups = os.getgroups() run_uid = os.getuid() run_gid = os.getgid() try: run_owner = pwd.getpwuid(run_uid).pw_name except: run_owner = run_uid try: run_group = grp.getgrgid(run_gid).gr_name except: run_group = run_gid # Get future user ownership fut_owner = fut_uid = None if self.file_args['owner']: try: tpw = pwd.getpwname(self.file_args['owner']) except: try: tpw = pwd.getpwuid(self.file_args['owner']) except: tpw = pwd.getpwuid(run_uid) fut_owner = tpw.pw_name fut_uid = tpw.pw_uid else: try: fut_owner = run_owner except: pass fut_uid = run_uid # Get future group ownership fut_group = fut_gid = None if self.file_args['group']: try: tgr = grp.getgrnam(self.file_args['group']) except: try: tgr = grp.getgrgid(self.file_args['group']) except: tgr = grp.getgrgid(run_gid) fut_group = tgr.gr_name fut_gid = tgr.gr_gid else: try: fut_group = run_group except: pass fut_gid = run_gid for line in old_out.splitlines(): change = False pcs = line.split(None, 7) if len(pcs) != 8: # Too few fields... probably a piece of the header or footer continue # Check first and seventh field in order to skip header/footer if len(pcs[0]) != 7 and len(pcs[0]) != 10: continue if len(pcs[6]) != 15: continue if pcs[0][0] not in 'dl-?' or not frozenset(pcs[0][1:]).issubset('rwxst-'): continue ztype = pcs[0][0] permstr = pcs[0][1:] version = pcs[1] ostype = pcs[2] size = int(pcs[3]) path = pcs[7] # Skip excluded files if path in self.excludes: out += 'Path %s is excluded on request\n' % path continue # Itemized change requires L for symlink if path[-1] == '/': if ztype != 'd': err += 'Path %s incorrectly tagged as "%s", but is a directory.\n' % (path, ztype) ftype = 'd' elif ztype == 'l': ftype = 'L' elif ztype == '-': ftype = 'f' elif ztype == '?': ftype = 'f' # Some files may be storing FAT permissions, not Unix permissions if len(permstr) == 6: if path[-1] == '/': permstr = 'rwxrwxrwx' elif permstr == 'rwx---': permstr = 'rwxrwxrwx' else: permstr = 'rw-rw-rw-' # Test string conformity if len(permstr) != 9 or not ZIP_FILE_MODE_RE.match(permstr): raise UnarchiveError('ZIP info perm format incorrect, %s' % permstr) # DEBUG # err += "%s%s %10d %s\n" % (ztype, permstr, size, path) dest = os.path.join(self.dest, path) try: st = os.lstat(dest) except: change = True self.includes.append(path) err += 'Path %s is missing\n' % path diff += '>%s++++++.?? %s\n' % (ftype, path) continue # Compare file types if ftype == 'd' and not stat.S_ISDIR(st.st_mode): change = True self.includes.append(path) err += 'File %s already exists, but not as a directory\n' % path diff += 'c%s++++++.?? %s\n' % (ftype, path) continue if ftype == 'f' and not stat.S_ISREG(st.st_mode): change = True unarchived = False self.includes.append(path) err += 'Directory %s already exists, but not as a regular file\n' % path diff += 'c%s++++++.?? %s\n' % (ftype, path) continue if ftype == 'L' and not stat.S_ISLNK(st.st_mode): change = True self.includes.append(path) err += 'Directory %s already exists, but not as a symlink\n' % path diff += 'c%s++++++.?? %s\n' % (ftype, path) continue itemized = list('.%s.......??' % ftype) # Note: this timestamp calculation has a rounding error # somewhere... unzip and this timestamp can be one second off # When that happens, we report a change and re-unzip the file dt_object = datetime.datetime(*(time.strptime(pcs[6], '%Y%m%d.%H%M%S')[0:6])) timestamp = time.mktime(dt_object.timetuple()) # Compare file timestamps if stat.S_ISREG(st.st_mode): if self.module.params['keep_newer']: if timestamp > st.st_mtime: change = True self.includes.append(path) err += 'File %s is older, replacing file\n' % path itemized[4] = 't' elif stat.S_ISREG(st.st_mode) and timestamp < st.st_mtime: # Add to excluded files, ignore other changes out += 'File %s is newer, excluding file\n' % path continue else: if timestamp != st.st_mtime: change = True self.includes.append(path) err += 'File %s differs in mtime (%f vs %f)\n' % (path, timestamp, st.st_mtime) itemized[4] = 't' # Compare file sizes if stat.S_ISREG(st.st_mode) and size != st.st_size: change = True err += 'File %s differs in size (%d vs %d)\n' % (path, size, st.st_size) itemized[3] = 's' # Compare file checksums if stat.S_ISREG(st.st_mode): crc = crc32(dest) if crc != self._crc32(path): change = True err += 'File %s differs in CRC32 checksum (0x%08x vs 0x%08x)\n' % (path, self._crc32(path), crc) itemized[2] = 'c' # Compare file permissions # Do not handle permissions of symlinks if ftype != 'L': # Use the new mode provided with the action, if there is one if self.file_args['mode']: if isinstance(self.file_args['mode'], int): mode = self.file_args['mode'] else: try: mode = int(self.file_args['mode'], 8) except Exception: e = get_exception() self.module.fail_json(path=path, msg="mode %(mode)s must be in octal form" % self.file_args, details=str(e)) # Only special files require no umask-handling elif ztype == '?': mode = self._permstr_to_octal(permstr, 0) else: mode = self._permstr_to_octal(permstr, umask) if mode != stat.S_IMODE(st.st_mode): change = True itemized[5] = 'p' err += 'Path %s differs in permissions (%o vs %o)\n' % (path, mode, stat.S_IMODE(st.st_mode)) # Compare file user ownership owner = uid = None try: owner = pwd.getpwuid(st.st_uid).pw_name except: uid = st.st_uid # If we are not root and requested owner is not our user, fail if run_uid != 0 and (fut_owner != run_owner or fut_uid != run_uid): raise UnarchiveError('Cannot change ownership of %s to %s, as user %s' % (path, fut_owner, run_owner)) if owner and owner != fut_owner: change = True err += 'Path %s is owned by user %s, not by user %s as expected\n' % (path, owner, fut_owner) itemized[6] = 'o' elif uid and uid != fut_uid: change = True err += 'Path %s is owned by uid %s, not by uid %s as expected\n' % (path, uid, fut_uid) itemized[6] = 'o' # Compare file group ownership group = gid = None try: group = grp.getgrgid(st.st_gid).gr_name except: gid = st.st_gid if run_uid != 0 and fut_gid not in groups: raise UnarchiveError('Cannot change group ownership of %s to %s, as user %s' % (path, fut_group, run_owner)) if group and group != fut_group: change = True err += 'Path %s is owned by group %s, not by group %s as expected\n' % (path, group, fut_group) itemized[6] = 'g' elif gid and gid != fut_gid: change = True err += 'Path %s is owned by gid %s, not by gid %s as expected\n' % (path, gid, fut_gid) itemized[6] = 'g' # Register changed files and finalize diff output if change: if path not in self.includes: self.includes.append(path) diff += '%s %s\n' % (''.join(itemized), path) if self.includes: unarchived = False # DEBUG # out = old_out + out return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd, diff=diff) def unarchive(self): cmd = '%s -o "%s"' % (self.cmd_path, self.src) if self.opts: cmd += ' ' + ' '.join(self.opts) if self.includes: cmd += ' "' + '" "'.join(self.includes) + '"' # We don't need to handle excluded files, since we simply do not include them # if self.excludes: # cmd += ' -x ' + ' '.join(self.excludes) cmd += ' -d "%s"' % self.dest rc, out, err = self.module.run_command(cmd) return dict(cmd=cmd, rc=rc, out=out, err=err) def can_handle_archive(self): if not self.cmd_path: return False cmd = '%s -l "%s"' % (self.cmd_path, self.src) rc, out, err = self.module.run_command(cmd) if rc == 0: return True return False # class to handle gzipped tar files class TgzArchive(object): def __init__(self, src, dest, file_args, module): self.src = src self.dest = dest self.file_args = file_args self.opts = module.params['extra_opts'] self.module = module self.excludes = [ path.rstrip('/') for path in self.module.params['exclude']] # Prefer gtar (GNU tar) as it supports the compression options -zjJ self.cmd_path = self.module.get_bin_path('gtar', None) if not self.cmd_path: # Fallback to tar self.cmd_path = self.module.get_bin_path('tar') self.zipflag = 'z' self.compress_mode = 'gz' self._files_in_archive = [] @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive cmd = '%s -t%s' % (self.cmd_path, self.zipflag) if self.opts: cmd += ' ' + ' '.join(self.opts) if self.excludes: cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' cmd += ' -f "%s"' % self.src rc, out, err = self.module.run_command(cmd) if rc != 0: raise UnarchiveError('Unable to list files in the archive') for filename in out.splitlines(): # Compensate for locale-related problems in gtar output (octal unicode representation) #11348 # filename = filename.decode('string_escape') filename = codecs.escape_decode(filename)[0] if filename and filename not in self.excludes: self._files_in_archive.append(filename) return self._files_in_archive def is_unarchived(self): cmd = '%s -C "%s" -d%s' % (self.cmd_path, self.dest, self.zipflag) if self.opts: cmd += ' ' + ' '.join(self.opts) if self.file_args['owner']: cmd += ' --owner="%s"' % self.file_args['owner'] if self.file_args['group']: cmd += ' --group="%s"' % self.file_args['group'] if self.file_args['mode']: cmd += ' --mode="%s"' % self.file_args['mode'] if self.module.params['keep_newer']: cmd += ' --keep-newer-files' if self.excludes: cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' cmd += ' -f "%s"' % self.src rc, out, err = self.module.run_command(cmd) # Check whether the differences are in something that we're # setting anyway # What is different unarchived = True old_out = out out = '' run_uid = os.getuid() # When unarchiving as a user, or when owner/group/mode is supplied --diff is insufficient # Only way to be sure is to check request with what is on disk (as we do for zip) # Leave this up to set_fs_attributes_if_different() instead of inducing a (false) change for line in old_out.splitlines() + err.splitlines(): if run_uid == 0 and not self.file_args['owner'] and OWNER_DIFF_RE.search(line): out += line + '\n' if run_uid == 0 and not self.file_args['group'] and GROUP_DIFF_RE.search(line): out += line + '\n' if not self.file_args['mode'] and MODE_DIFF_RE.search(line): out += line + '\n' if MOD_TIME_DIFF_RE.search(line): out += line + '\n' if MISSING_FILE_RE.search(line): out += line + '\n' if out: unarchived = False return dict(unarchived=unarchived, rc=rc, out=out, err=err, cmd=cmd) def unarchive(self): cmd = '%s -C "%s" -x%s' % (self.cmd_path, self.dest, self.zipflag) if self.opts: cmd += ' ' + ' '.join(self.opts) if self.file_args['owner']: cmd += ' --owner="%s"' % self.file_args['owner'] if self.file_args['group']: cmd += ' --group="%s"' % self.file_args['group'] if self.file_args['mode']: cmd += ' --mode="%s"' % self.file_args['mode'] if self.module.params['keep_newer']: cmd += ' --keep-newer-files' if self.excludes: cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' cmd += ' -f "%s"' % (self.src) rc, out, err = self.module.run_command(cmd, cwd=self.dest) return dict(cmd=cmd, rc=rc, out=out, err=err) def can_handle_archive(self): if not self.cmd_path: return False try: if self.files_in_archive: return True except UnarchiveError: pass # Errors and no files in archive assume that we weren't able to # properly unarchive it return False # class to handle tar files that aren't compressed class TarArchive(TgzArchive): def __init__(self, src, dest, file_args, module): super(TarArchive, self).__init__(src, dest, file_args, module) # argument to tar self.zipflag = '' # parameter for python tarfile library self.compress_mode = '' # class to handle bzip2 compressed tar files class TarBzipArchive(TgzArchive): def __init__(self, src, dest, file_args, module): super(TarBzipArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'j' self.compress_mode = 'bz2' # class to handle xz compressed tar files class TarXzArchive(TgzArchive): def __init__(self, src, dest, file_args, module): super(TarXzArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'J' self.compress_mode = '' # try handlers in order and return the one that works or bail if none work def pick_handler(src, dest, file_args, module): handlers = [TgzArchive, ZipArchive, TarArchive, TarBzipArchive, TarXzArchive] for handler in handlers: obj = handler(src, dest, file_args, module) if obj.can_handle_archive(): return obj module.fail_json(msg='Failed to find handler for "%s". Make sure the required command to extract the file is installed.' % src) def main(): module = AnsibleModule( # not checking because of daisy chain to file module argument_spec = dict( src = dict(required=True, type='path'), original_basename = dict(required=False, type='str'), # used to handle 'dest is a directory' via template, a slight hack dest = dict(required=True, type='path'), copy = dict(default=True, type='bool'), creates = dict(required=False, type='path'), list_files = dict(required=False, default=False, type='bool'), keep_newer = dict(required=False, default=False, type='bool'), exclude = dict(required=False, default=[], type='list'), extra_opts = dict(required=False, default=[], type='list'), validate_certs = dict(required=False, default=True, type='bool'), ), add_file_common_args = True, # check-mode only works for zip files # supports_check_mode = True, ) # We screenscrape a huge amount of commands so use C locale anytime we do module.run_command_environ_update = dict(LANG='C', LC_ALL='C', LC_MESSAGES='C', LC_CTYPE='C') src = os.path.expanduser(module.params['src']) dest = os.path.expanduser(module.params['dest']) copy = module.params['copy'] file_args = module.load_file_common_arguments(module.params) # did tar file arrive? if not os.path.exists(src): if copy: module.fail_json(msg="Source '%s' failed to transfer" % src) # If copy=false, and src= contains ://, try and download the file to a temp directory. elif '://' in src: tempdir = os.path.dirname(os.path.realpath(__file__)) package = os.path.join(tempdir, str(src.rsplit('/', 1)[1])) try: rsp, info = fetch_url(module, src) # If download fails, raise a proper exception if rsp is None: raise Exception(info['msg']) f = open(package, 'w') # Read 1kb at a time to save on ram while True: data = rsp.read(BUFSIZE) if data == "": break # End of file, break while loop f.write(data) f.close() src = package except Exception: e = get_exception() module.fail_json(msg="Failure downloading %s, %s" % (src, e)) else: module.fail_json(msg="Source '%s' does not exist" % src) if not os.access(src, os.R_OK): module.fail_json(msg="Source '%s' not readable" % src) # skip working with 0 size archives try: if os.path.getsize(src) == 0: module.fail_json(msg="Invalid archive '%s', the file is 0 bytes" % src) except Exception: e = get_exception() module.fail_json(msg="Source '%s' not readable" % src) # is dest OK to receive tar file? if not os.path.isdir(dest): module.fail_json(msg="Destination '%s' is not a directory" % dest) handler = pick_handler(src, dest, file_args, module) res_args = dict(handler=handler.__class__.__name__, dest=dest, src=src) # do we need to do unpack? check_results = handler.is_unarchived() # DEBUG # res_args['check_results'] = check_results if check_results['unarchived']: res_args['changed'] = False else: # do the unpack try: res_args['extract_results'] = handler.unarchive() if res_args['extract_results']['rc'] != 0: module.fail_json(msg="failed to unpack %s to %s" % (src, dest), **res_args) except IOError: module.fail_json(msg="failed to unpack %s to %s" % (src, dest), **res_args) else: res_args['changed'] = True if check_results.get('diff', False): res_args['diff'] = { 'prepared': check_results['diff'] } # Run only if we found differences (idempotence) or diff was missing if res_args.get('diff', True): # do we need to change perms? for filename in handler.files_in_archive: file_args['path'] = os.path.join(dest, filename) try: res_args['changed'] = module.set_fs_attributes_if_different(file_args, res_args['changed']) except (IOError, OSError): e = get_exception() module.fail_json(msg="Unexpected error when accessing exploded file: %s" % str(e), **res_args) if module.params['list_files']: res_args['files'] = handler.files_in_archive module.exit_json(**res_args) # import module snippets from ansible.module_utils.basic import * from ansible.module_utils.urls import * if __name__ == '__main__': main()