9730157525
* Adds another module utility file which generalizes the access of urls via the urllib* libraries. * Adds a new spec generator for common arguments. * Makes the user-agent string configurable. Fixes #6211
293 lines
10 KiB
Python
293 lines
10 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# (c) 2012, Jan-Piet Mens <jpmens () gmail.com>
|
|
#
|
|
# This file is part of Ansible
|
|
#
|
|
# Ansible is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Ansible is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
|
#
|
|
# see examples/playbooks/get_url.yml
|
|
|
|
import shutil
|
|
import datetime
|
|
import re
|
|
import tempfile
|
|
|
|
DOCUMENTATION = '''
|
|
---
|
|
module: get_url
|
|
short_description: Downloads files from HTTP, HTTPS, or FTP to node
|
|
description:
|
|
- Downloads files from HTTP, HTTPS, or FTP to the remote server. The remote
|
|
server I(must) have direct access to the remote resource.
|
|
- By default, if an environment variable C(<protocol>_proxy) is set on
|
|
the target host, requests will be sent through that proxy. This
|
|
behaviour can be overridden by setting a variable for this task
|
|
(see `setting the environment
|
|
<http://docs.ansible.com/playbooks_environment.html>`_),
|
|
or by using the use_proxy option.
|
|
version_added: "0.6"
|
|
options:
|
|
url:
|
|
description:
|
|
- HTTP, HTTPS, or FTP URL in the form (http|https|ftp)://[user[:pass]]@host.domain[:port]/path
|
|
required: true
|
|
default: null
|
|
aliases: []
|
|
dest:
|
|
description:
|
|
- absolute path of where to download the file to.
|
|
- If C(dest) is a directory, either the server provided filename or, if
|
|
none provided, the base name of the URL on the remote server will be
|
|
used. If a directory, C(force) has no effect.
|
|
If C(dest) is a directory, the file will always be
|
|
downloaded (regardless of the force option), but replaced only if the contents changed.
|
|
required: true
|
|
default: null
|
|
force:
|
|
description:
|
|
- If C(yes) and C(dest) is not a directory, will download the file every
|
|
time and replace the file if the contents change. If C(no), the file
|
|
will only be downloaded if the destination does not exist. Generally
|
|
should be C(yes) only for small local files. Prior to 0.6, this module
|
|
behaved as if C(yes) was the default.
|
|
version_added: "0.7"
|
|
required: false
|
|
choices: [ "yes", "no" ]
|
|
default: "no"
|
|
aliases: [ "thirsty" ]
|
|
sha256sum:
|
|
description:
|
|
- If a SHA-256 checksum is passed to this parameter, the digest of the
|
|
destination file will be calculated after it is downloaded to ensure
|
|
its integrity and verify that the transfer completed successfully.
|
|
version_added: "1.3"
|
|
required: false
|
|
default: null
|
|
use_proxy:
|
|
description:
|
|
- if C(no), it will not use a proxy, even if one is defined in
|
|
an environment variable on the target hosts.
|
|
required: false
|
|
default: 'yes'
|
|
choices: ['yes', 'no']
|
|
validate_certs:
|
|
description:
|
|
- If C(no), SSL certificates will not be validated. This should only be used
|
|
on personally controlled sites using self-signed certificates.
|
|
required: false
|
|
default: 'yes'
|
|
choices: ['yes', 'no']
|
|
others:
|
|
description:
|
|
- all arguments accepted by the M(file) module also work here
|
|
required: false
|
|
notes:
|
|
- This module doesn't yet support configuration for proxies.
|
|
# informational: requirements for nodes
|
|
requirements: [ urllib2, urlparse ]
|
|
author: Jan-Piet Mens
|
|
'''
|
|
|
|
EXAMPLES='''
|
|
- name: download foo.conf
|
|
get_url: url=http://example.com/path/file.conf dest=/etc/foo.conf mode=0440
|
|
|
|
- name: download file with sha256 check
|
|
get_url: url=http://example.com/path/file.conf dest=/etc/foo.conf sha256sum=b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c
|
|
'''
|
|
|
|
try:
|
|
import hashlib
|
|
HAS_HASHLIB=True
|
|
except ImportError:
|
|
HAS_HASHLIB=False
|
|
|
|
# ==============================================================
|
|
# url handling
|
|
|
|
def url_filename(url):
|
|
fn = os.path.basename(urlparse.urlsplit(url)[2])
|
|
if fn == '':
|
|
return 'index.html'
|
|
return fn
|
|
|
|
def url_get(module, url, dest, use_proxy, last_mod_time, force, validate_certs):
|
|
"""
|
|
Download data from the url and store in a temporary file.
|
|
|
|
Return (tempfile, info about the request)
|
|
"""
|
|
|
|
rsp, info = fetch_url(module, url, use_proxy=use_proxy, force=force, last_mod_time=last_mod_time, validate_certs=validate_certs)
|
|
|
|
if info['status'] == 304:
|
|
module.exit_json(url=url, dest=dest, changed=False, msg=info.get('msg', ''))
|
|
|
|
# create a temporary file and copy content to do md5-based replacement
|
|
if info['status'] != 200:
|
|
module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], url=url, dest=dest)
|
|
|
|
fd, tempname = tempfile.mkstemp()
|
|
f = os.fdopen(fd, 'wb')
|
|
try:
|
|
shutil.copyfileobj(rsp, f)
|
|
except Exception, err:
|
|
os.remove(tempname)
|
|
module.fail_json(msg="failed to create temporary content file: %s" % str(err))
|
|
f.close()
|
|
rsp.close()
|
|
return tempname, info
|
|
|
|
def extract_filename_from_headers(headers):
|
|
"""
|
|
Extracts a filename from the given dict of HTTP headers.
|
|
|
|
Looks for the content-disposition header and applies a regex.
|
|
Returns the filename if successful, else None."""
|
|
cont_disp_regex = 'attachment; ?filename="(.+)"'
|
|
res = None
|
|
|
|
if 'content-disposition' in headers:
|
|
cont_disp = headers['content-disposition']
|
|
match = re.match(cont_disp_regex, cont_disp)
|
|
if match:
|
|
res = match.group(1)
|
|
# Try preventing any funny business.
|
|
res = os.path.basename(res)
|
|
|
|
return res
|
|
|
|
# ==============================================================
|
|
# main
|
|
|
|
def main():
|
|
|
|
argument_spec = url_argument_spec()
|
|
argument_spec.update(
|
|
dest = dict(required=True),
|
|
sha256sum = dict(default=''),
|
|
)
|
|
|
|
module = AnsibleModule(
|
|
# not checking because of daisy chain to file module
|
|
argument_spec = argument_spec,
|
|
add_file_common_args=True
|
|
)
|
|
|
|
url = module.params['url']
|
|
dest = os.path.expanduser(module.params['dest'])
|
|
force = module.params['force']
|
|
sha256sum = module.params['sha256sum']
|
|
use_proxy = module.params['use_proxy']
|
|
validate_certs = module.params['validate_certs']
|
|
|
|
dest_is_dir = os.path.isdir(dest)
|
|
last_mod_time = None
|
|
|
|
if not dest_is_dir and os.path.exists(dest):
|
|
if not force:
|
|
module.exit_json(msg="file already exists", dest=dest, url=url, changed=False)
|
|
|
|
# If the file already exists, prepare the last modified time for the
|
|
# request.
|
|
mtime = os.path.getmtime(dest)
|
|
last_mod_time = datetime.datetime.utcfromtimestamp(mtime)
|
|
|
|
# download to tmpsrc
|
|
tmpsrc, info = url_get(module, url, dest, use_proxy, last_mod_time, force, validate_certs)
|
|
|
|
# Now the request has completed, we can finally generate the final
|
|
# destination file name from the info dict.
|
|
|
|
if dest_is_dir:
|
|
filename = extract_filename_from_headers(info)
|
|
if not filename:
|
|
# Fall back to extracting the filename from the URL.
|
|
# Pluck the URL from the info, since a redirect could have changed
|
|
# it.
|
|
filename = url_filename(info['url'])
|
|
dest = os.path.join(dest, filename)
|
|
|
|
md5sum_src = None
|
|
md5sum_dest = None
|
|
|
|
# raise an error if there is no tmpsrc file
|
|
if not os.path.exists(tmpsrc):
|
|
os.remove(tmpsrc)
|
|
module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'])
|
|
if not os.access(tmpsrc, os.R_OK):
|
|
os.remove(tmpsrc)
|
|
module.fail_json( msg="Source %s not readable" % (tmpsrc))
|
|
md5sum_src = module.md5(tmpsrc)
|
|
|
|
# check if there is no dest file
|
|
if os.path.exists(dest):
|
|
# raise an error if copy has no permission on dest
|
|
if not os.access(dest, os.W_OK):
|
|
os.remove(tmpsrc)
|
|
module.fail_json( msg="Destination %s not writable" % (dest))
|
|
if not os.access(dest, os.R_OK):
|
|
os.remove(tmpsrc)
|
|
module.fail_json( msg="Destination %s not readable" % (dest))
|
|
md5sum_dest = module.md5(dest)
|
|
else:
|
|
if not os.access(os.path.dirname(dest), os.W_OK):
|
|
os.remove(tmpsrc)
|
|
module.fail_json( msg="Destination %s not writable" % (os.path.dirname(dest)))
|
|
|
|
if md5sum_src != md5sum_dest:
|
|
try:
|
|
shutil.copyfile(tmpsrc, dest)
|
|
except Exception, err:
|
|
os.remove(tmpsrc)
|
|
module.fail_json(msg="failed to copy %s to %s: %s" % (tmpsrc, dest, str(err)))
|
|
changed = True
|
|
else:
|
|
changed = False
|
|
|
|
# Check the digest of the destination file and ensure that it matches the
|
|
# sha256sum parameter if it is present
|
|
if sha256sum != '':
|
|
# Remove any non-alphanumeric characters, including the infamous
|
|
# Unicode zero-width space
|
|
stripped_sha256sum = re.sub(r'\W+', '', sha256sum)
|
|
|
|
if not HAS_HASHLIB:
|
|
os.remove(dest)
|
|
module.fail_json(msg="The sha256sum parameter requires hashlib, which is available in Python 2.5 and higher")
|
|
else:
|
|
destination_checksum = module.sha256(dest)
|
|
|
|
if stripped_sha256sum != destination_checksum:
|
|
os.remove(dest)
|
|
module.fail_json(msg="The SHA-256 checksum for %s did not match %s; it was %s." % (dest, sha256sum, destination_checksum))
|
|
|
|
os.remove(tmpsrc)
|
|
|
|
# allow file attribute changes
|
|
module.params['path'] = dest
|
|
file_args = module.load_file_common_arguments(module.params)
|
|
file_args['path'] = dest
|
|
changed = module.set_file_attributes_if_different(file_args, changed)
|
|
|
|
# Mission complete
|
|
module.exit_json(url=url, dest=dest, src=tmpsrc, md5sum=md5sum_src,
|
|
sha256sum=sha256sum, changed=changed, msg=info.get('msg', ''))
|
|
|
|
# import module snippets
|
|
from ansible.module_utils.basic import *
|
|
from ansible.module_utils.urls import *
|
|
main()
|