From 1e026cb8ac049b68bb3ae164bd207d297e15acfb Mon Sep 17 00:00:00 2001
From: Brian Coca <brian.coca+git@gmail.com>
Date: Sun, 29 Mar 2015 02:25:12 -0400
Subject: [PATCH] find module updated to be able to get checksums and stat
 data, also now it only stats a file once so it should be quite faster on
 large file sets

---
 files/find.py | 299 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 299 insertions(+)
 create mode 100644 files/find.py

diff --git a/files/find.py b/files/find.py
new file mode 100644
index 00000000000..d5441aad273
--- /dev/null
+++ b/files/find.py
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# (c) 2014, Ruggero Marchei <ruggero.marchei@daemonzone.net>
+# (c) 2015, Brian Coca <bcoca@ansible.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>
+
+
+import os
+import stat
+import fnmatch
+import time
+import re
+import shutil
+
+
+DOCUMENTATION = '''
+---
+module: find
+author: Brian Coca (based on Ruggero Marchei's Tidy)
+version_added: "2.0"
+short_description: return a list of files based on specific criteria
+requirements: []
+description:
+    - Return a list of files based on specific criteria. Multiple criteria are AND'd together.
+options:
+    age:
+        required: false
+        default: null
+        description:
+            - Select files whose age is equal to or greater than the specified time.
+              Use a negative age to find files equal to or less than the specified time.
+              You can choose seconds, minutes, hours, days, or weeks by specifying the
+              first letter of any of those words (e.g., "1w").
+    patterns:
+        required: false
+        default: '*'
+        description:
+            - One or more (shell type) file glob patterns, which restrict the list of files to be returned to
+              those whose basenames match at least one of the patterns specified.  Multiple patterns can be
+              specified using a list.
+    paths:
+        required: true
+        aliases: [ "name" ]
+        description:
+            - List of paths to the file or directory to search. All paths must be fully qualified.
+    file_type:
+        required: false
+        description:
+            - Type of file to select
+        choices: [ "file", "directory" ]
+        default: "file"
+    recurse:
+        required: false
+        default: "no"
+        choices: [ "yes", "no" ]
+        description:
+            - If target is a directory, recursively descend into the directory looking for files.
+    size:
+        required: false
+        default: null
+        description:
+            - Select files whose size is equal to or greater than the specified size.
+              Use a negative size to find files equal to or less than the specified size.
+              Unqualified values are in bytes, but b, k, m, g, and t can be appended to specify
+              bytes, kilobytes, megabytes, gigabytes, and terabytes, respectively.
+              Size is not evaluated for directories.
+    age_stamp:
+        required: false
+        default: "mtime"
+        choices: [ "atime", "mtime", "ctime" ]
+        description:
+            - Choose the file property against which we compare age. Default is mtime.
+    hidden:
+        required: false
+        default: "False"
+        choices: [ True, False ]
+        description:
+            - Set this to true to include hidden files, otherwise they'll be ignored.
+    follow:
+        required: false
+        default: "False"
+        choices: [ True, False ]
+        description:
+            - Set this to true to follow symlinks in path.
+    get_checksum:
+        required: false
+        default: "False"
+        choices: [ True, False ]
+        description:
+            - Set this to true to retrieve a file's sha1 checksum
+'''
+
+
+EXAMPLES = '''
+# Recursively find /tmp files older than 2 days
+- find: paths="/tmp" age="2d" recurse=yes
+
+# Recursively find /tmp files older than 4 weeks and equal or greater than 1 megabyte
+- find: paths="/tmp" age="4w" size="1m" recurse=yes
+
+# Recursively find /var/tmp files with last access time greater than 3600 seconds
+- find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes
+
+# find /var/log files equal or greater than 10 megabytes ending with .log or .log.gz
+- find: paths="/var/log" patterns="*.log","*.log.gz" size="10m"
+'''
+
+RETURN = '''
+files:
+    description: all matches found with the specified criteria (see stat module for full output of each dictionary)
+    returned: success
+    type: list of dictionaries
+    sample: [
+        { path="/var/tmp/test1",
+          mode=0644,
+          ...,
+          checksum=16fac7be61a6e4591a33ef4b729c5c3302307523
+        },
+        { path="/var/tmp/test2",
+          ...
+        },
+        ]
+matched:
+    description: number of matches
+    returned: success
+    type: int
+    sample: 14
+examined:
+    description: number of filesystem objects looked at
+    returned: success
+    type: int
+    sample: 34
+'''
+
+def pfilter(f, patterns=None):
+    '''Report whether name f is accepted by the glob filter.
+
+    patterns of None disables filtering; otherwise f has to match at least
+    one of the shell-style globs in patterns (an empty list matches nothing).
+    '''
+    accepted = patterns is None or any(fnmatch.fnmatch(f, glob) for glob in patterns)
+    return accepted
+
+
+def agefilter(st, now, age, timestamp):
+    '''check the st_<timestamp> age (in seconds, relative to now) against age'''
+    if age is None:
+        return True
+    elapsed = now - getattr(st, "st_%s" % timestamp)
+    if age < 0:  # negative: keep entries no older than abs(age)
+        return elapsed <= abs(age)
+    return elapsed >= abs(age)  # zero/positive: keep entries at least that old
+
+
+def sizefilter(st, size):
+    '''check st.st_size against size (both in bytes)'''
+    if size is None:
+        # no size criterion requested
+        return True
+    if size < 0:
+        # negative: keep entries no larger than abs(size)
+        return st.st_size <= abs(size)
+    return st.st_size >= abs(size)  # zero/positive: keep entries at least that large
+
+
+def statinfo(st):
+    '''Turn a stat result into the dictionary reported for each match.
+
+    mode is the zero-padded octal permission string, the is* and [rwx]*
+    entries are flags derived from st_mode, the rest is copied from st as is.
+    '''
+    mode = st.st_mode
+    info = {
+        'mode': "%04o" % stat.S_IMODE(mode),
+        'uid': st.st_uid, 'gid': st.st_gid, 'size': st.st_size,
+        'inode': st.st_ino, 'dev': st.st_dev, 'nlink': st.st_nlink,
+        'atime': st.st_atime, 'mtime': st.st_mtime, 'ctime': st.st_ctime,
+    }
+    # file type predicates from the stat module, keyed by result name
+    type_checks = (
+        ('isdir', stat.S_ISDIR), ('ischr', stat.S_ISCHR), ('isblk', stat.S_ISBLK),
+        ('isreg', stat.S_ISREG), ('isfifo', stat.S_ISFIFO), ('islnk', stat.S_ISLNK),
+        ('issock', stat.S_ISSOCK),
+    )
+    for key, check in type_checks:
+        info[key] = check(mode)
+    # single-bit flags: the nine permission bits plus setuid/setgid
+    bit_flags = (
+        ('wusr', stat.S_IWUSR), ('rusr', stat.S_IRUSR), ('xusr', stat.S_IXUSR),
+        ('wgrp', stat.S_IWGRP), ('rgrp', stat.S_IRGRP), ('xgrp', stat.S_IXGRP),
+        ('woth', stat.S_IWOTH), ('roth', stat.S_IROTH), ('xoth', stat.S_IXOTH),
+        ('isuid', stat.S_ISUID), ('isgid', stat.S_ISGID),
+    )
+    for key, bit in bit_flags:
+        info[key] = bool(mode & bit)
+    return info
+
+
+def main():
+    '''Walk the requested paths and report every entry matching the filters.
+
+    Results go to module.exit_json as files, matched, examined and msg (notes
+    on skipped paths/entries); an unparsable age or size goes to fail_json.
+    '''
+    module = AnsibleModule(
+        argument_spec = dict(
+            paths         = dict(required=True, aliases=['name'], type='list'),
+            patterns      = dict(default=['*'], type='list'),
+            file_type     = dict(default="file", choices=['file', 'directory'], type='str'),
+            age           = dict(default=None, type='str'),
+            age_stamp     = dict(default="mtime", choices=['atime','mtime','ctime'], type='str'),
+            size          = dict(default=None, type='str'),
+            recurse       = dict(default='no', type='bool'),
+            hidden        = dict(default="False", type='bool'),
+            follow        = dict(default="False", type='bool'),
+            get_checksum  = dict(default="False", type='bool'),
+        ),
+    )
+    params = module.params
+    filelist = []
+
+    if params['age'] is None:
+        age = None
+    else:
+        # convert age to seconds (a missing unit means seconds):
+        m = re.match(r"^(-?\d+)(s|m|h|d|w)?$", params['age'].lower())
+        seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
+        if m:
+            age = int(m.group(1)) * seconds_per_unit.get(m.group(2), 1)
+        else:
+            module.fail_json(age=params['age'], msg="failed to process age")
+
+    if params['size'] is None:
+        size = None
+    else:
+        # convert size to bytes (a missing unit means bytes):
+        m = re.match(r"^(-?\d+)(b|k|m|g|t)?$", params['size'].lower())
+        bytes_per_unit = {"b": 1, "k": 1024, "m": 1024**2, "g": 1024**3, "t": 1024**4}
+        if m:
+            size = int(m.group(1)) * bytes_per_unit.get(m.group(2), 1)
+        else:
+            module.fail_json(size=params['size'], msg="failed to process size")
+
+    now = time.time()
+    msg = ''
+    looked = 0
+    for npath in params['paths']:
+        if os.path.isdir(npath):
+            for root,dirs,files in os.walk( npath, followlinks=params['follow'] ):
+                looked = looked + len(files) + len(dirs)
+                for fsobj in (files + dirs):
+                    fsname=os.path.normpath(os.path.join(root, fsobj))
+                    if os.path.basename(fsname).startswith('.') and not params['hidden']:
+                        continue
+                    try:
+                        st = os.stat(fsname)
+                    except OSError:  # dangling symlink, vanished or unreadable entry
+                        msg+="%s was skipped as it does not seem to be a valid file or it cannot be accessed\n" % fsname
+                        continue
+                    r = {'path': fsname}
+                    if stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory':
+                        if pfilter(fsobj, params['patterns']) and agefilter(st, now, age, params['age_stamp']):
+                            r.update(statinfo(st))
+                            filelist.append(r)
+                    elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file':
+                        if pfilter(fsobj, params['patterns']) and \
+                           agefilter(st, now, age, params['age_stamp']) and \
+                           sizefilter(st, size):
+                            r.update(statinfo(st))
+                            if params['get_checksum']:
+                                r['checksum'] = module.sha1(fsname)
+                            filelist.append(r)
+                if not params['recurse']:
+                    break  # the first os.walk step is npath itself
+        else:
+            msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" % npath
+
+    matched = len(filelist)
+    module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked)
+
+# import module snippets (the star import is presumably what brings AnsibleModule into scope)
+from ansible.module_utils.basic import *
+main()
+