Merge pull request #2262 from bcoca/find_regex

added regex support to find
2015-10-23 17:39:49 -04:00 · 2015-10-23 17:39:49 -04:00 · 486d233cbe
commit 486d233cbe
parent 211196bdf6 074aad23e7
1 changed files with 43 additions and 19 deletions
--- a/files/find.py
+++ b/files/find.py
@ -25,8 +25,6 @@ import stat
 import fnmatch
 import time
 import re
 import shutil
 DOCUMENTATION = '''
 ---
@ -50,17 +48,18 @@ options:
        required: false
        default: '*'
        description:
-            - One or more (shell type) file glob patterns, which restrict the list of files to be returned to
+            - One or more (shell or regex) patterns, which type is controled by C(use_regex) option.
-              those whose basenames match at least one of the patterns specified.  Multiple patterns can be
+            - The patterns restrict the list of files to be returned to those whose basenames match at
-              specified using a list.
+              least one of the patterns specified. Multiple patterns can be specified using a list.
        aliases: ['pattern']
    contains:
        required: false
        default: null
        description:
-            - One or more re patterns which should be matched against the file content 
+            - One or more re patterns which should be matched against the file content
    paths:
        required: true
-        aliases: [ "name" ]
+        aliases: [ "name", "path" ]
        description:
            - List of paths to the file or directory to search. All paths must be fully qualified.
    file_type:
@ -108,6 +107,12 @@ options:
        choices: [ True, False ]
        description:
            - Set this to true to retrieve a file's sha1 checksum
    use_regex:
        required: false
        default: "False"
        choices: [ True, False ]
        description:
            - If false the patterns are file globs (shell) if true they are python regexes
 '''
@ -121,8 +126,11 @@ EXAMPLES = '''
 # Recursively find /var/tmp files with last access time greater than 3600 seconds
 - find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes
-# find /var/log files equal or greater than 10 megabytes ending with .log or .log.gz
+# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz
- find: paths="/var/tmp" patterns="*.log","*.log.gz" size="10m"
+- find: paths="/var/tmp" patterns="'*.old','*.log.gz'" size="10m"
 # find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex
 - find: paths="/var/tmp" patterns="^.*?\.(?:old|log\.gz)$" size="10m" use_regex=True
 '''
 RETURN = '''
@ -152,13 +160,23 @@ examined:
    sample: 34
 '''
-def pfilter(f, patterns=None):
+def pfilter(f, patterns=None, use_regex=False):
    '''filter using glob patterns'''
    if patterns is None:
        return True
-    for p in patterns:
+
-        if fnmatch.fnmatch(f, p):
+    if use_regex:
-             return True
+        for p in patterns:
            r =  re.compile(p)
            if r.match(f):
                return True
    else:
        for p in patterns:
            if fnmatch.fnmatch(f, p):
                return True
    return False
@ -236,8 +254,8 @@ def statinfo(st):
 def main():
    module = AnsibleModule(
        argument_spec = dict(
-            paths         = dict(required=True, aliases=['name'], type='list'),
+            paths         = dict(required=True, aliases=['name','path'], type='list'),
-            patterns      = dict(default=['*'], type='list'),
+            patterns      = dict(default=['*'], type='list', aliases=['pattern']),
            contains      = dict(default=None, type='str'),
            file_type     = dict(default="file", choices=['file', 'directory'], type='str'),
            age           = dict(default=None, type='str'),
@ -247,6 +265,7 @@ def main():
            hidden        = dict(default="False", type='bool'),
            follow        = dict(default="False", type='bool'),
            get_checksum  = dict(default="False", type='bool'),
            use_regex     = dict(default="False", type='bool'),
        ),
    )
@ -292,16 +311,21 @@ def main():
                    if os.path.basename(fsname).startswith('.') and not params['hidden']:
                       continue
-                    st = os.stat(fsname)
+                    try:
                        st = os.stat(fsname)
                    except:
                        msg+="%s was skipped as it does not seem to be a valid file or it cannot be accessed\n" % fsname
                        continue
                    r = {'path': fsname}
                    if stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory':
-                        if pfilter(fsobj, params['patterns']) and agefilter(st, now, age, params['age_stamp']):
+                        if pfilter(fsobj, params['patterns'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']):
                            r.update(statinfo(st))
                            filelist.append(r)
                    elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file':
-                        if pfilter(fsobj, params['patterns']) and \
+                        if pfilter(fsobj, params['patterns'], params['use_regex']) and \
                           agefilter(st, now, age, params['age_stamp']) and \
                           sizefilter(st, size) and \
                           contentfilter(fsname, params['contains']):
@ -314,7 +338,7 @@ def main():
                if not params['recurse']:
                    break
        else:
-            msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n"
+            msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" % npath
    matched = len(filelist)
    module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked)