Merge pull request #2262 from bcoca/find_regex

added regex support to find
This commit is contained in:
Brian Coca 2015-10-23 17:39:49 -04:00
commit 486d233cbe

View file

@ -25,8 +25,6 @@ import stat
import fnmatch import fnmatch
import time import time
import re import re
import shutil
DOCUMENTATION = ''' DOCUMENTATION = '''
--- ---
@ -50,17 +48,18 @@ options:
required: false required: false
default: '*' default: '*'
description: description:
- One or more (shell type) file glob patterns, which restrict the list of files to be returned to - One or more (shell or regex) patterns, which type is controled by C(use_regex) option.
those whose basenames match at least one of the patterns specified. Multiple patterns can be - The patterns restrict the list of files to be returned to those whose basenames match at
specified using a list. least one of the patterns specified. Multiple patterns can be specified using a list.
aliases: ['pattern']
contains: contains:
required: false required: false
default: null default: null
description: description:
- One or more re patterns which should be matched against the file content - One or more re patterns which should be matched against the file content
paths: paths:
required: true required: true
aliases: [ "name" ] aliases: [ "name", "path" ]
description: description:
- List of paths to the file or directory to search. All paths must be fully qualified. - List of paths to the file or directory to search. All paths must be fully qualified.
file_type: file_type:
@ -108,6 +107,12 @@ options:
choices: [ True, False ] choices: [ True, False ]
description: description:
- Set this to true to retrieve a file's sha1 checksum - Set this to true to retrieve a file's sha1 checksum
use_regex:
required: false
default: "False"
choices: [ True, False ]
description:
- If false the patterns are file globs (shell) if true they are python regexes
''' '''
@ -121,8 +126,11 @@ EXAMPLES = '''
# Recursively find /var/tmp files with last access time greater than 3600 seconds # Recursively find /var/tmp files with last access time greater than 3600 seconds
- find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes - find: paths="/var/tmp" age="3600" age_stamp=atime recurse=yes
# find /var/log files equal or greater than 10 megabytes ending with .log or .log.gz # find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz
- find: paths="/var/tmp" patterns="*.log","*.log.gz" size="10m" - find: paths="/var/tmp" patterns="'*.old','*.log.gz'" size="10m"
# find /var/log files equal or greater than 10 megabytes ending with .old or .log.gz via regex
- find: paths="/var/tmp" patterns="^.*?\.(?:old|log\.gz)$" size="10m" use_regex=True
''' '''
RETURN = ''' RETURN = '''
@ -152,13 +160,23 @@ examined:
sample: 34 sample: 34
''' '''
def pfilter(f, patterns=None): def pfilter(f, patterns=None, use_regex=False):
'''filter using glob patterns''' '''filter using glob patterns'''
if patterns is None: if patterns is None:
return True return True
for p in patterns:
if fnmatch.fnmatch(f, p): if use_regex:
return True for p in patterns:
r = re.compile(p)
if r.match(f):
return True
else:
for p in patterns:
if fnmatch.fnmatch(f, p):
return True
return False return False
@ -236,8 +254,8 @@ def statinfo(st):
def main(): def main():
module = AnsibleModule( module = AnsibleModule(
argument_spec = dict( argument_spec = dict(
paths = dict(required=True, aliases=['name'], type='list'), paths = dict(required=True, aliases=['name','path'], type='list'),
patterns = dict(default=['*'], type='list'), patterns = dict(default=['*'], type='list', aliases=['pattern']),
contains = dict(default=None, type='str'), contains = dict(default=None, type='str'),
file_type = dict(default="file", choices=['file', 'directory'], type='str'), file_type = dict(default="file", choices=['file', 'directory'], type='str'),
age = dict(default=None, type='str'), age = dict(default=None, type='str'),
@ -247,6 +265,7 @@ def main():
hidden = dict(default="False", type='bool'), hidden = dict(default="False", type='bool'),
follow = dict(default="False", type='bool'), follow = dict(default="False", type='bool'),
get_checksum = dict(default="False", type='bool'), get_checksum = dict(default="False", type='bool'),
use_regex = dict(default="False", type='bool'),
), ),
) )
@ -292,16 +311,21 @@ def main():
if os.path.basename(fsname).startswith('.') and not params['hidden']: if os.path.basename(fsname).startswith('.') and not params['hidden']:
continue continue
st = os.stat(fsname) try:
st = os.stat(fsname)
except:
msg+="%s was skipped as it does not seem to be a valid file or it cannot be accessed\n" % fsname
continue
r = {'path': fsname} r = {'path': fsname}
if stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory': if stat.S_ISDIR(st.st_mode) and params['file_type'] == 'directory':
if pfilter(fsobj, params['patterns']) and agefilter(st, now, age, params['age_stamp']): if pfilter(fsobj, params['patterns'], params['use_regex']) and agefilter(st, now, age, params['age_stamp']):
r.update(statinfo(st)) r.update(statinfo(st))
filelist.append(r) filelist.append(r)
elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file': elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file':
if pfilter(fsobj, params['patterns']) and \ if pfilter(fsobj, params['patterns'], params['use_regex']) and \
agefilter(st, now, age, params['age_stamp']) and \ agefilter(st, now, age, params['age_stamp']) and \
sizefilter(st, size) and \ sizefilter(st, size) and \
contentfilter(fsname, params['contains']): contentfilter(fsname, params['contains']):
@ -314,7 +338,7 @@ def main():
if not params['recurse']: if not params['recurse']:
break break
else: else:
msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" msg+="%s was skipped as it does not seem to be a valid directory or it cannot be accessed\n" % npath
matched = len(filelist) matched = len(filelist)
module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked) module.exit_json(files=filelist, changed=False, msg=msg, matched=matched, examined=looked)