find: Allow reading whole file for contains regex (#71083)

Change:
- Add a parameter `read_whole_file` which allows for reading the whole
  file when doing a `contains` regex search.
- This makes it possible, for example, to match a pattern at the very
  end of a file.

Test Plan:
- New integration tests

Tickets:
- Fixes #63378

Signed-off-by: Rick Elrod <rick@elrod.me>
This commit is contained in:
Rick Elrod 2020-08-04 12:49:45 -05:00 committed by GitHub
parent 5ca3aec3c4
commit 810a9a5593
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 103 additions and 2 deletions

View file

@ -0,0 +1,2 @@
minor_changes:
- find module - Now has a ``read_whole_file`` boolean parameter which allows for reading the whole file and doing an ``re.search()`` regex evaluation on it when searching using the ``contains`` option. This makes it possible, for example, to ensure that the very end of the file matches a pattern.

View file

@ -57,6 +57,15 @@ options:
description:
- A regular expression or pattern which should be matched against the file content.
type: str
read_whole_file:
description:
- When doing a C(contains) search, determines whether the whole file should be read into
memory or if the regex should be applied to the file line-by-line.
- Setting this to C(true) can have performance and memory implications for large files.
- When set to C(true), the regex is evaluated with C(re.search()) against the whole file content instead of C(re.match()) against each line.
type: bool
default: false
version_added: "2.11"
paths:
description:
- List of paths of directories to search. All paths must be fully qualified.
@ -283,11 +292,12 @@ def sizefilter(st, size):
return False
def contentfilter(fsname, pattern):
def contentfilter(fsname, pattern, read_whole_file=False):
"""
Filter files which contain the given expression
:arg fsname: Filename to scan for lines matching a pattern
:arg pattern: Pattern to look for inside of line
:arg read_whole_file: If true, the whole file is read into memory before the regex is applied against it. Otherwise, the regex is applied line-by-line.
:rtype: bool
:returns: True if one of the lines in fsname matches the pattern. Otherwise False
"""
@ -298,6 +308,9 @@ def contentfilter(fsname, pattern):
try:
with open(fsname) as f:
if read_whole_file:
return bool(prog.search(f.read()))
for line in f:
if prog.match(line):
return True
@ -363,6 +376,7 @@ def main():
patterns=dict(type='list', default=['*'], aliases=['pattern'], elements='str'),
excludes=dict(type='list', aliases=['exclude'], elements='str'),
contains=dict(type='str'),
read_whole_file=dict(type='bool', default=False),
file_type=dict(type='str', default="file", choices=['any', 'directory', 'file', 'link']),
age=dict(type='str'),
age_stamp=dict(type='str', default="mtime", choices=['atime', 'ctime', 'mtime']),
@ -445,7 +459,7 @@ def main():
elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file':
if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and \
agefilter(st, now, age, params['age_stamp']) and \
sizefilter(st, size) and contentfilter(fsname, params['contains']):
sizefilter(st, size) and contentfilter(fsname, params['contains'], params['read_whole_file']):
r.update(statinfo(st))
if params['get_checksum']:

View file

@ -0,0 +1,2 @@
this is a file that has
a few lines in it

View file

@ -0,0 +1,4 @@
01/01- OK
01/02- OK
01/03- KO
01/04- OK

View file

@ -114,3 +114,82 @@
- 'find_test3.matched == 1'
- 'find_test3.files[0].pw_name is defined'
- 'find_test3.files[0].gr_name is defined'
- name: Copy some files into the test dir
copy:
src: "{{ item }}"
dest: "{{ output_dir_test }}/{{ item }}"
mode: 0644
with_items:
- a.txt
- log.txt
- name: Ensure '$' only matches the true end of the file with read_whole_file, not a line
find:
paths: "{{ output_dir_test }}"
patterns: "*.txt"
contains: "KO$"
read_whole_file: true
register: whole_no_match
- debug: var=whole_no_match
- assert:
that:
- whole_no_match.matched == 0
- name: Match the end of the file successfully
find:
paths: "{{ output_dir_test }}"
patterns: "*.txt"
contains: "OK$"
read_whole_file: true
register: whole_match
- debug: var=whole_match
- assert:
that:
- whole_match.matched == 1
- name: When read_whole_file=False, $ should match an individual line
find:
paths: "{{ output_dir_test }}"
patterns: "*.txt"
contains: ".*KO$"
read_whole_file: false
register: match_end_of_line
- debug: var=match_end_of_line
- assert:
that:
- match_end_of_line.matched == 1
- name: When read_whole_file=True, match across line boundaries
find:
paths: "{{ output_dir_test }}"
patterns: "*.txt"
contains: "has\na few"
read_whole_file: true
register: match_line_boundaries
- debug: var=match_line_boundaries
- assert:
that:
- match_line_boundaries.matched == 1
- name: When read_whole_file=False, do not match across line boundaries
find:
paths: "{{ output_dir_test }}"
patterns: "*.txt"
contains: "has\na few"
read_whole_file: false
register: no_match_line_boundaries
- debug: var=no_match_line_boundaries
- assert:
that:
- no_match_line_boundaries.matched == 0