From 810a9a55930951e547529a44e46c6f1829355fbf Mon Sep 17 00:00:00 2001 From: Rick Elrod Date: Tue, 4 Aug 2020 12:49:45 -0500 Subject: [PATCH] find: Allow reading whole file for contains regex (#71083) Change: - Add a parameter `read_whole_file` which allows for reading the whole file when doing a `contains` regex search. - This allows for (for example) matching a pattern at the very end of a file. Test Plan: - New integration tests Tickets: - Fixes #63378 Signed-off-by: Rick Elrod --- .../63378_find_module_regex_whole_file.yml | 2 + lib/ansible/modules/find.py | 18 ++++- test/integration/targets/find/files/a.txt | 2 + test/integration/targets/find/files/log.txt | 4 + test/integration/targets/find/tasks/main.yml | 79 +++++++++++++++++++ 5 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 changelogs/fragments/63378_find_module_regex_whole_file.yml create mode 100644 test/integration/targets/find/files/a.txt create mode 100644 test/integration/targets/find/files/log.txt diff --git a/changelogs/fragments/63378_find_module_regex_whole_file.yml b/changelogs/fragments/63378_find_module_regex_whole_file.yml new file mode 100644 index 00000000000..28012ece6cb --- /dev/null +++ b/changelogs/fragments/63378_find_module_regex_whole_file.yml @@ -0,0 +1,2 @@ +minor_changes: + - find module - Now has a ``read_whole_file`` boolean parameter which allows for reading the whole file and doing an ``re.search()`` regex evaluation on it when searching using the ``contains`` option. This allows (for example) for ensuring the very end of the file matches a pattern. diff --git a/lib/ansible/modules/find.py b/lib/ansible/modules/find.py index cfefaaa9630..9e0da0c95e1 100644 --- a/lib/ansible/modules/find.py +++ b/lib/ansible/modules/find.py @@ -57,6 +57,15 @@ options: description: - A regular expression or pattern which should be matched against the file content. type: str + read_whole_file: + description: + - When doing a C(contains) search, determines whether the whole file should be read into + memory or if the regex should be applied to the file line-by-line. + - Setting this to C(true) can have performance and memory implications for large files. + - This uses C(re.search()) instead of C(re.match()). + type: bool + default: false + version_added: "2.11" paths: description: - List of paths of directories to search. All paths must be fully qualified. @@ -283,11 +292,12 @@ def sizefilter(st, size): return False -def contentfilter(fsname, pattern): +def contentfilter(fsname, pattern, read_whole_file=False): """ Filter files which contain the given expression :arg fsname: Filename to scan for lines matching a pattern :arg pattern: Pattern to look for inside of line + :arg read_whole_file: If true, the whole file is read into memory before the regex is applied against it. Otherwise, the regex is applied line-by-line. :rtype: bool :returns: True if one of the lines in fsname matches the pattern. Otherwise False """ @@ -298,6 +308,9 @@ def contentfilter(fsname, pattern): try: with open(fsname) as f: + if read_whole_file: + return bool(prog.search(f.read())) + for line in f: if prog.match(line): return True @@ -363,6 +376,7 @@ def main(): patterns=dict(type='list', default=['*'], aliases=['pattern'], elements='str'), excludes=dict(type='list', aliases=['exclude'], elements='str'), contains=dict(type='str'), + read_whole_file=dict(type='bool', default=False), file_type=dict(type='str', default="file", choices=['any', 'directory', 'file', 'link']), age=dict(type='str'), age_stamp=dict(type='str', default="mtime", choices=['atime', 'ctime', 'mtime']), @@ -445,7 +459,7 @@ def main(): elif stat.S_ISREG(st.st_mode) and params['file_type'] == 'file': if pfilter(fsobj, params['patterns'], params['excludes'], params['use_regex']) and \ agefilter(st, now, age, params['age_stamp']) and \ - sizefilter(st, size) and contentfilter(fsname, params['contains']): + sizefilter(st, size) and contentfilter(fsname, params['contains'], params['read_whole_file']): r.update(statinfo(st)) if params['get_checksum']: diff --git a/test/integration/targets/find/files/a.txt b/test/integration/targets/find/files/a.txt new file mode 100644 index 00000000000..30b622a3b93 --- /dev/null +++ b/test/integration/targets/find/files/a.txt @@ -0,0 +1,2 @@ +this is a file that has +a few lines in it diff --git a/test/integration/targets/find/files/log.txt b/test/integration/targets/find/files/log.txt new file mode 100644 index 00000000000..679893bcbf6 --- /dev/null +++ b/test/integration/targets/find/files/log.txt @@ -0,0 +1,4 @@ +01/01- OK +01/02- OK +01/03- KO +01/04- OK diff --git a/test/integration/targets/find/tasks/main.yml b/test/integration/targets/find/tasks/main.yml index 456f4bc680d..cc718e92db2 100644 --- a/test/integration/targets/find/tasks/main.yml +++ b/test/integration/targets/find/tasks/main.yml @@ -114,3 +114,82 @@ - 'find_test3.matched == 1' - 'find_test3.files[0].pw_name is defined' - 'find_test3.files[0].gr_name is defined' + +- name: Copy some files into the test dir + copy: + src: "{{ item }}" + dest: "{{ output_dir_test }}/{{ item }}" + mode: 0644 + with_items: + - a.txt + - log.txt + +- name: Ensure '$' only matches the true end of the file with read_whole_file, not a line + find: + paths: "{{ output_dir_test }}" + patterns: "*.txt" + contains: "KO$" + read_whole_file: true + register: whole_no_match + +- debug: var=whole_no_match + +- assert: + that: + - whole_no_match.matched == 0 + +- name: Match the end of the file successfully + find: + paths: "{{ output_dir_test }}" + patterns: "*.txt" + contains: "OK$" + read_whole_file: true + register: whole_match + +- debug: var=whole_match + +- assert: + that: + - whole_match.matched == 1 + +- name: When read_whole_file=False, $ should match an individual line + find: + paths: "{{ output_dir_test }}" + patterns: "*.txt" + contains: ".*KO$" + read_whole_file: false + register: match_end_of_line + +- debug: var=match_end_of_line + +- assert: + that: + - match_end_of_line.matched == 1 + +- name: When read_whole_file=True, match across line boundaries + find: + paths: "{{ output_dir_test }}" + patterns: "*.txt" + contains: "has\na few" + read_whole_file: true + register: match_line_boundaries + +- debug: var=match_line_boundaries + +- assert: + that: + - match_line_boundaries.matched == 1 + +- name: When read_whole_file=False, do not match across line boundaries + find: + paths: "{{ output_dir_test }}" + patterns: "*.txt" + contains: "has\na few" + read_whole_file: false + register: no_match_line_boundaries + +- debug: var=no_match_line_boundaries + +- assert: + that: + - no_match_line_boundaries.matched == 0