From f18f480a3c3f6c2cce67d7a7e11e425c3794bc50 Mon Sep 17 00:00:00 2001
From: Matt Clay <mclay@redhat.com>
Date: Wed, 11 Mar 2020 18:23:35 -0700
Subject: [PATCH] Add hacking/shippable/incidental.py script. (#68182)

* Add hacking/shippable/incidental.py script.

* Add README.md.
---
 hacking/shippable/README.md     |  97 ++++++++
 hacking/shippable/incidental.py | 403 ++++++++++++++++++++++++++++++++
 2 files changed, 500 insertions(+)
 create mode 100644 hacking/shippable/README.md
 create mode 100755 hacking/shippable/incidental.py

diff --git a/hacking/shippable/README.md b/hacking/shippable/README.md
new file mode 100644
index 00000000000..1f48d65d809
--- /dev/null
+++ b/hacking/shippable/README.md
@@ -0,0 +1,97 @@
+# Shippable Scripts
+
+## Scripts
+
+This directory contains the following scripts:
+
+- download.py - Download results from Shippable.
+- incidental.py - Report on incidental code coverage using data from Shippable.
+- run.py - Start new runs on Shippable.
+
+## Incidental Code Coverage
+
+### Background
+
+Incidental testing and code coverage occurs when a test covers one or more portions of code as an unintentional side-effect of testing another portion of code.
+
+For example, the ``yum`` integration test intentionally tests the ``yum`` Ansible module.
+However, in doing so it also uses, and unintentionally tests, the ``file`` module as well.
+
+As part of the process of migrating modules and plugins into collections, integration tests were identified that provided exclusive incidental code coverage.
+That is, tests due to be migrated out of the repository that covered code which would not be covered by any remaining tests.
+
+These integration test targets were preserved as incidental tests with the ``incidental_`` prefix prior to migration.
+The plugins necessary to support these tests were also preserved in the ``test/support/`` directory.
+
+The long-term goal for these incidental tests is to replace them with tests that intentionally cover the relevant code.
+As additional intentional tests are added, the exclusive coverage provided by incidental tests will decline, permitting them to be removed without loss of test coverage.
+
+### Reducing Incidental Coverage
+
+Reducing incidental test coverage, and eventually removing incidental tests, involves the following process:
+
+1. Run the entire test suite with code coverage enabled. 
+   This is done automatically each day on Shippable.
+2. Download code coverage data from Shippable for local analysis. 
+   Example:
+   ```shell
+   # download results to ansible/ansible directory under cwd
+   # substitute the correct run number for the Shippable coverage run you want to download
+   hacking/shippable/download.py https://app.shippable.com/github/ansible/ansible/runs/162160 --all -v
+   ```
+3. Analyze code coverage data to see which portions of the code are covered by each test.
+   Example:
+   ```shell script
+   # make sure ansible-test is in $PATH
+   source hacking/env-setup
+   # run the script using whichever directory results were downloaded into
+   hacking/shippable/incidental.py ansible/ansible/162160
+   ```
+4. Create new intentional tests, or extend existing ones, to cover code that is currently covered by incidental tests.
+   Reports are created by default in a ``test/results/.tmp/incidental/{hash}/reports/`` directory.
+   The ``{hash}`` value is based on the input files used to generate the report.
+
+Over time, as the above process is repeated, exclusive incidental code coverage will decline.
+When incidental tests no longer provide exclusive coverage they can be removed.
+
+> CAUTION: Only one incidental test should be removed at a time, as doing so may cause another test to gain exclusive incidental coverage.
+
+### Reading Incidental Coverage Reports
+
+Each line of code covered will be included in a report.
+The left column contains the line number where the source occurs.
+If the coverage is for Python code a comment on the right side will indicate the coverage arcs involved.
+
+Below is an example of a report:
+
+```
+Target: incidental_win_psexec
+GitHub: https://github.com/ansible/ansible/blob/6994ef0b554a816f02e0771cb14341a421f7cead/test/integration/targets/incidental_win_psexec
+
+Source: lib/ansible/executor/task_executor.py (2 arcs, 3/1141 lines):
+GitHub: https://github.com/ansible/ansible/blob/6994ef0b554a816f02e0771cb14341a421f7cead/lib/ansible/executor/task_executor.py
+
+ 705                  if 'rc' in result and result['rc'] not in [0, "0"]:  ### (here) -> 706
+ 706                      result['failed'] = True  ### 705 -> (here)  ### (here) -> 711
+
+ 711              if self._task.until:  ### 706 -> (here)
+```
+
+The report indicates the test target responsible for the coverage and provides a link to the source on GitHub using the appropriate commit to match the code coverage.
+
+Each file covered in the report indicates the lines affected, and in the case of Python code, arcs.
+A link to the source file on GitHub using the appropriate commit is also included.
+
+The left column includes the line number for the source code found to the right.
+In the case of Python files, the rightmost comment indicates the coverage arcs involved.
+
+``### (here) -> 706`` for source line 705 indicates that execution flowed from line 705 to line 706.
+Multiple outbound line numbers can be present.
+
+``### 706 -> (here)`` for source line 711 indicates that execution flowed from line 706 to line 711.
+Multiple inbound line numbers can be present.
+
+In both cases ``(here)`` is simply a reference to the current source line.
+
+Arcs are only available for Python code.
+PowerShell code only reports covered line numbers.
diff --git a/hacking/shippable/incidental.py b/hacking/shippable/incidental.py
new file mode 100755
index 00000000000..5266de503fe
--- /dev/null
+++ b/hacking/shippable/incidental.py
@@ -0,0 +1,403 @@
+#!/usr/bin/env python
+# PYTHON_ARGCOMPLETE_OK
+
+# (c) 2020 Red Hat, Inc.
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible.  If not, see <http://www.gnu.org/licenses/>.
+"""CLI tool for reporting on incidental test coverage."""
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+# noinspection PyCompatibility
+import argparse
+import glob
+import json
+import os
+import re
+import subprocess
+import sys
+import hashlib
+
+try:
+    # noinspection PyPackageRequirements
+    import argcomplete
+except ImportError:
+    argcomplete = None
+
+
+def main():
+    """Main program body."""
+    args = parse_args()
+
+    try:
+        incidental_report(args)
+    except ApplicationError as ex:
+        sys.exit(ex)
+
+
+def parse_args():
+    """Parse and return args."""
+    source = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+
+    parser = argparse.ArgumentParser(description='Report on incidental test coverage downloaded from Shippable.')
+
+    parser.add_argument('result',
+                        type=directory,
+                        help='path to directory containing test results downloaded from Shippable')
+
+    parser.add_argument('--output',
+                        type=optional_directory,
+                        default=os.path.join(source, 'test', 'results', '.tmp', 'incidental'),
+                        help='path to directory where reports should be written')
+
+    parser.add_argument('--source',
+                        type=optional_directory,
+                        default=source,
+                        help='path to git repository containing Ansible source')
+
+    parser.add_argument('--targets',
+                        type=regex,
+                        default='^incidental_',
+                        help='regex for targets to analyze, default: %(default)s')
+
+    parser.add_argument('--skip-checks',
+                        action='store_true',
+                        help='skip integrity checks, use only for debugging')
+
+    parser.add_argument('--ignore-cache',
+                        dest='use_cache',
+                        action='store_false',
+                        help='ignore cached files')
+
+    if argcomplete:
+        argcomplete.autocomplete(parser)
+
+    args = parser.parse_args()
+
+    return args
+
+
+def optional_directory(value):
+    if not os.path.exists(value):
+        return value
+
+    return directory(value)
+
+
+def directory(value):
+    if not os.path.isdir(value):
+        raise argparse.ArgumentTypeError('"%s" is not a directory' % value)
+
+    return value
+
+
+def regex(value):
+    try:
+        return re.compile(value)
+    except Exception as ex:
+        raise argparse.ArgumentTypeError('"%s" is not a valid regex: %s' % (value, ex))
+
+
+def incidental_report(args):
+    """Generate incidental coverage report."""
+    ct = CoverageTool()
+    git = Git(os.path.abspath(args.source))
+    coverage_data = CoverageData(os.path.abspath(args.result))
+
+    try:
+        git.show([coverage_data.result_sha, '--'])
+    except subprocess.CalledProcessError:
+        raise ApplicationError('%s: commit not found: %s\n'
+                               'make sure your source repository is up-to-date' % (git.path, coverage_data.result_sha))
+
+    if coverage_data.status_code != 30:
+        check_failed(args, 'results from Shippable indicate tests did not pass (status code: %d)\n'
+                           're-run until passing, then download the latest results and re-run the report using those results' % coverage_data.status_code)
+
+    if coverage_data.missing_jobs or coverage_data.extra_jobs:
+        check_failed(args, 'unexpected results from Shippable -- missing jobs: %s, extra jobs: %s\n'
+                           'make sure the tests were successful and all the results were downloaded\n' % (
+                               sorted(coverage_data.missing_jobs), sorted(coverage_data.extra_jobs)))
+
+    if not coverage_data.paths:
+        raise ApplicationError('no coverage data found\n'
+                               'make sure the downloaded results are from a code coverage run on Shippable')
+
+    # generate a unique subdirectory in the output directory based on the input files being used
+    path_hash = hashlib.sha256(b'\n'.join(p.encode() for p in coverage_data.paths)).hexdigest()
+    output_path = os.path.abspath(os.path.join(args.output, path_hash))
+
+    data_path = os.path.join(output_path, 'data')
+    reports_path = os.path.join(output_path, 'reports')
+
+    for path in [data_path, reports_path]:
+        if not os.path.exists(path):
+            os.makedirs(path)
+
+    # combine coverage results into a single file
+    combined_path = os.path.join(output_path, 'combined.json')
+    cached(combined_path, args.use_cache,
+           lambda: ct.combine(coverage_data.paths, combined_path))
+
+    with open(combined_path) as combined_file:
+        combined = json.load(combined_file)
+
+    # identify integration test targets to analyze
+    target_names = sorted(combined['targets'])
+    incidental_target_names = [target for target in target_names if re.search(args.targets, target)]
+
+    if not incidental_target_names:
+        raise ApplicationError('no targets to analyze')
+
+    # exclude test support plugins from analysis
+    # also exclude six, which for an unknown reason reports bogus coverage lines (indicating coverage of comments)
+    exclude_path = '^(test/support/|lib/ansible/module_utils/six/)'
+
+    # process coverage for each target and then generate a report
+    # save sources for generating a summary report at the end
+    summary = {}
+
+    for target_name in incidental_target_names:
+        only_target_path = os.path.join(data_path, 'only-%s.json' % target_name)
+        cached(only_target_path, args.use_cache,
+               lambda: ct.filter(combined_path, only_target_path, include_targets=[target_name], exclude_path=exclude_path))
+
+        without_target_path = os.path.join(data_path, 'without-%s.json' % target_name)
+        cached(without_target_path, args.use_cache,
+               lambda: ct.filter(combined_path, without_target_path, exclude_targets=[target_name], exclude_path=exclude_path))
+
+        exclusive_target_path = os.path.join(data_path, 'exclusive-%s.json' % target_name)
+        cached(exclusive_target_path, args.use_cache,
+               lambda: ct.missing(only_target_path, without_target_path, exclusive_target_path, only_gaps=True))
+
+        exclusive_expanded_target_path = os.path.join(data_path, 'exclusive-expanded-%s.json' % target_name)
+        cached(exclusive_expanded_target_path, args.use_cache,
+               lambda: ct.expand(exclusive_target_path, exclusive_expanded_target_path))
+
+        summary[target_name] = sources = collect_sources(exclusive_expanded_target_path, git, coverage_data)
+
+        txt_report_path = os.path.join(reports_path, '%s.txt' % target_name)
+        cached(txt_report_path, args.use_cache,
+               lambda: generate_report(sources, txt_report_path, coverage_data, target_name))
+
+    # provide a summary report of results
+    for target_name in incidental_target_names:
+        sources = summary[target_name]
+
+        print('%s: %d arcs, %d lines, %d files' % (
+            target_name,
+            sum(len(s.covered_arcs) for s in sources),
+            sum(len(s.covered_lines) for s in sources),
+            len(sources),
+        ))
+
+    sys.stderr.write('NOTE: This report shows only coverage exclusive to the reported targets. '
+                     'As targets are removed, exclusive coverage on the remaining targets will increase.\n')
+
+
+class CoverageData:
+    def __init__(self, result_path):
+        with open(os.path.join(result_path, 'run.json')) as run_file:
+            run = json.load(run_file)
+
+        self.org_name = run['subscriptionOrgName']
+        self.project_name = run['projectName']
+        self.result_sha = run['commitSha']
+        self.status_code = run['statusCode']
+
+        self.github_base_url = 'https://github.com/%s/%s/blob/%s/' % (self.org_name, self.project_name, self.result_sha)
+
+        # locate available results
+        self.paths = sorted(glob.glob(os.path.join(result_path, '*', 'test', 'testresults', 'coverage-analyze-targets.json')))
+
+        # make sure the test matrix is complete
+        matrix_include = run['cleanRunYml']['matrix']['include']
+        matrix_jobs = list((idx, dict(tuple(item.split('=', 1)) for item in value['env'])) for idx, value in enumerate(matrix_include, start=1))
+        sanity_job_numbers = set(idx for idx, env in matrix_jobs if env['T'].startswith('sanity/'))
+        units_job_numbers = set(idx for idx, env in matrix_jobs if env['T'].startswith('units/'))
+        expected_job_numbers = set(idx for idx, env in matrix_jobs)
+        actual_job_numbers = set(int(os.path.relpath(path, result_path).split(os.path.sep)[0]) for path in self.paths)
+
+        self.missing_jobs = expected_job_numbers - actual_job_numbers - sanity_job_numbers - units_job_numbers
+        self.extra_jobs = actual_job_numbers - expected_job_numbers - sanity_job_numbers - units_job_numbers
+
+
+class Git:
+    def __init__(self, path):
+        self.git = 'git'
+        self.path = path
+
+        try:
+            self.show()
+        except subprocess.CalledProcessError:
+            raise ApplicationError('%s: not a git repository' % path)
+
+    def show(self, args=None):
+        return self.run(['show'] + (args or []))
+
+    def run(self, command):
+        return subprocess.check_output([self.git] + command, cwd=self.path)
+
+
+class CoverageTool:
+    def __init__(self):
+        self.analyze_cmd = ['ansible-test', 'coverage', 'analyze', 'targets']
+
+    def combine(self, input_paths, output_path):
+        subprocess.check_call(self.analyze_cmd + ['combine'] + input_paths + [output_path])
+
+    def filter(self, input_path, output_path, include_targets=None, exclude_targets=None, exclude_path=None):
+        args = []
+
+        if include_targets:
+            for target in include_targets:
+                args.extend(['--include-target', target])
+
+        if exclude_targets:
+            for target in exclude_targets:
+                args.extend(['--exclude-target', target])
+
+        if exclude_path:
+            args.extend(['--exclude-path', exclude_path])
+
+        subprocess.check_call(self.analyze_cmd + ['filter', input_path, output_path] + args)
+
+    def missing(self, from_path, to_path, output_path, only_gaps=False):
+        args = []
+
+        if only_gaps:
+            args.append('--only-gaps')
+
+        subprocess.check_call(self.analyze_cmd + ['missing', from_path, to_path, output_path] + args)
+
+    def expand(self, input_path, output_path):
+        subprocess.check_call(self.analyze_cmd + ['expand', input_path, output_path])
+
+
+class SourceFile:
+    def __init__(self, path, source, coverage_data, coverage_points):
+        self.path = path
+        self.lines = source.decode().splitlines()
+        self.coverage_data = coverage_data
+        self.coverage_points = coverage_points
+        self.github_url = coverage_data.github_base_url + path
+
+        is_arcs = ':' in dict(coverage_points).popitem()[0]
+
+        if is_arcs:
+            parse = parse_arc
+        else:
+            parse = int
+
+        self.covered_points = set(parse(v) for v in coverage_points)
+        self.covered_arcs = self.covered_points if is_arcs else None
+        self.covered_lines = set(abs(p[0]) for p in self.covered_points) | set(abs(p[1]) for p in self.covered_points)
+
+
+def collect_sources(data_path, git, coverage_data):
+    with open(data_path) as data_file:
+        data = json.load(data_file)
+
+    sources = []
+
+    for path_coverage in data.values():
+        for path, path_data in path_coverage.items():
+            sources.append(SourceFile(path, git.show(['%s:%s' % (coverage_data.result_sha, path)]), coverage_data, path_data))
+
+    return sources
+
+
+def generate_report(sources, report_path, coverage_data, target_name):
+    output = [
+        'Target: %s' % target_name,
+        'GitHub: %stest/integration/targets/%s' % (coverage_data.github_base_url, target_name),
+    ]
+
+    for source in sources:
+        if source.covered_arcs:
+            output.extend([
+                '',
+                'Source: %s (%d arcs, %d/%d lines):' % (source.path, len(source.covered_arcs), len(source.covered_lines), len(source.lines)),
+                'GitHub: %s' % source.github_url,
+                '',
+            ])
+        else:
+            output.extend([
+                '',
+                'Source: %s (%d/%d lines):' % (source.path, len(source.covered_lines), len(source.lines)),
+                'GitHub: %s' % source.github_url,
+                '',
+            ])
+
+        last_line_no = 0
+
+        for line_no, line in enumerate(source.lines, start=1):
+            if line_no not in source.covered_lines:
+                continue
+
+            if last_line_no and last_line_no != line_no - 1:
+                output.append('')
+
+            notes = ''
+
+            if source.covered_arcs:
+                from_lines = sorted(p[0] for p in source.covered_points if abs(p[1]) == line_no)
+                to_lines = sorted(p[1] for p in source.covered_points if abs(p[0]) == line_no)
+
+                if from_lines:
+                    notes += '  ### %s -> (here)' % ', '.join(str(from_line) for from_line in from_lines)
+
+                if to_lines:
+                    notes += '  ### (here) -> %s' % ', '.join(str(to_line) for to_line in to_lines)
+
+            output.append('%4d  %s%s' % (line_no, line, notes))
+            last_line_no = line_no
+
+    with open(report_path, 'w') as report_file:
+        report_file.write('\n'.join(output) + '\n')
+
+
+def parse_arc(value):
+    return tuple(int(v) for v in value.split(':'))
+
+
+def cached(path, use_cache, func):
+    if os.path.exists(path) and use_cache:
+        sys.stderr.write('%s: cached\n' % path)
+        sys.stderr.flush()
+        return
+
+    sys.stderr.write('%s: generating ... ' % path)
+    sys.stderr.flush()
+    func()
+    sys.stderr.write('done\n')
+    sys.stderr.flush()
+
+
+def check_failed(args, message):
+    if args.skip_checks:
+        sys.stderr.write('WARNING: %s\n' % message)
+        return
+
+    raise ApplicationError(message)
+
+
+class ApplicationError(Exception):
+    pass
+
+
+if __name__ == '__main__':
+    main()