ansible/hacking/shippable/rebalance.py

#!/usr/bin/env python
# PYTHON_ARGCOMPLETE_OK

# (c) 2020 Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

"""
CLI tool that analyses a Shippable run's test result and re-balances the test targets into new groups.

Before running this script you must run download.py like:

    ./download.py https://app.shippable.com/github/<team>/<repo>/runs/<run_num> --test-results --job-number x --job-number y

Or to get all job results from a run:

    ./download.py https://app.shippable.com/github/<team>/<repo>/runs/<run_num> --test-results --all


Set the dir <team>/<repo>/<run_num> as the value of '-p/--test-results-path' for this script.
"""

from __future__ import (absolute_import, division, print_function)
__metaclass__ = type

import argparse
import json
import operator
import os
import re

from glob import glob

try:
    import argcomplete
except ImportError:
    argcomplete = None


def main():
    """Script entry point: read the command line options and run the re-balance with them."""
    rebalance(parse_args())


def parse_args(argv=None):
    """Parse and return the command line arguments.

    :param argv: Optional list of argument strings to parse. When ``None`` (the default) argparse
        falls back to ``sys.argv[1:]``.
    :returns: An ``argparse.Namespace`` with ``group_count`` (int), ``verbose`` (bool),
        ``test_results_path`` (str) and ``target_path`` (str or ``None``).
    """
    parser = argparse.ArgumentParser(description='Re-balance Shippable group(s) from a downloaded results directory.')

    # Converting here lets argparse reject a non-numeric value with a usage error instead of the
    # script failing later with a traceback.
    parser.add_argument('group_count',
                        metavar='group_count',
                        type=int,
                        help='The number of groups to re-balance the tests to.')

    parser.add_argument('-v', '--verbose',
                        dest='verbose',
                        action='store_true',
                        help='Display more detailed info about files being read and edited.')

    parser.add_argument('-p', '--test-results-path',
                        dest='test_results_path',
                        required=True,
                        help='The directory where the downloaded Shippable job test results are.')

    parser.add_argument('-t', '--target-path',
                        dest='target_path',
                        required=False,
                        help='The directory where the test targets are located. If set the aliases will automatically '
                             'be rewritten with the new proposed group.')

    # argcomplete is optional; shell completion is only wired up when it could be imported.
    if argcomplete:
        argcomplete.autocomplete(parser)

    return parser.parse_args(argv)


def get_raw_test_targets(args, test_path):
    """Scan the downloaded job results and return the max runtime recorded for each test target.

    Every entry of ``test_path`` is treated as a job directory. The first ``*integration-*.json``
    file found under ``<job>/test/testresults/data`` is read and its ``targets`` mapping is merged
    into the result, keeping the highest ``run_time_seconds`` seen for each target.

    :param args: The parsed command line arguments; only ``args.verbose`` is read.
    :param test_path: The directory that contains one sub directory per downloaded job.
    :returns: A dict of target name to runtime in whole seconds, ordered from the slowest to the
        fastest target. Targets with the same runtime are ordered by name.
    """
    target_times = {}

    # Sorted so the outcome does not depend on the order the file system lists the jobs in.
    for job_id in sorted(os.listdir(test_path)):
        json_path = os.path.join(test_path, job_id, 'test', 'testresults', 'data')

        # Some jobs do not have a data directory.
        if not os.path.exists(json_path):
            continue

        # The glob can legitimately come back empty, so never index into it blindly.
        json_files = [path for path in sorted(glob(os.path.join(json_path, '*integration-*.json')))
                      if os.path.isfile(path)]
        if not json_files:
            if args.verbose:
                print("No integration test json file was found in '%s', skipping test job run" % json_path)
            continue

        with open(json_files[0], mode='rb') as fd:
            test_info = json.loads(fd.read().decode('utf-8'))

        for target_name, target_info in test_info.get('targets', {}).items():
            target_runtime = int(target_info.get('run_time_seconds', 0))

            # Only keep this entry if no higher runtime has already been recorded for the target.
            if target_runtime >= target_times.get(target_name, 0):
                target_times[target_name] = target_runtime

    # Slowest first; the name is a tie breaker that keeps the order stable between runs.
    return dict(sorted(target_times.items(), key=lambda item: (-item[1], item[0])))


def print_test_runtime(target_times):
    """Write a two column table of test target names and their runtime in seconds to stdout.

    The name column is padded to the longest target name in ``target_times``.
    """
    # Width of the name column; an empty mapping results in a width of 0.
    column_width = max([0] + [len(name) for name in target_times])

    header = "Target Name".ljust(column_width)
    divider = "-" * column_width
    print("%s | Seconds |" % (header,))
    print("%s | ------- |" % (divider,))

    for name in target_times:
        seconds = str(target_times[name]).ljust(7)
        print("%s | %s |" % (name.ljust(column_width), seconds))


def rebalance(args):
    """Print a proposed re-balanced group layout and optionally write it to the target aliases.

    The runtimes are read from the downloaded Shippable results in ``args.test_results_path`` and
    the targets are spread over ``args.group_count`` groups. When ``args.target_path`` is set the
    ``aliases`` file of each target found under it is updated to the proposed group.

    :param args: The parsed command line arguments (``group_count``, ``test_results_path``,
        ``target_path`` and ``verbose`` are read).
    :raises ValueError: If ``group_count`` is not a number, is lower than 1, or no integration
        targets directory exists under ``args.target_path``.
    """
    group_count = int(args.group_count)
    if group_count < 1:
        # Fail early with a clear message rather than on min() of an empty sequence later on.
        raise ValueError("group_count must be at least 1, got %d" % group_count)

    test_path = os.path.expanduser(os.path.expandvars(args.test_results_path))
    target_times = get_raw_test_targets(args, test_path)
    group_info = _assign_groups(target_times, group_count)

    # Print a summary of the proposed test split.
    for group_number in sorted(group_info):
        test_info = group_info[group_number]
        print("Group %d - Total Runtime (s): %d" % (group_number, test_info['total_time']))
        print_test_runtime(dict([(n, target_times[n]) for n in test_info['targets']]))
        print()

    if not args.target_path:
        return

    target_path = os.path.expanduser(os.path.expandvars(args.target_path))
    integration_root = _find_integration_root(target_path, args.verbose)

    for group_number in sorted(group_info):
        for test_target in group_info[group_number]['targets']:
            test_target_aliases = os.path.join(integration_root, test_target, 'aliases')
            if not os.path.isfile(test_target_aliases):
                if args.verbose:
                    print("Cannot find test target alias file at '%s', skipping." % test_target_aliases)
                continue

            _update_aliases_file(test_target_aliases, test_target, group_number, args.verbose)


def _assign_groups(target_times, group_count):
    """Spread the targets over ``group_count`` groups, always adding to the currently lightest group."""
    group_numbers = list(range(1, group_count + 1))
    group_info = dict((number, {'targets': [], 'total_time': 0}) for number in group_numbers)

    # target_times is expected to be ordered slowest first, so the big targets are placed first.
    for target_name, target_time in target_times.items():
        # min() returns the first minimum, so a tie goes to the lowest group number.
        lightest = min(group_numbers, key=lambda number: group_info[number]['total_time'])
        group_info[lightest]['targets'].append(target_name)
        group_info[lightest]['total_time'] += target_time

    return group_info


def _find_integration_root(target_path, verbose):
    """Return the integration targets directory under ``target_path`` or raise ValueError if there is none."""
    for test_root in ['test', 'tests']:  # ansible/ansible uses 'test' but collections use 'tests'.
        integration_root = os.path.join(target_path, test_root, 'integration', 'targets')
        if os.path.isdir(integration_root):
            if verbose:
                print("Found test integration target dir at '%s'" % integration_root)
            return integration_root

    raise ValueError("Failed to find the test target folder on test/integration/targets or "
                     "tests/integration/targets under '%s'." % target_path)


def _update_aliases_file(aliases_path, test_target, group_number, verbose):
    """Point the first out of date shippable group alias in ``aliases_path`` at ``group_number``."""
    with open(aliases_path, mode='r') as fd:
        test_aliases = fd.readlines()

    found_group = False
    for idx, line in enumerate(test_aliases):
        group_match = re.match(r'shippable/(.*)/group(\d+)', line)
        if not group_match:
            continue

        found_group = True
        if int(group_match.group(2)) == group_number:
            # Already correct, keep looking in case a later alias line is out of date.
            continue

        new_group = 'shippable/%s/group%d\n' % (group_match.group(1), group_number)
        if verbose:
            print("Changing %s group from '%s' to '%s'" % (test_target, group_match.group(0),
                                                           new_group.rstrip()))
        test_aliases[idx] = new_group

        # The file is only written when an alias actually changed.
        with open(aliases_path, mode='w') as fd:
            fd.writelines(test_aliases)
        return

    if verbose:
        if found_group:
            print("Test target %s matches proposed group number, no changed required" % test_target)
        else:
            # Do not claim the group matches when the file has no group alias at all.
            print("Test target %s has no shippable group alias, nothing to change" % test_target)


if __name__ == '__main__':
    main()
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00			`#!/usr/bin/env python`
			`# PYTHON_ARGCOMPLETE_OK`

			`# (c) 2020 Ansible Project`
			`# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)`

			`"""`
			`CLI tool that analyses a Shippable run's test result and re-balances the test targets into new groups.`

Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`Before running this script you must run download.py like:`

			`./download.py https://app.shippable.com/github/<team>/<repo>/runs/<run_num> --test-results --job-number x --job-number y`

			`Or to get all job results from a run:`

			`./download.py https://app.shippable.com/github/<team>/<repo>/runs/<run_num> --test-results --all`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00

			`Set the dir <team>/<repo>/<run_num> as the value of '-p/--test-path' for this script.`
			`"""`

			`from __future__ import (absolute_import, division, print_function)`
			`__metaclass__ = type`

			`import argparse`
			`import json`
			`import operator`
			`import os`
			`import re`

Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`from glob import glob`

hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00			`try:`
			`import argcomplete`
			`except ImportError:`
			`argcomplete = None`


			`def main():`
			`"""Main program body."""`
			`args = parse_args()`
			`rebalance(args)`


			`def parse_args():`
			`"""Parse and return args."""`
			`parser = argparse.ArgumentParser(description='Re-balance Shippable group(s) from a downloaded results directory.')`

			`parser.add_argument('group_count',`
			`metavar='group_count',`
			`help='The number of groups to re-balance the tests to.')`

			`parser.add_argument('-v', '--verbose',`
			`dest='verbose',`
			`action='store_true',`
			`help='Display more detailed info about files being read and edited.')`

			`parser.add_argument('-p', '--test-results-path',`
			`dest='test_results_path',`
			`required=True,`
			`help='The directory where the downloaded Shippable job test results are.')`

			`parser.add_argument('-t', '--target-path',`
			`dest='target_path',`
			`required=False,`
			`help='The directory where the test targets are located. If set the aliases will automatically '`
			`'by rewritten with the new proposed group.')`

			`if argcomplete:`
			`argcomplete.autocomplete(parser)`

			`args = parser.parse_args()`

			`return args`


			`def get_raw_test_targets(args, test_path):`
			`"""Scans the test directory for all the test targets that was run and get's the max runtime for each target."""`
			`target_times = {}`

			`for job_id in os.listdir(test_path):`
Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`json_path = os.path.join(test_path, job_id, 'test', 'testresults', 'data')`

			`# Some tests to do not have a data directory`
			`if not os.path.exists(json_path):`
			`continue`

			`json_file = glob(os.path.join(json_path, 'integration-.json'))[0]`
			`if not os.path.isfile(json_file):`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00			`if args.verbose:`
Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`print("The test json file '%s' does not exist or is not a file, skipping test job run" % json_file)`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00			`continue`

Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`with open(json_file, mode='rb') as fd:`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00			`test_info = json.loads(fd.read().decode('utf-8'))`

Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`targets = test_info.get('targets', {})`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00
Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`for target_name, target_info in targets.items():`
			`target_runtime = int(target_info.get('run_time_seconds', 0))`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00
Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`# If that target already is found and has a higher runtime than the current one, ignore this entry.`
			`if target_times.get(target_name, 0) > target_runtime:`
			`continue`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00
Get test data file directly in rebalance script (#70107) Rather than looking through tests.json to find the data file, look for it explicitly within a given target to avoid problems processing data in tests.json. 2020-06-19 15:57:50 +02:00			`target_times[target_name] = target_runtime`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00
Avoid unnecessary comprehensions. 2020-06-18 19:53:00 +02:00			`return dict(sorted(target_times.items(), key=lambda i: i[1], reverse=True))`
hacking - Add script to easily rebalance Shippable groups (#67976) * hacking - Add script to easily rebalance Shippable groups * Fix py26 compile issue * Add option to automatically change test target aliases 2020-03-23 23:14:09 +01:00

			`def print_test_runtime(target_times):`
			`"""Prints a nice summary of a dict containing test target names and their runtime."""`
			`target_name_max_len = 0`
			`for target_name in target_times.keys():`
			`target_name_max_len = max(target_name_max_len, len(target_name))`

			`print("%s \| Seconds \|" % ("Target Name".ljust(target_name_max_len),))`
			`print("%s \| ------- \|" % ("-" * target_name_max_len,))`
			`for target_name, target_time in target_times.items():`
			`print("%s \| %s \|" % (target_name.ljust(target_name_max_len), str(target_time).ljust(7)))`


			`def rebalance(args):`
			`"""Prints a nice summary of a proposed rebalanced configuration based on the downloaded Shippable result."""`
			`test_path = os.path.expanduser(os.path.expandvars(args.test_results_path))`
			`target_times = get_raw_test_targets(args, test_path)`

			`group_info = dict([(i, {'targets': [], 'total_time': 0}) for i in range(1, int(args.group_count) + 1)])`

			`# Now add each test to the group with the lowest running time.`
			`for target_name, target_time in target_times.items():`
			`index, total_time = min(enumerate([g['total_time'] for g in group_info.values()]), key=operator.itemgetter(1))`
			`group_info[index + 1]['targets'].append(target_name)`
			`group_info[index + 1]['total_time'] = total_time + target_time`

			`# Print a summary of the proposed test split.`
			`for group_number, test_info in group_info.items():`
			`print("Group %d - Total Runtime (s): %d" % (group_number, test_info['total_time']))`
			`print_test_runtime(dict([(n, target_times[n]) for n in test_info['targets']]))`
			`print()`

			`if args.target_path:`
			`target_path = os.path.expanduser(os.path.expandvars(args.target_path))`

			`for test_root in ['test', 'tests']: # ansible/ansible uses 'test' but collections use 'tests'.`
			`integration_root = os.path.join(target_path, test_root, 'integration', 'targets')`
			`if os.path.isdir(integration_root):`
			`if args.verbose:`
			`print("Found test integration target dir at '%s'" % integration_root)`
			`break`

			`else:`
			`# Failed to find test integration target folder`
			`raise ValueError("Failed to find the test target folder on test/integration/targets or "`
			`"tests/integration/targets under '%s'." % target_path)`

			`for group_number, test_info in group_info.items():`
			`for test_target in test_info['targets']:`
			`test_target_aliases = os.path.join(integration_root, test_target, 'aliases')`
			`if not os.path.isfile(test_target_aliases):`
			`if args.verbose:`
			`print("Cannot find test target alias file at '%s', skipping." % test_target_aliases)`
			`continue`

			`with open(test_target_aliases, mode='r') as fd:`
			`test_aliases = fd.readlines()`

			`changed = False`
			`for idx, line in enumerate(test_aliases):`
			`group_match = re.match(r'shippable/(.*)/group(\d+)', line)`
			`if group_match:`
			`if int(group_match.group(2)) != group_number:`
			`new_group = 'shippable/%s/group%d\n' % (group_match.group(1), group_number)`
			`if args.verbose:`
			`print("Changing %s group from '%s' to '%s'" % (test_target, group_match.group(0),`
			`new_group.rstrip()))`
			`test_aliases[idx] = new_group`
			`changed = True`
			`break`
			`else:`
			`if args.verbose:`
			`print("Test target %s matches proposed group number, no changed required" % test_target)`

			`if changed:`
			`with open(test_target_aliases, mode='w') as fd:`
			`fd.writelines(test_aliases)`


			`if __name__ == '__main__':`
			`main()`