Add timeout support to ansible-test. (#53302)
* Add timeout support to ansible-test. * Fix ansible-test tar filename filter bug. * Update timeouts used on Shippable. * Kill subprocesses when parent process terminates. * Require explicit use of env --show option.
This commit is contained in:
parent
44b347aef5
commit
a8d829d9c3
9 changed files with 218 additions and 20 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -105,3 +105,4 @@ test/units/.coverage.*
|
||||||
/test/integration/cloud-config-azure.yml
|
/test/integration/cloud-config-azure.yml
|
||||||
/SYMLINK_CACHE.json
|
/SYMLINK_CACHE.json
|
||||||
changelogs/.plugin-cache.yaml
|
changelogs/.plugin-cache.yaml
|
||||||
|
.ansible-test-timeout.json
|
||||||
|
|
|
@ -52,6 +52,7 @@ from lib.config import (
|
||||||
from lib.env import (
|
from lib.env import (
|
||||||
EnvConfig,
|
EnvConfig,
|
||||||
command_env,
|
command_env,
|
||||||
|
configure_timeout,
|
||||||
)
|
)
|
||||||
|
|
||||||
from lib.sanity import (
|
from lib.sanity import (
|
||||||
|
@ -95,6 +96,7 @@ def main():
|
||||||
display.color = config.color
|
display.color = config.color
|
||||||
display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets)
|
display.info_stderr = (isinstance(config, SanityConfig) and config.lint) or (isinstance(config, IntegrationConfig) and config.list_targets)
|
||||||
check_startup()
|
check_startup()
|
||||||
|
configure_timeout(config)
|
||||||
|
|
||||||
display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2)
|
display.info('RLIMIT_NOFILE: %s' % (CURRENT_RLIMIT_NOFILE,), verbosity=2)
|
||||||
display.info('MAXFD: %d' % MAXFD, verbosity=2)
|
display.info('MAXFD: %d' % MAXFD, verbosity=2)
|
||||||
|
@ -509,6 +511,11 @@ def parse_args():
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='dump environment to disk')
|
help='dump environment to disk')
|
||||||
|
|
||||||
|
env.add_argument('--timeout',
|
||||||
|
type=int,
|
||||||
|
metavar='MINUTES',
|
||||||
|
help='timeout for future ansible-test commands (0 clears)')
|
||||||
|
|
||||||
if argcomplete:
|
if argcomplete:
|
||||||
argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True)
|
argcomplete.autocomplete(parser, always_complete_options=False, validator=lambda i, k: True)
|
||||||
|
|
||||||
|
|
|
@ -3,3 +3,6 @@
|
||||||
# Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True.
|
# Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True.
|
||||||
# This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time.
|
# This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time.
|
||||||
SOFT_RLIMIT_NOFILE = 1024
|
SOFT_RLIMIT_NOFILE = 1024
|
||||||
|
|
||||||
|
# File used to track the ansible-test test execution timeout.
|
||||||
|
TIMEOUT_PATH = '.ansible-test-timeout.json'
|
||||||
|
|
|
@ -4,13 +4,17 @@ from __future__ import absolute_import, print_function
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
import functools
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
from lib.config import (
|
from lib.config import (
|
||||||
CommonConfig,
|
CommonConfig,
|
||||||
|
TestConfig,
|
||||||
)
|
)
|
||||||
|
|
||||||
from lib.util import (
|
from lib.util import (
|
||||||
|
@ -34,6 +38,18 @@ from lib.docker_util import (
|
||||||
docker_version
|
docker_version
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from lib.thread import (
|
||||||
|
WrappedThread,
|
||||||
|
)
|
||||||
|
|
||||||
|
from lib.constants import (
|
||||||
|
TIMEOUT_PATH,
|
||||||
|
)
|
||||||
|
|
||||||
|
from lib.test import (
|
||||||
|
TestTimeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EnvConfig(CommonConfig):
|
class EnvConfig(CommonConfig):
|
||||||
"""Configuration for the tools command."""
|
"""Configuration for the tools command."""
|
||||||
|
@ -43,14 +59,26 @@ class EnvConfig(CommonConfig):
|
||||||
"""
|
"""
|
||||||
super(EnvConfig, self).__init__(args, 'env')
|
super(EnvConfig, self).__init__(args, 'env')
|
||||||
|
|
||||||
self.show = args.show or not args.dump
|
self.show = args.show
|
||||||
self.dump = args.dump
|
self.dump = args.dump
|
||||||
|
self.timeout = args.timeout
|
||||||
|
|
||||||
|
|
||||||
def command_env(args):
|
def command_env(args):
|
||||||
"""
|
"""
|
||||||
:type args: EnvConfig
|
:type args: EnvConfig
|
||||||
"""
|
"""
|
||||||
|
show_dump_env(args)
|
||||||
|
set_timeout(args)
|
||||||
|
|
||||||
|
|
||||||
|
def show_dump_env(args):
|
||||||
|
"""
|
||||||
|
:type args: EnvConfig
|
||||||
|
"""
|
||||||
|
if not args.show and not args.dump:
|
||||||
|
return
|
||||||
|
|
||||||
data = dict(
|
data = dict(
|
||||||
ansible=dict(
|
ansible=dict(
|
||||||
version=get_ansible_version(args),
|
version=get_ansible_version(args),
|
||||||
|
@ -84,6 +112,105 @@ def command_env(args):
|
||||||
results_fd.write(json.dumps(data, sort_keys=True))
|
results_fd.write(json.dumps(data, sort_keys=True))
|
||||||
|
|
||||||
|
|
||||||
|
def set_timeout(args):
|
||||||
|
"""
|
||||||
|
:type args: EnvConfig
|
||||||
|
"""
|
||||||
|
if args.timeout is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
if args.timeout:
|
||||||
|
deadline = (datetime.datetime.utcnow() + datetime.timedelta(minutes=args.timeout)).strftime('%Y-%m-%dT%H:%M:%SZ')
|
||||||
|
|
||||||
|
display.info('Setting a %d minute test timeout which will end at: %s' % (args.timeout, deadline), verbosity=1)
|
||||||
|
else:
|
||||||
|
deadline = None
|
||||||
|
|
||||||
|
display.info('Clearing existing test timeout.', verbosity=1)
|
||||||
|
|
||||||
|
if args.explain:
|
||||||
|
return
|
||||||
|
|
||||||
|
if deadline:
|
||||||
|
data = dict(
|
||||||
|
duration=args.timeout,
|
||||||
|
deadline=deadline,
|
||||||
|
)
|
||||||
|
|
||||||
|
with open(TIMEOUT_PATH, 'w') as timeout_fd:
|
||||||
|
json.dump(data, timeout_fd, indent=4, sort_keys=True)
|
||||||
|
elif os.path.exists(TIMEOUT_PATH):
|
||||||
|
os.remove(TIMEOUT_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
def get_timeout():
|
||||||
|
"""
|
||||||
|
:rtype: dict[str, any] | None
|
||||||
|
"""
|
||||||
|
if not os.path.exists(TIMEOUT_PATH):
|
||||||
|
return None
|
||||||
|
|
||||||
|
with open(TIMEOUT_PATH, 'r') as timeout_fd:
|
||||||
|
data = json.load(timeout_fd)
|
||||||
|
|
||||||
|
data['deadline'] = datetime.datetime.strptime(data['deadline'], '%Y-%m-%dT%H:%M:%SZ')
|
||||||
|
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def configure_timeout(args):
|
||||||
|
"""
|
||||||
|
:type args: CommonConfig
|
||||||
|
"""
|
||||||
|
if isinstance(args, TestConfig):
|
||||||
|
configure_test_timeout(args) # only tests are subject to the timeout
|
||||||
|
|
||||||
|
|
||||||
|
def configure_test_timeout(args):
|
||||||
|
"""
|
||||||
|
:type args: TestConfig
|
||||||
|
"""
|
||||||
|
timeout = get_timeout()
|
||||||
|
|
||||||
|
if not timeout:
|
||||||
|
return
|
||||||
|
|
||||||
|
timeout_start = datetime.datetime.utcnow()
|
||||||
|
timeout_duration = timeout['duration']
|
||||||
|
timeout_deadline = timeout['deadline']
|
||||||
|
timeout_remaining = timeout_deadline - timeout_start
|
||||||
|
|
||||||
|
test_timeout = TestTimeout(timeout_duration)
|
||||||
|
|
||||||
|
if timeout_remaining <= datetime.timedelta():
|
||||||
|
test_timeout.write(args)
|
||||||
|
|
||||||
|
raise ApplicationError('The %d minute test timeout expired %s ago at %s.' % (
|
||||||
|
timeout_duration, timeout_remaining * -1, timeout_deadline))
|
||||||
|
|
||||||
|
display.info('The %d minute test timeout expires in %s at %s.' % (
|
||||||
|
timeout_duration, timeout_remaining, timeout_deadline), verbosity=1)
|
||||||
|
|
||||||
|
def timeout_handler(_dummy1, _dummy2):
|
||||||
|
"""Runs when SIGUSR1 is received."""
|
||||||
|
test_timeout.write(args)
|
||||||
|
|
||||||
|
raise ApplicationError('Tests aborted after exceeding the %d minute time limit.' % timeout_duration)
|
||||||
|
|
||||||
|
def timeout_waiter(timeout_seconds):
|
||||||
|
"""
|
||||||
|
:type timeout_seconds: int
|
||||||
|
"""
|
||||||
|
time.sleep(timeout_seconds)
|
||||||
|
os.kill(os.getpid(), signal.SIGUSR1)
|
||||||
|
|
||||||
|
signal.signal(signal.SIGUSR1, timeout_handler)
|
||||||
|
|
||||||
|
instance = WrappedThread(functools.partial(timeout_waiter, timeout_remaining.seconds))
|
||||||
|
instance.daemon = True
|
||||||
|
instance.start()
|
||||||
|
|
||||||
|
|
||||||
def show_dict(data, verbose, root_verbosity=0, path=None):
|
def show_dict(data, verbose, root_verbosity=0, path=None):
|
||||||
"""
|
"""
|
||||||
:type data: dict[str, any]
|
:type data: dict[str, any]
|
||||||
|
|
|
@ -11,6 +11,10 @@ from lib.util import (
|
||||||
ABC,
|
ABC,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from lib.constants import (
|
||||||
|
TIMEOUT_PATH,
|
||||||
|
)
|
||||||
|
|
||||||
# improve performance by disabling uid/gid lookups
|
# improve performance by disabling uid/gid lookups
|
||||||
tarfile.pwd = None
|
tarfile.pwd = None
|
||||||
tarfile.grp = None
|
tarfile.grp = None
|
||||||
|
@ -45,6 +49,7 @@ class DefaultTarFilter(TarFilter):
|
||||||
self.ignore_files = (
|
self.ignore_files = (
|
||||||
'.gitignore',
|
'.gitignore',
|
||||||
'.gitdir',
|
'.gitdir',
|
||||||
|
TIMEOUT_PATH,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.ignore_extensions = (
|
self.ignore_extensions = (
|
||||||
|
@ -58,7 +63,7 @@ class DefaultTarFilter(TarFilter):
|
||||||
:rtype: tarfile.TarInfo | None
|
:rtype: tarfile.TarInfo | None
|
||||||
"""
|
"""
|
||||||
filename = os.path.basename(item.path)
|
filename = os.path.basename(item.path)
|
||||||
name, ext = os.path.splitext(filename)
|
ext = os.path.splitext(filename)[1]
|
||||||
dirs = os.path.split(item.path)
|
dirs = os.path.split(item.path)
|
||||||
|
|
||||||
if not item.isdir():
|
if not item.isdir():
|
||||||
|
@ -68,7 +73,7 @@ class DefaultTarFilter(TarFilter):
|
||||||
if item.path.startswith('./docs/docsite/_build/'):
|
if item.path.startswith('./docs/docsite/_build/'):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if name in self.ignore_files:
|
if filename in self.ignore_files:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if ext in self.ignore_extensions:
|
if ext in self.ignore_extensions:
|
||||||
|
|
|
@ -57,15 +57,17 @@ def calculate_confidence(path, line, metadata):
|
||||||
|
|
||||||
class TestResult(object):
|
class TestResult(object):
|
||||||
"""Base class for test results."""
|
"""Base class for test results."""
|
||||||
def __init__(self, command, test, python_version=None):
|
def __init__(self, command, test, python_version=None, enable_junit=False):
|
||||||
"""
|
"""
|
||||||
:type command: str
|
:type command: str
|
||||||
:type test: str
|
:type test: str
|
||||||
:type python_version: str
|
:type python_version: str
|
||||||
|
:type enable_junit: bool
|
||||||
"""
|
"""
|
||||||
self.command = command
|
self.command = command
|
||||||
self.test = test
|
self.test = test
|
||||||
self.python_version = python_version
|
self.python_version = python_version
|
||||||
|
self.enable_junit = enable_junit
|
||||||
self.name = self.test or self.command
|
self.name = self.test or self.command
|
||||||
|
|
||||||
if self.python_version:
|
if self.python_version:
|
||||||
|
@ -88,7 +90,7 @@ class TestResult(object):
|
||||||
if args.lint:
|
if args.lint:
|
||||||
self.write_lint()
|
self.write_lint()
|
||||||
|
|
||||||
if args.junit:
|
if args.junit or self.enable_junit:
|
||||||
if self.junit:
|
if self.junit:
|
||||||
self.write_junit(args)
|
self.write_junit(args)
|
||||||
else:
|
else:
|
||||||
|
@ -159,6 +161,38 @@ class TestResult(object):
|
||||||
xml.write(report.encode('utf-8', 'strict'))
|
xml.write(report.encode('utf-8', 'strict'))
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeout(TestResult):
|
||||||
|
"""Test timeout."""
|
||||||
|
def __init__(self, timeout_duration):
|
||||||
|
"""
|
||||||
|
:type timeout_duration: int
|
||||||
|
"""
|
||||||
|
super(TestTimeout, self).__init__(command='timeout', test='', enable_junit=True)
|
||||||
|
|
||||||
|
self.timeout_duration = timeout_duration
|
||||||
|
|
||||||
|
def write_junit(self, args):
|
||||||
|
"""
|
||||||
|
:type args: TestConfig
|
||||||
|
"""
|
||||||
|
message = 'Tests were aborted after exceeding the %d minute time limit.' % self.timeout_duration
|
||||||
|
output = '''One or more of the following situations may be responsible:
|
||||||
|
|
||||||
|
- Code changes have resulted in tests that hang or run for an excessive amount of time.
|
||||||
|
- Tests have been added which exceed the time limit when combined with existing tests.
|
||||||
|
- Test infrastructure and/or external dependencies are operating slower than normal.'''
|
||||||
|
|
||||||
|
if args.coverage:
|
||||||
|
output += '\n- Additional overhead from collecting code coverage has resulted in tests exceeding the time limit.'
|
||||||
|
|
||||||
|
output += '\n\nConsult the console log for additional details on where the timeout occurred.'
|
||||||
|
|
||||||
|
test_case = self.junit.TestCase(classname=self.command, name=self.name)
|
||||||
|
test_case.add_error_info(message, output)
|
||||||
|
|
||||||
|
self.save_junit(args, test_case)
|
||||||
|
|
||||||
|
|
||||||
class TestSuccess(TestResult):
|
class TestSuccess(TestResult):
|
||||||
"""Test success."""
|
"""Test success."""
|
||||||
def write_junit(self, args):
|
def write_junit(self, args):
|
||||||
|
|
|
@ -380,7 +380,9 @@ def raw_command(cmd, capture=False, env=None, data=None, cwd=None, explain=False
|
||||||
stderr = None
|
stderr = None
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
process = None
|
||||||
|
|
||||||
|
try:
|
||||||
try:
|
try:
|
||||||
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
|
process = subprocess.Popen(cmd, env=env, stdin=stdin, stdout=stdout, stderr=stderr, cwd=cwd)
|
||||||
except OSError as ex:
|
except OSError as ex:
|
||||||
|
@ -397,6 +399,11 @@ def raw_command(cmd, capture=False, env=None, data=None, cwd=None, explain=False
|
||||||
else:
|
else:
|
||||||
process.wait()
|
process.wait()
|
||||||
stdout_text, stderr_text = None, None
|
stdout_text, stderr_text = None, None
|
||||||
|
finally:
|
||||||
|
if process and process.returncode is None:
|
||||||
|
process.kill()
|
||||||
|
display.info('') # the process we're interrupting may have completed a partial line of output
|
||||||
|
display.notice('Killed command to avoid an orphaned child process during handling of an unexpected exception.')
|
||||||
|
|
||||||
status = process.returncode
|
status = process.returncode
|
||||||
runtime = time.time() - start
|
runtime = time.time() - start
|
||||||
|
|
|
@ -116,6 +116,12 @@ function cleanup
|
||||||
|
|
||||||
trap cleanup EXIT
|
trap cleanup EXIT
|
||||||
|
|
||||||
ansible-test env --dump --show --color -v
|
if [[ "${COVERAGE:-}" ]]; then
|
||||||
|
timeout=60
|
||||||
|
else
|
||||||
|
timeout=45
|
||||||
|
fi
|
||||||
|
|
||||||
|
ansible-test env --dump --show --timeout "${timeout}" --color -v
|
||||||
|
|
||||||
"test/utils/shippable/${script}.sh" "${test}"
|
"test/utils/shippable/${script}.sh" "${test}"
|
||||||
|
|
|
@ -7,5 +7,13 @@ IFS='/:' read -ra args <<< "$1"
|
||||||
|
|
||||||
version="${args[1]}"
|
version="${args[1]}"
|
||||||
|
|
||||||
|
if [[ "${COVERAGE:-}" ]]; then
|
||||||
|
timeout=90
|
||||||
|
else
|
||||||
|
timeout=10
|
||||||
|
fi
|
||||||
|
|
||||||
|
ansible-test env --timeout "${timeout}" --color -v
|
||||||
|
|
||||||
# shellcheck disable=SC2086
|
# shellcheck disable=SC2086
|
||||||
ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \
|
ansible-test units --color -v --docker default --python "${version}" ${COVERAGE:+"$COVERAGE"} ${CHANGED:+"$CHANGED"} \
|
||||||
|
|
Loading…
Reference in a new issue