Make the timeout decorator raise an exception out of the function's scope (#49921)

* Revert "allow caller to deal with timeout (#49449)"

This reverts commit 63279823a7.

Flawed on many levels

* Adds poor API to a public function
* Papers over the fact that the public function is doing something bad
  by catching exceptions it cannot handle in the first place
* Papers over the real cause of the issue which is a bug in the timeout
  decorator
* Doesn't reraise properly
* Catches the wrong exception

Fixes #49824
Fixes #49817

* Make the timeout decorator properly raise an exception outside of the function's scope

signal handlers which raise exceptions will never work well because the
exception can be raised anywhere in the called code.  This leads to
exception race conditions where the exceptions could end up being
hanlded by unintended pieces of the called code.

The timeout decorator was using just that idiom.  It was especially bad
because the decorator syntactically occurs outside of the called code
but because of the signal handler, the exception was being raised inside
of the called code.

This change uses a thread instead of a signal to manage the timeout in
parallel to the execution of the decorated function.  Since raising of
the exception happens inside of the decorator, now, instead of inside of
a signal handler, the timeout exception is raised from outside of the
called code as expected which makes reasoning about where exceptions are
to be expected intuitive again.

Fixes #43884

* Add a common case test.

Adding an integration test driven from our unittests.  Most of the time
we'll timeout in run_command which is running things in a subprocess.
Create a test for that specific case in case anything funky comes up
between threading and execve.

* Don't use OSError-based TimeoutError as a base class

Unlike most standard exceptions, OSError has a specific parameter list
with specific meanings.  Instead follow the example of other stdlib
functions, concurrent.futures and multiprocessing and define a separate
TimeoutException.

* Add comment and docstring to point out that this is not hte Python3 TimeoutError
This commit is contained in:
Toshio Kuratomi 2018-12-18 18:01:46 -08:00 committed by GitHub
parent 2b48c0187c
commit bd072fe83a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 78 additions and 28 deletions

View file

@ -2605,7 +2605,7 @@ class AnsibleModule(object):
def run_command(self, args, check_rc=False, close_fds=True, executable=None, data=None, binary_data=False, path_prefix=None, cwd=None, def run_command(self, args, check_rc=False, close_fds=True, executable=None, data=None, binary_data=False, path_prefix=None, cwd=None,
use_unsafe_shell=False, prompt_regex=None, environ_update=None, umask=None, encoding='utf-8', errors='surrogate_or_strict', use_unsafe_shell=False, prompt_regex=None, environ_update=None, umask=None, encoding='utf-8', errors='surrogate_or_strict',
expand_user_and_vars=True, pass_fds=None, before_communicate_callback=None, raise_timeouts=False): expand_user_and_vars=True, pass_fds=None, before_communicate_callback=None):
''' '''
Execute a command, returns rc, stdout, and stderr. Execute a command, returns rc, stdout, and stderr.
@ -2655,9 +2655,6 @@ class AnsibleModule(object):
after ``Popen`` object will be created after ``Popen`` object will be created
but before communicating to the process. but before communicating to the process.
(``Popen`` object will be passed to callback as a first argument) (``Popen`` object will be passed to callback as a first argument)
:kw raise_timeouts: This is a boolean, which when True, will allow the
caller to deal with timeout exceptions. When false we use the previous
behaviour of having run_command directly call fail_json when they occur.
:returns: A 3-tuple of return code (integer), stdout (native string), :returns: A 3-tuple of return code (integer), stdout (native string),
and stderr (native string). On python2, stdout and stderr are both and stderr (native string). On python2, stdout and stderr are both
byte strings. On python3, stdout and stderr are text strings converted byte strings. On python3, stdout and stderr are text strings converted
@ -2831,12 +2828,6 @@ class AnsibleModule(object):
cmd.stderr.close() cmd.stderr.close()
rc = cmd.returncode rc = cmd.returncode
except TimeoutError as e:
self.log("Timeout Executing CMD:%s Timeout :%s" % (self._clean_args(args), to_native(e)))
if raise_timeouts:
raise e
else:
self.fail_json(rc=e.errno, msg=to_native(e), cmd=self._clean_args(args))
except (OSError, IOError) as e: except (OSError, IOError) as e:
self.log("Error Executing CMD:%s Exception:%s" % (self._clean_args(args), to_native(e))) self.log("Error Executing CMD:%s Exception:%s" % (self._clean_args(args), to_native(e)))
self.fail_json(rc=e.errno, msg=to_native(e), cmd=self._clean_args(args)) self.fail_json(rc=e.errno, msg=to_native(e), cmd=self._clean_args(args))

View file

@ -16,7 +16,8 @@
from __future__ import (absolute_import, division, print_function) from __future__ import (absolute_import, division, print_function)
__metaclass__ = type __metaclass__ = type
import signal import multiprocessing
import multiprocessing.pool as mp
# timeout function to make sure some fact gathering # timeout function to make sure some fact gathering
# steps do not exceed a time limit # steps do not exceed a time limit
@ -30,24 +31,25 @@ class TimeoutError(Exception):
def timeout(seconds=None, error_message="Timer expired"): def timeout(seconds=None, error_message="Timer expired"):
"""
Timeout decorator to expire after a set number of seconds. This raises an
ansible.module_utils.facts.TimeoutError if the timeout is hit before the
function completes.
"""
def decorator(func): def decorator(func):
def _handle_timeout(signum, frame):
msg = 'Timer expired after %s seconds' % globals().get('GATHER_TIMEOUT')
raise TimeoutError(msg)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
local_seconds = seconds timeout_value = seconds
if local_seconds is None: if timeout_value is None:
local_seconds = globals().get('GATHER_TIMEOUT') or DEFAULT_GATHER_TIMEOUT timeout_value = globals().get('GATHER_TIMEOUT') or DEFAULT_GATHER_TIMEOUT
signal.signal(signal.SIGALRM, _handle_timeout)
signal.alarm(local_seconds)
pool = mp.ThreadPool(processes=1)
res = pool.apply_async(func, args, kwargs)
pool.close()
try: try:
result = func(*args, **kwargs) return res.get(timeout_value)
finally: except multiprocessing.TimeoutError:
signal.alarm(0) # This is an ansible.module_utils.common.facts.timeout.TimeoutError
return result raise TimeoutError('Timer expired after %s seconds' % timeout_value)
return wrapper return wrapper

View file

@ -20,13 +20,11 @@
from __future__ import (absolute_import, division) from __future__ import (absolute_import, division)
__metaclass__ = type __metaclass__ = type
import sys
import time import time
import pytest import pytest
from units.compat import unittest
from units.compat.mock import patch, MagicMock
from ansible.module_utils.facts import timeout from ansible.module_utils.facts import timeout
@ -67,6 +65,10 @@ def sleep_amount_explicit_lower(amount):
return 'Succeeded after {0} sec'.format(amount) return 'Succeeded after {0} sec'.format(amount)
#
# Tests for how the timeout decorator is specified
#
def test_defaults_still_within_bounds(): def test_defaults_still_within_bounds():
# If the default changes outside of these bounds, some of the tests will # If the default changes outside of these bounds, some of the tests will
# no longer test the right thing. Need to review and update the timeouts # no longer test the right thing. Need to review and update the timeouts
@ -110,3 +112,58 @@ def test_explicit_timeout():
sleep_time = 3 sleep_time = 3
with pytest.raises(timeout.TimeoutError): with pytest.raises(timeout.TimeoutError):
assert sleep_amount_explicit_lower(sleep_time) == '(Not expected to succeed)' assert sleep_amount_explicit_lower(sleep_time) == '(Not expected to succeed)'
#
# Test that exception handling works
#
@timeout.timeout(1)
def function_times_out():
time.sleep(2)
# This is just about the same test as function_times_out but uses a separate process which is where
# we normally have our timeouts. It's more of an integration test than a unit test.
@timeout.timeout(1)
def function_times_out_in_run_command(am):
am.run_command([sys.executable, '-c', 'import time ; time.sleep(2)'])
@timeout.timeout(1)
def function_other_timeout():
raise TimeoutError('Vanilla Timeout')
@timeout.timeout(1)
def function_raises():
1 / 0
@timeout.timeout(1)
def function_catches_all_exceptions():
try:
time.sleep(10)
except BaseException:
raise RuntimeError('We should not have gotten here')
def test_timeout_raises_timeout():
with pytest.raises(timeout.TimeoutError):
assert function_times_out() == '(Not expected to succeed)'
@pytest.mark.parametrize('stdin', ({},), indirect=['stdin'])
def test_timeout_raises_timeout_integration_test(am):
with pytest.raises(timeout.TimeoutError):
assert function_times_out_in_run_command(am) == '(Not expected to succeed)'
def test_timeout_raises_other_exception():
with pytest.raises(ZeroDivisionError):
assert function_raises() == '(Not expected to succeed)'
def test_exception_not_caught_by_called_code():
with pytest.raises(timeout.TimeoutError):
assert function_catches_all_exceptions() == '(Not expected to succeed)'