From e7819609ceaa6e3abf3bb8c1904b46fdb2d934e6 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 27 Sep 2016 10:31:40 -0700 Subject: [PATCH] swallow errors during async polling (#17760) also use connection reset on exceptions if available (eg, prevent reuse of "stuck" WinRM connections due to reboot/NIC bounce/etc) --- lib/ansible/executor/task_executor.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/ansible/executor/task_executor.py b/lib/ansible/executor/task_executor.py index 485533f25ba..e7e781106f1 100644 --- a/lib/ansible/executor/task_executor.py +++ b/lib/ansible/executor/task_executor.py @@ -619,14 +619,24 @@ class TaskExecutor: while time_left > 0: time.sleep(self._task.poll) - async_result = normal_handler.run(task_vars=task_vars) - # We do not bail out of the loop in cases where the failure - # is associated with a parsing error. The async_runner can - # have issues which result in a half-written/unparseable result - # file on disk, which manifests to the user as a timeout happening - # before it's time to timeout. - if int(async_result.get('finished', 0)) == 1 or ('failed' in async_result and async_result.get('_ansible_parsed', False)) or 'skipped' in async_result: - break + try: + async_result = normal_handler.run(task_vars=task_vars) + # We do not bail out of the loop in cases where the failure + # is associated with a parsing error. The async_runner can + # have issues which result in a half-written/unparseable result + # file on disk, which manifests to the user as a timeout happening + # before it's time to timeout. + if int(async_result.get('finished', 0)) == 1 or ('failed' in async_result and async_result.get('_ansible_parsed', False)) or 'skipped' in async_result: + break + except Exception as e: + # Connections can raise exceptions during polling (eg, network bounce, reboot); these should be non-fatal. 
+ # On an exception, call the connection's reset method if it has one (e.g., drop and recreate a WinRM connection, since a reused connection may be left in a broken state) + display.vvvv("Exception during async poll, retrying... (%s)" % to_text(e)) + display.debug("Async poll exception was:\n%s" % to_text(traceback.format_exc())) + try: + normal_handler._connection._reset() + except AttributeError: + pass time_left -= self._task.poll