# ssh connection settings (lib/ansible/constants.py).  Each value is read
# through get_config() from the [ssh_connection] section, with the named
# environment variable and the trailing literal as the fallback default.
ANSIBLE_SSH_ARGS = get_config(
    p, 'ssh_connection', 'ssh_args', 'ANSIBLE_SSH_ARGS', None)
ANSIBLE_SSH_CONTROL_PATH = get_config(
    p, 'ssh_connection', 'control_path', 'ANSIBLE_SSH_CONTROL_PATH',
    "%(directory)s/ansible-ssh-%%h-%%p-%%r")
ANSIBLE_SSH_PIPELINING = get_config(
    p, 'ssh_connection', 'pipelining', 'ANSIBLE_SSH_PIPELINING',
    False, boolean=True)
# Integer setting; the declared default is 0.
ANSIBLE_SSH_RETRIES = get_config(
    p, 'ssh_connection', 'retries', 'ANSIBLE_SSH_RETRIES',
    0, integer=True)
PARAMIKO_RECORD_HOST_KEYS = get_config(
    p, 'paramiko_connection', 'record_host_keys',
    'ANSIBLE_PARAMIKO_RECORD_HOST_KEYS', True, boolean=True)

# obsolete -- will be formally removed
ZEROMQ_PORT = get_config(
    p, 'fireball_connection', 'zeromq_port', 'ANSIBLE_ZEROMQ_PORT',
    5099, integer=True)
ACCELERATE_PORT = get_config(
    p, 'accelerate', 'accelerate_port', 'ACCELERATE_PORT',
    5099, integer=True)
def exec_command(self, *args, **kwargs):
    """Run ``_exec_command``, retrying when the ssh transport itself fails.

    All positional and keyword arguments are forwarded unchanged to
    ``_exec_command`` and its return tuple is handed back unchanged.

    Another attempt is made when:
      * ``_exec_command`` raises an exception, or
      * the first element of the returned tuple is 255.

    No further attempt is made once the configured number of retries
    (``retries`` in ``[ssh_connection]`` / ``ANSIBLE_SSH_RETRIES``) is
    used up; one initial attempt always happens.  Before attempt N
    (counting from 0) the method sleeps ``2 ** N - 1`` seconds, capped
    at 30, so the first attempt starts immediately.

    Returns:
        The tuple returned by the last ``_exec_command`` call; this may
        still carry return code 255 if every attempt failed that way.

    Raises:
        Whatever ``_exec_command`` raised on the final attempt.
    """
    # NOTE(review): lib/ansible/constants.py declares ANSIBLE_SSH_RETRIES
    # with a default of 0, while this lookup falls back to 3 -- confirm
    # which default is intended before unifying them.
    configured_retries = C.get_config(
        C.p, 'ssh_connection', 'retries',
        'ANSIBLE_SSH_RETRIES', 3, integer=True)
    # A negative setting would make the loop run zero times and leave
    # nothing to return, so always allow at least the initial attempt.
    total_attempts = max(configured_retries, 0) + 1

    # cmd is the first parameter of _exec_command but may be passed by
    # keyword; do not assume a positional argument exists.
    cmd = args[0] if args else kwargs.get('cmd')
    cmd_summary = "%s %s..." % (cmd, str(kwargs)[:200])

    return_tuple = None
    for attempt in range(total_attempts):
        # Exponential back-off: 0, 1, 3, 7, 15, then capped at 30 seconds.
        pause = min(2 ** attempt - 1, 30)
        if pause:
            time.sleep(pause)

        try:
            return_tuple = self._exec_command(*args, **kwargs)
        except Exception as e:
            msg = ("ssh_retry: attempt: %d, caught exception(%s) from cmd "
                   "(%s).") % (attempt, e, cmd_summary)
            vv(msg)
            if attempt == total_attempts - 1:
                # Bare raise keeps the original traceback intact.
                raise
            continue

        # 0 = success
        # 1-254 = remote command return code
        # 255 = failure from the ssh command itself
        if return_tuple[0] != 255:
            break

        msg = ('ssh_retry: attempt: %d, ssh return code is 255. cmd '
               '(%s).') % (attempt, cmd_summary)
        vv(msg)

    return return_tuple