wait_for_connection: Wait for system to become reachable (#20011)

* WIP: wait_for_connection: Wait for system to be reachable

This action plugin allows to check when a system is back online and
usable by Ansible.

As an example, when doing a SysPrep and running Enable-WinRM.ps1, it
takes between 10 to 20 seconds between the WinRM TCP port to open, and
it actually being able to server Ansible requests. This time is variable
and depends on the boot process.

Current implementation is specific for Windows (WinRM) only, this will
be fixed shortly.

This fixes #19998

* Support other transport types

* Various improvements

- Fix reported typo
- Add transport_test support in accelerate plugin
- Ensure port is an integer

* Improve examples

* Small fixes

- Use correct ConfigureRemotingForAnsible.ps1 script name
- Only use win_ping when remote shell is known to be Powershell
- Add integration tests to CI framework
This commit is contained in:
Dag Wieers 2017-03-01 20:00:49 +01:00 committed by Matt Davis
parent 0bd2714122
commit 52959ebdc1
9 changed files with 270 additions and 1 deletions

View file

@ -0,0 +1,108 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# (c) 2017, Dag Wieers <dag@wieers.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
ANSIBLE_METADATA = {'status': ['stableinterface'],
'supported_by': 'core',
'version': '1.0'}
DOCUMENTATION = r'''
---
module: wait_for_connection
short_description: Waits until remote system is reachable/usable
description:
- Waits for a total of C(timeout) seconds.
- Retries the transport connection after a timeout of C(connect_timeout).
- Tests the transport connection every C(sleep) seconds.
- This module makes use of internal ansible transport (and configuration) and the ping/win_ping module to guarantee correct end-to-end functioning.
version_added: "2.3"
options:
connect_timeout:
description:
- Maximum number of seconds to wait for a connection to happen before closing and retrying.
default: 5
delay:
description:
- Number of seconds to wait before starting to poll.
default: 0
sleep:
default: 1
description:
- Number of seconds to sleep between checks.
timeout:
description:
- Maximum number of seconds to wait for.
default: 300
author: "Dag Wieers (@dagwieers)"
'''
EXAMPLES = r'''
- name: Wait 300 seconds for target connection to become reachable/usable
wait_for_connection:
- name: Wait 600 seconds, but only start checking after 60 seconds
wait_for_connection:
delay: 60
timeout: 600
# Wake desktops, wait for them to become ready and continue playbook
- hosts: all
gather_facts: no
tasks:
- name: Send magic Wake-On-Lan packet to turn on individual systems
wakeonlan:
mac: '{{ mac }}'
broadcast: 192.168.0.255
delegate_to: localhost
- name: Wait for system to become reachable
wait_for_connection:
- name: Gather facts for first time
setup:
# Build a new VM, wait for it to become ready and continue playbook
- hosts: all
gather_facts: no
tasks:
- name: Clone new VM, if missing
vmware_guest:
hostname: '{{ vcenter_ipaddress }}'
name: '{{ inventory_hostname_short }}'
template: Windows 2012R2
customization:
hostname: '{{ vm_shortname }}'
runonce:
- powershell.exe -ExecutionPolicy Unrestricted -File C:\Windows\Temp\ConfigureRemotingForAnsible.ps1 -CertValidityDays 3650 -ForceNewSSLCert
delegate_to: localhost
- name: Wait for system to become reachable over WinRM
wait_for_connection:
- name: Gather facts for first time
setup:
'''
RETURN = r'''
elapsed:
description: The number of seconds that elapsed waiting for the connection to appear.
returned: always
type: integer
sample: 23
'''

View file

@ -0,0 +1,119 @@
# (c) 2017, Dag Wieers <dag@wieers.com>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
# CI-required python3 boilerplate
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
import time
from datetime import datetime, timedelta
from ansible.module_utils.pycompat24 import get_exception
from ansible.plugins.action import ActionBase
try:
from __main__ import display
except ImportError:
from ansible.utils.display import Display
display = Display()
class TimedOutException(Exception):
pass
class ActionModule(ActionBase):
TRANSFERS_FILES = False
DEFAULT_CONNECT_TIMEOUT = 5
DEFAULT_DELAY = 0
DEFAULT_SLEEP = 1
DEFAULT_TIMEOUT = 600
def do_until_success_or_timeout(self, what, timeout, connect_timeout, what_desc, sleep=1):
max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
while datetime.utcnow() < max_end_time:
try:
what(connect_timeout)
if what_desc:
display.debug("wait_for_connection: %s success" % what_desc)
return
except Exception:
e = get_exception()
if what_desc:
display.debug("wait_for_connection: %s fail (expected), retrying in %d seconds..." % (what_desc, sleep))
time.sleep(sleep)
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, str(e)))
def run(self, tmp=None, task_vars=None):
if task_vars is None:
task_vars = dict()
connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
delay = int(self._task.args.get('delay', self.DEFAULT_DELAY))
sleep = int(self._task.args.get('sleep', self.DEFAULT_SLEEP))
timeout = int(self._task.args.get('timeout', self.DEFAULT_TIMEOUT))
if self._play_context.check_mode:
display.vvv("wait_for_connection: skipping for check_mode")
return dict(skipped=True)
result = super(ActionModule, self).run(tmp, task_vars)
def ping_module_test(connect_timeout):
''' Test ping module, if available '''
display.vvv("wait_for_connection: attempting ping module test")
# call connection reset between runs if it's there
try:
self._connection._reset()
except AttributeError:
pass
# Use win_ping on winrm/powershell, else use ping
if hasattr(self._connection, '_shell_type') and self._connection._shell_type == 'powershell':
ping_result = self._execute_module(module_name='win_ping', module_args=dict(), tmp=tmp, task_vars=task_vars)
else:
ping_result = self._execute_module(module_name='ping', module_args=dict(), tmp=tmp, task_vars=task_vars)
# Test module output
if ping_result['ping'] != 'pong':
raise Exception('ping test failed')
start = datetime.now()
if delay:
time.sleep(delay)
try:
# If the connection has a transport_test method, use it first
if hasattr(self._connection, 'transport_test'):
self.do_until_success_or_timeout(self._connection.transport_test, timeout, connect_timeout, what_desc="connection port up", sleep=sleep)
# Use the ping module test to determine end-to-end connectivity
self.do_until_success_or_timeout(ping_module_test, timeout, connect_timeout, what_desc="ping module test success", sleep=sleep)
except TimedOutException:
e = get_exception()
result['failed'] = True
result['msg'] = str(e)
elapsed = datetime.now() - start
result['elapsed'] = elapsed.seconds
return result

View file

@ -96,6 +96,14 @@ class Connection(ConnectionBase):
self._connected = True
return self
def transport_test(self, connect_timeout):
''' Test the transport mechanism, if available '''
host = self._play_context.remote_addr
port = int(self._play_context.accelerate_port or 5099)
display.vvv("attempting transport test to %s:%s" % (host, port))
sock = socket.create_connection((host, port), connect_timeout)
sock.close()
def send_data(self, data):
packed_len = struct.pack('!Q',len(data))
return self.conn.sendall(packed_len + data)

View file

@ -133,6 +133,14 @@ class Connection(ConnectionBase):
transport = 'paramiko'
def transport_test(self, connect_timeout):
''' Test the transport mechanism, if available '''
host = self._play_context.remote_addr
port = int(self._play_context.port or 22)
display.vvv("attempting transport test to %s:%s" % (host, port))
sock = socket.create_connection((host, port), connect_timeout)
sock.close()
def _cache_key(self):
return "%s__%s__" % (self._play_context.remote_addr, self._play_context.remote_user)

View file

@ -25,6 +25,7 @@ import fcntl
import hashlib
import os
import pty
import socket
import subprocess
import time
@ -61,7 +62,7 @@ class Connection(ConnectionBase):
super(Connection, self).__init__(*args, **kwargs)
self.host = self._play_context.remote_addr
self.port = self._play_context.port
self.port = int(self._play_context.port or 22)
self.user = self._play_context.remote_user
self.control_path = C.ANSIBLE_SSH_CONTROL_PATH
self.control_path_dir = C.ANSIBLE_SSH_CONTROL_PATH_DIR
@ -73,6 +74,12 @@ class Connection(ConnectionBase):
def _connect(self):
return self
def transport_test(self, connect_timeout):
''' Test the transport mechanism, if available '''
display.vvv("attempting transport test to %s:%s" % (self.host, self.port))
sock = socket.create_connection((self.host, self.port), connect_timeout)
sock.close()
@staticmethod
def _create_control_path(host, port, user):
'''Make a hash for the controlpath based on con attributes'''

View file

@ -23,6 +23,7 @@ import inspect
import os
import re
import shlex
import socket
import traceback
import json
import tempfile
@ -89,6 +90,14 @@ class Connection(ConnectionBase):
super(Connection, self).__init__(*args, **kwargs)
def transport_test(self, connect_timeout):
''' Test the transport mechanism, if available '''
host = self._play_context.remote_addr
port = int(self._play_context.port or 5986)
display.vvv("attempting transport test to %s:%s" % (host, port))
sock = socket.create_connection((host, port), connect_timeout)
sock.close()
def set_host_overrides(self, host, hostvars=None):
'''
Override WinRM-specific options from host variables.

View file

@ -38,3 +38,6 @@
local_action: file path={{ local_tmp }}-汉语 state=absent
- name: remove remote temp file
action: "{{ action_prefix }}file path={{ remote_tmp }}-汉语 state=absent"
### test wait_for_connection plugin
- wait_for_connection:

View file

@ -0,0 +1,2 @@
posix/ci/group1
windows/ci/group1

View file

@ -0,0 +1,5 @@
- name: Test normal connection to target node
wait_for_connection:
connect_timeout: 5
sleep: 1
timeout: 10