win_reboot: change to sample system uptime instead of checking port status (#31767)
* win_reboot: change to sample system uptime instead of checking port status
* Added the connection timeout back in, as it can now be set manually per connection
* Some PEP 8 fixes
* Fixed up the error message on timeout in case an exception wasn't raised
* Changed the docs to English (US) and simplified the uptime check
* Moved the connection timeout over to the new connection config options
This commit is contained in:
parent
0b128e2e29
commit
cf662ed74b
3 changed files with 89 additions and 40 deletions
|
@ -31,6 +31,7 @@ options:
|
|||
description:
|
||||
- Maximum seconds to wait for shutdown to occur
|
||||
- Increase this timeout for very slow hardware, large update applications, etc
|
||||
- This option has been removed since Ansible 2.5 as the win_reboot behavior has changed
|
||||
default: 600
|
||||
aliases: [ shutdown_timeout_sec ]
|
||||
reboot_timeout:
|
||||
|
@ -70,7 +71,6 @@ EXAMPLES = r'''
|
|||
|
||||
# Reboot a slow machine that might have lots of updates to apply
|
||||
- win_reboot:
|
||||
shutdown_timeout: 3600
|
||||
reboot_timeout: 3600
|
||||
'''
|
||||
|
||||
|
|
|
@ -4,12 +4,13 @@
|
|||
from __future__ import (absolute_import, division, print_function)
|
||||
__metaclass__ = type
|
||||
|
||||
import socket
|
||||
import time
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from ansible.errors import AnsibleError
|
||||
from ansible.plugins.action import ActionBase
|
||||
from ansible.module_utils._text import to_native
|
||||
|
||||
try:
|
||||
from __main__ import display
|
||||
|
@ -25,7 +26,6 @@ class TimedOutException(Exception):
|
|||
class ActionModule(ActionBase):
|
||||
TRANSFERS_FILES = False
|
||||
|
||||
DEFAULT_SHUTDOWN_TIMEOUT = 600
|
||||
DEFAULT_REBOOT_TIMEOUT = 600
|
||||
DEFAULT_CONNECT_TIMEOUT = 5
|
||||
DEFAULT_PRE_REBOOT_DELAY = 2
|
||||
|
@ -33,10 +33,20 @@ class ActionModule(ActionBase):
|
|||
DEFAULT_TEST_COMMAND = 'whoami'
|
||||
DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible.'
|
||||
|
||||
def get_system_uptime(self):
    """Return the host's last boot time as reported by the remote command.

    Executes a PowerShell expression over the current connection that reads
    ``LastBootUpTime`` from the ``Win32_OperatingSystem`` WMI class. The
    command's stdout is returned unmodified: ``run`` samples it once before
    the reboot and again afterwards, and only compares the two samples for
    equality, so no parsing is done here.

    :returns: stdout of the command, exactly as ``exec_command`` returned it.
    :raises Exception: if the command exits with a non-zero return code; the
        message carries the return code, stdout and stderr.
    """
    uptime_command = "(Get-WmiObject -ClassName Win32_OperatingSystem).LastBootUpTime"
    (rc, stdout, stderr) = self._connection.exec_command(uptime_command)

    # A non-zero rc means the sample is unusable; surface everything the
    # connection gave back so the caller can report or retry.
    if rc != 0:
        raise Exception("win_reboot: failed to get host uptime info, rc: %d, stdout: %s, stderr: %s"
                        % (rc, stdout, stderr))

    return stdout
|
||||
|
||||
def do_until_success_or_timeout(self, what, timeout, what_desc, fail_sleep=1):
|
||||
max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
|
||||
|
||||
e = None
|
||||
exc = ""
|
||||
while datetime.utcnow() < max_end_time:
|
||||
try:
|
||||
what()
|
||||
|
@ -44,11 +54,12 @@ class ActionModule(ActionBase):
|
|||
display.debug("win_reboot: %s success" % what_desc)
|
||||
return
|
||||
except Exception as e:
|
||||
exc = e
|
||||
if what_desc:
|
||||
display.debug("win_reboot: %s fail (expected), retrying in %d seconds..." % (what_desc, fail_sleep))
|
||||
time.sleep(fail_sleep)
|
||||
|
||||
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, e))
|
||||
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, exc))
|
||||
|
||||
def run(self, tmp=None, task_vars=None):
|
||||
|
||||
|
@ -66,25 +77,25 @@ class ActionModule(ActionBase):
|
|||
if result.get('skipped', False) or result.get('failed', False):
|
||||
return result
|
||||
|
||||
winrm_host = self._connection._winrm_host
|
||||
winrm_port = self._connection._winrm_port
|
||||
|
||||
# Handle timeout parameters and its alias
|
||||
if self._task.args.get('shutdown_timeout') is not None:
|
||||
shutdown_timeout = int(self._task.args.get('shutdown_timeout', self.DEFAULT_SHUTDOWN_TIMEOUT))
|
||||
else:
|
||||
shutdown_timeout = int(self._task.args.get('shutdown_timeout_sec', self.DEFAULT_SHUTDOWN_TIMEOUT))
|
||||
|
||||
if self._task.args.get('reboot_timeout') is not None:
|
||||
reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT))
|
||||
else:
|
||||
reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT))
|
||||
deprecated_args = {
|
||||
'shutdown_timeout': '2.5',
|
||||
'shutdown_timeout_sec': '2.5',
|
||||
}
|
||||
for arg, version in deprecated_args.items():
|
||||
if self._task.args.get(arg) is not None:
|
||||
display.warning("Since Ansible %s, %s is no longer used with win_reboot" % (arg, version))
|
||||
|
||||
if self._task.args.get('connect_timeout') is not None:
|
||||
connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
|
||||
else:
|
||||
connect_timeout = int(self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))
|
||||
|
||||
if self._task.args.get('reboot_timeout') is not None:
|
||||
reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT))
|
||||
else:
|
||||
reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT))
|
||||
|
||||
if self._task.args.get('pre_reboot_delay') is not None:
|
||||
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY))
|
||||
else:
|
||||
|
@ -98,7 +109,17 @@ class ActionModule(ActionBase):
|
|||
test_command = str(self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND))
|
||||
msg = str(self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE))
|
||||
|
||||
# Get current uptime
|
||||
try:
|
||||
before_uptime = self.get_system_uptime()
|
||||
except Exception as e:
|
||||
result['failed'] = True
|
||||
result['reboot'] = False
|
||||
result['msg'] = to_native(e)
|
||||
return result
|
||||
|
||||
# Initiate reboot
|
||||
display.vvv("rebooting server")
|
||||
(rc, stdout, stderr) = self._connection.exec_command('shutdown /r /t %d /c "%s"' % (pre_reboot_delay, msg))
|
||||
|
||||
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
||||
|
@ -119,40 +140,52 @@ class ActionModule(ActionBase):
|
|||
result['msg'] = "Shutdown command failed, error text was %s" % stderr
|
||||
return result
|
||||
|
||||
def raise_if_port_open():
|
||||
try:
|
||||
sock = socket.create_connection((winrm_host, winrm_port), connect_timeout)
|
||||
sock.close()
|
||||
except:
|
||||
return False
|
||||
|
||||
raise Exception("port is open")
|
||||
|
||||
start = datetime.now()
|
||||
# Get the original connection_timeout option var so it can be reset after
|
||||
connection_timeout_orig = None
|
||||
try:
|
||||
connection_timeout_orig = self._connection.get_option('connection_timeout')
|
||||
except AnsibleError:
|
||||
display.debug("win_reboot: connection_timeout connection option has not been set")
|
||||
|
||||
try:
|
||||
self.do_until_success_or_timeout(raise_if_port_open, shutdown_timeout, what_desc="winrm port down")
|
||||
# keep on checking system uptime with short connection responses
|
||||
def check_uptime():
|
||||
display.vvv("attempting to get system uptime")
|
||||
|
||||
def connect_winrm_port():
|
||||
sock = socket.create_connection((winrm_host, winrm_port), connect_timeout)
|
||||
sock.close()
|
||||
|
||||
self.do_until_success_or_timeout(connect_winrm_port, reboot_timeout, what_desc="winrm port up")
|
||||
|
||||
def run_test_command():
|
||||
display.vvv("attempting post-reboot test command '%s'" % test_command)
|
||||
# call connection reset between runs if it's there
|
||||
# override connection timeout from defaults to custom value
|
||||
try:
|
||||
self._connection.set_options(direct={"connection_timeout": connect_timeout})
|
||||
self._connection._reset()
|
||||
except AttributeError:
|
||||
pass
|
||||
display.warning("Connection plugin does not allow the connection timeout to be overridden")
|
||||
|
||||
# try and get uptime
|
||||
try:
|
||||
current_uptime = self.get_system_uptime()
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
if current_uptime == before_uptime:
|
||||
raise Exception("uptime has not changed")
|
||||
|
||||
self.do_until_success_or_timeout(check_uptime, reboot_timeout, what_desc="reboot uptime check success")
|
||||
|
||||
# reset the connection to clear the custom connection timeout
|
||||
try:
|
||||
self._connection.set_options(direct={"connection_timeout": connection_timeout_orig})
|
||||
self._connection._reset()
|
||||
except (AnsibleError, AttributeError):
|
||||
display.debug("Failed to reset connection_timeout back to default")
|
||||
|
||||
# finally run test command to ensure everything is working
|
||||
def run_test_command():
|
||||
display.vvv("attempting post-reboot test command '%s'" % test_command)
|
||||
(rc, stdout, stderr) = self._connection.exec_command(test_command)
|
||||
|
||||
if rc != 0:
|
||||
raise Exception('test command failed')
|
||||
|
||||
# FUTURE: ensure that a reboot has actually occurred by watching for change in last boot time fact
|
||||
# FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates
|
||||
|
||||
self.do_until_success_or_timeout(run_test_command, reboot_timeout, what_desc="post-reboot test command success")
|
||||
|
@ -163,7 +196,7 @@ class ActionModule(ActionBase):
|
|||
except TimedOutException as toex:
|
||||
result['failed'] = True
|
||||
result['rebooted'] = True
|
||||
result['msg'] = toex.message
|
||||
result['msg'] = to_native(toex)
|
||||
|
||||
if post_reboot_delay != 0:
|
||||
display.vvv("win_reboot: waiting an additional %d seconds" % post_reboot_delay)
|
||||
|
|
|
@ -78,6 +78,17 @@ DOCUMENTATION = """
|
|||
choices: [managed, manual]
|
||||
vars:
|
||||
- name: ansible_winrm_kinit_mode
|
||||
connection_timeout:
|
||||
description:
|
||||
- Sets the operation and read timeout settings for the WinRM
|
||||
connection.
|
||||
- Corresponds to the C(operation_timeout_sec) and
|
||||
C(read_timeout_sec) args in pywinrm so avoid setting these vars
|
||||
with this one.
|
||||
- The default value is whatever is set in the installed version of
|
||||
pywinrm.
|
||||
vars:
|
||||
- name: ansible_winrm_connection_timeout
|
||||
"""
|
||||
|
||||
import base64
|
||||
|
@ -170,6 +181,7 @@ class Connection(ConnectionBase):
|
|||
self._winrm_path = self._options['path']
|
||||
self._kinit_cmd = self._options['kerberos_command']
|
||||
self._winrm_transport = self._options['transport']
|
||||
self._winrm_connection_timeout = self._options['connection_timeout']
|
||||
|
||||
if hasattr(winrm, 'FEATURE_SUPPORTED_AUTHTYPES'):
|
||||
self._winrm_supported_authtypes = set(winrm.FEATURE_SUPPORTED_AUTHTYPES)
|
||||
|
@ -261,7 +273,11 @@ class Connection(ConnectionBase):
|
|||
self._kerb_auth(self._winrm_user, self._winrm_pass)
|
||||
display.vvvvv('WINRM CONNECT: transport=%s endpoint=%s' % (transport, endpoint), host=self._winrm_host)
|
||||
try:
|
||||
protocol = Protocol(endpoint, transport=transport, **self._winrm_kwargs)
|
||||
winrm_kwargs = self._winrm_kwargs.copy()
|
||||
if self._winrm_connection_timeout:
|
||||
winrm_kwargs['operation_timeout_sec'] = self._winrm_connection_timeout
|
||||
winrm_kwargs['read_timeout_sec'] = self._winrm_connection_timeout + 1
|
||||
protocol = Protocol(endpoint, transport=transport, **winrm_kwargs)
|
||||
|
||||
# open the shell from connect so we know we're able to talk to the server
|
||||
if not self.shell_id:
|
||||
|
|
Loading…
Reference in a new issue