win_reboot: change to sample system uptime instead of checking port status (#31767)
* win_reboot: change to sample system uptime instead of checking port status
* added connection timeout back in as now we can manually set it per connection.
* some pep8 fixes
* fix up error message on timeout in case an exception wasn't fired
* Changed doc to English (US) and simplified uptime check
* moved conn timeout over to new config connection options
This commit is contained in:
parent
0b128e2e29
commit
cf662ed74b
3 changed files with 89 additions and 40 deletions
|
@ -31,6 +31,7 @@ options:
|
||||||
description:
|
description:
|
||||||
- Maximum seconds to wait for shutdown to occur
|
- Maximum seconds to wait for shutdown to occur
|
||||||
- Increase this timeout for very slow hardware, large update applications, etc
|
- Increase this timeout for very slow hardware, large update applications, etc
|
||||||
|
- This option was removed in Ansible 2.5 because the win_reboot behavior has changed
|
||||||
default: 600
|
default: 600
|
||||||
aliases: [ shutdown_timeout_sec ]
|
aliases: [ shutdown_timeout_sec ]
|
||||||
reboot_timeout:
|
reboot_timeout:
|
||||||
|
@ -70,7 +71,6 @@ EXAMPLES = r'''
|
||||||
|
|
||||||
# Reboot a slow machine that might have lots of updates to apply
|
# Reboot a slow machine that might have lots of updates to apply
|
||||||
- win_reboot:
|
- win_reboot:
|
||||||
shutdown_timeout: 3600
|
|
||||||
reboot_timeout: 3600
|
reboot_timeout: 3600
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,13 @@
|
||||||
from __future__ import (absolute_import, division, print_function)
|
from __future__ import (absolute_import, division, print_function)
|
||||||
__metaclass__ = type
|
__metaclass__ = type
|
||||||
|
|
||||||
import socket
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
from ansible.errors import AnsibleError
|
||||||
from ansible.plugins.action import ActionBase
|
from ansible.plugins.action import ActionBase
|
||||||
|
from ansible.module_utils._text import to_native
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from __main__ import display
|
from __main__ import display
|
||||||
|
@ -25,7 +26,6 @@ class TimedOutException(Exception):
|
||||||
class ActionModule(ActionBase):
|
class ActionModule(ActionBase):
|
||||||
TRANSFERS_FILES = False
|
TRANSFERS_FILES = False
|
||||||
|
|
||||||
DEFAULT_SHUTDOWN_TIMEOUT = 600
|
|
||||||
DEFAULT_REBOOT_TIMEOUT = 600
|
DEFAULT_REBOOT_TIMEOUT = 600
|
||||||
DEFAULT_CONNECT_TIMEOUT = 5
|
DEFAULT_CONNECT_TIMEOUT = 5
|
||||||
DEFAULT_PRE_REBOOT_DELAY = 2
|
DEFAULT_PRE_REBOOT_DELAY = 2
|
||||||
|
@ -33,10 +33,20 @@ class ActionModule(ActionBase):
|
||||||
DEFAULT_TEST_COMMAND = 'whoami'
|
DEFAULT_TEST_COMMAND = 'whoami'
|
||||||
DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible.'
|
DEFAULT_REBOOT_MESSAGE = 'Reboot initiated by Ansible.'
|
||||||
|
|
||||||
|
def get_system_uptime(self):
|
||||||
|
uptime_command = "(Get-WmiObject -ClassName Win32_OperatingSystem).LastBootUpTime"
|
||||||
|
(rc, stdout, stderr) = self._connection.exec_command(uptime_command)
|
||||||
|
|
||||||
|
if rc != 0:
|
||||||
|
raise Exception("win_reboot: failed to get host uptime info, rc: %d, stdout: %s, stderr: %s"
|
||||||
|
% (rc, stdout, stderr))
|
||||||
|
|
||||||
|
return stdout
|
||||||
|
|
||||||
def do_until_success_or_timeout(self, what, timeout, what_desc, fail_sleep=1):
|
def do_until_success_or_timeout(self, what, timeout, what_desc, fail_sleep=1):
|
||||||
max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
|
max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
|
||||||
|
|
||||||
e = None
|
exc = ""
|
||||||
while datetime.utcnow() < max_end_time:
|
while datetime.utcnow() < max_end_time:
|
||||||
try:
|
try:
|
||||||
what()
|
what()
|
||||||
|
@ -44,11 +54,12 @@ class ActionModule(ActionBase):
|
||||||
display.debug("win_reboot: %s success" % what_desc)
|
display.debug("win_reboot: %s success" % what_desc)
|
||||||
return
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
exc = e
|
||||||
if what_desc:
|
if what_desc:
|
||||||
display.debug("win_reboot: %s fail (expected), retrying in %d seconds..." % (what_desc, fail_sleep))
|
display.debug("win_reboot: %s fail (expected), retrying in %d seconds..." % (what_desc, fail_sleep))
|
||||||
time.sleep(fail_sleep)
|
time.sleep(fail_sleep)
|
||||||
|
|
||||||
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, e))
|
raise TimedOutException("timed out waiting for %s: %s" % (what_desc, exc))
|
||||||
|
|
||||||
def run(self, tmp=None, task_vars=None):
|
def run(self, tmp=None, task_vars=None):
|
||||||
|
|
||||||
|
@ -66,25 +77,25 @@ class ActionModule(ActionBase):
|
||||||
if result.get('skipped', False) or result.get('failed', False):
|
if result.get('skipped', False) or result.get('failed', False):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
winrm_host = self._connection._winrm_host
|
|
||||||
winrm_port = self._connection._winrm_port
|
|
||||||
|
|
||||||
# Handle timeout parameters and its alias
|
# Handle timeout parameters and its alias
|
||||||
if self._task.args.get('shutdown_timeout') is not None:
|
deprecated_args = {
|
||||||
shutdown_timeout = int(self._task.args.get('shutdown_timeout', self.DEFAULT_SHUTDOWN_TIMEOUT))
|
'shutdown_timeout': '2.5',
|
||||||
else:
|
'shutdown_timeout_sec': '2.5',
|
||||||
shutdown_timeout = int(self._task.args.get('shutdown_timeout_sec', self.DEFAULT_SHUTDOWN_TIMEOUT))
|
}
|
||||||
|
for arg, version in deprecated_args.items():
|
||||||
if self._task.args.get('reboot_timeout') is not None:
|
if self._task.args.get(arg) is not None:
|
||||||
reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT))
|
display.warning("Since Ansible %s, %s is no longer used with win_reboot" % (arg, version))
|
||||||
else:
|
|
||||||
reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT))
|
|
||||||
|
|
||||||
if self._task.args.get('connect_timeout') is not None:
|
if self._task.args.get('connect_timeout') is not None:
|
||||||
connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
|
connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
|
||||||
else:
|
else:
|
||||||
connect_timeout = int(self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))
|
connect_timeout = int(self._task.args.get('connect_timeout_sec', self.DEFAULT_CONNECT_TIMEOUT))
|
||||||
|
|
||||||
|
if self._task.args.get('reboot_timeout') is not None:
|
||||||
|
reboot_timeout = int(self._task.args.get('reboot_timeout', self.DEFAULT_REBOOT_TIMEOUT))
|
||||||
|
else:
|
||||||
|
reboot_timeout = int(self._task.args.get('reboot_timeout_sec', self.DEFAULT_REBOOT_TIMEOUT))
|
||||||
|
|
||||||
if self._task.args.get('pre_reboot_delay') is not None:
|
if self._task.args.get('pre_reboot_delay') is not None:
|
||||||
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY))
|
pre_reboot_delay = int(self._task.args.get('pre_reboot_delay', self.DEFAULT_PRE_REBOOT_DELAY))
|
||||||
else:
|
else:
|
||||||
|
@ -98,7 +109,17 @@ class ActionModule(ActionBase):
|
||||||
test_command = str(self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND))
|
test_command = str(self._task.args.get('test_command', self.DEFAULT_TEST_COMMAND))
|
||||||
msg = str(self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE))
|
msg = str(self._task.args.get('msg', self.DEFAULT_REBOOT_MESSAGE))
|
||||||
|
|
||||||
|
# Get current uptime
|
||||||
|
try:
|
||||||
|
before_uptime = self.get_system_uptime()
|
||||||
|
except Exception as e:
|
||||||
|
result['failed'] = True
|
||||||
|
result['reboot'] = False
|
||||||
|
result['msg'] = to_native(e)
|
||||||
|
return result
|
||||||
|
|
||||||
# Initiate reboot
|
# Initiate reboot
|
||||||
|
display.vvv("rebooting server")
|
||||||
(rc, stdout, stderr) = self._connection.exec_command('shutdown /r /t %d /c "%s"' % (pre_reboot_delay, msg))
|
(rc, stdout, stderr) = self._connection.exec_command('shutdown /r /t %d /c "%s"' % (pre_reboot_delay, msg))
|
||||||
|
|
||||||
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
||||||
|
@ -119,40 +140,52 @@ class ActionModule(ActionBase):
|
||||||
result['msg'] = "Shutdown command failed, error text was %s" % stderr
|
result['msg'] = "Shutdown command failed, error text was %s" % stderr
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def raise_if_port_open():
|
|
||||||
try:
|
|
||||||
sock = socket.create_connection((winrm_host, winrm_port), connect_timeout)
|
|
||||||
sock.close()
|
|
||||||
except:
|
|
||||||
return False
|
|
||||||
|
|
||||||
raise Exception("port is open")
|
|
||||||
|
|
||||||
start = datetime.now()
|
start = datetime.now()
|
||||||
|
# Get the original connection_timeout option var so it can be reset after
|
||||||
|
connection_timeout_orig = None
|
||||||
|
try:
|
||||||
|
connection_timeout_orig = self._connection.get_option('connection_timeout')
|
||||||
|
except AnsibleError:
|
||||||
|
display.debug("win_reboot: connection_timeout connection option has not been set")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.do_until_success_or_timeout(raise_if_port_open, shutdown_timeout, what_desc="winrm port down")
|
# keep on checking system uptime with short connection responses
|
||||||
|
def check_uptime():
|
||||||
|
display.vvv("attempting to get system uptime")
|
||||||
|
|
||||||
def connect_winrm_port():
|
# override connection timeout from defaults to custom value
|
||||||
sock = socket.create_connection((winrm_host, winrm_port), connect_timeout)
|
|
||||||
sock.close()
|
|
||||||
|
|
||||||
self.do_until_success_or_timeout(connect_winrm_port, reboot_timeout, what_desc="winrm port up")
|
|
||||||
|
|
||||||
def run_test_command():
|
|
||||||
display.vvv("attempting post-reboot test command '%s'" % test_command)
|
|
||||||
# call connection reset between runs if it's there
|
|
||||||
try:
|
try:
|
||||||
|
self._connection.set_options(direct={"connection_timeout": connect_timeout})
|
||||||
self._connection._reset()
|
self._connection._reset()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
display.warning("Connection plugin does not allow the connection timeout to be overridden")
|
||||||
|
|
||||||
|
# try and get uptime
|
||||||
|
try:
|
||||||
|
current_uptime = self.get_system_uptime()
|
||||||
|
except Exception as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
|
if current_uptime == before_uptime:
|
||||||
|
raise Exception("uptime has not changed")
|
||||||
|
|
||||||
|
self.do_until_success_or_timeout(check_uptime, reboot_timeout, what_desc="reboot uptime check success")
|
||||||
|
|
||||||
|
# reset the connection to clear the custom connection timeout
|
||||||
|
try:
|
||||||
|
self._connection.set_options(direct={"connection_timeout": connection_timeout_orig})
|
||||||
|
self._connection._reset()
|
||||||
|
except (AnsibleError, AttributeError):
|
||||||
|
display.debug("Failed to reset connection_timeout back to default")
|
||||||
|
|
||||||
|
# finally run test command to ensure everything is working
|
||||||
|
def run_test_command():
|
||||||
|
display.vvv("attempting post-reboot test command '%s'" % test_command)
|
||||||
(rc, stdout, stderr) = self._connection.exec_command(test_command)
|
(rc, stdout, stderr) = self._connection.exec_command(test_command)
|
||||||
|
|
||||||
if rc != 0:
|
if rc != 0:
|
||||||
raise Exception('test command failed')
|
raise Exception('test command failed')
|
||||||
|
|
||||||
# FUTURE: ensure that a reboot has actually occurred by watching for change in last boot time fact
|
|
||||||
# FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates
|
# FUTURE: add a stability check (system must remain up for N seconds) to deal with self-multi-reboot updates
|
||||||
|
|
||||||
self.do_until_success_or_timeout(run_test_command, reboot_timeout, what_desc="post-reboot test command success")
|
self.do_until_success_or_timeout(run_test_command, reboot_timeout, what_desc="post-reboot test command success")
|
||||||
|
@ -163,7 +196,7 @@ class ActionModule(ActionBase):
|
||||||
except TimedOutException as toex:
|
except TimedOutException as toex:
|
||||||
result['failed'] = True
|
result['failed'] = True
|
||||||
result['rebooted'] = True
|
result['rebooted'] = True
|
||||||
result['msg'] = toex.message
|
result['msg'] = to_native(toex)
|
||||||
|
|
||||||
if post_reboot_delay != 0:
|
if post_reboot_delay != 0:
|
||||||
display.vvv("win_reboot: waiting an additional %d seconds" % post_reboot_delay)
|
display.vvv("win_reboot: waiting an additional %d seconds" % post_reboot_delay)
|
||||||
|
|
|
@ -78,6 +78,17 @@ DOCUMENTATION = """
|
||||||
choices: [managed, manual]
|
choices: [managed, manual]
|
||||||
vars:
|
vars:
|
||||||
- name: ansible_winrm_kinit_mode
|
- name: ansible_winrm_kinit_mode
|
||||||
|
connection_timeout:
|
||||||
|
description:
|
||||||
|
- Sets the operation and read timeout settings for the WinRM
|
||||||
|
connection.
|
||||||
|
- Corresponds to the C(operation_timeout_sec) and
|
||||||
|
C(read_timeout_sec) args in pywinrm so avoid setting these vars
|
||||||
|
with this one.
|
||||||
|
- The default value is whatever is set in the installed version of
|
||||||
|
pywinrm.
|
||||||
|
vars:
|
||||||
|
- name: ansible_winrm_connection_timeout
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
@ -170,6 +181,7 @@ class Connection(ConnectionBase):
|
||||||
self._winrm_path = self._options['path']
|
self._winrm_path = self._options['path']
|
||||||
self._kinit_cmd = self._options['kerberos_command']
|
self._kinit_cmd = self._options['kerberos_command']
|
||||||
self._winrm_transport = self._options['transport']
|
self._winrm_transport = self._options['transport']
|
||||||
|
self._winrm_connection_timeout = self._options['connection_timeout']
|
||||||
|
|
||||||
if hasattr(winrm, 'FEATURE_SUPPORTED_AUTHTYPES'):
|
if hasattr(winrm, 'FEATURE_SUPPORTED_AUTHTYPES'):
|
||||||
self._winrm_supported_authtypes = set(winrm.FEATURE_SUPPORTED_AUTHTYPES)
|
self._winrm_supported_authtypes = set(winrm.FEATURE_SUPPORTED_AUTHTYPES)
|
||||||
|
@ -261,7 +273,11 @@ class Connection(ConnectionBase):
|
||||||
self._kerb_auth(self._winrm_user, self._winrm_pass)
|
self._kerb_auth(self._winrm_user, self._winrm_pass)
|
||||||
display.vvvvv('WINRM CONNECT: transport=%s endpoint=%s' % (transport, endpoint), host=self._winrm_host)
|
display.vvvvv('WINRM CONNECT: transport=%s endpoint=%s' % (transport, endpoint), host=self._winrm_host)
|
||||||
try:
|
try:
|
||||||
protocol = Protocol(endpoint, transport=transport, **self._winrm_kwargs)
|
winrm_kwargs = self._winrm_kwargs.copy()
|
||||||
|
if self._winrm_connection_timeout:
|
||||||
|
winrm_kwargs['operation_timeout_sec'] = self._winrm_connection_timeout
|
||||||
|
winrm_kwargs['read_timeout_sec'] = self._winrm_connection_timeout + 1
|
||||||
|
protocol = Protocol(endpoint, transport=transport, **winrm_kwargs)
|
||||||
|
|
||||||
# open the shell from connect so we know we're able to talk to the server
|
# open the shell from connect so we know we're able to talk to the server
|
||||||
if not self.shell_id:
|
if not self.shell_id:
|
||||||
|
|
Loading…
Reference in a new issue