From f8fe5a2fcd1f9d5e01dea75f04598ceb1283ee5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Andersson=20+=20SU=20Sheng=20Loong?= Date: Wed, 3 Jun 2015 02:53:16 +0800 Subject: [PATCH 1/4] monit: Add retry for pending/initializing services If there are already ongoing actions for a process managed by monit, the module would exit unsuccessfully. It could also give false positives because it did not determine whether the service was actually started or stopped while it was in a pending state: the pending action might be turning the service off even though the requested action was to start it. For example, "Running - pending stop" would be regarded as the service running, and "state=started" would do nothing. This will make Ansible wait for the state to finalize, or a timeout decided by the new `max_retries` option, before it decides what to do. This fixes issue #244. --- monitoring/monit.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/monitoring/monit.py b/monitoring/monit.py index 3d3c7c8c3ca..84a897e458d 100644 --- a/monitoring/monit.py +++ b/monitoring/monit.py @@ -18,6 +18,7 @@ # You should have received a copy of the GNU General Public License # along with Ansible. If not, see . # +from time import sleep DOCUMENTATION = ''' --- @@ -38,6 +39,12 @@ options: required: true default: null choices: [ "present", "started", "stopped", "restarted", "monitored", "unmonitored", "reloaded" ] + max_retries: + description: + - If there are pending actions for the service monitoried by monit Ansible will retry this + many times to perform the requested action. Between each retry Ansible will sleep for 1 second. 
+ required: false + default: 10 requirements: [ ] author: "Darryl Stoflet (@dstoflet)" ''' @@ -50,6 +57,7 @@ EXAMPLES = ''' def main(): arg_spec = dict( name=dict(required=True), + max_retries=dict(default=10), state=dict(required=True, choices=['present', 'started', 'restarted', 'stopped', 'monitored', 'unmonitored', 'reloaded']) ) @@ -57,6 +65,7 @@ def main(): name = module.params['name'] state = module.params['state'] + max_retries = module.params['max_retries'] MONIT = module.get_bin_path('monit', True) @@ -103,7 +112,21 @@ def main(): module.exit_json(changed=True, name=name, state=state) module.exit_json(changed=False, name=name, state=state) - running = 'running' in process_status + running_status = status() + retries = 0 + while 'pending' in running_status or 'initializing' in running_status: + if retries >= max_retries: + module.fail_json( + msg='too many retries waiting for "pending" or "initiating" to go away (%s)' % running_status, + retries=retries, + state=state + ) + + sleep(1) + retries += 1 + running_status = status() + + running = 'running' in status() if running and state in ['started', 'monitored']: module.exit_json(changed=False, name=name, state=state) From 5835d06a4eca4438ff6f4e344cd67eb7f3d9ed88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Andersson?= Date: Fri, 5 Jun 2015 05:39:53 +0800 Subject: [PATCH 2/4] monit: Wait for pending state changes for reloads @mpeters reported that we're not checking that the named service is actually there after a reload. And that sometimes monit doesn't actually return anything at all after a reload. 
--- monitoring/monit.py | 54 +++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/monitoring/monit.py b/monitoring/monit.py index 84a897e458d..32fa526e0bb 100644 --- a/monitoring/monit.py +++ b/monitoring/monit.py @@ -69,14 +69,6 @@ def main(): MONIT = module.get_bin_path('monit', True) - if state == 'reloaded': - if module.check_mode: - module.exit_json(changed=True) - rc, out, err = module.run_command('%s reload' % MONIT) - if rc != 0: - module.fail_json(msg='monit reload failed', stdout=out, stderr=err) - module.exit_json(changed=True, name=name, state=state) - def status(): """Return the status of the process in monit, or the empty string if not present.""" rc, out, err = module.run_command('%s summary' % MONIT, check_rc=True) @@ -95,8 +87,35 @@ def main(): module.run_command('%s %s %s' % (MONIT, command, name), check_rc=True) return status() - process_status = status() - present = process_status != '' + def wait_for_monit_to_stop_pending(sleep_time=1): + """Fails this run if there is no status or it's pending/initalizing for max_retries""" + running_status = status() + retries = 0 + + while running_status == '' or 'pending' in running_status or 'initializing' in running_status: + if retries >= max_retries: + module.fail_json( + msg='too many retries waiting for empty, "pending", or "initiating" status to go away ({0})'.format( + running_status + ), + retries=retries, + state=state + ) + + sleep(sleep_time) + retries += 1 + running_status = status() + + if state == 'reloaded': + if module.check_mode: + module.exit_json(changed=True) + rc, out, err = module.run_command('%s reload' % MONIT) + if rc != 0: + module.fail_json(msg='monit reload failed', stdout=out, stderr=err) + wait_for_monit_to_stop_pending() + module.exit_json(changed=True, name=name, state=state) + + present = status() != '' if not present and not state == 'present': module.fail_json(msg='%s process not presently configured with monit' % 
name, name=name, state=state) @@ -112,20 +131,7 @@ def main(): module.exit_json(changed=True, name=name, state=state) module.exit_json(changed=False, name=name, state=state) - running_status = status() - retries = 0 - while 'pending' in running_status or 'initializing' in running_status: - if retries >= max_retries: - module.fail_json( - msg='too many retries waiting for "pending" or "initiating" to go away (%s)' % running_status, - retries=retries, - state=state - ) - - sleep(1) - retries += 1 - running_status = status() - + wait_for_monit_to_stop_pending() running = 'running' in status() if running and state in ['started', 'monitored']: From 262f2e9048cac9a3867a1f8722a0ebdf2d1fb974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Andersson?= Date: Tue, 4 Aug 2015 16:59:42 +0800 Subject: [PATCH 3/4] monit: Add version_added and type for new argument --- monitoring/monit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monitoring/monit.py b/monitoring/monit.py index 32fa526e0bb..f8151a71f2a 100644 --- a/monitoring/monit.py +++ b/monitoring/monit.py @@ -45,6 +45,7 @@ options: many times to perform the requested action. Between each retry Ansible will sleep for 1 second. required: false default: 10 + version_added: "2.0" requirements: [ ] author: "Darryl Stoflet (@dstoflet)" ''' @@ -57,7 +58,7 @@ EXAMPLES = ''' def main(): arg_spec = dict( name=dict(required=True), - max_retries=dict(default=10), + max_retries=dict(default=10, type='int'), state=dict(required=True, choices=['present', 'started', 'restarted', 'stopped', 'monitored', 'unmonitored', 'reloaded']) ) From 72155d40a33fee454ad2a4088d98842d1ab68e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Andersson?= Date: Sun, 29 Nov 2015 22:45:39 +0800 Subject: [PATCH 4/4] monit: Set a high timeout waiting for status changes Instead of waiting for up to a certain number of retries we set a high timeout and only re-check every five seconds. 
Certain services can take a minute or more to start and we want to avoid wasting resources by polling too often. --- monitoring/monit.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/monitoring/monit.py b/monitoring/monit.py index f8151a71f2a..35a386b6c6e 100644 --- a/monitoring/monit.py +++ b/monitoring/monit.py @@ -18,7 +18,7 @@ # You should have received a copy of the GNU General Public License # along with Ansible. If not, see . # -from time import sleep +import time DOCUMENTATION = ''' --- @@ -39,12 +39,13 @@ options: required: true default: null choices: [ "present", "started", "stopped", "restarted", "monitored", "unmonitored", "reloaded" ] - max_retries: + timeout: description: - - If there are pending actions for the service monitoried by monit Ansible will retry this - many times to perform the requested action. Between each retry Ansible will sleep for 1 second. + - If there are pending actions for the service monitored by monit, then Ansible will check + for up to this many seconds to verify the the requested action has been performed. + Ansible will sleep for five seconds between each check. 
required: false - default: 10 + default: 300 version_added: "2.0" requirements: [ ] author: "Darryl Stoflet (@dstoflet)" @@ -58,7 +59,7 @@ EXAMPLES = ''' def main(): arg_spec = dict( name=dict(required=True), - max_retries=dict(default=10, type='int'), + timeout=dict(default=300, type='int'), state=dict(required=True, choices=['present', 'started', 'restarted', 'stopped', 'monitored', 'unmonitored', 'reloaded']) ) @@ -66,7 +67,7 @@ def main(): name = module.params['name'] state = module.params['state'] - max_retries = module.params['max_retries'] + timeout = module.params['timeout'] MONIT = module.get_bin_path('monit', True) @@ -88,23 +89,22 @@ def main(): module.run_command('%s %s %s' % (MONIT, command, name), check_rc=True) return status() - def wait_for_monit_to_stop_pending(sleep_time=1): - """Fails this run if there is no status or it's pending/initalizing for max_retries""" - running_status = status() - retries = 0 + def wait_for_monit_to_stop_pending(): + """Fails this run if there is no status or it's pending/initalizing for timeout""" + timeout_time = time.time() + timeout + sleep_time = 5 + running_status = status() while running_status == '' or 'pending' in running_status or 'initializing' in running_status: - if retries >= max_retries: + if time.time() >= timeout_time: module.fail_json( - msg='too many retries waiting for empty, "pending", or "initiating" status to go away ({0})'.format( + msg='waited too long for "pending", or "initiating" status to go away ({0})'.format( running_status ), - retries=retries, state=state ) - sleep(sleep_time) - retries += 1 + time.sleep(sleep_time) running_status = status() if state == 'reloaded':