Add polling logic in runner such that all actions get kicked off everywhere, then polling
happens only on needed hosts, allowing some hosts to fail and drop out of the running.
This commit is contained in:
Michael DeHaan 2012-03-11 20:54:54 -04:00
parent 49a636d8a0
commit db7ba87111
4 changed files with 71 additions and 50 deletions

View file

@ -28,10 +28,11 @@ import sys
import os
import getpass
import shlex
import time
from optparse import OptionParser
import ansible.runner
import ansible.playbook
import ansible.constants as C
from optparse import OptionParser
from ansible.utils import *
########################################################
@ -91,7 +92,7 @@ class Cli(object):
if options.ask_pass:
sshpass = getpass.getpass(prompt="SSH password: ")
return ansible.runner.Runner(
runner = ansible.runner.Runner(
module_name=options.module_name,
module_path=options.module_path,
module_args=shlex.split(options.module_args),
@ -101,15 +102,40 @@ class Cli(object):
timeout=options.timeout,
forks=options.forks,
background=options.seconds,
poll_interval=options.poll_interval,
async_poll_callback=async_poll_status,
pattern=pattern,
verbose=True,
).run()
)
return (runner, runner.run())
# ----------------------------------------------
def output(self, results, options, args):
def get_polling_runner(self, old_runner, hosts, jid):
    ''' build a Runner that runs async_status for job `jid` against `hosts` '''
    # Module path, credentials, timeout and fork count are copied from the
    # runner passed in; only the module, its arguments, the host list and
    # the pattern are set specifically for polling.
    status_args = [ "jid=%s" % jid ]
    settings = dict(
        module_name='async_status',
        module_path=old_runner.module_path,
        module_args=status_args,
        remote_user=old_runner.remote_user,
        remote_pass=old_runner.remote_pass,
        host_list=hosts,
        timeout=old_runner.timeout,
        forks=old_runner.forks,
        pattern='*',
        verbose=True,
    )
    return ansible.runner.Runner(**settings)
# ----------------------------------------------
def hosts_to_poll(self, results):
    ''' list the contacted hosts whose result carries a truthy "started" value '''
    contacted = results['contacted']
    # a host stays in the list only while its latest result still reports "started"
    return [
        name
        for (name, status) in contacted.iteritems()
        if status.get('started', False)
    ]
# ----------------------------------------------
def output(self, runner, results, options, args):
''' summarize results from Runner '''
if results is None:
@ -117,6 +143,34 @@ class Cli(object):
if options.tree:
prepare_writeable_dir(options.tree)
# BACKGROUND POLL LOGIC when -B and -P are specified
# FIXME: refactor
if options.seconds and options.poll_interval > 0:
poll_hosts = results['contacted'].keys()
if len(poll_hosts) == 0:
exit("no jobs were launched successfully")
ahost = poll_hosts[0]
jid = results['contacted'][ahost].get('ansible_job_id', None)
if jid is None:
exit("unexpected error: unable to determine jid")
clock = options.seconds
while (clock >= 0):
polling_runner = self.get_polling_runner(runner, poll_hosts, jid)
poll_results = polling_runner.run()
if poll_results is None:
break
for (host, host_result) in poll_results['contacted'].iteritems():
# override last result with current status result for report
results['contacted'][host] = host_result
print async_poll_status(jid, host, clock, host_result)
clock = clock - options.poll_interval
time.sleep(options.poll_interval)
poll_hosts = self.hosts_to_poll(poll_results)
for (host, host_result) in results['contacted'].iteritems():
if 'started' in host_result:
results['contacted'][host] = { 'failed' : 1, 'rc' : None, 'msg' : 'timed out' }
buf = ''
for hostname in contacted_hosts(results):
msg = host_report_msg(
@ -139,7 +193,7 @@ class Cli(object):
if __name__ == '__main__':
cli = Cli()
(options, args) = cli.parse()
results = cli.run(options, args)
cli.output(results, options, args)
(runner, results) = cli.run(options, args)
cli.output(runner, results, options, args)

View file

@ -68,8 +68,6 @@ class Runner(object):
basedir=None,
setup_cache={},
transport='paramiko',
poll_interval=None,
async_poll_callback=None,
verbose=False):
'''
@ -99,11 +97,6 @@ class Runner(object):
self.remote_user = remote_user
self.remote_pass = remote_pass
self.background = background
self.poll_interval = poll_interval
self.async_poll_callback = async_poll_callback
if self.async_poll_callback is None:
self.async_poll_callback = async_poll_status
if basedir is None:
basedir = os.getcwd()
@ -363,33 +356,6 @@ class Runner(object):
result = self._execute_normal_module(conn, host, tmp)
else:
result = self._execute_async_module(conn, host, tmp)
if self.poll_interval > 0:
# poll for completion
# FIXME: refactor
(host, ok, launch_result) = result
jid = launch_result.get('ansible_job_id', None)
if jid is None:
return result
if self.async_poll_callback is None:
self.async_poll_callback = async_poll_callback
self.module_name = 'async_status'
self.module_args = [ "jid=%s" % jid ]
clock = self.background
while (clock >= 0):
time.sleep(self.poll_interval)
clock -= self.poll_interval
result = self._execute_normal_module(conn, host, tmp)
(host, ok, real_result) = result
self.async_poll_callback(self, clock, self.poll_interval, ok, host, jid, real_result)
if 'finished' in real_result or 'failed' in real_result:
clock=-1
elif (clock < 0 and not 'finished' in real_result):
return [ host, False, "timer expired" ]
self._delete_remote_files(conn, tmp)
conn.close()
return result
elif self.module_name == 'copy':
result = self._execute_copy(conn, host, tmp)

View file

@ -175,12 +175,12 @@ def path_dwim(basedir, given):
else:
return os.path.join(basedir, given)
def async_poll_status(runner, clock, poll_interval, ok, host, jid, result):
if ok and 'finished' in result:
print "<job %s> finished on %s" % (jid, host)
elif not ok or 'failed' in result:
print "<job %s> FAILED on %s" % (jid, host)
def async_poll_status(jid, host, clock, result):
if 'finished' in result:
return "<job %s> finished on %s" % (jid, host)
elif 'failed' in result:
return "<job %s> FAILED on %s" % (jid, host)
else:
print "<job %s> polling on %s, %s remaining" % (jid, host, clock)
return "<job %s> polling on %s, %s remaining" % (jid, host, clock)

View file

@ -81,12 +81,13 @@ if mode == 'cleanup':
data = file(log_path).read()
try:
data = json.loads(data)
except:
except Exception, e:
if data == '':
# file not written yet? That means it is running
print json.dumps({
"results_file" : log_path,
"ansible_job_id" : jid,
"traceback" : str(e),
"started" : 1,
})
else:
@ -96,7 +97,7 @@ except:
"results_file" : log_path,
"msg" : "Could not parse job output: %s" % data,
})
sys.exit(1)
sys.exit(0)
if not data.has_key("started"):
data['finished'] = 1