Tweak how strategies evaluate failed hosts via the iterator and bug fixes

* Added additional methods to the iterator code to assess host failures
  while also taking into account the block rescue/always states
* Fixed bugs in the free strategy, where results were not always being
  processed after being collected
* Added some prettier printing to the state output from iterator

Fixes #13699
This commit is contained in:
James Cammarata 2016-01-02 00:31:09 -05:00
parent 6f2f7a79b3
commit 210cf06d9a
3 changed files with 49 additions and 14 deletions

View file

@ -57,14 +57,32 @@ class HostState:
self.always_child_state = None self.always_child_state = None
def __repr__(self): def __repr__(self):
return "HOST STATE: block=%d, task=%d, rescue=%d, always=%d, role=%s, run_state=%d, fail_state=%d, pending_setup=%s, tasks child state? %s, rescue child state? %s, always child state? %s" % ( def _run_state_to_string(n):
states = ["ITERATING_SETUP", "ITERATING_TASKS", "ITERATING_RESCUE", "ITERATING_ALWAYS", "ITERATING_COMPLETE"]
try:
return states[n]
except IndexError:
return "UNKNOWN STATE"
def _failed_state_to_string(n):
    """Render the fail-state bitmask ``n`` as a readable string.

    Returns ``"FAILED_NONE"`` when ``n`` is 0; otherwise the names of
    every known flag bit set in ``n``, joined with ``|`` in ascending
    bit order. Bits other than 1, 2, 4 and 8 are ignored.
    """
    if n == 0:
        return "FAILED_NONE"

    # (bit, name) pairs, lowest bit first so the output order is stable.
    flag_names = (
        (1, "FAILED_SETUP"),
        (2, "FAILED_TASKS"),
        (4, "FAILED_RESCUE"),
        (8, "FAILED_ALWAYS"),
    )
    return "|".join(name for bit, name in flag_names if n & bit)
return "HOST STATE: block=%d, task=%d, rescue=%d, always=%d, role=%s, run_state=%s, fail_state=%s, pending_setup=%s, tasks child state? %s, rescue child state? %s, always child state? %s" % (
self.cur_block, self.cur_block,
self.cur_regular_task, self.cur_regular_task,
self.cur_rescue_task, self.cur_rescue_task,
self.cur_always_task, self.cur_always_task,
self.cur_role, self.cur_role,
self.run_state, _run_state_to_string(self.run_state),
self.fail_state, _failed_state_to_string(self.fail_state),
self.pending_setup, self.pending_setup,
self.tasks_child_state, self.tasks_child_state,
self.rescue_child_state, self.rescue_child_state,
@ -347,6 +365,28 @@ class PlayIterator:
def get_failed_hosts(self): def get_failed_hosts(self):
return dict((host, True) for (host, state) in iteritems(self._host_states) if state.run_state == self.ITERATING_COMPLETE and state.fail_state != self.FAILED_NONE) return dict((host, True) for (host, state) in iteritems(self._host_states) if state.run_state == self.ITERATING_COMPLETE and state.fail_state != self.FAILED_NONE)
def _check_failed_state(self, state):
    """Return True when ``state`` (or its active child state) has failed.

    ``state`` may be None, which is treated as "not failed". While the
    state is iterating tasks, rescue or always, only the child state that
    belongs to that phase is consulted (recursively). A state that has
    reached ITERATING_COMPLETE is failed when any failure flag is set.
    Every other combination is reported as not failed.
    """
    if state is None:
        return False

    # Pick the child state that matches the phase currently being iterated;
    # child states of other phases are deliberately not inspected.
    if state.run_state == self.ITERATING_TASKS:
        child_state = state.tasks_child_state
    elif state.run_state == self.ITERATING_RESCUE:
        child_state = state.rescue_child_state
    elif state.run_state == self.ITERATING_ALWAYS:
        child_state = state.always_child_state
    else:
        child_state = None

    if self._check_failed_state(child_state):
        return True

    # run_state must be ITERATING_COMPLETE for a failure to count here, so
    # no per-phase FAILED_RESCUE / FAILED_ALWAYS masking can apply on this
    # path: any set failure flag means the state has failed.
    return (state.run_state == self.ITERATING_COMPLETE
            and state.fail_state != self.FAILED_NONE)
def is_failed(self, host):
    """Return whether ``host`` is considered failed.

    Looks up the host's state with ``get_host_state`` and returns the
    result of ``_check_failed_state`` for it.
    """
    return self._check_failed_state(self.get_host_state(host))
def get_original_task(self, host, task): def get_original_task(self, host, task):
''' '''
Finds the task in the task list which matches the UUID of the given task. Finds the task in the task list which matches the UUID of the given task.

View file

@ -78,7 +78,7 @@ class StrategyModule(StrategyBase):
(state, task) = iterator.get_next_task_for_host(host, peek=True) (state, task) = iterator.get_next_task_for_host(host, peek=True)
display.debug("free host state: %s" % state) display.debug("free host state: %s" % state)
display.debug("free host task: %s" % task) display.debug("free host task: %s" % task)
if host_name not in self._tqm._failed_hosts and host_name not in self._tqm._unreachable_hosts and task: if not iterator.is_failed(host) and host_name not in self._tqm._unreachable_hosts and task:
# set the flag so the outer loop knows we've still found # set the flag so the outer loop knows we've still found
# some work which needs to be done # some work which needs to be done
@ -135,7 +135,7 @@ class StrategyModule(StrategyBase):
if last_host == starting_host: if last_host == starting_host:
break break
results = self._process_pending_results(iterator) results = self._wait_on_pending_results(iterator)
host_results.extend(results) host_results.extend(results)
try: try:
@ -176,13 +176,7 @@ class StrategyModule(StrategyBase):
display.debug("done adding collected blocks to iterator") display.debug("done adding collected blocks to iterator")
# pause briefly so we don't spin lock # pause briefly so we don't spin lock
time.sleep(0.05) time.sleep(0.001)
try:
results = self._wait_on_pending_results(iterator)
host_results.extend(results)
except Exception as e:
pass
# run the base class run() method, which executes the cleanup function # run the base class run() method, which executes the cleanup function
# and runs any outstanding handlers which have been triggered # and runs any outstanding handlers which have been triggered

View file

@ -54,6 +54,7 @@ class StrategyModule(StrategyBase):
host_tasks = {} host_tasks = {}
display.debug("building list of next tasks for hosts") display.debug("building list of next tasks for hosts")
for host in hosts: for host in hosts:
if not iterator.is_failed(host):
host_tasks[host.name] = iterator.get_next_task_for_host(host, peek=True) host_tasks[host.name] = iterator.get_next_task_for_host(host, peek=True)
display.debug("done building task lists") display.debug("done building task lists")
@ -98,7 +99,7 @@ class StrategyModule(StrategyBase):
rvals = [] rvals = []
display.debug("starting to advance hosts") display.debug("starting to advance hosts")
for host in hosts: for host in hosts:
host_state_task = host_tasks[host.name] host_state_task = host_tasks.get(host.name)
if host_state_task is None: if host_state_task is None:
continue continue
(s, t) = host_state_task (s, t) = host_state_task