Start of work to use threading instead of forking
Rather than using multiprocessing (Process and Queue objects) to do worker tasks in Ansible: * Using concurrent.futures * Using ThreadPoolExecutor * Making PluginLoader thread-safe * Gutting a lot of code dealing with message passing
This commit is contained in:
parent
a7229df469
commit
f95160723d
105
lib/ansible/executor/process/threading.py
Normal file
105
lib/ansible/executor/process/threading.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
# (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
|
||||
#
|
||||
# This file is part of Ansible
|
||||
#
|
||||
# Ansible is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Ansible is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Make coding more python3-ish
|
||||
from __future__ import (absolute_import, division, print_function)
|
||||
__metaclass__ = type
|
||||
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from jinja2.exceptions import TemplateNotFound
|
||||
|
||||
from ansible.errors import AnsibleConnectionFailure
|
||||
from ansible.executor.task_executor import TaskExecutor
|
||||
from ansible.executor.task_result import TaskResult
|
||||
from ansible.module_utils._text import to_text
|
||||
|
||||
try:
|
||||
from __main__ import display
|
||||
except ImportError:
|
||||
from ansible.utils.display import Display
|
||||
display = Display()
|
||||
|
||||
# Only run_worker is defined here; exporting any other name makes
# ``from <module> import *`` fail with AttributeError.
__all__ = ['run_worker']


def run_worker(task_vars, host, task, play_context, loader, variable_manager, shared_loader_obj):
    '''
    Run one task against one host with TaskExecutor and return the outcome.

    The function is a plain callable (no queues are involved): the result is
    handed back to the caller as the return value.

    :arg task_vars: variables passed to TaskExecutor for this host/task
    :arg host: the host the task is run against
    :arg task: the task to run
    :arg play_context: play context passed to TaskExecutor
    :arg loader: data loader passed to TaskExecutor
    :arg variable_manager: accepted for interface compatibility; not used
        by this function
    :arg shared_loader_obj: plugin loader holder passed to TaskExecutor
    :returns: a TaskResult wrapping the executor result on success; a
        TaskResult with ``unreachable=True`` on AnsibleConnectionFailure; a
        TaskResult with ``failed=True`` and the formatted traceback for any
        other reportable exception; ``None`` when the exception is an
        IOError/EOFError (other than TemplateNotFound) or when the failure
        result itself could not be built.
    '''

    try:
        # execute the task and build a TaskResult from the result
        display.debug("running TaskExecutor() for %s/%s" % (host, task))
        executor_result = TaskExecutor(
            host,
            task,
            task_vars,
            play_context,
            None,  # new_stdin
            loader,
            shared_loader_obj,
            None,  # rslt_q
        ).run()

        display.debug("done running TaskExecutor() for %s/%s" % (host, task))
        task_result = TaskResult(host, task, executor_result)

        display.debug("sending task result")
        return task_result

    except AnsibleConnectionFailure:
        return TaskResult(host, task, dict(unreachable=True))

    except Exception as e:
        # KeyboardInterrupt and SystemExit are not Exception subclasses, so
        # they never reach this handler and need no explicit exclusion.
        # TemplateNotFound is re-included explicitly so that it is reported
        # even though IOError/EOFError are otherwise skipped.
        reportable = not isinstance(e, (IOError, EOFError)) or isinstance(e, TemplateNotFound)
        if reportable:
            try:
                return TaskResult(
                    host,
                    task,
                    dict(failed=True, exception=to_text(traceback.format_exc()), stdout=''),
                )
            except Exception:
                # Building the failure result failed too; log both and fall
                # through. Interpreter-exit signals are left to propagate.
                display.debug(u"WORKER EXCEPTION: %s" % to_text(e))
                display.debug(u"WORKER TRACEBACK: %s" % to_text(traceback.format_exc()))

    display.debug("WORKER PROCESS EXITING")
    return None
|
|
@ -19,10 +19,12 @@
|
|||
from __future__ import (absolute_import, division, print_function)
|
||||
__metaclass__ = type
|
||||
|
||||
import multiprocessing
|
||||
import threading
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor as PoolExecutor
|
||||
|
||||
from ansible import constants as C
|
||||
from ansible.errors import AnsibleError
|
||||
from ansible.executor.play_iterator import PlayIterator
|
||||
|
@ -100,18 +102,21 @@ class TaskQueueManager:
|
|||
self._failed_hosts = dict()
|
||||
self._unreachable_hosts = dict()
|
||||
|
||||
self._final_q = multiprocessing.Queue()
|
||||
|
||||
# A temporary file (opened pre-fork) used by connection
|
||||
# plugins for inter-process locking.
|
||||
self._connection_lockfile = tempfile.TemporaryFile()
|
||||
|
||||
self._executor = None
|
||||
|
||||
def _initialize_processes(self, num):
    '''
    Create the thread pool and the table of worker slots.

    After the call ``self._executor`` is a pool limited to ``num`` workers
    and ``self._workers`` holds exactly ``num`` slots, each a two-item list
    ``[future_or_None, lock]`` with its own ``threading.Lock``.

    :arg num: maximum number of concurrent workers / number of slots
    '''
    # A pool left over from an earlier call would otherwise be dropped
    # without ever being shut down; stop it from taking new work before it
    # is replaced. wait=False so already-running tasks are not waited on.
    previous = getattr(self, '_executor', None)
    if previous is not None:
        previous.shutdown(wait=False)

    self._executor = PoolExecutor(max_workers=num)

    # FIXME: do we need a global lock for workers here instead of a per-worker?
    self._workers = [[None, threading.Lock()] for _ in range(num)]
|
||||
|
||||
def _initialize_notified_handlers(self, play):
|
||||
'''
|
||||
|
@ -312,18 +317,13 @@ class TaskQueueManager:
|
|||
def cleanup(self):
    '''
    Terminate this manager and release its workers.

    Calls ``terminate()`` first and then ``_cleanup_processes()``. There is
    no final results queue to close in the threaded design.
    '''
    display.debug("RUNNING CLEANUP")
    self.terminate()
    self._cleanup_processes()
|
||||
|
||||
def _cleanup_processes(self):
    '''
    Cancel every worker future that has not finished yet.

    Does nothing when ``_workers`` has not been created. Each slot is a
    ``[future_or_None, lock]`` pair; only the future is inspected.
    '''
    for slot in getattr(self, '_workers', ()):
        w_thread = slot[0]
        # Future has no is_running() method (the API is running()), and
        # cancel() only has an effect before a task starts executing, so
        # every future that is not done is asked to cancel.
        if w_thread is not None and not w_thread.done():
            w_thread.cancel()
    # NOTE(review): the pool in self._executor is not shut down here --
    # confirm whether another code path is expected to do that.
|
||||
|
||||
def clear_failed_hosts(self):
|
||||
self._failed_hosts = dict()
|
||||
|
|
|
@ -24,13 +24,13 @@ import threading
|
|||
import time
|
||||
|
||||
from collections import deque
|
||||
from multiprocessing import Lock
|
||||
from jinja2.exceptions import UndefinedError
|
||||
|
||||
from ansible import constants as C
|
||||
from ansible.errors import AnsibleError, AnsibleParserError, AnsibleUndefinedVariable
|
||||
from ansible.executor import action_write_locks
|
||||
from ansible.executor.process.worker import WorkerProcess
|
||||
#from ansible.executor.process.worker import WorkerProcess
|
||||
from ansible.executor.process.threading import run_worker
|
||||
from ansible.executor.task_result import TaskResult
|
||||
from ansible.inventory.host import Host
|
||||
from ansible.module_utils.six.moves import queue as Queue
|
||||
|
@ -54,11 +54,6 @@ except ImportError:
|
|||
|
||||
__all__ = ['StrategyBase']
|
||||
|
||||
|
||||
class StrategySentinel:
|
||||
pass
|
||||
|
||||
|
||||
# TODO: this should probably be in the plugins/__init__.py, with
|
||||
# a smarter mechanism to set all of the attributes based on
|
||||
# the loaders created there
|
||||
|
@ -75,23 +70,30 @@ class SharedPluginLoaderObj:
|
|||
self.lookup_loader = lookup_loader
|
||||
self.module_loader = module_loader
|
||||
|
||||
_sentinel = StrategySentinel()
|
||||
|
||||
|
||||
def results_thread_main(strategy):
    '''
    Collect finished worker futures until the task queue manager terminates.

    On every pass each slot of ``strategy._tqm._workers`` (a
    ``[future_or_None, lock]`` pair) is examined. When its future is done,
    the value it returned is appended to ``strategy._results`` under
    ``strategy._results_lock`` and the slot is reset to ``[None, lock]`` so
    it can be reused. A pass that harvests nothing sleeps for
    ``C.DEFAULT_INTERNAL_POLL_INTERVAL``.

    :arg strategy: object exposing ``_tqm``, ``_results`` and
        ``_results_lock``
    '''
    tqm = strategy._tqm
    while not tqm._terminated:
        did_work = False
        try:
            for idx, (w_thread, w_lock) in enumerate(tqm._workers):
                with w_lock:
                    if w_thread is None or not w_thread.done():
                        continue

                    # A cancelled future or one whose callable raised has no
                    # result to hand over. The slot is still released below;
                    # otherwise it stays occupied and is re-examined on
                    # every pass.
                    if w_thread.cancelled():
                        display.debug("worker %d was cancelled" % idx)
                    else:
                        error = w_thread.exception()
                        if error is not None:
                            display.debug("worker %d raised: %r" % (idx, error))
                        else:
                            with strategy._results_lock:
                                strategy._results.append(w_thread.result())

                    tqm._workers[idx] = [None, w_lock]
                    did_work = True
        except Exception as e:
            # Keep the collector alive (best effort), but make the failure
            # visible instead of discarding it.
            display.debug("results thread error: %r" % (e,))

        # Sleeping here rather than inside the try block means an error
        # during the pass cannot turn the loop into a busy spin.
        if not did_work:
            time.sleep(C.DEFAULT_INTERNAL_POLL_INTERVAL)

    display.debug("RESULTS THREAD EXITED!!!")
|
||||
|
||||
|
||||
class StrategyBase:
|
||||
|
@ -102,16 +104,15 @@ class StrategyBase:
|
|||
'''
|
||||
|
||||
def __init__(self, tqm):
|
||||
self._tqm = tqm
|
||||
self._inventory = tqm.get_inventory()
|
||||
self._workers = tqm.get_workers()
|
||||
self._notified_handlers = tqm._notified_handlers
|
||||
self._tqm = tqm
|
||||
self._inventory = tqm.get_inventory()
|
||||
self._workers = tqm._workers
|
||||
self._notified_handlers = tqm._notified_handlers
|
||||
self._listening_handlers = tqm._listening_handlers
|
||||
self._variable_manager = tqm.get_variable_manager()
|
||||
self._loader = tqm.get_loader()
|
||||
self._final_q = tqm._final_q
|
||||
self._step = getattr(tqm._options, 'step', False)
|
||||
self._diff = getattr(tqm._options, 'diff', False)
|
||||
self._variable_manager = tqm.get_variable_manager()
|
||||
self._loader = tqm.get_loader()
|
||||
self._step = getattr(tqm._options, 'step', False)
|
||||
self._diff = getattr(tqm._options, 'diff', False)
|
||||
|
||||
# Backwards compat: self._display isn't really needed, just import the global display and use that.
|
||||
self._display = display
|
||||
|
@ -133,7 +134,7 @@ class StrategyBase:
|
|||
self._results_thread.start()
|
||||
|
||||
def cleanup(self):
    '''
    Stop the results thread and wait for it to finish.

    Terminating the task queue manager is what ends the results thread's
    loop, so it is done before joining. No sentinel is queued: the threaded
    design has no final queue.
    '''
    self._tqm.terminate()
    self._results_thread.join()
|
||||
|
||||
def run(self, iterator, play_context, result=0):
|
||||
|
@ -203,11 +204,10 @@ class StrategyBase:
|
|||
|
||||
if task.action not in action_write_locks.action_write_locks:
|
||||
display.debug('Creating lock for %s' % task.action)
|
||||
action_write_locks.action_write_locks[task.action] = Lock()
|
||||
action_write_locks.action_write_locks[task.action] = threading.Lock()
|
||||
|
||||
# and then queue the new task
|
||||
try:
|
||||
|
||||
# create a dummy object with plugin loaders set as an easier
|
||||
# way to share them with the forked processes
|
||||
shared_loader_obj = SharedPluginLoaderObj()
|
||||
|
@ -215,12 +215,20 @@ class StrategyBase:
|
|||
queued = False
|
||||
starting_worker = self._cur_worker
|
||||
while True:
|
||||
(worker_prc, rslt_q) = self._workers[self._cur_worker]
|
||||
if worker_prc is None or not worker_prc.is_alive():
|
||||
worker_prc = WorkerProcess(self._final_q, task_vars, host, task, play_context, self._loader, self._variable_manager, shared_loader_obj)
|
||||
self._workers[self._cur_worker][0] = worker_prc
|
||||
worker_prc.start()
|
||||
display.debug("worker is %d (out of %d available)" % (self._cur_worker + 1, len(self._workers)))
|
||||
(w_thread, w_lock) = self._workers[self._cur_worker]
|
||||
if w_thread is None:
|
||||
w_thread = self._tqm._executor.submit(
|
||||
run_worker,
|
||||
task_vars,
|
||||
host,
|
||||
task,
|
||||
play_context,
|
||||
self._loader,
|
||||
self._variable_manager,
|
||||
shared_loader_obj
|
||||
)
|
||||
self._workers[self._cur_worker][0] = w_thread
|
||||
display.debug("worker is %d (out of %d available)" % (self._cur_worker+1, len(self._workers)))
|
||||
queued = True
|
||||
self._cur_worker += 1
|
||||
if self._cur_worker >= len(self._workers):
|
||||
|
|
Loading…
Reference in a new issue