Correctly account for cpu usage by background threads (#4074)

Wrap calls to deferToThread() in a thing which uses a child logcontext to
attribute CPU usage to the right request.

While we're in the area, remove the logcontext_tracer stuff, which is never
used, and afaik doesn't work.

Fixes #4064
This commit is contained in:
Richard van der Hoff 2018-10-23 13:12:32 +01:00 committed by GitHub
parent 1fe6bbb555
commit 5c445114d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 94 additions and 77 deletions

1
changelog.d/4074.bugfix Normal file
View file

@ -0,0 +1 @@
Correctly account for cpu usage by background threads

View file

@ -22,7 +22,7 @@ import bcrypt
import pymacaroons
from canonicaljson import json
from twisted.internet import defer, threads
from twisted.internet import defer
from twisted.web.client import PartialDownloadError
import synapse.util.stringutils as stringutils
@ -37,8 +37,8 @@ from synapse.api.errors import (
)
from synapse.module_api import ModuleApi
from synapse.types import UserID
from synapse.util import logcontext
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.logcontext import make_deferred_yieldable
from ._base import BaseHandler
@ -884,11 +884,7 @@ class AuthHandler(BaseHandler):
bcrypt.gensalt(self.bcrypt_rounds),
).decode('ascii')
return make_deferred_yieldable(
threads.deferToThreadPool(
self.hs.get_reactor(), self.hs.get_reactor().getThreadPool(), _do_hash
),
)
return logcontext.defer_to_thread(self.hs.get_reactor(), _do_hash)
def validate_hash(self, password, stored_hash):
"""Validates that self.hash(password) == stored_hash.
@ -913,13 +909,7 @@ class AuthHandler(BaseHandler):
if not isinstance(stored_hash, bytes):
stored_hash = stored_hash.encode('ascii')
return make_deferred_yieldable(
threads.deferToThreadPool(
self.hs.get_reactor(),
self.hs.get_reactor().getThreadPool(),
_do_validate_hash,
),
)
return logcontext.defer_to_thread(self.hs.get_reactor(), _do_validate_hash)
else:
return defer.succeed(False)

View file

@ -25,7 +25,7 @@ from six.moves.urllib import parse as urlparse
import twisted.internet.error
import twisted.web.http
from twisted.internet import defer, threads
from twisted.internet import defer
from twisted.web.resource import Resource
from synapse.api.errors import (
@ -36,8 +36,8 @@ from synapse.api.errors import (
)
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util import logcontext
from synapse.util.async_helpers import Linearizer
from synapse.util.logcontext import make_deferred_yieldable
from synapse.util.retryutils import NotRetryingDestination
from synapse.util.stringutils import is_ascii, random_string
@ -492,10 +492,11 @@ class MediaRepository(object):
))
thumbnailer = Thumbnailer(input_path)
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
t_byte_source = yield logcontext.defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer, t_width, t_height, t_method, t_type
))
)
if t_byte_source:
try:
@ -534,10 +535,11 @@ class MediaRepository(object):
))
thumbnailer = Thumbnailer(input_path)
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
t_byte_source = yield logcontext.defer_to_thread(
self.hs.get_reactor(),
self._generate_thumbnail,
thumbnailer, t_width, t_height, t_method, t_type
))
)
if t_byte_source:
try:
@ -620,15 +622,17 @@ class MediaRepository(object):
for (t_width, t_height, t_type), t_method in iteritems(thumbnails):
# Generate the thumbnail
if t_method == "crop":
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
t_byte_source = yield logcontext.defer_to_thread(
self.hs.get_reactor(),
thumbnailer.crop,
t_width, t_height, t_type,
))
)
elif t_method == "scale":
t_byte_source = yield make_deferred_yieldable(threads.deferToThread(
t_byte_source = yield logcontext.defer_to_thread(
self.hs.get_reactor(),
thumbnailer.scale,
t_width, t_height, t_type,
))
)
else:
logger.error("Unrecognized method: %r", t_method)
continue

View file

@ -21,9 +21,10 @@ import sys
import six
from twisted.internet import defer, threads
from twisted.internet import defer
from twisted.protocols.basic import FileSender
from synapse.util import logcontext
from synapse.util.file_consumer import BackgroundFileConsumer
from synapse.util.logcontext import make_deferred_yieldable
@ -64,9 +65,10 @@ class MediaStorage(object):
with self.store_into_file(file_info) as (f, fname, finish_cb):
# Write to the main repository
yield make_deferred_yieldable(threads.deferToThread(
yield logcontext.defer_to_thread(
self.hs.get_reactor(),
_write_file_synchronously, source, f,
))
)
yield finish_cb()
defer.returnValue(fname)

View file

@ -17,9 +17,10 @@ import logging
import os
import shutil
from twisted.internet import defer, threads
from twisted.internet import defer
from synapse.config._base import Config
from synapse.util import logcontext
from synapse.util.logcontext import run_in_background
from .media_storage import FileResponder
@ -120,7 +121,8 @@ class FileStorageProviderBackend(StorageProvider):
if not os.path.exists(dirname):
os.makedirs(dirname)
return threads.deferToThread(
return logcontext.defer_to_thread(
self.hs.get_reactor(),
shutil.copyfile, primary_fname, backup_fname,
)

View file

@ -25,7 +25,7 @@ See doc/log_contexts.rst for details on how this works.
import logging
import threading
from twisted.internet import defer
from twisted.internet import defer, threads
logger = logging.getLogger(__name__)
@ -562,58 +562,76 @@ def _set_context_cb(result, context):
return result
# modules to ignore in `logcontext_tracer`
_to_ignore = [
"synapse.util.logcontext",
"synapse.http.server",
"synapse.storage._base",
"synapse.util.async_helpers",
]
def logcontext_tracer(frame, event, arg):
"""A tracer that logs whenever a logcontext "unexpectedly" changes within
a function. Probably inaccurate.
Use by calling `sys.settrace(logcontext_tracer)` in the main thread.
def defer_to_thread(reactor, f, *args, **kwargs):
"""
if event == 'call':
name = frame.f_globals["__name__"]
if name.startswith("synapse"):
if name == "synapse.util.logcontext":
if frame.f_code.co_name in ["__enter__", "__exit__"]:
tracer = frame.f_back.f_trace
if tracer:
tracer.just_changed = True
Calls the function `f` using a thread from the reactor's default threadpool and
returns the result as a Deferred.
tracer = frame.f_trace
if tracer:
return tracer
Creates a new logcontext for `f`, which is created as a child of the current
logcontext (so its CPU usage metrics will get attributed to the current
logcontext). `f` should preserve the logcontext it is given.
if not any(name.startswith(ig) for ig in _to_ignore):
return LineTracer()
The result deferred follows the Synapse logcontext rules: you should `yield`
on it.
Args:
reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
the Deferred will be invoked, and whose threadpool we should use for the
function.
Normally this will be hs.get_reactor().
f (callable): The function to call.
args: positional arguments to pass to f.
kwargs: keyword arguments to pass to f.
Returns:
Deferred: A Deferred which fires a callback with the result of `f`, or an
errback if `f` throws an exception.
"""
return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)
class LineTracer(object):
__slots__ = ["context", "just_changed"]
def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
"""
A wrapper for twisted.internet.threads.deferToThreadpool, which handles
logcontexts correctly.
def __init__(self):
self.context = LoggingContext.current_context()
self.just_changed = False
Calls the function `f` using a thread from the given threadpool and returns
the result as a Deferred.
def __call__(self, frame, event, arg):
if event in 'line':
if self.just_changed:
self.context = LoggingContext.current_context()
self.just_changed = False
else:
c = LoggingContext.current_context()
if c != self.context:
logger.info(
"Context changed! %s -> %s, %s, %s",
self.context, c,
frame.f_code.co_filename, frame.f_lineno
)
self.context = c
Creates a new logcontext for `f`, which is created as a child of the current
logcontext (so its CPU usage metrics will get attributed to the current
logcontext). `f` should preserve the logcontext it is given.
return self
The result deferred follows the Synapse logcontext rules: you should `yield`
on it.
Args:
reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
the Deferred will be invoked. Normally this will be hs.get_reactor().
threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
running `f`. Normally this will be hs.get_reactor().getThreadPool().
f (callable): The function to call.
args: positional arguments to pass to f.
kwargs: keyword arguments to pass to f.
Returns:
Deferred: A Deferred which fires a callback with the result of `f`, or an
errback if `f` throws an exception.
"""
logcontext = LoggingContext.current_context()
def g():
with LoggingContext(parent_context=logcontext):
return f(*args, **kwargs)
return make_deferred_yieldable(
threads.deferToThreadPool(reactor, threadpool, g)
)