mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-15 17:23:50 +01:00
Make the media /upload
tracing less ambiguous (#15888)
A lot of the functions have the same name in this space like `store_file`, and we also do it multiple times for different reasons (main media repo, other storage providers, thumbnails, etc) so it's good to differentiate them so your head doesn't explode. Follow-up to https://github.com/matrix-org/synapse/pull/15850 Tracing instrumentation to media `/upload` code paths to investigate https://github.com/matrix-org/synapse/issues/15841
This commit is contained in:
parent
5e82b07d2c
commit
2328e90fbb
3 changed files with 58 additions and 38 deletions
1
changelog.d/15888.misc
Normal file
1
changelog.d/15888.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Add tracing to media `/upload` code paths.
|
|
@ -38,7 +38,7 @@ from twisted.protocols.basic import FileSender
|
||||||
|
|
||||||
from synapse.api.errors import NotFoundError
|
from synapse.api.errors import NotFoundError
|
||||||
from synapse.logging.context import defer_to_thread, make_deferred_yieldable
|
from synapse.logging.context import defer_to_thread, make_deferred_yieldable
|
||||||
from synapse.logging.opentracing import trace
|
from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
|
||||||
from synapse.util import Clock
|
from synapse.util import Clock
|
||||||
from synapse.util.file_consumer import BackgroundFileConsumer
|
from synapse.util.file_consumer import BackgroundFileConsumer
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ class MediaStorage:
|
||||||
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
|
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
|
||||||
self.clock = hs.get_clock()
|
self.clock = hs.get_clock()
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("MediaStorage.store_file")
|
||||||
async def store_file(self, source: IO, file_info: FileInfo) -> str:
|
async def store_file(self, source: IO, file_info: FileInfo) -> str:
|
||||||
"""Write `source` to the on disk media store, and also any other
|
"""Write `source` to the on disk media store, and also any other
|
||||||
configured storage providers
|
configured storage providers
|
||||||
|
@ -91,18 +91,19 @@ class MediaStorage:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with self.store_into_file(file_info) as (f, fname, finish_cb):
|
with self.store_into_file(file_info) as (f, fname, finish_cb):
|
||||||
# Write to the main repository
|
# Write to the main media repository
|
||||||
await self.write_to_file(source, f)
|
await self.write_to_file(source, f)
|
||||||
|
# Write to the other storage providers
|
||||||
await finish_cb()
|
await finish_cb()
|
||||||
|
|
||||||
return fname
|
return fname
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("MediaStorage.write_to_file")
|
||||||
async def write_to_file(self, source: IO, output: IO) -> None:
|
async def write_to_file(self, source: IO, output: IO) -> None:
|
||||||
"""Asynchronously write the `source` to `output`."""
|
"""Asynchronously write the `source` to `output`."""
|
||||||
await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
|
await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("MediaStorage.store_into_file")
|
||||||
@contextlib.contextmanager
|
@contextlib.contextmanager
|
||||||
def store_into_file(
|
def store_into_file(
|
||||||
self, file_info: FileInfo
|
self, file_info: FileInfo
|
||||||
|
@ -117,9 +118,9 @@ class MediaStorage:
|
||||||
fname can be used to read the contents from after upload, e.g. to
|
fname can be used to read the contents from after upload, e.g. to
|
||||||
generate thumbnails.
|
generate thumbnails.
|
||||||
|
|
||||||
finish_cb must be called and waited on after the file has been
|
finish_cb must be called and waited on after the file has been successfully been
|
||||||
successfully been written to. Should not be called if there was an
|
written to. Should not be called if there was an error. Checks for spam and
|
||||||
error.
|
stores the file into the configured storage providers.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_info: Info about the file to store
|
file_info: Info about the file to store
|
||||||
|
@ -139,35 +140,48 @@ class MediaStorage:
|
||||||
|
|
||||||
finished_called = [False]
|
finished_called = [False]
|
||||||
|
|
||||||
|
main_media_repo_write_trace_scope = start_active_span(
|
||||||
|
"writing to main media repo"
|
||||||
|
)
|
||||||
|
main_media_repo_write_trace_scope.__enter__()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(fname, "wb") as f:
|
with open(fname, "wb") as f:
|
||||||
|
|
||||||
async def finish() -> None:
|
async def finish() -> None:
|
||||||
# Ensure that all writes have been flushed and close the
|
# When someone calls finish, we assume they are done writing to the main media repo
|
||||||
# file.
|
main_media_repo_write_trace_scope.__exit__(None, None, None)
|
||||||
f.flush()
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
|
with start_active_span("writing to other storage providers"):
|
||||||
ReadableFileWrapper(self.clock, fname), file_info
|
# Ensure that all writes have been flushed and close the
|
||||||
)
|
# file.
|
||||||
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
|
f.flush()
|
||||||
logger.info("Blocking media due to spam checker")
|
f.close()
|
||||||
# Note that we'll delete the stored media, due to the
|
|
||||||
# try/except below. The media also won't be stored in
|
|
||||||
# the DB.
|
|
||||||
# We currently ignore any additional field returned by
|
|
||||||
# the spam-check API.
|
|
||||||
raise SpamMediaException(errcode=spam_check[0])
|
|
||||||
|
|
||||||
for provider in self.storage_providers:
|
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
|
||||||
await provider.store_file(path, file_info)
|
ReadableFileWrapper(self.clock, fname), file_info
|
||||||
|
)
|
||||||
|
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
|
||||||
|
logger.info("Blocking media due to spam checker")
|
||||||
|
# Note that we'll delete the stored media, due to the
|
||||||
|
# try/except below. The media also won't be stored in
|
||||||
|
# the DB.
|
||||||
|
# We currently ignore any additional field returned by
|
||||||
|
# the spam-check API.
|
||||||
|
raise SpamMediaException(errcode=spam_check[0])
|
||||||
|
|
||||||
finished_called[0] = True
|
for provider in self.storage_providers:
|
||||||
|
with start_active_span(str(provider)):
|
||||||
|
await provider.store_file(path, file_info)
|
||||||
|
|
||||||
|
finished_called[0] = True
|
||||||
|
|
||||||
yield f, fname, finish
|
yield f, fname, finish
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
try:
|
try:
|
||||||
|
main_media_repo_write_trace_scope.__exit__(
|
||||||
|
type(e), None, e.__traceback__
|
||||||
|
)
|
||||||
os.remove(fname)
|
os.remove(fname)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
@ -175,7 +189,11 @@ class MediaStorage:
|
||||||
raise e from None
|
raise e from None
|
||||||
|
|
||||||
if not finished_called:
|
if not finished_called:
|
||||||
raise Exception("Finished callback not called")
|
exc = Exception("Finished callback not called")
|
||||||
|
main_media_repo_write_trace_scope.__exit__(
|
||||||
|
type(exc), None, exc.__traceback__
|
||||||
|
)
|
||||||
|
raise exc
|
||||||
|
|
||||||
async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
|
async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
|
||||||
"""Attempts to fetch media described by file_info from the local cache
|
"""Attempts to fetch media described by file_info from the local cache
|
||||||
|
|
|
@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
|
||||||
|
|
||||||
from synapse.config._base import Config
|
from synapse.config._base import Config
|
||||||
from synapse.logging.context import defer_to_thread, run_in_background
|
from synapse.logging.context import defer_to_thread, run_in_background
|
||||||
from synapse.logging.opentracing import trace
|
from synapse.logging.opentracing import start_active_span, trace_with_opname
|
||||||
from synapse.util.async_helpers import maybe_awaitable
|
from synapse.util.async_helpers import maybe_awaitable
|
||||||
|
|
||||||
from ._base import FileInfo, Responder
|
from ._base import FileInfo, Responder
|
||||||
|
@ -87,7 +87,7 @@ class StorageProviderWrapper(StorageProvider):
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "StorageProviderWrapper[%s]" % (self.backend,)
|
return "StorageProviderWrapper[%s]" % (self.backend,)
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("StorageProviderWrapper.store_file")
|
||||||
async def store_file(self, path: str, file_info: FileInfo) -> None:
|
async def store_file(self, path: str, file_info: FileInfo) -> None:
|
||||||
if not file_info.server_name and not self.store_local:
|
if not file_info.server_name and not self.store_local:
|
||||||
return None
|
return None
|
||||||
|
@ -116,7 +116,7 @@ class StorageProviderWrapper(StorageProvider):
|
||||||
|
|
||||||
run_in_background(store)
|
run_in_background(store)
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("StorageProviderWrapper.fetch")
|
||||||
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
||||||
if file_info.url_cache:
|
if file_info.url_cache:
|
||||||
# Files in the URL preview cache definitely aren't stored here,
|
# Files in the URL preview cache definitely aren't stored here,
|
||||||
|
@ -144,7 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "FileStorageProviderBackend[%s]" % (self.base_directory,)
|
return "FileStorageProviderBackend[%s]" % (self.base_directory,)
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("FileStorageProviderBackend.store_file")
|
||||||
async def store_file(self, path: str, file_info: FileInfo) -> None:
|
async def store_file(self, path: str, file_info: FileInfo) -> None:
|
||||||
"""See StorageProvider.store_file"""
|
"""See StorageProvider.store_file"""
|
||||||
|
|
||||||
|
@ -156,14 +156,15 @@ class FileStorageProviderBackend(StorageProvider):
|
||||||
|
|
||||||
# mypy needs help inferring the type of the second parameter, which is generic
|
# mypy needs help inferring the type of the second parameter, which is generic
|
||||||
shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
|
shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
|
||||||
await defer_to_thread(
|
with start_active_span("shutil_copyfile"):
|
||||||
self.hs.get_reactor(),
|
await defer_to_thread(
|
||||||
shutil_copyfile,
|
self.hs.get_reactor(),
|
||||||
primary_fname,
|
shutil_copyfile,
|
||||||
backup_fname,
|
primary_fname,
|
||||||
)
|
backup_fname,
|
||||||
|
)
|
||||||
|
|
||||||
@trace
|
@trace_with_opname("FileStorageProviderBackend.fetch")
|
||||||
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
|
||||||
"""See StorageProvider.fetch"""
|
"""See StorageProvider.fetch"""
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue