From bf4fb1fb400daad23702bc0b3231ec069d68e87e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:20:59 +0100 Subject: [PATCH 01/29] Basic implementation of backup media store --- synapse/config/repository.py | 18 ++ synapse/rest/media/v1/media_repository.py | 219 +++++++++++----------- synapse/rest/media/v1/thumbnailer.py | 16 +- synapse/rest/media/v1/upload_resource.py | 2 +- 4 files changed, 130 insertions(+), 125 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 2c6f57168..e3c83d56f 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -70,7 +70,17 @@ class ContentRepositoryConfig(Config): self.max_upload_size = self.parse_size(config["max_upload_size"]) self.max_image_pixels = self.parse_size(config["max_image_pixels"]) self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.media_store_path = self.ensure_directory(config["media_store_path"]) + + self.backup_media_store_path = config.get("backup_media_store_path") + if self.backup_media_store_path: + self.ensure_directory(self.backup_media_store_path) + + self.synchronous_backup_media_store = config.get( + "synchronous_backup_media_store", False + ) + self.uploads_path = self.ensure_directory(config["uploads_path"]) self.dynamic_thumbnails = config["dynamic_thumbnails"] self.thumbnail_requirements = parse_thumbnail_requirements( @@ -115,6 +125,14 @@ class ContentRepositoryConfig(Config): # Directory where uploaded images and attachments are stored. media_store_path: "%(media_store)s" + # A secondary directory where uploaded images and attachments are + # stored as a backup. + # backup_media_store_path: "%(media_store)s" + + # Whether to wait for successful write to backup media store before + # returning successfully. + # synchronous_backup_media_store: false + # Directory where in-progress uploads are stored. uploads_path: "%(uploads_path)s" diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 0ea1248ce..3b442cc16 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn +from synapse.util.logcontext import preserve_context_over_fn, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -59,7 +59,12 @@ class MediaRepository(object): self.store = hs.get_datastore() self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.backup_filepaths = None + if hs.config.backup_media_store_path: + self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -87,18 +92,43 @@ class MediaRepository(object): if not os.path.exists(dirname): os.makedirs(dirname) + @defer.inlineCallbacks + def _write_to_file(self, source, file_name_func): + def write_file_thread(file_name): + source.seek(0) # Ensure we read from the start of the file + with open(file_name, "wb") as f: + shutil.copyfileobj(source, f) + + fname = file_name_func(self.filepaths) + self._makedirs(fname) + + # Write to the main repository + yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + + # Write to backup repository + if self.backup_filepaths: + backup_fname = file_name_func(backup_filepaths) + self._makedirs(backup_fname) + + # We can either wait for successful writing to the backup repository + # or write in the background and immediately return + if hs.config.synchronous_backup_media_store: + yield preserve_context_over_fn( + threads.deferToThread, write_file_thread, backup_fname, + ) + else: + preserve_fn(threads.deferToThread)(write_file, backup_fname) + + defer.returnValue(fname) + @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = self.filepaths.local_media_filepath(media_id) - self._makedirs(fname) - - # This shouldn't block for very long because the content will have - # already been uploaded at this point. - with open(fname, "wb") as f: - f.write(content) + fname = yield self._write_to_file( + content, lambda f: f.local_media_filepath(media_id) + ) logger.info("Stored local media in file %r", fname) @@ -253,9 +283,8 @@ class MediaRepository(object): def _get_thumbnail_requirements(self, media_type): return self.thumbnail_requirements.get(media_type, ()) - def _generate_thumbnail(self, input_path, t_path, t_width, t_height, + def _generate_thumbnail(self, thumbnailer, t_width, t_height, t_method, t_type): - thumbnailer = Thumbnailer(input_path) m_width = thumbnailer.width m_height = thumbnailer.height @@ -267,36 +296,40 @@ class MediaRepository(object): return if t_method == "crop": - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) elif t_method == "scale": t_width, t_height = thumbnailer.aspect(t_width, t_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + t_byte_source = thumbnailer.scale(t_width, t_height, t_type) else: - t_len = None + t_byte_source = None - return t_len + return t_byte_source @defer.inlineCallbacks def generate_local_exact_thumbnail(self, media_id, t_width, t_height, t_method, t_type): input_path = self.filepaths.local_media_filepath(media_id) - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -306,21 +339,25 @@ class MediaRepository(object): t_width, t_height, t_method, t_type): input_path = self.filepaths.remote_media_filepath(server_name, file_id) - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - - t_len = yield preserve_context_over_fn( + thumbnailer = Thumbnailer(input_path) + t_byte_source = yield preserve_context_over_fn( threads.deferToThread, self._generate_thumbnail, - input_path, t_path, t_width, t_height, t_method, t_type + thumbnailer, t_width, t_height, t_method, t_type ) - if t_len: + if t_byte_source: + output_path = yield self._write_to_file( + content, + lambda f: f.remote_media_thumbnail( + server_name, file_id, t_width, t_height, t_type, t_method + ) + ) + logger.info("Stored thumbnail in file %r", output_path) + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) defer.returnValue(t_path) @@ -351,59 +388,32 @@ class MediaRepository(object): local_thumbnails = [] def generate_thumbnails(): - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, + ) local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len - )) - - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - if url_cache: - t_path = self.filepaths.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - t_path = self.filepaths.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - local_thumbnails.append(( - media_id, t_width, t_height, t_type, t_method, t_len + r_width, r_height, r_method, r_type, t_byte_source )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for l in local_thumbnails: - yield self.store.store_local_thumbnail(*l) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + if url_cache: + path_name_func = lambda f: f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + path_name_func = lambda f: f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) defer.returnValue({ "width": m_width, @@ -433,51 +443,32 @@ class MediaRepository(object): ) return - scales = set() - crops = set() for r_width, r_height, r_method, r_type in requirements: - if r_method == "scale": - t_width, t_height = thumbnailer.aspect(r_width, r_height) - scales.add(( - min(m_width, t_width), min(m_height, t_height), r_type, - )) - elif r_method == "crop": - crops.add((r_width, r_height, r_type)) - - for t_width, t_height, t_type in scales: - t_method = "scale" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method + t_byte_source = self._generate_thumbnail( + thumbnailer, r_width, r_height, r_method, r_type, ) - self._makedirs(t_path) - t_len = thumbnailer.scale(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) - for t_width, t_height, t_type in crops: - if (t_width, t_height, t_type) in scales: - # If the aspect ratio of the cropped thumbnail matches a purely - # scaled one then there is no point in calculating a separate - # thumbnail. - continue - t_method = "crop" - t_path = self.filepaths.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) - self._makedirs(t_path) - t_len = thumbnailer.crop(t_path, t_width, t_height, t_type) - remote_thumbnails.append([ - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ]) + remote_thumbnails.append(( + r_width, r_height, r_method, r_type, t_byte_source + )) yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) + for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: + path_name_func = lambda f: f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) + + yield self._write_to_file(t_byte_source, path_name_func) + + yield self.store.store_remote_media_thumbnail( + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + ) + defer.returnValue({ "width": m_width, "height": m_height, diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 3868d4f65..60498b08a 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -50,12 +50,12 @@ class Thumbnailer(object): else: return ((max_height * self.width) // self.height, max_height) - def scale(self, output_path, width, height, output_type): + def scale(self, width, height, output_type): """Rescales the image to the given dimensions""" scaled = self.image.resize((width, height), Image.ANTIALIAS) - return self.save_image(scaled, output_type, output_path) + return self._encode_image(scaled, output_type) - def crop(self, output_path, width, height, output_type): + def crop(self, width, height, output_type): """Rescales and crops the image to the given dimensions preserving aspect:: (w_in / h_in) = (w_scaled / h_scaled) @@ -82,13 +82,9 @@ class Thumbnailer(object): crop_left = (scaled_width - width) // 2 crop_right = width + crop_left cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self.save_image(cropped, output_type, output_path) + return self._encode_image(cropped, output_type) - def save_image(self, output_image, output_type, output_path): + def _encode_image(self, output_image, output_type): output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_bytes = output_bytes_io.getvalue() - with open(output_path, "wb") as output_file: - output_file.write(output_bytes) - logger.info("Stored thumbnail in file %r", output_path) - return len(output_bytes) + return output_bytes_io diff --git a/synapse/rest/media/v1/upload_resource.py b/synapse/rest/media/v1/upload_resource.py index 4ab33f73b..f6f498cdc 100644 --- a/synapse/rest/media/v1/upload_resource.py +++ b/synapse/rest/media/v1/upload_resource.py @@ -93,7 +93,7 @@ class UploadResource(Resource): # TODO(markjh): parse content-dispostion content_uri = yield self.media_repo.create_content( - media_type, upload_name, request.content.read(), + media_type, upload_name, request.content, content_length, requester.user ) From 67cb89fbdf62dfb2ff65f6f7f0ca23445cdac0ac Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:23:41 +0100 Subject: [PATCH 02/29] Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b442cc16..f26f793be 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -107,7 +107,7 @@ class MediaRepository(object): # Write to backup repository if self.backup_filepaths: - backup_fname = file_name_func(backup_filepaths) + backup_fname = file_name_func(self.backup_filepaths) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository From c8eeef6947af762c3eabef6ecca0f69833fbf8ab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:28:24 +0100 Subject: [PATCH 03/29] Fix typos --- synapse/rest/media/v1/media_repository.py | 46 +++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f26f793be..a16034fd6 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -65,6 +65,8 @@ class MediaRepository(object): if hs.config.backup_media_store_path: self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store + self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.thumbnail_requirements = hs.config.thumbnail_requirements @@ -112,12 +114,12 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return - if hs.config.synchronous_backup_media_store: + if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, write_file_thread, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file, backup_fname) + preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) defer.returnValue(fname) @@ -321,7 +323,7 @@ class MediaRepository(object): if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.local_media_thumbnail( media_id, t_width, t_height, t_type, t_method ) @@ -329,10 +331,11 @@ class MediaRepository(object): logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def generate_remote_exact_thumbnail(self, server_name, file_id, media_id, @@ -348,7 +351,7 @@ class MediaRepository(object): if t_byte_source: output_path = yield self._write_to_file( - content, + t_byte_source, lambda f: f.remote_media_thumbnail( server_name, file_id, t_width, t_height, t_type, t_method ) @@ -360,7 +363,7 @@ class MediaRepository(object): t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) - defer.returnValue(t_path) + defer.returnValue(output_path) @defer.inlineCallbacks def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): @@ -400,19 +403,21 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: - path_name_func = lambda f: f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - path_name_func = lambda f: f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + def path_name_func(f): + if url_cache: + return f.url_cache_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) + else: + return f.local_media_thumbnail( + media_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, + len(t_byte_source.getvalue()) ) defer.returnValue({ @@ -457,10 +462,11 @@ class MediaRepository(object): for r in remote_thumbnails: yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - path_name_func = lambda f: f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method - ) + for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: + def path_name_func(f): + return f.remote_media_thumbnail( + server_name, media_id, file_id, t_width, t_height, t_type, t_method + ) yield self._write_to_file(t_byte_source, path_name_func) From 6dfde6d4856695890271232f8a2e4c5f32615dd1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:30:26 +0100 Subject: [PATCH 04/29] Remove dead code --- synapse/rest/media/v1/media_repository.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a16034fd6..1eeb128d2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -459,9 +459,6 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) - for r in remote_thumbnails: - yield self.store.store_remote_media_thumbnail(*r) - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( From b77a13812c38b2e79b2ebfddb52ce88a2ac8e9b9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 15:32:32 +0100 Subject: [PATCH 05/29] Typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 1eeb128d2..93b35af9c 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -462,7 +462,7 @@ class MediaRepository(object): for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: def path_name_func(f): return f.remote_media_thumbnail( - server_name, media_id, file_id, t_width, t_height, t_type, t_method + server_name, file_id, t_width, t_height, t_type, t_method ) yield self._write_to_file(t_byte_source, path_name_func) From e283b555b1f20de4fd393fd947e82eb3c635b7e9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:31:24 +0100 Subject: [PATCH 06/29] Copy everything to backup --- synapse/config/repository.py | 4 +- synapse/rest/media/v1/filepath.py | 99 ++++++++++------ synapse/rest/media/v1/media_repository.py | 109 +++++++++++------- synapse/rest/media/v1/preview_url_resource.py | 7 +- synapse/rest/media/v1/thumbnailer.py | 9 +- 5 files changed, 151 insertions(+), 77 deletions(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index e3c83d56f..6baa47493 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -75,7 +75,9 @@ class ContentRepositoryConfig(Config): self.backup_media_store_path = config.get("backup_media_store_path") if self.backup_media_store_path: - self.ensure_directory(self.backup_media_store_path) + self.backup_media_store_path = self.ensure_directory( + self.backup_media_store_path + ) self.synchronous_backup_media_store = config.get( "synchronous_backup_media_store", False diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index d5cec1012..43d0eea00 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -15,103 +15,134 @@ import os import re +import functools NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") +def _wrap_in_base_path(func): + """Takes a function that returns a relative path and turns it into an + absolute path based on the location of the primary media store + """ + @functools.wraps(func) + def _wrapped(self, *args, **kwargs): + path = func(self, *args, **kwargs) + return os.path.join(self.primary_base_path, path) + + return _wrapped + + class MediaFilePaths(object): + """Describes where files are stored on disk. - def __init__(self, base_path): - self.base_path = base_path + Most of the function have a `*_rel` variant which returns a file path that + is relative to the base media store path. This is mainly used when we want + to write to the backup media store (when one is configured) + """ - def default_thumbnail(self, default_top_level, default_sub_type, width, - height, content_type, method): + def __init__(self, primary_base_path): + self.primary_base_path = primary_base_path + + def default_thumbnail_rel(self, default_top_level, default_sub_type, width, + height, content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "default_thumbnails", default_top_level, + "default_thumbnails", default_top_level, default_sub_type, file_name ) - def local_media_filepath(self, media_id): + default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) + + def local_media_filepath_rel(self, media_id): return os.path.join( - self.base_path, "local_content", + "local_content", media_id[0:2], media_id[2:4], media_id[4:] ) - def local_media_thumbnail(self, media_id, width, height, content_type, - method): + local_media_filepath = _wrap_in_base_path(local_media_filepath_rel) + + def local_media_thumbnail_rel(self, media_id, width, height, content_type, + method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s-%s" % ( width, height, top_level_type, sub_type, method ) return os.path.join( - self.base_path, "local_thumbnails", + "local_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) - def remote_media_filepath(self, server_name, file_id): + local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + + def remote_media_filepath_rel(self, server_name, file_id): return os.path.join( - self.base_path, "remote_content", server_name, + "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] ) - def remote_media_thumbnail(self, server_name, file_id, width, height, - content_type, method): + remote_media_filepath = _wrap_in_base_path(remote_media_filepath_rel) + + def remote_media_thumbnail_rel(self, server_name, file_id, width, height, + content_type, method): top_level_type, sub_type = content_type.split("/") file_name = "%i-%i-%s-%s" % (width, height, top_level_type, sub_type) return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], file_name ) + remote_media_thumbnail = _wrap_in_base_path(remote_media_thumbnail_rel) + def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.base_path, "remote_thumbnail", server_name, + "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) - def url_cache_filepath(self, media_id): + def url_cache_filepath_rel(self, media_id): if NEW_FORMAT_ID_RE.match(media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], media_id[11:] ) else: return os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_filepath = _wrap_in_base_path(url_cache_filepath_rel) + def url_cache_filepath_dirs_to_delete(self, media_id): "The dirs to try and remove if we delete the media_id file" if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache", + "url_cache", media_id[0:2], ), ] - def url_cache_thumbnail(self, media_id, width, height, content_type, - method): + def url_cache_thumbnail_rel(self, media_id, width, height, content_type, + method): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf @@ -122,29 +153,31 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], file_name ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], file_name ) + url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) + def url_cache_thumbnail_directory(self, media_id): # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -155,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.base_path, "url_cache_thumbnails", + "url_cache_thumbnails", media_id[0:2], ), ] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 93b35af9c..398e973ca 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -60,10 +60,12 @@ class MediaRepository(object): self.max_upload_size = hs.config.max_upload_size self.max_image_pixels = hs.config.max_image_pixels - self.filepaths = MediaFilePaths(hs.config.media_store_path) - self.backup_filepaths = None + self.primary_base_path = hs.config.media_store_path + self.filepaths = MediaFilePaths(self.primary_base_path) + + self.backup_base_path = None if hs.config.backup_media_store_path: - self.backup_filepaths = MediaFilePaths(hs.config.backup_media_store_path) + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store @@ -94,42 +96,63 @@ class MediaRepository(object): if not os.path.exists(dirname): os.makedirs(dirname) - @defer.inlineCallbacks - def _write_to_file(self, source, file_name_func): - def write_file_thread(file_name): - source.seek(0) # Ensure we read from the start of the file - with open(file_name, "wb") as f: - shutil.copyfileobj(source, f) + @staticmethod + def write_file_synchronously(source, fname): + source.seek(0) # Ensure we read from the start of the file + with open(fname, "wb") as f: + shutil.copyfileobj(source, f) - fname = file_name_func(self.filepaths) + @defer.inlineCallbacks + def write_to_file(self, source, path): + """Write `source` to the on disk media store, and also the backup store + if configured. + + Args: + source: A file like object that should be written + path: Relative path to write file to + + Returns: + string: the file path written to in the primary media store + """ + fname = os.path.join(self.primary_base_path, path) self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn(threads.deferToThread, write_file_thread, fname) + yield preserve_context_over_fn( + threads.deferToThread, + self.write_file_synchronously, source, fname, + ) # Write to backup repository - if self.backup_filepaths: - backup_fname = file_name_func(self.backup_filepaths) + yield self.copy_to_backup(source, path) + + defer.returnValue(fname) + + @defer.inlineCallbacks + def copy_to_backup(self, source, path): + if self.backup_base_path: + backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield preserve_context_over_fn( - threads.deferToThread, write_file_thread, backup_fname, + threads.deferToThread, + self.write_file_synchronously, source, backup_fname, ) else: - preserve_fn(threads.deferToThread)(write_file_thread, backup_fname) - - defer.returnValue(fname) + preserve_fn(threads.deferToThread)( + self.write_file_synchronously, source, backup_fname, + ) @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): media_id = random_string(24) - fname = yield self._write_to_file( - content, lambda f: f.local_media_filepath(media_id) + fname = yield self.write_to_file( + content, self.filepaths.local_media_filepath_rel(media_id) ) logger.info("Stored local media in file %r", fname) @@ -180,9 +203,10 @@ class MediaRepository(object): def _download_remote_file(self, server_name, media_id): file_id = random_string(24) - fname = self.filepaths.remote_media_filepath( + fpath = self.filepaths.remote_media_filepath_rel( server_name, file_id ) + fname = os.path.join(self.primary_base_path, fpath) self._makedirs(fname) try: @@ -224,6 +248,9 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") + with open(fname) as f: + yield self.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() @@ -322,15 +349,15 @@ class MediaRepository(object): ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.local_media_thumbnail( + self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_local_thumbnail( + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -350,15 +377,15 @@ class MediaRepository(object): ) if t_byte_source: - output_path = yield self._write_to_file( + output_path = yield self.write_to_file( t_byte_source, - lambda f: f.remote_media_thumbnail( + self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method ) ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail( + yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) ) @@ -403,17 +430,16 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - def path_name_func(f): - if url_cache: - return f.url_cache_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) - else: - return f.local_media_thumbnail( - media_id, t_width, t_height, t_type, t_method - ) + if url_cache: + file_path = self.filepaths.url_cache_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) + else: + file_path = self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, @@ -460,12 +486,11 @@ class MediaRepository(object): yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - def path_name_func(f): - return f.remote_media_thumbnail( - server_name, file_id, t_width, t_height, t_type, t_method - ) + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) - yield self._write_to_file(t_byte_source, path_name_func) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, @@ -491,6 +516,8 @@ class MediaRepository(object): logger.info("Deleting: %r", key) + # TODO: Should we delete from the backup store + with (yield self.remote_media_linearizer.queue(key)): full_path = self.filepaths.remote_media_filepath(origin, file_id) try: diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 895b480d5..f82b8fbc5 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -59,6 +59,7 @@ class PreviewUrlResource(Resource): self.store = hs.get_datastore() self.client = SpiderHttpClient(hs) self.media_repo = media_repo + self.primary_base_path = media_repo.primary_base_path self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist @@ -262,7 +263,8 @@ class PreviewUrlResource(Resource): file_id = datetime.date.today().isoformat() + '_' + random_string(16) - fname = self.filepaths.url_cache_filepath(file_id) + fpath = self.filepaths.url_cache_filepath_rel(file_id) + fname = os.path.join(self.primary_base_path, fpath) self.media_repo._makedirs(fname) try: @@ -273,6 +275,9 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely + with open(fname) as f: + yield self.media_repo.copy_to_backup(f, fpath) + media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 60498b08a..e1ee535b9 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -51,7 +51,11 @@ class Thumbnailer(object): return ((max_height * self.width) // self.height, max_height) def scale(self, width, height, output_type): - """Rescales the image to the given dimensions""" + """Rescales the image to the given dimensions. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk + """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -65,6 +69,9 @@ class Thumbnailer(object): Args: max_width: The largest possible width. max_height: The larget possible height. + + Returns: + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width From 802ca12d0551ff761e01d9af8348df1dc96fc372 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:37:21 +0100 Subject: [PATCH 07/29] Don't close file prematurely --- synapse/rest/media/v1/media_repository.py | 22 ++++++++++++++----- synapse/rest/media/v1/preview_url_resource.py | 4 ++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 398e973ca..63ed1c426 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,16 +97,20 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) + source.close() + @defer.inlineCallbacks def write_to_file(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. + Will close source once finished. + Args: source: A file like object that should be written path: Relative path to write file to @@ -120,7 +124,7 @@ class MediaRepository(object): # Write to the main repository yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, fname, + self._write_file_synchronously, source, fname, ) # Write to backup repository @@ -130,6 +134,10 @@ class MediaRepository(object): @defer.inlineCallbacks def copy_to_backup(self, source, path): + """Copy file like object source to the backup media store, if configured. + + Will close source after its done. + """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) self._makedirs(backup_fname) @@ -139,12 +147,14 @@ class MediaRepository(object): if self.synchronous_backup_media_store: yield preserve_context_over_fn( threads.deferToThread, - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) else: preserve_fn(threads.deferToThread)( - self.write_file_synchronously, source, backup_fname, + self._write_file_synchronously, source, backup_fname, ) + else: + source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -248,8 +258,8 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - with open(fname) as f: - yield self.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index f82b8fbc5..a3288c9cc 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,8 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely - with open(fname) as f: - yield self.media_repo.copy_to_backup(f, fpath) + # Will close the file after its done + yield self.media_repo.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() From 1259a76047a0a718ce0c9fb26513c9127f8ea919 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:39:23 +0100 Subject: [PATCH 08/29] Get len before close --- synapse/rest/media/v1/media_repository.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 63ed1c426..d25b98db4 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -359,6 +359,8 @@ class MediaRepository(object): ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -368,8 +370,7 @@ class MediaRepository(object): logger.info("Stored thumbnail in file %r", output_path) yield self.store.store_local_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -387,6 +388,7 @@ class MediaRepository(object): ) if t_byte_source: + t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +399,7 @@ class MediaRepository(object): yield self.store.store_remote_media_thumbnail_rel( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue(output_path) @@ -449,11 +451,12 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, - len(t_byte_source.getvalue()) + media_id, t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ @@ -500,11 +503,13 @@ class MediaRepository(object): server_name, file_id, t_width, t_height, t_type, t_method ) + t_len = len(t_byte_source.getvalue()) + yield self.write_to_file(t_byte_source, file_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, - t_width, t_height, t_type, t_method, len(t_byte_source.getvalue()) + t_width, t_height, t_type, t_method, t_len ) defer.returnValue({ From cc505b4b5e98ba70d8576a562fc36b03d6aa5150 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:52:30 +0100 Subject: [PATCH 09/29] getvalue closes buffer --- synapse/rest/media/v1/media_repository.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d25b98db4..ff2ddd2f1 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -258,7 +258,7 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done + # Will close the file after its done yield self.copy_to_backup(open(fname), fpath) media_type = headers["Content-Type"][0] @@ -359,8 +359,6 @@ class MediaRepository(object): ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) - output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -369,6 +367,8 @@ class MediaRepository(object): ) logger.info("Stored thumbnail in file %r", output_path) + t_len = os.path.getsize(output_path) + yield self.store.store_local_thumbnail_rel( media_id, t_width, t_height, t_type, t_method, t_len ) @@ -388,7 +388,6 @@ class MediaRepository(object): ) if t_byte_source: - t_len = len(t_byte_source.getvalue()) output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -397,7 +396,9 @@ class MediaRepository(object): ) logger.info("Stored thumbnail in file %r", output_path) - yield self.store.store_remote_media_thumbnail_rel( + t_len = os.path.getsize(output_path) + + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) @@ -451,9 +452,8 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len @@ -503,9 +503,8 @@ class MediaRepository(object): server_name, file_id, t_width, t_height, t_type, t_method ) - t_len = len(t_byte_source.getvalue()) - - yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file(t_byte_source, file_path) + t_len = os.path.getsize(output_path) yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, From 4ae85ae12190015595f979f1a302ee608de6fd65 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 17:57:31 +0100 Subject: [PATCH 10/29] Don't close prematurely.. --- synapse/rest/media/v1/media_repository.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index ff2ddd2f1..80b14a673 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -97,12 +97,13 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname): + def _write_file_synchronously(source, fname, close_source=False): source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) - source.close() + if close_source: + source.close() @defer.inlineCallbacks def write_to_file(self, source, path): @@ -148,10 +149,12 @@ class MediaRepository(object): yield preserve_context_over_fn( threads.deferToThread, self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, + close_source=True, ) else: source.close() From d76621a47b7b4b778055760d43df9d02614dac19 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Oct 2017 18:16:25 +0100 Subject: [PATCH 11/29] Fix comments --- synapse/rest/media/v1/filepath.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 43d0eea00..6923a3fbd 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -35,7 +35,7 @@ def _wrap_in_base_path(func): class MediaFilePaths(object): """Describes where files are stored on disk. - Most of the function have a `*_rel` variant which returns a file path that + Most of the functions have a `*_rel` variant which returns a file path that is relative to the base media store path. This is mainly used when we want to write to the backup media store (when one is configured) """ diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index a3288c9cc..e986e855a 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -343,6 +343,9 @@ class PreviewUrlResource(Resource): def _expire_url_cache_data(self): """Clean up expired url cache content, media and thumbnails. """ + + # TODO: Delete from backup media store + now = self.clock.time_msec() # First we delete expired url cache entries From b60859d6cc429ea1934b94a8749caadd9a96ee21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:24:19 +0100 Subject: [PATCH 12/29] Use make_deferred_yieldable --- synapse/rest/media/v1/media_repository.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 80b14a673..5c5020fe9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -33,7 +33,7 @@ from synapse.api.errors import SynapseError, HttpResponseException, \ from synapse.util.async import Linearizer from synapse.util.stringutils import is_ascii -from synapse.util.logcontext import preserve_context_over_fn, preserve_fn +from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.retryutils import NotRetryingDestination import os @@ -123,7 +123,7 @@ class MediaRepository(object): self._makedirs(fname) # Write to the main repository - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, fname, ) @@ -146,7 +146,7 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield preserve_context_over_fn( + yield make_deferred_yieldable( threads.deferToThread, self._write_file_synchronously, source, backup_fname, close_source=True, @@ -355,7 +355,7 @@ class MediaRepository(object): input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -384,7 +384,7 @@ class MediaRepository(object): input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield preserve_context_over_fn( + t_byte_source = yield make_deferred_yieldable( threads.deferToThread, self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type @@ -443,7 +443,7 @@ class MediaRepository(object): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: if url_cache: @@ -499,7 +499,7 @@ class MediaRepository(object): r_width, r_height, r_method, r_type, t_byte_source )) - yield preserve_context_over_fn(threads.deferToThread, generate_thumbnails) + yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: file_path = self.filepaths.remote_media_thumbnail_rel( From 64db043a71238db3f65f575c40f29260b83145be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:25:01 +0100 Subject: [PATCH 13/29] Move makedirs to thread --- synapse/rest/media/v1/media_repository.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5c5020fe9..72aad221a 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,7 @@ class MediaRepository(object): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: shutil.copyfileobj(source, f) @@ -120,7 +121,6 @@ class MediaRepository(object): string: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) - self._makedirs(fname) # Write to the main repository yield make_deferred_yieldable( @@ -141,7 +141,6 @@ class MediaRepository(object): """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) - self._makedirs(backup_fname) # We can either wait for successful writing to the backup repository # or write in the background and immediately return From 35332298ef6a9828aa1fdb10f59230f47763084e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:32 +0100 Subject: [PATCH 14/29] Fix up comments --- synapse/rest/media/v1/media_repository.py | 28 +++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 72aad221a..f3a5b19a8 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -98,6 +98,14 @@ class MediaRepository(object): @staticmethod def _write_file_synchronously(source, fname, close_source=False): + """Write `source` to the path `fname` synchronously. Should be called + from a thread. + + Args: + source: A file like object to be written + fname (str): Path to write to + close_source (bool): Whether to close source after writing + """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file with open(fname, "wb") as f: @@ -115,10 +123,10 @@ class MediaRepository(object): Args: source: A file like object that should be written - path: Relative path to write file to + path(str): Relative path to write file to Returns: - string: the file path written to in the primary media store + Deferred[str]: the file path written to in the primary media store """ fname = os.path.join(self.primary_base_path, path) @@ -138,6 +146,10 @@ class MediaRepository(object): """Copy file like object source to the backup media store, if configured. Will close source after its done. + + Args: + source: A file like object that should be written + path(str): Relative path to write file to """ if self.backup_base_path: backup_fname = os.path.join(self.backup_base_path, path) @@ -161,6 +173,18 @@ class MediaRepository(object): @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, auth_user): + """Store uploaded content for a local user and return the mxc URL + + Args: + media_type(str): The content type of the file + upload_name(str): The name of the file + content: A file like object that is the content to store + content_length(int): The length of the content + auth_user(str): The user_id of the uploader + + Returns: + Deferred[str]: The mxc url of the stored content + """ media_id = random_string(24) fname = yield self.write_to_file( From e3428d26ca5a23a3dac6d106aff8ac19f9839f32 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 10:39:59 +0100 Subject: [PATCH 15/29] Fix typo --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f3a5b19a8..76220a553 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -395,7 +395,7 @@ class MediaRepository(object): t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail_rel( + yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len ) From 505371414f6ba9aeaa95eb8d34f7893c4cc2b07e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:23:53 +0100 Subject: [PATCH 16/29] Fix up thumbnailing function --- synapse/rest/media/v1/media_repository.py | 127 ++++++++---------- synapse/rest/media/v1/preview_url_resource.py | 8 +- synapse/rest/media/v1/thumbnailer.py | 13 +- 3 files changed, 73 insertions(+), 75 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 76220a553..36f42c73b 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -206,7 +206,7 @@ class MediaRepository(object): "media_length": content_length, } - yield self._generate_local_thumbnails(media_id, media_info) + yield self._generate_thumbnails(None, media_id, media_info) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) @@ -339,7 +339,7 @@ class MediaRepository(object): "filesystem_id": file_id, } - yield self._generate_remote_thumbnails( + yield self._generate_thumbnails( server_name, media_id, media_info ) @@ -385,6 +385,8 @@ class MediaRepository(object): ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -414,6 +416,8 @@ class MediaRepository(object): ) if t_byte_source: + t_width, t_height = t_byte_source.dimensions + output_path = yield self.write_to_file( t_byte_source, self.filepaths.remote_media_thumbnail_rel( @@ -432,13 +436,28 @@ class MediaRepository(object): defer.returnValue(output_path) @defer.inlineCallbacks - def _generate_local_thumbnails(self, media_id, media_info, url_cache=False): + def _generate_thumbnails(self, server_name, media_id, media_info, url_cache=False): + """Generate and store thumbnails for an image. + + Args: + server_name(str|None): The server name if remote media, else None if local + media_id(str) + media_info(dict) + url_cache(bool): If we are thumbnailing images downloaded for the URL cache, + used exclusively by the url previewer + + Returns: + Deferred[dict]: Dict with "width" and "height" keys of original image + """ media_type = media_info["media_type"] + file_id = media_info.get("filesystem_id") requirements = self._get_thumbnail_requirements(media_type) if not requirements: return - if url_cache: + if server_name: + input_path = self.filepaths.remote_media_filepath(server_name, file_id) + elif url_cache: input_path = self.filepaths.url_cache_filepath(media_id) else: input_path = self.filepaths.local_media_filepath(media_id) @@ -454,22 +473,40 @@ class MediaRepository(object): ) return - local_thumbnails = [] + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails = {} + for r_width, r_height, r_method, r_type in requirements: + if r_method == "crop": + thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + elif r_method == "scale": + t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[(t_width, t_height)] = (r_method, r_type) - def generate_thumbnails(): - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, + # Now we generate the thumbnails for each dimension, store it + for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): + t_byte_source = thumbnailer.crop(t_width, t_height, t_type) + + if r_type == "crop": + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.crop, + r_width, r_height, r_type, + ) + else: + t_byte_source = yield make_deferred_yieldable( + threads.deferToThread, thumbnailer.scale, + r_width, r_height, r_type, ) - local_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) + t_width, t_height = t_byte_source.dimensions - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) - - for t_width, t_height, t_method, t_type, t_byte_source in local_thumbnails: - if url_cache: + if server_name: + file_path = self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) + elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method ) @@ -481,61 +518,15 @@ class MediaRepository(object): output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) - yield self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) - - defer.returnValue({ - "width": m_width, - "height": m_height, - }) - - @defer.inlineCallbacks - def _generate_remote_thumbnails(self, server_name, media_id, media_info): - media_type = media_info["media_type"] - file_id = media_info["filesystem_id"] - requirements = self._get_thumbnail_requirements(media_type) - if not requirements: - return - - remote_thumbnails = [] - - input_path = self.filepaths.remote_media_filepath(server_name, file_id) - thumbnailer = Thumbnailer(input_path) - m_width = thumbnailer.width - m_height = thumbnailer.height - - def generate_thumbnails(): - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, m_height, self.max_image_pixels - ) - return - - for r_width, r_height, r_method, r_type in requirements: - t_byte_source = self._generate_thumbnail( - thumbnailer, r_width, r_height, r_method, r_type, - ) - - remote_thumbnails.append(( - r_width, r_height, r_method, r_type, t_byte_source - )) - - yield make_deferred_yieldable(threads.deferToThread, generate_thumbnails) - - for t_width, t_height, t_method, t_type, t_byte_source in remote_thumbnails: - file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) - - output_path = yield self.write_to_file(t_byte_source, file_path) - t_len = os.path.getsize(output_path) - - yield self.store.store_remote_media_thumbnail( + if server_name: + yield self.store.store_remote_media_thumbnail( server_name, media_id, file_id, t_width, t_height, t_type, t_method, t_len ) + else: + yield self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len + ) defer.returnValue({ "width": m_width, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e986e855a..c734f6b7c 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -171,8 +171,8 @@ class PreviewUrlResource(Resource): logger.debug("got media_info of '%s'" % media_info) if _is_media(media_info['media_type']): - dims = yield self.media_repo._generate_local_thumbnails( - media_info['filesystem_id'], media_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, media_info['filesystem_id'], media_info, url_cache=True, ) og = { @@ -217,8 +217,8 @@ class PreviewUrlResource(Resource): if _is_media(image_info['media_type']): # TODO: make sure we don't choke on white-on-transparent images - dims = yield self.media_repo._generate_local_thumbnails( - image_info['filesystem_id'], image_info, url_cache=True, + dims = yield self.media_repo._generate_thumbnails( + None, image_info['filesystem_id'], image_info, url_cache=True, ) if dims: og["og:image:width"] = dims['width'] diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index e1ee535b9..09650bd52 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ class Thumbnailer(object): """Rescales the image to the given dimensions. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ class Thumbnailer(object): max_height: The larget possible height. Returns: - BytesIO: the bytes of the encoded image ready to be written to disk + ImageIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,6 +92,13 @@ class Thumbnailer(object): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = BytesIO() + output_bytes_io = ImageIO(output_image.size) output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) + output_image.close() return output_bytes_io + + +class ImageIO(BytesIO): + def __init__(self, dimensions): + super(ImageIO, self).__init__() + self.dimensions = dimensions From 0e28281a021101ac199cbf2d0d130190110921bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:33:49 +0100 Subject: [PATCH 17/29] Fix up --- synapse/rest/media/v1/media_repository.py | 62 +++++++++++------------ synapse/rest/media/v1/thumbnailer.py | 13 ++--- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 36f42c73b..a310d08f5 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -131,10 +131,9 @@ class MediaRepository(object): fname = os.path.join(self.primary_base_path, path) # Write to the main repository - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, fname, - ) + )) # Write to backup repository yield self.copy_to_backup(source, path) @@ -157,11 +156,10 @@ class MediaRepository(object): # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: - yield make_deferred_yieldable( - threads.deferToThread, + yield make_deferred_yieldable(threads.deferToThread( self._write_file_synchronously, source, backup_fname, close_source=True, - ) + )) else: preserve_fn(threads.deferToThread)( self._write_file_synchronously, source, backup_fname, @@ -378,11 +376,10 @@ class MediaRepository(object): input_path = self.filepaths.local_media_filepath(media_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -409,11 +406,10 @@ class MediaRepository(object): input_path = self.filepaths.remote_media_filepath(server_name, file_id) thumbnailer = Thumbnailer(input_path) - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( self._generate_thumbnail, thumbnailer, t_width, t_height, t_method, t_type - ) + )) if t_byte_source: t_width, t_height = t_byte_source.dimensions @@ -478,34 +474,32 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault[(r_width, r_height)] = (r_method, r_type) + thumbnails.setdefault((r_width, r_height), (r_method, r_type)) elif r_method == "scale": - t_width, t_height = thumbnailer.aspect(t_width, t_height) + t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) thumbnails[(t_width, t_height)] = (r_method, r_type) # Now we generate the thumbnails for each dimension, store it - for (r_width, r_height), (r_method, r_type) in thumbnails.iteritems(): - t_byte_source = thumbnailer.crop(t_width, t_height, t_type) - - if r_type == "crop": - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.crop, - r_width, r_height, r_type, - ) + for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) else: - t_byte_source = yield make_deferred_yieldable( - threads.deferToThread, thumbnailer.scale, - r_width, r_height, r_type, - ) - - t_width, t_height = t_byte_source.dimensions + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method - ) + server_name, file_id, t_width, t_height, t_type, t_method + ) elif url_cache: file_path = self.filepaths.url_cache_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -515,14 +509,16 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) + # Write to disk output_path = yield self.write_to_file(t_byte_source, file_path) t_len = os.path.getsize(output_path) + # Write to database if server_name: yield self.store.store_remote_media_thumbnail( - server_name, media_id, file_id, - t_width, t_height, t_type, t_method, t_len - ) + server_name, media_id, file_id, + t_width, t_height, t_type, t_method, t_len + ) else: yield self.store.store_local_thumbnail( media_id, t_width, t_height, t_type, t_method, t_len diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 09650bd52..e1ee535b9 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -54,7 +54,7 @@ class Thumbnailer(object): """Rescales the image to the given dimensions. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ scaled = self.image.resize((width, height), Image.ANTIALIAS) return self._encode_image(scaled, output_type) @@ -71,7 +71,7 @@ class Thumbnailer(object): max_height: The larget possible height. Returns: - ImageIO: the bytes of the encoded image ready to be written to disk + BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: scaled_height = (width * self.height) // self.width @@ -92,13 +92,6 @@ class Thumbnailer(object): return self._encode_image(cropped, output_type) def _encode_image(self, output_image, output_type): - output_bytes_io = ImageIO(output_image.size) + output_bytes_io = BytesIO() output_image.save(output_bytes_io, self.FORMATS[output_type], quality=80) - output_image.close() return output_bytes_io - - -class ImageIO(BytesIO): - def __init__(self, dimensions): - super(ImageIO, self).__init__() - self.dimensions = dimensions From 9732ec6797339dd33bc472cef5081a858ddccb30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:34:41 +0100 Subject: [PATCH 18/29] s/write_to_file/write_to_file_and_backup/ --- synapse/rest/media/v1/media_repository.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a310d08f5..c9753ebb5 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -115,7 +115,7 @@ class MediaRepository(object): source.close() @defer.inlineCallbacks - def write_to_file(self, source, path): + def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. @@ -185,7 +185,7 @@ class MediaRepository(object): """ media_id = random_string(24) - fname = yield self.write_to_file( + fname = yield self.write_to_file_and_backup( content, self.filepaths.local_media_filepath_rel(media_id) ) @@ -384,7 +384,7 @@ class MediaRepository(object): if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( media_id, t_width, t_height, t_type, t_method @@ -414,7 +414,7 @@ class MediaRepository(object): if t_byte_source: t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file( + output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( server_name, file_id, t_width, t_height, t_type, t_method @@ -510,7 +510,7 @@ class MediaRepository(object): ) # Write to disk - output_path = yield self.write_to_file(t_byte_source, file_path) + output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) t_len = os.path.getsize(output_path) # Write to database From ae5d18617afd98bb5d51b43c2a12a99e9d96da39 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:35:44 +0100 Subject: [PATCH 19/29] Make things be absolute paths again --- synapse/rest/media/v1/filepath.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 6923a3fbd..a3a15ac30 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -172,12 +172,12 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache_thumbnails", + self.primary_base_path, "url_cache_thumbnails", media_id[0:2], ), ] From 4d7e1dde70e6f2300ab83fb3208152f3d73bde71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:36:32 +0100 Subject: [PATCH 20/29] Remove unnecessary diff --- synapse/rest/media/v1/media_repository.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index c9753ebb5..f06813c48 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -63,9 +63,7 @@ class MediaRepository(object): self.primary_base_path = hs.config.media_store_path self.filepaths = MediaFilePaths(self.primary_base_path) - self.backup_base_path = None - if hs.config.backup_media_store_path: - self.backup_base_path = hs.config.backup_media_store_path + self.backup_base_path = hs.config.backup_media_store_path self.synchronous_backup_media_store = hs.config.synchronous_backup_media_store From a675bd08bd1a016a16bd0e10547e8c26be391ee0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:41:06 +0100 Subject: [PATCH 21/29] Add paths back in... --- synapse/rest/media/v1/filepath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index a3a15ac30..fec0bbf57 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -100,7 +100,7 @@ class MediaFilePaths(object): def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - "remote_thumbnail", server_name, + self.primary_base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - "url_cache", + self.primary_base_path, "url_cache", media_id[0:2], ), ] From 1f43d2239757db0b376a3582066190221942cddc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 11:42:07 +0100 Subject: [PATCH 22/29] Don't needlessly rename variable --- synapse/rest/media/v1/filepath.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index fec0bbf57..d5164e47e 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -27,7 +27,7 @@ def _wrap_in_base_path(func): @functools.wraps(func) def _wrapped(self, *args, **kwargs): path = func(self, *args, **kwargs) - return os.path.join(self.primary_base_path, path) + return os.path.join(self.base_path, path) return _wrapped @@ -41,7 +41,7 @@ class MediaFilePaths(object): """ def __init__(self, primary_base_path): - self.primary_base_path = primary_base_path + self.base_path = primary_base_path def default_thumbnail_rel(self, default_top_level, default_sub_type, width, height, content_type, method): @@ -100,7 +100,7 @@ class MediaFilePaths(object): def remote_media_thumbnail_dir(self, server_name, file_id): return os.path.join( - self.primary_base_path, "remote_thumbnail", server_name, + self.base_path, "remote_thumbnail", server_name, file_id[0:2], file_id[2:4], file_id[4:], ) @@ -125,18 +125,18 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache", + self.base_path, "url_cache", media_id[0:2], ), ] @@ -172,12 +172,12 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ) else: return os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) @@ -188,26 +188,26 @@ class MediaFilePaths(object): if NEW_FORMAT_ID_RE.match(media_id): return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[:10], ), ] else: return [ os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], ), os.path.join( - self.primary_base_path, "url_cache_thumbnails", + self.base_path, "url_cache_thumbnails", media_id[0:2], ), ] From c021c39cbd0bf1f0a85c9699275600ac35aa9ec4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:46:53 +0100 Subject: [PATCH 23/29] Remove spurious addition --- synapse/rest/media/v1/media_repository.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index f06813c48..3b8fe5ddb 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -380,8 +380,6 @@ class MediaRepository(object): )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.local_media_thumbnail_rel( @@ -410,8 +408,6 @@ class MediaRepository(object): )) if t_byte_source: - t_width, t_height = t_byte_source.dimensions - output_path = yield self.write_to_file_and_backup( t_byte_source, self.filepaths.remote_media_thumbnail_rel( From ad1911bbf46f658ee343ee26e3011b3f1bcbd572 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:05 +0100 Subject: [PATCH 24/29] Comment --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 3b8fe5ddb..700fd0dd2 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -121,7 +121,7 @@ class MediaRepository(object): Args: source: A file like object that should be written - path(str): Relative path to write file to + path (str): Relative path to write file to Returns: Deferred[str]: the file path written to in the primary media store From 31aa7bd8d1748548b2523f58b348bb6787dcc019 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:47:38 +0100 Subject: [PATCH 25/29] Move type into key --- synapse/rest/media/v1/media_repository.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 700fd0dd2..dee834389 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,15 +468,15 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height), (r_method, r_type)) + thumbnails.setdefault((r_width, r_height,r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) t_height = min(m_height, t_height) - thumbnails[(t_width, t_height)] = (r_method, r_type) + thumbnails[(t_width, t_height, r_type)] = r_method # Now we generate the thumbnails for each dimension, store it - for (t_width, t_height), (t_method, t_type) in thumbnails.iteritems(): + for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): # Generate the thumbnail if t_type == "crop": t_byte_source = yield make_deferred_yieldable(threads.deferToThread( From b92a8e6e4aa2283daaa4e6050f1dbd503ddc9434 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 13:58:57 +0100 Subject: [PATCH 26/29] PEP8 --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index dee834389..d2ac0175d 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -468,7 +468,7 @@ class MediaRepository(object): thumbnails = {} for r_width, r_height, r_method, r_type in requirements: if r_method == "crop": - thumbnails.setdefault((r_width, r_height,r_type), r_method) + thumbnails.setdefault((r_width, r_height, r_type), r_method) elif r_method == "scale": t_width, t_height = thumbnailer.aspect(r_width, r_height) t_width = min(m_width, t_width) From 2b24416e90b0bf1ee6d29cfc384670f4eeca0ced Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 14:11:34 +0100 Subject: [PATCH 27/29] Don't reuse source but instead copy from primary media store to backup --- synapse/rest/media/v1/media_repository.py | 28 ++++++------------- synapse/rest/media/v1/preview_url_resource.py | 3 +- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index d2ac0175d..e32a67e16 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -95,7 +95,7 @@ class MediaRepository(object): os.makedirs(dirname) @staticmethod - def _write_file_synchronously(source, fname, close_source=False): + def _write_file_synchronously(source, fname): """Write `source` to the path `fname` synchronously. Should be called from a thread. @@ -109,16 +109,11 @@ class MediaRepository(object): with open(fname, "wb") as f: shutil.copyfileobj(source, f) - if close_source: - source.close() - @defer.inlineCallbacks def write_to_file_and_backup(self, source, path): """Write `source` to the on disk media store, and also the backup store if configured. - Will close source once finished. - Args: source: A file like object that should be written path (str): Relative path to write file to @@ -134,37 +129,31 @@ class MediaRepository(object): )) # Write to backup repository - yield self.copy_to_backup(source, path) + yield self.copy_to_backup(path) defer.returnValue(fname) @defer.inlineCallbacks - def copy_to_backup(self, source, path): - """Copy file like object source to the backup media store, if configured. - - Will close source after its done. + def copy_to_backup(self, path): + """Copy a file from the primary to backup media store, if configured. Args: - source: A file like object that should be written path(str): Relative path to write file to """ if self.backup_base_path: + primary_fname = os.path.join(self.primary_base_path, path) backup_fname = os.path.join(self.backup_base_path, path) # We can either wait for successful writing to the backup repository # or write in the background and immediately return if self.synchronous_backup_media_store: yield make_deferred_yieldable(threads.deferToThread( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, )) else: preserve_fn(threads.deferToThread)( - self._write_file_synchronously, source, backup_fname, - close_source=True, + shutil.copyfile, primary_fname, backup_fname, ) - else: - source.close() @defer.inlineCallbacks def create_content(self, media_type, upload_name, content, content_length, @@ -280,8 +269,7 @@ class MediaRepository(object): server_name, media_id) raise SynapseError(502, "Failed to fetch remote media") - # Will close the file after its done - yield self.copy_to_backup(open(fname), fpath) + yield self.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c734f6b7c..2a3e37fdf 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -275,8 +275,7 @@ class PreviewUrlResource(Resource): ) # FIXME: pass through 404s and other error messages nicely - # Will close the file after its done - yield self.media_repo.copy_to_backup(open(fname), fpath) + yield self.media_repo.copy_to_backup(fpath) media_type = headers["Content-Type"][0] time_now_ms = self.clock.time_msec() From 6b725cf56aaf090f9cc9d5409dec7912feae8869 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:23:41 +0100 Subject: [PATCH 28/29] Remove old comment --- synapse/rest/media/v1/media_repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e32a67e16..cc267d0c1 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -102,7 +102,6 @@ class MediaRepository(object): Args: source: A file like object to be written fname (str): Path to write to - close_source (bool): Whether to close source after writing """ MediaRepository._makedirs(fname) source.seek(0) # Ensure we read from the start of the file From 1b6b0b1e66ab1cd5682ad1fa99020474afd6db7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Oct 2017 15:34:08 +0100 Subject: [PATCH 29/29] Add try/finally block to close t_byte_source --- synapse/rest/media/v1/media_repository.py | 65 ++++++++++++++--------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index cc267d0c1..515b3d3e7 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -367,12 +367,16 @@ class MediaRepository(object): )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.local_media_thumbnail_rel( - media_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.local_media_thumbnail_rel( + media_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -395,12 +399,16 @@ class MediaRepository(object): )) if t_byte_source: - output_path = yield self.write_to_file_and_backup( - t_byte_source, - self.filepaths.remote_media_thumbnail_rel( - server_name, file_id, t_width, t_height, t_type, t_method + try: + output_path = yield self.write_to_file_and_backup( + t_byte_source, + self.filepaths.remote_media_thumbnail_rel( + server_name, file_id, t_width, t_height, t_type, t_method + ) ) - ) + finally: + t_byte_source.close() + logger.info("Stored thumbnail in file %r", output_path) t_len = os.path.getsize(output_path) @@ -464,18 +472,6 @@ class MediaRepository(object): # Now we generate the thumbnails for each dimension, store it for (t_width, t_height, t_type), t_method in thumbnails.iteritems(): - # Generate the thumbnail - if t_type == "crop": - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.crop, - r_width, r_height, t_type, - )) - else: - t_byte_source = yield make_deferred_yieldable(threads.deferToThread( - thumbnailer.scale, - r_width, r_height, t_type, - )) - # Work out the correct file name for thumbnail if server_name: file_path = self.filepaths.remote_media_thumbnail_rel( @@ -490,8 +486,29 @@ class MediaRepository(object): media_id, t_width, t_height, t_type, t_method ) - # Write to disk - output_path = yield self.write_to_file_and_backup(t_byte_source, file_path) + # Generate the thumbnail + if t_type == "crop": + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.crop, + r_width, r_height, t_type, + )) + else: + t_byte_source = yield make_deferred_yieldable(threads.deferToThread( + thumbnailer.scale, + r_width, r_height, t_type, + )) + + if not t_byte_source: + continue + + try: + # Write to disk + output_path = yield self.write_to_file_and_backup( + t_byte_source, file_path, + ) + finally: + t_byte_source.close() + t_len = os.path.getsize(output_path) # Write to database