diff --git a/changelog.d/15197.feature b/changelog.d/15197.feature new file mode 100644 index 000000000..c8a6f114e --- /dev/null +++ b/changelog.d/15197.feature @@ -0,0 +1 @@ +Add an option to prevent media downloads from configured domains. \ No newline at end of file diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 14c21f73f..6dd1a639e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1768,6 +1768,30 @@ Example configuration: max_image_pixels: 35M ``` --- +### `prevent_media_downloads_from` + +A list of domains to never download media from. Media from these +domains that is already downloaded will not be deleted, but will be +inaccessible to users. This option does not affect admin APIs trying +to download/operate on media. + +This will not prevent the listed domains from accessing media themselves. +It simply prevents users on this server from downloading media originating +from the listed servers. + +This will have no effect on media originating from the local server. +This only affects media downloaded from other Matrix servers, to +block domains from URL previews see [`url_preview_url_blacklist`](#url_preview_url_blacklist). + +Defaults to an empty list (nothing blocked). + +Example configuration: +```yaml +prevent_media_downloads_from: + - evil.example.org + - evil2.example.org +``` +--- ### `dynamic_thumbnails` Whether to generate new thumbnails on the fly to precisely match diff --git a/synapse/config/repository.py b/synapse/config/repository.py index ecb3edbe3..655f06505 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -137,6 +137,10 @@ class ContentRepositoryConfig(Config): self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) + self.prevent_media_downloads_from = config.get( + "prevent_media_downloads_from", [] + ) + self.media_store_path = self.ensure_directory( config.get("media_store_path", "media_store") ) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index b81e3c2b0..e81c987b1 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -93,6 +93,7 @@ class MediaRepository: self.federation_domain_whitelist = ( hs.config.federation.federation_domain_whitelist ) + self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from # List of StorageProviders where we should search for media and # potentially upload to. @@ -276,6 +277,14 @@ class MediaRepository: ): raise FederationDeniedError(server_name) + # Don't let users download media from domains listed in the config, even + # if we might have the media to serve. This is Trust & Safety tooling to + # block some servers' media from being accessible to local users. + # See `prevent_media_downloads_from` config docs for more info. + if server_name in self.prevent_media_downloads_from: + respond_404(request) + return + self.mark_recently_accessed(server_name, media_id) # We linearize here to ensure that we don't try and download remote diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py index a6396fb05..661e604b8 100644 --- a/synapse/rest/media/thumbnail_resource.py +++ b/synapse/rest/media/thumbnail_resource.py @@ -60,6 +60,7 @@ class ThumbnailResource(DirectServeJsonResource): self.media_storage = media_storage self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails self._is_mine_server_name = hs.is_mine_server_name + self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from async def _async_render_GET(self, request: SynapseRequest) -> None: set_cors_headers(request) @@ -82,6 +83,14 @@ class ThumbnailResource(DirectServeJsonResource): ) self.media_repo.mark_recently_accessed(None, media_id) else: + # Don't let users download media from configured domains, even if it + # is already downloaded. This is Trust & Safety tooling to make some + # media inaccessible to local users. + # See `prevent_media_downloads_from` config docs for more info. + if server_name in self.prevent_media_downloads_from: + respond_404(request) + return + if self.dynamic_thumbnails: await self._select_or_generate_remote_thumbnail( request, server_name, media_id, width, height, method, m_type diff --git a/tests/rest/media/test_domain_blocking.py b/tests/rest/media/test_domain_blocking.py new file mode 100644 index 000000000..9beeeab84 --- /dev/null +++ b/tests/rest/media/test_domain_blocking.py @@ -0,0 +1,139 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict + +from twisted.test.proto_helpers import MemoryReactor +from twisted.web.resource import Resource + +from synapse.media._base import FileInfo +from synapse.server import HomeServer +from synapse.util import Clock + +from tests import unittest +from tests.test_utils import SMALL_PNG +from tests.unittest import override_config + + +class MediaDomainBlockingTests(unittest.HomeserverTestCase): + remote_media_id = "doesnotmatter" + remote_server_name = "evil.com" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + + # Inject a piece of media. We'll use this to ensure we're returning a sane + # response when we're not supposed to block it, distinguishing a media block + # from a regular 404. + file_id = "abcdefg12345" + file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id) + with hs.get_media_repository().media_storage.store_into_file(file_info) as ( + f, + fname, + finish, + ): + f.write(SMALL_PNG) + self.get_success(finish()) + + self.get_success( + self.store.store_cached_remote_media( + origin=self.remote_server_name, + media_id=self.remote_media_id, + media_type="image/png", + media_length=1, + time_now_ms=clock.time_msec(), + upload_name="test.png", + filesystem_id=file_id, + ) + ) + + def create_resource_dict(self) -> Dict[str, Resource]: + # We need to manually set the resource tree to include media, the + # default only does `/_matrix/client` APIs. + return {"/_matrix/media": self.hs.get_media_repository_resource()} + + @override_config( + { + # Disable downloads from the domain we'll be trying to download from. + # Should result in a 404. + "prevent_media_downloads_from": ["evil.com"] + } + ) + def test_cannot_download_blocked_media(self) -> None: + """ + Tests to ensure that remote media which is blocked cannot be downloaded. + """ + response = self.make_request( + "GET", + f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}", + shorthand=False, + ) + self.assertEqual(response.code, 404) + + @override_config( + { + # Disable downloads from a domain we won't be requesting downloads from. + # This proves we haven't broken anything. + "prevent_media_downloads_from": ["not-listed.com"] + } + ) + def test_remote_media_normally_unblocked(self) -> None: + """ + Tests to ensure that remote media is normally able to be downloaded + when no domain block is in place. + """ + response = self.make_request( + "GET", + f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}", + shorthand=False, + ) + self.assertEqual(response.code, 200) + + @override_config( + { + # Disable downloads from the domain we'll be trying to download from. + # Should result in a 404. + "prevent_media_downloads_from": ["evil.com"], + "dynamic_thumbnails": True, + } + ) + def test_cannot_download_blocked_media_thumbnail(self) -> None: + """ + Same test as test_cannot_download_blocked_media but for thumbnails. + """ + response = self.make_request( + "GET", + f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100", + shorthand=False, + content={"width": 100, "height": 100}, + ) + self.assertEqual(response.code, 404) + + @override_config( + { + # Disable downloads from a domain we won't be requesting downloads from. + # This proves we haven't broken anything. + "prevent_media_downloads_from": ["not-listed.com"], + "dynamic_thumbnails": True, + } + ) + def test_remote_media_thumbnail_normally_unblocked(self) -> None: + """ + Same test as test_remote_media_normally_unblocked but for thumbnails. + """ + response = self.make_request( + "GET", + f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100", + shorthand=False, + ) + self.assertEqual(response.code, 200)