diff --git a/changelog.d/8887.feature b/changelog.d/8887.feature new file mode 100644 index 000000000..729eb1f1e --- /dev/null +++ b/changelog.d/8887.feature @@ -0,0 +1 @@ +Add `X-Robots-Tag` header to stop web crawlers from indexing media. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index 67aa993f1..47c2b44bf 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -155,6 +155,11 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400") request.setHeader(b"Content-Length", b"%d" % (file_size,)) + # Tell web crawlers to not index, archive, or follow links in media. This + # should help to prevent things in the media repo from showing up in web + # search results. + request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex") + # separators as defined in RFC2616. SP and HT are handled separately. # see _can_encode_filename_as_token. diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 4c749f1a6..6f0677d33 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -362,3 +362,16 @@ class MediaRepoTests(unittest.HomeserverTestCase): "error": "Not found [b'example.com', b'12345']", }, ) + + def test_x_robots_tag_header(self): + """ + Tests that the `X-Robots-Tag` header is present, which informs web crawlers + to not index, archive, or follow links in media. + """ + channel = self._req(b"inline; filename=out" + self.test_image.extension) + + headers = channel.headers + self.assertEqual( + headers.getRawHeaders(b"X-Robots-Tag"), + [b"noindex, nofollow, noarchive, noimageindex"], + )