From 148fe58a247d61ffb76c566ba397285480d93f74 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 6 Jun 2022 07:46:04 -0400 Subject: [PATCH] Do not break URL previews if an image is unreachable. (#12950) Avoid breaking a URL preview completely if the chosen image 404s or is unreachable for some other reason (e.g. DNS). --- changelog.d/12950.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 23 ++++++++---- tests/rest/media/v1/test_url_preview.py | 35 +++++++++++++++++++ 3 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 changelog.d/12950.bugfix diff --git a/changelog.d/12950.bugfix b/changelog.d/12950.bugfix new file mode 100644 index 000000000..e835d9aa7 --- /dev/null +++ b/changelog.d/12950.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where a URL preview would break if the image failed to download. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 2b2db63bf..54a849eac 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -586,12 +586,16 @@ class PreviewUrlResource(DirectServeJsonResource): og: The Open Graph dictionary. This is modified with image information. """ # If there's no image or it is blank, there's nothing to do. - if "og:image" not in og or not og["og:image"]: + if "og:image" not in og: + return + + # Remove the raw image URL, this will be replaced with an MXC URL, if successful. + image_url = og.pop("og:image") + if not image_url: return # The image URL from the HTML might be relative to the previewed page, # convert it to an URL which can be requested directly. - image_url = og["og:image"] url_parts = urlparse(image_url) if url_parts.scheme != "data": image_url = urljoin(media_info.uri, image_url) @@ -599,7 +603,16 @@ class PreviewUrlResource(DirectServeJsonResource): # FIXME: it might be cleaner to use the same flow as the main /preview_url # request itself and benefit from the same caching etc. But for now we # just rely on the caching on the master request to speed things up. - image_info = await self._handle_url(image_url, user, allow_data_urls=True) + try: + image_info = await self._handle_url(image_url, user, allow_data_urls=True) + except Exception as e: + # Pre-caching the image failed, don't block the entire URL preview. + logger.warning( + "Pre-caching image failed during URL preview: %s errored with %s", + image_url, + e, + ) + return if _is_media(image_info.media_type): # TODO: make sure we don't choke on white-on-transparent images @@ -611,13 +624,11 @@ class PreviewUrlResource(DirectServeJsonResource): og["og:image:width"] = dims["width"] og["og:image:height"] = dims["height"] else: - logger.warning("Couldn't get dims for %s", og["og:image"]) + logger.warning("Couldn't get dims for %s", image_url) og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}" og["og:image:type"] = image_info.media_type og["matrix:image:size"] = image_info.media_length - else: - del og["og:image"] async def _handle_oembed_response( self, url: str, media_info: MediaInfo, expiration_ms: int diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 3b24d0ace..2c321f8d0 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -656,6 +656,41 @@ class URLPreviewTests(unittest.HomeserverTestCase): server.data, ) + def test_nonexistent_image(self) -> None: + """If the preview image doesn't exist, ensure some data is returned.""" + self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] + + end_content = ( + b"""""" + ) + + channel = self.make_request( + "GET", + "preview_url?url=http://matrix.org", + shorthand=False, + await_result=False, + ) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="utf8"\r\n\r\n' + ) + % (len(end_content),) + + end_content + ) + + self.pump() + self.assertEqual(channel.code, 200) + + # The image should not be in the result. + self.assertNotIn("og:image", channel.json_body) + def test_data_url(self) -> None: """ Requesting to preview a data URL is not supported.