forked from MirrorHub/synapse
Apply url_preview_url_blacklist to oEmbed and pre-cached images (#15601)
There are two situations which were previously not properly checked: 1. If the requested URL was replaced with an oEmbed URL, then the oEmbed URL was not checked against url_preview_url_blacklist. 2. Follow-up URLs (either via autodiscovery of oEmbed or to pre-cache images) were not checked against url_preview_url_blacklist.
This commit is contained in:
parent
375b0a8a11
commit
4ee82c0576
4 changed files with 380 additions and 51 deletions
1
changelog.d/15601.bugfix
Normal file
1
changelog.d/15601.bugfix
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Fix a long-standing bug where the `url_preview_url_blacklist` configuration setting was not applied to oEmbed or image URLs found while previewing a URL.
|
|
@ -113,7 +113,7 @@ class UrlPreviewer:
|
||||||
1. Checks URL and timestamp against the database cache and returns the result if it
|
1. Checks URL and timestamp against the database cache and returns the result if it
|
||||||
has not expired and was successful (a 2xx return code).
|
has not expired and was successful (a 2xx return code).
|
||||||
2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
|
2. Checks if the URL matches an oEmbed (https://oembed.com/) pattern. If it
|
||||||
does, update the URL to download.
|
does and the new URL is not blocked, update the URL to download.
|
||||||
3. Downloads the URL and stores it into a file via the media storage provider
|
3. Downloads the URL and stores it into a file via the media storage provider
|
||||||
and saves the local media metadata.
|
and saves the local media metadata.
|
||||||
4. If the media is an image:
|
4. If the media is an image:
|
||||||
|
@ -127,14 +127,14 @@ class UrlPreviewer:
|
||||||
and saves the local media metadata.
|
and saves the local media metadata.
|
||||||
2. Convert the oEmbed response to an Open Graph response.
|
2. Convert the oEmbed response to an Open Graph response.
|
||||||
3. Override any Open Graph data from the HTML with data from oEmbed.
|
3. Override any Open Graph data from the HTML with data from oEmbed.
|
||||||
4. If an image exists in the Open Graph response:
|
4. If an image URL exists in the Open Graph response:
|
||||||
1. Downloads the URL and stores it into a file via the media storage
|
1. Downloads the URL and stores it into a file via the media storage
|
||||||
provider and saves the local media metadata.
|
provider and saves the local media metadata.
|
||||||
2. Generates thumbnails.
|
2. Generates thumbnails.
|
||||||
3. Updates the Open Graph response based on image properties.
|
3. Updates the Open Graph response based on image properties.
|
||||||
6. If the media is JSON and an oEmbed URL was found:
|
6. If an oEmbed URL was found and the media is JSON:
|
||||||
1. Convert the oEmbed response to an Open Graph response.
|
1. Convert the oEmbed response to an Open Graph response.
|
||||||
2. If a thumbnail or image is in the oEmbed response:
|
2. If an image URL is in the oEmbed response:
|
||||||
1. Downloads the URL and stores it into a file via the media storage
|
1. Downloads the URL and stores it into a file via the media storage
|
||||||
provider and saves the local media metadata.
|
provider and saves the local media metadata.
|
||||||
2. Generates thumbnails.
|
2. Generates thumbnails.
|
||||||
|
@ -144,7 +144,8 @@ class UrlPreviewer:
|
||||||
|
|
||||||
If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
|
If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
|
||||||
image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
|
image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
|
||||||
does not fail. As much information as possible is returned.
|
does not fail. If any of them are blocked, then those additional requests
|
||||||
|
are skipped. As much information as possible is returned.
|
||||||
|
|
||||||
The in-memory cache expires after 1 hour.
|
The in-memory cache expires after 1 hour.
|
||||||
|
|
||||||
|
@ -203,48 +204,14 @@ class UrlPreviewer:
|
||||||
)
|
)
|
||||||
|
|
||||||
async def preview(self, url: str, user: UserID, ts: int) -> bytes:
|
async def preview(self, url: str, user: UserID, ts: int) -> bytes:
|
||||||
# XXX: we could move this into _do_preview if we wanted.
|
|
||||||
url_tuple = urlsplit(url)
|
|
||||||
for entry in self.url_preview_url_blacklist:
|
|
||||||
match = True
|
|
||||||
for attrib in entry:
|
|
||||||
pattern = entry[attrib]
|
|
||||||
value = getattr(url_tuple, attrib)
|
|
||||||
logger.debug(
|
|
||||||
"Matching attrib '%s' with value '%s' against pattern '%s'",
|
|
||||||
attrib,
|
|
||||||
value,
|
|
||||||
pattern,
|
|
||||||
)
|
|
||||||
|
|
||||||
if value is None:
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Some attributes might not be parsed as strings by urlsplit (such as the
|
|
||||||
# port, which is parsed as an int). Because we use match functions that
|
|
||||||
# expect strings, we want to make sure that's what we give them.
|
|
||||||
value_str = str(value)
|
|
||||||
|
|
||||||
if pattern.startswith("^"):
|
|
||||||
if not re.match(pattern, value_str):
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
if not fnmatch.fnmatch(value_str, pattern):
|
|
||||||
match = False
|
|
||||||
continue
|
|
||||||
if match:
|
|
||||||
logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
|
|
||||||
raise SynapseError(
|
|
||||||
403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
|
|
||||||
)
|
|
||||||
|
|
||||||
# the in-memory cache:
|
# the in-memory cache:
|
||||||
# * ensures that only one request is active at a time
|
# * ensures that only one request to a URL is active at a time
|
||||||
# * takes load off the DB for the thundering herds
|
# * takes load off the DB for the thundering herds
|
||||||
# * also caches any failures (unlike the DB) so we don't keep
|
# * also caches any failures (unlike the DB) so we don't keep
|
||||||
# requesting the same endpoint
|
# requesting the same endpoint
|
||||||
|
#
|
||||||
|
# Note that autodiscovered oEmbed URLs and pre-caching of images
|
||||||
|
# are not captured in the in-memory cache.
|
||||||
|
|
||||||
observable = self._cache.get(url)
|
observable = self._cache.get(url)
|
||||||
|
|
||||||
|
@ -283,7 +250,7 @@ class UrlPreviewer:
|
||||||
og = og.encode("utf8")
|
og = og.encode("utf8")
|
||||||
return og
|
return og
|
||||||
|
|
||||||
# If this URL can be accessed via oEmbed, use that instead.
|
# If this URL can be accessed via an allowed oEmbed, use that instead.
|
||||||
url_to_download = url
|
url_to_download = url
|
||||||
oembed_url = self._oembed.get_oembed_url(url)
|
oembed_url = self._oembed.get_oembed_url(url)
|
||||||
if oembed_url:
|
if oembed_url:
|
||||||
|
@ -329,6 +296,7 @@ class UrlPreviewer:
|
||||||
# defer to that.
|
# defer to that.
|
||||||
oembed_url = self._oembed.autodiscover_from_html(tree)
|
oembed_url = self._oembed.autodiscover_from_html(tree)
|
||||||
og_from_oembed: JsonDict = {}
|
og_from_oembed: JsonDict = {}
|
||||||
|
# Only download to the oEmbed URL if it is allowed.
|
||||||
if oembed_url:
|
if oembed_url:
|
||||||
try:
|
try:
|
||||||
oembed_info = await self._handle_url(
|
oembed_info = await self._handle_url(
|
||||||
|
@ -411,6 +379,59 @@ class UrlPreviewer:
|
||||||
|
|
||||||
return jsonog.encode("utf8")
|
return jsonog.encode("utf8")
|
||||||
|
|
||||||
|
def _is_url_blocked(self, url: str) -> bool:
|
||||||
|
"""
|
||||||
|
Check whether the URL is allowed to be previewed (according to the homeserver
|
||||||
|
configuration).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The requested URL.
|
||||||
|
|
||||||
|
Return:
|
||||||
|
True if the URL is blocked, False if it is allowed.
|
||||||
|
"""
|
||||||
|
url_tuple = urlsplit(url)
|
||||||
|
for entry in self.url_preview_url_blacklist:
|
||||||
|
match = True
|
||||||
|
# Iterate over each entry. If *all* attributes of that entry match
|
||||||
|
# the current URL, then reject it.
|
||||||
|
for attrib, pattern in entry.items():
|
||||||
|
value = getattr(url_tuple, attrib)
|
||||||
|
logger.debug(
|
||||||
|
"Matching attrib '%s' with value '%s' against pattern '%s'",
|
||||||
|
attrib,
|
||||||
|
value,
|
||||||
|
pattern,
|
||||||
|
)
|
||||||
|
|
||||||
|
if value is None:
|
||||||
|
match = False
|
||||||
|
break
|
||||||
|
|
||||||
|
# Some attributes might not be parsed as strings by urlsplit (such as the
|
||||||
|
# port, which is parsed as an int). Because we use match functions that
|
||||||
|
# expect strings, we want to make sure that's what we give them.
|
||||||
|
value_str = str(value)
|
||||||
|
|
||||||
|
# Check the value against the pattern as either a regular expression or
|
||||||
|
# a glob. If it doesn't match, the entry doesn't match.
|
||||||
|
if pattern.startswith("^"):
|
||||||
|
if not re.match(pattern, value_str):
|
||||||
|
match = False
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if not fnmatch.fnmatch(value_str, pattern):
|
||||||
|
match = False
|
||||||
|
break
|
||||||
|
|
||||||
|
# All fields matched, return true (the URL is blocked).
|
||||||
|
if match:
|
||||||
|
logger.warning("URL %s blocked by url_blacklist entry %s", url, entry)
|
||||||
|
return match
|
||||||
|
|
||||||
|
# No matches were found, the URL is allowed.
|
||||||
|
return False
|
||||||
|
|
||||||
async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
|
async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResult:
|
||||||
"""
|
"""
|
||||||
Fetches a remote URL and parses the headers.
|
Fetches a remote URL and parses the headers.
|
||||||
|
@ -547,8 +568,16 @@ class UrlPreviewer:
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A MediaInfo object describing the fetched content.
|
A MediaInfo object describing the fetched content.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SynapseError if the URL is blocked.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if self._is_url_blocked(url):
|
||||||
|
raise SynapseError(
|
||||||
|
403, "URL blocked by url pattern blacklist entry", Codes.UNKNOWN
|
||||||
|
)
|
||||||
|
|
||||||
# TODO: we should probably honour robots.txt... except in practice
|
# TODO: we should probably honour robots.txt... except in practice
|
||||||
# we're most likely being explicitly triggered by a human rather than a
|
# we're most likely being explicitly triggered by a human rather than a
|
||||||
# bot, so are we really a robot?
|
# bot, so are we really a robot?
|
||||||
|
@ -624,7 +653,7 @@ class UrlPreviewer:
|
||||||
return
|
return
|
||||||
|
|
||||||
# The image URL from the HTML might be relative to the previewed page,
|
# The image URL from the HTML might be relative to the previewed page,
|
||||||
# convert it to an URL which can be requested directly.
|
# convert it to a URL which can be requested directly.
|
||||||
url_parts = urlparse(image_url)
|
url_parts = urlparse(image_url)
|
||||||
if url_parts.scheme != "data":
|
if url_parts.scheme != "data":
|
||||||
image_url = urljoin(media_info.uri, image_url)
|
image_url = urljoin(media_info.uri, image_url)
|
||||||
|
|
113
tests/media/test_url_previewer.py
Normal file
113
tests/media/test_url_previewer.py
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
# Copyright 2023 The Matrix.org Foundation C.I.C.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import os
|
||||||
|
|
||||||
|
from twisted.test.proto_helpers import MemoryReactor
|
||||||
|
|
||||||
|
from synapse.server import HomeServer
|
||||||
|
from synapse.util import Clock
|
||||||
|
|
||||||
|
from tests import unittest
|
||||||
|
from tests.unittest import override_config
|
||||||
|
|
||||||
|
try:
|
||||||
|
import lxml
|
||||||
|
except ImportError:
|
||||||
|
lxml = None
|
||||||
|
|
||||||
|
|
||||||
|
class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
|
if not lxml:
|
||||||
|
skip = "url preview feature requires lxml"
|
||||||
|
|
||||||
|
def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
|
||||||
|
config = self.default_config()
|
||||||
|
config["url_preview_enabled"] = True
|
||||||
|
config["max_spider_size"] = 9999999
|
||||||
|
config["url_preview_ip_range_blacklist"] = (
|
||||||
|
"192.168.1.1",
|
||||||
|
"1.0.0.0/8",
|
||||||
|
"3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
|
||||||
|
"2001:800::/21",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.storage_path = self.mktemp()
|
||||||
|
self.media_store_path = self.mktemp()
|
||||||
|
os.mkdir(self.storage_path)
|
||||||
|
os.mkdir(self.media_store_path)
|
||||||
|
config["media_store_path"] = self.media_store_path
|
||||||
|
|
||||||
|
provider_config = {
|
||||||
|
"module": "synapse.media.storage_provider.FileStorageProviderBackend",
|
||||||
|
"store_local": True,
|
||||||
|
"store_synchronous": False,
|
||||||
|
"store_remote": True,
|
||||||
|
"config": {"directory": self.storage_path},
|
||||||
|
}
|
||||||
|
|
||||||
|
config["media_storage_providers"] = [provider_config]
|
||||||
|
|
||||||
|
return self.setup_test_homeserver(config=config)
|
||||||
|
|
||||||
|
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
|
||||||
|
media_repo_resource = hs.get_media_repository_resource()
|
||||||
|
preview_url = media_repo_resource.children[b"preview_url"]
|
||||||
|
self.url_previewer = preview_url._url_previewer
|
||||||
|
|
||||||
|
def test_all_urls_allowed(self) -> None:
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("http://matrix.org"))
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("http://localhost:8000"))
|
||||||
|
self.assertFalse(
|
||||||
|
self.url_previewer._is_url_blocked("http://user:pass@matrix.org")
|
||||||
|
)
|
||||||
|
|
||||||
|
@override_config(
|
||||||
|
{
|
||||||
|
"url_preview_url_blacklist": [
|
||||||
|
{"username": "user"},
|
||||||
|
{"scheme": "http", "netloc": "matrix.org"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
def test_blocked_url(self) -> None:
|
||||||
|
# Blocked via scheme and URL.
|
||||||
|
self.assertTrue(self.url_previewer._is_url_blocked("http://matrix.org"))
|
||||||
|
# Not blocked because all components must match.
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
|
||||||
|
|
||||||
|
# Blocked due to the user.
|
||||||
|
self.assertTrue(
|
||||||
|
self.url_previewer._is_url_blocked("http://user:pass@example.com")
|
||||||
|
)
|
||||||
|
self.assertTrue(self.url_previewer._is_url_blocked("http://user@example.com"))
|
||||||
|
|
||||||
|
@override_config({"url_preview_url_blacklist": [{"netloc": "*.example.com"}]})
|
||||||
|
def test_glob_blocked_url(self) -> None:
|
||||||
|
# All subdomains are blocked.
|
||||||
|
self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
|
||||||
|
self.assertTrue(self.url_previewer._is_url_blocked("http://.example.com"))
|
||||||
|
|
||||||
|
# The TLD is not blocked.
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
|
||||||
|
|
||||||
|
@override_config({"url_preview_url_blacklist": [{"netloc": "^.+\\.example\\.com"}]})
|
||||||
|
def test_regex_blocked_urL(self) -> None:
|
||||||
|
# All subdomains are blocked.
|
||||||
|
self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
|
||||||
|
# Requires a non-empty subdomain.
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("http://.example.com"))
|
||||||
|
|
||||||
|
# The TLD is not blocked.
|
||||||
|
self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
|
|
@ -653,6 +653,57 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
server.data,
|
server.data,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_image(self) -> None:
|
||||||
|
"""An image should be precached if mentioned in the HTML."""
|
||||||
|
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
|
||||||
|
|
||||||
|
result = (
|
||||||
|
b"""<html><body><img src="http://cdn.matrix.org/foo.png"></body></html>"""
|
||||||
|
)
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://matrix.org",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# Respond with the HTML.
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
|
||||||
|
)
|
||||||
|
% (len(result),)
|
||||||
|
+ result
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# Respond with the photo.
|
||||||
|
client = self.reactor.tcpClients[1][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b"Content-Type: image/png\r\n\r\n"
|
||||||
|
)
|
||||||
|
% (len(SMALL_PNG),)
|
||||||
|
+ SMALL_PNG
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# The image should be in the result.
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
self._assert_small_png(channel.json_body)
|
||||||
|
|
||||||
def test_nonexistent_image(self) -> None:
|
def test_nonexistent_image(self) -> None:
|
||||||
"""If the preview image doesn't exist, ensure some data is returned."""
|
"""If the preview image doesn't exist, ensure some data is returned."""
|
||||||
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
@ -683,9 +734,53 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
self.assertEqual(channel.code, 200)
|
|
||||||
|
# There should not be a second connection.
|
||||||
|
self.assertEqual(len(self.reactor.tcpClients), 1)
|
||||||
|
|
||||||
# The image should not be in the result.
|
# The image should not be in the result.
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
self.assertNotIn("og:image", channel.json_body)
|
||||||
|
|
||||||
|
@unittest.override_config(
|
||||||
|
{"url_preview_url_blacklist": [{"netloc": "cdn.matrix.org"}]}
|
||||||
|
)
|
||||||
|
def test_image_blocked(self) -> None:
|
||||||
|
"""If the preview image doesn't exist, ensure some data is returned."""
|
||||||
|
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
self.lookups["cdn.matrix.org"] = [(IPv4Address, "10.1.2.4")]
|
||||||
|
|
||||||
|
result = (
|
||||||
|
b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
|
||||||
|
)
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://matrix.org",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
|
||||||
|
)
|
||||||
|
% (len(result),)
|
||||||
|
+ result
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# There should not be a second connection.
|
||||||
|
self.assertEqual(len(self.reactor.tcpClients), 1)
|
||||||
|
|
||||||
|
# The image should not be in the result.
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
self.assertNotIn("og:image", channel.json_body)
|
self.assertNotIn("og:image", channel.json_body)
|
||||||
|
|
||||||
def test_oembed_failure(self) -> None:
|
def test_oembed_failure(self) -> None:
|
||||||
|
@ -880,6 +975,11 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
|
|
||||||
|
# Double check that the proper host is being connected to. (Note that
|
||||||
|
# twitter.com can't be resolved so this is already implicitly checked.)
|
||||||
|
self.assertIn(b"\r\nHost: publish.twitter.com\r\n", server.data)
|
||||||
|
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
body = channel.json_body
|
body = channel.json_body
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
@ -940,6 +1040,22 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@unittest.override_config(
|
||||||
|
{"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
|
||||||
|
)
|
||||||
|
def test_oembed_blocked(self) -> None:
|
||||||
|
"""The oEmbed URL should not be downloaded if the oEmbed URL is blocked."""
|
||||||
|
self.lookups["twitter.com"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://twitter.com/matrixdotorg/status/12345",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 403, channel.result)
|
||||||
|
|
||||||
def test_oembed_autodiscovery(self) -> None:
|
def test_oembed_autodiscovery(self) -> None:
|
||||||
"""
|
"""
|
||||||
Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
|
Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
|
||||||
|
@ -980,7 +1096,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
% (len(result),)
|
% (len(result),)
|
||||||
+ result
|
+ result
|
||||||
)
|
)
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
|
|
||||||
# The oEmbed response.
|
# The oEmbed response.
|
||||||
|
@ -1004,7 +1119,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
% (len(oembed_content),)
|
% (len(oembed_content),)
|
||||||
+ oembed_content
|
+ oembed_content
|
||||||
)
|
)
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
|
|
||||||
# Ensure the URL is what was requested.
|
# Ensure the URL is what was requested.
|
||||||
|
@ -1023,7 +1137,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
% (len(SMALL_PNG),)
|
% (len(SMALL_PNG),)
|
||||||
+ SMALL_PNG
|
+ SMALL_PNG
|
||||||
)
|
)
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
|
|
||||||
# Ensure the URL is what was requested.
|
# Ensure the URL is what was requested.
|
||||||
|
@ -1036,6 +1149,59 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
self._assert_small_png(body)
|
self._assert_small_png(body)
|
||||||
|
|
||||||
|
@unittest.override_config(
|
||||||
|
{"url_preview_url_blacklist": [{"netloc": "publish.twitter.com"}]}
|
||||||
|
)
|
||||||
|
def test_oembed_autodiscovery_blocked(self) -> None:
|
||||||
|
"""
|
||||||
|
If the discovered oEmbed URL is blocked, it should be discarded.
|
||||||
|
"""
|
||||||
|
# This is a little cheesy in that we use the www subdomain (which isn't the
|
||||||
|
# list of oEmbed patterns) to get "raw" HTML response.
|
||||||
|
self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.4")]
|
||||||
|
|
||||||
|
result = b"""
|
||||||
|
<title>Test</title>
|
||||||
|
<link rel="alternate" type="application/json+oembed"
|
||||||
|
href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
|
||||||
|
title="matrixdotorg" />
|
||||||
|
"""
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
|
||||||
|
)
|
||||||
|
% (len(result),)
|
||||||
|
+ result
|
||||||
|
)
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# Ensure there's no additional connections.
|
||||||
|
self.assertEqual(len(self.reactor.tcpClients), 1)
|
||||||
|
|
||||||
|
# Ensure the URL is what was requested.
|
||||||
|
self.assertIn(b"\r\nHost: www.twitter.com\r\n", server.data)
|
||||||
|
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
body = channel.json_body
|
||||||
|
self.assertEqual(body["og:title"], "Test")
|
||||||
|
self.assertNotIn("og:image", body)
|
||||||
|
|
||||||
def _download_image(self) -> Tuple[str, str]:
|
def _download_image(self) -> Tuple[str, str]:
|
||||||
"""Downloads an image into the URL cache.
|
"""Downloads an image into the URL cache.
|
||||||
Returns:
|
Returns:
|
||||||
|
@ -1192,7 +1358,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
|
@unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
|
||||||
def test_blacklist_port(self) -> None:
|
def test_blocked_port(self) -> None:
|
||||||
"""Tests that blacklisting URLs with a port makes previewing such URLs
|
"""Tests that blacklisting URLs with a port makes previewing such URLs
|
||||||
fail with a 403 error and doesn't impact other previews.
|
fail with a 403 error and doesn't impact other previews.
|
||||||
"""
|
"""
|
||||||
|
@ -1230,3 +1396,23 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
|
|
||||||
self.pump()
|
self.pump()
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
|
|
||||||
|
@unittest.override_config(
|
||||||
|
{"url_preview_url_blacklist": [{"netloc": "example.com"}]}
|
||||||
|
)
|
||||||
|
def test_blocked_url(self) -> None:
|
||||||
|
"""Tests that blacklisting URLs with a host makes previewing such URLs
|
||||||
|
fail with a 403 error.
|
||||||
|
"""
|
||||||
|
self.lookups["example.com"] = [(IPv4Address, "10.1.2.3")]
|
||||||
|
|
||||||
|
bad_url = quote("http://example.com/foo")
|
||||||
|
|
||||||
|
channel = self.make_request(
|
||||||
|
"GET",
|
||||||
|
"preview_url?url=" + bad_url,
|
||||||
|
shorthand=False,
|
||||||
|
await_result=False,
|
||||||
|
)
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 403, channel.result)
|
||||||
|
|
Loading…
Reference in a new issue