mirror of
https://mau.dev/maunium/synapse.git
synced 2024-12-14 19:33:52 +01:00
Strip overlong OpenGraph data from url preview
... to stop people causing DoSes with malicious web pages
This commit is contained in:
parent
9ffcf0f7ba
commit
e9bfe719ba
3 changed files with 54 additions and 1 deletions
1
changelog.d/6331.feature
Normal file
1
changelog.d/6331.feature
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Limit the length of data returned by url previews, to prevent DoS attacks.
|
|
@ -56,6 +56,9 @@ logger = logging.getLogger(__name__)
|
||||||
_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
|
_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
|
||||||
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
|
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
|
||||||
|
|
||||||
|
OG_TAG_NAME_MAXLEN = 50
|
||||||
|
OG_TAG_VALUE_MAXLEN = 1000
|
||||||
|
|
||||||
|
|
||||||
class PreviewUrlResource(DirectServeResource):
|
class PreviewUrlResource(DirectServeResource):
|
||||||
isLeaf = True
|
isLeaf = True
|
||||||
|
@ -167,7 +170,7 @@ class PreviewUrlResource(DirectServeResource):
|
||||||
ts (int):
|
ts (int):
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Deferred[str]: json-encoded og data
|
Deferred[bytes]: json-encoded og data
|
||||||
"""
|
"""
|
||||||
# check the URL cache in the DB (which will also provide us with
|
# check the URL cache in the DB (which will also provide us with
|
||||||
# historical previews, if we have any)
|
# historical previews, if we have any)
|
||||||
|
@ -268,6 +271,17 @@ class PreviewUrlResource(DirectServeResource):
|
||||||
logger.warn("Failed to find any OG data in %s", url)
|
logger.warn("Failed to find any OG data in %s", url)
|
||||||
og = {}
|
og = {}
|
||||||
|
|
||||||
|
# filter out any stupidly long values
|
||||||
|
keys_to_remove = []
|
||||||
|
for k, v in og.items():
|
||||||
|
if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN:
|
||||||
|
logger.warning(
|
||||||
|
"Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
|
||||||
|
)
|
||||||
|
keys_to_remove.append(k)
|
||||||
|
for k in keys_to_remove:
|
||||||
|
del og[k]
|
||||||
|
|
||||||
logger.debug("Calculated OG for %s as %s" % (url, og))
|
logger.debug("Calculated OG for %s as %s" % (url, og))
|
||||||
|
|
||||||
jsonog = json.dumps(og)
|
jsonog = json.dumps(og)
|
||||||
|
@ -502,6 +516,10 @@ def _calc_og(tree, media_uri):
|
||||||
og = {}
|
og = {}
|
||||||
for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
|
for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
|
||||||
if "content" in tag.attrib:
|
if "content" in tag.attrib:
|
||||||
|
# if we've got more than 50 tags, someone is taking the piss
|
||||||
|
if len(og) >= 50:
|
||||||
|
logger.warning("skipping OG for page with too many og: tags")
|
||||||
|
return {}
|
||||||
og[tag.attrib["property"]] = tag.attrib["content"]
|
og[tag.attrib["property"]] = tag.attrib["content"]
|
||||||
|
|
||||||
# TODO: grab article: meta tags too, e.g.:
|
# TODO: grab article: meta tags too, e.g.:
|
||||||
|
|
|
@ -247,6 +247,40 @@ class URLPreviewTests(unittest.HomeserverTestCase):
|
||||||
self.assertEqual(channel.code, 200)
|
self.assertEqual(channel.code, 200)
|
||||||
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
|
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
|
||||||
|
|
||||||
|
def test_overlong_title(self):
|
||||||
|
self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")]
|
||||||
|
|
||||||
|
end_content = (
|
||||||
|
b"<html><head>"
|
||||||
|
b"<title>" + b"x" * 2000 + b"</title>"
|
||||||
|
b'<meta property="og:description" content="hi" />'
|
||||||
|
b"</head></html>"
|
||||||
|
)
|
||||||
|
|
||||||
|
request, channel = self.make_request(
|
||||||
|
"GET", "url_preview?url=http://matrix.org", shorthand=False
|
||||||
|
)
|
||||||
|
request.render(self.preview_url)
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
client = self.reactor.tcpClients[0][2].buildProtocol(None)
|
||||||
|
server = AccumulatingProtocol()
|
||||||
|
server.makeConnection(FakeTransport(client, self.reactor))
|
||||||
|
client.makeConnection(FakeTransport(server, self.reactor))
|
||||||
|
client.dataReceived(
|
||||||
|
(
|
||||||
|
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
|
||||||
|
b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
|
||||||
|
)
|
||||||
|
% (len(end_content),)
|
||||||
|
+ end_content
|
||||||
|
)
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
self.assertEqual(channel.code, 200)
|
||||||
|
res = channel.json_body
|
||||||
|
self.assertCountEqual(["og:description"], res.keys())
|
||||||
|
|
||||||
def test_ipaddr(self):
|
def test_ipaddr(self):
|
||||||
"""
|
"""
|
||||||
IP addresses can be previewed directly.
|
IP addresses can be previewed directly.
|
||||||
|
|
Loading…
Reference in a new issue