From b3e843be88d67633d11711ecc80d4e0390b1e723 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 27 Oct 2021 10:48:02 -0400 Subject: [PATCH 1/4] Fix URL preview errors when previewing XML documents. (#11196) --- changelog.d/11196.bugfix | 1 + synapse/rest/media/v1/preview_url_resource.py | 9 ++++++--- tests/test_preview.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 changelog.d/11196.bugfix diff --git a/changelog.d/11196.bugfix b/changelog.d/11196.bugfix new file mode 100644 index 0000000000..3861eeb908 --- /dev/null +++ b/changelog.d/11196.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 278fd901e2..8ca97b5b18 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -718,9 +718,12 @@ def decode_body( if not body: return None + # The idea here is that multiple encodings are tried until one works. + # Unfortunately the result is never used and then LXML will decode the string + # again with the found encoding. for encoding in get_html_media_encodings(body, content_type): try: - body_str = body.decode(encoding) + body.decode(encoding) except Exception: pass else: @@ -732,11 +735,11 @@ def decode_body( from lxml import etree # Create an HTML parser. - parser = etree.HTMLParser(recover=True, encoding="utf-8") + parser = etree.HTMLParser(recover=True, encoding=encoding) # Attempt to parse the body. Returns None if the body was successfully # parsed, but no tree was found. - return etree.fromstring(body_str, parser) + return etree.fromstring(body, parser) def _calc_og(tree: "etree.Element", media_uri: str) -> Dict[str, Optional[str]]: diff --git a/tests/test_preview.py b/tests/test_preview.py index 9a576f9a4e..40b89fb2ef 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -277,6 +277,21 @@ class CalcOgTestCase(unittest.TestCase): tree = decode_body(html, "http://example.com/test.html") self.assertIsNone(tree) + def test_xml(self): + """Test decoding XML and ensure it works properly.""" + # Note that the strip() call is important to ensure the xml tag starts + # at the initial byte. + html = b""" + + + + + FooSome text. + """.strip() + tree = decode_body(html, "http://example.com/test.html") + og = _calc_og(tree, "http://example.com/test.html") + self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) + def test_invalid_encoding(self): """An invalid character encoding should be ignored and treated as UTF-8, if possible.""" html = b""" From 576921c66a35fa1023f7e9baf97b6304ff463549 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 27 Oct 2021 18:06:32 +0200 Subject: [PATCH 2/4] Force deb compression with `xz`. (#11197) Fixes a problem where `impish` packages could not be processed by `reprepro`. --- debian/changelog | 6 ++++++ debian/rules | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/debian/changelog b/debian/changelog index ea96676f74..c2ea5d2cfb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.46.0~rc1ubuntu1) UNRELEASED; urgency=medium + + * Compress debs with xz, to fix incompatibility of impish debs with reprepro. + + -- Richard van der Hoff Wed, 27 Oct 2021 15:32:51 +0100 + matrix-synapse-py3 (1.46.0~rc1) stable; urgency=medium * New synapse release 1.46.0~rc1. diff --git a/debian/rules b/debian/rules index b9d490adc9..5baf2475f0 100755 --- a/debian/rules +++ b/debian/rules @@ -51,5 +51,11 @@ override_dh_shlibdeps: override_dh_virtualenv: ./debian/build_virtualenv +override_dh_builddeb: + # force the compression to xzip, to stop dpkg-deb on impish defaulting to zstd + # (which requires reprepro 5.3.0-1.3, which is currently only in 'experimental' in Debian: + # https://metadata.ftp-master.debian.org/changelogs/main/r/reprepro/reprepro_5.3.0-1.3_changelog) + dh_builddeb -- -Zxz + %: dh $@ --with python-virtualenv From df84ad602b21a4cea3a63c9117b5cd7884f1ab05 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Nov 2021 13:23:01 +0000 Subject: [PATCH 3/4] 1.46.0 --- CHANGES.md | 9 +++++++++ changelog.d/11196.bugfix | 1 - debian/changelog | 8 ++++++-- synapse/__init__.py | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) delete mode 100644 changelog.d/11196.bugfix diff --git a/CHANGES.md b/CHANGES.md index f61d5c706f..124bdf320a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.46.0 (2021-11-02) +=========================== + +Bugfixes +-------- + +- Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. ([\#11196](https://github.com/matrix-org/synapse/issues/11196)) + + Synapse 1.46.0rc1 (2021-10-27) ============================== diff --git a/changelog.d/11196.bugfix b/changelog.d/11196.bugfix deleted file mode 100644 index 3861eeb908..0000000000 --- a/changelog.d/11196.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in v1.46.0rc1 where URL previews of some XML documents would fail. diff --git a/debian/changelog b/debian/changelog index c2ea5d2cfb..06e7a0862d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,12 @@ -matrix-synapse-py3 (1.46.0~rc1ubuntu1) UNRELEASED; urgency=medium +matrix-synapse-py3 (1.46.0) stable; urgency=medium + [ Richard van der Hoff ] * Compress debs with xz, to fix incompatibility of impish debs with reprepro. - -- Richard van der Hoff Wed, 27 Oct 2021 15:32:51 +0100 + [ Synapse Packaging team ] + * New synapse release 1.46.0. + + -- Synapse Packaging team Tue, 02 Nov 2021 13:22:53 +0000 matrix-synapse-py3 (1.46.0~rc1) stable; urgency=medium diff --git a/synapse/__init__.py b/synapse/__init__.py index 355b36fc63..5ef34bce40 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -47,7 +47,7 @@ try: except ImportError: pass -__version__ = "1.46.0rc1" +__version__ = "1.46.0" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 2d44ee6868805d4ff23489a8dd6b4072ff358663 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Nov 2021 13:25:42 +0000 Subject: [PATCH 4/4] Update changelog --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 124bdf320a..e74544f489 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ Synapse 1.46.0 (2021-11-02) =========================== +The cause of the [performance regression affecting Synapse 1.44](https://github.com/matrix-org/synapse/issues/11049) has been identified and fixed. ([\#11177](https://github.com/matrix-org/synapse/issues/11177)) + Bugfixes -------- @@ -10,8 +12,6 @@ Bugfixes Synapse 1.46.0rc1 (2021-10-27) ============================== -The cause of the [performance regression affecting Synapse 1.44](https://github.com/matrix-org/synapse/issues/11049) has been identified and fixed. ([\#11177](https://github.com/matrix-org/synapse/issues/11177)) - Features --------