From d840019192769f900f6a1a5c768368a080e651cd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 28 Jan 2019 09:56:59 +0000 Subject: [PATCH] Fix idna and ipv6 literal handling in MatrixFederationAgent (#4487) Turns out that the library does a better job of parsing URIs than our reinvented wheel. Who knew. There are two things going on here. The first is that, unlike parse_server_name, URI.fromBytes will strip off square brackets from IPv6 literals, which means that it is valid input to ClientTLSOptionsFactory and HostnameEndpoint. The second is that we stay in `bytes` throughout (except for the argument to ClientTLSOptionsFactory), which avoids the weirdness of (sometimes) ending up with idna-encoded values being held in `unicode` variables. TBH it probably would have been ok but it made the tests fragile. --- changelog.d/4487.misc | 1 + .../federation/matrix_federation_agent.py | 23 +-- .../test_matrix_federation_agent.py | 181 +++++++++++++++++- 3 files changed, 193 insertions(+), 12 deletions(-) create mode 100644 changelog.d/4487.misc diff --git a/changelog.d/4487.misc b/changelog.d/4487.misc new file mode 100644 index 000000000..79de8eb3a --- /dev/null +++ b/changelog.d/4487.misc @@ -0,0 +1 @@ +Fix idna and ipv6 literal handling in MatrixFederationAgent diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 1788e9a34..9526f39cc 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -22,7 +22,6 @@ from twisted.web.client import URI, Agent, HTTPConnectionPool from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent -from synapse.http.endpoint import parse_server_name from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list from synapse.util.logcontext import make_deferred_yieldable @@ -87,9 +86,7 @@ class MatrixFederationAgent(object): from being sent). """ - parsed_uri = URI.fromBytes(uri) - server_name_bytes = parsed_uri.netloc - host, port = parse_server_name(server_name_bytes.decode("ascii")) + parsed_uri = URI.fromBytes(uri, defaultPort=-1) # XXX disabling TLS is really only supported here for the benefit of the # unit tests. We should make the UTs cope with TLS rather than having to make @@ -97,16 +94,20 @@ class MatrixFederationAgent(object): if self._tls_client_options_factory is None: tls_options = None else: - tls_options = self._tls_client_options_factory.get_options(host) + tls_options = self._tls_client_options_factory.get_options( + parsed_uri.host.decode("ascii") + ) - if port is not None: - target = (host, port) + if parsed_uri.port != -1: + # there was an explicit port in the URI + target = parsed_uri.host, parsed_uri.port else: - service_name = b"_matrix._tcp.%s" % (server_name_bytes, ) + service_name = b"_matrix._tcp.%s" % (parsed_uri.host, ) server_list = yield self._srv_resolver.resolve_service(service_name) if not server_list: - target = (host, 8448) - logger.debug("No SRV record for %s, using %s", host, target) + target = (parsed_uri.host, 8448) + logger.debug( + "No SRV record for %s, using %s", service_name, target) else: target = pick_server_from_list(server_list) @@ -117,7 +118,7 @@ class MatrixFederationAgent(object): headers = headers.copy() if not headers.hasHeader(b'host'): - headers.addRawHeader(b'host', server_name_bytes) + headers.addRawHeader(b'host', parsed_uri.netloc) class EndpointFactory(object): @staticmethod diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 261afb5f4..f144092a5 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -209,6 +209,95 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_get_ipv6_address(self): + """ + Test the behaviour when the server name contains an explicit IPv6 address + (with no port) + """ + + # the SRV lookup will return an empty list (XXX: why do we even do an SRV lookup?) + self.mock_resolver.resolve_service.side_effect = lambda _: [] + + # then there will be a getaddrinfo on the IP + self.reactor.lookups["::1"] = "::1" + + test_d = self._make_get_request(b"matrix://[::1]/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.::1", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '::1') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=None, + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'[::1]'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_get_ipv6_address_with_port(self): + """ + Test the behaviour when the server name contains an explicit IPv6 address + (with explicit port) + """ + + # there will be a getaddrinfo on the IP + self.reactor.lookups["::1"] = "::1" + + test_d = self._make_get_request(b"matrix://[::1]:80/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '::1') + self.assertEqual(port, 80) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=None, + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'[::1]:80'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def test_get_hostname_no_srv(self): """ Test the behaviour when the server name has no port, and no SRV record @@ -258,7 +347,7 @@ class MatrixFederationAgentTests(TestCase): Test the behaviour when there is a single SRV record """ self.mock_resolver.resolve_service.side_effect = lambda _: [ - Server(host="srvtarget", port=8443) + Server(host=b"srvtarget", port=8443) ] self.reactor.lookups["srvtarget"] = "1.2.3.4" @@ -298,6 +387,96 @@ class MatrixFederationAgentTests(TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_idna_servername(self): + """test the behaviour when the server name has idna chars in""" + + self.mock_resolver.resolve_service.side_effect = lambda _: [] + + # hostnameendpoint does the lookup on the unicode value (getaddrinfo encodes + # it back to idna) + self.reactor.lookups[u"bücher.com"] = "1.2.3.4" + + # this is idna for bücher.com + test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.xn--bcher-kva.com", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 8448) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'xn--bcher-kva.com', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'xn--bcher-kva.com'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_idna_srv_target(self): + """test the behaviour when the target of a SRV record has idna chars""" + + self.mock_resolver.resolve_service.side_effect = lambda _: [ + Server(host=b"xn--trget-3qa.com", port=8443) # târget.com + ] + self.reactor.lookups[u"târget.com"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix://xn--bcher-kva.com/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix._tcp.xn--bcher-kva.com", + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, '1.2.3.4') + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, + expected_sni=b'xn--bcher-kva.com', + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b'GET') + self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'xn--bcher-kva.com'], + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + def _check_logcontext(context): current = LoggingContext.current_context()