forked from MirrorHub/synapse
Merge branch 'release-v0.99.0' into develop
This commit is contained in:
commit
9763a73af0
4 changed files with 86 additions and 52 deletions
1
changelog.d/4539.misc
Normal file
1
changelog.d/4539.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Update federation routing logic to check .well-known before SRV
|
1
changelog.d/4542.misc
Normal file
1
changelog.d/4542.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Improve performance of handling servers with invalid .well-known
|
|
@ -23,14 +23,17 @@ from zope.interface import implementer
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
|
from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
|
||||||
|
from twisted.internet.interfaces import IStreamClientEndpoint
|
||||||
from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody
|
from twisted.web.client import URI, Agent, HTTPConnectionPool, RedirectAgent, readBody
|
||||||
from twisted.web.http import stringToDatetime
|
from twisted.web.http import stringToDatetime
|
||||||
from twisted.web.http_headers import Headers
|
from twisted.web.http_headers import Headers
|
||||||
from twisted.web.iweb import IAgent
|
from twisted.web.iweb import IAgent
|
||||||
|
|
||||||
from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list
|
from synapse.http.federation.srv_resolver import SrvResolver, pick_server_from_list
|
||||||
|
from synapse.util import Clock
|
||||||
from synapse.util.caches.ttlcache import TTLCache
|
from synapse.util.caches.ttlcache import TTLCache
|
||||||
from synapse.util.logcontext import make_deferred_yieldable
|
from synapse.util.logcontext import make_deferred_yieldable
|
||||||
|
from synapse.util.metrics import Measure
|
||||||
|
|
||||||
# period to cache .well-known results for by default
|
# period to cache .well-known results for by default
|
||||||
WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600
|
WELL_KNOWN_DEFAULT_CACHE_PERIOD = 24 * 3600
|
||||||
|
@ -44,6 +47,8 @@ WELL_KNOWN_INVALID_CACHE_PERIOD = 1 * 3600
|
||||||
# cap for .well-known cache period
|
# cap for .well-known cache period
|
||||||
WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600
|
WELL_KNOWN_MAX_CACHE_PERIOD = 48 * 3600
|
||||||
|
|
||||||
|
# magic value to mark an invalid well-known
|
||||||
|
INVALID_WELL_KNOWN = object()
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
well_known_cache = TTLCache('well-known')
|
well_known_cache = TTLCache('well-known')
|
||||||
|
@ -78,6 +83,8 @@ class MatrixFederationAgent(object):
|
||||||
_well_known_cache=well_known_cache,
|
_well_known_cache=well_known_cache,
|
||||||
):
|
):
|
||||||
self._reactor = reactor
|
self._reactor = reactor
|
||||||
|
self._clock = Clock(reactor)
|
||||||
|
|
||||||
self._tls_client_options_factory = tls_client_options_factory
|
self._tls_client_options_factory = tls_client_options_factory
|
||||||
if _srv_resolver is None:
|
if _srv_resolver is None:
|
||||||
_srv_resolver = SrvResolver()
|
_srv_resolver = SrvResolver()
|
||||||
|
@ -98,6 +105,11 @@ class MatrixFederationAgent(object):
|
||||||
)
|
)
|
||||||
self._well_known_agent = _well_known_agent
|
self._well_known_agent = _well_known_agent
|
||||||
|
|
||||||
|
# our cache of .well-known lookup results, mapping from server name
|
||||||
|
# to delegated name. The values can be:
|
||||||
|
# `bytes`: a valid server-name
|
||||||
|
# `None`: there is no .well-known here
|
||||||
|
# INVALID_WELL_KNWOWN: the .well-known here is invalid
|
||||||
self._well_known_cache = _well_known_cache
|
self._well_known_cache = _well_known_cache
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
|
@ -152,12 +164,9 @@ class MatrixFederationAgent(object):
|
||||||
class EndpointFactory(object):
|
class EndpointFactory(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def endpointForURI(_uri):
|
def endpointForURI(_uri):
|
||||||
logger.info(
|
ep = LoggingHostnameEndpoint(
|
||||||
"Connecting to %s:%i",
|
self._reactor, res.target_host, res.target_port,
|
||||||
res.target_host.decode("ascii"),
|
|
||||||
res.target_port,
|
|
||||||
)
|
)
|
||||||
ep = HostnameEndpoint(self._reactor, res.target_host, res.target_port)
|
|
||||||
if tls_options is not None:
|
if tls_options is not None:
|
||||||
ep = wrapClientTLS(tls_options, ep)
|
ep = wrapClientTLS(tls_options, ep)
|
||||||
return ep
|
return ep
|
||||||
|
@ -210,11 +219,7 @@ class MatrixFederationAgent(object):
|
||||||
target_port=parsed_uri.port,
|
target_port=parsed_uri.port,
|
||||||
))
|
))
|
||||||
|
|
||||||
# try a SRV lookup
|
if lookup_well_known:
|
||||||
service_name = b"_matrix._tcp.%s" % (parsed_uri.host,)
|
|
||||||
server_list = yield self._srv_resolver.resolve_service(service_name)
|
|
||||||
|
|
||||||
if not server_list and lookup_well_known:
|
|
||||||
# try a .well-known lookup
|
# try a .well-known lookup
|
||||||
well_known_server = yield self._get_well_known(parsed_uri.host)
|
well_known_server = yield self._get_well_known(parsed_uri.host)
|
||||||
|
|
||||||
|
@ -250,6 +255,10 @@ class MatrixFederationAgent(object):
|
||||||
res = yield self._route_matrix_uri(new_uri, lookup_well_known=False)
|
res = yield self._route_matrix_uri(new_uri, lookup_well_known=False)
|
||||||
defer.returnValue(res)
|
defer.returnValue(res)
|
||||||
|
|
||||||
|
# try a SRV lookup
|
||||||
|
service_name = b"_matrix._tcp.%s" % (parsed_uri.host,)
|
||||||
|
server_list = yield self._srv_resolver.resolve_service(service_name)
|
||||||
|
|
||||||
if not server_list:
|
if not server_list:
|
||||||
target_host = parsed_uri.host
|
target_host = parsed_uri.host
|
||||||
port = 8448
|
port = 8448
|
||||||
|
@ -283,14 +292,35 @@ class MatrixFederationAgent(object):
|
||||||
None if there was no .well-known file.
|
None if there was no .well-known file.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
cached = self._well_known_cache[server_name]
|
result = self._well_known_cache[server_name]
|
||||||
defer.returnValue(cached)
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
# TODO: should we linearise so that we don't end up doing two .well-known
|
||||||
|
# requests for the same server in parallel?
|
||||||
|
with Measure(self._clock, "get_well_known"):
|
||||||
|
result, cache_period = yield self._do_get_well_known(server_name)
|
||||||
|
|
||||||
# TODO: should we linearise so that we don't end up doing two .well-known requests
|
if cache_period > 0:
|
||||||
# for the same server in parallel?
|
self._well_known_cache.set(server_name, result, cache_period)
|
||||||
|
|
||||||
|
if result == INVALID_WELL_KNOWN:
|
||||||
|
raise Exception("invalid .well-known on this server")
|
||||||
|
|
||||||
|
defer.returnValue(result)
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def _do_get_well_known(self, server_name):
|
||||||
|
"""Actually fetch and parse a .well-known, without checking the cache
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_name (bytes): name of the server, from the requested url
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Deferred[Tuple[bytes|None|object],int]:
|
||||||
|
result, cache period, where result is one of:
|
||||||
|
- the new server name from the .well-known (as a `bytes`)
|
||||||
|
- None if there was no .well-known file.
|
||||||
|
- INVALID_WELL_KNOWN if the .well-known was invalid
|
||||||
|
"""
|
||||||
uri = b"https://%s/.well-known/matrix/server" % (server_name, )
|
uri = b"https://%s/.well-known/matrix/server" % (server_name, )
|
||||||
uri_str = uri.decode("ascii")
|
uri_str = uri.decode("ascii")
|
||||||
logger.info("Fetching %s", uri_str)
|
logger.info("Fetching %s", uri_str)
|
||||||
|
@ -308,9 +338,7 @@ class MatrixFederationAgent(object):
|
||||||
# after startup
|
# after startup
|
||||||
cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
|
cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
|
||||||
cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
|
cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
|
||||||
|
defer.returnValue((None, cache_period))
|
||||||
self._well_known_cache.set(server_name, None, cache_period)
|
|
||||||
defer.returnValue(None)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parsed_body = json.loads(body.decode('utf-8'))
|
parsed_body = json.loads(body.decode('utf-8'))
|
||||||
|
@ -320,7 +348,10 @@ class MatrixFederationAgent(object):
|
||||||
if "m.server" not in parsed_body:
|
if "m.server" not in parsed_body:
|
||||||
raise Exception("Missing key 'm.server'")
|
raise Exception("Missing key 'm.server'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception("invalid .well-known response from %s: %s" % (uri_str, e,))
|
logger.info("invalid .well-known response from %s: %s", uri_str, e)
|
||||||
|
cache_period = WELL_KNOWN_INVALID_CACHE_PERIOD
|
||||||
|
cache_period += random.uniform(0, WELL_KNOWN_DEFAULT_CACHE_PERIOD_JITTER)
|
||||||
|
defer.returnValue((INVALID_WELL_KNOWN, cache_period))
|
||||||
|
|
||||||
result = parsed_body["m.server"].encode("ascii")
|
result = parsed_body["m.server"].encode("ascii")
|
||||||
|
|
||||||
|
@ -336,10 +367,20 @@ class MatrixFederationAgent(object):
|
||||||
else:
|
else:
|
||||||
cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD)
|
cache_period = min(cache_period, WELL_KNOWN_MAX_CACHE_PERIOD)
|
||||||
|
|
||||||
if cache_period > 0:
|
defer.returnValue((result, cache_period))
|
||||||
self._well_known_cache.set(server_name, result, cache_period)
|
|
||||||
|
|
||||||
defer.returnValue(result)
|
|
||||||
|
@implementer(IStreamClientEndpoint)
|
||||||
|
class LoggingHostnameEndpoint(object):
|
||||||
|
"""A wrapper for HostnameEndpint which logs when it connects"""
|
||||||
|
def __init__(self, reactor, host, port, *args, **kwargs):
|
||||||
|
self.host = host
|
||||||
|
self.port = port
|
||||||
|
self.ep = HostnameEndpoint(reactor, host, port, *args, **kwargs)
|
||||||
|
|
||||||
|
def connect(self, protocol_factory):
|
||||||
|
logger.info("Connecting to %s:%i", self.host, self.port)
|
||||||
|
return self.ep.connect(protocol_factory)
|
||||||
|
|
||||||
|
|
||||||
def _cache_period_from_headers(headers, time_now=time.time):
|
def _cache_period_from_headers(headers, time_now=time.time):
|
||||||
|
|
|
@ -358,9 +358,8 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
# No SRV record lookup yet
|
||||||
b"_matrix._tcp.testserv",
|
self.mock_resolver.resolve_service.assert_not_called()
|
||||||
)
|
|
||||||
|
|
||||||
# there should be an attempt to connect on port 443 for the .well-known
|
# there should be an attempt to connect on port 443 for the .well-known
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
|
@ -376,6 +375,11 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# .well-known request fails.
|
# .well-known request fails.
|
||||||
self.reactor.pump((0.4,))
|
self.reactor.pump((0.4,))
|
||||||
|
|
||||||
|
# now there should be a SRV lookup
|
||||||
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
|
b"_matrix._tcp.testserv",
|
||||||
|
)
|
||||||
|
|
||||||
# we should fall back to a direct connection
|
# we should fall back to a direct connection
|
||||||
self.assertEqual(len(clients), 2)
|
self.assertEqual(len(clients), 2)
|
||||||
(host, port, client_factory, _timeout, _bindAddress) = clients[1]
|
(host, port, client_factory, _timeout, _bindAddress) = clients[1]
|
||||||
|
@ -403,8 +407,7 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
self.successResultOf(test_d)
|
self.successResultOf(test_d)
|
||||||
|
|
||||||
def test_get_well_known(self):
|
def test_get_well_known(self):
|
||||||
"""Test the behaviour when the server name has no port and no SRV record, but
|
"""Test the behaviour when the .well-known redirects elsewhere
|
||||||
the .well-known redirects elsewhere
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.side_effect = lambda _: []
|
self.mock_resolver.resolve_service.side_effect = lambda _: []
|
||||||
|
@ -416,11 +419,6 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
|
||||||
b"_matrix._tcp.testserv",
|
|
||||||
)
|
|
||||||
self.mock_resolver.resolve_service.reset_mock()
|
|
||||||
|
|
||||||
# there should be an attempt to connect on port 443 for the .well-known
|
# there should be an attempt to connect on port 443 for the .well-known
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
self.assertEqual(len(clients), 1)
|
self.assertEqual(len(clients), 1)
|
||||||
|
@ -432,7 +430,7 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
client_factory, expected_sni=b"testserv", target_server=b"target-server",
|
client_factory, expected_sni=b"testserv", target_server=b"target-server",
|
||||||
)
|
)
|
||||||
|
|
||||||
# there should be another SRV lookup
|
# there should be a SRV lookup
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
b"_matrix._tcp.target-server",
|
b"_matrix._tcp.target-server",
|
||||||
)
|
)
|
||||||
|
@ -483,11 +481,6 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
|
||||||
b"_matrix._tcp.testserv",
|
|
||||||
)
|
|
||||||
self.mock_resolver.resolve_service.reset_mock()
|
|
||||||
|
|
||||||
# there should be an attempt to connect on port 443 for the .well-known
|
# there should be an attempt to connect on port 443 for the .well-known
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
self.assertEqual(len(clients), 1)
|
self.assertEqual(len(clients), 1)
|
||||||
|
@ -529,7 +522,7 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
|
|
||||||
self.reactor.pump((0.1, ))
|
self.reactor.pump((0.1, ))
|
||||||
|
|
||||||
# there should be another SRV lookup
|
# there should be a SRV lookup
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
b"_matrix._tcp.target-server",
|
b"_matrix._tcp.target-server",
|
||||||
)
|
)
|
||||||
|
@ -581,6 +574,7 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
|
# the request for a .well-known will have failed with a DNS lookup error.
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
b"_matrix._tcp.testserv",
|
b"_matrix._tcp.testserv",
|
||||||
)
|
)
|
||||||
|
@ -613,11 +607,9 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
self.successResultOf(test_d)
|
self.successResultOf(test_d)
|
||||||
|
|
||||||
def test_get_well_known_srv(self):
|
def test_get_well_known_srv(self):
|
||||||
"""Test the behaviour when the server name has no port and no SRV record, but
|
"""Test the behaviour when the .well-known redirects to a place where there
|
||||||
the .well-known redirects to a place where there is a SRV.
|
is a SRV.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.side_effect = lambda _: []
|
|
||||||
self.reactor.lookups["testserv"] = "1.2.3.4"
|
self.reactor.lookups["testserv"] = "1.2.3.4"
|
||||||
self.reactor.lookups["srvtarget"] = "5.6.7.8"
|
self.reactor.lookups["srvtarget"] = "5.6.7.8"
|
||||||
|
|
||||||
|
@ -626,11 +618,6 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
|
||||||
b"_matrix._tcp.testserv",
|
|
||||||
)
|
|
||||||
self.mock_resolver.resolve_service.reset_mock()
|
|
||||||
|
|
||||||
# there should be an attempt to connect on port 443 for the .well-known
|
# there should be an attempt to connect on port 443 for the .well-known
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
self.assertEqual(len(clients), 1)
|
self.assertEqual(len(clients), 1)
|
||||||
|
@ -646,7 +633,7 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
client_factory, expected_sni=b"testserv", target_server=b"target-server",
|
client_factory, expected_sni=b"testserv", target_server=b"target-server",
|
||||||
)
|
)
|
||||||
|
|
||||||
# there should be another SRV lookup
|
# there should be a SRV lookup
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
b"_matrix._tcp.target-server",
|
b"_matrix._tcp.target-server",
|
||||||
)
|
)
|
||||||
|
@ -691,9 +678,8 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# Nothing happened yet
|
# Nothing happened yet
|
||||||
self.assertNoResult(test_d)
|
self.assertNoResult(test_d)
|
||||||
|
|
||||||
self.mock_resolver.resolve_service.assert_called_once_with(
|
# No SRV record lookup yet
|
||||||
b"_matrix._tcp.xn--bcher-kva.com",
|
self.mock_resolver.resolve_service.assert_not_called()
|
||||||
)
|
|
||||||
|
|
||||||
# there should be an attempt to connect on port 443 for the .well-known
|
# there should be an attempt to connect on port 443 for the .well-known
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
|
@ -709,6 +695,11 @@ class MatrixFederationAgentTests(TestCase):
|
||||||
# .well-known request fails.
|
# .well-known request fails.
|
||||||
self.reactor.pump((0.4,))
|
self.reactor.pump((0.4,))
|
||||||
|
|
||||||
|
# now there should have been a SRV lookup
|
||||||
|
self.mock_resolver.resolve_service.assert_called_once_with(
|
||||||
|
b"_matrix._tcp.xn--bcher-kva.com",
|
||||||
|
)
|
||||||
|
|
||||||
# We should fall back to port 8448
|
# We should fall back to port 8448
|
||||||
clients = self.reactor.tcpClients
|
clients = self.reactor.tcpClients
|
||||||
self.assertEqual(len(clients), 2)
|
self.assertEqual(len(clients), 2)
|
||||||
|
|
Loading…
Reference in a new issue