2021-09-16 18:01:14 +02:00
|
|
|
# Copyright 2020-2021 The Matrix.org Foundation C.I.C.
|
2020-10-30 11:55:24 +01:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
import logging
|
|
|
|
import os
|
2021-03-26 17:49:46 +01:00
|
|
|
from typing import Optional, Tuple
|
2020-10-30 11:55:24 +01:00
|
|
|
|
2022-11-16 16:25:24 +01:00
|
|
|
from twisted.internet.interfaces import IOpenSSLServerConnectionCreator
|
2020-10-30 11:55:24 +01:00
|
|
|
from twisted.internet.protocol import Factory
|
2022-11-16 16:25:24 +01:00
|
|
|
from twisted.protocols.tls import TLSMemoryBIOFactory, TLSMemoryBIOProtocol
|
2023-02-06 15:55:00 +01:00
|
|
|
from twisted.test.proto_helpers import MemoryReactor
|
2020-10-30 11:55:24 +01:00
|
|
|
from twisted.web.http import HTTPChannel
|
|
|
|
from twisted.web.server import Request
|
|
|
|
|
|
|
|
from synapse.rest import admin
|
2021-08-17 13:57:58 +02:00
|
|
|
from synapse.rest.client import login
|
2020-10-30 11:55:24 +01:00
|
|
|
from synapse.server import HomeServer
|
2023-02-06 15:55:00 +01:00
|
|
|
from synapse.util import Clock
|
2020-10-30 11:55:24 +01:00
|
|
|
|
|
|
|
from tests.http import TestServerTLSConnectionFactory, get_test_ca_cert_file
|
|
|
|
from tests.replication._base import BaseMultiWorkerStreamTestCase
|
2020-11-14 00:11:43 +01:00
|
|
|
from tests.server import FakeChannel, FakeSite, FakeTransport, make_request
|
2021-09-16 18:01:14 +02:00
|
|
|
from tests.test_utils import SMALL_PNG
|
2020-10-30 11:55:24 +01:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# Module-wide TLS connection factory, shared by every test in this file.
# It is created lazily by get_connection_factory() rather than at import
# time, and cached here so the (relatively expensive) test certificate is
# only built once.
test_server_connection_factory: Optional[TestServerTLSConnectionFactory] = None
|
2020-10-30 11:55:24 +01:00
|
|
|
|
|
|
|
|
|
|
|
class MediaRepoShardTestCase(BaseMultiWorkerStreamTestCase):
    """Checks running multiple media repos work correctly."""

    servlets = [
        admin.register_servlets_for_client_rest_resource,
        login.register_servlets,
    ]

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Register a test user and wire up DNS for the fake remote server."""
        self.user_id = self.register_user("user", "pass")
        self.access_token = self.login("user", "pass")

        # Resolve the remote media host to a dummy IP so that outbound
        # connection attempts land on the in-memory reactor's tcpClients list
        # instead of the real network.
        self.reactor.lookups["example.com"] = "1.2.3.4"

    def default_config(self) -> dict:
        """Extend the base config to trust the test CA for federation TLS."""
        conf = super().default_config()
        # Trust our test CA so the handshake against the fake TLS server
        # (built by _build_test_server) succeeds.
        conf["federation_custom_ca_list"] = [get_test_ca_cert_file()]
        return conf

    def _get_media_req(
        self, hs: HomeServer, target: str, media_id: str
    ) -> Tuple[FakeChannel, Request]:
        """Request some remote media from the given HS by calling the download
        API.

        This then triggers an outbound request from the HS to the target.

        Returns:
            The channel for the *client* request and the *outbound* request for
            the media which the caller should respond to.
        """
        resource = hs.get_media_repository_resource().children[b"download"]
        # Fire the client-side download request; await_result=False because
        # the request cannot complete until we answer the outbound request
        # that it triggers below.
        channel = make_request(
            self.reactor,
            FakeSite(resource, self.reactor),
            "GET",
            f"/{target}/{media_id}",
            shorthand=False,
            access_token=self.access_token,
            await_result=False,
        )
        self.pump()

        # The HS should now have attempted an outbound TCP connection to the
        # target; pick the most recent attempt off the in-memory reactor.
        clients = self.reactor.tcpClients
        self.assertGreaterEqual(len(clients), 1)
        (host, port, client_factory, _timeout, _bindAddress) = clients.pop()

        # build the test server
        server_tls_protocol = _build_test_server(get_connection_factory())

        # now, tell the client protocol factory to build the client protocol (it will be a
        # _WrappingProtocol, around a TLSMemoryBIOProtocol, around an
        # HTTP11ClientProtocol) and wire the output of said protocol up to the server via
        # a FakeTransport.
        #
        # Normally this would be done by the TCP socket code in Twisted, but we are
        # stubbing that out here.
        client_protocol = client_factory.buildProtocol(None)
        client_protocol.makeConnection(
            FakeTransport(server_tls_protocol, self.reactor, client_protocol)
        )

        # tell the server tls protocol to send its stuff back to the client, too
        server_tls_protocol.makeConnection(
            FakeTransport(client_protocol, self.reactor, server_tls_protocol)
        )

        # fish the test server back out of the server-side TLS protocol.
        http_server: HTTPChannel = server_tls_protocol.wrappedProtocol  # type: ignore[assignment]

        # give the reactor a pump to get the TLS juices flowing.
        self.reactor.pump((0.1,))

        # The HTTP request should have made it through the TLS tunnel by now.
        self.assertEqual(len(http_server.requests), 1)
        request = http_server.requests[0]

        # Sanity-check that the outbound request is for the media we asked for.
        self.assertEqual(request.method, b"GET")
        self.assertEqual(
            request.path,
            f"/_matrix/media/r0/download/{target}/{media_id}".encode(),
        )
        self.assertEqual(
            request.requestHeaders.getRawHeaders(b"host"), [target.encode("utf-8")]
        )

        return channel, request

    def test_basic(self) -> None:
        """Test basic fetching of remote media from a single worker."""
        hs1 = self.make_worker_hs("synapse.app.generic_worker")

        channel, request = self._get_media_req(hs1, "example.com:443", "ABC123")

        # Respond to the worker's outbound media request...
        request.setResponseCode(200)
        request.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"])
        request.write(b"Hello!")
        request.finish()

        self.pump(0.1)

        # ...and the original client request should now complete with the body.
        self.assertEqual(channel.code, 200)
        self.assertEqual(channel.result["body"], b"Hello!")

    def test_download_simple_file_race(self) -> None:
        """Test that fetching remote media from two different processes at the
        same time works.
        """
        hs1 = self.make_worker_hs("synapse.app.generic_worker")
        hs2 = self.make_worker_hs("synapse.app.generic_worker")

        start_count = self._count_remote_media()

        # Make two requests without responding to the outbound media requests.
        channel1, request1 = self._get_media_req(hs1, "example.com:443", "ABC123")
        channel2, request2 = self._get_media_req(hs2, "example.com:443", "ABC123")

        # Respond to the first outbound media request and check that the client
        # request is successful
        request1.setResponseCode(200)
        request1.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"])
        request1.write(b"Hello!")
        request1.finish()

        self.pump(0.1)

        self.assertEqual(channel1.code, 200, channel1.result["body"])
        self.assertEqual(channel1.result["body"], b"Hello!")

        # Now respond to the second with the same content.
        request2.setResponseCode(200)
        request2.responseHeaders.setRawHeaders(b"Content-Type", [b"text/plain"])
        request2.write(b"Hello!")
        request2.finish()

        self.pump(0.1)

        self.assertEqual(channel2.code, 200, channel2.result["body"])
        self.assertEqual(channel2.result["body"], b"Hello!")

        # We expect only one new file to have been persisted.
        self.assertEqual(start_count + 1, self._count_remote_media())

    def test_download_image_race(self) -> None:
        """Test that fetching remote *images* from two different processes at
        the same time works.

        This checks that races generating thumbnails are handled correctly.
        """
        hs1 = self.make_worker_hs("synapse.app.generic_worker")
        hs2 = self.make_worker_hs("synapse.app.generic_worker")

        start_count = self._count_remote_thumbnails()

        # Race two workers downloading the same image.
        channel1, request1 = self._get_media_req(hs1, "example.com:443", "PIC1")
        channel2, request2 = self._get_media_req(hs2, "example.com:443", "PIC1")

        request1.setResponseCode(200)
        request1.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"])
        request1.write(SMALL_PNG)
        request1.finish()

        self.pump(0.1)

        self.assertEqual(channel1.code, 200, channel1.result["body"])
        self.assertEqual(channel1.result["body"], SMALL_PNG)

        request2.setResponseCode(200)
        request2.responseHeaders.setRawHeaders(b"Content-Type", [b"image/png"])
        request2.write(SMALL_PNG)
        request2.finish()

        self.pump(0.1)

        self.assertEqual(channel2.code, 200, channel2.result["body"])
        self.assertEqual(channel2.result["body"], SMALL_PNG)

        # We expect only three new thumbnails to have been persisted.
        # (Presumably one per configured thumbnail size — TODO confirm against
        # the default thumbnail_sizes config.)
        self.assertEqual(start_count + 3, self._count_remote_thumbnails())

    def _count_remote_media(self) -> int:
        """Count the number of files in our remote media directory."""
        path = os.path.join(
            self.hs.get_media_repository().primary_base_path, "remote_content"
        )
        return sum(len(files) for _, _, files in os.walk(path))

    def _count_remote_thumbnails(self) -> int:
        """Count the number of files in our remote thumbnails directory."""
        path = os.path.join(
            self.hs.get_media_repository().primary_base_path, "remote_thumbnail"
        )
        return sum(len(files) for _, _, files in os.walk(path))
|
|
|
|
|
|
|
|
|
2023-02-06 15:55:00 +01:00
|
|
|
def get_connection_factory() -> TestServerTLSConnectionFactory:
    """Return the module-wide TLS connection factory, building it on demand.

    The factory (with a SAN for ``example.com``, the host the tests connect
    to) must be created exactly once, but not before we are ready to run the
    first test — hence lazy initialisation rather than a module-level value.
    """
    global test_server_connection_factory

    if test_server_connection_factory is not None:
        return test_server_connection_factory

    test_server_connection_factory = TestServerTLSConnectionFactory(
        sanlist=[b"DNS:example.com"]
    )
    return test_server_connection_factory
|
|
|
|
|
|
|
|
|
2022-11-16 16:25:24 +01:00
|
|
|
def _build_test_server(
    connection_creator: IOpenSSLServerConnectionCreator,
) -> TLSMemoryBIOProtocol:
    """Build a TLS-wrapped HTTP server protocol for use in tests.

    Creates an ``HTTPChannel`` factory, wraps it in a server-side
    ``TLSMemoryBIOFactory`` driven by the given connection creator, and
    returns a single protocol instance built from the wrapped factory.

    Args:
        connection_creator: thing to build SSL connections

    Returns:
        TLSMemoryBIOProtocol
    """
    http_factory = Factory.forProtocol(HTTPChannel)
    # Request.finish expects the factory to have a 'log' method.
    http_factory.log = _log_request

    tls_factory = TLSMemoryBIOFactory(
        connection_creator, isClient=False, wrappedFactory=http_factory
    )
    return tls_factory.buildProtocol(None)
|
|
|
|
|
|
|
|
|
2023-02-06 15:55:00 +01:00
|
|
|
def _log_request(request: Request) -> None:
    """Stand-in for ``Factory.log``, which ``Request.finish`` expects to exist.

    Simply records each completed request at INFO level.
    """
    logger.info("Completed request %s", request)
|