0
0
Fork 1
mirror of https://mau.dev/maunium/synapse.git synced 2024-06-17 10:08:28 +02:00

Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. (#16759)

The idea here being that the directory server shouldn't advertise rooms
to a requesting server is the requesting server would not be allowed to
join or participate in the room.

<!--
Fixes: # <!-- -->
<!--
Supersedes: # <!-- -->
<!--
Follows: # <!-- -->
<!--
Part of: # <!-- -->
Base: `develop` <!-- git-stack-base-branch:develop -->

<!--
This pull request is commit-by-commit review friendly. <!-- -->
<!--
This pull request is intended for commit-by-commit review. <!-- -->

Original commit schedule, with full messages:

<ol>
<li>

Pass `from_federation_origin` down into room list retrieval code 

</li>
<li>

Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

fixup! Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

Cap the number of /publicRooms entries to 100 

</li>
<li>

Simplify code now that you can't request unlimited rooms 

</li>
<li>

Filter out rooms from federated requests that don't have the correct ACL

</li>
<li>

Request a handful more when filtering ACLs so that we can try to avoid
shortchanging the requester

</li>
</ol>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
This commit is contained in:
reivilibre 2024-01-08 17:24:20 +00:00 committed by GitHub
parent 5d3850b038
commit a83a337c4d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 222 additions and 53 deletions

View file

@ -0,0 +1 @@
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.

View file

@ -154,7 +154,10 @@ class PublicRoomList(BaseFederationServlet):
limit = None limit = None
data = await self.handler.get_local_public_room_list( data = await self.handler.get_local_public_room_list(
limit, since_token, network_tuple=network_tuple, from_federation=True limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=origin,
) )
return 200, data return 200, data
@ -195,7 +198,7 @@ class PublicRoomList(BaseFederationServlet):
since_token=since_token, since_token=since_token,
search_filter=search_filter, search_filter=search_filter,
network_tuple=network_tuple, network_tuple=network_tuple,
from_federation=True, from_federation_origin=origin,
) )
return 200, data return 200, data

View file

@ -19,7 +19,7 @@
# #
import logging import logging
from typing import TYPE_CHECKING, Any, Optional, Tuple from typing import TYPE_CHECKING, Any, List, Optional, Tuple
import attr import attr
import msgpack import msgpack
@ -54,6 +54,9 @@ REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
# This is used to indicate we should only return rooms published to the main list. # This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
class RoomListHandler: class RoomListHandler:
def __init__(self, hs: "HomeServer"): def __init__(self, hs: "HomeServer"):
@ -74,7 +77,7 @@ class RoomListHandler:
since_token: Optional[str] = None, since_token: Optional[str] = None,
search_filter: Optional[dict] = None, search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False, from_federation_origin: Optional[str] = None,
) -> JsonDict: ) -> JsonDict:
"""Generate a local public room list. """Generate a local public room list.
@ -89,7 +92,8 @@ class RoomListHandler:
This can be (None, None) to indicate the main list, or a particular This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one. appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists. Setting to None returns all public rooms across all lists.
from_federation: true iff the request comes from the federation API from_federation_origin: the server name of the requester, or None
if the request is not from federation.
""" """
if not self.enable_room_list_search: if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0} return {"chunk": [], "total_room_count_estimate": 0}
@ -102,36 +106,43 @@ class RoomListHandler:
network_tuple, network_tuple,
) )
if search_filter: capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)
if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for # We explicitly don't bother caching searches or requests for
# appservice specific lists. # appservice specific lists.
logger.info("Bypassing cache as search request.") # We also don't bother caching requests from federated homeservers.
logger.debug("Bypassing cache as search or federation request.")
return await self._get_public_room_list( return await self._get_public_room_list(
limit, capped_limit,
since_token, since_token,
search_filter, search_filter,
network_tuple=network_tuple, network_tuple=network_tuple,
from_federation=from_federation, from_federation_origin=from_federation_origin,
) )
key = (limit, since_token, network_tuple) key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap( return await self.response_cache.wrap(
key, key,
self._get_public_room_list, self._get_public_room_list,
limit, capped_limit,
since_token, since_token,
network_tuple=network_tuple, network_tuple=network_tuple,
from_federation=from_federation, from_federation_origin=from_federation_origin,
) )
async def _get_public_room_list( async def _get_public_room_list(
self, self,
limit: Optional[int] = None, limit: int,
since_token: Optional[str] = None, since_token: Optional[str] = None,
search_filter: Optional[dict] = None, search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False, from_federation_origin: Optional[str] = None,
) -> JsonDict: ) -> JsonDict:
"""Generate a public room list. """Generate a public room list.
Args: Args:
@ -142,8 +153,8 @@ class RoomListHandler:
This can be (None, None) to indicate the main list, or a particular This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one. appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists. Setting to None returns all public rooms across all lists.
from_federation: Whether this request originated from a from_federation_origin: the server name of the requester, or None
federating server or a client. Used for room filtering. if the request is not from federation.
""" """
# Pagination tokens work by storing the room ID sent in the last batch, # Pagination tokens work by storing the room ID sent in the last batch,
@ -165,8 +176,16 @@ class RoomListHandler:
forwards = True forwards = True
has_batch_token = False has_batch_token = False
# we request one more than wanted to see if there are more pages to come if from_federation_origin is None:
probing_limit = limit + 1 if limit is not None else None # Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best easy effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10
results = await self.store.get_largest_public_rooms( results = await self.store.get_largest_public_rooms(
network_tuple, network_tuple,
@ -174,7 +193,7 @@ class RoomListHandler:
probing_limit, probing_limit,
bounds=bounds, bounds=bounds,
forwards=forwards, forwards=forwards,
ignore_non_federatable=from_federation, ignore_non_federatable=from_federation_origin is not None,
) )
def build_room_entry(room: LargestRoomStats) -> JsonDict: def build_room_entry(room: LargestRoomStats) -> JsonDict:
@ -195,59 +214,117 @@ class RoomListHandler:
# Filter out Nones rather omit the field altogether # Filter out Nones rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None} return {k: v for k, v in entry.items() if v is not None}
response: JsonDict = {} # Build a list of up to `limit` entries.
num_results = len(results) room_entries: List[JsonDict] = []
if limit is not None: rooms_iterator = results if forwards else reversed(results)
more_to_come = num_results == probing_limit
# Depending on direction we trim either the front or back. # Track the first and last 'considered' rooms so that we can provide correct
if forwards: # next_batch/prev_batch tokens.
results = results[:limit] # This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False
for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break
if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result
if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)
if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue
room_entries.append(build_room_entry(room_result))
if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)
response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
more_to_come_from_database = num_results == probing_limit
if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else: else:
results = results[-limit:] # If we didn't find any results this time,
else: # we don't have an actual room ID to put in the token.
more_to_come = False # But since `first_considered_room` is None, we know that we have
# reached the end of the results.
# So we can use a token of (0, empty room ID) to paginate from the end
# next time.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=0,
last_room_id="",
direction_is_forward=False,
).to_token()
if num_results > 0: if num_results > 0:
final_entry = results[-1] assert first_considered_room is not None
initial_entry = results[0] assert last_considered_room is not None
if forwards: if forwards:
if has_batch_token: if more_to_come_from_database or cut_off_due_to_limit:
# If there was a token given then we assume that there
# must be previous results.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
direction_is_forward=False,
).to_token()
if more_to_come:
response["next_batch"] = RoomListNextBatch( response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members, last_joined_members=last_considered_room.joined_members,
last_room_id=final_entry.room_id, last_room_id=last_considered_room.room_id,
direction_is_forward=True, direction_is_forward=True,
).to_token() ).to_token()
else: else: # backwards
if has_batch_token: if has_batch_token:
response["next_batch"] = RoomListNextBatch( response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members, last_joined_members=last_considered_room.joined_members,
last_room_id=final_entry.room_id, last_room_id=last_considered_room.room_id,
direction_is_forward=True, direction_is_forward=True,
).to_token() ).to_token()
if more_to_come: if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch( response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members, last_joined_members=first_considered_room.joined_members,
last_room_id=initial_entry.room_id, last_room_id=first_considered_room.room_id,
direction_is_forward=False, direction_is_forward=False,
).to_token() ).to_token()
response["chunk"] = [build_room_entry(r) for r in results] # We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms( response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple, network_tuple,
ignore_non_federatable=from_federation, ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter, search_filter=search_filter,
) )

View file

@ -0,0 +1,88 @@
from http import HTTPStatus
from typing import Optional, Set
from synapse.rest import admin
from synapse.rest.client import directory, login, room
from synapse.types import JsonDict
from tests import unittest
class RoomListHandlerTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
room.register_servlets,
directory.register_servlets,
]
def _create_published_room(
self, tok: str, extra_content: Optional[JsonDict] = None
) -> str:
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
channel = self.make_request(
"PUT",
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
content={
"visibility": "public",
},
)
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
return room_id
def test_acls_applied_to_room_directory_results(self) -> None:
"""
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
`test` and `test2` to access it.
We then simulate `test2` and `test3` requesting the room directory and assert
that `test3` does not see room 2, but `test2` sees all 3.
"""
self.register_user("u1", "p1")
u1tok = self.login("u1", "p1")
room1 = self._create_published_room(u1tok)
room2 = self._create_published_room(
u1tok,
extra_content={
"initial_state": [
{
"type": "m.room.server_acl",
"content": {
"allow": ["test", "test2"],
},
}
]
},
)
room3 = self._create_published_room(u1tok)
room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test2"
)
)
room_ids_in_test2_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}
room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test3"
)
)
room_ids_in_test3_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}
self.assertEqual(
room_ids_in_test2_list,
{room1, room2, room3},
"test2 should be able to see all 3 rooms",
)
self.assertEqual(
room_ids_in_test3_list,
{room1, room3},
"test3 should be able to see only 2 rooms",
)