0
0
Fork 1
mirror of https://mau.dev/maunium/synapse.git synced 2024-05-18 03:23:45 +02:00

Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. (#16759)

The idea here being that the directory server shouldn't advertise rooms
to a requesting server is the requesting server would not be allowed to
join or participate in the room.

<!--
Fixes: # <!-- -->
<!--
Supersedes: # <!-- -->
<!--
Follows: # <!-- -->
<!--
Part of: # <!-- -->
Base: `develop` <!-- git-stack-base-branch:develop -->

<!--
This pull request is commit-by-commit review friendly. <!-- -->
<!--
This pull request is intended for commit-by-commit review. <!-- -->

Original commit schedule, with full messages:

<ol>
<li>

Pass `from_federation_origin` down into room list retrieval code 

</li>
<li>

Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

fixup! Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

Cap the number of /publicRooms entries to 100 

</li>
<li>

Simplify code now that you can't request unlimited rooms 

</li>
<li>

Filter out rooms from federated requests that don't have the correct ACL

</li>
<li>

Request a handful more when filtering ACLs so that we can try to avoid
shortchanging the requester

</li>
</ol>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
This commit is contained in:
reivilibre 2024-01-08 17:24:20 +00:00 committed by GitHub
parent 5d3850b038
commit a83a337c4d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 222 additions and 53 deletions

View file

@ -0,0 +1 @@
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.

View file

@ -154,7 +154,10 @@ class PublicRoomList(BaseFederationServlet):
limit = None
data = await self.handler.get_local_public_room_list(
limit, since_token, network_tuple=network_tuple, from_federation=True
limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=origin,
)
return 200, data
@ -195,7 +198,7 @@ class PublicRoomList(BaseFederationServlet):
since_token=since_token,
search_filter=search_filter,
network_tuple=network_tuple,
from_federation=True,
from_federation_origin=origin,
)
return 200, data

View file

@ -19,7 +19,7 @@
#
import logging
from typing import TYPE_CHECKING, Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
import attr
import msgpack
@ -54,6 +54,9 @@ REMOTE_ROOM_LIST_POLL_INTERVAL = 60 * 1000
# This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)
# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100
class RoomListHandler:
def __init__(self, hs: "HomeServer"):
@ -74,7 +77,7 @@ class RoomListHandler:
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.
@ -89,7 +92,8 @@ class RoomListHandler:
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: true iff the request comes from the federation API
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
@ -102,36 +106,43 @@ class RoomListHandler:
network_tuple,
)
if search_filter:
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)
if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
logger.info("Bypassing cache as search request.")
# We also don't bother caching requests from federated homeservers.
logger.debug("Bypassing cache as search or federation request.")
return await self._get_public_room_list(
limit,
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)
key = (limit, since_token, network_tuple)
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
limit,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)
async def _get_public_room_list(
self,
limit: Optional[int] = None,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
@ -142,8 +153,8 @@ class RoomListHandler:
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: Whether this request originated from a
federating server or a client. Used for room filtering.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
# Pagination tokens work by storing the room ID sent in the last batch,
@ -165,8 +176,16 @@ class RoomListHandler:
forwards = True
has_batch_token = False
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1 if limit is not None else None
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best easy effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10
results = await self.store.get_largest_public_rooms(
network_tuple,
@ -174,7 +193,7 @@ class RoomListHandler:
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
)
def build_room_entry(room: LargestRoomStats) -> JsonDict:
@ -195,59 +214,117 @@ class RoomListHandler:
# Filter out Nones rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}
response: JsonDict = {}
num_results = len(results)
if limit is not None:
more_to_come = num_results == probing_limit
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)
# Depending on direction we trim either the front or back.
if forwards:
results = results[:limit]
# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False
for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break
if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result
if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)
if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue
room_entries.append(build_room_entry(room_result))
if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)
response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
more_to_come_from_database = num_results == probing_limit
if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
results = results[-limit:]
else:
more_to_come = False
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# But since `first_considered_room` is None, we know that we have
# reached the end of the results.
# So we can use a token of (0, empty room ID) to paginate from the end
# next time.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=0,
last_room_id="",
direction_is_forward=False,
).to_token()
if num_results > 0:
final_entry = results[-1]
initial_entry = results[0]
assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if has_batch_token:
# If there was a token given then we assume that there
# must be previous results.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
direction_is_forward=False,
).to_token()
if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else:
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
response["chunk"] = [build_room_entry(r) for r in results]
# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)

View file

@ -0,0 +1,88 @@
from http import HTTPStatus
from typing import Optional, Set
from synapse.rest import admin
from synapse.rest.client import directory, login, room
from synapse.types import JsonDict
from tests import unittest
class RoomListHandlerTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
room.register_servlets,
directory.register_servlets,
]
def _create_published_room(
self, tok: str, extra_content: Optional[JsonDict] = None
) -> str:
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
channel = self.make_request(
"PUT",
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
content={
"visibility": "public",
},
)
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
return room_id
def test_acls_applied_to_room_directory_results(self) -> None:
"""
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
`test` and `test2` to access it.
We then simulate `test2` and `test3` requesting the room directory and assert
that `test3` does not see room 2, but `test2` sees all 3.
"""
self.register_user("u1", "p1")
u1tok = self.login("u1", "p1")
room1 = self._create_published_room(u1tok)
room2 = self._create_published_room(
u1tok,
extra_content={
"initial_state": [
{
"type": "m.room.server_acl",
"content": {
"allow": ["test", "test2"],
},
}
]
},
)
room3 = self._create_published_room(u1tok)
room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test2"
)
)
room_ids_in_test2_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}
room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test3"
)
)
room_ids_in_test3_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}
self.assertEqual(
room_ids_in_test2_list,
{room1, room2, room3},
"test2 should be able to see all 3 rooms",
)
self.assertEqual(
room_ids_in_test3_list,
{room1, room3},
"test3 should be able to see only 2 rooms",
)