From cdc02594491b9410f250f0adc4ea6d223aa3de7f Mon Sep 17 00:00:00 2001 From: jejo86 <28619134+jejo86@users.noreply.github.com> Date: Wed, 29 Jun 2022 11:24:10 +0200 Subject: [PATCH 01/54] Document the `--report-stats` argument (#13029) Signed-off-by: jejo86 <28619134+jejo86@users.noreply.github.com> --- changelog.d/13029.doc | 1 + docs/setup/installation.md | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13029.doc diff --git a/changelog.d/13029.doc b/changelog.d/13029.doc new file mode 100644 index 000000000..d398f0fdb --- /dev/null +++ b/changelog.d/13029.doc @@ -0,0 +1 @@ +Add an explanation of the `--report-stats` argument to the docs. diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 5bdefe2bc..1580529fd 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -232,7 +232,9 @@ python -m synapse.app.homeserver \ --report-stats=[yes|no] ``` -... substituting an appropriate value for `--server-name`. +... substituting an appropriate value for `--server-name` and choosing whether +or not to report usage statistics (hostname, Synapse version, uptime, total +users, etc.) to the developers via the `--report-stats` argument. This command will generate you a config file that you can then customise, but it will also generate a set of keys for you. These keys will allow your homeserver to From 92a0c18ef0f42b80e382667141e6593ab30e3776 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Jun 2022 11:32:38 +0100 Subject: [PATCH 02/54] Improve performance of getting unread counts in rooms (#13119) --- changelog.d/13119.misc | 1 + synapse/_scripts/synapse_port_db.py | 3 +++ synapse/storage/databases/main/__init__.py | 2 +- .../databases/main/event_push_actions.py | 16 ++++++++++++--- synapse/storage/databases/main/stream.py | 20 +++++++++++++++++++ tests/storage/test_event_push_actions.py | 2 ++ 6 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13119.misc diff --git a/changelog.d/13119.misc b/changelog.d/13119.misc new file mode 100644 index 000000000..3bb51962e --- /dev/null +++ b/changelog.d/13119.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
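The core of the unread-count change in this patch is to avoid hitting the `event_push_actions` table at all when the answer is provably zero, by first consulting an in-memory stream-change cache. The following is a toy sketch of that short-circuit; `StreamChangeCache` is a simplified stand-in for Synapse's `_events_stream_cache`, and `query_push_actions_from_db` is a placeholder for the real SQL, so none of these names should be read as the actual store methods.

```python
from typing import Dict, Optional, Tuple


class StreamChangeCache:
    """Toy stand-in for Synapse's `_events_stream_cache`: remembers the latest
    stream ordering at which each room changed."""

    def __init__(self) -> None:
        self._last_change: Dict[str, int] = {}

    def entity_has_changed(self, room_id: str, stream_ordering: int) -> None:
        # Record that the room changed at this stream position.
        current = self._last_change.get(room_id, 0)
        self._last_change[room_id] = max(current, stream_ordering)

    def has_entity_changed(self, room_id: str, stream_ordering: int) -> bool:
        # True if the room may have changed *after* the given position.
        return self._last_change.get(room_id, 0) > stream_ordering


def query_push_actions_from_db(
    room_id: str, stream_ordering: int, max_stream_ordering: Optional[int]
) -> Tuple[int, int]:
    # Placeholder for the real `event_push_actions` query.
    return (0, 0)


def count_unread(
    cache: StreamChangeCache,
    room_id: str,
    stream_ordering: int,
    max_stream_ordering: Optional[int] = None,
) -> Tuple[int, int]:
    """Return (notif_count, highlight_count), skipping the database whenever
    the answer is provably zero."""
    # No events in the room since the user's read receipt: nothing to count.
    if not cache.has_entity_changed(room_id, stream_ordering):
        return (0, 0)
    # An empty or inverted range cannot contain push actions either.
    if max_stream_ordering is not None and max_stream_ordering <= stream_ordering:
        return (0, 0)
    return query_push_actions_from_db(room_id, stream_ordering, max_stream_ordering)


cache = StreamChangeCache()
cache.entity_has_changed("!room:example.org", 15)
# The receipt at position 20 is newer than the last change, so no DB query is made.
print(count_unread(cache, "!room:example.org", stream_ordering=20))  # (0, 0)
```

The trade-off is that the change cache must be kept current whenever events are persisted, which is why the test added in this patch records the new event with `entity_has_changed` before inserting the row.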
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 9c06c837d..f3f9c6d54 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -270,6 +270,9 @@ class MockHomeserver: def get_instance_name(self) -> str: return "master" + def should_send_federation(self) -> bool: + return False + class Porter: def __init__( diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 57aaf778e..a3d31d373 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -87,7 +87,6 @@ class DataStore( RoomStore, RoomBatchStore, RegistrationStore, - StreamWorkerStore, ProfileStore, PresenceStore, TransactionWorkerStore, @@ -112,6 +111,7 @@ class DataStore( SearchStore, TagsStore, AccountDataStore, + StreamWorkerStore, OpenIdStore, ClientIpWorkerStore, DeviceStore, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 80ca2fd0b..eae41d748 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -25,8 +25,8 @@ from synapse.storage.database import ( LoggingDatabaseConnection, LoggingTransaction, ) -from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore +from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -122,7 +122,7 @@ def _deserialize_action(actions: str, is_highlight: bool) -> List[Union[dict, st return DEFAULT_NOTIF_ACTION -class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBaseStore): +class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBaseStore): def __init__( self, database: DatabasePool, @@ -218,7 +218,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas retcol="event_id", ) - stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) # type: ignore[attr-defined] + stream_ordering = self.get_stream_id_for_event_txn(txn, event_id) return self._get_unread_counts_by_pos_txn( txn, room_id, user_id, stream_ordering @@ -307,12 +307,22 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, EventsWorkerStore, SQLBas actions that have been deleted from `event_push_actions` table. """ + # If there have been no events in the room since the stream ordering, + # there can't be any push actions either. + if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): + return 0, 0 + clause = "" args = [user_id, room_id, stream_ordering] if max_stream_ordering is not None: clause = "AND ea.stream_ordering <= ?" args.append(max_stream_ordering) + # If the max stream ordering is less than the min stream ordering, + # then obviously there are zero push actions in that range. 
+ if max_stream_ordering <= stream_ordering: + return 0, 0 + sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 8e88784d3..3a1df7776 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -46,10 +46,12 @@ from typing import ( Set, Tuple, cast, + overload, ) import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet import defer @@ -795,6 +797,24 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) return RoomStreamToken(topo, stream_ordering) + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: Literal[False] = False, + ) -> int: + ... + + @overload + def get_stream_id_for_event_txn( + self, + txn: LoggingTransaction, + event_id: str, + allow_none: bool = False, + ) -> Optional[int]: + ... + def get_stream_id_for_event_txn( self, txn: LoggingTransaction, diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index ef069a811..a5a2dab21 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -86,6 +86,8 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): event.internal_metadata.is_outlier.return_value = False event.depth = stream + self.store._events_stream_cache.entity_has_changed(room_id, stream) + self.get_success( self.store.db_pool.simple_insert( table="events", From e714b8a057f65fe07b4f3939e018e57862980cdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20St=C3=BCckler?= Date: Wed, 29 Jun 2022 18:41:39 +0200 Subject: [PATCH 03/54] Fix documentation header for `allow_public_rooms_over_federation` (#13116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Moritz Stückler Co-authored-by: Patrick Cloke --- changelog.d/13116.doc | 1 + docs/usage/configuration/config_documentation.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13116.doc diff --git a/changelog.d/13116.doc b/changelog.d/13116.doc new file mode 100644 index 000000000..f99be50f4 --- /dev/null +++ b/changelog.d/13116.doc @@ -0,0 +1 @@ +Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 58a74ace4..19eb50449 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -317,7 +317,7 @@ Example configuration: allow_public_rooms_without_auth: true ``` --- -### `allow_public_rooms_without_auth` +### `allow_public_rooms_over_federation` If set to true, allows any other homeserver to fetch the server's public rooms directory via federation. Defaults to false. 
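For context on what the corrected `allow_public_rooms_over_federation` option controls, here is a rough illustration of a federation endpoint gated on such a flag. The class and method names below are invented for the illustration and are not Synapse's actual servlet code; only the documented semantics (default false, 403 when disabled) are taken from the docs above.

```python
from typing import Any, Dict, Tuple


class PublicRoomsOverFederationGate:
    """Illustration only: how a boolean option such as
    `allow_public_rooms_over_federation` might gate a federation endpoint."""

    def __init__(self, config: Dict[str, Any]) -> None:
        # The documented default is false: keep the room directory private
        # over federation unless the admin opts in.
        self._allowed = bool(config.get("allow_public_rooms_over_federation", False))

    def handle_public_rooms_request(self, origin: str) -> Tuple[int, Dict[str, Any]]:
        if not self._allowed:
            return 403, {"errcode": "M_FORBIDDEN", "error": "Room directory is not public"}
        # A real implementation would return the published rooms here.
        return 200, {"chunk": [], "total_room_count_estimate": 0}


gate = PublicRoomsOverFederationGate({"allow_public_rooms_over_federation": True})
print(gate.handle_public_rooms_request("other.example.org"))
```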
From 13e359aec8ae8be8dc56a036ae6d9f2bc1d07385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 29 Jun 2022 19:12:45 +0200 Subject: [PATCH 04/54] Implement MSC3827: Filtering of `/publicRooms` by room type (#13031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/13031.feature | 1 + synapse/api/constants.py | 10 ++ synapse/config/experimental.py | 3 + synapse/handlers/room_list.py | 23 +++- synapse/handlers/stats.py | 3 + synapse/rest/client/versions.py | 2 + synapse/storage/databases/main/room.py | 126 +++++++++++++++++- synapse/storage/databases/main/stats.py | 10 +- .../72/01add_room_type_to_state_stats.sql | 19 +++ tests/rest/client/test_rooms.py | 92 ++++++++++++- tests/storage/databases/main/test_room.py | 69 ++++++++++ 11 files changed, 345 insertions(+), 13 deletions(-) create mode 100644 changelog.d/13031.feature create mode 100644 synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql diff --git a/changelog.d/13031.feature b/changelog.d/13031.feature new file mode 100644 index 000000000..fee8e9d1f --- /dev/null +++ b/changelog.d/13031.feature @@ -0,0 +1 @@ +Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index e1d31cabe..265376411 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -259,3 +259,13 @@ class ReceiptTypes: READ: Final = "m.read" READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" + + +class PublicRoomsFilterFields: + """Fields in the search filter for `/publicRooms` that we understand. + + As defined in https://spec.matrix.org/v1.3/client-server-api/#post_matrixclientv3publicrooms + """ + + GENERIC_SEARCH_TERM: Final = "generic_search_term" + ROOM_TYPES: Final = "org.matrix.msc3827.room_types" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 0a285dba3..ee443cea0 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -87,3 +87,6 @@ class ExperimentalConfig(Config): # MSC3715: dir param on /relations. 
self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) + + # MSC3827: Filtering of /publicRooms by room type + self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 183d4ae3c..29868eb74 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -25,6 +25,7 @@ from synapse.api.constants import ( GuestAccess, HistoryVisibility, JoinRules, + PublicRoomsFilterFields, ) from synapse.api.errors import ( Codes, @@ -181,6 +182,7 @@ class RoomListHandler: == HistoryVisibility.WORLD_READABLE, "guest_can_join": room["guest_access"] == "can_join", "join_rule": room["join_rules"], + "org.matrix.msc3827.room_type": room["room_type"], } # Filter out Nones – rather omit the field altogether @@ -239,7 +241,9 @@ class RoomListHandler: response["chunk"] = results response["total_room_count_estimate"] = await self.store.count_public_rooms( - network_tuple, ignore_non_federatable=from_federation + network_tuple, + ignore_non_federatable=from_federation, + search_filter=search_filter, ) return response @@ -508,8 +512,21 @@ class RoomListNextBatch: def _matches_room_entry(room_entry: JsonDict, search_filter: dict) -> bool: - if search_filter and search_filter.get("generic_search_term", None): - generic_search_term = search_filter["generic_search_term"].upper() + """Determines whether the given search filter matches a room entry returned over + federation. + + Only used if the remote server does not support MSC2197 remote-filtered search, and + hence does not support MSC3827 filtering of `/publicRooms` by room type either. + + In this case, we cannot apply the `room_type` filter since no `room_type` field is + returned. + """ + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + generic_search_term = search_filter[ + PublicRoomsFilterFields.GENERIC_SEARCH_TERM + ].upper() if generic_search_term in room_entry.get("name", "").upper(): return True elif generic_search_term in room_entry.get("topic", "").upper(): diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index f45e06eb0..5c01482ac 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -271,6 +271,9 @@ class StatsHandler: room_state["is_federatable"] = ( event_content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event_content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type elif typ == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") elif typ == EventTypes.RoomHistoryVisibility: diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c1bd775fe..f4f06563d 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,6 +95,8 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285": self.config.experimental.msc2285_enabled, + # Supports filtering of /publicRooms by room type MSC3827 + "org.matrix.msc3827": self.config.experimental.msc3827_enabled, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 
5760d3428..d8026e3fa 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -32,12 +32,17 @@ from typing import ( import attr -from synapse.api.constants import EventContentFields, EventTypes, JoinRules +from synapse.api.constants import ( + EventContentFields, + EventTypes, + JoinRules, + PublicRoomsFilterFields, +) from synapse.api.errors import StoreError from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.events import EventBase -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -199,10 +204,29 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): desc="get_public_room_ids", ) + def _construct_room_type_where_clause( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[Union[str, None], List[str]]: + if not room_types or not self.config.experimental.msc3827_enabled: + return None, [] + else: + # We use None when we want get rooms without a type + is_null_clause = "" + if None in room_types: + is_null_clause = "OR room_type IS NULL" + room_types = [value for value in room_types if value is not None] + + list_clause, args = make_in_list_sql_clause( + self.database_engine, "room_type", room_types + ) + + return f"({list_clause} {is_null_clause})", args + async def count_public_rooms( self, network_tuple: Optional[ThirdPartyInstanceID], ignore_non_federatable: bool, + search_filter: Optional[dict], ) -> int: """Counts the number of public rooms as tracked in the room_stats_current and room_stats_state table. @@ -210,11 +234,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): Args: network_tuple ignore_non_federatable: If true filters out non-federatable rooms + search_filter """ def _count_public_rooms_txn(txn: LoggingTransaction) -> int: query_args = [] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + room_type_clause = f" AND {room_type_clause}" if room_type_clause else "" + query_args += args + if network_tuple: if network_tuple.appservice_id: published_sql = """ @@ -249,6 +282,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) + {room_type_clause} AND joined_members > 0 """ @@ -347,8 +381,12 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): if ignore_non_federatable: where_clauses.append("is_federatable") - if search_filter and search_filter.get("generic_search_term", None): - search_term = "%" + search_filter["generic_search_term"] + "%" + if search_filter and search_filter.get( + PublicRoomsFilterFields.GENERIC_SEARCH_TERM, None + ): + search_term = ( + "%" + search_filter[PublicRoomsFilterFields.GENERIC_SEARCH_TERM] + "%" + ) where_clauses.append( """ @@ -365,6 +403,15 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): search_term.lower(), ] + room_type_clause, args = self._construct_room_type_where_clause( + search_filter.get(PublicRoomsFilterFields.ROOM_TYPES, None) + if search_filter + else None + ) + if room_type_clause: + where_clauses.append(room_type_clause) + query_args += args + where_clause = "" if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) @@ -373,7 +420,7 @@ class 
RoomWorkerStore(CacheInvalidationWorkerStore): sql = f""" SELECT room_id, name, topic, canonical_alias, joined_members, - avatar, history_visibility, guest_access, join_rules + avatar, history_visibility, guest_access, join_rules, room_type FROM ( {published_sql} ) published @@ -1166,6 +1213,7 @@ class _BackgroundUpdates: POPULATE_ROOM_DEPTH_MIN_DEPTH2 = "populate_room_depth_min_depth2" REPLACE_ROOM_DEPTH_MIN_DEPTH = "replace_room_depth_min_depth" POPULATE_ROOMS_CREATOR_COLUMN = "populate_rooms_creator_column" + ADD_ROOM_TYPE_COLUMN = "add_room_type_column" _REPLACE_ROOM_DEPTH_SQL_COMMANDS = ( @@ -1200,6 +1248,11 @@ class RoomBackgroundUpdateStore(SQLBaseStore): self._background_add_rooms_room_version_column, ) + self.db_pool.updates.register_background_update_handler( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + self._background_add_room_type_column, + ) + # BG updates to change the type of room_depth.min_depth self.db_pool.updates.register_background_update_handler( _BackgroundUpdates.POPULATE_ROOM_DEPTH_MIN_DEPTH2, @@ -1569,6 +1622,69 @@ class RoomBackgroundUpdateStore(SQLBaseStore): return batch_size + async def _background_add_room_type_column( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to go and add room_type information to `room_stats_state` + table from `event_json` table. + """ + + last_room_id = progress.get("room_id", "") + + def _background_add_room_type_column_txn( + txn: LoggingTransaction, + ) -> bool: + sql = """ + SELECT state.room_id, json FROM event_json + INNER JOIN current_state_events AS state USING (event_id) + WHERE state.room_id > ? AND type = 'm.room.create' + ORDER BY state.room_id + LIMIT ? + """ + + txn.execute(sql, (last_room_id, batch_size)) + room_id_to_create_event_results = txn.fetchall() + + new_last_room_id = None + for room_id, event_json in room_id_to_create_event_results: + event_dict = db_to_json(event_json) + + room_type = event_dict.get("content", {}).get( + EventContentFields.ROOM_TYPE, None + ) + if isinstance(room_type, str): + self.db_pool.simple_update_txn( + txn, + table="room_stats_state", + keyvalues={"room_id": room_id}, + updatevalues={"room_type": room_type}, + ) + + new_last_room_id = room_id + + if new_last_room_id is None: + return True + + self.db_pool.updates._background_update_progress_txn( + txn, + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + {"room_id": new_last_room_id}, + ) + + return False + + end = await self.db_pool.runInteraction( + "_background_add_room_type_column", + _background_add_room_type_column_txn, + ) + + if end: + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN + ) + + return batch_size + class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): def __init__( diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 82851ffa9..b4c652acf 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,7 +16,7 @@ import logging from enum import Enum from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from typing_extensions import Counter @@ -238,6 +238,7 @@ class StatsStore(StateDeltasStore): * avatar * canonical_alias * guest_access + * room_type A is_federatable key can also be included with a boolean value. 
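Both the background update above and the stats handler hinge on the same step: reading the `m.room.create` event's content and copying its room type into `room_stats_state` only when it is a string. A condensed, standalone sketch of that extraction (a hypothetical helper, not the actual store method; the `"type"` content key and the `"m.space"` value follow the tests in this patch):

```python
import json
from typing import Optional

# Key used for the room type inside the m.room.create event content,
# per the test data in this patch; "m.space" is the value used for spaces.
ROOM_TYPE_KEY = "type"


def extract_room_type(create_event_json: str) -> Optional[str]:
    """Pull the room type out of a serialised m.room.create event.

    Returns None for ordinary rooms or malformed values, mirroring the
    isinstance(room_type, str) checks above."""
    event = json.loads(create_event_json)
    room_type = event.get("content", {}).get(ROOM_TYPE_KEY)
    # Only string values are copied into `room_stats_state.room_type`;
    # anything else is treated as "no room type".
    return room_type if isinstance(room_type, str) else None


space = json.dumps({"type": "m.room.create", "content": {"type": "m.space"}})
plain = json.dumps({"type": "m.room.create", "content": {"creator": "@alice:example.org"}})
print(extract_room_type(space))  # m.space
print(extract_room_type(plain))  # None
```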
@@ -263,6 +264,7 @@ class StatsStore(StateDeltasStore): "avatar", "canonical_alias", "guest_access", + "room_type", ): field = fields.get(col, sentinel) if field is not sentinel and (not isinstance(field, str) or "\0" in field): @@ -572,7 +574,7 @@ class StatsStore(StateDeltasStore): state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined] - room_state = { + room_state: Dict[str, Union[None, bool, str]] = { "join_rules": None, "history_visibility": None, "encryption": None, @@ -581,6 +583,7 @@ class StatsStore(StateDeltasStore): "avatar": None, "canonical_alias": None, "is_federatable": True, + "room_type": None, } for event in state_event_map.values(): @@ -604,6 +607,9 @@ class StatsStore(StateDeltasStore): room_state["is_federatable"] = ( event.content.get(EventContentFields.FEDERATE, True) is True ) + room_type = event.content.get(EventContentFields.ROOM_TYPE) + if isinstance(room_type, str): + room_state["room_type"] = room_type await self.update_room_state(room_id, room_state) diff --git a/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql new file mode 100644 index 000000000..d5e076547 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/01add_room_type_to_state_stats.sql @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +ALTER TABLE room_stats_state ADD room_type TEXT; + +INSERT INTO background_updates (update_name, progress_json) + VALUES ('add_room_type_column', '{}'); diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 35c59ee9e..1ccd96a20 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -18,7 +18,7 @@ """Tests REST events for /rooms paths.""" import json -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse @@ -33,7 +33,9 @@ from synapse.api.constants import ( EventContentFields, EventTypes, Membership, + PublicRoomsFilterFields, RelationTypes, + RoomTypes, ) from synapse.api.errors import Codes, HttpResponseException from synapse.handlers.pagination import PurgeStatus @@ -1858,6 +1860,90 @@ class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.result) +class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + + config = self.default_config() + config["allow_public_rooms_without_auth"] = True + config["experimental_features"] = {"msc3827_enabled": True} + self.hs = self.setup_test_homeserver(config=config) + self.url = b"/_matrix/client/r0/publicRooms" + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + user = self.register_user("alice", "pass") + self.token = self.login(user, "pass") + + # Create a room + self.helper.create_room_as( + user, + is_public=True, + extra_content={"visibility": "public"}, + tok=self.token, + ) + # Create a space + self.helper.create_room_as( + user, + is_public=True, + extra_content={ + "visibility": "public", + "creation_content": {EventContentFields.ROOM_TYPE: RoomTypes.SPACE}, + }, + tok=self.token, + ) + + def make_public_rooms_request( + self, room_types: Union[List[Union[str, None]], None] + ) -> Tuple[List[Dict[str, Any]], int]: + channel = self.make_request( + "POST", + self.url, + {"filter": {PublicRoomsFilterFields.ROOM_TYPES: room_types}}, + self.token, + ) + chunk = channel.json_body["chunk"] + count = channel.json_body["total_room_count_estimate"] + + self.assertEqual(len(chunk), count) + + return chunk, count + + def test_returns_both_rooms_and_spaces_if_no_filter(self) -> None: + chunk, count = self.make_public_rooms_request(None) + + self.assertEqual(count, 2) + + def test_returns_only_rooms_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request([None]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), None) + + def test_returns_only_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space"]) + + self.assertEqual(count, 1) + self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), "m.space") + + def test_returns_both_rooms_and_space_based_on_filter(self) -> None: + chunk, count = self.make_public_rooms_request(["m.space", None]) + + self.assertEqual(count, 2) + + def test_returns_both_rooms_and_spaces_if_array_is_empty(self) -> None: + chunk, count = self.make_public_rooms_request([]) + + self.assertEqual(count, 2) + + class 
PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): """Test that we correctly fallback to local filtering if a remote server doesn't support search. @@ -1882,7 +1968,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): "Simple test for searching rooms over federation" self.federation_client.get_public_rooms.return_value = make_awaitable({}) # type: ignore[attr-defined] - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", @@ -1911,7 +1997,7 @@ class PublicRoomsTestRemoteSearchFallbackTestCase(unittest.HomeserverTestCase): make_awaitable({}), ) - search_filter = {"generic_search_term": "foobar"} + search_filter = {PublicRoomsFilterFields.GENERIC_SEARCH_TERM: "foobar"} channel = self.make_request( "POST", diff --git a/tests/storage/databases/main/test_room.py b/tests/storage/databases/main/test_room.py index 9abd0cb44..1edb61963 100644 --- a/tests/storage/databases/main/test_room.py +++ b/tests/storage/databases/main/test_room.py @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json + +from synapse.api.constants import RoomTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.storage.databases.main.room import _BackgroundUpdates @@ -91,3 +94,69 @@ class RoomBackgroundUpdateStoreTestCase(HomeserverTestCase): ) ) self.assertEqual(room_creator_after, self.user_id) + + def test_background_add_room_type_column(self): + """Test that the background update to populate the `room_type` column in + `room_stats_state` works properly. + """ + + # Create a room without a type + room_id = self._generate_room() + + # Get event_id of the m.room.create event + event_id = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={ + "room_id": room_id, + "type": "m.room.create", + }, + retcol="event_id", + ) + ) + + # Fake a room creation event with a room type + event = { + "content": { + "creator": "@user:server.org", + "room_version": "9", + "type": RoomTypes.SPACE, + }, + "type": "m.room.create", + } + self.get_success( + self.store.db_pool.simple_update( + table="event_json", + keyvalues={"event_id": event_id}, + updatevalues={"json": json.dumps(event)}, + desc="test", + ) + ) + + # Insert and run the background update + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN, + "progress_json": "{}", + }, + ) + ) + + # ... 
and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + # Now let's actually drive the updates to completion + self.wait_for_background_updates() + + # Make sure the background update filled in the room type + room_type_after = self.get_success( + self.store.db_pool.simple_select_one_onecol( + table="room_stats_state", + keyvalues={"room_id": room_id}, + retcol="room_type", + allow_none=True, + ) + ) + self.assertEqual(room_type_after, RoomTypes.SPACE) From 4d3b8fb23f0c9288b311efd7def83b641bda82b8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 30 Jun 2022 10:43:24 +0200 Subject: [PATCH 05/54] Don't actually one-line the SQL statements we send to the DB (#13129) --- changelog.d/13129.misc | 1 + synapse/storage/database.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13129.misc diff --git a/changelog.d/13129.misc b/changelog.d/13129.misc new file mode 100644 index 000000000..4c2dbb705 --- /dev/null +++ b/changelog.d/13129.misc @@ -0,0 +1 @@ +Only one-line SQL statements for logging and tracing. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e8c63cf56..e21ab0851 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -366,10 +366,11 @@ class LoggingTransaction: *args: P.args, **kwargs: P.kwargs, ) -> R: - sql = self._make_sql_one_line(sql) + # Generate a one-line version of the SQL to better log it. + one_line_sql = self._make_sql_one_line(sql) # TODO(paul): Maybe use 'info' and 'debug' for values? - sql_logger.debug("[SQL] {%s} %s", self.name, sql) + sql_logger.debug("[SQL] {%s} %s", self.name, one_line_sql) sql = self.database_engine.convert_param_style(sql) if args: @@ -389,7 +390,7 @@ class LoggingTransaction: "db.query", tags={ opentracing.tags.DATABASE_TYPE: "sql", - opentracing.tags.DATABASE_STATEMENT: sql, + opentracing.tags.DATABASE_STATEMENT: one_line_sql, }, ): return func(sql, *args, **kwargs) From 80c7a06777507beb5401718dd07fbcb1cd377de1 Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 30 Jun 2022 11:44:47 +0200 Subject: [PATCH 06/54] Rate limiting invites per issuer (#13125) Co-authored-by: reivilibre --- changelog.d/13125.feature | 1 + synapse/config/ratelimiting.py | 5 +++++ synapse/handlers/room_member.py | 20 ++++++++++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13125.feature diff --git a/changelog.d/13125.feature b/changelog.d/13125.feature new file mode 100644 index 000000000..9b0f60954 --- /dev/null +++ b/changelog.d/13125.feature @@ -0,0 +1 @@ +Add a rate limit for local users sending invites. 
\ No newline at end of file diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index d4090a1f9..4fc1784ef 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -136,6 +136,11 @@ class RatelimitConfig(Config): defaults={"per_second": 0.003, "burst_count": 5}, ) + self.rc_invites_per_issuer = RateLimitConfig( + config.get("rc_invites", {}).get("per_issuer", {}), + defaults={"per_second": 0.3, "burst_count": 10}, + ) + self.rc_third_party_invite = RateLimitConfig( config.get("rc_third_party_invite", {}), defaults={ diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index bf6bae123..5648ab4bf 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -101,19 +101,33 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): burst_count=hs.config.ratelimiting.rc_joins_remote.burst_count, ) + # Ratelimiter for invites, keyed by room (across all issuers, all + # recipients). self._invites_per_room_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_room.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_room.burst_count, ) - self._invites_per_user_limiter = Ratelimiter( + + # Ratelimiter for invites, keyed by recipient (across all rooms, all + # issuers). + self._invites_per_recipient_limiter = Ratelimiter( store=self.store, clock=self.clock, rate_hz=hs.config.ratelimiting.rc_invites_per_user.per_second, burst_count=hs.config.ratelimiting.rc_invites_per_user.burst_count, ) + # Ratelimiter for invites, keyed by issuer (across all rooms, all + # recipients). + self._invites_per_issuer_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + rate_hz=hs.config.ratelimiting.rc_invites_per_issuer.per_second, + burst_count=hs.config.ratelimiting.rc_invites_per_issuer.burst_count, + ) + self._third_party_invite_limiter = Ratelimiter( store=self.store, clock=self.clock, @@ -258,7 +272,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if room_id: await self._invites_per_room_limiter.ratelimit(requester, room_id) - await self._invites_per_user_limiter.ratelimit(requester, invitee_user_id) + await self._invites_per_recipient_limiter.ratelimit(requester, invitee_user_id) + if requester is not None: + await self._invites_per_issuer_limiter.ratelimit(requester) async def _local_membership_update( self, From 09f6e430254889a8633787a09f154075a17aff23 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 30 Jun 2022 11:45:47 +0200 Subject: [PATCH 07/54] Actually typecheck `tests.test_server` (#13135) --- changelog.d/13135.misc | 1 + mypy.ini | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 changelog.d/13135.misc diff --git a/changelog.d/13135.misc b/changelog.d/13135.misc new file mode 100644 index 000000000..f096dd874 --- /dev/null +++ b/changelog.d/13135.misc @@ -0,0 +1 @@ +Enforce type annotations for `tests.test_server`. diff --git a/mypy.ini b/mypy.ini index 4b08f45c6..e062cf43a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -56,7 +56,6 @@ exclude = (?x) |tests/server.py |tests/server_notices/test_resource_limits_server_notices.py |tests/test_metrics.py - |tests/test_server.py |tests/test_state.py |tests/test_terms_auth.py |tests/util/caches/test_cached_call.py From 9667bad55d8b50fe08990a8cfd2ac82c8540bcc1 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 30 Jun 2022 12:58:12 +0100 Subject: [PATCH 08/54] Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. 
(#13127) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- .github/workflows/tests.yml | 49 +---- changelog.d/13127.misc | 1 + .../complement/conf/start_for_complement.sh | 3 + .../conf-workers/synapse.supervisord.conf.j2 | 26 ++- docker/conf/log.config | 4 + docker/configure_workers_and_start.py | 7 + docker/start.py | 6 +- synapse/app/_base.py | 8 +- synapse/app/complement_fork_starter.py | 190 ++++++++++++++++++ 9 files changed, 243 insertions(+), 51 deletions(-) create mode 100644 changelog.d/13127.misc create mode 100644 synapse/app/complement_fork_starter.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2e4ee723d..a775f70c4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -328,51 +328,8 @@ jobs: - arrangement: monolith database: Postgres - steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 - with: - path: synapse - - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh - - - run: | - set -o pipefail - POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt - shell: bash - name: Run Complement Tests - - # We only run the workers tests on `develop` for now, because they're too slow to wait for on PRs. - # Sadly, you can't have an `if` condition on the value of a matrix, so this is a temporary, separate job for now. - # GitHub Actions doesn't support YAML anchors, so it's full-on duplication for now. - complement-developonly: - if: "${{ !failure() && !cancelled() && (github.ref == 'refs/heads/develop') }}" - needs: linting-done - runs-on: ubuntu-latest - - name: "Complement Workers (develop only)" + - arrangement: workers + database: Postgres steps: # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. 
@@ -406,7 +363,7 @@ jobs: - run: | set -o pipefail - WORKERS=1 COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt shell: bash name: Run Complement Tests diff --git a/changelog.d/13127.misc b/changelog.d/13127.misc new file mode 100644 index 000000000..1414811e0 --- /dev/null +++ b/changelog.d/13127.misc @@ -0,0 +1 @@ +Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. \ No newline at end of file diff --git a/docker/complement/conf/start_for_complement.sh b/docker/complement/conf/start_for_complement.sh index 773c7db22..cc6482f76 100755 --- a/docker/complement/conf/start_for_complement.sh +++ b/docker/complement/conf/start_for_complement.sh @@ -59,6 +59,9 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then synchrotron, \ appservice, \ pusher" + + # Improve startup times by using a launcher based on fork() + export SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER=1 else # Empty string here means 'main process only' export SYNAPSE_WORKER_TYPES="" diff --git a/docker/conf-workers/synapse.supervisord.conf.j2 b/docker/conf-workers/synapse.supervisord.conf.j2 index 644345049..481eb4fc9 100644 --- a/docker/conf-workers/synapse.supervisord.conf.j2 +++ b/docker/conf-workers/synapse.supervisord.conf.j2 @@ -1,3 +1,24 @@ +{% if use_forking_launcher %} +[program:synapse_fork] +command=/usr/local/bin/python -m synapse.app.complement_fork_starter + {{ main_config_path }} + synapse.app.homeserver + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + {%- for worker in workers %} + -- {{ worker.app }} + --config-path="{{ main_config_path }}" + --config-path=/conf/workers/shared.yaml + --config-path=/conf/workers/{{ worker.name }}.yaml + {%- endfor %} +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +autorestart=unexpected +exitcodes=0 + +{% else %} [program:synapse_main] command=/usr/local/bin/prefix-log /usr/local/bin/python -m synapse.app.homeserver --config-path="{{ main_config_path }}" @@ -13,7 +34,7 @@ autorestart=unexpected exitcodes=0 -{% for worker in workers %} + {% for worker in workers %} [program:synapse_{{ worker.name }}] command=/usr/local/bin/prefix-log /usr/local/bin/python -m {{ worker.app }} --config-path="{{ main_config_path }}" @@ -27,4 +48,5 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 -{% endfor %} + {% endfor %} +{% endif %} diff --git a/docker/conf/log.config b/docker/conf/log.config index dc8c70bef..d9e85aa53 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -2,7 +2,11 @@ version: 1 formatters: precise: + {% if include_worker_name_in_log_line %} + format: '{{ worker_name }} | %(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% else %} format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s' + {% endif %} handlers: {% if LOG_FILE_PATH %} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 2134b648d..4521f99eb 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -26,6 +26,9 @@ # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. 
If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. +# * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, +# only intended for usage in Complement at the moment. +# No stability guarantees are provided. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -525,6 +528,7 @@ def generate_worker_files( "/etc/supervisor/conf.d/synapse.conf", workers=worker_descriptors, main_config_path=config_path, + use_forking_launcher=environ.get("SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER"), ) # healthcheck config @@ -560,6 +564,9 @@ def generate_worker_log_config( log_config_filepath, worker_name=worker_name, **extra_log_template_args, + include_worker_name_in_log_line=environ.get( + "SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER" + ), ) return log_config_filepath diff --git a/docker/start.py b/docker/start.py index 4ac8f0347..5a98dce55 100755 --- a/docker/start.py +++ b/docker/start.py @@ -110,7 +110,11 @@ def generate_config_from_template( log_config_file = environ["SYNAPSE_LOG_CONFIG"] log("Generating log config file " + log_config_file) - convert("/conf/log.config", log_config_file, environ) + convert( + "/conf/log.config", + log_config_file, + {**environ, "include_worker_name_in_log_line": False}, + ) # Hopefully we already have a signing key, but generate one if not. args = [ diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 363ac98ea..923891ae0 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -106,7 +106,9 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs) def start_worker_reactor( appname: str, config: HomeServerConfig, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process @@ -141,7 +143,9 @@ def start_reactor( daemonize: bool, print_pidfile: bool, logger: logging.Logger, - run_command: Callable[[], None] = reactor.run, + # Use a lambda to avoid binding to a given reactor at import time. + # (needed when synapse.app.complement_fork_starter is being used) + run_command: Callable[[], None] = lambda: reactor.run(), ) -> None: """Run the reactor in the main process diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py new file mode 100644 index 000000000..89eb07df2 --- /dev/null +++ b/synapse/app/complement_fork_starter.py @@ -0,0 +1,190 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ## What this script does +# +# This script spawns multiple workers, whilst only going through the code loading +# process once. 
The net effect is that start-up time for a swarm of workers is +# reduced, particularly in CPU-constrained environments. +# +# Before the workers are spawned, the database is prepared in order to avoid the +# workers racing. +# +# ## Stability +# +# This script is only intended for use within the Synapse images for the +# Complement test suite. +# There are currently no stability guarantees whatsoever; especially not about: +# - whether it will continue to exist in future versions; +# - the format of its command-line arguments; or +# - any details about its behaviour or principles of operation. +# +# ## Usage +# +# The first argument should be the path to the database configuration, used to +# set up the database. The rest of the arguments are used as follows: +# Each worker is specified as an argument group (each argument group is +# separated by '--'). +# The first argument in each argument group is the Python module name of the application +# to start. Further arguments are then passed to that module as-is. +# +# ## Example +# +# python -m synapse.app.complement_fork_starter path_to_db_config.yaml \ +# synapse.app.homeserver [args..] -- \ +# synapse.app.generic_worker [args..] -- \ +# ... +# synapse.app.generic_worker [args..] +# +import argparse +import importlib +import itertools +import multiprocessing +import sys +from typing import Any, Callable, List + +from twisted.internet.main import installReactor + + +class ProxiedReactor: + """ + Twisted tracks the 'installed' reactor as a global variable. + (Actually, it does some module trickery, but the effect is similar.) + + The default EpollReactor is buggy if it's created before a process is + forked, then used in the child. + See https://twistedmatrix.com/trac/ticket/4759#comment:17. + + However, importing certain Twisted modules will automatically create and + install a reactor if one hasn't already been installed. + It's not normally possible to re-install a reactor. + + Given the goal of launching workers with fork() to only import the code once, + this presents a conflict. + Our work around is to 'install' this ProxiedReactor which prevents Twisted + from creating and installing one, but which lets us replace the actual reactor + in use later on. + """ + + def __init__(self) -> None: + self.___reactor_target: Any = None + + def _install_real_reactor(self, new_reactor: Any) -> None: + """ + Install a real reactor for this ProxiedReactor to forward lookups onto. + + This method is specific to our ProxiedReactor and should not clash with + any names used on an actual Twisted reactor. + """ + self.___reactor_target = new_reactor + + def __getattr__(self, attr_name: str) -> Any: + return getattr(self.___reactor_target, attr_name) + + +def _worker_entrypoint( + func: Callable[[], None], proxy_reactor: ProxiedReactor, args: List[str] +) -> None: + """ + Entrypoint for a forked worker process. + + We just need to set up the command-line arguments, create our real reactor + and then kick off the worker's main() function. + """ + + sys.argv = args + + from twisted.internet.epollreactor import EPollReactor + + proxy_reactor._install_real_reactor(EPollReactor()) + func() + + +def main() -> None: + """ + Entrypoint for the forking launcher. + """ + parser = argparse.ArgumentParser() + parser.add_argument("db_config", help="Path to database config file") + parser.add_argument( + "args", + nargs="...", + help="Argument groups separated by `--`. " + "The first argument of each group is a Synapse app name. 
" + "Subsequent arguments are passed through.", + ) + ns = parser.parse_args() + + # Split up the subsequent arguments into each workers' arguments; + # `--` is our delimiter of choice. + args_by_worker: List[List[str]] = [ + list(args) + for cond, args in itertools.groupby(ns.args, lambda ele: ele != "--") + if cond and args + ] + + # Prevent Twisted from installing a shared reactor that all the workers will + # inherit when we fork(), by installing our own beforehand. + proxy_reactor = ProxiedReactor() + installReactor(proxy_reactor) + + # Import the entrypoints for all the workers. + worker_functions = [] + for worker_args in args_by_worker: + worker_module = importlib.import_module(worker_args[0]) + worker_functions.append(worker_module.main) + + # We need to prepare the database first as otherwise all the workers will + # try to create a schema version table and some will crash out. + from synapse._scripts import update_synapse_database + + update_proc = multiprocessing.Process( + target=_worker_entrypoint, + args=( + update_synapse_database.main, + proxy_reactor, + [ + "update_synapse_database", + "--database-config", + ns.db_config, + "--run-background-updates", + ], + ), + ) + print("===== PREPARING DATABASE =====", file=sys.stderr) + update_proc.start() + update_proc.join() + print("===== PREPARED DATABASE =====", file=sys.stderr) + + # At this point, we've imported all the main entrypoints for all the workers. + # Now we basically just fork() out to create the workers we need. + # Because we're using fork(), all the workers get a clone of this launcher's + # memory space and don't need to repeat the work of loading the code! + # Instead of using fork() directly, we use the multiprocessing library, + # which uses fork() on Unix platforms. + processes = [] + for (func, worker_args) in zip(worker_functions, args_by_worker): + process = multiprocessing.Process( + target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) + ) + process.start() + processes.append(process) + + # Be a good parent and wait for our children to die before exiting. + for process in processes: + process.join() + + +if __name__ == "__main__": + main() From 6ad012ef89c966cbb3616c1be63d964db48d49ca Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 30 Jun 2022 09:05:06 -0400 Subject: [PATCH 09/54] More type hints for `synapse.logging` (#13103) Completes type hints for synapse.logging.scopecontextmanager and (partially) for synapse.logging.opentracing. --- changelog.d/13103.misc | 1 + mypy.ini | 3 -- synapse/logging/opentracing.py | 61 +++++++++++++++----------- synapse/logging/scopecontextmanager.py | 35 ++++++++------- tests/logging/test_opentracing.py | 2 +- 5 files changed, 56 insertions(+), 46 deletions(-) create mode 100644 changelog.d/13103.misc diff --git a/changelog.d/13103.misc b/changelog.d/13103.misc new file mode 100644 index 000000000..4de5f9e90 --- /dev/null +++ b/changelog.d/13103.misc @@ -0,0 +1 @@ +Add missing type hints to `synapse.logging`. diff --git a/mypy.ini b/mypy.ini index e062cf43a..b9b16860d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -88,9 +88,6 @@ disallow_untyped_defs = False [mypy-synapse.logging.opentracing] disallow_untyped_defs = False -[mypy-synapse.logging.scopecontextmanager] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 903ec40c8..50c57940f 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -164,6 +164,7 @@ Gotchas with an active span? """ import contextlib +import enum import inspect import logging import re @@ -268,7 +269,7 @@ try: _reporter: Reporter = attr.Factory(Reporter) - def set_process(self, *args, **kwargs): + def set_process(self, *args: Any, **kwargs: Any) -> None: return self._reporter.set_process(*args, **kwargs) def report_span(self, span: "opentracing.Span") -> None: @@ -319,7 +320,11 @@ _homeserver_whitelist: Optional[Pattern[str]] = None # Util methods -Sentinel = object() + +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup. + sentinel = object() P = ParamSpec("P") @@ -339,12 +344,12 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message, ret=None): +def ensure_active_span(message: str, ret=None): """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: - message (str): Message which fills in "There was no active span when trying to %s" + message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. ret (object): return value if opentracing is None or there is no active span. @@ -402,7 +407,7 @@ def init_tracer(hs: "HomeServer") -> None: config = JaegerConfig( config=hs.config.tracing.jaeger_config, service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}", - scope_manager=LogContextScopeManager(hs.config), + scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), ) @@ -451,15 +456,15 @@ def whitelisted_homeserver(destination: str) -> bool: # Could use kwargs but I want these to be explicit def start_active_span( - operation_name, - child_of=None, - references=None, - tags=None, - start_time=None, - ignore_active_span=False, - finish_on_close=True, + operation_name: str, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, + references: Optional[List["opentracing.Reference"]] = None, + tags: Optional[Dict[str, str]] = None, + start_time: Optional[float] = None, + ignore_active_span: bool = False, + finish_on_close: bool = True, *, - tracer=None, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span. 
@@ -493,11 +498,11 @@ def start_active_span( def start_active_span_follows_from( operation_name: str, contexts: Collection, - child_of=None, + child_of: Optional[Union["opentracing.Span", "opentracing.SpanContext"]] = None, start_time: Optional[float] = None, *, - inherit_force_tracing=False, - tracer=None, + inherit_force_tracing: bool = False, + tracer: Optional["opentracing.Tracer"] = None, ): """Starts an active opentracing span, with additional references to previous spans @@ -540,7 +545,7 @@ def start_active_span_from_edu( edu_content: Dict[str, Any], operation_name: str, references: Optional[List["opentracing.Reference"]] = None, - tags: Optional[Dict] = None, + tags: Optional[Dict[str, str]] = None, start_time: Optional[float] = None, ignore_active_span: bool = False, finish_on_close: bool = True, @@ -617,23 +622,27 @@ def set_operation_name(operation_name: str) -> None: @only_if_tracing -def force_tracing(span=Sentinel) -> None: +def force_tracing( + span: Union["opentracing.Span", _Sentinel] = _Sentinel.sentinel +) -> None: """Force sampling for the active/given span and its children. Args: span: span to force tracing for. By default, the active span. """ - if span is Sentinel: - span = opentracing.tracer.active_span - if span is None: + if isinstance(span, _Sentinel): + span_to_trace = opentracing.tracer.active_span + else: + span_to_trace = span + if span_to_trace is None: logger.error("No active span in force_tracing") return - span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) + span_to_trace.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) # also set a bit of baggage, so that we have a way of figuring out if # it is enabled later - span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") + span_to_trace.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") def is_context_forced_tracing( @@ -789,7 +798,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte # Tracing decorators -def trace(func=None, opname=None): +def trace(func=None, opname: Optional[str] = None): """ Decorator to trace a function. Sets the operation name to that of the function's or that given @@ -822,11 +831,11 @@ def trace(func=None, opname=None): result = func(*args, **kwargs) if isinstance(result, defer.Deferred): - def call_back(result): + def call_back(result: R) -> R: scope.__exit__(None, None, None) return result - def err_back(result): + def err_back(result: R) -> R: scope.__exit__(None, None, None) return result diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index a26a1a58e..10877bdfc 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -16,11 +16,15 @@ import logging from types import TracebackType from typing import Optional, Type -from opentracing import Scope, ScopeManager +from opentracing import Scope, ScopeManager, Span import twisted -from synapse.logging.context import current_context, nested_logging_context +from synapse.logging.context import ( + LoggingContext, + current_context, + nested_logging_context, +) logger = logging.getLogger(__name__) @@ -35,11 +39,11 @@ class LogContextScopeManager(ScopeManager): but currently that doesn't work due to https://twistedmatrix.com/trac/ticket/10301. """ - def __init__(self, config): + def __init__(self) -> None: pass @property - def active(self): + def active(self) -> Optional[Scope]: """ Returns the currently active Scope which can be used to access the currently active Scope.span. 
@@ -48,19 +52,18 @@ class LogContextScopeManager(ScopeManager): Tracer.start_active_span() time. Return: - (Scope) : the Scope that is active, or None if not - available. + The Scope that is active, or None if not available. """ ctx = current_context() return ctx.scope - def activate(self, span, finish_on_close): + def activate(self, span: Span, finish_on_close: bool) -> Scope: """ Makes a Span active. Args - span (Span): the span that should become active. - finish_on_close (Boolean): whether Span should be automatically - finished when Scope.close() is called. + span: the span that should become active. + finish_on_close: whether Span should be automatically finished when + Scope.close() is called. Returns: Scope to control the end of the active period for @@ -112,8 +115,8 @@ class _LogContextScope(Scope): def __init__( self, manager: LogContextScopeManager, - span, - logcontext, + span: Span, + logcontext: LoggingContext, enter_logcontext: bool, finish_on_close: bool, ): @@ -121,13 +124,13 @@ class _LogContextScope(Scope): Args: manager: the manager that is responsible for this scope. - span (Span): + span: the opentracing span which this scope represents the local lifetime for. - logcontext (LogContext): - the logcontext to which this scope is attached. + logcontext: + the log context to which this scope is attached. enter_logcontext: - if True the logcontext will be exited when the scope is finished + if True the log context will be exited when the scope is finished finish_on_close: if True finish the span when the scope is closed """ diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index e430941d2..40148d503 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -50,7 +50,7 @@ class LogContextScopeManagerTestCase(TestCase): # global variables that power opentracing. We create our own tracer instance # and test with it. - scope_manager = LogContextScopeManager({}) + scope_manager = LogContextScopeManager() config = jaeger_client.config.Config( config={}, service_name="test", scope_manager=scope_manager ) From 0ceb3af10b88f9f195fd42db12d33dafda8e6261 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 30 Jun 2022 15:59:11 +0100 Subject: [PATCH 10/54] Add a link to the configuration manual from the homeserver sample config documentation page (#13139) --- changelog.d/13139.doc | 1 + docs/usage/configuration/homeserver_sample_config.md | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/13139.doc diff --git a/changelog.d/13139.doc b/changelog.d/13139.doc new file mode 100644 index 000000000..f5d99d461 --- /dev/null +++ b/changelog.d/13139.doc @@ -0,0 +1 @@ +Add a link to the configuration manual from the homeserver sample config documentation. diff --git a/docs/usage/configuration/homeserver_sample_config.md b/docs/usage/configuration/homeserver_sample_config.md index 11e806998..2dbfb35ba 100644 --- a/docs/usage/configuration/homeserver_sample_config.md +++ b/docs/usage/configuration/homeserver_sample_config.md @@ -9,6 +9,9 @@ a real homeserver.yaml. Instead, if you are starting from scratch, please genera a fresh config using Synapse by following the instructions in [Installation](../../setup/installation.md). +Documentation for all configuration options can be found in the +[Configuration Manual](./config_documentation.md). 
+ ```yaml {{#include ../../sample_config.yaml}} ``` From 8330fc9953032f21eb4c7d5f0627c1e6aba2459c Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 30 Jun 2022 09:21:39 -0700 Subject: [PATCH 11/54] Cleanup references to sample config in the docs and redirect users to configuration manual (#13077) --- changelog.d/13077.doc | 3 + docs/admin_api/user_admin_api.md | 5 +- docs/code_style.md | 93 +++++++------------ docs/jwt.md | 5 +- docs/manhole.md | 6 +- docs/message_retention_policies.md | 18 ++-- docs/openid.md | 4 +- docs/setup/forward_proxy.md | 4 +- docs/setup/installation.md | 14 +-- .../configuration/config_documentation.md | 2 +- .../user_authentication/single_sign_on/cas.md | 4 +- synapse/config/emailconfig.py | 2 +- 12 files changed, 72 insertions(+), 88 deletions(-) create mode 100644 changelog.d/13077.doc diff --git a/changelog.d/13077.doc b/changelog.d/13077.doc new file mode 100644 index 000000000..502f2d059 --- /dev/null +++ b/changelog.d/13077.doc @@ -0,0 +1,3 @@ +Clean up references to sample configuration and redirect users to the configuration manual instead. + + diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 62f89e8cb..1235f1cb9 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -124,9 +124,8 @@ Body parameters: - `address` - string. Value of third-party ID. belonging to a user. - `external_ids` - array, optional. Allow setting the identifier of the external identity - provider for SSO (Single sign-on). Details in - [Sample Configuration File](../usage/configuration/homeserver_sample_config.html) - section `sso` and `oidc_providers`. + provider for SSO (Single sign-on). Details in the configuration manual under the + sections [sso](../usage/configuration/config_documentation.md#sso) and [oidc_providers](../usage/configuration/config_documentation.md#oidc_providers). - `auth_provider` - string. ID of the external identity provider. Value of `idp_id` in the homeserver configuration. Note that no error is raised if the provided value is not in the homeserver configuration. diff --git a/docs/code_style.md b/docs/code_style.md index db7edcd76..d65fda62d 100644 --- a/docs/code_style.md +++ b/docs/code_style.md @@ -70,82 +70,61 @@ on save as they take a while and can be very resource intensive. - Avoid wildcard imports (`from synapse.types import *`) and relative imports (`from .types import UserID`). -## Configuration file format +## Configuration code and documentation format -The [sample configuration file](./sample_config.yaml) acts as a +When adding a configuration option to the code, if several settings are grouped into a single dict, ensure that your code +correctly handles the top-level option being set to `None` (as it will be if no sub-options are enabled). + +The [configuration manual](usage/configuration/config_documentation.md) acts as a reference to Synapse's configuration options for server administrators. Remember that many readers will be unfamiliar with YAML and server -administration in general, so that it is important that the file be as -easy to understand as possible, which includes following a consistent -format. +administration in general, so it is important that when you add +a configuration option the documentation be as easy to understand as possible, which +includes following a consistent format. Some guidelines follow: -- Sections should be separated with a heading consisting of a single - line prefixed and suffixed with `##`. 
There should be **two** blank - lines before the section header, and **one** after. -- Each option should be listed in the file with the following format: - - A comment describing the setting. Each line of this comment - should be prefixed with a hash (`#`) and a space. +- Each option should be listed in the config manual with the following format: + + - The name of the option, prefixed by `###`. - The comment should describe the default behaviour (ie, what + - A comment which describes the default behaviour (i.e. what happens if the setting is omitted), as well as what the effect will be if the setting is changed. - - Often, the comment end with something like "uncomment the - following to ". - - - A line consisting of only `#`. - - A commented-out example setting, prefixed with only `#`. + - An example setting, using backticks to define the code block For boolean (on/off) options, convention is that this example - should be the *opposite* to the default (so the comment will end - with "Uncomment the following to enable [or disable] - ." For other options, the example should give some - non-default value which is likely to be useful to the reader. + should be the *opposite* to the default. For other options, the example should give + some non-default value which is likely to be useful to the reader. -- There should be a blank line between each option. -- Where several settings are grouped into a single dict, *avoid* the - convention where the whole block is commented out, resulting in - comment lines starting `# #`, as this is hard to read and confusing - to edit. Instead, leave the top-level config option uncommented, and - follow the conventions above for sub-options. Ensure that your code - correctly handles the top-level option being set to `None` (as it - will be if no sub-options are enabled). -- Lines should be wrapped at 80 characters. -- Use two-space indents. -- `true` and `false` are spelt thus (as opposed to `True`, etc.) -- Use single quotes (`'`) rather than double-quotes (`"`) or backticks - (`` ` ``) to refer to configuration options. +- There should be a horizontal rule between each option, which can be achieved by adding `---` before and + after the option. +- `true` and `false` are spelt thus (as opposed to `True`, etc.) Example: +--- +### `modules` + +Use the `module` sub-option to add a module under `modules` to extend functionality. +The `module` setting then has a sub-option, `config`, which can be used to define some configuration +for the `module`. + +Defaults to none. + +Example configuration: ```yaml -## Frobnication ## - -# The frobnicator will ensure that all requests are fully frobnicated. -# To enable it, uncomment the following. -# -#frobnicator_enabled: true - -# By default, the frobnicator will frobnicate with the default frobber. -# The following will make it use an alternative frobber. -# -#frobincator_frobber: special_frobber - -# Settings for the frobber -# -frobber: - # frobbing speed. Defaults to 1. - # - #speed: 10 - - # frobbing distance. Defaults to 1000. - # - #distance: 100 +modules: + - module: my_super_module.MySuperClass + config: + do_thing: true + - module: my_other_super_module.SomeClass + config: {} ``` +--- Note that the sample configuration is generated from the synapse code and is maintained by a script, `scripts-dev/generate_sample_config.sh`. Making sure that the output from this script matches the desired format is left as an exercise for the reader! 
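To make the "handle the top-level option being set to `None`" guidance above concrete, here is a small sketch reusing the `frobber` example that the removed sample-config text used; the option and its sub-options are purely illustrative:

```python
from typing import Any, Dict


def read_frobber_config(config: Dict[str, Any]) -> Dict[str, Any]:
    # A bare `frobber:` line with no sub-options parses from YAML as None,
    # so fall back to an empty dict before reading sub-options. This also
    # covers the key being absent entirely.
    frobber = config.get("frobber") or {}
    return {
        "speed": frobber.get("speed", 1),
        "distance": frobber.get("distance", 1000),
    }
```

With this shape, an absent `frobber` key, an empty `frobber:` entry, and a fully populated mapping are all handled the same way.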
+ diff --git a/docs/jwt.md b/docs/jwt.md index 8f859d59a..2e262583a 100644 --- a/docs/jwt.md +++ b/docs/jwt.md @@ -49,9 +49,8 @@ as follows: * For other installation mechanisms, see the documentation provided by the maintainer. -To enable the JSON web token integration, you should then add a `jwt_config` section -to your configuration file (or uncomment the `enabled: true` line in the -existing section). See [sample_config.yaml](./sample_config.yaml) for some +To enable the JSON web token integration, you should then add a `jwt_config` option +to your configuration file. See the [configuration manual](usage/configuration/config_documentation.md#jwt_config) for some sample settings. ## How to test JWT as a developer diff --git a/docs/manhole.md b/docs/manhole.md index a82fad0f0..4e5bf833c 100644 --- a/docs/manhole.md +++ b/docs/manhole.md @@ -13,8 +13,10 @@ environments where untrusted users have shell access. ## Configuring the manhole -To enable it, first uncomment the `manhole` listener configuration in -`homeserver.yaml`. The configuration is slightly different if you're using docker. +To enable it, first add the `manhole` listener configuration in your +`homeserver.yaml`. You can find information on how to do that +in the [configuration manual](usage/configuration/config_documentation.md#manhole_settings). +The configuration is slightly different if you're using docker. #### Docker config diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index b52c4aaa2..8c88f9393 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -49,9 +49,9 @@ clients. ## Server configuration -Support for this feature can be enabled and configured in the -`retention` section of the Synapse configuration file (see the -[sample file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518)). +Support for this feature can be enabled and configured by adding a the +`retention` in the Synapse configuration file (see +[configuration manual](usage/configuration/config_documentation.md#retention)). To enable support for message retention policies, set the setting `enabled` in this section to `true`. @@ -65,8 +65,8 @@ message retention policy configured in its state. This allows server admins to ensure that messages are never kept indefinitely in a server's database. -A default policy can be defined as such, in the `retention` section of -the configuration file: +A default policy can be defined as such, by adding the `retention` option in +the configuration file and adding these sub-options: ```yaml default_policy: @@ -86,8 +86,8 @@ Purge jobs are the jobs that Synapse runs in the background to purge expired events from the database. They are only run if support for message retention policies is enabled in the server's configuration. If no configuration for purge jobs is configured by the server admin, -Synapse will use a default configuration, which is described in the -[sample configuration file](https://github.com/matrix-org/synapse/blob/v1.36.0/docs/sample_config.yaml#L451-L518). +Synapse will use a default configuration, which is described here in the +[configuration manual](usage/configuration/config_documentation.md#retention). Some server admins might want a finer control on when events are removed depending on an event's room's policy. This can be done by setting the @@ -137,8 +137,8 @@ the server's database. 
### Lifetime limits Server admins can set limits on the values of `max_lifetime` to use when -purging old events in a room. These limits can be defined as such in the -`retention` section of the configuration file: +purging old events in a room. These limits can be defined under the +`retention` option in the configuration file: ```yaml allowed_lifetime_min: 1d diff --git a/docs/openid.md b/docs/openid.md index 9d615a573..d0ccf36f7 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -45,8 +45,8 @@ as follows: maintainer. To enable the OpenID integration, you should then add a section to the `oidc_providers` -setting in your configuration file (or uncomment one of the existing examples). -See [sample_config.yaml](./sample_config.yaml) for some sample settings, as well as +setting in your configuration file. +See the [configuration manual](usage/configuration/config_documentation.md#oidc_providers) for some sample settings, as well as the text below for example configurations for specific providers. ## Sample configs diff --git a/docs/setup/forward_proxy.md b/docs/setup/forward_proxy.md index 494c14893..3482691f8 100644 --- a/docs/setup/forward_proxy.md +++ b/docs/setup/forward_proxy.md @@ -66,8 +66,8 @@ in Synapse can be deactivated. **NOTE**: This has an impact on security and is for testing purposes only! -To deactivate the certificate validation, the following setting must be made in -[homserver.yaml](../usage/configuration/homeserver_sample_config.md). +To deactivate the certificate validation, the following setting must be added to +your [homserver.yaml](../usage/configuration/homeserver_sample_config.md). ```yaml use_insecure_ssl_client_just_for_testing_do_not_use: true diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 1580529fd..260e50577 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -407,11 +407,11 @@ The recommended way to do so is to set up a reverse proxy on port Alternatively, you can configure Synapse to expose an HTTPS port. To do so, you will need to edit `homeserver.yaml`, as follows: -- First, under the `listeners` section, uncomment the configuration for the - TLS-enabled listener. (Remove the hash sign (`#`) at the start of - each line). The relevant lines are like this: +- First, under the `listeners` option, add the configuration for the + TLS-enabled listener like so: ```yaml +listeners: - port: 8448 type: http tls: true @@ -419,9 +419,11 @@ so, you will need to edit `homeserver.yaml`, as follows: - names: [client, federation] ``` -- You will also need to uncomment the `tls_certificate_path` and - `tls_private_key_path` lines under the `TLS` section. You will need to manage - provisioning of these certificates yourself. +- You will also need to add the options `tls_certificate_path` and + `tls_private_key_path`. to your configuration file. You will need to manage provisioning of + these certificates yourself. +- You can find more information about these options as well as how to configure synapse in the + [configuration manual](../usage/configuration/config_documentation.md). 
If you are using your own certificate, be sure to use a `.pem` file that includes the full certificate chain including any intermediate certificates diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 19eb50449..82edd53e3 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -2999,7 +2999,7 @@ This setting has the following sub-options: * `localdb_enabled`: Set to false to disable authentication against the local password database. This is ignored if `enabled` is false, and is only useful if you have other `password_providers`. Defaults to true. -* `pepper`: Set the value here to a secret random string for extra security. # Uncomment and change to a secret random string for extra security. +* `pepper`: Set the value here to a secret random string for extra security. DO NOT CHANGE THIS AFTER INITIAL SETUP! * `policy`: Define and enforce a password policy, such as minimum lengths for passwords, etc. Each parameter is optional. This is an implementation of MSC2000. Parameters are as follows: diff --git a/docs/usage/configuration/user_authentication/single_sign_on/cas.md b/docs/usage/configuration/user_authentication/single_sign_on/cas.md index 3bac1b29f..899face87 100644 --- a/docs/usage/configuration/user_authentication/single_sign_on/cas.md +++ b/docs/usage/configuration/user_authentication/single_sign_on/cas.md @@ -4,5 +4,5 @@ Synapse supports authenticating users via the [Central Authentication Service protocol](https://en.wikipedia.org/wiki/Central_Authentication_Service) (CAS) natively. -Please see the `cas_config` and `sso` sections of the [Synapse configuration -file](../../../configuration/homeserver_sample_config.md) for more details. \ No newline at end of file +Please see the [cas_config](../../../configuration/config_documentation.md#cas_config) and [sso](../../../configuration/config_documentation.md#sso) +sections of the configuration manual for more details. \ No newline at end of file diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index c82f3ee7a..6e11fbdb9 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -145,7 +145,7 @@ class EmailConfig(Config): raise ConfigError( 'The config option "trust_identity_server_for_password_resets" ' 'has been replaced by "account_threepid_delegate". ' - "Please consult the sample config at docs/sample_config.yaml for " + "Please consult the configuration manual at docs/usage/configuration/config_documentation.md for " "details and update your config file." ) From 046a6513bcad2f7111e12e3b750eb798466731da Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 30 Jun 2022 09:22:40 -0700 Subject: [PATCH 12/54] Don't process /send requests for users who have hit their ratelimit (#13134) --- changelog.d/13134.misc | 1 + synapse/handlers/message.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/13134.misc diff --git a/changelog.d/13134.misc b/changelog.d/13134.misc new file mode 100644 index 000000000..e3e16056d --- /dev/null +++ b/changelog.d/13134.misc @@ -0,0 +1 @@ +Apply ratelimiting earlier in processing of /send request. 
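The `synapse/handlers/message.py` hunk that follows adds the actual check; as background, the pattern it relies on is a limiter that can be asked "would this action be allowed?" without recording it (`update=False`), so an over-limit request is rejected before any expensive work, and the action is only charged once it really happens. A self-contained toy sketch (not Synapse's `Ratelimiter`):

```python
import time
from collections import defaultdict, deque
from typing import Deque, Dict


class TinyRatelimiter:
    """Toy sliding-window limiter illustrating check-without-consume."""

    def __init__(self, limit: int, window_seconds: float) -> None:
        self.limit = limit
        self.window = window_seconds
        self._hits: Dict[str, Deque[float]] = defaultdict(deque)

    def ratelimit(self, key: str, update: bool = True) -> None:
        now = time.monotonic()
        hits = self._hits[key]
        # Drop hits that have fallen outside the window.
        while hits and now - hits[0] > self.window:
            hits.popleft()
        if len(hits) >= self.limit:
            raise RuntimeError(f"{key} is over the limit")
        if update:
            hits.append(now)


limiter = TinyRatelimiter(limit=10, window_seconds=1.0)
limiter.ratelimit("@alice:example.com", update=False)  # cheap early rejection
# ... build and validate the event ...
limiter.ratelimit("@alice:example.com")  # charge the limiter once the send happens
```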
\ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 189f52fe5..c6b40a5b7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -903,6 +903,9 @@ class EventCreationHandler: await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() + if ratelimit: + await self.request_ratelimiter.ratelimit(requester, update=False) + # We limit the number of concurrent event sends in a room so that we # don't fork the DAG too much. If we don't limit then we can end up in # a situation where event persistence can't keep up, causing From 50f0e4028b334566a067b671d15246a9b05e8498 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20Ku=C5=9Bnierz?= Date: Thu, 30 Jun 2022 19:48:04 +0200 Subject: [PATCH 13/54] Allow dependency errors to pass through (#13113) Signed-off-by: Jacek Kusnierz Co-authored-by: Brendan Abolivier --- changelog.d/13113.misc | 1 + synapse/config/cache.py | 9 ++------- synapse/config/jwt.py | 17 +++-------------- synapse/config/metrics.py | 9 ++------- synapse/config/oidc.py | 10 ++-------- synapse/config/repository.py | 10 ++-------- synapse/config/saml2.py | 9 ++------- synapse/config/tracer.py | 9 ++------- 8 files changed, 16 insertions(+), 58 deletions(-) create mode 100644 changelog.d/13113.misc diff --git a/changelog.d/13113.misc b/changelog.d/13113.misc new file mode 100644 index 000000000..7b1a50eec --- /dev/null +++ b/changelog.d/13113.misc @@ -0,0 +1 @@ +Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. \ No newline at end of file diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 63310c8d0..2db8cfb00 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -21,7 +21,7 @@ from typing import Any, Callable, Dict, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -159,12 +159,7 @@ class CacheConfig(Config): self.track_memory_usage = cache_config.get("track_memory_usage", False) if self.track_memory_usage: - try: - check_requirements("cache_memory") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("cache_memory") expire_caches = cache_config.get("expire_caches", True) cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") diff --git a/synapse/config/jwt.py b/synapse/config/jwt.py index 49aaca7cf..a973bb508 100644 --- a/synapse/config/jwt.py +++ b/synapse/config/jwt.py @@ -15,14 +15,9 @@ from typing import Any from synapse.types import JsonDict +from synapse.util.check_dependencies import check_requirements -from ._base import Config, ConfigError - -MISSING_AUTHLIB = """Missing authlib library. This is required for jwt login. - - Install by running: - pip install synapse[jwt] - """ +from ._base import Config class JWTConfig(Config): @@ -41,13 +36,7 @@ class JWTConfig(Config): # that the claims exist on the JWT. self.jwt_issuer = jwt_config.get("issuer") self.jwt_audiences = jwt_config.get("audiences") - - try: - from authlib.jose import JsonWebToken - - JsonWebToken # To stop unused lint. 
- except ImportError: - raise ConfigError(MISSING_AUTHLIB) + check_requirements("jwt") else: self.jwt_enabled = False self.jwt_secret = None diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index d63650788..3b42be5b5 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -18,7 +18,7 @@ from typing import Any, Optional import attr from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -57,12 +57,7 @@ class MetricsConfig(Config): self.sentry_enabled = "sentry" in config if self.sentry_enabled: - try: - check_requirements("sentry") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("sentry") self.sentry_dsn = config["sentry"].get("dsn") if not self.sentry_dsn: diff --git a/synapse/config/oidc.py b/synapse/config/oidc.py index 98e8cd8b5..5418a332d 100644 --- a/synapse/config/oidc.py +++ b/synapse/config/oidc.py @@ -24,7 +24,7 @@ from synapse.types import JsonDict from synapse.util.module_loader import load_module from synapse.util.stringutils import parse_and_validate_mxc_uri -from ..util.check_dependencies import DependencyException, check_requirements +from ..util.check_dependencies import check_requirements from ._base import Config, ConfigError, read_file DEFAULT_USER_MAPPING_PROVIDER = "synapse.handlers.oidc.JinjaOidcMappingProvider" @@ -41,12 +41,7 @@ class OIDCConfig(Config): if not self.oidc_providers: return - try: - check_requirements("oidc") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) from e + check_requirements("oidc") # check we don't have any duplicate idp_ids now. (The SSO handler will also # check for duplicates when the REST listeners get registered, but that happens @@ -146,7 +141,6 @@ OIDC_PROVIDER_CONFIG_WITH_ID_SCHEMA = { "allOf": [OIDC_PROVIDER_CONFIG_SCHEMA, {"required": ["idp_id", "idp_name"]}] } - # the `oidc_providers` list can either be None (as it is in the default config), or # a list of provider configs, each of which requires an explicit ID and name. 
OIDC_PROVIDER_LIST_SCHEMA = { diff --git a/synapse/config/repository.py b/synapse/config/repository.py index aadec1e54..3c69dd325 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -21,7 +21,7 @@ import attr from synapse.config.server import generate_ip_set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module from ._base import Config, ConfigError @@ -184,13 +184,7 @@ class ContentRepositoryConfig(Config): ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: - try: - check_requirements("url_preview") - - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("url_preview") proxy_env = getproxies_environment() if "url_preview_ip_range_blacklist" not in config: diff --git a/synapse/config/saml2.py b/synapse/config/saml2.py index bd7c234d3..49ca663dd 100644 --- a/synapse/config/saml2.py +++ b/synapse/config/saml2.py @@ -18,7 +18,7 @@ from typing import Any, List, Set from synapse.config.sso import SsoAttributeRequirement from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from synapse.util.module_loader import load_module, load_python_module from ._base import Config, ConfigError @@ -76,12 +76,7 @@ class SAML2Config(Config): if not saml2_config.get("sp_config") and not saml2_config.get("config_path"): return - try: - check_requirements("saml2") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("saml2") self.saml2_enabled = True diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 6fbf927f1..c19270c6c 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -15,7 +15,7 @@ from typing import Any, List, Set from synapse.types import JsonDict -from synapse.util.check_dependencies import DependencyException, check_requirements +from synapse.util.check_dependencies import check_requirements from ._base import Config, ConfigError @@ -40,12 +40,7 @@ class TracerConfig(Config): if not self.opentracer_enabled: return - try: - check_requirements("opentracing") - except DependencyException as e: - raise ConfigError( - e.message # noqa: B306, DependencyException.message is a property - ) + check_requirements("opentracing") # The tracer is enabled so sanitize the config From c0efc689cb925ff42e5617e7cddba11f18ab22de Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 30 Jun 2022 22:12:28 +0100 Subject: [PATCH 14/54] Add documentation for phone home stats (#13086) --- changelog.d/13086.doc | 1 + docs/SUMMARY.md | 1 + .../reporting_anonymised_statistics.md | 81 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 changelog.d/13086.doc create mode 100644 docs/usage/administration/monitoring/reporting_anonymised_statistics.md diff --git a/changelog.d/13086.doc b/changelog.d/13086.doc new file mode 100644 index 000000000..a3960ca32 --- /dev/null +++ b/changelog.d/13086.doc @@ -0,0 +1 @@ +Add documentation for anonymised homeserver statistics collection. 
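The new page added below documents the fields that a homeserver reports. As a rough illustration only — this is not Synapse's phone-home code, the field list is abridged, and the HTTP details are invented — assembling and submitting such a report could look like:

```python
import json
import resource  # Unix-only
import sys
import time
from typing import Any, Dict
from urllib import request


def build_stats(server_name: str, start_time: float) -> Dict[str, Any]:
    usage = resource.getrusage(resource.RUSAGE_SELF)
    return {
        "homeserver": server_name,
        "timestamp": int(time.time()),
        "uptime_seconds": int(time.time() - start_time),
        # Kilobytes on Linux, bytes on macOS, as noted in the table below.
        "memory_rss": usage.ru_maxrss,
        "python_version": "{}.{}.{}".format(*sys.version_info[:3]),
    }


def report(endpoint: str, stats: Dict[str, Any]) -> None:
    body = json.dumps(stats).encode("utf-8")
    req = request.Request(
        endpoint, data=body, headers={"Content-Type": "application/json"}
    )
    # Real reporting code would add timeouts, error handling and a retry policy.
    request.urlopen(req)
```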
\ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index b51c7a3cb..3978f96fc 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -69,6 +69,7 @@ - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) + - [Reporting Anonymised Statistics](usage/administration/monitoring/reporting_anonymised_statistics.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) - [Useful SQL for Admins](usage/administration/useful_sql_for_admins.md) - [Database Maintenance Tools](usage/administration/database_maintenance_tools.md) diff --git a/docs/usage/administration/monitoring/reporting_anonymised_statistics.md b/docs/usage/administration/monitoring/reporting_anonymised_statistics.md new file mode 100644 index 000000000..4f1e0fecf --- /dev/null +++ b/docs/usage/administration/monitoring/reporting_anonymised_statistics.md @@ -0,0 +1,81 @@ +# Reporting Anonymised Statistics + +When generating your Synapse configuration file, you are asked whether you +would like to report anonymised statistics to Matrix.org. These statistics +provide the foundation a glimpse into the number of Synapse homeservers +participating in the network, as well as statistics such as the number of +rooms being created and messages being sent. This feature is sometimes +affectionately called "phone-home" stats. Reporting +[is optional](../../configuration/config_documentation.md#report_stats) +and the reporting endpoint +[can be configured](../../configuration/config_documentation.md#report_stats_endpoint), +in case you would like to instead report statistics from a set of homeservers +to your own infrastructure. + +This documentation aims to define the statistics available and the +homeserver configuration options that exist to tweak it. + +## Available Statistics + +The following statistics are sent to the configured reporting endpoint: + +| Statistic Name | Type | Description | +|----------------------------|--------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `memory_rss` | int | The memory usage of the process (in kilobytes on Unix-based systems, bytes on MacOS). | +| `cpu_average` | int | CPU time in % of a single core (not % of all cores). | +| `homeserver` | string | The homeserver's server name. | +| `server_context` | string | An arbitrary string used to group statistics from a set of homeservers. | +| `timestamp` | int | The current time, represented as the number of seconds since the epoch. | +| `uptime_seconds` | int | The number of seconds since the homeserver was last started. | +| `python_version` | string | The Python version number in use (e.g "3.7.1"). Taken from `sys.version_info`. | +| `total_users` | int | The number of registered users on the homeserver. | +| `total_nonbridged_users` | int | The number of users, excluding those created by an Application Service. | +| `daily_user_type_native` | int | The number of native users created in the last 24 hours. | +| `daily_user_type_guest` | int | The number of guest users created in the last 24 hours. | +| `daily_user_type_bridged` | int | The number of users created by Application Services in the last 24 hours. 
| +| `total_room_count` | int | The total number of rooms present on the homeserver. | +| `daily_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 24 hours. | +| `monthly_active_users` | int | The number of unique users[^1] that have used the homeserver in the last 30 days. | +| `daily_active_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.message` sent in them in the last 24 hours. | +| `daily_active_e2ee_rooms` | int | The number of rooms that have had a (state) event with the type `m.room.encrypted` sent in them in the last 24 hours. | +| `daily_messages` | int | The number of (state) events with the type `m.room.message` seen in the last 24 hours. | +| `daily_e2ee_messages` | int | The number of (state) events with the type `m.room.encrypted` seen in the last 24 hours. | +| `daily_sent_messages` | int | The number of (state) events sent by a local user with the type `m.room.message` seen in the last 24 hours. | +| `daily_sent_e2ee_messages` | int | The number of (state) events sent by a local user with the type `m.room.encrypted` seen in the last 24 hours. | +| `r30_users_all` | int | The number of 30 day retained users, defined as users who have created their accounts more than 30 days ago, where they were last seen at most 30 days ago and where those two timestamps are over 30 days apart. Includes clients that do not fit into the below r30 client types. | +| `r30_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Android" in the user agent string. | +| `r30_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "iOS" in the user agent string. | +| `r30_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Electron" in the user agent string. | +| `r30_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "Mozilla" or "Gecko" in the user agent string. | +| `r30v2_users_all` | int | The number of 30 day retained users, with a revised algorithm. Defined as users that appear more than once in the past 60 days, and have more than 30 days between the most and least recent appearances in the past 60 days. Includes clients that do not fit into the below r30 client types. | +| `r30v2_users_android` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "android" (case-insensitive) in the user agent string. | +| `r30v2_users_ios` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "ios" (case-insensitive) in the user agent string. | +| `r30v2_users_electron` | int | The number of 30 day retained users, as defined above. Filtered only to clients with ("riot" or "element") and "electron" (case-insensitive) in the user agent string. | +| `r30v2_users_web` | int | The number of 30 day retained users, as defined above. Filtered only to clients with "mozilla" or "gecko" (case-insensitive) in the user agent string. | +| `cache_factor` | int | The configured [`global factor`](../../configuration/config_documentation.md#caching) value for caching. | +| `event_cache_size` | int | The configured [`event_cache_size`](../../configuration/config_documentation.md#caching) value for caching. | +| `database_engine` | string | The database engine that is in use. 
Either "psycopg2" meaning PostgreSQL is in use, or "sqlite3" for SQLite3. | +| `database_server_version` | string | The version of the database server. Examples being "10.10" for PostgreSQL server version 10.0, and "3.38.5" for SQLite 3.38.5 installed on the system. | +| `log_level` | string | The log level in use. Examples are "INFO", "WARNING", "ERROR", "DEBUG", etc. | + + +[^1]: Native matrix users and guests are always counted. If the +[`track_puppeted_user_ips`](../../configuration/config_documentation.md#track_puppeted_user_ips) +option is set to `true`, "puppeted" users (users that an Application Service have performed +[an action on behalf of](https://spec.matrix.org/v1.3/application-service-api/#identity-assertion)) +will also be counted. Note that an Application Service can "puppet" any user in their +[user namespace](https://spec.matrix.org/v1.3/application-service-api/#registration), +not only users that the Application Service has created. If this happens, the Application Service +will additionally be counted as a user (irrespective of `track_puppeted_user_ips`). + +## Using a Custom Statistics Collection Server + +If statistics reporting is enabled, the endpoint that Synapse sends metrics to is configured by the +[`report_stats_endpoint`](../../configuration/config_documentation.md#report_stats_endpoint) config +option. By default, statistics are sent to Matrix.org. + +If you would like to set up your own statistics collection server and send metrics there, you may +consider using one of the following known implementations: + +* [Matrix.org's Panopticon](https://github.com/matrix-org/panopticon) +* [Famedly's Barad-dûr](https://gitlab.com/famedly/company/devops/services/barad-dur) From 8c2825276fec6e03434f1924482788ea3281a9fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 10:19:27 +0100 Subject: [PATCH 15/54] Skip waiting for full state for incoming events (#13144) When we receive an event over federation during a faster join, there is no need to wait for full state, since we have a whole reconciliation process designed to take the partial state into account. --- changelog.d/13144.misc | 1 + synapse/state/__init__.py | 12 +++++++++--- tests/test_state.py | 4 +++- 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13144.misc diff --git a/changelog.d/13144.misc b/changelog.d/13144.misc new file mode 100644 index 000000000..34762e2fc --- /dev/null +++ b/changelog.d/13144.misc @@ -0,0 +1 @@ +Faster joins: skip waiting for full state when processing incoming events over federation. diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 9d3fe6610..d5cbdb3ee 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -249,8 +249,12 @@ class StateHandler: partial_state = True logger.debug("calling resolve_state_groups from compute_event_context") + # we've already taken into account partial state, so no need to wait for + # complete state here. 
entry = await self.resolve_state_groups_for_events( - event.room_id, event.prev_event_ids() + event.room_id, + event.prev_event_ids(), + await_full_state=False, ) state_ids_before_event = entry.state @@ -335,7 +339,7 @@ class StateHandler: @measure_func() async def resolve_state_groups_for_events( - self, room_id: str, event_ids: Collection[str] + self, room_id: str, event_ids: Collection[str], await_full_state: bool = True ) -> _StateCacheEntry: """Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -343,6 +347,8 @@ class StateHandler: Args: room_id event_ids + await_full_state: if true, will block if we do not yet have complete + state at these events. Returns: The resolved state @@ -350,7 +356,7 @@ class StateHandler: logger.debug("resolve_state_groups event_ids %s", event_ids) state_groups = await self._state_storage_controller.get_state_group_for_events( - event_ids + event_ids, await_full_state=await_full_state ) state_group_ids = state_groups.values() diff --git a/tests/test_state.py b/tests/test_state.py index b005dd8d0..7b3f52f68 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -131,7 +131,9 @@ class _DummyStore: async def get_room_version_id(self, room_id): return RoomVersions.V1.identifier - async def get_state_group_for_events(self, event_ids): + async def get_state_group_for_events( + self, event_ids, await_full_state: bool = True + ): res = {} for event in event_ids: res[event] = self._event_to_state_group[event] From 6da861ae6937e85689825c06c9198673f5209a2b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 10:52:10 +0100 Subject: [PATCH 16/54] `_process_received_pdu`: Improve exception handling (#13145) `_check_event_auth` is expected to raise `AuthError`s, so no need to log it again. --- changelog.d/13145.misc | 1 + synapse/handlers/federation_event.py | 13 ++++++------- 2 files changed, 7 insertions(+), 7 deletions(-) create mode 100644 changelog.d/13145.misc diff --git a/changelog.d/13145.misc b/changelog.d/13145.misc new file mode 100644 index 000000000..d5e2dba86 --- /dev/null +++ b/changelog.d/13145.misc @@ -0,0 +1 @@ +Improve exception handling when processing events received over federation. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index b7c54e642..479d936dc 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1092,20 +1092,19 @@ class FederationEventHandler: logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier + context = await self._state_handler.compute_event_context( + event, + state_ids_before_event=state_ids, + ) try: - context = await self._state_handler.compute_event_context( - event, - state_ids_before_event=state_ids, - ) context = await self._check_event_auth( origin, event, context, ) except AuthError as e: - # FIXME richvdh 2021/10/07 I don't think this is reachable. Let's log it - # for now - logger.exception("Unexpected AuthError from _check_event_auth") + # This happens only if we couldn't find the auth events. We'll already have + # logged a warning, so now we just convert to a FederationError. 
raise FederationError("ERROR", e.code, e.msg, affected=event.event_id) if not backfilled and not context.rejected: From d70ff5cc3508f4010ca2d19b090f0338e99c1d28 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 1 Jul 2022 12:04:56 +0200 Subject: [PATCH 17/54] Extra validation for rest/client/account_data (#13148) * Extra validation for rest/client/account_data This is a fairly simple endpoint and we did pretty well here. * Changelog --- changelog.d/13148.feature | 1 + synapse/rest/client/account_data.py | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13148.feature diff --git a/changelog.d/13148.feature b/changelog.d/13148.feature new file mode 100644 index 000000000..d1104b04b --- /dev/null +++ b/changelog.d/13148.feature @@ -0,0 +1 @@ +Improve validation logic in Synapse's REST endpoints. diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py index bfe985939..f13970b89 100644 --- a/synapse/rest/client/account_data.py +++ b/synapse/rest/client/account_data.py @@ -15,11 +15,11 @@ import logging from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import AuthError, NotFoundError, SynapseError +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.types import JsonDict +from synapse.types import JsonDict, RoomID from ._base import client_patterns @@ -104,6 +104,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot add account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + body = parse_json_object_from_request(request) if account_data_type == "m.fully_read": @@ -111,6 +118,7 @@ class RoomAccountDataServlet(RestServlet): 405, "Cannot set m.fully_read through this API." " Use /rooms/!roomId:server.name/read_markers", + Codes.BAD_JSON, ) await self.handler.add_account_data_to_room( @@ -130,6 +138,13 @@ class RoomAccountDataServlet(RestServlet): if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") + if not RoomID.is_valid(room_id): + raise SynapseError( + 400, + f"{room_id} is not a valid room ID", + Codes.INVALID_PARAM, + ) + event = await self.store.get_account_data_for_room_and_type( user_id, room_id, account_data_type ) From fe910fb10ef854c8c884c6e9a8e7034da5124464 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Jul 2022 13:33:59 +0100 Subject: [PATCH 18/54] complement.sh: Permit skipping docker build (#13143) Add a `-f` argument to `complement.sh` to skip the docker build --- changelog.d/13143.misc | 1 + scripts-dev/complement.sh | 68 ++++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 changelog.d/13143.misc diff --git a/changelog.d/13143.misc b/changelog.d/13143.misc new file mode 100644 index 000000000..1cb77c02d --- /dev/null +++ b/changelog.d/13143.misc @@ -0,0 +1 @@ +Add support to `complement.sh` for skipping the docker build. 
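Stepping back to the account-data validation added in #13148 above: `RoomID.is_valid` (from `synapse.types`) rejects identifiers that do not even have the right shape before any storage work is done. A hypothetical approximation of that shape check, for illustration only:

```python
def looks_like_room_id(room_id: str) -> bool:
    # Matrix room IDs have the form "!opaque_localpart:server.name".
    if not room_id.startswith("!"):
        return False
    localpart, sep, domain = room_id[1:].partition(":")
    return bool(localpart) and sep == ":" and bool(domain)


assert looks_like_room_id("!abc123:example.com")
assert not looks_like_room_id("not-a-room-id")
```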
diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index f1843717c..20df5fbc2 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -14,9 +14,12 @@ # By default Synapse is run in monolith mode. This can be overridden by # setting the WORKERS environment variable. # -# A regular expression of test method names can be supplied as the first -# argument to the script. Complement will then only run those tests. If -# no regex is supplied, all tests are run. For example; +# You can optionally give a "-f" argument (for "fast") before any to skip +# rebuilding the docker images, if you just want to rerun the tests. +# +# Remaining commandline arguments are passed through to `go test`. For example, +# you can supply a regular expression of test method names via the "-run" +# argument: # # ./complement.sh -run "TestOutboundFederation(Profile|Send)" # @@ -32,6 +35,37 @@ echo_if_github() { fi } +# Helper to print out the usage instructions +usage() { + cat >&2 <... +Run the complement test suite on Synapse. + + -f Skip rebuilding the docker images, and just use the most recent + 'complement-synapse:latest' image + +For help on arguments to 'go test', run 'go help testflag'. +EOF +} + +# parse our arguments +skip_docker_build="" +while [ $# -ge 1 ]; do + arg=$1 + case "$arg" in + "-h") + usage + exit 1 + ;; + "-f") + skip_docker_build=1 + ;; + *) + # unknown arg: presumably an argument to gotest. break the loop. + break + esac + shift +done # enable buildkit for the docker builds export DOCKER_BUILDKIT=1 @@ -49,21 +83,23 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then echo "Checkout available at 'complement-${COMPLEMENT_REF}'" fi -# Build the base Synapse image from the local checkout -echo_if_github "::group::Build Docker image: matrixdotorg/synapse" -docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . -echo_if_github "::endgroup::" +if [ -z "$skip_docker_build" ]; then + # Build the base Synapse image from the local checkout + echo_if_github "::group::Build Docker image: matrixdotorg/synapse" + docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . + echo_if_github "::endgroup::" -# Build the workers docker image (from the base Synapse image we just built). -echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" -docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . -echo_if_github "::endgroup::" + # Build the workers docker image (from the base Synapse image we just built). + echo_if_github "::group::Build Docker image: matrixdotorg/synapse-workers" + docker build -t matrixdotorg/synapse-workers -f "docker/Dockerfile-workers" . + echo_if_github "::endgroup::" -# Build the unified Complement image (from the worker Synapse image we just built). -echo_if_github "::group::Build Docker image: complement/Dockerfile" -docker build -t complement-synapse \ - -f "docker/complement/Dockerfile" "docker/complement" -echo_if_github "::endgroup::" + # Build the unified Complement image (from the worker Synapse image we just built). + echo_if_github "::group::Build Docker image: complement/Dockerfile" + docker build -t complement-synapse \ + -f "docker/complement/Dockerfile" "docker/complement" + echo_if_github "::endgroup::" +fi export COMPLEMENT_BASE_IMAGE=complement-synapse From c04e25789ee7fa5bd57864ad7687595f44996798 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 1 Jul 2022 16:42:49 +0100 Subject: [PATCH 19/54] Enable Complement testing in the 'Twisted Trunk' CI runs. 
(#13079) --- .github/workflows/twisted_trunk.yml | 67 +++++++++++++++++++++++++++++ changelog.d/13079.misc | 1 + docker/Dockerfile | 9 +++- scripts-dev/complement.sh | 7 ++- 4 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13079.misc diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 5f0671f35..12267405b 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -96,6 +96,72 @@ jobs: /logs/results.tap /logs/**/*.log* + complement: + if: "${{ !failure() && !cancelled() }}" + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + include: + - arrangement: monolith + database: SQLite + + - arrangement: monolith + database: Postgres + + - arrangement: workers + database: Postgres + + steps: + # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. + # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path + - name: "Set Go Version" + run: | + # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 + echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH + # Add the Go path to the PATH: We need this so we can call gotestfmt + echo "~/go/bin" >> $GITHUB_PATH + + - name: "Install Complement Dependencies" + run: | + sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev + go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest + + - name: Run actions/checkout@v2 for synapse + uses: actions/checkout@v2 + with: + path: synapse + + # This step is specific to the 'Twisted trunk' test run: + - name: Patch dependencies + run: | + set -x + DEBIAN_FRONTEND=noninteractive sudo apt-get install -yqq python3 pipx + pipx install poetry==1.1.12 + + poetry remove -n twisted + poetry add -n --extras tls git+https://github.com/twisted/twisted.git#trunk + poetry lock --no-update + # NOT IN 1.1.12 poetry lock --check + working-directory: synapse + + - name: "Install custom gotestfmt template" + run: | + mkdir .gotestfmt/github -p + cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl + + # Attempt to check out the same branch of Complement as the PR. If it + # doesn't exist, fallback to HEAD. + - name: Checkout complement + run: synapse/.ci/scripts/checkout_complement.sh + + - run: | + set -o pipefail + TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt + shell: bash + name: Run Complement Tests + # open an issue if the build fails, so we know about it. open-issue: if: failure() @@ -103,6 +169,7 @@ jobs: - mypy - trial - sytest + - complement runs-on: ubuntu-latest diff --git a/changelog.d/13079.misc b/changelog.d/13079.misc new file mode 100644 index 000000000..0133097c8 --- /dev/null +++ b/changelog.d/13079.misc @@ -0,0 +1 @@ +Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index c676f8377..22707ed14 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -62,7 +62,13 @@ WORKDIR /synapse # Copy just what we need to run `poetry export`... 
COPY pyproject.toml poetry.lock /synapse/ -RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt + +# If specified, we won't verify the hashes of dependencies. +# This is only needed if the hashes of dependencies cannot be checked for some +# reason, such as when a git repository is used directly as a dependency. +ARG TEST_ONLY_SKIP_DEP_HASH_VERIFICATION + +RUN /root/.local/bin/poetry export --extras all -o /synapse/requirements.txt ${TEST_ONLY_SKIP_DEP_HASH_VERIFICATION:+--without-hashes} ### ### Stage 1: builder @@ -85,6 +91,7 @@ RUN \ openssl \ rustc \ zlib1g-dev \ + git \ && rm -rf /var/lib/apt/lists/* # To speed up rebuilds, install all of the dependencies before we copy over diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 20df5fbc2..8448d49e2 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -23,6 +23,9 @@ # # ./complement.sh -run "TestOutboundFederation(Profile|Send)" # +# Specifying TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 will cause `poetry export` +# to not emit any hashes when building the Docker image. This then means that +# you can use 'unverifiable' sources such as git repositories as dependencies. # Exit if a line returns a non-zero exit code set -e @@ -86,7 +89,9 @@ fi if [ -z "$skip_docker_build" ]; then # Build the base Synapse image from the local checkout echo_if_github "::group::Build Docker image: matrixdotorg/synapse" - docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . + docker build -t matrixdotorg/synapse \ + --build-arg TEST_ONLY_SKIP_DEP_HASH_VERIFICATION \ + -f "docker/Dockerfile" . echo_if_github "::endgroup::" # Build the workers docker image (from the base Synapse image we just built). From fa10468eb4eebb5e648aa2d4ca5c87c0cd1aed88 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Mon, 4 Jul 2022 14:34:50 +0200 Subject: [PATCH 20/54] [Complement] Allow device_name lookup over federation (#13167) --- changelog.d/13167.misc | 1 + docker/complement/conf/workers-shared-extra.yaml.j2 | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/13167.misc diff --git a/changelog.d/13167.misc b/changelog.d/13167.misc new file mode 100644 index 000000000..a7c7a688d --- /dev/null +++ b/changelog.d/13167.misc @@ -0,0 +1 @@ +Update config used by Complement to allow device name lookup over federation. \ No newline at end of file diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index 7c6a0fd75..b5f675bc7 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -81,6 +81,8 @@ rc_invites: federation_rr_transactions_per_room_per_second: 9999 +allow_device_name_lookup_over_federation: true + ## Experimental Features ## experimental_features: From 9820665597ab6a3bbb1d23d0824752967b2170dd Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 4 Jul 2022 15:15:33 +0100 Subject: [PATCH 21/54] Remove tests/utils.py from mypy's exclude list (#13159) --- changelog.d/13159.misc | 1 + mypy.ini | 1 - tests/server.py | 1 - tests/utils.py | 4 ++-- 4 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13159.misc diff --git a/changelog.d/13159.misc b/changelog.d/13159.misc new file mode 100644 index 000000000..bb5554ebe --- /dev/null +++ b/changelog.d/13159.misc @@ -0,0 +1 @@ +Improve and fix type hints. 
\ No newline at end of file diff --git a/mypy.ini b/mypy.ini index b9b16860d..d757a88fd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -73,7 +73,6 @@ exclude = (?x) |tests/util/test_lrucache.py |tests/util/test_rwlock.py |tests/util/test_wheel_timer.py - |tests/utils.py )$ [mypy-synapse.federation.transport.client] diff --git a/tests/server.py b/tests/server.py index b9f465971..ce017ca0f 100644 --- a/tests/server.py +++ b/tests/server.py @@ -830,7 +830,6 @@ def setup_test_homeserver( # Mock TLS hs.tls_server_context_factory = Mock() - hs.tls_client_options_factory = Mock() hs.setup() if homeserver_to_use == TestHomeServer: diff --git a/tests/utils.py b/tests/utils.py index cabb2c0de..aca6a0083 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -64,7 +64,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.execute( @@ -94,7 +94,7 @@ def setupdb(): password=POSTGRES_PASSWORD, dbname=POSTGRES_DBNAME_FOR_INITIAL_CREATE, ) - db_conn.autocommit = True + db_engine.attempt_to_set_autocommit(db_conn, autocommit=True) cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) cur.close() From dcc4e0621cc101271efc573600bd7591a12cea7c Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 4 Jul 2022 17:47:44 +0100 Subject: [PATCH 22/54] Up the dependency on canonicaljson to ^1.5.0 --- pyproject.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9f2ea432..c098b8df0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,7 +110,9 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -canonicaljson = "^1.4.0" +# We require 1.5.0 to work around an issue when running against the C implementation of +# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 +canonicaljson = "^1.5.0" # we use the type definitions added in signedjson 1.1. signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. From 5b5c943e7d978475c30b52941b678eac36008dc9 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 4 Jul 2022 17:48:09 +0100 Subject: [PATCH 23/54] Revert "Up the dependency on canonicaljson to ^1.5.0" This reverts commit dcc4e0621cc101271efc573600bd7591a12cea7c. --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c098b8df0..b9f2ea432 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,9 +110,7 @@ jsonschema = ">=3.0.0" frozendict = ">=1,!=2.1.2" # We require 2.1.0 or higher for type hints. Previous guard was >= 1.1.0 unpaddedbase64 = ">=2.1.0" -# We require 1.5.0 to work around an issue when running against the C implementation of -# frozendict: https://github.com/matrix-org/python-canonicaljson/issues/36 -canonicaljson = "^1.5.0" +canonicaljson = "^1.4.0" # we use the type definitions added in signedjson 1.1. signedjson = "^1.1.0" # validating SSL certs for IP addresses requires service_identity 18.1. 
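The two commits above pin and then immediately un-pin `canonicaljson`, the library Synapse uses to produce the canonical JSON form of events (the attempted 1.5.0 bump concerned how it serialises the C implementation of `frozendict`). For readers unfamiliar with the format, a stdlib-only sketch of the core idea — sorted keys, minimal separators, UTF-8 — noting that the real library additionally knows how to serialise `frozendict` values, which is the behaviour at issue here:

```python
import json
from typing import Any


def canonical_json(value: Any) -> bytes:
    """Toy canonical encoding: sorted keys, no insignificant whitespace, UTF-8."""
    return json.dumps(
        value,
        ensure_ascii=False,
        separators=(",", ":"),
        sort_keys=True,
    ).encode("utf-8")


assert canonical_json({"b": 1, "a": {"y": True, "x": None}}) == (
    b'{"a":{"x":null,"y":true},"b":1}'
)
```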
From d102ad67fddc650c34baa89dc7b2926d46a9aeca Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 4 Jul 2022 18:08:56 +0100 Subject: [PATCH 24/54] annotate tests.server.FakeChannel (#13136) --- changelog.d/13136.misc | 1 + tests/rest/admin/test_room.py | 4 +-- tests/rest/admin/test_user.py | 2 +- tests/rest/client/test_account.py | 5 ++-- tests/rest/client/test_profile.py | 10 +++++--- tests/rest/client/test_relations.py | 2 +- tests/server.py | 38 ++++++++++++++++------------- 7 files changed, 36 insertions(+), 26 deletions(-) create mode 100644 changelog.d/13136.misc diff --git a/changelog.d/13136.misc b/changelog.d/13136.misc new file mode 100644 index 000000000..6cf451d8c --- /dev/null +++ b/changelog.d/13136.misc @@ -0,0 +1 @@ +Add type annotations to `tests.server`. diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index ca6af9417..230dc76f7 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1579,8 +1579,8 @@ class RoomTestCase(unittest.HomeserverTestCase): access_token=self.admin_user_tok, ) self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - self.assertEqual(room_id, channel.json_body.get("rooms")[0].get("room_id")) - self.assertEqual("ж", channel.json_body.get("rooms")[0].get("name")) + self.assertEqual(room_id, channel.json_body["rooms"][0].get("room_id")) + self.assertEqual("ж", channel.json_body["rooms"][0].get("name")) def test_single_room(self) -> None: """Test that a single room can be requested correctly""" diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 0d4410223..e32aaadb9 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1488,7 +1488,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): if channel.code != HTTPStatus.OK: raise HttpResponseException( - channel.code, channel.result["reason"], channel.json_body + channel.code, channel.result["reason"], channel.result["body"] ) # Set monthly active users to the limit diff --git a/tests/rest/client/test_account.py b/tests/rest/client/test_account.py index a43a13727..1f9b65351 100644 --- a/tests/rest/client/test_account.py +++ b/tests/rest/client/test_account.py @@ -949,7 +949,7 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): client_secret: str, next_link: Optional[str] = None, expect_code: int = 200, - ) -> str: + ) -> Optional[str]: """Request a validation token to add an email address to a user's account Args: @@ -959,7 +959,8 @@ class ThreepidEmailRestTestCase(unittest.HomeserverTestCase): expect_code: Expected return code of the call Returns: - The ID of the new threepid validation session + The ID of the new threepid validation session, or None if the response + did not contain a session ID. 
""" body = {"client_secret": client_secret, "email": email, "send_attempt": 1} if next_link: diff --git a/tests/rest/client/test_profile.py b/tests/rest/client/test_profile.py index 29bed0e87..8de5a342a 100644 --- a/tests/rest/client/test_profile.py +++ b/tests/rest/client/test_profile.py @@ -153,18 +153,22 @@ class ProfileTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 400, channel.result) - def _get_displayname(self, name: Optional[str] = None) -> str: + def _get_displayname(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/displayname" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) - return channel.json_body["displayname"] + # FIXME: If a user has no displayname set, Synapse returns 200 and omits a + # displayname from the response. This contradicts the spec, see #13137. + return channel.json_body.get("displayname") - def _get_avatar_url(self, name: Optional[str] = None) -> str: + def _get_avatar_url(self, name: Optional[str] = None) -> Optional[str]: channel = self.make_request( "GET", "/profile/%s/avatar_url" % (name or self.owner,) ) self.assertEqual(channel.code, 200, channel.result) + # FIXME: If a user has no avatar set, Synapse returns 200 and omits an + # avatar_url from the response. This contradicts the spec, see #13137. return channel.json_body.get("avatar_url") @unittest.override_config({"max_avatar_size": 50}) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index aa8490654..ad03eee17 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -800,7 +800,7 @@ class RelationPaginationTestCase(BaseRelationsTestCase): ) expected_event_ids.append(channel.json_body["event_id"]) - prev_token = "" + prev_token: Optional[str] = "" found_event_ids: List[str] = [] for _ in range(20): from_token = "" diff --git a/tests/server.py b/tests/server.py index ce017ca0f..df3f1564c 100644 --- a/tests/server.py +++ b/tests/server.py @@ -43,6 +43,7 @@ from twisted.internet.defer import Deferred, fail, maybeDeferred, succeed from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import ( IAddress, + IConsumer, IHostnameResolver, IProtocol, IPullProducer, @@ -53,11 +54,7 @@ from twisted.internet.interfaces import ( ITransport, ) from twisted.python.failure import Failure -from twisted.test.proto_helpers import ( - AccumulatingProtocol, - MemoryReactor, - MemoryReactorClock, -) +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock from twisted.web.http_headers import Headers from twisted.web.resource import IResource from twisted.web.server import Request, Site @@ -96,6 +93,7 @@ class TimedOutException(Exception): """ +@implementer(IConsumer) @attr.s(auto_attribs=True) class FakeChannel: """ @@ -104,7 +102,7 @@ class FakeChannel: """ site: Union[Site, "FakeSite"] - _reactor: MemoryReactor + _reactor: MemoryReactorClock result: dict = attr.Factory(dict) _ip: str = "127.0.0.1" _producer: Optional[Union[IPullProducer, IPushProducer]] = None @@ -122,7 +120,7 @@ class FakeChannel: self._request = request @property - def json_body(self): + def json_body(self) -> JsonDict: return json.loads(self.text_body) @property @@ -140,7 +138,7 @@ class FakeChannel: return self.result.get("done", False) @property - def code(self): + def code(self) -> int: if not self.result: raise Exception("No result yet.") return int(self.result["code"]) @@ -160,7 +158,7 @@ class FakeChannel: 
self.result["reason"] = reason self.result["headers"] = headers - def write(self, content): + def write(self, content: bytes) -> None: assert isinstance(content, bytes), "Should be bytes! " + repr(content) if "body" not in self.result: @@ -168,11 +166,16 @@ class FakeChannel: self.result["body"] += content - def registerProducer(self, producer, streaming): + # Type ignore: mypy doesn't like the fact that producer isn't an IProducer. + def registerProducer( # type: ignore[override] + self, + producer: Union[IPullProducer, IPushProducer], + streaming: bool, + ) -> None: self._producer = producer self.producerStreaming = streaming - def _produce(): + def _produce() -> None: if self._producer: self._producer.resumeProducing() self._reactor.callLater(0.1, _produce) @@ -180,31 +183,32 @@ class FakeChannel: if not streaming: self._reactor.callLater(0.0, _produce) - def unregisterProducer(self): + def unregisterProducer(self) -> None: if self._producer is None: return self._producer = None - def requestDone(self, _self): + def requestDone(self, _self: Request) -> None: self.result["done"] = True if isinstance(_self, SynapseRequest): + assert _self.logcontext is not None self.resource_usage = _self.logcontext.get_resource_usage() - def getPeer(self): + def getPeer(self) -> IAddress: # We give an address so that getClientAddress/getClientIP returns a non null entry, # causing us to record the MAU return address.IPv4Address("TCP", self._ip, 3423) - def getHost(self): + def getHost(self) -> IAddress: # this is called by Request.__init__ to configure Request.host. return address.IPv4Address("TCP", "127.0.0.1", 8888) - def isSecure(self): + def isSecure(self) -> bool: return False @property - def transport(self): + def transport(self) -> "FakeChannel": return self def await_result(self, timeout_ms: int = 1000) -> None: From e514495465a52531da6c833e4c926f3d1625ae5e Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 5 Jul 2022 11:10:26 +0200 Subject: [PATCH 25/54] Add missing links to config options (#13166) --- changelog.d/13166.doc | 1 + docs/usage/configuration/config_documentation.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13166.doc diff --git a/changelog.d/13166.doc b/changelog.d/13166.doc new file mode 100644 index 000000000..2d92e341e --- /dev/null +++ b/changelog.d/13166.doc @@ -0,0 +1 @@ +Add missing links to config options. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 82edd53e3..ef411c535 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -591,7 +591,7 @@ Example configuration: dummy_events_threshold: 5 ``` --- -Config option `delete_stale_devices_after` +### `delete_stale_devices_after` An optional duration. If set, Synapse will run a daily background task to log out and delete any device that hasn't been accessed for more than the specified amount of time. @@ -1843,7 +1843,7 @@ Example configuration: turn_shared_secret: "YOUR_SHARED_SECRET" ``` ---- -Config options: `turn_username` and `turn_password` +### `turn_username` and `turn_password` The Username and password if the TURN server needs them and does not use a token. 
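The `Optional[str]` return types added to the profile test helpers above matter because mypy will now force callers to handle the missing-field case the FIXME comments describe. A small self-contained illustration of the pattern (not Synapse code):

```python
from typing import Optional


def get_displayname_from_body(body: dict) -> Optional[str]:
    # Mirrors _get_displayname(): the field may be omitted entirely when the
    # user has no displayname set, so the return type must admit None.
    return body.get("displayname")


name = get_displayname_from_body({})
if name is not None:
    # Without this check, mypy flags `name.upper()` as a possible None access.
    print(name.upper())
```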
@@ -3373,7 +3373,7 @@ alias_creation_rules: action: deny ``` --- -Config options: `room_list_publication_rules` +### `room_list_publication_rules` The `room_list_publication_rules` option controls who can publish and which rooms can be published in the public room list. From 65e675504fe060e5e99e145be450fe4d492f404f Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 5 Jul 2022 10:46:20 +0100 Subject: [PATCH 26/54] Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. (#13152) --- changelog.d/13152.misc | 1 + docker/README.md | 7 +++++++ docker/conf/log.config | 6 ++++++ docker/configure_workers_and_start.py | 20 ++++++++++++++------ docs/development/contributing_guide.md | 4 ++++ scripts-dev/complement.sh | 12 ++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13152.misc diff --git a/changelog.d/13152.misc b/changelog.d/13152.misc new file mode 100644 index 000000000..0c919ab70 --- /dev/null +++ b/changelog.d/13152.misc @@ -0,0 +1 @@ +Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 67c3bc65f..5b7de2fe3 100644 --- a/docker/README.md +++ b/docker/README.md @@ -67,6 +67,13 @@ The following environment variables are supported in `generate` mode: * `UID`, `GID`: the user id and group id to use for creating the data directories. If unset, and no user is set via `docker run --user`, defaults to `991`, `991`. +* `SYNAPSE_LOG_LEVEL`: the log level to use (one of `DEBUG`, `INFO`, `WARNING` or `ERROR`). + Defaults to `INFO`. +* `SYNAPSE_LOG_SENSITIVE`: if set and the log level is set to `DEBUG`, Synapse + will log sensitive information such as access tokens. + This should not be needed unless you are a developer attempting to debug something + particularly tricky. + ## Postgres diff --git a/docker/conf/log.config b/docker/conf/log.config index d9e85aa53..90b517983 100644 --- a/docker/conf/log.config +++ b/docker/conf/log.config @@ -49,11 +49,17 @@ handlers: class: logging.StreamHandler formatter: precise +{% if not SYNAPSE_LOG_SENSITIVE %} +{# + If SYNAPSE_LOG_SENSITIVE is unset, then override synapse.storage.SQL to INFO + so that DEBUG entries (containing sensitive information) are not emitted. +#} loggers: synapse.storage.SQL: # beware: increasing this to DEBUG will make synapse log sensitive # information such as access tokens. level: INFO +{% endif %} root: level: {{ SYNAPSE_LOG_LEVEL or "INFO" }} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index 4521f99eb..51583dc13 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -29,6 +29,10 @@ # * SYNAPSE_USE_EXPERIMENTAL_FORKING_LAUNCHER: Whether to use the forking launcher, # only intended for usage in Complement at the moment. # No stability guarantees are provided. +# * SYNAPSE_LOG_LEVEL: Set this to DEBUG, INFO, WARNING or ERROR to change the +# log level. INFO is the default. +# * SYNAPSE_LOG_SENSITIVE: If unset, SQL and SQL values won't be logged, +# regardless of the SYNAPSE_LOG_LEVEL setting. 
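The interaction between these two environment variables and the worker log template can be seen end to end: `configure_workers_and_start.py` (diff continues below) copies them into the Jinja context, and `docker/conf/log.config` only emits the `synapse.storage.SQL` override when `SYNAPSE_LOG_SENSITIVE` is unset. A much-simplified, self-contained sketch of that rendering step follows; the inline template is illustrative, not the real `log.config`.

```python
import os

from jinja2 import Template

# Cut-down stand-in for docker/conf/log.config: the SQL logger is pinned to
# INFO unless SYNAPSE_LOG_SENSITIVE is set, and the root level follows
# SYNAPSE_LOG_LEVEL (defaulting to INFO).
template = Template(
    """
{% if not SYNAPSE_LOG_SENSITIVE %}
loggers:
  synapse.storage.SQL:
    level: INFO
{% endif %}
root:
  level: {{ SYNAPSE_LOG_LEVEL or "INFO" }}
"""
)

print(
    template.render(
        SYNAPSE_LOG_LEVEL=os.environ.get("SYNAPSE_LOG_LEVEL"),
        SYNAPSE_LOG_SENSITIVE=os.environ.get("SYNAPSE_LOG_SENSITIVE"),
    )
)
```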
# # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -38,7 +42,7 @@ import os import subprocess import sys from pathlib import Path -from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Set +from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Optional, Set import yaml from jinja2 import Environment, FileSystemLoader @@ -552,13 +556,17 @@ def generate_worker_log_config( Returns: the path to the generated file """ # Check whether we should write worker logs to disk, in addition to the console - extra_log_template_args = {} + extra_log_template_args: Dict[str, Optional[str]] = {} if environ.get("SYNAPSE_WORKERS_WRITE_LOGS_TO_DISK"): - extra_log_template_args["LOG_FILE_PATH"] = "{dir}/logs/{name}.log".format( - dir=data_dir, name=worker_name - ) + extra_log_template_args["LOG_FILE_PATH"] = f"{data_dir}/logs/{worker_name}.log" + + extra_log_template_args["SYNAPSE_LOG_LEVEL"] = environ.get("SYNAPSE_LOG_LEVEL") + extra_log_template_args["SYNAPSE_LOG_SENSITIVE"] = environ.get( + "SYNAPSE_LOG_SENSITIVE" + ) + # Render and write the file - log_config_filepath = "/conf/workers/{name}.log.config".format(name=worker_name) + log_config_filepath = f"/conf/workers/{worker_name}.log.config" convert( "/conf/log.config", log_config_filepath, diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 4738f8a6b..900369b80 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -309,6 +309,10 @@ The above will run a monolithic (single-process) Synapse with SQLite as the data - Passing `POSTGRES=1` as an environment variable to use the Postgres database instead. - Passing `WORKERS=1` as an environment variable to use a workerised setup instead. This option implies the use of Postgres. +To increase the log level for the tests, set `SYNAPSE_TEST_LOG_LEVEL`, e.g: +```sh +SYNAPSE_TEST_LOG_LEVEL=DEBUG COMPLEMENT_DIR=../complement ./scripts-dev/complement.sh -run TestImportHistoricalMessages +``` ### Prettier formatting with `gotestfmt` diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 8448d49e2..705243ca9 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -145,6 +145,18 @@ else test_tags="$test_tags,faster_joins" fi + +if [[ -n "$SYNAPSE_TEST_LOG_LEVEL" ]]; then + # Set the log level to what is desired + export PASS_SYNAPSE_LOG_LEVEL="$SYNAPSE_TEST_LOG_LEVEL" + + # Allow logging sensitive things (currently SQL queries & parameters). + # (This won't have any effect if we're not logging at DEBUG level overall.) + # Since this is just a test suite, this is fine and won't reveal anyone's + # personal information + export PASS_SYNAPSE_LOG_SENSITIVE=1 +fi + # Run the tests! 
echo "Images built; running complement" cd "$COMPLEMENT_DIR" From 2c2a42cc107fb02bbf7c8d4e6141cbe601221629 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 5 Jul 2022 05:56:06 -0500 Subject: [PATCH 27/54] Fix application service not being able to join remote federated room without a profile set (#13131) Fix https://github.com/matrix-org/synapse/issues/4778 Complement tests: https://github.com/matrix-org/complement/pull/399 --- changelog.d/13131.bugfix | 1 + synapse/handlers/room_member.py | 32 +++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13131.bugfix diff --git a/changelog.d/13131.bugfix b/changelog.d/13131.bugfix new file mode 100644 index 000000000..06602f03f --- /dev/null +++ b/changelog.d/13131.bugfix @@ -0,0 +1 @@ +Fix application service not being able to join remote federated room without a profile set. diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 5648ab4bf..a1d8875dd 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -846,10 +846,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.JOIN - profile = self.profile_handler - if not content_specified: - content["displayname"] = await profile.get_displayname(target) - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if not content_specified: + content["displayname"] = await profile.get_displayname(target) + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote join for %r: %s", + target, + e, + ) if requester.is_guest: content["kind"] = "guest" @@ -926,11 +933,18 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): content["membership"] = Membership.KNOCK - profile = self.profile_handler - if "displayname" not in content: - content["displayname"] = await profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = await profile.get_avatar_url(target) + try: + profile = self.profile_handler + if "displayname" not in content: + content["displayname"] = await profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = await profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information while processing remote knock for %r: %s", + target, + e, + ) return await self.remote_knock( remote_room_hosts, room_id, target, content From 578a5e24a905c5d90d5c609cb485a5ab7277f8a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Jul 2022 13:51:04 +0100 Subject: [PATCH 28/54] Use upserts for updating `event_push_summary` (#13153) --- changelog.d/13153.misc | 1 + .../databases/main/event_push_actions.py | 47 +++---------------- 2 files changed, 8 insertions(+), 40 deletions(-) create mode 100644 changelog.d/13153.misc diff --git a/changelog.d/13153.misc b/changelog.d/13153.misc new file mode 100644 index 000000000..3bb51962e --- /dev/null +++ b/changelog.d/13153.misc @@ -0,0 +1 @@ +Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index bb6e104d7..32536430a 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1013,8 +1013,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas sql = """ SELECT user_id, room_id, coalesce(old.%s, 0) + upd.cnt, - upd.stream_ordering, - old.user_id + upd.stream_ordering FROM ( SELECT user_id, room_id, count(*) as cnt, max(stream_ordering) as stream_ordering @@ -1042,7 +1041,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=row[2], stream_ordering=row[3], - old_user_id=row[4], notif_count=0, ) @@ -1063,57 +1061,27 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas summaries[(row[0], row[1])] = _EventPushSummary( unread_count=0, stream_ordering=row[3], - old_user_id=row[4], notif_count=row[2], ) logger.info("Rotating notifications, handling %d rows", len(summaries)) - # If the `old.user_id` above is NULL then we know there isn't already an - # entry in the table, so we simply insert it. Otherwise we update the - # existing table. - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - keys=( - "user_id", - "room_id", - "notif_count", - "unread_count", - "stream_ordering", - ), - values=[ + key_names=("user_id", "room_id"), + key_values=[(user_id, room_id) for user_id, room_id in summaries], + value_names=("notif_count", "unread_count", "stream_ordering"), + value_values=[ ( - user_id, - room_id, summary.notif_count, summary.unread_count, summary.stream_ordering, ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is None + for summary in summaries.values() ], ) - txn.execute_batch( - """ - UPDATE event_push_summary - SET notif_count = ?, unread_count = ?, stream_ordering = ? - WHERE user_id = ? AND room_id = ? - """, - ( - ( - summary.notif_count, - summary.unread_count, - summary.stream_ordering, - user_id, - room_id, - ) - for ((user_id, room_id), summary) in summaries.items() - if summary.old_user_id is not None - ), - ) - txn.execute( "UPDATE event_push_summary_stream_ordering SET stream_ordering = ?", (rotate_to_stream_ordering,), @@ -1293,5 +1261,4 @@ class _EventPushSummary: unread_count: int stream_ordering: int - old_user_id: str notif_count: int From 68695d80074f4d3bdf07970d541c07b98adffc76 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 5 Jul 2022 14:24:42 +0100 Subject: [PATCH 29/54] Factor out some common Complement CI setup commands to a script. (#13157) --- .ci/scripts/setup_complement_prerequisites.sh | 36 +++++++++++++++++++ .github/workflows/tests.yml | 25 ++----------- .github/workflows/twisted_trunk.yml | 27 ++------------ changelog.d/13157.misc | 1 + 4 files changed, 42 insertions(+), 47 deletions(-) create mode 100755 .ci/scripts/setup_complement_prerequisites.sh create mode 100644 changelog.d/13157.misc diff --git a/.ci/scripts/setup_complement_prerequisites.sh b/.ci/scripts/setup_complement_prerequisites.sh new file mode 100755 index 000000000..4848901cb --- /dev/null +++ b/.ci/scripts/setup_complement_prerequisites.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Common commands to set up Complement's prerequisites in a GitHub Actions CI run. +# +# Must be called after Synapse has been checked out to `synapse/`. 
+# +set -eu + +alias block='{ set +x; } 2>/dev/null; func() { echo "::group::$*"; set -x; }; func' +alias endblock='{ set +x; } 2>/dev/null; func() { echo "::endgroup::"; set -x; }; func' + +block Set Go Version + # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. + # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path + + # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 + echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH + # Add the Go path to the PATH: We need this so we can call gotestfmt + echo "~/go/bin" >> $GITHUB_PATH +endblock + +block Install Complement Dependencies + sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev + go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest +endblock + +block Install custom gotestfmt template + mkdir .gotestfmt/github -p + cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl +endblock + +block Check out Complement + # Attempt to check out the same branch of Complement as the PR. If it + # doesn't exist, fallback to HEAD. + synapse/.ci/scripts/checkout_complement.sh +endblock diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a775f70c4..4bc29c820 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -332,34 +332,13 @@ jobs: database: Postgres steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. - # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get -qq update && sudo apt-get install -qqy libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh - run: | set -o pipefail diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 12267405b..f35e82297 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -114,25 +114,14 @@ jobs: database: Postgres steps: - # The path is set via a file given by $GITHUB_PATH. We need both Go 1.17 and GOPATH on the path to run Complement. 
- # See https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#adding-a-system-path - - name: "Set Go Version" - run: | - # Add Go 1.17 to the PATH: see https://github.com/actions/virtual-environments/blob/main/images/linux/Ubuntu2004-Readme.md#environment-variables-2 - echo "$GOROOT_1_17_X64/bin" >> $GITHUB_PATH - # Add the Go path to the PATH: We need this so we can call gotestfmt - echo "~/go/bin" >> $GITHUB_PATH - - - name: "Install Complement Dependencies" - run: | - sudo apt-get update && sudo apt-get install -y libolm3 libolm-dev - go get -v github.com/haveyoudebuggedit/gotestfmt/v2/cmd/gotestfmt@latest - - name: Run actions/checkout@v2 for synapse uses: actions/checkout@v2 with: path: synapse + - name: Prepare Complement's Prerequisites + run: synapse/.ci/scripts/setup_complement_prerequisites.sh + # This step is specific to the 'Twisted trunk' test run: - name: Patch dependencies run: | @@ -146,16 +135,6 @@ jobs: # NOT IN 1.1.12 poetry lock --check working-directory: synapse - - name: "Install custom gotestfmt template" - run: | - mkdir .gotestfmt/github -p - cp synapse/.ci/complement_package.gotpl .gotestfmt/github/package.gotpl - - # Attempt to check out the same branch of Complement as the PR. If it - # doesn't exist, fallback to HEAD. - - name: Checkout complement - run: synapse/.ci/scripts/checkout_complement.sh - - run: | set -o pipefail TEST_ONLY_SKIP_DEP_HASH_VERIFICATION=1 POSTGRES=${{ (matrix.database == 'Postgres') && 1 || '' }} WORKERS=${{ (matrix.arrangement == 'workers') && 1 || '' }} COMPLEMENT_DIR=`pwd`/complement synapse/scripts-dev/complement.sh -json 2>&1 | gotestfmt diff --git a/changelog.d/13157.misc b/changelog.d/13157.misc new file mode 100644 index 000000000..0133097c8 --- /dev/null +++ b/changelog.d/13157.misc @@ -0,0 +1 @@ +Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file From 6ba732fefe732f92b0266b17cb6e45388bbe002a Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 5 Jul 2022 15:13:47 +0100 Subject: [PATCH 30/54] Type `tests.utils` (#13028) * Cast to postgres types when handling postgres db * Remove unused method * Easy annotations * Annotate create_room * Use `ParamSpec` to annotate looping_call * Annotate `default_config` * Track `now` as a float `time_ms` returns an int like the proper Synapse `Clock` * Introduce a `Timer` dataclass * Introduce a Looper type * Suppress checking of a mock * tests.utils is typed * Changelog * Whoops, import ParamSpec from typing_extensions * ditch the psycopg2 casts --- changelog.d/13028.misc | 1 + mypy.ini | 3 + synapse/util/__init__.py | 6 +- synapse/util/caches/lrucache.py | 2 +- tests/utils.py | 134 ++++++++++++++++++++++---------- 5 files changed, 101 insertions(+), 45 deletions(-) create mode 100644 changelog.d/13028.misc diff --git a/changelog.d/13028.misc b/changelog.d/13028.misc new file mode 100644 index 000000000..4e5f3d8f9 --- /dev/null +++ b/changelog.d/13028.misc @@ -0,0 +1 @@ +Add type annotations to `tests.utils`. diff --git a/mypy.ini b/mypy.ini index d757a88fd..ea0ab003a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -126,6 +126,9 @@ disallow_untyped_defs = True [mypy-tests.federation.transport.test_client] disallow_untyped_defs = True +[mypy-tests.utils] +disallow_untyped_defs = True + ;; Dependencies without annotations ;; Before ignoring a module, check to see if type stubs are available. 
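The "Use `ParamSpec` to annotate looping_call" bullet above is worth unpacking before the diff: a `ParamSpec` ties the `*args`/`**kwargs` a wrapper accepts to the parameters of the callable it will eventually invoke, so mypy can reject mismatched arguments at the call site. A minimal, self-contained illustration (not the Synapse code itself):

```python
from typing import Callable

from typing_extensions import ParamSpec

P = ParamSpec("P")


def call_repeatedly(f: Callable[P, object], *args: P.args, **kwargs: P.kwargs) -> None:
    # Stand-in for Clock.looping_call: the ParamSpec links *args/**kwargs to
    # f's own signature instead of accepting Any.
    f(*args, **kwargs)


def ping(host: str, count: int) -> None:
    print(f"pinging {host} x{count}")


call_repeatedly(ping, "example.com", 3)    # accepted
# call_repeatedly(ping, 3, "example.com")  # rejected by mypy: arguments swapped
```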
diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 6323d452e..a90f08dd4 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -20,6 +20,7 @@ from typing import Any, Callable, Dict, Generator, Optional import attr from frozendict import frozendict from matrix_common.versionstring import get_distribution_version_string +from typing_extensions import ParamSpec from twisted.internet import defer, task from twisted.internet.defer import Deferred @@ -82,6 +83,9 @@ def unwrapFirstError(failure: Failure) -> Failure: return failure.value.subFailure # type: ignore[union-attr] # Issue in Twisted's annotations +P = ParamSpec("P") + + @attr.s(slots=True) class Clock: """ @@ -110,7 +114,7 @@ class Clock: return int(self.time() * 1000) def looping_call( - self, f: Callable, msec: float, *args: Any, **kwargs: Any + self, f: Callable[P, object], msec: float, *args: P.args, **kwargs: P.kwargs ) -> LoopingCall: """Call a function repeatedly. diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index a3b60578e..8ed5325c5 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -109,7 +109,7 @@ GLOBAL_ROOT = ListNode["_Node"].create_root_node() @wrap_as_background_process("LruCache._expire_old_entries") async def _expire_old_entries( - clock: Clock, expiry_seconds: int, autotune_config: Optional[dict] + clock: Clock, expiry_seconds: float, autotune_config: Optional[dict] ) -> None: """Walks the global cache list to find cache entries that haven't been accessed in the given number of seconds, or if a given memory threshold has been breached. diff --git a/tests/utils.py b/tests/utils.py index aca6a0083..424cc4c2a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -15,12 +15,17 @@ import atexit import os +from typing import Any, Callable, Dict, List, Tuple, Union, overload + +import attr +from typing_extensions import Literal, ParamSpec from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.config.homeserver import HomeServerConfig from synapse.config.server import DEFAULT_ROOM_VERSION from synapse.logging.context import current_context, set_current_context +from synapse.server import HomeServer from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import create_engine from synapse.storage.prepare_database import prepare_database @@ -50,12 +55,11 @@ SQLITE_PERSIST_DB = os.environ.get("SYNAPSE_TEST_PERSIST_SQLITE_DB") is not None POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres" -def setupdb(): +def setupdb() -> None: # If we're using PostgreSQL, set up the db once if USE_POSTGRES_FOR_TESTS: # create a PostgresEngine db_engine = create_engine({"name": "psycopg2", "args": {}}) - # connect to postgres to create the base database. db_conn = db_engine.module.connect( user=POSTGRES_USER, @@ -82,11 +86,11 @@ def setupdb(): port=POSTGRES_PORT, password=POSTGRES_PASSWORD, ) - db_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") - prepare_database(db_conn, db_engine, None) - db_conn.close() + logging_conn = LoggingDatabaseConnection(db_conn, db_engine, "tests") + prepare_database(logging_conn, db_engine, None) + logging_conn.close() - def _cleanup(): + def _cleanup() -> None: db_conn = db_engine.module.connect( user=POSTGRES_USER, host=POSTGRES_HOST, @@ -103,7 +107,19 @@ def setupdb(): atexit.register(_cleanup) -def default_config(name, parse=False): +@overload +def default_config(name: str, parse: Literal[False] = ...) 
-> Dict[str, object]: + ... + + +@overload +def default_config(name: str, parse: Literal[True]) -> HomeServerConfig: + ... + + +def default_config( + name: str, parse: bool = False +) -> Union[Dict[str, object], HomeServerConfig]: """ Create a reasonable test config. """ @@ -181,90 +197,122 @@ def default_config(name, parse=False): return config_dict -def mock_getRawHeaders(headers=None): +def mock_getRawHeaders(headers=None): # type: ignore[no-untyped-def] headers = headers if headers is not None else {} - def getRawHeaders(name, default=None): + def getRawHeaders(name, default=None): # type: ignore[no-untyped-def] + # If the requested header is present, the real twisted function returns + # List[str] if name is a str and List[bytes] if name is a bytes. + # This mock doesn't support that behaviour. + # Fortunately, none of the current callers of mock_getRawHeaders() provide a + # headers dict, so we don't encounter this discrepancy in practice. return headers.get(name, default) return getRawHeaders +P = ParamSpec("P") + + +@attr.s(slots=True, auto_attribs=True) +class Timer: + absolute_time: float + callback: Callable[[], None] + expired: bool + + +# TODO: Make this generic over a ParamSpec? +@attr.s(slots=True, auto_attribs=True) +class Looper: + func: Callable[..., Any] + interval: float # seconds + last: float + args: Tuple[object, ...] + kwargs: Dict[str, object] + + class MockClock: - now = 1000 + now = 1000.0 - def __init__(self): - # list of lists of [absolute_time, callback, expired] in no particular - # order - self.timers = [] - self.loopers = [] + def __init__(self) -> None: + # Timers in no particular order + self.timers: List[Timer] = [] + self.loopers: List[Looper] = [] - def time(self): + def time(self) -> float: return self.now - def time_msec(self): - return self.time() * 1000 + def time_msec(self) -> int: + return int(self.time() * 1000) - def call_later(self, delay, callback, *args, **kwargs): + def call_later( + self, + delay: float, + callback: Callable[P, object], + *args: P.args, + **kwargs: P.kwargs, + ) -> Timer: ctx = current_context() - def wrapped_callback(): + def wrapped_callback() -> None: set_current_context(ctx) callback(*args, **kwargs) - t = [self.now + delay, wrapped_callback, False] + t = Timer(self.now + delay, wrapped_callback, False) self.timers.append(t) return t - def looping_call(self, function, interval, *args, **kwargs): - self.loopers.append([function, interval / 1000.0, self.now, args, kwargs]) + def looping_call( + self, + function: Callable[P, object], + interval: float, + *args: P.args, + **kwargs: P.kwargs, + ) -> None: + # This type-ignore should be redundant once we use a mypy release with + # https://github.com/python/mypy/pull/12668. 
+ self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs)) # type: ignore[arg-type] - def cancel_call_later(self, timer, ignore_errs=False): - if timer[2]: + def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None: + if timer.expired: if not ignore_errs: raise Exception("Cannot cancel an expired timer") - timer[2] = True + timer.expired = True self.timers = [t for t in self.timers if t != timer] # For unit testing - def advance_time(self, secs): + def advance_time(self, secs: float) -> None: self.now += secs timers = self.timers self.timers = [] for t in timers: - time, callback, expired = t - - if expired: + if t.expired: raise Exception("Timer already expired") - if self.now >= time: - t[2] = True - callback() + if self.now >= t.absolute_time: + t.expired = True + t.callback() else: self.timers.append(t) for looped in self.loopers: - func, interval, last, args, kwargs = looped - if last + interval < self.now: - func(*args, **kwargs) - looped[2] = self.now + if looped.last + looped.interval < self.now: + looped.func(*looped.args, **looped.kwargs) + looped.last = self.now - def advance_time_msec(self, ms): + def advance_time_msec(self, ms: float) -> None: self.advance_time(ms / 1000.0) - def time_bound_deferred(self, d, *args, **kwargs): - # We don't bother timing things out for now. - return d - -async def create_room(hs, room_id: str, creator_id: str): +async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: """Creates and persist a creation event for the given room""" persistence_store = hs.get_storage_controllers().persistence + assert persistence_store is not None store = hs.get_datastores().main event_builder_factory = hs.get_event_builder_factory() event_creation_handler = hs.get_event_creation_handler() From 68db233f0cf16a20f21fd927374121966976d9c7 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 5 Jul 2022 16:12:52 +0100 Subject: [PATCH 31/54] Handle race between persisting an event and un-partial stating a room (#13100) Whenever we want to persist an event, we first compute an event context, which includes the state at the event and a flag indicating whether the state is partial. After a lot of processing, we finally try to store the event in the database, which can fail for partial state events when the containing room has been un-partial stated in the meantime. We detect the race as a foreign key constraint failure in the data store layer and turn it into a special `PartialStateConflictError` exception, which makes its way up to the method in which we computed the event context. To make things difficult, the exception needs to cross a replication request: `/fed_send_events` for events coming over federation and `/send_event` for events from clients. We transport the `PartialStateConflictError` as a `409 Conflict` over replication and turn `409`s back into `PartialStateConflictError`s on the worker making the request. All client events go through `EventCreationHandler.handle_new_client_event`, which is called in *a lot* of places. Instead of trying to update all the code which creates client events, we turn the `PartialStateConflictError` into a `429 Too Many Requests` in `EventCreationHandler.handle_new_client_event` and hope that clients take it as a hint to retry their request. On the federation event side, there are 7 places which compute event contexts. 
4 of them use outlier event contexts: `FederationEventHandler._auth_and_persist_outliers_inner`, `FederationHandler.do_knock`, `FederationHandler.on_invite_request` and `FederationHandler.do_remotely_reject_invite`. These events won't have the partial state flag, so we do not need to do anything for them. The remaining 3 paths which create events are `FederationEventHandler.process_remote_join`, `FederationEventHandler.on_send_membership_event` and `FederationEventHandler._process_received_pdu`. We can't experience the race in `process_remote_join`, unless we're handling an additional join into a partial state room, which currently blocks, so we make no attempt to handle it correctly. `on_send_membership_event` is only called by `FederationServer._on_send_membership_event`, so we catch the `PartialStateConflictError` there and retry just once. `_process_received_pdu` is called by `on_receive_pdu` for incoming events and `_process_pulled_event` for backfill. The latter should never try to persist partial state events, so we ignore it. We catch the `PartialStateConflictError` in `on_receive_pdu` and retry just once. Referring to the graph of code paths in https://github.com/matrix-org/synapse/issues/12988#issuecomment-1156857648 may make the above make more sense. Signed-off-by: Sean Quah --- changelog.d/13100.misc | 1 + synapse/federation/federation_server.py | 18 ++++- synapse/handlers/federation.py | 39 +++++---- synapse/handlers/federation_event.py | 51 +++++++++--- synapse/handlers/message.py | 79 ++++++++++++------ synapse/replication/http/federation.py | 3 + synapse/replication/http/send_event.py | 3 + synapse/storage/controllers/persist_events.py | 12 +++ synapse/storage/databases/main/events.py | 80 ++++++++++++++++--- synapse/storage/databases/main/room.py | 22 +++-- 10 files changed, 234 insertions(+), 74 deletions(-) create mode 100644 changelog.d/13100.misc diff --git a/changelog.d/13100.misc b/changelog.d/13100.misc new file mode 100644 index 000000000..28f2fe034 --- /dev/null +++ b/changelog.d/13100.misc @@ -0,0 +1 @@ +Faster room joins: Handle race between persisting an event and un-partial stating a room. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3e1518f1f..5dfdc8674 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -67,6 +67,7 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, ReplicationGetQueryRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_decoder, unwrapFirstError @@ -882,9 +883,20 @@ class FederationServer(FederationBase): logger.warning("%s", errmsg) raise SynapseError(403, errmsg, Codes.FORBIDDEN) - return await self._federation_event_handler.on_send_membership_event( - origin, event - ) + try: + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) + except PartialStateConflictError: + # The room was un-partial stated while we were persisting the event. + # Try once more, with full state this time.
+ logger.info( + "Room %s was un-partial stated during `on_send_membership_event`, trying again.", + room_id, + ) + return await self._federation_event_handler.on_send_membership_event( + origin, event + ) async def on_event_auth( self, origin: str, room_id: str, event_id: str diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 34cc5ecd1..3c44b4bf8 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( FederationDeniedError, FederationError, HttpResponseException, + LimitExceededError, NotFoundError, RequestSendFailed, SynapseError, @@ -64,6 +65,7 @@ from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import JsonDict, StateMap, get_domain_from_id @@ -549,15 +551,29 @@ class FederationHandler: # https://github.com/matrix-org/synapse/issues/12998 await self.store.store_partial_state_room(room_id, ret.servers_in_room) - max_stream_id = await self._federation_event_handler.process_remote_join( - origin, - room_id, - auth_chain, - state, - event, - room_version_obj, - partial_state=ret.partial_state, - ) + try: + max_stream_id = ( + await self._federation_event_handler.process_remote_join( + origin, + room_id, + auth_chain, + state, + event, + room_version_obj, + partial_state=ret.partial_state, + ) + ) + except PartialStateConflictError as e: + # The homeserver was already in the room and it is no longer partial + # stated. We ought to be doing a local join instead. Turn the error into + # a 429, as a hint to the client to try again. + # TODO(faster_joins): `_should_perform_remote_join` suggests that we may + # do a remote join for restricted rooms even if we have full state. + logger.error( + "Room %s was un-partial stated while processing remote join.", + room_id, + ) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) if ret.partial_state: # Kick off the process of asynchronously fetching the state for this @@ -1567,11 +1583,6 @@ class FederationHandler: # we raced against more events arriving with partial state. Go round # the loop again. We've already logged a warning, so no need for more. - # TODO(faster_joins): there is still a race here, whereby incoming events which raced - # with us will fail to be persisted after the call to `clear_partial_state_room` due to - # having partial state. 
- # https://github.com/matrix-org/synapse/issues/12988 - # continue events = await self.store.get_events_as_list( diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 479d936dc..c74117c19 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -64,6 +64,7 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) from synapse.state import StateResolutionStore +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -275,7 +276,16 @@ class FederationEventHandler: affected=pdu.event_id, ) - await self._process_received_pdu(origin, pdu, state_ids=None) + try: + await self._process_received_pdu(origin, pdu, state_ids=None) + except PartialStateConflictError: + # The room was un-partial stated while we were processing the PDU. + # Try once more, with full state this time. + logger.info( + "Room %s was un-partial stated while processing the PDU, trying again.", + room_id, + ) + await self._process_received_pdu(origin, pdu, state_ids=None) async def on_send_membership_event( self, origin: str, event: EventBase @@ -306,6 +316,9 @@ class FederationEventHandler: Raises: SynapseError if the event is not accepted into the room + PartialStateConflictError if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should + retry exactly once in this case. """ logger.debug( "on_send_membership_event: Got event: %s, signatures: %s", @@ -423,6 +436,8 @@ class FederationEventHandler: Raises: SynapseError if the response is in some way invalid. + PartialStateConflictError if the homeserver is already in the room and it + has been un-partial stated. """ create_event = None for e in state: @@ -1084,10 +1099,14 @@ class FederationEventHandler: state_ids: Normally None, but if we are handling a gap in the graph (ie, we are missing one or more prev_events), the resolved state at the - event + event. Must not be partial state. backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) + + PartialStateConflictError: if the room was un-partial stated in between + computing the state at the event and persisting it. The caller should retry + exactly once in this case. Will never be raised if `state_ids` is provided. """ logger.debug("Processing event: %s", event) assert not event.internal_metadata.outlier @@ -1933,6 +1952,9 @@ class FederationEventHandler: event: The event itself. context: The event context. backfilled: True if the event was backfilled. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # this method should not be called on outliers (those code paths call # persist_events_and_notify directly.) @@ -1985,6 +2007,10 @@ class FederationEventHandler: Returns: The stream ID after which all events have been persisted. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ if not event_and_contexts: return self._store.get_room_max_stream_ordering() @@ -1993,14 +2019,19 @@ class FederationEventHandler: if instance != self._instance_name: # Limit the number of events sent over replication. 
We choose 200 # here as that is what we default to in `max_request_body_size(..)` - for batch in batch_iter(event_and_contexts, 200): - result = await self._send_events( - instance_name=instance, - store=self._store, - room_id=room_id, - event_and_contexts=batch, - backfilled=backfilled, - ) + try: + for batch in batch_iter(event_and_contexts, 200): + result = await self._send_events( + instance_name=instance, + store=self._store, + room_id=room_id, + event_and_contexts=batch, + backfilled=backfilled, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise return result["max_stream_id"] else: assert self._storage_controllers.persistence diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c6b40a5b7..1980e37da 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -37,6 +37,7 @@ from synapse.api.errors import ( AuthError, Codes, ConsentNotGivenError, + LimitExceededError, NotFoundError, ShadowBanError, SynapseError, @@ -53,6 +54,7 @@ from synapse.handlers.directory import DirectoryHandler from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet +from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( @@ -1250,6 +1252,8 @@ class EventCreationHandler: Raises: ShadowBanError if the requester has been shadow-banned. + SynapseError(503) if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] @@ -1300,24 +1304,35 @@ class EventCreationHandler: # We now persist the event (and update the cache in parallel, since we # don't want to block on it). - result, _ = await make_deferred_yieldable( - gather_results( - ( - run_in_background( - self._persist_event, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, + try: + result, _ = await make_deferred_yieldable( + gather_results( + ( + run_in_background( + self._persist_event, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ), + run_in_background( + self.cache_joined_hosts_for_event, event, context + ).addErrback( + log_failure, "cache_joined_hosts_for_event failed" + ), ), - run_in_background( - self.cache_joined_hosts_for_event, event, context - ).addErrback(log_failure, "cache_joined_hosts_for_event failed"), - ), - consumeErrors=True, + consumeErrors=True, + ) + ).addErrback(unwrapFirstError) + except PartialStateConflictError as e: + # The event context needs to be recomputed. + # Turn the error into a 429, as a hint to the client to try again. + logger.info( + "Room %s was un-partial stated while persisting client event.", + event.room_id, ) - ).addErrback(unwrapFirstError) + raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) return result @@ -1332,6 +1347,9 @@ class EventCreationHandler: """Actually persists the event. Should only be called by `handle_new_client_event`, and see its docstring for documentation of the arguments. + + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. 
""" # Skip push notification actions for historical messages @@ -1348,16 +1366,21 @@ class EventCreationHandler: # If we're a worker we need to hit out to the master. writer_instance = self._events_shard_config.get_instance(event.room_id) if writer_instance != self._instance_name: - result = await self.send_event( - instance_name=writer_instance, - event_id=event.event_id, - store=self.store, - requester=requester, - event=event, - context=context, - ratelimit=ratelimit, - extra_users=extra_users, - ) + try: + result = await self.send_event( + instance_name=writer_instance, + event_id=event.event_id, + store=self.store, + requester=requester, + event=event, + context=context, + ratelimit=ratelimit, + extra_users=extra_users, + ) + except SynapseError as e: + if e.code == HTTPStatus.CONFLICT: + raise PartialStateConflictError() + raise stream_id = result["stream_id"] event_id = result["event_id"] if event_id != event.event_id: @@ -1485,6 +1508,10 @@ class EventCreationHandler: The persisted event. This may be different than the given event if it was de-duplicated (e.g. because we had already persisted an event with the same transaction ID.) + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ extra_users = extra_users or [] diff --git a/synapse/replication/http/federation.py b/synapse/replication/http/federation.py index eed29cd59..d3abafed2 100644 --- a/synapse/replication/http/federation.py +++ b/synapse/replication/http/federation.py @@ -60,6 +60,9 @@ class ReplicationFederationSendEventsRestServlet(ReplicationEndpoint): { "max_stream_id": 32443, } + + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. """ NAME = "fed_send_events" diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index c2b2588ea..486f04723 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -59,6 +59,9 @@ class ReplicationSendEventRestServlet(ReplicationEndpoint): { "stream_id": 12345, "event_id": "$abcdef..." } + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. + The returned event ID may not match the sent event if it was deduplicated. """ diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 4bcb99d06..c248fccc8 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -315,6 +315,10 @@ class EventsPersistenceStorageController: if they were deduplicated due to an event already existing that matched the transaction ID; the existing event is returned in such a case. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: @@ -363,6 +367,10 @@ class EventsPersistenceStorageController: latest persisted event. The returned event may not match the given event if it was deduplicated due to an existing event matching the transaction ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. 
""" # add_to_queue returns a map from event ID to existing event ID if the # event was deduplicated. (The dict may also include other entries if @@ -453,6 +461,10 @@ class EventsPersistenceStorageController: Returns: A dictionary of event ID to event ID we didn't persist as we already had another event persisted with the same TXN ID. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ replaced_events: Dict[str, str] = {} if not events_and_contexts: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a3e12f1e9..8a0e4e958 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -16,6 +16,7 @@ import itertools import logging from collections import OrderedDict +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -35,6 +36,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -69,6 +71,24 @@ event_counter = Counter( ) +class PartialStateConflictError(SynapseError): + """An internal error raised when attempting to persist an event with partial state + after the room containing the event has been un-partial stated. + + This error should be handled by recomputing the event context and trying again. + + This error has an HTTP status code so that it can be transported over replication. + It should not be exposed to clients. + """ + + def __init__(self) -> None: + super().__init__( + HTTPStatus.CONFLICT, + msg="Cannot persist partial state event in un-partial stated room", + errcode=Codes.UNKNOWN, + ) + + @attr.s(slots=True, auto_attribs=True) class DeltaState: """Deltas to use to update the `current_state_events` table. @@ -154,6 +174,10 @@ class PersistEventsStore: Returns: Resolves when the events have been persisted + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ # We want to calculate the stream orderings as late as possible, as @@ -354,6 +378,9 @@ class PersistEventsStore: For each room, a list of the event ids which are the forward extremities. + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ state_delta_for_room = state_delta_for_room or {} new_forward_extremities = new_forward_extremities or {} @@ -1304,6 +1331,10 @@ class PersistEventsStore: Returns: new list, without events which are already in the events table. + + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. """ txn.execute( "SELECT event_id, outlier FROM events WHERE event_id in (%s)" @@ -2215,6 +2246,11 @@ class PersistEventsStore: txn: LoggingTransaction, events_and_contexts: Collection[Tuple[EventBase, EventContext]], ) -> None: + """ + Raises: + PartialStateConflictError: if attempting to persist a partial state event in + a room that has been un-partial stated. + """ state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): @@ -2239,19 +2275,37 @@ class PersistEventsStore: # if we have partial state for these events, record the fact. 
(This happens # here rather than in _store_event_txn because it also needs to happen when # we de-outlier an event.) - self.db_pool.simple_insert_many_txn( - txn, - table="partial_state_events", - keys=("room_id", "event_id"), - values=[ - ( - event.room_id, - event.event_id, - ) - for event, ctx in events_and_contexts - if ctx.partial_state - ], - ) + try: + self.db_pool.simple_insert_many_txn( + txn, + table="partial_state_events", + keys=("room_id", "event_id"), + values=[ + ( + event.room_id, + event.event_id, + ) + for event, ctx in events_and_contexts + if ctx.partial_state + ], + ) + except self.db_pool.engine.module.IntegrityError: + logger.info( + "Cannot persist events %s in rooms %s: room has been un-partial stated", + [ + event.event_id + for event, ctx in events_and_contexts + if ctx.partial_state + ], + list( + { + event.room_id + for event, ctx in events_and_contexts + if ctx.partial_state + } + ), + ) + raise PartialStateConflictError() self.db_pool.simple_upsert_many_txn( txn, diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index d8026e3fa..13d6a1d5c 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1156,19 +1156,25 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): return room_servers async def clear_partial_state_room(self, room_id: str) -> bool: - # this can race with incoming events, so we watch out for FK errors. - # TODO(faster_joins): this still doesn't completely fix the race, since the persist process - # is not atomic. I fear we need an application-level lock. - # https://github.com/matrix-org/synapse/issues/12988 + """Clears the partial state flag for a room. + + Args: + room_id: The room whose partial state flag is to be cleared. + + Returns: + `True` if the partial state flag has been cleared successfully. + + `False` if the partial state flag could not be cleared because the room + still contains events with partial state. + """ try: await self.db_pool.runInteraction( "clear_partial_state_room", self._clear_partial_state_room_txn, room_id ) return True - except self.db_pool.engine.module.DatabaseError as e: - # TODO(faster_joins): how do we distinguish between FK errors and other errors? - # https://github.com/matrix-org/synapse/issues/12988 - logger.warning( + except self.db_pool.engine.module.IntegrityError as e: + # Assume that any `IntegrityError`s are due to partial state events. + logger.info( "Exception while clearing lazy partial-state-room %s, retrying: %s", room_id, e, From a0f51b059c2aa1bbe0a2d6991c369cba5cf43c0a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Jul 2022 12:09:19 +0100 Subject: [PATCH 32/54] Fix bug where we failed to delete old push actions (#13194) This happened if we encountered a stream ordering in `event_push_actions` that had more rows than the batch size of the delete, as If we don't delete any rows in an iteration then the next time round we get the exact same stream ordering and get stuck. --- changelog.d/13194.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13194.bugfix diff --git a/changelog.d/13194.bugfix b/changelog.d/13194.bugfix new file mode 100644 index 000000000..2c2e8bb21 --- /dev/null +++ b/changelog.d/13194.bugfix @@ -0,0 +1 @@ +Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. 
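To make the stuck-loop behaviour described in the commit message concrete, here is a minimal, self-contained sketch (illustrative only; `rows`, `batch_size` and `next_cutoff` are simplified stand-ins for the real `event_push_actions` rotation query, not Synapse code) of why an exclusive bound fails to make progress when a single stream ordering holds more rows than the batch size:

```python
# Simplified model: each entry in `rows` is the stream_ordering of one
# event_push_actions row that is eligible for deletion.
rows = [5, 5, 5, 5, 6, 7]  # four rows share stream_ordering 5
batch_size = 3

def next_cutoff(rows, batch_size, max_stream_ordering):
    # Mirrors: SELECT stream_ordering ... ORDER BY stream_ordering ASC
    #          LIMIT 1 OFFSET batch_size
    candidates = sorted(r for r in rows if r <= max_stream_ordering)
    if len(candidates) > batch_size:
        return candidates[batch_size]
    return max_stream_ordering

cutoff = next_cutoff(rows, batch_size, max_stream_ordering=7)  # -> 5

# Exclusive bound (old behaviour): no row is strictly below 5, so nothing is
# deleted and every iteration recomputes the exact same cutoff.
deleted_old = [r for r in rows if r < cutoff]   # []

# Inclusive bound (new behaviour): the whole batch at stream_ordering 5 goes.
deleted_new = [r for r in rows if r <= cutoff]  # [5, 5, 5, 5]

print(cutoff, deleted_old, deleted_new)
```

The inclusive bound may delete slightly more than `batch_size` rows in one pass, which is the trade-off the added comment in the patch below calls out.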
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 32536430a..a3edcbb39 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1114,7 +1114,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn.execute( """ SELECT stream_ordering FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? """, ( @@ -1129,10 +1129,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas else: stream_ordering = max_stream_ordering_to_delete + # We need to use a inclusive bound here to handle the case where a + # single stream ordering has more than `batch_size` rows. txn.execute( """ DELETE FROM event_push_actions - WHERE stream_ordering < ? AND highlight = 0 + WHERE stream_ordering <= ? AND highlight = 0 """, (stream_ordering,), ) From dcc7873700da4a818e84c44c6190525d39a854cb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 6 Jul 2022 07:30:58 -0400 Subject: [PATCH 33/54] Add information on how the Synapse team does reviews. (#13132) --- changelog.d/13132.doc | 1 + docs/SUMMARY.md | 1 + docs/development/contributing_guide.md | 5 +++- docs/development/reviews.md | 41 ++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13132.doc create mode 100644 docs/development/reviews.md diff --git a/changelog.d/13132.doc b/changelog.d/13132.doc new file mode 100644 index 000000000..c57706929 --- /dev/null +++ b/changelog.d/13132.doc @@ -0,0 +1 @@ +Document how the Synapse team does reviews. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 3978f96fc..8d6030e34 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -81,6 +81,7 @@ # Development - [Contributing Guide](development/contributing_guide.md) - [Code Style](code_style.md) + - [Reviewing Code](development/reviews.md) - [Release Cycle](development/releases.md) - [Git Usage](development/git.md) - [Testing]() diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index 900369b80..ab320cbd7 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -351,7 +351,7 @@ To prepare a Pull Request, please: 3. `git push` your commit to your fork of Synapse; 4. on GitHub, [create the Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request); 5. add a [changelog entry](#changelog) and push it to your Pull Request; -6. for most contributors, that's all - however, if you are a member of the organization `matrix-org`, on GitHub, please request a review from `matrix.org / Synapse Core`. +6. that's it for now, a non-draft pull request will automatically request review from the team; 7. if you need to update your PR, please avoid rebasing and just add new commits to your branch. @@ -527,10 +527,13 @@ From this point, you should: 1. Look at the results of the CI pipeline. - If there is any error, fix the error. 2. If a developer has requested changes, make these changes and let us know if it is ready for a developer to review again. + - A pull request is a conversation, if you disagree with the suggestions, please respond and discuss it. 3. Create a new commit with the changes. - Please do NOT overwrite the history. New commits make the reviewer's life easier. 
- Push this commits to your Pull Request. 4. Back to 1. +5. Once the pull request is ready for review again please re-request review from whichever developer did your initial + review (or leave a comment in the pull request that you believe all required changes have been done). Once both the CI and the developers are happy, the patch will be merged into Synapse and released shortly! diff --git a/docs/development/reviews.md b/docs/development/reviews.md new file mode 100644 index 000000000..d0379949c --- /dev/null +++ b/docs/development/reviews.md @@ -0,0 +1,41 @@ +Some notes on how we do reviews +=============================== + +The Synapse team works off a shared review queue -- any new pull requests for +Synapse (or related projects) has a review requested from the entire team. Team +members should process this queue using the following rules: + +* Any high urgency pull requests (e.g. fixes for broken continuous integration + or fixes for release blockers); +* Follow-up reviews for pull requests which have previously received reviews; +* Any remaining pull requests. + +For the latter two categories above, older pull requests should be prioritised. + +It is explicit that there is no priority given to pull requests from the team +(vs from the community). If a pull request requires a quick turn around, please +explicitly communicate this via [#synapse-dev:matrix.org](https://matrix.to/#/#synapse-dev:matrix.org) +or as a comment on the pull request. + +Once an initial review has been completed and the author has made additional changes, +follow-up reviews should go back to the same reviewer. This helps build a shared +context and conversation between author and reviewer. + +As a team we aim to keep the number of inflight pull requests to a minimum to ensure +that ongoing work is finished before starting new work. + +Performing a review +------------------- + +To communicate to the rest of the team the status of each pull request, team +members should do the following: + +* Assign themselves to the pull request (they should be left assigned to the + pull request until it is merged, closed, or are no longer the reviewer); +* Review the pull request by leaving comments, questions, and suggestions; +* Mark the pull request appropriately (as needing changes or accepted). + +If you are unsure about a particular part of the pull request (or are not confident +in your understanding of part of the code) then ask questions or request review +from the team again. When requesting review from the team be sure to leave a comment +with the rationale on why you're putting it back in the queue. From 57f6f59e3eacac61038419639f234e1eb1f230ed Mon Sep 17 00:00:00 2001 From: David Teller Date: Thu, 7 Jul 2022 10:14:32 +0200 Subject: [PATCH 34/54] Make `_get_state_map_for_room` not break when room state events don't contain an event id. (#13174) Method `_get_state_map_for_room` seems to break in presence of some ill-formed events in the database. Reimplementing this method to use `get_current_state`, which is more robust to such events. --- changelog.d/13174.bugfix | 1 + synapse/events/third_party_rules.py | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13174.bugfix diff --git a/changelog.d/13174.bugfix b/changelog.d/13174.bugfix new file mode 100644 index 000000000..b17935b93 --- /dev/null +++ b/changelog.d/13174.bugfix @@ -0,0 +1 @@ +Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. 
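As a rough, hypothetical illustration of why the per-event-ID lookup in the old `_get_state_map_for_room` was fragile (the dictionaries below are stand-ins, not the real storage API): `get_events` only returns the events it can actually load, so any ill-formed or missing entry turns the later dictionary lookup into a `KeyError`, whereas the new implementation asks the state controller for the current state map directly.

```python
# Hypothetical stand-ins for the storage calls involved.
state_ids = {
    ("m.room.name", ""): "$good_event",
    ("m.room.topic", ""): "$ill_formed_event",  # cannot be loaded
}
room_state_events = {"$good_event": {"type": "m.room.name"}}  # bad event omitted

try:
    state_events = {
        key: room_state_events[event_id]  # old approach: look up each event by ID
        for key, event_id in state_ids.items()
    }
except KeyError as missing:
    print(f"old approach breaks on {missing}")
```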
diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 35f3f3690..72ab69689 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -464,14 +464,7 @@ class ThirdPartyEventRules: Returns: A dict mapping (event type, state key) to state event. """ - state_ids = await self._storage_controllers.state.get_current_state_ids(room_id) - room_state_events = await self.store.get_events(state_ids.values()) - - state_events = {} - for key, event_id in state_ids.items(): - state_events[key] = room_state_events[event_id] - - return state_events + return await self._storage_controllers.state.get_current_state(room_id) async def on_profile_update( self, user_id: str, new_profile: ProfileInfo, by_admin: bool, deactivation: bool From fb7d24ab6de870ab21f83d49d9f1db569eff4b56 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 7 Jul 2022 11:08:04 +0100 Subject: [PATCH 35/54] Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. (#13195) --- changelog.d/13195.misc | 1 + docs/postgres.md | 8 +++++++ synapse/_scripts/synapse_port_db.py | 34 +++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 changelog.d/13195.misc diff --git a/changelog.d/13195.misc b/changelog.d/13195.misc new file mode 100644 index 000000000..5506f767b --- /dev/null +++ b/changelog.d/13195.misc @@ -0,0 +1 @@ +Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. \ No newline at end of file diff --git a/docs/postgres.md b/docs/postgres.md index cbc32e183..f2519f6b0 100644 --- a/docs/postgres.md +++ b/docs/postgres.md @@ -143,6 +143,14 @@ to do step 2. It is safe to at any time kill the port script and restart it. +However, under no circumstances should the SQLite database be `VACUUM`ed between +multiple runs of the script. Doing so can lead to an inconsistent copy of your database +into Postgres. +To avoid accidental error, the script will check that SQLite's `auto_vacuum` mechanism +is disabled, but the script is not able to protect against a manual `VACUUM` operation +performed either by the administrator or by any automated task that the administrator +may have configured. + Note that the database may take up significantly more (25% - 100% more) space on disk after porting to Postgres. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index d3b4887f6..642fd4162 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -621,6 +621,25 @@ class Porter: self.postgres_store.db_pool.updates.has_completed_background_updates() ) + @staticmethod + def _is_sqlite_autovacuum_enabled(txn: LoggingTransaction) -> bool: + """ + Returns true if auto_vacuum is enabled in SQLite. + https://www.sqlite.org/pragma.html#pragma_auto_vacuum + + Vacuuming changes the rowids on rows in the database. + Auto-vacuuming is therefore dangerous when used in conjunction with this script. + + Note that the auto_vacuum setting can't be changed without performing + a VACUUM after trying to change the pragma. + """ + txn.execute("PRAGMA auto_vacuum") + row = txn.fetchone() + assert row is not None, "`PRAGMA auto_vacuum` did not give a row." + (autovacuum_setting,) = row + # 0 means off. 1 means full. 2 means incremental. 
+ return autovacuum_setting != 0 + async def run(self) -> None: """Ports the SQLite database to a PostgreSQL database. @@ -637,6 +656,21 @@ class Porter: allow_outdated_version=True, ) + # For safety, ensure auto_vacuums are disabled. + if await self.sqlite_store.db_pool.runInteraction( + "is_sqlite_autovacuum_enabled", self._is_sqlite_autovacuum_enabled + ): + end_error = ( + "auto_vacuum is enabled in the SQLite database." + " (This is not the default configuration.)\n" + " This script relies on rowids being consistent and must not" + " be used if the database could be vacuumed between re-runs.\n" + " To disable auto_vacuum, you need to stop Synapse and run the following SQL:\n" + " PRAGMA auto_vacuum=off;\n" + " VACUUM;" + ) + return + # Check if all background updates are done, abort if not. updates_complete = ( await self.sqlite_store.db_pool.updates.has_completed_background_updates() From 4aaeb87dad274e0f67a77917b6cec88b778425cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Jul 2022 10:56:52 +0000 Subject: [PATCH 36/54] Bump lxml from 4.8.0 to 4.9.1 (#13207) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: David Robertson --- changelog.d/13207.docker | 1 + poetry.lock | 133 +++++++++++++++++++++------------------ 2 files changed, 72 insertions(+), 62 deletions(-) create mode 100644 changelog.d/13207.docker diff --git a/changelog.d/13207.docker b/changelog.d/13207.docker new file mode 100644 index 000000000..63ba5c803 --- /dev/null +++ b/changelog.d/13207.docker @@ -0,0 +1 @@ +Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. diff --git a/poetry.lock b/poetry.lock index f069f692d..b7c0a6869 100644 --- a/poetry.lock +++ b/poetry.lock @@ -502,7 +502,7 @@ pyasn1 = ">=0.4.6" [[package]] name = "lxml" -version = "4.8.0" +version = "4.9.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
category = "main" optional = true @@ -1937,67 +1937,76 @@ ldap3 = [ {file = "ldap3-2.9.1.tar.gz", hash = "sha256:f3e7fc4718e3f09dda568b57100095e0ce58633bcabbed8667ce3f8fbaa4229f"}, ] lxml = [ - {file = "lxml-4.8.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:e1ab2fac607842ac36864e358c42feb0960ae62c34aa4caaf12ada0a1fb5d99b"}, - {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28d1af847786f68bec57961f31221125c29d6f52d9187c01cd34dc14e2b29430"}, - {file = "lxml-4.8.0-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b92d40121dcbd74831b690a75533da703750f7041b4bf951befc657c37e5695a"}, - {file = "lxml-4.8.0-cp27-cp27m-win32.whl", hash = "sha256:e01f9531ba5420838c801c21c1b0f45dbc9607cb22ea2cf132844453bec863a5"}, - {file = "lxml-4.8.0-cp27-cp27m-win_amd64.whl", hash = "sha256:6259b511b0f2527e6d55ad87acc1c07b3cbffc3d5e050d7e7bcfa151b8202df9"}, - {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1010042bfcac2b2dc6098260a2ed022968dbdfaf285fc65a3acf8e4eb1ffd1bc"}, - {file = "lxml-4.8.0-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fa56bb08b3dd8eac3a8c5b7d075c94e74f755fd9d8a04543ae8d37b1612dd170"}, - {file = "lxml-4.8.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:31ba2cbc64516dcdd6c24418daa7abff989ddf3ba6d3ea6f6ce6f2ed6e754ec9"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:31499847fc5f73ee17dbe1b8e24c6dafc4e8d5b48803d17d22988976b0171f03"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:5f7d7d9afc7b293147e2d506a4596641d60181a35279ef3aa5778d0d9d9123fe"}, - {file = "lxml-4.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a3c5f1a719aa11866ffc530d54ad965063a8cbbecae6515acbd5f0fae8f48eaa"}, - {file = "lxml-4.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6268e27873a3d191849204d00d03f65c0e343b3bcb518a6eaae05677c95621d1"}, - {file = "lxml-4.8.0-cp310-cp310-win32.whl", hash = "sha256:330bff92c26d4aee79c5bc4d9967858bdbe73fdbdbacb5daf623a03a914fe05b"}, - {file = "lxml-4.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:b2582b238e1658c4061ebe1b4df53c435190d22457642377fd0cb30685cdfb76"}, - {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a2bfc7e2a0601b475477c954bf167dee6d0f55cb167e3f3e7cefad906e7759f6"}, - {file = "lxml-4.8.0-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a1547ff4b8a833511eeaceacbcd17b043214fcdb385148f9c1bc5556ca9623e2"}, - {file = "lxml-4.8.0-cp35-cp35m-win32.whl", hash = "sha256:a9f1c3489736ff8e1c7652e9dc39f80cff820f23624f23d9eab6e122ac99b150"}, - {file = "lxml-4.8.0-cp35-cp35m-win_amd64.whl", hash = "sha256:530f278849031b0eb12f46cca0e5db01cfe5177ab13bd6878c6e739319bae654"}, - {file = "lxml-4.8.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:078306d19a33920004addeb5f4630781aaeabb6a8d01398045fcde085091a169"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:86545e351e879d0b72b620db6a3b96346921fa87b3d366d6c074e5a9a0b8dadb"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24f5c5ae618395ed871b3d8ebfcbb36e3f1091fd847bf54c4de623f9107942f3"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:bbab6faf6568484707acc052f4dfc3802bdb0cafe079383fbaa23f1cdae9ecd4"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7993232bd4044392c47779a3c7e8889fea6883be46281d45a81451acfd704d7e"}, - {file = "lxml-4.8.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d6483b1229470e1d8835e52e0ff3c6973b9b97b24cd1c116dca90b57a2cc613"}, - {file = "lxml-4.8.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:ad4332a532e2d5acb231a2e5d33f943750091ee435daffca3fec0a53224e7e33"}, - {file = "lxml-4.8.0-cp36-cp36m-win32.whl", hash = "sha256:db3535733f59e5605a88a706824dfcb9bd06725e709ecb017e165fc1d6e7d429"}, - {file = "lxml-4.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:5f148b0c6133fb928503cfcdfdba395010f997aa44bcf6474fcdd0c5398d9b63"}, - {file = "lxml-4.8.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:8a31f24e2a0b6317f33aafbb2f0895c0bce772980ae60c2c640d82caac49628a"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:719544565c2937c21a6f76d520e6e52b726d132815adb3447ccffbe9f44203c4"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:c0b88ed1ae66777a798dc54f627e32d3b81c8009967c63993c450ee4cbcbec15"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:fa9b7c450be85bfc6cd39f6df8c5b8cbd76b5d6fc1f69efec80203f9894b885f"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e9f84ed9f4d50b74fbc77298ee5c870f67cb7e91dcdc1a6915cb1ff6a317476c"}, - {file = "lxml-4.8.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1d650812b52d98679ed6c6b3b55cbb8fe5a5460a0aef29aeb08dc0b44577df85"}, - {file = "lxml-4.8.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:80bbaddf2baab7e6de4bc47405e34948e694a9efe0861c61cdc23aa774fcb141"}, - {file = "lxml-4.8.0-cp37-cp37m-win32.whl", hash = "sha256:6f7b82934c08e28a2d537d870293236b1000d94d0b4583825ab9649aef7ddf63"}, - {file = "lxml-4.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e1fd7d2fe11f1cb63d3336d147c852f6d07de0d0020d704c6031b46a30b02ca8"}, - {file = "lxml-4.8.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:5045ee1ccd45a89c4daec1160217d363fcd23811e26734688007c26f28c9e9e7"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0c1978ff1fd81ed9dcbba4f91cf09faf1f8082c9d72eb122e92294716c605428"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cbf2ff155b19dc4d4100f7442f6a697938bf4493f8d3b0c51d45568d5666b5"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ce13d6291a5f47c1c8dbd375baa78551053bc6b5e5c0e9bb8e39c0a8359fd52f"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11527dc23d5ef44d76fef11213215c34f36af1608074561fcc561d983aeb870"}, - {file = "lxml-4.8.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:60d2f60bd5a2a979df28ab309352cdcf8181bda0cca4529769a945f09aba06f9"}, - {file = "lxml-4.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:62f93eac69ec0f4be98d1b96f4d6b964855b8255c345c17ff12c20b93f247b68"}, - {file = "lxml-4.8.0-cp38-cp38-win32.whl", hash = "sha256:20b8a746a026017acf07da39fdb10aa80ad9877046c9182442bf80c84a1c4696"}, - {file = "lxml-4.8.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:891dc8f522d7059ff0024cd3ae79fd224752676447f9c678f2a5c14b84d9a939"}, - {file = "lxml-4.8.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b6fc2e2fb6f532cf48b5fed57567ef286addcef38c28874458a41b7837a57807"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:74eb65ec61e3c7c019d7169387d1b6ffcfea1b9ec5894d116a9a903636e4a0b1"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:627e79894770783c129cc5e89b947e52aa26e8e0557c7e205368a809da4b7939"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:545bd39c9481f2e3f2727c78c169425efbfb3fbba6e7db4f46a80ebb249819ca"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5a58d0b12f5053e270510bf12f753a76aaf3d74c453c00942ed7d2c804ca845c"}, - {file = "lxml-4.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ec4b4e75fc68da9dc0ed73dcdb431c25c57775383fec325d23a770a64e7ebc87"}, - {file = "lxml-4.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5804e04feb4e61babf3911c2a974a5b86f66ee227cc5006230b00ac6d285b3a9"}, - {file = "lxml-4.8.0-cp39-cp39-win32.whl", hash = "sha256:aa0cf4922da7a3c905d000b35065df6184c0dc1d866dd3b86fd961905bbad2ea"}, - {file = "lxml-4.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:dd10383f1d6b7edf247d0960a3db274c07e96cf3a3fc7c41c8448f93eac3fb1c"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-macosx_10_14_x86_64.whl", hash = "sha256:2403a6d6fb61c285969b71f4a3527873fe93fd0abe0832d858a17fe68c8fa507"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:986b7a96228c9b4942ec420eff37556c5777bfba6758edcb95421e4a614b57f9"}, - {file = "lxml-4.8.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6fe4ef4402df0250b75ba876c3795510d782def5c1e63890bde02d622570d39e"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-macosx_10_14_x86_64.whl", hash = "sha256:f10ce66fcdeb3543df51d423ede7e238be98412232fca5daec3e54bcd16b8da0"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:730766072fd5dcb219dd2b95c4c49752a54f00157f322bc6d71f7d2a31fecd79"}, - {file = "lxml-4.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:8b99ec73073b37f9ebe8caf399001848fced9c08064effdbfc4da2b5a8d07b93"}, - {file = "lxml-4.8.0.tar.gz", hash = "sha256:f63f62fc60e6228a4ca9abae28228f35e1bd3ce675013d1dfb828688d50c6e23"}, + {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"}, + {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"}, + {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"}, + {file = 
"lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"}, + {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"}, + {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"}, + {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"}, + {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"}, + {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"}, + {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", 
hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"}, + {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"}, + {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"}, + {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"}, + {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"}, + {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"}, + {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"}, + {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"}, + {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"}, + {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = 
"sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"}, + {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"}, + {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"}, + {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"}, ] markupsafe = [ {file = "MarkupSafe-2.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3028252424c72b2602a323f70fbf50aa80a5d3aa616ea6add4ba21ae9cc9da4c"}, From 2b5ab8e3674b7d6003a5f17252c7933c2d6a381a Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 7 Jul 2022 12:02:09 +0100 Subject: [PATCH 37/54] Use a single query in `ProfileHandler.get_profile` (#13209) --- changelog.d/13209.misc | 1 + synapse/handlers/profile.py | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 
deletions(-) create mode 100644 changelog.d/13209.misc diff --git a/changelog.d/13209.misc b/changelog.d/13209.misc new file mode 100644 index 000000000..cb0b8b4e6 --- /dev/null +++ b/changelog.d/13209.misc @@ -0,0 +1 @@ +Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 6eed3826a..d8ff5289b 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -67,19 +67,14 @@ class ProfileHandler: target_user = UserID.from_string(user_id) if self.hs.is_mine(target_user): - try: - displayname = await self.store.get_profile_displayname( - target_user.localpart - ) - avatar_url = await self.store.get_profile_avatar_url( - target_user.localpart - ) - except StoreError as e: - if e.code == 404: - raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - raise + profileinfo = await self.store.get_profileinfo(target_user.localpart) + if profileinfo.display_name is None: + raise SynapseError(404, "Profile was not found", Codes.NOT_FOUND) - return {"displayname": displayname, "avatar_url": avatar_url} + return { + "displayname": profileinfo.display_name, + "avatar_url": profileinfo.avatar_url, + } else: try: result = await self.federation.make_query( From 1391a76cd2b287daebe61f7d8ea03b258ed522f5 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 7 Jul 2022 13:19:31 +0100 Subject: [PATCH 38/54] Faster room joins: fix race in recalculation of current room state (#13151) Bounce recalculation of current state to the correct event persister and move recalculation of current state into the event persistence queue, to avoid concurrent updates to a room's current state. Also give recalculation of a room's current state a real stream ordering. Signed-off-by: Sean Quah --- changelog.d/13151.misc | 1 + synapse/handlers/federation.py | 9 +- synapse/replication/http/__init__.py | 2 + synapse/replication/http/state.py | 75 ++++++++++ synapse/state/__init__.py | 25 ++++ synapse/storage/controllers/persist_events.py | 141 +++++++++++++----- synapse/storage/databases/main/events.py | 14 +- tests/test_state.py | 2 + 8 files changed, 214 insertions(+), 55 deletions(-) create mode 100644 changelog.d/13151.misc create mode 100644 synapse/replication/http/state.py diff --git a/changelog.d/13151.misc b/changelog.d/13151.misc new file mode 100644 index 000000000..cfe3eed3a --- /dev/null +++ b/changelog.d/13151.misc @@ -0,0 +1 @@ +Faster room joins: fix race in recalculation of current room state. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3c44b4bf8..e2564e934 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1559,14 +1559,9 @@ class FederationHandler: # all the events are updated, so we can update current state and # clear the lazy-loading flag. 
logger.info("Updating current state for %s", room_id) - # TODO(faster_joins): support workers + # TODO(faster_joins): notify workers in notify_room_un_partial_stated # https://github.com/matrix-org/synapse/issues/12994 - assert ( - self._storage_controllers.persistence is not None - ), "worker-mode deployments not currently supported here" - await self._storage_controllers.persistence.update_current_state( - room_id - ) + await self.state_handler.update_current_state(room_id) logger.info("Clearing partial-state flag for %s", room_id) success = await self.store.clear_partial_state_room(room_id) diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index aec040ee1..53aa7fa4c 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -25,6 +25,7 @@ from synapse.replication.http import ( push, register, send_event, + state, streams, ) @@ -48,6 +49,7 @@ class ReplicationRestResource(JsonResource): streams.register_servlets(hs, self) account_data.register_servlets(hs, self) push.register_servlets(hs, self) + state.register_servlets(hs, self) # The following can't currently be instantiated on workers. if hs.config.worker.worker_app is None: diff --git a/synapse/replication/http/state.py b/synapse/replication/http/state.py new file mode 100644 index 000000000..838b7584e --- /dev/null +++ b/synapse/replication/http/state.py @@ -0,0 +1,75 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, Tuple + +from twisted.web.server import Request + +from synapse.api.errors import SynapseError +from synapse.http.server import HttpServer +from synapse.replication.http._base import ReplicationEndpoint +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ReplicationUpdateCurrentStateRestServlet(ReplicationEndpoint): + """Recalculates the current state for a room, and persists it. 
+ + The API looks like: + + POST /_synapse/replication/update_current_state/:room_id + + {} + + 200 OK + + {} + """ + + NAME = "update_current_state" + PATH_ARGS = ("room_id",) + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + + self._state_handler = hs.get_state_handler() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + @staticmethod + async def _serialize_payload(room_id: str) -> JsonDict: # type: ignore[override] + return {} + + async def _handle_request( # type: ignore[override] + self, request: Request, room_id: str + ) -> Tuple[int, JsonDict]: + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + raise SynapseError( + 400, "/update_current_state request was routed to the wrong worker" + ) + + await self._state_handler.update_current_state(room_id) + + return 200, {} + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + if hs.get_instance_name() in hs.config.worker.writers.events: + ReplicationUpdateCurrentStateRestServlet(hs).register(http_server) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index d5cbdb3ee..781d9f06d 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -43,6 +43,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersio from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.logging.context import ContextResourceUsage +from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.roommember import ProfileInfo @@ -129,6 +130,12 @@ class StateHandler: self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() self._storage_controllers = hs.get_storage_controllers() + self._events_shard_config = hs.config.worker.events_shard_config + self._instance_name = hs.get_instance_name() + + self._update_current_state_client = ( + ReplicationUpdateCurrentStateRestServlet.make_client(hs) + ) async def get_current_state_ids( self, @@ -423,6 +430,24 @@ class StateHandler: return {key: state_map[ev_id] for key, ev_id in new_state.items()} + async def update_current_state(self, room_id: str) -> None: + """Recalculates the current state for a room, and persists it. 
+ + Raises: + SynapseError(502): if all attempts to connect to the event persister worker + fail + """ + writer_instance = self._events_shard_config.get_instance(room_id) + if writer_instance != self._instance_name: + await self._update_current_state_client( + instance_name=writer_instance, + room_id=room_id, + ) + return + + assert self._storage_controllers.persistence is not None + await self._storage_controllers.persistence.update_current_state(room_id) + @attr.s(slots=True, auto_attribs=True) class _StateResMetrics: diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index c248fccc8..ea499ce0f 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -22,6 +22,7 @@ from typing import ( Any, Awaitable, Callable, + ClassVar, Collection, Deque, Dict, @@ -33,6 +34,7 @@ from typing import ( Set, Tuple, TypeVar, + Union, ) import attr @@ -111,9 +113,43 @@ times_pruned_extremities = Counter( @attr.s(auto_attribs=True, slots=True) -class _EventPersistQueueItem: +class _PersistEventsTask: + """A batch of events to persist.""" + + name: ClassVar[str] = "persist_event_batch" # used for opentracing + events_and_contexts: List[Tuple[EventBase, EventContext]] backfilled: bool + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Batches events with the same backfilled option together.""" + if ( + not isinstance(task, _PersistEventsTask) + or self.backfilled != task.backfilled + ): + return False + + self.events_and_contexts.extend(task.events_and_contexts) + return True + + +@attr.s(auto_attribs=True, slots=True) +class _UpdateCurrentStateTask: + """A room whose current state needs recalculating.""" + + name: ClassVar[str] = "update_current_state" # used for opentracing + + def try_merge(self, task: "_EventPersistQueueTask") -> bool: + """Deduplicates consecutive recalculations of current state.""" + return isinstance(task, _UpdateCurrentStateTask) + + +_EventPersistQueueTask = Union[_PersistEventsTask, _UpdateCurrentStateTask] + + +@attr.s(auto_attribs=True, slots=True) +class _EventPersistQueueItem: + task: _EventPersistQueueTask deferred: ObservableDeferred parent_opentracing_span_contexts: List = attr.ib(factory=list) @@ -127,14 +163,16 @@ _PersistResult = TypeVar("_PersistResult") class _EventPeristenceQueue(Generic[_PersistResult]): - """Queues up events so that they can be persisted in bulk with only one - concurrent transaction per room. + """Queues up tasks so that they can be processed with only one concurrent + transaction per room. + + Tasks can be bulk persistence of events or recalculation of a room's current state. """ def __init__( self, per_item_callback: Callable[ - [List[Tuple[EventBase, EventContext]], bool], + [str, _EventPersistQueueTask], Awaitable[_PersistResult], ], ): @@ -150,18 +188,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]): async def add_to_queue( self, room_id: str, - events_and_contexts: Iterable[Tuple[EventBase, EventContext]], - backfilled: bool, + task: _EventPersistQueueTask, ) -> _PersistResult: - """Add events to the queue, with the given persist_event options. + """Add a task to the queue. - If we are not already processing events in this room, starts off a background + If we are not already processing tasks in this room, starts off a background process to to so, calling the per_item_callback for each item. 
Args: room_id (str): - events_and_contexts (list[(EventBase, EventContext)]): - backfilled (bool): + task (_EventPersistQueueTask): A _PersistEventsTask or + _UpdateCurrentStateTask to process. Returns: the result returned by the `_per_item_callback` passed to @@ -169,26 +206,20 @@ class _EventPeristenceQueue(Generic[_PersistResult]): """ queue = self._event_persist_queues.setdefault(room_id, deque()) - # if the last item in the queue has the same `backfilled` setting, - # we can just add these new events to that item. - if queue and queue[-1].backfilled == backfilled: + if queue and queue[-1].task.try_merge(task): + # the new task has been merged into the last task in the queue end_item = queue[-1] else: - # need to make a new queue item deferred: ObservableDeferred[_PersistResult] = ObservableDeferred( defer.Deferred(), consumeErrors=True ) end_item = _EventPersistQueueItem( - events_and_contexts=[], - backfilled=backfilled, + task=task, deferred=deferred, ) queue.append(end_item) - # add our events to the queue item - end_item.events_and_contexts.extend(events_and_contexts) - # also add our active opentracing span to the item so that we get a link back span = opentracing.active_span() if span: @@ -202,7 +233,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): # add another opentracing span which links to the persist trace. with opentracing.start_active_span_follows_from( - "persist_event_batch_complete", (end_item.opentracing_span_context,) + f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -234,16 +265,14 @@ class _EventPeristenceQueue(Generic[_PersistResult]): for item in queue: try: with opentracing.start_active_span_follows_from( - "persist_event_batch", + item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, ) as scope: if scope: item.opentracing_span_context = scope.span.context - ret = await self._per_item_callback( - item.events_and_contexts, item.backfilled - ) + ret = await self._per_item_callback(room_id, item.task) except Exception: with PreserveLoggingContext(): item.deferred.errback() @@ -292,9 +321,32 @@ class EventsPersistenceStorageController: self._clock = hs.get_clock() self._instance_name = hs.get_instance_name() self.is_mine_id = hs.is_mine_id - self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch) + self._event_persist_queue = _EventPeristenceQueue( + self._process_event_persist_queue_task + ) self._state_resolution_handler = hs.get_state_resolution_handler() + async def _process_event_persist_queue_task( + self, + room_id: str, + task: _EventPersistQueueTask, + ) -> Dict[str, str]: + """Callback for the _event_persist_queue + + Returns: + A dictionary of event ID to event ID we didn't persist as we already + had another event persisted with the same TXN ID. 
+ """ + if isinstance(task, _PersistEventsTask): + return await self._persist_event_batch(room_id, task) + elif isinstance(task, _UpdateCurrentStateTask): + await self._update_current_state(room_id, task) + return {} + else: + raise AssertionError( + f"Found an unexpected task type in event persistence queue: {task}" + ) + @opentracing.trace async def persist_events( self, @@ -329,7 +381,8 @@ class EventsPersistenceStorageController: ) -> Dict[str, str]: room_id, evs_ctxs = item return await self._event_persist_queue.add_to_queue( - room_id, evs_ctxs, backfilled=backfilled + room_id, + _PersistEventsTask(events_and_contexts=evs_ctxs, backfilled=backfilled), ) ret_vals = await yieldable_gather_results(enqueue, partitioned.items()) @@ -376,7 +429,10 @@ class EventsPersistenceStorageController: # event was deduplicated. (The dict may also include other entries if # the event was persisted in a batch with other events.) replaced_events = await self._event_persist_queue.add_to_queue( - event.room_id, [(event, context)], backfilled=backfilled + event.room_id, + _PersistEventsTask( + events_and_contexts=[(event, context)], backfilled=backfilled + ), ) replaced_event = replaced_events.get(event.event_id) if replaced_event: @@ -391,20 +447,22 @@ class EventsPersistenceStorageController: async def update_current_state(self, room_id: str) -> None: """Recalculate the current state for a room, and persist it""" + await self._event_persist_queue.add_to_queue( + room_id, + _UpdateCurrentStateTask(), + ) + + async def _update_current_state( + self, room_id: str, _task: _UpdateCurrentStateTask + ) -> None: + """Callback for the _event_persist_queue + + Recalculates the current state for a room, and persists it. + """ state = await self._calculate_current_state(room_id) delta = await self._calculate_state_delta(room_id, state) - # TODO(faster_joins): get a real stream ordering, to make this work correctly - # across workers. - # https://github.com/matrix-org/synapse/issues/12994 - # - # TODO(faster_joins): this can race against event persistence, in which case we - # will end up with incorrect state. Perhaps we should make this a job we - # farm out to the event persister thread, somehow. - # https://github.com/matrix-org/synapse/issues/13007 - # - stream_id = self.main_store.get_room_max_stream_ordering() - await self.persist_events_store.update_current_state(room_id, delta, stream_id) + await self.persist_events_store.update_current_state(room_id, delta) async def _calculate_current_state(self, room_id: str) -> StateMap[str]: """Calculate the current state of a room, based on the forward extremities @@ -449,9 +507,7 @@ class EventsPersistenceStorageController: return res.state async def _persist_event_batch( - self, - events_and_contexts: List[Tuple[EventBase, EventContext]], - backfilled: bool = False, + self, _room_id: str, task: _PersistEventsTask ) -> Dict[str, str]: """Callback for the _event_persist_queue @@ -466,6 +522,9 @@ class EventsPersistenceStorageController: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. 
""" + events_and_contexts = task.events_and_contexts + backfilled = task.backfilled + replaced_events: Dict[str, str] = {} if not events_and_contexts: return replaced_events diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8a0e4e958..2ff3d2130 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1007,16 +1007,16 @@ class PersistEventsStore: self, room_id: str, state_delta: DeltaState, - stream_id: int, ) -> None: """Update the current state stored in the datatabase for the given room""" - await self.db_pool.runInteraction( - "update_current_state", - self._update_current_state_txn, - state_delta_by_room={room_id: state_delta}, - stream_id=stream_id, - ) + async with self._stream_id_gen.get_next() as stream_ordering: + await self.db_pool.runInteraction( + "update_current_state", + self._update_current_state_txn, + state_delta_by_room={room_id: state_delta}, + stream_id=stream_ordering, + ) def _update_current_state_txn( self, diff --git a/tests/test_state.py b/tests/test_state.py index 7b3f52f68..6ca8d8f21 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -195,6 +195,8 @@ class StateTestCase(unittest.TestCase): "get_state_resolution_handler", "get_account_validity_handler", "get_macaroon_generator", + "get_instance_name", + "get_simple_http_client", "hostname", ] ) From bb20113c8f04d574dd40becf57bf291e350ea8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Thu, 7 Jul 2022 14:47:26 +0200 Subject: [PATCH 39/54] Remove obsolete RoomEventsStoreTestCase (#13200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All tests are prefixed with `STALE_` and therefore they are silently skipped. They were moved to `STALE_` in version `v0.5.0` in commit 2fcce3b3c508 - `Remove stale tests`. Tests from `RoomEventsStoreTestCase` class are not used for last 8 years, I believe the best would be to remove them entirely. Signed-off-by: Petr Vaněk --- changelog.d/13200.removal | 1 + tests/storage/test_room.py | 69 -------------------------------------- 2 files changed, 1 insertion(+), 69 deletions(-) create mode 100644 changelog.d/13200.removal diff --git a/changelog.d/13200.removal b/changelog.d/13200.removal new file mode 100644 index 000000000..755f5eb19 --- /dev/null +++ b/changelog.d/13200.removal @@ -0,0 +1 @@ +Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index 3c79dabc9..3405efb6a 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.api.constants import EventTypes from synapse.api.room_versions import RoomVersions from synapse.types import RoomAlias, RoomID, UserID @@ -65,71 +64,3 @@ class RoomStoreTestCase(HomeserverTestCase): self.assertIsNone( (self.get_success(self.store.get_room_with_stats("!uknown:test"))), ) - - -class RoomEventsStoreTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, hs): - # Room events need the full datastore, for persist_event() and - # get_room_state() - self.store = hs.get_datastores().main - self._storage_controllers = hs.get_storage_controllers() - self.event_factory = hs.get_event_factory() - - self.room = RoomID.from_string("!abcde:test") - - self.get_success( - self.store.store_room( - self.room.to_string(), - room_creator_user_id="@creator:text", - is_public=True, - room_version=RoomVersions.V1, - ) - ) - - def inject_room_event(self, **kwargs): - self.get_success( - self._storage_controllers.persistence.persist_event( - self.event_factory.create_event(room_id=self.room.to_string(), **kwargs) - ) - ) - - def STALE_test_room_name(self): - name = "A-Room-Name" - - self.inject_room_event( - etype=EventTypes.Name, name=name, content={"name": name}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.name", "room_id": self.room.to_string(), "name": name}, - state[0], - ) - - def STALE_test_room_topic(self): - topic = "A place for things" - - self.inject_room_event( - etype=EventTypes.Topic, topic=topic, content={"topic": topic}, depth=1 - ) - - state = self.get_success( - self._storage_controllers.state.get_current_state( - room_id=self.room.to_string() - ) - ) - - self.assertEqual(1, len(state)) - self.assertObjectHasAttributes( - {"type": "m.room.topic", "room_id": self.room.to_string(), "topic": topic}, - state[0], - ) - - # Not testing the various 'level' methods for now because there's lots - # of them and need coalescing; see JIRA SPEC-11 From 0c95313a448ab38629a13443ea9b3e0e5cc65d39 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 7 Jul 2022 15:18:38 +0100 Subject: [PATCH 40/54] Add --build-only option to complement.sh to prevent actually running Complement. (#13158) --- changelog.d/13158.misc | 1 + scripts-dev/complement.sh | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13158.misc diff --git a/changelog.d/13158.misc b/changelog.d/13158.misc new file mode 100644 index 000000000..1cb77c02d --- /dev/null +++ b/changelog.d/13158.misc @@ -0,0 +1 @@ +Add support to `complement.sh` for skipping the docker build. diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 705243ca9..6381f7092 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -44,8 +44,14 @@ usage() { Usage: $0 [-f] ... Run the complement test suite on Synapse. - -f Skip rebuilding the docker images, and just use the most recent - 'complement-synapse:latest' image + -f, --fast + Skip rebuilding the docker images, and just use the most recent + 'complement-synapse:latest' image. + Conflicts with --build-only. + + --build-only + Only build the Docker images. Don't actually run Complement. + Conflicts with -f/--fast. For help on arguments to 'go test', run 'go help testflag'. 
EOF @@ -53,6 +59,7 @@ EOF # parse our arguments skip_docker_build="" +skip_complement_run="" while [ $# -ge 1 ]; do arg=$1 case "$arg" in @@ -60,9 +67,12 @@ while [ $# -ge 1 ]; do usage exit 1 ;; - "-f") + "-f"|"--fast") skip_docker_build=1 ;; + "--build-only") + skip_complement_run=1 + ;; *) # unknown arg: presumably an argument to gotest. break the loop. break @@ -106,6 +116,11 @@ if [ -z "$skip_docker_build" ]; then echo_if_github "::endgroup::" fi +if [ -n "$skip_complement_run" ]; then + echo "Skipping Complement run as requested." + exit +fi + export COMPLEMENT_BASE_IMAGE=complement-synapse extra_test_args=() From a962c5a56de69c03848646f25991fabe6e4c39d1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 7 Jul 2022 11:52:45 -0500 Subject: [PATCH 41/54] Fix exception when using MSC3030 to look for remote federated events before room creation (#13197) Complement tests: https://github.com/matrix-org/complement/pull/405 This happens when you have some messages imported before the room is created. Then use MSC3030 to look backwards before the room creation from a remote federated server. The server won't find anything locally, but will ask over federation which will have the remote event. The previous logic would choke on not having the local event assigned. ``` Failed to fetch /timestamp_to_event from hs2 because of exception(UnboundLocalError) local variable 'local_event' referenced before assignment args=("local variable 'local_event' referenced before assignment",) ``` --- changelog.d/13197.bugfix | 1 + synapse/handlers/room.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13197.bugfix diff --git a/changelog.d/13197.bugfix b/changelog.d/13197.bugfix new file mode 100644 index 000000000..841724152 --- /dev/null +++ b/changelog.d/13197.bugfix @@ -0,0 +1 @@ +Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 75c0be8c3..44f808457 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1375,6 +1375,7 @@ class TimestampLookupHandler: # the timestamp given and the event we were able to find locally is_event_next_to_backward_gap = False is_event_next_to_forward_gap = False + local_event = None if local_event_id: local_event = await self.store.get_event( local_event_id, allow_none=False, allow_rejected=False @@ -1461,7 +1462,10 @@ class TimestampLookupHandler: ex.args, ) - if not local_event_id: + # To appease mypy, we have to add both of these conditions to check for + # `None`. We only expect `local_event` to be `None` when + # `local_event_id` is `None` but mypy isn't as smart and assuming as us. 
+ if not local_event_id or not local_event: raise SynapseError( 404, "Unable to find event from %s in direction %s" % (timestamp, direction), From 757bc0caefa596e747278b3bcf4269ec50ffc759 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 14:00:29 +0100 Subject: [PATCH 42/54] Fix notification count after a highlighted message (#13223) Fixes #13196 Broke by #13005 --- changelog.d/13223.bugfix | 1 + synapse/storage/databases/main/event_push_actions.py | 11 ++++++++--- tests/storage/test_event_push_actions.py | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13223.bugfix diff --git a/changelog.d/13223.bugfix b/changelog.d/13223.bugfix new file mode 100644 index 000000000..6ee3aed91 --- /dev/null +++ b/changelog.d/13223.bugfix @@ -0,0 +1 @@ +Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index a3edcbb39..1a951ac02 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -1016,9 +1016,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas upd.stream_ordering FROM ( SELECT user_id, room_id, count(*) as cnt, - max(stream_ordering) as stream_ordering - FROM event_push_actions - WHERE ? < stream_ordering AND stream_ordering <= ? + max(ea.stream_ordering) as stream_ordering + FROM event_push_actions AS ea + LEFT JOIN event_push_summary AS old USING (user_id, room_id) + WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ? + AND ( + old.last_receipt_stream_ordering IS NULL + OR old.last_receipt_stream_ordering < ea.stream_ordering + ) AND %s = 1 GROUP BY user_id, room_id ) AS upd diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index e68126777..e8c53f16d 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -196,6 +196,13 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _mark_read(10, 10) _assert_counts(0, 0) + _inject_actions(11, HIGHLIGHT) + _assert_counts(1, 1) + _mark_read(11, 11) + _assert_counts(0, 0) + _rotate(11) + _assert_counts(0, 0) + def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: self.get_success( From 739adf15511b2ce983cb5d4d6a948ff543f3b0a8 Mon Sep 17 00:00:00 2001 From: Sumner Evans Date: Fri, 8 Jul 2022 10:40:25 -0600 Subject: [PATCH 43/54] editorconfig: add max_line_length for Python files (#13228) See the documentation for the property here: https://github.com/editorconfig/editorconfig/wiki/EditorConfig-Properties#max_line_length Signed-off-by: Sumner Evans --- .editorconfig | 1 + changelog.d/13228.misc | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/13228.misc diff --git a/.editorconfig b/.editorconfig index 3edf9e717..d629bede5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -7,3 +7,4 @@ root = true [*.py] indent_style = space indent_size = 4 +max_line_length = 88 diff --git a/changelog.d/13228.misc b/changelog.d/13228.misc new file mode 100644 index 000000000..fec086557 --- /dev/null +++ b/changelog.d/13228.misc @@ -0,0 +1 @@ +Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. 
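The `/timestamp_to_event` fix in patch 41 above comes down to a small but common Python guard pattern: a variable that is only assigned inside a conditional branch must be pre-initialised (here to `None`) before any later code reads it, otherwise the read raises `UnboundLocalError`. The sketch below is illustrative only — the names and the `LookupError` are hypothetical stand-ins, not the actual Synapse handler code:

```python
from typing import Optional


class Event:
    """Toy event object standing in for a real Matrix event."""

    def __init__(self, event_id: str, origin_server_ts: int) -> None:
        self.event_id = event_id
        self.origin_server_ts = origin_server_ts


def pick_event(local_event_id: Optional[str]) -> Event:
    # Pre-initialise so the name exists even when there is no local
    # candidate; without this line the final check below would raise
    # UnboundLocalError whenever local_event_id is None.
    local_event: Optional[Event] = None
    if local_event_id:
        local_event = Event(local_event_id, origin_server_ts=123)

    # ... a remote lookup over federation could happen here ...

    # Check both values: local_event is only None when local_event_id is
    # falsy, but checking both keeps static checkers such as mypy satisfied.
    if not local_event_id or not local_event:
        raise LookupError("Unable to find a suitable event")
    return local_event
```

With this pattern, `pick_event(None)` fails with a deliberate `LookupError` rather than the accidental `UnboundLocalError` described in the commit message; checking both `local_event_id` and `local_event` is redundant at runtime but mirrors the double check the patch adds for the type checker's benefit.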
From 28d96cb2b49c12b741d03e4b74f30f8910f9942b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 11 Jul 2022 10:36:18 +0100 Subject: [PATCH 44/54] Ensure portdb selects _all_ rows with negative rowids (#13226) --- changelog.d/13226.bugfix | 1 + synapse/_scripts/synapse_port_db.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13226.bugfix diff --git a/changelog.d/13226.bugfix b/changelog.d/13226.bugfix new file mode 100644 index 000000000..df96d41f3 --- /dev/null +++ b/changelog.d/13226.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 642fd4162..26834a437 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -418,12 +418,15 @@ class Porter: self.progress.update(table, table_size) # Mark table as done return + # We sweep over rowids in two directions: one forwards (rowids 1, 2, 3, ...) + # and another backwards (rowids 0, -1, -2, ...). forward_select = ( "SELECT rowid, * FROM %s WHERE rowid >= ? ORDER BY rowid LIMIT ?" % (table,) ) backward_select = ( - "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid LIMIT ?" % (table,) + "SELECT rowid, * FROM %s WHERE rowid <= ? ORDER BY rowid DESC LIMIT ?" + % (table,) ) do_forward = [True] From a11301179494f5a2924dcd60069c06f5c192020f Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 11 Jul 2022 07:12:28 -0600 Subject: [PATCH 45/54] Fix appservice EDUs failing to send if the EDU doesn't have a room ID (#13236) * Fix appservice EDUs failing to send if the EDU doesn't have a room ID As is in the case of presence. * changelog * linter * fix linter again --- changelog.d/13236.bugfix | 1 + synapse/appservice/scheduler.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13236.bugfix diff --git a/changelog.d/13236.bugfix b/changelog.d/13236.bugfix new file mode 100644 index 000000000..7fddc4413 --- /dev/null +++ b/changelog.d/13236.bugfix @@ -0,0 +1 @@ +Fix appservices not receiving room-less EDUs, like presence, if enabled. \ No newline at end of file diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index de5e5216c..6c8695346 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -319,7 +319,9 @@ class _ServiceQueuer: rooms_of_interesting_users.update(event.room_id for event in events) # EDUs rooms_of_interesting_users.update( - ephemeral["room_id"] for ephemeral in ephemerals + ephemeral["room_id"] + for ephemeral in ephemerals + if ephemeral.get("room_id") is not None ) # Look up the AS users in those rooms From e610128c507149e46d459bf97ba0fb6a8bd34b34 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 14:14:09 +0100 Subject: [PATCH 46/54] Add a `filter_event_for_clients_with_state` function (#13222) --- changelog.d/13222.misc | 1 + synapse/app/admin_cmd.py | 13 +- synapse/visibility.py | 546 ++++++++++++++++++++++++++++----------- 3 files changed, 411 insertions(+), 149 deletions(-) create mode 100644 changelog.d/13222.misc diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc new file mode 100644 index 000000000..0bab1aed7 --- /dev/null +++ b/changelog.d/13222.misc @@ -0,0 +1 @@ +Improve memory usage of calculating push actions for events in large rooms. 
diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 561621a28..87f82bd9a 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -39,6 +39,7 @@ from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer +from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.room import RoomWorkerStore from synapse.types import StateMap from synapse.util import SYNAPSE_VERSION @@ -60,7 +61,17 @@ class AdminCmdSlavedStore( BaseSlavedStore, RoomWorkerStore, ): - pass + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + # Annoyingly `filter_events_for_client` assumes that this exists. We + # should refactor it to take a `Clock` directly. + self.clock = hs.get_clock() class AdminCmdServer(HomeServer): diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709..9abbaa5a6 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -13,16 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from enum import Enum, auto from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +import attr from typing_extensions import Final from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event from synapse.storage.controllers import StorageControllers +from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.util import Clock logger = logging.getLogger(__name__) @@ -102,153 +107,18 @@ async def filter_events_for_client( ] = await storage.main.get_retention_policy_for_room(room_id) def allowed(event: EventBase) -> Optional[EventBase]: - """ - Args: - event: event to check - - Returns: - None if the user cannot see this event at all - - a redacted copy of the event if they can only see a redacted - version - - the original event if they can see it as normal. - """ - # Only run some checks if these events aren't about to be sent to clients. This is - # because, if this is not the case, we're probably only checking if the users can - # see events in the room at that point in the DAG, and that shouldn't be decided - # on those checks. - if filter_send_to_client: - if event.type == EventTypes.Dummy: - return None - - if not event.is_state() and event.sender in ignore_list: - return None - - # Until MSC2261 has landed we can't redact malicious alias events, so for - # now we temporarily filter out m.room.aliases entirely to mitigate - # abuse, while we spec a better solution to advertising aliases - # on rooms. - if event.type == EventTypes.Aliases: - return None - - # Don't try to apply the room's retention policy if the event is a state - # event, as MSC1763 states that retention is only considered for non-state - # events. 
- if not event.is_state(): - retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.max_lifetime - - if max_lifetime is not None: - oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime - - if event.origin_server_ts < oldest_allowed_ts: - return None - - if event.event_id in always_include_ids: - return event - - # we need to handle outliers separately, since we don't have the room state. - if event.internal_metadata.outlier: - # Normally these can't be seen by clients, but we make an exception for - # for out-of-band membership events (eg, incoming invites, or rejections of - # said invite) for the user themselves. - if event.type == EventTypes.Member and event.state_key == user_id: - logger.debug("Returning out-of-band-membership event %s", event) - return event - - return None - - state = event_id_to_state[event.event_id] - - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) - - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) - - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED - - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility - - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event - - # otherwise, it depends on the room visibility. - - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None - - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. 
- return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. - return None - - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) - - return event + return _check_client_allowed_to_see_event( + user_id=user_id, + event=event, + clock=storage.main.clock, + filter_send_to_client=filter_send_to_client, + sender_ignored=event.sender in ignore_list, + always_include_ids=always_include_ids, + retention_policy=retention_policies[room_id], + state=event_id_to_state.get(event.event_id), + is_peeking=is_peeking, + sender_erased=erased_senders.get(event.sender, False), + ) # Check each event: gives an iterable of None or (a potentially modified) # EventBase. @@ -258,9 +128,389 @@ async def filter_events_for_client( return [ev for ev in filtered_events if ev] +async def filter_event_for_clients_with_state( + store: DataStore, + user_ids: Collection[str], + event: EventBase, + context: EventContext, + is_peeking: bool = False, + filter_send_to_client: bool = True, +) -> Collection[str]: + """ + Checks to see if an event is visible to the users in the list at the time of + the event. + + Note: This does *not* check if the sender of the event was erased. + + Args: + store: databases + user_ids: user_ids to be checked + event: the event to be checked + context: EventContext for the event to be checked + is_peeking: Whether the users are peeking into the room, ie not + currently joined + filter_send_to_client: Whether we're checking an event that's going to be + sent to a client. This might not always be the case since this function can + also be called to check whether a user can see the state at a given point. + + Returns: + Collection of user IDs for whom the event is visible + """ + # None of the users should see the event if it is soft_failed + if event.internal_metadata.is_soft_failed(): + return [] + + # Make a set for all user IDs that haven't been filtered out by a check. + allowed_user_ids = set(user_ids) + + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + ignored_by = await store.ignored_by(event.sender) + retention_policy = await store.get_retention_policy_for_room(event.room_id) + + for user_id in user_ids: + if ( + _check_filter_send_to_client( + event, + store.clock, + retention_policy, + sender_ignored=user_id in ignored_by, + ) + == _CheckFilter.DENIED + ): + allowed_user_ids.discard(user_id) + + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. 
+ if event.type == EventTypes.Member and event.state_key in allowed_user_ids: + logger.debug("Returning out-of-band-membership event %s", event) + return {event.state_key} + + return set() + + # First we get just the history visibility in case its shared/world-readable + # room. + visibility_state_map = await _get_state_map( + store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY]) + ) + + visibility = get_effective_room_visibility_from_state(visibility_state_map) + if ( + _check_history_visibility(event, visibility, is_peeking=is_peeking) + == _CheckVisibility.ALLOWED + ): + return allowed_user_ids + + # The history visibility isn't lax, so we now need to fetch the membership + # events of all the users. + + filter_list = [] + for user_id in allowed_user_ids: + filter_list.append((EventTypes.Member, user_id)) + filter_list.append((EventTypes.RoomHistoryVisibility, "")) + + state_filter = StateFilter.from_types(filter_list) + state_map = await _get_state_map(store, event, context, state_filter) + + # Now we check whether the membership allows each user to see the event. + return { + user_id + for user_id in allowed_user_ids + if _check_membership(user_id, event, visibility, state_map, is_peeking).allowed + } + + +async def _get_state_map( + store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter +) -> StateMap[EventBase]: + """Helper function for getting a `StateMap[EventBase]` from an `EventContext`""" + state_map = await context.get_prev_state_ids(state_filter) + + # Use events rather than event ids as content from the events are needed in + # _check_visibility + event_map = await store.get_events(state_map.values(), get_prev_content=False) + + updated_state_map = {} + for state_key, event_id in state_map.items(): + state_event = event_map.get(event_id) + if state_event: + updated_state_map[state_key] = state_event + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it + # may not have been persisted to the db yet + updated_state_map[current_state_key] = event + + return updated_state_map + + +def _check_client_allowed_to_see_event( + user_id: str, + event: EventBase, + clock: Clock, + filter_send_to_client: bool, + is_peeking: bool, + always_include_ids: FrozenSet[str], + sender_ignored: bool, + retention_policy: RetentionPolicy, + state: Optional[StateMap[EventBase]], + sender_erased: bool, +) -> Optional[EventBase]: + """Check with the given user is allowed to see the given event + + See `filter_events_for_client` for details about args + + Args: + user_id + event + clock + filter_send_to_client + is_peeking + always_include_ids + sender_ignored: Whether the user is ignoring the event sender + retention_policy: The retention policy of the room + state: The state at the event, unless its an outlier + sender_erased: Whether the event sender has been marked as "erased" + + Returns: + None if the user cannot see this event at all + + a redacted copy of the event if they can only see a redacted + version + + the original event if they can see it as normal. + """ + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. 
+ if filter_send_to_client: + if ( + _check_filter_send_to_client(event, clock, retention_policy, sender_ignored) + == _CheckFilter.DENIED + ): + return None + + if event.event_id in always_include_ids: + return event + + # we need to handle outliers separately, since we don't have the room state. + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key == user_id: + logger.debug("Returning out-of-band-membership event %s", event) + return event + + return None + + if state is None: + raise Exception("Missing state for non-outlier event") + + # get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state) + + # Check if the room has lax history visibility, allowing us to skip + # membership checks. + # + # We can only do this check if the sender has *not* been erased, as if they + # have we need to check the user's membership. + if ( + not sender_erased + and _check_history_visibility(event, visibility, is_peeking) + == _CheckVisibility.ALLOWED + ): + return event + + membership_result = _check_membership(user_id, event, visibility, state, is_peeking) + if not membership_result.allowed: + return None + + # If the sender has been erased and the user was not joined at the time, we + # must only return the redacted form. + if sender_erased and not membership_result.joined: + event = prune_event(event) + + return event + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _CheckMembershipReturn: + "Return value of _check_membership" + allowed: bool + joined: bool + + +def _check_membership( + user_id: str, + event: EventBase, + visibility: str, + state: StateMap[EventBase], + is_peeking: bool, +) -> _CheckMembershipReturn: + """Check whether the user can see the event due to their membership + + Returns: + True if they can, False if they can't, plus the membership of the user + at the event. + """ + # If the event is the user's own membership event, use the 'most joined' + # membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return _CheckMembershipReturn(True, membership == Membership.JOIN) + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. 
+ if membership is None: + membership_event = state.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return _CheckMembershipReturn(True, True) + + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return _CheckMembershipReturn(False, False) + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return _CheckMembershipReturn(membership == Membership.INVITE, False) + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return _CheckMembershipReturn(False, False) + + # The visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it. + return _CheckMembershipReturn(True, False) + + +class _CheckFilter(Enum): + MAYBE_ALLOWED = auto() + DENIED = auto() + + +def _check_filter_send_to_client( + event: EventBase, + clock: Clock, + retention_policy: RetentionPolicy, + sender_ignored: bool, +) -> _CheckFilter: + """Apply checks for sending events to client + + Returns: + True if might be allowed to be sent to clients, False if definitely not. + """ + + if event.type == EventTypes.Dummy: + return _CheckFilter.DENIED + + if not event.is_state() and sender_ignored: + return _CheckFilter.DENIED + + # Until MSC2261 has landed we can't redact malicious alias events, so for + # now we temporarily filter out m.room.aliases entirely to mitigate + # abuse, while we spec a better solution to advertising aliases + # on rooms. + if event.type == EventTypes.Aliases: + return _CheckFilter.DENIED + + # Don't try to apply the room's retention policy if the event is a state + # event, as MSC1763 states that retention is only considered for non-state + # events. + if not event.is_state(): + max_lifetime = retention_policy.max_lifetime + + if max_lifetime is not None: + oldest_allowed_ts = clock.time_msec() - max_lifetime + + if event.origin_server_ts < oldest_allowed_ts: + return _CheckFilter.DENIED + + return _CheckFilter.MAYBE_ALLOWED + + +class _CheckVisibility(Enum): + ALLOWED = auto() + MAYBE_DENIED = auto() + + +def _check_history_visibility( + event: EventBase, visibility: str, is_peeking: bool +) -> _CheckVisibility: + """Check if event is allowed to be seen due to lax history visibility. + + Returns: + True if user can definitely see the event, False if maybe not. + """ + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. 
+ if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + if visibility == HistoryVisibility.SHARED and not is_peeking: + return _CheckVisibility.ALLOWED + elif visibility == HistoryVisibility.WORLD_READABLE: + return _CheckVisibility.ALLOWED + + return _CheckVisibility.MAYBE_DENIED + + def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event - Handles missing and invalid history visibility events. """ visibility_event = state.get(_HISTORY_VIS_KEY, None) From 5ef2f875699da76e7070593418b066f5c293a12a Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:05:24 +0100 Subject: [PATCH 47/54] Document the 'databases' homeserver config option (#13212) --- changelog.d/13212.doc | 1 + .../configuration/config_documentation.md | 92 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 changelog.d/13212.doc diff --git a/changelog.d/13212.doc b/changelog.d/13212.doc new file mode 100644 index 000000000..e6b65d826 --- /dev/null +++ b/changelog.d/13212.doc @@ -0,0 +1 @@ +Add documentation for the existing `databases` option in the homeserver configuration manual. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index ef411c535..5deabb53d 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1257,6 +1257,98 @@ database: cp_max: 10 ``` --- +### `databases` + +The `databases` option allows specifying a mapping between certain database tables and +database host details, spreading the load of a single Synapse instance across multiple +database backends. This is often referred to as "database sharding". This option is only +supported for PostgreSQL database backends. + +**Important note:** This is a supported option, but is not currently used in production by the +Matrix.org Foundation. Proceed with caution and always make backups. + +`databases` is a dictionary of arbitrarily-named database entries. Each entry is equivalent +to the value of the `database` homeserver config option (see above), with the addition of +a `data_stores` key. `data_stores` is an array of strings that specifies the data store(s) +(a defined label for a set of tables) that should be stored on the associated database +backend entry. + +The currently defined values for `data_stores` are: + +* `"state"`: Database that relates to state groups will be stored in this database. + + Specifically, that means the following tables: + * `state_groups` + * `state_group_edges` + * `state_groups_state` + + And the following sequences: + * `state_groups_seq_id` + +* `"main"`: All other database tables and sequences. + +All databases will end up with additional tables used for tracking database schema migrations +and any pending background updates. Synapse will create these automatically on startup when checking for +and/or performing database schema migrations. + +To migrate an existing database configuration (e.g. 
all tables on a single database) to a different +configuration (e.g. the "main" data store on one database, and "state" on another), do the following: + +1. Take a backup of your existing database. Things can and do go wrong and database corruption is no joke! +2. Ensure all pending database migrations have been applied and background updates have run. The simplest + way to do this is to use the `update_synapse_database` script supplied with your Synapse installation. + + ```sh + update_synapse_database --database-config homeserver.yaml --run-background-updates + ``` + +3. Copy over the necessary tables and sequences from one database to the other. Tables relating to database + migrations, schemas, schema versions and background updates should **not** be copied. + + As an example, say that you'd like to split out the "state" data store from an existing database which + currently contains all data stores. + + Simply copy the tables and sequences defined above for the "state" datastore from the existing database + to the secondary database. As noted above, additional tables will be created in the secondary database + when Synapse is started. + +4. Modify/create the `databases` option in your `homeserver.yaml` to match the desired database configuration. +5. Start Synapse. Check that it starts up successfully and that things generally seem to be working. +6. Drop the old tables that were copied in step 3. + +Only one of the options `database` or `databases` may be specified in your config, but not both. + +Example configuration: + +```yaml +databases: + basement_box: + name: psycopg2 + txn_limit: 10000 + data_stores: ["main"] + args: + user: synapse_user + password: secretpassword + database: synapse_main + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 + + my_other_database: + name: psycopg2 + txn_limit: 10000 + data_stores: ["state"] + args: + user: synapse_user + password: secretpassword + database: synapse_state + host: localhost + port: 5432 + cp_min: 5 + cp_max: 10 +``` +--- ## Logging ## Config options related to logging. From f1711e1f5c40232b5749d9df23b9857b8c1eb661 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 16:51:30 +0100 Subject: [PATCH 48/54] Remove delay when rotating event push actions (#13211) We want to be as up to date as possible, and sleeping doesn't help here and can mean we fall behind. --- changelog.d/13211.misc | 1 + synapse/storage/databases/main/event_push_actions.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13211.misc diff --git a/changelog.d/13211.misc b/changelog.d/13211.misc new file mode 100644 index 000000000..4d2a6dec6 --- /dev/null +++ b/changelog.d/13211.misc @@ -0,0 +1 @@ +More aggressively rotate push actions. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 1a951ac02..dd2627037 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -143,7 +143,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self._find_stream_orderings_for_times, 10 * 60 * 1000 ) - self._rotate_delay = 3 self._rotate_count = 10000 self._doing_notif_rotation = False if hs.config.worker.run_background_tasks: @@ -847,7 +846,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) if caught_up: break - await self.hs.get_clock().sleep(self._rotate_delay) # Finally we clear out old event push actions. 
await self._remove_old_push_actions_that_have_rotated() @@ -1114,7 +1112,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) -> bool: # We don't want to clear out too much at a time, so we bound our # deletes. - batch_size = 10000 + batch_size = self._rotate_count txn.execute( """ From d736d5cfadcc9a56523fcb1cfe8cb1d2be47a4ec Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 11 Jul 2022 10:22:17 -0600 Subject: [PATCH 49/54] Fix to-device messages not being sent to MSC3202-enabled appservices (#13235) The field name was simply incorrect, leading to errors. --- changelog.d/13235.bugfix | 1 + synapse/appservice/scheduler.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13235.bugfix diff --git a/changelog.d/13235.bugfix b/changelog.d/13235.bugfix new file mode 100644 index 000000000..5c31fbc77 --- /dev/null +++ b/changelog.d/13235.bugfix @@ -0,0 +1 @@ +Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. \ No newline at end of file diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 6c8695346..430ffbcd1 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -331,8 +331,9 @@ class _ServiceQueuer: ) # Add recipients of to-device messages. - # device_message["user_id"] is the ID of the recipient. - users.update(device_message["user_id"] for device_message in to_device_messages) + users.update( + device_message["to_user_id"] for device_message in to_device_messages + ) # Compute and return the counts / fallback key usage states otk_counts = await self._store.count_bulk_e2e_one_time_keys_for_as(users) From 11f811470ff94dedc4232072b7f9ff099d4fcbd6 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 11 Jul 2022 18:52:10 +0200 Subject: [PATCH 50/54] Uniformize spam-checker API, part 5: expand other spam-checker callbacks to return `Tuple[Codes, dict]` (#13044) Signed-off-by: David Teller Co-authored-by: Brendan Abolivier --- changelog.d/13044.misc | 1 + synapse/api/errors.py | 10 +- synapse/events/spamcheck.py | 163 ++++++++++++++++----- synapse/handlers/directory.py | 6 +- synapse/handlers/federation.py | 3 +- synapse/handlers/room.py | 12 +- synapse/handlers/room_member.py | 27 +++- synapse/module_api/__init__.py | 1 + synapse/rest/media/v1/media_storage.py | 4 +- tests/rest/client/test_rooms.py | 168 +++++++++++++++++++++- tests/rest/client/utils.py | 21 +++ tests/rest/media/v1/test_media_storage.py | 70 ++++++++- 12 files changed, 426 insertions(+), 60 deletions(-) create mode 100644 changelog.d/13044.misc diff --git a/changelog.d/13044.misc b/changelog.d/13044.misc new file mode 100644 index 000000000..f9a0669dd --- /dev/null +++ b/changelog.d/13044.misc @@ -0,0 +1 @@ +Support temporary experimental return values for spam checker module callbacks. \ No newline at end of file diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cc7b78547..1c74e131f 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -297,8 +297,14 @@ class AuthError(SynapseError): other poorly-defined times. 
""" - def __init__(self, code: int, msg: str, errcode: str = Codes.FORBIDDEN): - super().__init__(code, msg, errcode) + def __init__( + self, + code: int, + msg: str, + errcode: str = Codes.FORBIDDEN, + additional_fields: Optional[dict] = None, + ): + super().__init__(code, msg, errcode, additional_fields) class InvalidClientCredentialsError(SynapseError): diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 32712d204..4a3bfb38f 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -21,7 +21,6 @@ from typing import ( Awaitable, Callable, Collection, - Dict, List, Optional, Tuple, @@ -32,10 +31,11 @@ from typing import ( from typing_extensions import Literal import synapse +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import RoomAlias, UserProfile +from synapse.types import JsonDict, RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -50,12 +50,12 @@ CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ str, - "synapse.api.errors.Codes", + Codes, # Highly experimental, not officially part of the spamchecker API, may # disappear without warning depending on the results of ongoing # experiments. # Use this to return additional information as part of an error. - Tuple["synapse.api.errors.Codes", Dict], + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -70,7 +70,12 @@ USER_MAY_JOIN_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -81,7 +86,12 @@ USER_MAY_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -92,7 +102,12 @@ USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -103,7 +118,12 @@ USER_MAY_CREATE_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -114,7 +134,12 @@ USER_MAY_CREATE_ROOM_ALIAS_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. 
+ # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -125,7 +150,12 @@ USER_MAY_PUBLISH_ROOM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -154,7 +184,12 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ Awaitable[ Union[ Literal["NOT_SPAM"], - "synapse.api.errors.Codes", + Codes, + # Highly experimental, not officially part of the spamchecker API, may + # disappear without warning depending on the results of ongoing + # experiments. + # Use this to return additional information as part of an error. + Tuple[Codes, JsonDict], # Deprecated bool, ] @@ -345,7 +380,7 @@ class SpamChecker: async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[Tuple["synapse.api.errors.Codes", Dict], str]: + ) -> Union[Tuple[Codes, JsonDict], str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -376,7 +411,16 @@ class SpamChecker: elif res is True: # This spam-checker rejects the event with deprecated # return value `True` - return (synapse.api.errors.Codes.FORBIDDEN, {}) + return synapse.api.errors.Codes.FORBIDDEN, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): + return res + elif isinstance(res, synapse.api.errors.Codes): + return res, {} elif not isinstance(res, str): # mypy complains that we can't reach this code because of the # return type in CHECK_EVENT_FOR_SPAM_CALLBACK, but we don't know @@ -422,7 +466,7 @@ class SpamChecker: async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, JsonDict], Literal["NOT_SPAM"]]: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -432,7 +476,7 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - NOT_SPAM if the operation is permitted, Codes otherwise. + NOT_SPAM if the operation is permitted, [Codes, Dict] otherwise. """ for callback in self._user_may_join_room_callbacks: with Measure( @@ -443,21 +487,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting join as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. 
return self.NOT_SPAM async def user_may_invite( self, inviter_userid: str, invitee_userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may send an invite Args: @@ -479,21 +530,28 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} # No spam-checker has rejected the request, let it pass. return self.NOT_SPAM async def user_may_send_3pid_invite( self, inviter_userid: str, medium: str, address: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may invite a given threepid into the room Note that if the threepid is already associated with a Matrix user ID, Synapse @@ -519,20 +577,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting 3pid invite as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room( self, userid: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room Args: @@ -546,20 +611,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room creation as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_create_room_alias( self, userid: str, room_alias: RoomAlias - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may create a room alias Args: @@ -575,20 +647,27 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room create as spam" ) - return 
synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM async def user_may_publish_room( self, userid: str, room_id: str - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a given user may publish a room to the directory Args: @@ -603,14 +682,21 @@ class SpamChecker: if res is True or res is self.NOT_SPAM: continue elif res is False: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting room publication as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM @@ -678,7 +764,7 @@ class SpamChecker: async def check_media_file_for_spam( self, file_wrapper: ReadableFileWrapper, file_info: FileInfo - ) -> Union["synapse.api.errors.Codes", Literal["NOT_SPAM"]]: + ) -> Union[Tuple[Codes, dict], Literal["NOT_SPAM"]]: """Checks if a piece of newly uploaded media should be blocked. This will be called for local uploads, downloads of remote media, each @@ -715,13 +801,20 @@ class SpamChecker: if res is False or res is self.NOT_SPAM: continue elif res is True: - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} elif isinstance(res, synapse.api.errors.Codes): + return res, {} + elif ( + isinstance(res, tuple) + and len(res) == 2 + and isinstance(res[0], synapse.api.errors.Codes) + and isinstance(res[1], dict) + ): return res else: logger.warning( "Module returned invalid value, rejecting media file as spam" ) - return synapse.api.errors.Codes.FORBIDDEN + return synapse.api.errors.Codes.FORBIDDEN, {} return self.NOT_SPAM diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b0f16f96..09a7a4b23 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -149,7 +149,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to create this alias", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if not self.config.roomdirectory.is_alias_creation_allowed( @@ -441,7 +442,8 @@ class DirectoryHandler: raise AuthError( 403, "This user is not permitted to publish rooms to the room list", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) if requester.is_guest: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e2564e934..3b5eaf515 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -844,7 +844,8 @@ class FederationHandler: raise SynapseError( 403, "This user is not permitted to send invites to this server/user", - spam_check, + errcode=spam_check[0], + additional_fields=spam_check[1], ) membership = event.content.get("membership") diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 44f808457..8dd94cbc7 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -440,7 +440,12 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: - raise SynapseError(403, "You are not permitted to create rooms", spam_check) + raise SynapseError( + 403, + "You are not permitted to create rooms", + 
errcode=spam_check[0], + additional_fields=spam_check[1], + ) creation_content: JsonDict = { "room_version": new_room_version.identifier, @@ -731,7 +736,10 @@ class RoomCreationHandler: spam_check = await self.spam_checker.user_may_create_room(user_id) if spam_check != NOT_SPAM: raise SynapseError( - 403, "You are not permitted to create rooms", spam_check + 403, + "You are not permitted to create rooms", + errcode=spam_check[0], + additional_fields=spam_check[1], ) if ratelimit: diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index a1d8875dd..04c44b2cc 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -685,7 +685,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if target_id == self._server_notices_mxid: raise SynapseError(HTTPStatus.FORBIDDEN, "Cannot invite this user") - block_invite_code = None + block_invite_result = None if ( self._server_notices_mxid is not None @@ -703,18 +703,21 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): "Blocking invite: user is not admin and non-admin " "invites disabled" ) - block_invite_code = Codes.FORBIDDEN + block_invite_result = (Codes.FORBIDDEN, {}) spam_check = await self.spam_checker.user_may_invite( requester.user.to_string(), target_id, room_id ) if spam_check != NOT_SPAM: logger.info("Blocking invite due to spam checker") - block_invite_code = spam_check + block_invite_result = spam_check - if block_invite_code is not None: + if block_invite_result is not None: raise SynapseError( - 403, "Invites have been disabled on this server", block_invite_code + 403, + "Invites have been disabled on this server", + errcode=block_invite_result[0], + additional_fields=block_invite_result[1], ) # An empty prev_events list is allowed as long as the auth_event_ids are present @@ -828,7 +831,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): target.to_string(), room_id, is_invited=inviter is not None ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Not allowed to join this room", spam_check) + raise SynapseError( + 403, + "Not allowed to join this room", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) # Check if a remote join should be performed. remote_join, remote_room_hosts = await self._should_perform_remote_join( @@ -1387,7 +1395,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id=room_id, ) if spam_check != NOT_SPAM: - raise SynapseError(403, "Cannot send threepid invite", spam_check) + raise SynapseError( + 403, + "Cannot send threepid invite", + errcode=spam_check[0], + additional_fields=spam_check[1], + ) stream_id = await self._make_and_store_3pid_invite( requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 6191c2dc9..6d8bf5408 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from typing_extensions import ParamSpec from twisted.internet import defer from twisted.web.resource import Resource +from synapse.api import errors from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index 913741734..a5c3de192 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -154,7 +154,9 @@ class MediaStorage: # Note that we'll delete the stored media, due to the # try/except below. The media also won't be stored in # the DB. 
- raise SpamMediaException(errcode=spam_check) + # We currently ignore any additional field returned by + # the spam-check API. + raise SpamMediaException(errcode=spam_check[0]) for provider in self.storage_providers: await provider.store_file(path, file_info) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 1ccd96a20..e67844cfa 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -22,7 +22,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from unittest.mock import Mock, call from urllib import parse as urlparse -# `Literal` appears with Python 3.8. +from parameterized import param, parameterized from typing_extensions import Literal from twisted.test.proto_helpers import MemoryReactor @@ -815,14 +815,14 @@ class RoomsCreateTestCase(RoomBase): In this test, we use the more recent API in which callbacks return a `Union[Codes, Literal["NOT_SPAM"]]`. """ - async def user_may_join_room( + async def user_may_join_room_codes( mxid: str, room_id: str, is_invite: bool, ) -> Codes: return Codes.CONSENT_NOT_GIVEN - join_mock = Mock(side_effect=user_may_join_room) + join_mock = Mock(side_effect=user_may_join_room_codes) self.hs.get_spam_checker()._user_may_join_room_callbacks.append(join_mock) channel = self.make_request( @@ -834,6 +834,25 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(join_mock.call_count, 0) + # Now change the return value of the callback to deny any join. Since we're + # creating the room, despite the return value, we should be able to join. + async def user_may_join_room_tuple( + mxid: str, + room_id: str, + is_invite: bool, + ) -> Tuple[Codes, dict]: + return Codes.INCOMPATIBLE_ROOM_VERSION, {} + + join_mock.side_effect = user_may_join_room_tuple + + channel = self.make_request( + "POST", + "/createRoom", + {}, + ) + self.assertEqual(channel.code, 200, channel.json_body) + self.assertEqual(join_mock.call_count, 0) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" @@ -1113,13 +1132,15 @@ class RoomJoinTestCase(RoomBase): """ # Register a dummy callback. Make it allow all room joins for now. - return_value: Union[Literal["NOT_SPAM"], Codes] = synapse.module_api.NOT_SPAM + return_value: Union[ + Literal["NOT_SPAM"], Tuple[Codes, dict], Codes + ] = synapse.module_api.NOT_SPAM async def user_may_join_room( userid: str, room_id: str, is_invited: bool, - ) -> Union[Literal["NOT_SPAM"], Codes]: + ) -> Union[Literal["NOT_SPAM"], Tuple[Codes, dict], Codes]: return return_value # `spec` argument is needed for this function mock to have `__qualname__`, which @@ -1163,8 +1184,28 @@ class RoomJoinTestCase(RoomBase): ) # Now make the callback deny all room joins, and check that a join actually fails. + # We pick an arbitrary Codes rather than the default `Codes.FORBIDDEN`. return_value = Codes.CONSENT_NOT_GIVEN - self.helper.join(self.room3, self.user2, expect_code=403, tok=self.tok2) + self.helper.invite(self.room3, self.user1, self.user2, tok=self.tok1) + self.helper.join( + self.room3, + self.user2, + expect_code=403, + expect_errcode=return_value, + tok=self.tok2, + ) + + # Now make the callback deny all room joins, and check that a join actually fails. + # As above, with the experimental extension that lets us return dictionaries. 
+ return_value = (Codes.BAD_ALIAS, {"another_field": "12345"}) + self.helper.join( + self.room3, + self.user2, + expect_code=403, + expect_errcode=return_value[0], + tok=self.tok2, + expect_additional_fields=return_value[1], + ) class RoomJoinRatelimitTestCase(RoomBase): @@ -1314,6 +1355,97 @@ class RoomMessagesTestCase(RoomBase): channel = self.make_request("PUT", path, content) self.assertEqual(200, channel.code, msg=channel.result["body"]) + @parameterized.expand( + [ + # Allow + param( + name="NOT_SPAM", value="NOT_SPAM", expected_code=200, expected_fields={} + ), + param(name="False", value=False, expected_code=200, expected_fields={}), + # Block + param( + name="scalene string", + value="ANY OTHER STRING", + expected_code=403, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="True", + value=True, + expected_code=403, + expected_fields={"errcode": "M_FORBIDDEN"}, + ), + param( + name="Code", + value=Codes.LIMIT_EXCEEDED, + expected_code=403, + expected_fields={"errcode": "M_LIMIT_EXCEEDED"}, + ), + param( + name="Tuple", + value=(Codes.SERVER_NOT_TRUSTED, {"additional_field": "12345"}), + expected_code=403, + expected_fields={ + "errcode": "M_SERVER_NOT_TRUSTED", + "additional_field": "12345", + }, + ), + ] + ) + def test_spam_checker_check_event_for_spam( + self, + name: str, + value: Union[str, bool, Codes, Tuple[Codes, JsonDict]], + expected_code: int, + expected_fields: dict, + ) -> None: + class SpamCheck: + mock_return_value: Union[ + str, bool, Codes, Tuple[Codes, JsonDict], bool + ] = "NOT_SPAM" + mock_content: Optional[JsonDict] = None + + async def check_event_for_spam( + self, + event: synapse.events.EventBase, + ) -> Union[str, Codes, Tuple[Codes, JsonDict], bool]: + self.mock_content = event.content + return self.mock_return_value + + spam_checker = SpamCheck() + + self.hs.get_spam_checker()._check_event_for_spam_callbacks.append( + spam_checker.check_event_for_spam + ) + + # Inject `value` as mock_return_value + spam_checker.mock_return_value = value + path = "/rooms/%s/send/m.room.message/check_event_for_spam_%s" % ( + urlparse.quote(self.room_id), + urlparse.quote(name), + ) + body = "test-%s" % name + content = '{"body":"%s","msgtype":"m.text"}' % body + channel = self.make_request("PUT", path, content) + + # Check that the callback has witnessed the correct event. + self.assertIsNotNone(spam_checker.mock_content) + if ( + spam_checker.mock_content is not None + ): # Checked just above, but mypy doesn't know about that. + self.assertEqual( + spam_checker.mock_content["body"], body, spam_checker.mock_content + ) + + # Check that we have the correct result. + self.assertEqual(expected_code, channel.code, msg=channel.result["body"]) + for expected_key, expected_value in expected_fields.items(): + self.assertEqual( + channel.json_body.get(expected_key, None), + expected_value, + "Field %s absent or invalid " % expected_key, + ) + class RoomPowerLevelOverridesTestCase(RoomBase): """Tests that the power levels can be overridden with server config.""" @@ -3235,7 +3367,8 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): make_invite_mock.assert_called_once() # Now change the return value of the callback to deny any invite and test that - # we can't send the invite. + # we can't send the invite. 
We pick an arbitrary error code to be able to check + # that the same code has been returned mock.return_value = make_awaitable(Codes.CONSENT_NOT_GIVEN) channel = self.make_request( method="POST", @@ -3249,6 +3382,27 @@ class ThreepidInviteTestCase(unittest.HomeserverTestCase): access_token=self.tok, ) self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.CONSENT_NOT_GIVEN) + + # Also check that it stopped before calling _make_and_store_3pid_invite. + make_invite_mock.assert_called_once() + + # Run variant with `Tuple[Codes, dict]`. + mock.return_value = make_awaitable((Codes.EXPIRED_ACCOUNT, {"field": "value"})) + channel = self.make_request( + method="POST", + path="/rooms/" + self.room_id + "/invite", + content={ + "id_server": "example.com", + "id_access_token": "sometoken", + "medium": "email", + "address": email_to_invite, + }, + access_token=self.tok, + ) + self.assertEqual(channel.code, 403) + self.assertEqual(channel.json_body["errcode"], Codes.EXPIRED_ACCOUNT) + self.assertEqual(channel.json_body["field"], "value") # Also check that it stopped before calling _make_and_store_3pid_invite. make_invite_mock.assert_called_once() diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index a0788b1bb..93f749744 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -41,6 +41,7 @@ from twisted.web.resource import Resource from twisted.web.server import Site from synapse.api.constants import Membership +from synapse.api.errors import Codes from synapse.server import HomeServer from synapse.types import JsonDict @@ -171,6 +172,8 @@ class RestHelper: expect_code: int = HTTPStatus.OK, tok: Optional[str] = None, appservice_user_id: Optional[str] = None, + expect_errcode: Optional[Codes] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: self.change_membership( room=room, @@ -180,6 +183,8 @@ class RestHelper: appservice_user_id=appservice_user_id, membership=Membership.JOIN, expect_code=expect_code, + expect_errcode=expect_errcode, + expect_additional_fields=expect_additional_fields, ) def knock( @@ -263,6 +268,7 @@ class RestHelper: appservice_user_id: Optional[str] = None, expect_code: int = HTTPStatus.OK, expect_errcode: Optional[str] = None, + expect_additional_fields: Optional[dict] = None, ) -> None: """ Send a membership state event into a room. 
@@ -323,6 +329,21 @@ class RestHelper: channel.result["body"], ) + if expect_additional_fields is not None: + for expect_key, expect_value in expect_additional_fields.items(): + assert expect_key in channel.json_body, "Expected field %s, got %s" % ( + expect_key, + channel.json_body, + ) + assert ( + channel.json_body[expect_key] == expect_value + ), "Expected: %s at %s, got: %s, resp: %s" % ( + expect_value, + expect_key, + channel.json_body[expect_key], + channel.json_body, + ) + self.auth_user_id = temp_id def send( diff --git a/tests/rest/media/v1/test_media_storage.py b/tests/rest/media/v1/test_media_storage.py index 1c67e1ca9..79727c430 100644 --- a/tests/rest/media/v1/test_media_storage.py +++ b/tests/rest/media/v1/test_media_storage.py @@ -23,11 +23,13 @@ from urllib import parse import attr from parameterized import parameterized, parameterized_class from PIL import Image as Image +from typing_extensions import Literal from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.test.proto_helpers import MemoryReactor +from synapse.api.errors import Codes from synapse.events import EventBase from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.logging.context import make_deferred_yieldable @@ -570,9 +572,11 @@ class MediaRepoTests(unittest.HomeserverTestCase): ) -class TestSpamChecker: +class TestSpamCheckerLegacy: """A spam checker module that rejects all media that includes the bytes `evil`. + + Uses the legacy Spam-Checker API. """ def __init__(self, config: Dict[str, Any], api: ModuleApi) -> None: @@ -613,7 +617,7 @@ class TestSpamChecker: return b"evil" in buf.getvalue() -class SpamCheckerTestCase(unittest.HomeserverTestCase): +class SpamCheckerTestCaseLegacy(unittest.HomeserverTestCase): servlets = [ login.register_servlets, admin.register_servlets, @@ -637,7 +641,8 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): { "spam_checker": [ { - "module": TestSpamChecker.__module__ + ".TestSpamChecker", + "module": TestSpamCheckerLegacy.__module__ + + ".TestSpamCheckerLegacy", "config": {}, } ] @@ -662,3 +667,62 @@ class SpamCheckerTestCase(unittest.HomeserverTestCase): self.helper.upload_media( self.upload_resource, data, tok=self.tok, expect_code=400 ) + + +EVIL_DATA = b"Some evil data" +EVIL_DATA_EXPERIMENT = b"Some evil data to trigger the experimental tuple API" + + +class SpamCheckerTestCase(unittest.HomeserverTestCase): + servlets = [ + login.register_servlets, + admin.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user = self.register_user("user", "pass") + self.tok = self.login("user", "pass") + + # Allow for uploading and downloading to/from the media repo + self.media_repo = hs.get_media_repository_resource() + self.download_resource = self.media_repo.children[b"download"] + self.upload_resource = self.media_repo.children[b"upload"] + + hs.get_module_api().register_spam_checker_callbacks( + check_media_file_for_spam=self.check_media_file_for_spam + ) + + async def check_media_file_for_spam( + self, file_wrapper: ReadableFileWrapper, file_info: FileInfo + ) -> Union[Codes, Literal["NOT_SPAM"]]: + buf = BytesIO() + await file_wrapper.write_chunks_to(buf.write) + + if buf.getvalue() == EVIL_DATA: + return Codes.FORBIDDEN + elif buf.getvalue() == EVIL_DATA_EXPERIMENT: + return (Codes.FORBIDDEN, {}) + else: + return "NOT_SPAM" + + def test_upload_innocent(self) -> None: + """Attempt to upload some innocent data that should be allowed.""" + 
self.helper.upload_media( + self.upload_resource, SMALL_PNG, tok=self.tok, expect_code=200 + ) + + def test_upload_ban(self) -> None: + """Attempt to upload some data that includes bytes "evil", which should + get rejected by the spam checker. + """ + + self.helper.upload_media( + self.upload_resource, EVIL_DATA, tok=self.tok, expect_code=400 + ) + + self.helper.upload_media( + self.upload_resource, + EVIL_DATA_EXPERIMENT, + tok=self.tok, + expect_code=400, + ) From 92202ce8670b3025bf7798831cdd5f21efa280d5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Mon, 11 Jul 2022 19:00:12 +0200 Subject: [PATCH 51/54] Reduce event lookups during room creation by passing known event IDs (#13210) Inspired by the room batch handler, this uses previous event inserts to pre-populate prev events during room creation, reducing the number of queries required to create a room. Signed off by Nick @ Beeper (@Fizzadar) --- changelog.d/13210.misc | 1 + synapse/handlers/room.py | 18 ++++++++++++++++-- tests/rest/client/test_rooms.py | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13210.misc diff --git a/changelog.d/13210.misc b/changelog.d/13210.misc new file mode 100644 index 000000000..407791b8e --- /dev/null +++ b/changelog.d/13210.misc @@ -0,0 +1 @@ +Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 8dd94cbc7..a54f163c0 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1019,6 +1019,8 @@ class RoomCreationHandler: event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} + last_sent_event_id: Optional[str] = None + def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} @@ -1028,19 +1030,27 @@ class RoomCreationHandler: return e async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: + nonlocal last_sent_event_id + event = create(etype, content, **kwargs) logger.debug("Sending %s in new room", etype) # Allow these events to be sent even if the user is shadow-banned to # allow the room creation to complete. ( - _, + sent_event, last_stream_id, ) = await self.event_creation_handler.create_and_send_nonmember_event( creator, event, ratelimit=False, ignore_shadow_ban=True, + # Note: we don't pass state_event_ids here because this triggers + # an additional query per event to look them up from the events table. + prev_event_ids=[last_sent_event_id] if last_sent_event_id else [], ) + + last_sent_event_id = sent_event.event_id + return last_stream_id try: @@ -1054,7 +1064,9 @@ class RoomCreationHandler: await send(etype=EventTypes.Create, content=creation_content) logger.debug("Sending %s in new room", EventTypes.Member) - await self.room_member_handler.update_membership( + # Room create event must exist at this point + assert last_sent_event_id is not None + member_event_id, _ = await self.room_member_handler.update_membership( creator, creator.user, room_id, @@ -1062,7 +1074,9 @@ class RoomCreationHandler: ratelimit=ratelimit, content=creator_join_profile, new_room=True, + prev_event_ids=[last_sent_event_id], ) + last_sent_event_id = member_event_id # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. 
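The `synapse/handlers/room.py` hunk above threads the ID of the most recently sent event into the next `create_and_send_nonmember_event` call via `prev_event_ids`, so each initial room event can name its parent without an extra lookup in the events table. A rough, standalone sketch of that chaining pattern (hypothetical names only, no Synapse imports; not the real room-creation code):

```python
from typing import Dict, List, Optional


def create_initial_events(event_types: List[str]) -> List[Dict[str, object]]:
    """Build a chain of events where each one points at the previous one."""
    sent_events: List[Dict[str, object]] = []
    last_sent_event_id: Optional[str] = None

    def send(etype: str) -> str:
        nonlocal last_sent_event_id
        event_id = f"$event{len(sent_events)}:example.org"  # fake event ID
        sent_events.append(
            {
                "event_id": event_id,
                "type": etype,
                # Chain from the previously sent event instead of querying a store.
                "prev_events": [last_sent_event_id] if last_sent_event_id else [],
            }
        )
        last_sent_event_id = event_id
        return event_id

    for etype in event_types:
        send(etype)
    return sent_events


events = create_initial_events(
    ["m.room.create", "m.room.member", "m.room.power_levels"]
)
# The create event has no parent; every later event points at the one before it.
assert events[0]["prev_events"] == []
assert events[1]["prev_events"] == [events[0]["event_id"]]
assert events[2]["prev_events"] == [events[1]["event_id"]]
```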
diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index e67844cfa..d19b1bb85 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -708,6 +708,21 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(33, channel.resource_usage.db_txn_count) + + def test_post_room_initial_state(self) -> None: + # POST with initial_state config key, expect new room id + channel = self.make_request( + "POST", + "/createRoom", + b'{"initial_state":[{"type": "m.bridge", "content": {}}]}', + ) + + self.assertEqual(200, channel.code, channel.result) + self.assertTrue("room_id" in channel.json_body) + assert channel.resource_usage is not None + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id From bc8eefc1e144eaeda4cb3f8171135ba03b94f2b4 Mon Sep 17 00:00:00 2001 From: villepeh <100730729+villepeh@users.noreply.github.com> Date: Mon, 11 Jul 2022 20:33:53 +0300 Subject: [PATCH 52/54] Add a sample bash script to docs for creating multiple worker files (#13032) Signed-off-by: Ville Petteri Huh. --- changelog.d/13032.doc | 1 + .../create-multiple-workers.md | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 changelog.d/13032.doc create mode 100644 contrib/workers-bash-scripts/create-multiple-workers.md diff --git a/changelog.d/13032.doc b/changelog.d/13032.doc new file mode 100644 index 000000000..54d45ecd0 --- /dev/null +++ b/changelog.d/13032.doc @@ -0,0 +1 @@ +Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. diff --git a/contrib/workers-bash-scripts/create-multiple-workers.md b/contrib/workers-bash-scripts/create-multiple-workers.md new file mode 100644 index 000000000..ad5142fe1 --- /dev/null +++ b/contrib/workers-bash-scripts/create-multiple-workers.md @@ -0,0 +1,31 @@ +# Creating multiple workers with a bash script + +Setting up multiple worker configuration files manually can be time-consuming. +You can alternatively create multiple worker configuration files with a simple `bash` script. For example: + +```sh +#!/bin/bash +for i in {1..5} +do +cat << EOF >> generic_worker$i.yaml +worker_app: synapse.app.generic_worker +worker_name: generic_worker$i + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: 808$i + resources: + - names: [client, federation] + +worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml +EOF +done +``` + +This would create five generic workers with a unique `worker_name` field in each file and listening on ports 8081-8085. + +Customise the script to your needs. 
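For comparison, a roughly equivalent Python sketch of the same generator; the worker settings and the 8081-8085 port scheme simply mirror the bash example above, and the output file names are illustrative:

```python
#!/usr/bin/env python3
# Writes generic_worker1.yaml .. generic_worker5.yaml with the same settings
# as the bash loop above.
CONFIG_TEMPLATE = """\
worker_app: synapse.app.generic_worker
worker_name: generic_worker{i}

# The replication listener on the main synapse process.
worker_replication_host: 127.0.0.1
worker_replication_http_port: 9093

worker_listeners:
  - type: http
    port: 808{i}
    resources:
      - names: [client, federation]

worker_log_config: /etc/matrix-synapse/generic-worker-log.yaml
"""

for i in range(1, 6):
    with open(f"generic_worker{i}.yaml", "w") as f:
        f.write(CONFIG_TEMPLATE.format(i=i))
```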
From e5716b631c6fe0b0a8510f16a5bffddb6396f434 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 21:08:39 +0100 Subject: [PATCH 53/54] Don't pull out the full state when calculating push actions (#13078) --- changelog.d/13078.misc | 1 + changelog.d/13222.misc | 2 +- synapse/push/bulk_push_rule_evaluator.py | 394 +++---------------- synapse/storage/_base.py | 9 + synapse/storage/databases/main/events.py | 12 + synapse/storage/databases/main/roommember.py | 86 ++++ tests/rest/client/test_rooms.py | 4 +- 7 files changed, 164 insertions(+), 344 deletions(-) create mode 100644 changelog.d/13078.misc diff --git a/changelog.d/13078.misc b/changelog.d/13078.misc new file mode 100644 index 000000000..3835e97ad --- /dev/null +++ b/changelog.d/13078.misc @@ -0,0 +1 @@ +Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc index 0bab1aed7..3835e97ad 100644 --- a/changelog.d/13222.misc +++ b/changelog.d/13222.misc @@ -1 +1 @@ -Improve memory usage of calculating push actions for events in large rooms. +Reduce memory consumption when processing incoming events in large rooms. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7791b289e..e581af9a9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -17,7 +17,6 @@ import itertools import logging from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union -import attr from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes @@ -26,13 +25,11 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership -from synapse.util.async_helpers import Linearizer -from synapse.util.caches import CacheMetric, register_cache -from synapse.util.caches.descriptors import lru_cache -from synapse.util.caches.lrucache import LruCache +from synapse.storage.state import StateFilter +from synapse.util.caches import register_cache from synapse.util.metrics import measure_func +from synapse.visibility import filter_event_for_clients_with_state -from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -48,15 +45,6 @@ push_rules_state_size_counter = Counter( "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "" ) -# Measures whether we use the fast path of using state deltas, or if we have to -# recalculate from scratch -push_rules_delta_state_cache_metric = register_cache( - "cache", - "push_rules_delta_state_cache_metric", - cache=[], # Meaningless size, as this isn't a cache that stores values - resizable=False, -) - STATE_EVENT_TYPES_TO_MARK_UNREAD = { EventTypes.Topic, @@ -111,10 +99,6 @@ class BulkPushRuleEvaluator: self.clock = hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() - # Used by `RulesForRoom` to ensure only one thing mutates the cache at a - # time. Keyed off room_id. 
- self._rules_linearizer = Linearizer(name="rules_for_room") - self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", @@ -126,48 +110,48 @@ class BulkPushRuleEvaluator: self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled async def _get_rules_for_event( - self, event: EventBase, context: EventContext + self, + event: EventBase, ) -> Dict[str, List[Dict[str, Any]]]: - """This gets the rules for all users in the room at the time of the event, - as well as the push rules for the invitee if the event is an invite. + """Get the push rules for all users who may need to be notified about + the event. + + Note: this does not check if the user is allowed to see the event. Returns: - dict of user_id -> push_rules + Mapping of user ID to their push rules. """ - room_id = event.room_id + # We get the users who may need to be notified by first fetching the + # local users currently in the room, finding those that have push rules, + # and *then* checking which users are actually allowed to see the event. + # + # The alternative is to first fetch all users that were joined at the + # event, but that requires fetching the full state at the event, which + # may be expensive for large rooms with few local users. - rules_for_room_data = self._get_rules_for_room(room_id) - rules_for_room = RulesForRoom( - hs=self.hs, - room_id=room_id, - rules_for_room_cache=self._get_rules_for_room.cache, - room_push_rule_cache_metrics=self.room_push_rule_cache_metrics, - linearizer=self._rules_linearizer, - cached_data=rules_for_room_data, - ) - - rules_by_user = await rules_for_room.get_rules(event, context) + local_users = await self.store.get_local_users_in_room(event.room_id) # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited - if event.type == "m.room.member" and event.content["membership"] == "invite": + if event.type == EventTypes.Member and event.membership == Membership.INVITE: invited = event.state_key - if invited and self.hs.is_mine_id(invited): - rules_by_user = dict(rules_by_user) - rules_by_user[invited] = await self.store.get_push_rules_for_user( - invited - ) + if invited and self.hs.is_mine_id(invited) and invited not in local_users: + local_users = list(local_users) + local_users.append(invited) + + rules_by_user = await self.store.bulk_get_push_rules(local_users) + + logger.debug("Users in room: %s", local_users) + + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Returning push rules for %r %r", + event.room_id, + list(rules_by_user.keys()), + ) return rules_by_user - @lru_cache() - def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": - """Get the current RulesForRoomData object for the given room id""" - # It's important that the RulesForRoomData object gets added to self._get_rules_for_room.cache - # before any lookup methods get called on it as otherwise there may be - # a race if invalidate_all gets called (which assumes its in the cache) - return RulesForRoomData() - async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext ) -> Tuple[dict, int]: @@ -262,10 +246,12 @@ class BulkPushRuleEvaluator: count_as_unread = _should_count_as_unread(event, context) - rules_by_user = await self._get_rules_for_event(event, context) + rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - room_members = await 
self.store.get_joined_users_from_context(event, context) + room_member_count = await self.store.get_number_joined_users_in_room( + event.room_id + ) ( power_levels, @@ -278,30 +264,36 @@ class BulkPushRuleEvaluator: evaluator = PushRuleEvaluatorForEvent( event, - len(room_members), + room_member_count, sender_power_level, power_levels, relations, self._relations_match_enabled, ) - # If the event is not a state event check if any users ignore the sender. - if not event.is_state(): - ignorers = await self.store.ignored_by(event.sender) - else: - ignorers = frozenset() + users = rules_by_user.keys() + profiles = await self.store.get_subset_users_in_room_with_profiles( + event.room_id, users + ) + + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed event + # from before the user joined, which they should not be pushed for + uids_with_visibility = await filter_event_for_clients_with_state( + self.store, users, event, context + ) for uid, rules in rules_by_user.items(): if event.sender == uid: continue - if uid in ignorers: + if uid not in uids_with_visibility: continue display_name = None - profile_info = room_members.get(uid) - if profile_info: - display_name = profile_info.display_name + profile = profiles.get(uid) + if profile: + display_name = profile.display_name if not display_name: # Handle the case where we are pushing a membership event to @@ -346,283 +338,3 @@ MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] - - -@attr.s(slots=True, auto_attribs=True) -class RulesForRoomData: - """The data stored in the cache by `RulesForRoom`. - - We don't store `RulesForRoom` directly in the cache as we want our caches to - *only* include data, and not references to e.g. the data stores. - """ - - # event_id -> EventIdMembership - member_map: MemberMap = attr.Factory(dict) - # user_id -> rules - rules_by_user: RulesByUser = attr.Factory(dict) - - # The last state group we updated the caches for. If the state_group of - # a new event comes along, we know that we can just return the cached - # result. - # On invalidation of the rules themselves (if the user changes them), - # we invalidate everything and set state_group to `object()` - state_group: StateGroup = attr.Factory(object) - - # A sequence number to keep track of when we're allowed to update the - # cache. We bump the sequence number when we invalidate the cache. If - # the sequence number changes while we're calculating stuff we should - # not update the cache with it. - sequence: int = 0 - - # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. (I.e. users we will never need to - # calculate push for) - # These never need to be invalidated as we will never set up push for - # them. - uninteresting_user_set: Set[str] = attr.Factory(set) - - -class RulesForRoom: - """Caches push rules for users in a room. - - This efficiently handles users joining/leaving the room by not invalidating - the entire cache for the room. - - A new instance is constructed for each call to - `BulkPushRuleEvaluator._get_rules_for_event`, with the cached data from - previous calls passed in. 
- """ - - def __init__( - self, - hs: "HomeServer", - room_id: str, - rules_for_room_cache: LruCache, - room_push_rule_cache_metrics: CacheMetric, - linearizer: Linearizer, - cached_data: RulesForRoomData, - ): - """ - Args: - hs: The HomeServer object. - room_id: The room ID. - rules_for_room_cache: The cache object that caches these - RoomsForUser objects. - room_push_rule_cache_metrics: The metrics object - linearizer: The linearizer used to ensure only one thing mutates - the cache at a time. Keyed off room_id - cached_data: Cached data from previous calls to `self.get_rules`, - can be mutated. - """ - self.room_id = room_id - self.is_mine_id = hs.is_mine_id - self.store = hs.get_datastores().main - self.room_push_rule_cache_metrics = room_push_rule_cache_metrics - - # Used to ensure only one thing mutates the cache at a time. Keyed off - # room_id. - self.linearizer = linearizer - - self.data = cached_data - - # We need to be clever on the invalidating caches callbacks, as - # otherwise the invalidation callback holds a reference to the object, - # potentially causing it to leak. - # To get around this we pass a function that on invalidations looks ups - # the RoomsForUser entry in the cache, rather than keeping a reference - # to self around in the callback. - self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) - - async def get_rules( - self, event: EventBase, context: EventContext - ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event context return the rules for all users who are - currently in the room. - """ - state_group = context.state_group - - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - async with self.linearizer.queue(self.room_id): - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - self.room_push_rule_cache_metrics.inc_misses() - - ret_rules_by_user = {} - missing_member_event_ids = {} - if state_group and self.data.state_group == context.prev_group: - # If we have a simple delta then we can reuse most of the previous - # results. - ret_rules_by_user = self.data.rules_by_user - current_state_ids = context.delta_ids - - push_rules_delta_state_cache_metric.inc_hits() - else: - current_state_ids = await context.get_current_state_ids() - push_rules_delta_state_cache_metric.inc_misses() - # Ensure the state IDs exist. 
- assert current_state_ids is not None - - push_rules_state_size_counter.inc(len(current_state_ids)) - - logger.debug( - "Looking for member changes in %r %r", state_group, current_state_ids - ) - - # Loop through to see which member events we've seen and have rules - # for and which we need to fetch - for key in current_state_ids: - typ, user_id = key - if typ != EventTypes.Member: - continue - - if user_id in self.data.uninteresting_user_set: - continue - - if not self.is_mine_id(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - event_id = current_state_ids[key] - - res = self.data.member_map.get(event_id, None) - if res: - if res.membership == Membership.JOIN: - rules = self.data.rules_by_user.get(res.user_id, None) - if rules: - ret_rules_by_user[res.user_id] = rules - continue - - # If a user has left a room we remove their push rule. If they - # joined then we re-add it later in _update_rules_with_member_event_ids - ret_rules_by_user.pop(user_id, None) - missing_member_event_ids[user_id] = event_id - - if missing_member_event_ids: - # If we have some member events we haven't seen, look them up - # and fetch push rules for them if appropriate. - logger.debug("Found new member events %r", missing_member_event_ids) - await self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group, event - ) - else: - # The push rules didn't change but lets update the cache anyway - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Returning push rules for %r %r", self.room_id, ret_rules_by_user.keys() - ) - return ret_rules_by_user - - async def _update_rules_with_member_event_ids( - self, - ret_rules_by_user: Dict[str, list], - member_event_ids: Dict[str, str], - state_group: Optional[int], - event: EventBase, - ) -> None: - """Update the partially filled rules_by_user dict by fetching rules for - any newly joined users in the `member_event_ids` list. - - Args: - ret_rules_by_user: Partially filled dict of push rules. Gets - updated with any new rules. - member_event_ids: Dict of user id to event id for membership events - that have happened since the last time we filled rules_by_user - state_group: The state group we are currently computing push rules - for. Used when updating the cache. - event: The event we are currently computing push rules for. - """ - sequence = self.data.sequence - - members = await self.store.get_membership_from_event_ids( - member_event_ids.values() - ) - - # If the event is a join event then it will be in current state events - # map but not in the DB, so we have to explicitly insert it. - if event.type == EventTypes.Member: - for event_id in member_event_ids.values(): - if event_id == event.event_id: - members[event_id] = EventIdMembership( - user_id=event.state_key, membership=event.membership - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Found members %r: %r", self.room_id, members.values()) - - joined_user_ids = { - entry.user_id - for entry in members.values() - if entry and entry.membership == Membership.JOIN - } - - logger.debug("Joined: %r", joined_user_ids) - - # Previously we only considered users with pushers or read receipts in that - # room. 
We can't do this anymore because we use push actions to calculate unread - # counts, which don't rely on the user having pushers or sent a read receipt into - # the room. Therefore we just need to filter for local users here. - user_ids = list(filter(self.is_mine_id, joined_user_ids)) - - rules_by_user = await self.store.bulk_get_push_rules( - user_ids, on_invalidate=self.invalidate_all_cb - ) - - ret_rules_by_user.update( - item for item in rules_by_user.items() if item[0] is not None - ) - - self.update_cache(sequence, members, ret_rules_by_user, state_group) - - def update_cache( - self, - sequence: int, - members: MemberMap, - rules_by_user: RulesByUser, - state_group: StateGroup, - ) -> None: - if sequence == self.data.sequence: - self.data.member_map.update(members) - self.data.rules_by_user = rules_by_user - self.data.state_group = state_group - - -@attr.attrs(slots=True, frozen=True, auto_attribs=True) -class _Invalidation: - # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules, - # which means that it it is stored on the bulk_get_push_rules cache entry. In order - # to ensure that we don't accumulate lots of redundant callbacks on the cache entry, - # we need to ensure that two _Invalidation objects are "equal" if they refer to the - # same `cache` and `room_id`. - # - # attrs provides suitable __hash__ and __eq__ methods, provided we remember to - # set `frozen=True`. - - cache: LruCache - room_id: str - - def __call__(self) -> None: - rules_data = self.cache.get(self.room_id, None, update_metrics=False) - if rules_data: - rules_data.sequence += 1 - rules_data.state_group = object() - rules_data.member_map = {} - rules_data.rules_by_user = {} - push_rules_invalidation_counter.inc() diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abfc56b06..b8c8dcd76 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -75,6 +75,15 @@ class SQLBaseStore(metaclass=ABCMeta): self._attempt_to_invalidate_cache( "get_users_in_room_with_profiles", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_number_joined_users_in_room", (room_id,) + ) + self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) + + for user_id in members_changed: + self._attempt_to_invalidate_cache( + "get_user_in_room_with_profile", (room_id, user_id) + ) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2ff3d2130..eb4efbb93 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1797,6 +1797,18 @@ class PersistEventsStore: self.store.get_invited_rooms_for_local_user.invalidate, (event.state_key,), ) + txn.call_after( + self.store.get_local_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_number_joined_users_in_room.invalidate, + (event.room_id,), + ) + txn.call_after( + self.store.get_user_in_room_with_profile.invalidate, + (event.room_id, event.state_key), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. 
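The cache invalidation calls added to `synapse/storage/databases/main/events.py` above are needed because the new per-room lookups introduced by this patch (`get_local_users_in_room`, `get_number_joined_users_in_room`, `get_user_in_room_with_profile`) are cached on room ID and would otherwise hand stale membership to the push-rule evaluator. A toy illustration of the same idea using plain `functools` rather than Synapse's cache machinery (all names here are made up, and `cache_clear` is a coarser stand-in for per-key invalidation):

```python
from functools import lru_cache
from typing import Dict, List, Set

memberships: Dict[str, Set[str]] = {"!room:example.org": {"@alice:example.org"}}


@lru_cache(maxsize=None)
def get_local_users_in_room(room_id: str) -> List[str]:
    # Cached lookup, analogous to the @cached store methods added below.
    return sorted(memberships[room_id])


def persist_membership_event(room_id: str, user_id: str) -> None:
    memberships[room_id].add(user_id)
    # Stand-in for txn.call_after(store.get_local_users_in_room.invalidate, (room_id,)):
    # without this, the cached value keeps returning the old membership.
    get_local_users_in_room.cache_clear()


assert get_local_users_in_room("!room:example.org") == ["@alice:example.org"]
persist_membership_event("!room:example.org", "@bob:example.org")
assert get_local_users_in_room("!room:example.org") == [
    "@alice:example.org",
    "@bob:example.org",
]
```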
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 31bc8c560..0b5e4e425 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -212,6 +212,60 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] + @cached() + def get_user_in_room_with_profile( + self, room_id: str, user_id: str + ) -> Dict[str, ProfileInfo]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_user_in_room_with_profile", list_name="user_ids" + ) + async def get_subset_users_in_room_with_profiles( + self, room_id: str, user_ids: Collection[str] + ) -> Dict[str, ProfileInfo]: + """Get a mapping from user ID to profile information for a list of users + in a given room. + + The profile information comes directly from this room's `m.room.member` + events, and so may be specific to this room rather than part of a user's + global profile. To avoid privacy leaks, the profile data should only be + revealed to users who are already in this room. + + Args: + room_id: The ID of the room to retrieve the users of. + user_ids: a list of users in the room to run the query for + + Returns: + A mapping from user ID to ProfileInfo. + """ + + def _get_subset_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: + clause, ids = make_in_list_sql_clause( + self.database_engine, "m.user_id", user_ids + ) + + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s + """ % ( + clause, + ) + txn.execute(sql, (room_id, Membership.JOIN, *ids)) + + return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} + + return await self.db_pool.runInteraction( + "get_subset_users_in_room_with_profiles", + _get_subset_users_in_room_with_profiles, + ) + @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( self, room_id: str @@ -337,6 +391,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): "get_room_summary", _get_room_summary_txn ) + @cached() + async def get_number_joined_users_in_room(self, room_id: str) -> int: + return await self.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="COUNT(*)", + desc="get_number_joined_users_in_room", + ) + @cached() async def get_invited_rooms_for_local_user( self, user_id: str @@ -416,6 +479,17 @@ class RoomMemberWorkerStore(EventsWorkerStore): user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: + """Get all the rooms for this *local* user where the membership for this user + matches one in the membership list. + + Args: + user_id: The user ID. + membership_list: A list of synapse.api.constants.Membership + values which the user must be in. + + Returns: + The RoomsForUser that the user matches the membership types. + """ # Paranoia check. if not self.hs.is_mine_id(user_id): raise Exception( @@ -444,6 +518,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results + @cached(iterable=True) + async def get_local_users_in_room(self, room_id: str) -> List[str]: + """ + Retrieves a list of the current roommembers who are local to the server. 
+ """ + return await self.db_pool.simple_select_onecol( + table="local_current_membership", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="user_id", + desc="get_local_users_in_room", + ) + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index d19b1bb85..df7ffbe54 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -709,7 +709,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(33, channel.resource_usage.db_txn_count) + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -722,7 +722,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(37, channel.resource_usage.db_txn_count) + self.assertEqual(41, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id From 6173d585df189a763256ed6dc4fcfb5aa26e5e5c Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 12 Jul 2022 11:26:25 +0100 Subject: [PATCH 54/54] 1.63.0rc1 --- CHANGES.md | 80 +++++++++++++++++++++++++++++++++++++++ changelog.d/13028.misc | 1 - changelog.d/13029.doc | 1 - changelog.d/13031.feature | 1 - changelog.d/13032.doc | 1 - changelog.d/13044.misc | 1 - changelog.d/13077.doc | 3 -- changelog.d/13078.misc | 1 - changelog.d/13079.misc | 1 - changelog.d/13086.doc | 1 - changelog.d/13100.misc | 1 - changelog.d/13103.misc | 1 - changelog.d/13113.misc | 1 - changelog.d/13116.doc | 1 - changelog.d/13119.misc | 1 - changelog.d/13125.feature | 1 - changelog.d/13127.misc | 1 - changelog.d/13129.misc | 1 - changelog.d/13131.bugfix | 1 - changelog.d/13132.doc | 1 - changelog.d/13134.misc | 1 - changelog.d/13135.misc | 1 - changelog.d/13136.misc | 1 - changelog.d/13139.doc | 1 - changelog.d/13143.misc | 1 - changelog.d/13144.misc | 1 - changelog.d/13145.misc | 1 - changelog.d/13148.feature | 1 - changelog.d/13151.misc | 1 - changelog.d/13152.misc | 1 - changelog.d/13153.misc | 1 - changelog.d/13157.misc | 1 - changelog.d/13158.misc | 1 - changelog.d/13159.misc | 1 - changelog.d/13166.doc | 1 - changelog.d/13167.misc | 1 - changelog.d/13174.bugfix | 1 - changelog.d/13194.bugfix | 1 - changelog.d/13195.misc | 1 - changelog.d/13197.bugfix | 1 - changelog.d/13200.removal | 1 - changelog.d/13207.docker | 1 - changelog.d/13209.misc | 1 - changelog.d/13210.misc | 1 - changelog.d/13211.misc | 1 - changelog.d/13212.doc | 1 - changelog.d/13222.misc | 1 - changelog.d/13223.bugfix | 1 - changelog.d/13226.bugfix | 1 - changelog.d/13228.misc | 1 - changelog.d/13235.bugfix | 1 - changelog.d/13236.bugfix | 1 - debian/changelog | 6 +++ pyproject.toml | 2 +- 54 files changed, 87 insertions(+), 54 deletions(-) delete mode 100644 changelog.d/13028.misc delete mode 100644 changelog.d/13029.doc delete mode 100644 changelog.d/13031.feature delete mode 100644 changelog.d/13032.doc delete mode 100644 changelog.d/13044.misc delete mode 100644 changelog.d/13077.doc delete mode 100644 changelog.d/13078.misc delete mode 100644 changelog.d/13079.misc delete mode 
100644 changelog.d/13086.doc delete mode 100644 changelog.d/13100.misc delete mode 100644 changelog.d/13103.misc delete mode 100644 changelog.d/13113.misc delete mode 100644 changelog.d/13116.doc delete mode 100644 changelog.d/13119.misc delete mode 100644 changelog.d/13125.feature delete mode 100644 changelog.d/13127.misc delete mode 100644 changelog.d/13129.misc delete mode 100644 changelog.d/13131.bugfix delete mode 100644 changelog.d/13132.doc delete mode 100644 changelog.d/13134.misc delete mode 100644 changelog.d/13135.misc delete mode 100644 changelog.d/13136.misc delete mode 100644 changelog.d/13139.doc delete mode 100644 changelog.d/13143.misc delete mode 100644 changelog.d/13144.misc delete mode 100644 changelog.d/13145.misc delete mode 100644 changelog.d/13148.feature delete mode 100644 changelog.d/13151.misc delete mode 100644 changelog.d/13152.misc delete mode 100644 changelog.d/13153.misc delete mode 100644 changelog.d/13157.misc delete mode 100644 changelog.d/13158.misc delete mode 100644 changelog.d/13159.misc delete mode 100644 changelog.d/13166.doc delete mode 100644 changelog.d/13167.misc delete mode 100644 changelog.d/13174.bugfix delete mode 100644 changelog.d/13194.bugfix delete mode 100644 changelog.d/13195.misc delete mode 100644 changelog.d/13197.bugfix delete mode 100644 changelog.d/13200.removal delete mode 100644 changelog.d/13207.docker delete mode 100644 changelog.d/13209.misc delete mode 100644 changelog.d/13210.misc delete mode 100644 changelog.d/13211.misc delete mode 100644 changelog.d/13212.doc delete mode 100644 changelog.d/13222.misc delete mode 100644 changelog.d/13223.bugfix delete mode 100644 changelog.d/13226.bugfix delete mode 100644 changelog.d/13228.misc delete mode 100644 changelog.d/13235.bugfix delete mode 100644 changelog.d/13236.bugfix diff --git a/CHANGES.md b/CHANGES.md index ec27cda1b..ee2f90632 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,83 @@ +Synapse 1.63.0rc1 (2022-07-12) +============================== + +Features +-------- + +- Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. ([\#13031](https://github.com/matrix-org/synapse/issues/13031)) +- Add a rate limit for local users sending invites. ([\#13125](https://github.com/matrix-org/synapse/issues/13125)) +- Improve validation logic in Synapse's REST endpoints. ([\#13148](https://github.com/matrix-org/synapse/issues/13148)) + + +Bugfixes +-------- + +- Fix application service not being able to join remote federated room without a profile set. ([\#13131](https://github.com/matrix-org/synapse/issues/13131)) +- Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. ([\#13174](https://github.com/matrix-org/synapse/issues/13174)) +- Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. ([\#13194](https://github.com/matrix-org/synapse/issues/13194)) +- Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. ([\#13197](https://github.com/matrix-org/synapse/issues/13197)) +- Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. ([\#13223](https://github.com/matrix-org/synapse/issues/13223)) +- Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. 
([\#13226](https://github.com/matrix-org/synapse/issues/13226)) +- Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. ([\#13235](https://github.com/matrix-org/synapse/issues/13235)) +- Fix appservices not receiving room-less EDUs, like presence, if enabled. ([\#13236](https://github.com/matrix-org/synapse/issues/13236)) + + +Updates to the Docker image +--------------------------- + +- Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. ([\#13207](https://github.com/matrix-org/synapse/issues/13207)) + + +Improved Documentation +---------------------- + +- Add an explanation of the `--report-stats` argument to the docs. ([\#13029](https://github.com/matrix-org/synapse/issues/13029)) +- Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. ([\#13032](https://github.com/matrix-org/synapse/issues/13032)) +- Clean up references to sample configuration and redirect users to the configuration manual instead. ([\#13077](https://github.com/matrix-org/synapse/issues/13077)) +- Add documentation for anonymised homeserver statistics collection. ([\#13086](https://github.com/matrix-org/synapse/issues/13086)) +- Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. ([\#13116](https://github.com/matrix-org/synapse/issues/13116)) +- Document how the Synapse team does reviews. ([\#13132](https://github.com/matrix-org/synapse/issues/13132)) +- Add a link to the configuration manual from the homeserver sample config documentation. ([\#13139](https://github.com/matrix-org/synapse/issues/13139)) +- Add missing links to config options. ([\#13166](https://github.com/matrix-org/synapse/issues/13166)) +- Add documentation for the existing `databases` option in the homeserver configuration manual. ([\#13212](https://github.com/matrix-org/synapse/issues/13212)) + + +Deprecations and Removals +------------------------- + +- Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. ([\#13200](https://github.com/matrix-org/synapse/issues/13200)) + + +Internal Changes +---------------- + +- Add type annotations to `tests.utils`. ([\#13028](https://github.com/matrix-org/synapse/issues/13028)) +- Support temporary experimental return values for spam checker module callbacks. ([\#13044](https://github.com/matrix-org/synapse/issues/13044)) +- Reduce memory consumption when processing incoming events in large rooms. ([\#13078](https://github.com/matrix-org/synapse/issues/13078), [\#13222](https://github.com/matrix-org/synapse/issues/13222)) +- Enable Complement testing in the 'Twisted Trunk' CI runs. ([\#13079](https://github.com/matrix-org/synapse/issues/13079), [\#13157](https://github.com/matrix-org/synapse/issues/13157)) +- Faster room joins: Handle race between persisting an event and un-partial stating a room. ([\#13100](https://github.com/matrix-org/synapse/issues/13100)) +- Add missing type hints to `synapse.logging`. ([\#13103](https://github.com/matrix-org/synapse/issues/13103)) +- Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. ([\#13113](https://github.com/matrix-org/synapse/issues/13113)) +- Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
([\#13119](https://github.com/matrix-org/synapse/issues/13119), [\#13153](https://github.com/matrix-org/synapse/issues/13153)) +- Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. ([\#13127](https://github.com/matrix-org/synapse/issues/13127)) +- Only one-line SQL statements for logging and tracing. ([\#13129](https://github.com/matrix-org/synapse/issues/13129)) +- Apply ratelimiting earlier in processing of /send request. ([\#13134](https://github.com/matrix-org/synapse/issues/13134)) +- Enforce type annotations for `tests.test_server`. ([\#13135](https://github.com/matrix-org/synapse/issues/13135)) +- Add type annotations to `tests.server`. ([\#13136](https://github.com/matrix-org/synapse/issues/13136)) +- Add support to `complement.sh` for skipping the docker build. ([\#13143](https://github.com/matrix-org/synapse/issues/13143), [\#13158](https://github.com/matrix-org/synapse/issues/13158)) +- Faster joins: skip waiting for full state when processing incoming events over federation. ([\#13144](https://github.com/matrix-org/synapse/issues/13144)) +- Improve exception handling when processing events received over federation. ([\#13145](https://github.com/matrix-org/synapse/issues/13145)) +- Faster room joins: fix race in recalculation of current room state. ([\#13151](https://github.com/matrix-org/synapse/issues/13151)) +- Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. ([\#13152](https://github.com/matrix-org/synapse/issues/13152)) +- Improve and fix type hints. ([\#13159](https://github.com/matrix-org/synapse/issues/13159)) +- Update config used by Complement to allow device name lookup over federation. ([\#13167](https://github.com/matrix-org/synapse/issues/13167)) +- Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. ([\#13195](https://github.com/matrix-org/synapse/issues/13195)) +- Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). ([\#13209](https://github.com/matrix-org/synapse/issues/13209)) +- Reduce number of events queried during room creation. Contributed by Nick @ Beeper (@fizzadar). ([\#13210](https://github.com/matrix-org/synapse/issues/13210)) +- More aggressively rotate push actions. ([\#13211](https://github.com/matrix-org/synapse/issues/13211)) +- Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. ([\#13228](https://github.com/matrix-org/synapse/issues/13228)) + + Synapse 1.62.0 (2022-07-05) =========================== diff --git a/changelog.d/13028.misc b/changelog.d/13028.misc deleted file mode 100644 index 4e5f3d8f9..000000000 --- a/changelog.d/13028.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.utils`. diff --git a/changelog.d/13029.doc b/changelog.d/13029.doc deleted file mode 100644 index d398f0fdb..000000000 --- a/changelog.d/13029.doc +++ /dev/null @@ -1 +0,0 @@ -Add an explanation of the `--report-stats` argument to the docs. diff --git a/changelog.d/13031.feature b/changelog.d/13031.feature deleted file mode 100644 index fee8e9d1f..000000000 --- a/changelog.d/13031.feature +++ /dev/null @@ -1 +0,0 @@ -Implement [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827): Filtering of /publicRooms by room type. 
diff --git a/changelog.d/13032.doc b/changelog.d/13032.doc deleted file mode 100644 index 54d45ecd0..000000000 --- a/changelog.d/13032.doc +++ /dev/null @@ -1 +0,0 @@ -Add a helpful example bash script to the contrib directory for creating multiple worker configuration files of the same type. Contributed by @villepeh. diff --git a/changelog.d/13044.misc b/changelog.d/13044.misc deleted file mode 100644 index f9a0669dd..000000000 --- a/changelog.d/13044.misc +++ /dev/null @@ -1 +0,0 @@ -Support temporary experimental return values for spam checker module callbacks. \ No newline at end of file diff --git a/changelog.d/13077.doc b/changelog.d/13077.doc deleted file mode 100644 index 502f2d059..000000000 --- a/changelog.d/13077.doc +++ /dev/null @@ -1,3 +0,0 @@ -Clean up references to sample configuration and redirect users to the configuration manual instead. - - diff --git a/changelog.d/13078.misc b/changelog.d/13078.misc deleted file mode 100644 index 3835e97ad..000000000 --- a/changelog.d/13078.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13079.misc b/changelog.d/13079.misc deleted file mode 100644 index 0133097c8..000000000 --- a/changelog.d/13079.misc +++ /dev/null @@ -1 +0,0 @@ -Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/changelog.d/13086.doc b/changelog.d/13086.doc deleted file mode 100644 index a3960ca32..000000000 --- a/changelog.d/13086.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for anonymised homeserver statistics collection. \ No newline at end of file diff --git a/changelog.d/13100.misc b/changelog.d/13100.misc deleted file mode 100644 index 28f2fe034..000000000 --- a/changelog.d/13100.misc +++ /dev/null @@ -1 +0,0 @@ -Faster room joins: Handle race between persisting an event and un-partial stating a room. diff --git a/changelog.d/13103.misc b/changelog.d/13103.misc deleted file mode 100644 index 4de5f9e90..000000000 --- a/changelog.d/13103.misc +++ /dev/null @@ -1 +0,0 @@ -Add missing type hints to `synapse.logging`. diff --git a/changelog.d/13113.misc b/changelog.d/13113.misc deleted file mode 100644 index 7b1a50eec..000000000 --- a/changelog.d/13113.misc +++ /dev/null @@ -1 +0,0 @@ -Raise a `DependencyError` on missing dependencies instead of a `ConfigError`. \ No newline at end of file diff --git a/changelog.d/13116.doc b/changelog.d/13116.doc deleted file mode 100644 index f99be50f4..000000000 --- a/changelog.d/13116.doc +++ /dev/null @@ -1 +0,0 @@ -Fix wrong section header for `allow_public_rooms_over_federation` in the homeserver config documentation. diff --git a/changelog.d/13119.misc b/changelog.d/13119.misc deleted file mode 100644 index 3bb51962e..000000000 --- a/changelog.d/13119.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. diff --git a/changelog.d/13125.feature b/changelog.d/13125.feature deleted file mode 100644 index 9b0f60954..000000000 --- a/changelog.d/13125.feature +++ /dev/null @@ -1 +0,0 @@ -Add a rate limit for local users sending invites. \ No newline at end of file diff --git a/changelog.d/13127.misc b/changelog.d/13127.misc deleted file mode 100644 index 1414811e0..000000000 --- a/changelog.d/13127.misc +++ /dev/null @@ -1 +0,0 @@ -Improve startup times in Complement test runs against workers, particularly in CPU-constrained environments. 
\ No newline at end of file diff --git a/changelog.d/13129.misc b/changelog.d/13129.misc deleted file mode 100644 index 4c2dbb705..000000000 --- a/changelog.d/13129.misc +++ /dev/null @@ -1 +0,0 @@ -Only one-line SQL statements for logging and tracing. diff --git a/changelog.d/13131.bugfix b/changelog.d/13131.bugfix deleted file mode 100644 index 06602f03f..000000000 --- a/changelog.d/13131.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix application service not being able to join remote federated room without a profile set. diff --git a/changelog.d/13132.doc b/changelog.d/13132.doc deleted file mode 100644 index c57706929..000000000 --- a/changelog.d/13132.doc +++ /dev/null @@ -1 +0,0 @@ -Document how the Synapse team does reviews. diff --git a/changelog.d/13134.misc b/changelog.d/13134.misc deleted file mode 100644 index e3e16056d..000000000 --- a/changelog.d/13134.misc +++ /dev/null @@ -1 +0,0 @@ -Apply ratelimiting earlier in processing of /send request. \ No newline at end of file diff --git a/changelog.d/13135.misc b/changelog.d/13135.misc deleted file mode 100644 index f096dd874..000000000 --- a/changelog.d/13135.misc +++ /dev/null @@ -1 +0,0 @@ -Enforce type annotations for `tests.test_server`. diff --git a/changelog.d/13136.misc b/changelog.d/13136.misc deleted file mode 100644 index 6cf451d8c..000000000 --- a/changelog.d/13136.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to `tests.server`. diff --git a/changelog.d/13139.doc b/changelog.d/13139.doc deleted file mode 100644 index f5d99d461..000000000 --- a/changelog.d/13139.doc +++ /dev/null @@ -1 +0,0 @@ -Add a link to the configuration manual from the homeserver sample config documentation. diff --git a/changelog.d/13143.misc b/changelog.d/13143.misc deleted file mode 100644 index 1cb77c02d..000000000 --- a/changelog.d/13143.misc +++ /dev/null @@ -1 +0,0 @@ -Add support to `complement.sh` for skipping the docker build. diff --git a/changelog.d/13144.misc b/changelog.d/13144.misc deleted file mode 100644 index 34762e2fc..000000000 --- a/changelog.d/13144.misc +++ /dev/null @@ -1 +0,0 @@ -Faster joins: skip waiting for full state when processing incoming events over federation. diff --git a/changelog.d/13145.misc b/changelog.d/13145.misc deleted file mode 100644 index d5e2dba86..000000000 --- a/changelog.d/13145.misc +++ /dev/null @@ -1 +0,0 @@ -Improve exception handling when processing events received over federation. diff --git a/changelog.d/13148.feature b/changelog.d/13148.feature deleted file mode 100644 index d1104b04b..000000000 --- a/changelog.d/13148.feature +++ /dev/null @@ -1 +0,0 @@ -Improve validation logic in Synapse's REST endpoints. diff --git a/changelog.d/13151.misc b/changelog.d/13151.misc deleted file mode 100644 index cfe3eed3a..000000000 --- a/changelog.d/13151.misc +++ /dev/null @@ -1 +0,0 @@ -Faster room joins: fix race in recalculation of current room state. diff --git a/changelog.d/13152.misc b/changelog.d/13152.misc deleted file mode 100644 index 0c919ab70..000000000 --- a/changelog.d/13152.misc +++ /dev/null @@ -1 +0,0 @@ -Add the ability to set the log level using the `SYNAPSE_TEST_LOG_LEVEL` environment when using `complement.sh`. \ No newline at end of file diff --git a/changelog.d/13153.misc b/changelog.d/13153.misc deleted file mode 100644 index 3bb51962e..000000000 --- a/changelog.d/13153.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce DB usage of `/sync` when a large number of unread messages have recently been sent in a room. 
diff --git a/changelog.d/13157.misc b/changelog.d/13157.misc deleted file mode 100644 index 0133097c8..000000000 --- a/changelog.d/13157.misc +++ /dev/null @@ -1 +0,0 @@ -Enable Complement testing in the 'Twisted Trunk' CI runs. \ No newline at end of file diff --git a/changelog.d/13158.misc b/changelog.d/13158.misc deleted file mode 100644 index 1cb77c02d..000000000 --- a/changelog.d/13158.misc +++ /dev/null @@ -1 +0,0 @@ -Add support to `complement.sh` for skipping the docker build. diff --git a/changelog.d/13159.misc b/changelog.d/13159.misc deleted file mode 100644 index bb5554ebe..000000000 --- a/changelog.d/13159.misc +++ /dev/null @@ -1 +0,0 @@ -Improve and fix type hints. \ No newline at end of file diff --git a/changelog.d/13166.doc b/changelog.d/13166.doc deleted file mode 100644 index 2d92e341e..000000000 --- a/changelog.d/13166.doc +++ /dev/null @@ -1 +0,0 @@ -Add missing links to config options. diff --git a/changelog.d/13167.misc b/changelog.d/13167.misc deleted file mode 100644 index a7c7a688d..000000000 --- a/changelog.d/13167.misc +++ /dev/null @@ -1 +0,0 @@ -Update config used by Complement to allow device name lookup over federation. \ No newline at end of file diff --git a/changelog.d/13174.bugfix b/changelog.d/13174.bugfix deleted file mode 100644 index b17935b93..000000000 --- a/changelog.d/13174.bugfix +++ /dev/null @@ -1 +0,0 @@ -Make use of the more robust `get_current_state` in `_get_state_map_for_room` to avoid breakages. diff --git a/changelog.d/13194.bugfix b/changelog.d/13194.bugfix deleted file mode 100644 index 2c2e8bb21..000000000 --- a/changelog.d/13194.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where rows were not deleted from `event_push_actions` table on large servers. Introduced in v1.62.0. diff --git a/changelog.d/13195.misc b/changelog.d/13195.misc deleted file mode 100644 index 5506f767b..000000000 --- a/changelog.d/13195.misc +++ /dev/null @@ -1 +0,0 @@ -Check that `auto_vacuum` is disabled when porting a SQLite database to Postgres, as `VACUUM`s must not be performed between runs of the script. \ No newline at end of file diff --git a/changelog.d/13197.bugfix b/changelog.d/13197.bugfix deleted file mode 100644 index 841724152..000000000 --- a/changelog.d/13197.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exception when using experimental [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to look for remote federated imported events before room creation. diff --git a/changelog.d/13200.removal b/changelog.d/13200.removal deleted file mode 100644 index 755f5eb19..000000000 --- a/changelog.d/13200.removal +++ /dev/null @@ -1 +0,0 @@ -Remove obsolete and for 8 years unused `RoomEventsStoreTestCase`. Contributed by @arkamar. diff --git a/changelog.d/13207.docker b/changelog.d/13207.docker deleted file mode 100644 index 63ba5c803..000000000 --- a/changelog.d/13207.docker +++ /dev/null @@ -1 +0,0 @@ -Bump the version of `lxml` in matrix.org Docker images Debian packages from 4.8.0 to 4.9.1. diff --git a/changelog.d/13209.misc b/changelog.d/13209.misc deleted file mode 100644 index cb0b8b4e6..000000000 --- a/changelog.d/13209.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce number of queries used to get profile information. Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13210.misc b/changelog.d/13210.misc deleted file mode 100644 index 407791b8e..000000000 --- a/changelog.d/13210.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce number of events queried during room creation. 
Contributed by Nick @ Beeper (@fizzadar). diff --git a/changelog.d/13211.misc b/changelog.d/13211.misc deleted file mode 100644 index 4d2a6dec6..000000000 --- a/changelog.d/13211.misc +++ /dev/null @@ -1 +0,0 @@ -More aggressively rotate push actions. diff --git a/changelog.d/13212.doc b/changelog.d/13212.doc deleted file mode 100644 index e6b65d826..000000000 --- a/changelog.d/13212.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for the existing `databases` option in the homeserver configuration manual. diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc deleted file mode 100644 index 3835e97ad..000000000 --- a/changelog.d/13222.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce memory consumption when processing incoming events in large rooms. diff --git a/changelog.d/13223.bugfix b/changelog.d/13223.bugfix deleted file mode 100644 index 6ee3aed91..000000000 --- a/changelog.d/13223.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where notification counts would get stuck after a highlighted message. Broke in v1.62.0. diff --git a/changelog.d/13226.bugfix b/changelog.d/13226.bugfix deleted file mode 100644 index df96d41f3..000000000 --- a/changelog.d/13226.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where the `synapse_port_db` script could fail to copy rows with negative row ids. diff --git a/changelog.d/13228.misc b/changelog.d/13228.misc deleted file mode 100644 index fec086557..000000000 --- a/changelog.d/13228.misc +++ /dev/null @@ -1 +0,0 @@ -Add `max_line_length` setting for Python files to the `.editorconfig`. Contributed by @sumnerevans @ Beeper. diff --git a/changelog.d/13235.bugfix b/changelog.d/13235.bugfix deleted file mode 100644 index 5c31fbc77..000000000 --- a/changelog.d/13235.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix MSC3202-enabled appservices not receiving to-device messages, preventing messages from being decrypted. \ No newline at end of file diff --git a/changelog.d/13236.bugfix b/changelog.d/13236.bugfix deleted file mode 100644 index 7fddc4413..000000000 --- a/changelog.d/13236.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix appservices not receiving room-less EDUs, like presence, if enabled. \ No newline at end of file diff --git a/debian/changelog b/debian/changelog index 520d8d20a..9f4352586 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.63.0~rc1) stable; urgency=medium + + * New Synapse release 1.63.0rc1. + + -- Synapse Packaging team Tue, 12 Jul 2022 11:26:02 +0100 + matrix-synapse-py3 (1.62.0) stable; urgency=medium * New Synapse release 1.62.0. diff --git a/pyproject.toml b/pyproject.toml index 4d1007fcb..f77c02ca2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.62.0" +version = "1.63.0rc1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0"