From 7648898d8a3efd7d73e478d54596279a29f965eb Mon Sep 17 00:00:00 2001
From: David Robertson
Date: Mon, 11 Oct 2021 20:17:46 +0100
Subject: [PATCH] Ensure dir rebuild includes appservice senders

---
 docs/user_directory.md                        | 41 ++++++++++++---
 .../storage/databases/main/user_directory.py  | 22 ++++++++
 .../main/delta/53/user_dir_populate.sql       |  2 +
 tests/storage/test_user_directory.py          | 50 +++++++++++++++----
 4 files changed, 99 insertions(+), 16 deletions(-)

diff --git a/docs/user_directory.md b/docs/user_directory.md
index 07fe954891..658848f499 100644
--- a/docs/user_directory.md
+++ b/docs/user_directory.md
@@ -5,11 +5,6 @@ The user directory is currently maintained based on the 'visible' users
 on this particular server - i.e. ones which your account shares a room with, or
 who are present in a publicly viewable room present on the server.
 
-The directory info is stored in various tables, which can (typically after
-DB corruption) get stale or out of sync. If this happens, for now the
-solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql)
-and then restart synapse. This should then start a background task to
-flush the current tables and regenerate the directory.
 
 Data model
 ----------
@@ -21,7 +16,8 @@ see who.
 
 From all of these tables we exclude three types of local user:
   - support users
-  - appservice users
+  - appservice users (e.g. people using IRC)
+    - but not the "appservice sender" (e.g. the bot which bridges Matrix to IRC).
   - deactivated users
 
 * `user_directory`. This contains the user_id, display name and avatar we'll
@@ -47,3 +43,36 @@ From all of these tables we exclude three types of local user:
 * `users_who_share_private_rooms`. Rows are triples `(L, M, room id)` where `L`
   is a local user and `M` is a local or remote user. `L` and `M` should be
   different, but this isn't enforced by a constraint.
+
+
+Rebuilding the directory
+------------------------
+
+The directory info is stored in various tables, which can (typically after
+DB corruption) get stale or out of sync. If this happens, the current
+remedy is to execute the following SQL and then restart Synapse.
+
+```sql
+-- Set up staging tables
+INSERT INTO background_updates (update_name, progress_json) VALUES
+    ('populate_user_directory_createtables', '{}');
+
+-- Run through each room and update the room sharing tables.
+-- Also add directory entries for remote users.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables');
+
+-- Insert directory entries for all local users.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms');
+
+-- Insert directory entries for all appservice senders.
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_process_appservice_senders', '{}', 'populate_user_directory_process_users');
+
+-- Clean up staging tables
+INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
+    ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_appservice_senders');
+```
+
+Synapse will then run a background task to flush the current tables and regenerate the directory.
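The rebuild chain above can also be queued programmatically rather than from a SQL shell. A minimal sketch, assuming a PostgreSQL-backed Synapse reachable via psycopg2 (the connection parameters are assumptions, not part of Synapse); Synapse must still be restarted afterwards:

```python
# Queue the five user-directory rebuild steps by inserting rows into
# background_updates, mirroring the SQL in docs/user_directory.md.
import psycopg2

# (update_name, depends_on) pairs, in the same order as the SQL above.
UPDATES = [
    ("populate_user_directory_createtables", None),
    ("populate_user_directory_process_rooms", "populate_user_directory_createtables"),
    ("populate_user_directory_process_users", "populate_user_directory_process_rooms"),
    (
        "populate_user_directory_process_appservice_senders",
        "populate_user_directory_process_users",
    ),
    (
        "populate_user_directory_cleanup",
        "populate_user_directory_process_appservice_senders",
    ),
]

# Connection parameters are illustrative; adjust for your deployment.
with psycopg2.connect(dbname="synapse", user="synapse") as conn:
    with conn.cursor() as cur:
        for name, depends_on in UPDATES:
            cur.execute(
                "INSERT INTO background_updates"
                " (update_name, progress_json, depends_on)"
                " VALUES (%s, '{}', %s)",
                (name, depends_on),  # a depends_on of None becomes SQL NULL
            )
# Leaving the `with` block commits the transaction.
```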
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index 88d6dbd2ca..9fe5bbc8b1 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -28,6 +28,7 @@ from typing import (
 
 if TYPE_CHECKING:
     from synapse.server import HomeServer
+    from synapse.appservice import ApplicationService
 
 from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
 from synapse.storage.database import DatabasePool, LoggingTransaction
@@ -70,6 +71,10 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
             "populate_user_directory_process_users",
             self._populate_user_directory_process_users,
         )
+        self.db_pool.updates.register_background_update_handler(
+            "populate_user_directory_process_appservice_senders",
+            self._populate_user_directory_process_appservice_senders,
+        )
         self.db_pool.updates.register_background_update_handler(
             "populate_user_directory_cleanup", self._populate_user_directory_cleanup
         )
@@ -379,6 +384,23 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
 
         return len(users_to_work_on)
 
+    async def _populate_user_directory_process_appservice_senders(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """Add appservice senders to the `user_directory` table.
+
+        Appservice senders don't live in the `users` table, so they won't be picked
+        up by `_populate_user_directory_process_users`. Process them explicitly here.
+        """
+        service: "ApplicationService"
+        for service in self.services_cache:
+            await self.update_profile_in_user_dir(service.sender, None, None)
+
+        await self.db_pool.updates._end_background_update(
+            "populate_user_directory_process_appservice_senders"
+        )
+        return 1
+
     async def should_include_local_user_in_dir(self, user: str) -> bool:
         """Certain classes of local user are omitted from the user directory.
         Is this user one of them?
diff --git a/synapse/storage/schema/main/delta/53/user_dir_populate.sql b/synapse/storage/schema/main/delta/53/user_dir_populate.sql
index ffcc896b58..74b921453f 100644
--- a/synapse/storage/schema/main/delta/53/user_dir_populate.sql
+++ b/synapse/storage/schema/main/delta/53/user_dir_populate.sql
@@ -13,6 +13,8 @@
  * limitations under the License.
  */
 
+-- For the most up-to-date version of these instructions, see docs/user_directory.md
+
 -- Set up staging tables
 INSERT INTO background_updates (update_name, progress_json) VALUES
   ('populate_user_directory_createtables', '{}');
diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py
index 3831fbc8ef..33229c5f95 100644
--- a/tests/storage/test_user_directory.py
+++ b/tests/storage/test_user_directory.py
@@ -134,7 +134,9 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
             hostname="test",
             id="1234",
             namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
-            sender="@as:test",
+            # Note: this user does not match the regex above, so that tests
+            # can distinguish the sender from the AS user.
+            sender="@as_main:test",
         )
 
         mock_load_appservices = Mock(return_value=[self.appservice])
@@ -205,12 +207,22 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         self.get_success(
             self.store.db_pool.simple_insert(
                 "background_updates",
                 {
-                    "update_name": "populate_user_directory_cleanup",
+                    "update_name": "populate_user_directory_process_appservice_senders",
                     "progress_json": "{}",
                     "depends_on": "populate_user_directory_process_users",
                 },
             )
         )
+        self.get_success(
+            self.store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": "populate_user_directory_cleanup",
+                    "progress_json": "{}",
+                    "depends_on": "populate_user_directory_process_appservice_senders",
+                },
+            )
+        )
 
         self.wait_for_background_updates()
 
@@ -254,7 +266,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
 
-        # All three should have entries in the directory
+        # All three users, plus the AS sender, should have entries in the directory
         users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
-        self.assertEqual(users, {u1, u2, u3})
+        self.assertEqual(users, {u1, u2, u3, self.appservice.sender})
 
     # The next three tests (test_population_excludes_*) all set up
     #   - A normal user included in the user dir
@@ -290,7 +302,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
     ) -> None:
-        # After rebuilding the directory, we should only see the normal user.
+        # After rebuilding the directory, we should see the normal user and the AS sender.
         users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
-        self.assertEqual(users, {normal_user})
+        self.assertEqual(users, {normal_user, self.appservice.sender})
         in_public_rooms = self.get_success(
             self.user_dir_helper.get_users_in_public_rooms()
         )
@@ -364,7 +376,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         # Check the AS user is not in the directory.
         self._check_room_sharing_tables(user, public, private)
 
-    def test_population_excludes_appservice_sender(self) -> None:
+    def test_population_includes_appservice_sender(self) -> None:
         user = self.register_user("user", "pass")
         token = self.login(user, "pass")
 
@@ -376,8 +388,25 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         # Rebuild the directory.
         self._purge_and_rebuild_user_dir()
 
-        # Check the AS sender is not in the directory.
-        self._check_room_sharing_tables(user, public, private)
+        # Check the AS sender is in the directory.
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        self.assertEqual(users, {user, self.appservice.sender})
+        in_public_rooms = self.get_success(
+            self.user_dir_helper.get_users_in_public_rooms()
+        )
+        self.assertEqual(
+            set(in_public_rooms), {(user, public), (self.appservice.sender, public)}
+        )
+        in_private_rooms = self.get_success(
+            self.user_dir_helper.get_users_who_share_private_rooms()
+        )
+        self.assertEqual(
+            self.user_dir_helper._compress_shared(in_private_rooms),
+            {
+                (user, self.appservice.sender, private),
+                (self.appservice.sender, user, private),
+            },
+        )
 
     def test_population_conceals_private_nickname(self) -> None:
         # Make a private room, and set a nickname within
@@ -407,15 +436,16 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
         self._purge_and_rebuild_user_dir()
 
-        # Local users are ignored by the scan over rooms
-        users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
-        self.assertEqual(users, {})
+        # Local users are ignored by the scan over rooms; the AS sender is still added.
+        users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
+        self.assertEqual(users, {self.appservice.sender})
 
         # Do a full rebuild including the scan over the `users` table. The local
         # user should appear with their profile name.
         self._purge_and_rebuild_user_dir()
 
         users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
         self.assertEqual(
-            users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)}
+            users[user],
+            ProfileInfo(display_name="aaaa", avatar_url=None),
         )
 
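Once the rebuild has run, it is worth sanity-checking that the appservice sender actually landed in the directory. A minimal sketch, again assuming psycopg2; `@as_main:example.com` is a placeholder sender id, not something this patch defines:

```python
# Check that no user-directory background updates remain, and that the
# appservice sender has a row in user_directory.
import psycopg2

SENDER = "@as_main:example.com"  # placeholder; use your appservice's sender

with psycopg2.connect(dbname="synapse", user="synapse") as conn:
    with conn.cursor() as cur:
        # Completed background updates are deleted from this table, so an
        # empty result means the rebuild has finished.
        cur.execute(
            "SELECT update_name FROM background_updates"
            " WHERE update_name LIKE 'populate_user_directory%'"
        )
        print("pending updates:", [row[0] for row in cur.fetchall()])

        cur.execute(
            "SELECT user_id, display_name FROM user_directory"
            " WHERE user_id = %s",
            (SENDER,),
        )
        print("sender's directory row:", cur.fetchone())
```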