Ensure dir rebuild includes appservice senders

This commit is contained in:
David Robertson 2021-10-11 20:17:46 +01:00
parent 4c5afd032c
commit 7648898d8a
No known key found for this signature in database
GPG key ID: 903ECE108A39DEDD
4 changed files with 99 additions and 16 deletions

View file

@ -5,11 +5,6 @@ The user directory is currently maintained based on the 'visible' users
on this particular server - i.e. ones which your account shares a room with, or
who are present in a publicly viewable room present on the server.
The directory info is stored in various tables, which can (typically after
DB corruption) get stale or out of sync. If this happens, for now the
solution to fix it is to execute the SQL [here](https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/main/delta/53/user_dir_populate.sql)
and then restart synapse. This should then start a background task to
flush the current tables and regenerate the directory.
Data model
----------
@ -21,7 +16,8 @@ see who.
From all of these tables we exclude three types of local user:
- support users
- appservice users
- appservice users (e.g. people using IRC)
- but not the "appservice sender" (e.g. the bot which bridges Matrix to IRC).
- deactivated users
* `user_directory`. This contains the user_id, display name and avatar we'll
@ -47,3 +43,36 @@ From all of these tables we exclude three types of local user:
* `users_who_share_private_rooms`. Rows are triples `(L, M, room id)` where `L`
is a local user and `M` is a local or remote user. `L` and `M` should be
different, but this isn't enforced by a constraint.
Rebuilding the directory
------------------------
The directory info is stored in various tables, which can (typically after
DB corruption) get stale or out of sync. If this happens, for now the
solution to fix it is to execute the following SQL and then restart Synapse.
```sql
-- Set up staging tables
INSERT INTO background_updates (update_name, progress_json) VALUES
('populate_user_directory_createtables', '{}');
-- Run through each room and update the room sharing tables.
-- Also add directory entries for remote users.
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables');
-- Insert directory entries for all local users.
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms');
-- Insert directory entries for all appservice senders.
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_user_directory_process_appservice_senders', '{}', 'populate_user_directory_process_users');
-- Clean up staging tables
INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES
('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_appservice_senders');
```
This should then start a background task to
flush the current tables and regenerate the directory.

View file

@ -28,6 +28,7 @@ from typing import (
if TYPE_CHECKING:
from synapse.server import HomeServer
from synapse.appservice import ApplicationService
from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
from synapse.storage.database import DatabasePool, LoggingTransaction
@ -70,6 +71,10 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
"populate_user_directory_process_users",
self._populate_user_directory_process_users,
)
self.db_pool.updates.register_background_update_handler(
"populate_user_directory_process_appservice_senders",
self._populate_user_directory_process_appservice_senders,
)
self.db_pool.updates.register_background_update_handler(
"populate_user_directory_cleanup", self._populate_user_directory_cleanup
)
@ -379,6 +384,23 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
return len(users_to_work_on)
async def _populate_user_directory_process_appservice_senders(
self, progress: JsonDict, batch_size: int
) -> int:
"""Add appservice senders to the `user_directory` table.
Appservices users don't live in the users table, so they won't be picked up
by `_populate_user_directory_process_users`. Process them explicitly here.
"""
service: "ApplicationService"
for service in self.services_cache:
await self.update_profile_in_user_dir(service.sender, None, None)
await self.db_pool.updates._end_background_update(
"populate_user_directory_process_appservice_senders"
)
return 1
async def should_include_local_user_in_dir(self, user: str) -> bool:
"""Certain classes of local user are omitted from the user directory.
Is this user one of them?

View file

@ -13,6 +13,8 @@
* limitations under the License.
*/
-- For the most up-to-date version of these instructions, see docs/user_directory.md
-- Set up staging tables
INSERT INTO background_updates (update_name, progress_json) VALUES
('populate_user_directory_createtables', '{}');

View file

@ -134,7 +134,9 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
hostname="test",
id="1234",
namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]},
sender="@as:test",
# Note: this user does not match the regex above, so that tests
# can distinguish the sender from the AS user.
sender="@as_main:test",
)
mock_load_appservices = Mock(return_value=[self.appservice])
@ -205,12 +207,22 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": "populate_user_directory_cleanup",
"update_name": "populate_user_directory_process_appservice_senders",
"progress_json": "{}",
"depends_on": "populate_user_directory_process_users",
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": "populate_user_directory_cleanup",
"progress_json": "{}",
"depends_on": "populate_user_directory_process_appservice_senders",
},
)
)
self.wait_for_background_updates()
@ -254,7 +266,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
# All three should have entries in the directory
users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
self.assertEqual(users, {u1, u2, u3})
self.assertEqual(users, {u1, u2, u3, self.appservice.sender})
# The next three tests (test_population_excludes_*) all set up
# - A normal user included in the user dir
@ -290,7 +302,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
) -> None:
# After rebuilding the directory, we should only see the normal user.
users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
self.assertEqual(users, {normal_user})
self.assertEqual(users, {normal_user, self.appservice.sender})
in_public_rooms = self.get_success(
self.user_dir_helper.get_users_in_public_rooms()
)
@ -364,7 +376,7 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
# Check the AS user is not in the directory.
self._check_room_sharing_tables(user, public, private)
def test_population_excludes_appservice_sender(self) -> None:
def test_population_includes_appservice_sender(self) -> None:
user = self.register_user("user", "pass")
token = self.login(user, "pass")
@ -376,8 +388,25 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
# Rebuild the directory.
self._purge_and_rebuild_user_dir()
# Check the AS sender is not in the directory.
self._check_room_sharing_tables(user, public, private)
# Check the AS sender is in the directory.
users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
self.assertEqual(users, {user, self.appservice.sender})
in_public_rooms = self.get_success(
self.user_dir_helper.get_users_in_public_rooms()
)
self.assertEqual(
set(in_public_rooms), {(user, public), (self.appservice.sender, public)}
)
in_private_rooms = self.get_success(
self.user_dir_helper.get_users_who_share_private_rooms()
)
self.assertEqual(
self.user_dir_helper._compress_shared(in_private_rooms),
{
(user, self.appservice.sender, private),
(self.appservice.sender, user, private),
},
)
def test_population_conceals_private_nickname(self) -> None:
# Make a private room, and set a nickname within
@ -407,15 +436,16 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase):
self._purge_and_rebuild_user_dir()
# Local users are ignored by the scan over rooms
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
self.assertEqual(users, {})
users = self.get_success(self.user_dir_helper.get_users_in_user_directory())
self.assertEqual(users, {self.appservice.sender})
# Do a full rebuild including the scan over the `users` table. The local
# user should appear with their profile name.
self._purge_and_rebuild_user_dir()
users = self.get_success(self.user_dir_helper.get_profiles_in_user_directory())
self.assertEqual(
users, {user: ProfileInfo(display_name="aaaa", avatar_url=None)}
users[user],
ProfileInfo(display_name="aaaa", avatar_url=None),
)