From f25b0f88081bb436bef914983cff7087b54eba5f Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 7 Jul 2023 09:23:27 -0700 Subject: [PATCH] Stop writing to column `user_id` of tables `profiles` and `user_filters` (#15787) --- changelog.d/15787.misc | 1 + synapse/storage/database.py | 2 + synapse/storage/databases/main/__init__.py | 6 +- synapse/storage/databases/main/filtering.py | 5 +- synapse/storage/databases/main/profile.py | 12 +-- synapse/storage/schema/__init__.py | 9 +- .../79/01_drop_user_id_constraint_profiles.py | 50 ++++++++++ ...02_drop_user_id_constraint_user_filters.py | 54 +++++++++++ tests/storage/test_profile.py | 63 ------------- tests/storage/test_user_filters.py | 94 ------------------- 10 files changed, 123 insertions(+), 173 deletions(-) create mode 100644 changelog.d/15787.misc create mode 100644 synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py create mode 100644 synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py delete mode 100644 tests/storage/test_user_filters.py diff --git a/changelog.d/15787.misc b/changelog.d/15787.misc new file mode 100644 index 000000000..bd7536d36 --- /dev/null +++ b/changelog.d/15787.misc @@ -0,0 +1 @@ +Stop writing to column `user_id` of tables `profiles` and `user_filters`. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index a1c8fb0f4..c9d687fb2 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -98,6 +98,8 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { "event_push_summary": "event_push_summary_unique_index2", "receipts_linearized": "receipts_linearized_unique_index", "receipts_graph": "receipts_graph_unique_index", + "profiles": "profiles_full_user_id_key_idx", + "user_filters": "full_users_filters_unique_idx", } diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 80c0304b1..b6028853c 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast from synapse.api.constants import Direction from synapse.config.homeserver import HomeServerConfig @@ -196,7 +196,7 @@ class DataStore( txn: LoggingTransaction, ) -> Tuple[List[JsonDict], int]: filters = [] - args = [self.hs.config.server.server_name] + args: List[Union[str, int]] = [] # Set ordering order_by_column = UserSortOrder(order_by).value @@ -263,7 +263,7 @@ class DataStore( sql_base = f""" FROM users as u - LEFT JOIN profiles AS p ON u.name = '@' || p.user_id || ':' || ? + LEFT JOIN profiles AS p ON u.name = p.full_user_id LEFT JOIN erased_users AS eu ON u.name = eu.user_id {where_clause} """ diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py index fff417f9e..75f7fe875 100644 --- a/synapse/storage/databases/main/filtering.py +++ b/synapse/storage/databases/main/filtering.py @@ -188,14 +188,13 @@ class FilteringWorkerStore(SQLBaseStore): filter_id = max_id + 1 sql = ( - "INSERT INTO user_filters (full_user_id, user_id, filter_id, filter_json)" - "VALUES(?, ?, ?, ?)" + "INSERT INTO user_filters (full_user_id, filter_id, filter_json)" + "VALUES(?, ?, ?)" ) txn.execute( sql, ( user_id.to_string(), - user_id.localpart, filter_id, bytearray(def_json), ), diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index 3ba9cc885..660a5507b 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -173,10 +173,9 @@ class ProfileWorkerStore(SQLBaseStore): ) async def create_profile(self, user_id: UserID) -> None: - user_localpart = user_id.localpart await self.db_pool.simple_insert( table="profiles", - values={"user_id": user_localpart, "full_user_id": user_id.to_string()}, + values={"full_user_id": user_id.to_string()}, desc="create_profile", ) @@ -191,13 +190,11 @@ class ProfileWorkerStore(SQLBaseStore): new_displayname: The new display name. If this is None, the user's display name is removed. """ - user_localpart = user_id.localpart await self.db_pool.simple_upsert( table="profiles", - keyvalues={"user_id": user_localpart}, + keyvalues={"full_user_id": user_id.to_string()}, values={ "displayname": new_displayname, - "full_user_id": user_id.to_string(), }, desc="set_profile_displayname", ) @@ -213,11 +210,10 @@ class ProfileWorkerStore(SQLBaseStore): new_avatar_url: The new avatar URL. If this is None, the user's avatar is removed. """ - user_localpart = user_id.localpart await self.db_pool.simple_upsert( table="profiles", - keyvalues={"user_id": user_localpart}, - values={"avatar_url": new_avatar_url, "full_user_id": user_id.to_string()}, + keyvalues={"full_user_id": user_id.to_string()}, + values={"avatar_url": new_avatar_url}, desc="set_profile_avatar_url", ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index fc190a8b1..6d14963c0 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 78 # remember to update the list below when updating +SCHEMA_VERSION = 79 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -106,6 +106,9 @@ Changes in SCHEMA_VERSION = 77 Changes in SCHEMA_VERSION = 78 - Validate check (full_user_id IS NOT NULL) on tables profiles and user_filters + +Changes in SCHEMA_VERSION = 79 + - We no longer write to column user_id of tables profiles and user_filters """ @@ -118,7 +121,9 @@ SCHEMA_COMPAT_VERSION = ( # # insertions to the column `full_user_id` of tables profiles and user_filters can no # longer be null - 76 + # + # we no longer write to column `full_user_id` of tables profiles and user_filters + 78 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py b/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py new file mode 100644 index 000000000..3541266f7 --- /dev/null +++ b/synapse/storage/schema/main/delta/79/01_drop_user_id_constraint_profiles.py @@ -0,0 +1,50 @@ +from synapse.storage.database import LoggingTransaction +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine + + +def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None: + """ + Update to drop the NOT NULL constraint on column user_id so that we can cease to + write to it without inserts to other columns triggering the constraint + """ + + if isinstance(database_engine, PostgresEngine): + drop_sql = """ + ALTER TABLE profiles ALTER COLUMN user_id DROP NOT NULL + """ + cur.execute(drop_sql) + else: + # irritatingly in SQLite we need to rewrite the table to drop the constraint. + cur.execute("DROP TABLE IF EXISTS temp_profiles") + + create_sql = """ + CREATE TABLE temp_profiles ( + full_user_id text NOT NULL, + user_id text, + displayname text, + avatar_url text, + UNIQUE (full_user_id), + UNIQUE (user_id) + ) + """ + cur.execute(create_sql) + + copy_sql = """ + INSERT INTO temp_profiles ( + user_id, + displayname, + avatar_url, + full_user_id) + SELECT user_id, displayname, avatar_url, full_user_id FROM profiles + """ + cur.execute(copy_sql) + + drop_sql = """ + DROP TABLE profiles + """ + cur.execute(drop_sql) + + rename_sql = """ + ALTER TABLE temp_profiles RENAME to profiles + """ + cur.execute(rename_sql) diff --git a/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py b/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py new file mode 100644 index 000000000..8e7569c47 --- /dev/null +++ b/synapse/storage/schema/main/delta/79/02_drop_user_id_constraint_user_filters.py @@ -0,0 +1,54 @@ +from synapse.storage.database import LoggingTransaction +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine + + +def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None: + """ + Update to drop the NOT NULL constraint on column user_id so that we can cease to + write to it without inserts to other columns triggering the constraint + """ + if isinstance(database_engine, PostgresEngine): + drop_sql = """ + ALTER TABLE user_filters ALTER COLUMN user_id DROP NOT NULL + """ + cur.execute(drop_sql) + + else: + # irritatingly in SQLite we need to rewrite the table to drop the constraint. + cur.execute("DROP TABLE IF EXISTS temp_user_filters") + + create_sql = """ + CREATE TABLE temp_user_filters ( + full_user_id text NOT NULL, + user_id text, + filter_id bigint NOT NULL, + filter_json bytea NOT NULL + ) + """ + cur.execute(create_sql) + + index_sql = """ + CREATE UNIQUE INDEX IF NOT EXISTS user_filters_full_user_id_unique ON + temp_user_filters (full_user_id, filter_id) + """ + cur.execute(index_sql) + + copy_sql = """ + INSERT INTO temp_user_filters ( + user_id, + filter_id, + filter_json, + full_user_id) + SELECT user_id, filter_id, filter_json, full_user_id FROM user_filters + """ + cur.execute(copy_sql) + + drop_sql = """ + DROP TABLE user_filters + """ + cur.execute(drop_sql) + + rename_sql = """ + ALTER TABLE temp_user_filters RENAME to user_filters + """ + cur.execute(rename_sql) diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py index fe5bb7791..bbe8bd88b 100644 --- a/tests/storage/test_profile.py +++ b/tests/storage/test_profile.py @@ -15,8 +15,6 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.server import HomeServer -from synapse.storage.database import LoggingTransaction -from synapse.storage.engines import PostgresEngine from synapse.types import UserID from synapse.util import Clock @@ -64,64 +62,3 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase): self.assertIsNone( self.get_success(self.store.get_profile_avatar_url(self.u_frank)) ) - - def test_profiles_bg_migration(self) -> None: - """ - Test background job that copies entries from column user_id to full_user_id, adding - the hostname in the process. - """ - updater = self.hs.get_datastores().main.db_pool.updates - - # drop the constraint so we can insert nulls in full_user_id to populate the test - if isinstance(self.store.database_engine, PostgresEngine): - - def f(txn: LoggingTransaction) -> None: - txn.execute( - "ALTER TABLE profiles DROP CONSTRAINT full_user_id_not_null" - ) - - self.get_success(self.store.db_pool.runInteraction("", f)) - - for i in range(0, 70): - self.get_success( - self.store.db_pool.simple_insert( - "profiles", - {"user_id": f"hello{i:02}"}, - ) - ) - - # re-add the constraint so that when it's validated it actually exists - if isinstance(self.store.database_engine, PostgresEngine): - - def f(txn: LoggingTransaction) -> None: - txn.execute( - "ALTER TABLE profiles ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID" - ) - - self.get_success(self.store.db_pool.runInteraction("", f)) - - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - values={ - "update_name": "populate_full_user_id_profiles", - "progress_json": "{}", - }, - ) - ) - - self.get_success( - updater.run_background_updates(False), - ) - - expected_values = [] - for i in range(0, 70): - expected_values.append((f"@hello{i:02}:{self.hs.hostname}",)) - - res = self.get_success( - self.store.db_pool.execute( - "", None, "SELECT full_user_id from profiles ORDER BY full_user_id" - ) - ) - self.assertEqual(len(res), len(expected_values)) - self.assertEqual(res, expected_values) diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py deleted file mode 100644 index bab802f56..000000000 --- a/tests/storage/test_user_filters.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2023 The Matrix.org Foundation C.I.C -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from twisted.test.proto_helpers import MemoryReactor - -from synapse.server import HomeServer -from synapse.storage.database import LoggingTransaction -from synapse.storage.engines import PostgresEngine -from synapse.util import Clock - -from tests import unittest - - -class UserFiltersStoreTestCase(unittest.HomeserverTestCase): - """ - Test background migration that copies entries from column user_id to full_user_id, adding - the hostname in the process. - """ - - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - self.store = hs.get_datastores().main - - def test_bg_migration(self) -> None: - updater = self.hs.get_datastores().main.db_pool.updates - - # drop the constraint so we can insert nulls in full_user_id to populate the test - if isinstance(self.store.database_engine, PostgresEngine): - - def f(txn: LoggingTransaction) -> None: - txn.execute( - "ALTER TABLE user_filters DROP CONSTRAINT full_user_id_not_null" - ) - - self.get_success(self.store.db_pool.runInteraction("", f)) - - for i in range(0, 70): - self.get_success( - self.store.db_pool.simple_insert( - "user_filters", - { - "user_id": f"hello{i:02}", - "filter_id": i, - "filter_json": bytearray(i), - }, - ) - ) - - # re-add the constraint so that when it's validated it actually exists - if isinstance(self.store.database_engine, PostgresEngine): - - def f(txn: LoggingTransaction) -> None: - txn.execute( - "ALTER TABLE user_filters ADD CONSTRAINT full_user_id_not_null CHECK (full_user_id IS NOT NULL) NOT VALID" - ) - - self.get_success(self.store.db_pool.runInteraction("", f)) - - self.get_success( - self.store.db_pool.simple_insert( - "background_updates", - values={ - "update_name": "populate_full_user_id_user_filters", - "progress_json": "{}", - }, - ) - ) - - self.get_success( - updater.run_background_updates(False), - ) - - expected_values = [] - for i in range(0, 70): - expected_values.append((f"@hello{i:02}:{self.hs.hostname}",)) - - res = self.get_success( - self.store.db_pool.execute( - "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id" - ) - ) - self.assertEqual(len(res), len(expected_values)) - self.assertEqual(res, expected_values)