From 96312536f28ef73c61f9e82b34d35255cd4fa84e Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 7 Sep 2020 13:04:10 +0100
Subject: [PATCH 1/5] Refuse to upgrade database on worker processes (#8266)

---
 changelog.d/8266.misc               |  1 +
 synapse/storage/prepare_database.py | 78 ++++++++++++++++++++++-------
 2 files changed, 62 insertions(+), 17 deletions(-)
 create mode 100644 changelog.d/8266.misc

diff --git a/changelog.d/8266.misc b/changelog.d/8266.misc
new file mode 100644
index 0000000000..e7c899bea8
--- /dev/null
+++ b/changelog.d/8266.misc
@@ -0,0 +1 @@
+Do not attempt to upgrade the database schema on worker processes.
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 964d8d9eb8..93dca96476 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -47,6 +47,22 @@ class UpgradeDatabaseException(PrepareDatabaseException):
     pass
 
 
+OUTDATED_SCHEMA_ON_WORKER_ERROR = (
+    "Expected database schema version %i but got %i: run the main synapse process to "
+    "upgrade the database schema before starting worker processes."
+)
+
+EMPTY_DATABASE_ON_WORKER_ERROR = (
+    "Uninitialised database: run the main synapse process to prepare the database "
+    "schema before starting worker processes."
+)
+
+UNAPPLIED_DELTA_ON_WORKER_ERROR = (
+    "Database schema delta %s has not been applied: run the main synapse process to "
+    "upgrade the database schema before starting worker processes."
+)
+
+
 def prepare_database(db_conn, database_engine, config, databases=["main", "state"]):
     """Prepares a physical database for usage. Will either create all necessary tables
     or upgrade from an older schema version.
@@ -71,25 +87,35 @@ def prepare_database(db_conn, database_engine, config, databases=["main", "state
         if version_info:
             user_version, delta_files, upgraded = version_info
 
+            # config should only be None when we are preparing an in-memory SQLite db,
+            # which should be empty.
             if config is None:
-                if user_version != SCHEMA_VERSION:
-                    # If we don't pass in a config file then we are expecting to
-                    # have already upgraded the DB.
-                    raise UpgradeDatabaseException(
-                        "Expected database schema version %i but got %i"
-                        % (SCHEMA_VERSION, user_version)
-                    )
-            else:
-                _upgrade_existing_database(
-                    cur,
-                    user_version,
-                    delta_files,
-                    upgraded,
-                    database_engine,
-                    config,
-                    databases=databases,
+                raise ValueError(
+                    "config==None in prepare_database, but database is not empty"
                 )
+
+            # if it's a worker app, refuse to upgrade the database, to avoid multiple
+            # workers doing it at once.
+            if config.worker_app is not None and user_version != SCHEMA_VERSION:
+                raise UpgradeDatabaseException(
+                    OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, user_version)
+                )
+
+            _upgrade_existing_database(
+                cur,
+                user_version,
+                delta_files,
+                upgraded,
+                database_engine,
+                config,
+                databases=databases,
+            )
         else:
+            # if it's a worker app, refuse to upgrade the database, to avoid multiple
+            # workers doing it at once.
+            if config and config.worker_app is not None:
+                raise UpgradeDatabaseException(EMPTY_DATABASE_ON_WORKER_ERROR)
+
             _setup_new_database(cur, database_engine, databases=databases)
 
         # check if any of our configured dynamic modules want a database
@@ -295,6 +321,8 @@ def _upgrade_existing_database(
     else:
         assert config
 
+    is_worker = config and config.worker_app is not None
+
     if current_version > SCHEMA_VERSION:
         raise ValueError(
             "Cannot use this database as it is too "
@@ -322,7 +350,7 @@
     specific_engine_extensions = (".sqlite", ".postgres")
 
     for v in range(start_ver, SCHEMA_VERSION + 1):
-        logger.info("Upgrading schema to v%d", v)
+        logger.info("Applying schema deltas for v%d", v)
 
         # We need to search both the global and per data store schema
        # directories for schema updates.
@@ -382,9 +410,15 @@
                continue
 
            root_name, ext = os.path.splitext(file_name)
+
            if ext == ".py":
                # This is a python upgrade module. We need to import into some
                # package and then execute its `run_upgrade` function.
+                if is_worker:
+                    raise PrepareDatabaseException(
+                        UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
+                    )
+
                module_name = "synapse.storage.v%d_%s" % (v, root_name)
                with open(absolute_path) as python_file:
                    module = imp.load_source(module_name, absolute_path, python_file)
@@ -399,10 +433,18 @@
                    continue
            elif ext == ".sql":
                # A plain old .sql file, just read and execute it
+                if is_worker:
+                    raise PrepareDatabaseException(
+                        UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
+                    )
                logger.info("Applying schema %s", relative_path)
                executescript(cur, absolute_path)
            elif ext == specific_engine_extension and root_name.endswith(".sql"):
                # A .sql file specific to our engine; just read and execute it
+                if is_worker:
+                    raise PrepareDatabaseException(
+                        UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
+                    )
                logger.info("Applying engine-specific schema %s", relative_path)
                executescript(cur, absolute_path)
            elif ext in specific_engine_extensions and root_name.endswith(".sql"):
@@ -432,6 +474,8 @@
                (v, True),
            )
 
+    logger.info("Schema now up to date")
+
 
 def _apply_module_schemas(txn, database_engine, config):
     """Apply the module schemas for the dynamic modules, if any
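Taken together, this patch makes schema management the sole responsibility of the main process: a worker either finds the schema fully up to date or refuses to start, whether it meets a stale version, an unapplied delta, or an empty database. A minimal, self-contained sketch of the version guard (illustrative names only; `SCHEMA_VERSION = 58` is a made-up value, and this is not Synapse's actual module layout):

```python
from typing import Optional

SCHEMA_VERSION = 58  # illustrative value, not the real constant

OUTDATED_SCHEMA_ON_WORKER_ERROR = (
    "Expected database schema version %i but got %i: run the main synapse process to "
    "upgrade the database schema before starting worker processes."
)


class UpgradeDatabaseException(Exception):
    pass


def check_schema_on_worker(user_version: int, worker_app: Optional[str]) -> None:
    # A worker never upgrades the schema itself: if the schema is stale it
    # fails fast and tells the operator to run the main process first.
    if worker_app is not None and user_version != SCHEMA_VERSION:
        raise UpgradeDatabaseException(
            OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, user_version)
        )


check_schema_on_worker(SCHEMA_VERSION, "synapse.app.generic_worker")  # up to date: ok
check_schema_on_worker(SCHEMA_VERSION - 1, None)  # main process: would go on to upgrade
# check_schema_on_worker(SCHEMA_VERSION - 1, "synapse.app.generic_worker")  # raises
```

Failing fast here is cheaper than the alternative it prevents: several workers racing to apply the same deltas against one database.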
From 0dae7d80bfb497d417a53e52b8353bbcd6d10d58 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 7 Sep 2020 13:36:02 +0100
Subject: [PATCH 2/5] Add more logging to debug slow startup (#8264)

I'm hoping this will provide some pointers for debugging
https://github.com/matrix-org/synapse/issues/7968.
---
 changelog.d/8264.misc                      |  1 +
 synapse/storage/databases/__init__.py      | 23 +++++++++++++++-------
 synapse/storage/databases/main/__init__.py |  1 +
 synapse/storage/prepare_database.py        | 10 ++++++++++
 synapse/storage/util/id_generators.py      |  5 +++++
 5 files changed, 33 insertions(+), 7 deletions(-)
 create mode 100644 changelog.d/8264.misc

diff --git a/changelog.d/8264.misc b/changelog.d/8264.misc
new file mode 100644
index 0000000000..600b175655
--- /dev/null
+++ b/changelog.d/8264.misc
@@ -0,0 +1 @@
+Add more logging to debug slow startup.
diff --git a/synapse/storage/databases/__init__.py b/synapse/storage/databases/__init__.py
index 7f08bd8285..985b12df91 100644
--- a/synapse/storage/databases/__init__.py
+++ b/synapse/storage/databases/__init__.py
@@ -47,9 +47,14 @@ class Databases:
             engine = create_engine(database_config.config)
 
             with make_conn(database_config, engine) as db_conn:
-                logger.info("Preparing database %r...", db_name)
-
+                logger.info("[database config %r]: Checking database server", db_name)
                 engine.check_database(db_conn)
+
+                logger.info(
+                    "[database config %r]: Preparing for databases %r",
+                    db_name,
+                    database_config.databases,
+                )
                 prepare_database(
                     db_conn, engine, hs.config, databases=database_config.databases,
                 )
@@ -57,7 +62,9 @@ class Databases:
                 database = DatabasePool(hs, database_config, engine)
 
                 if "main" in database_config.databases:
-                    logger.info("Starting 'main' data store")
+                    logger.info(
+                        "[database config %r]: Starting 'main' database", db_name
+                    )
 
                     # Sanity check we don't try and configure the main store on
                     # multiple databases.
@@ -72,7 +79,9 @@ class Databases:
                     persist_events = PersistEventsStore(hs, database, main)
 
                 if "state" in database_config.databases:
-                    logger.info("Starting 'state' data store")
+                    logger.info(
+                        "[database config %r]: Starting 'state' database", db_name
+                    )
 
                     # Sanity check we don't try and configure the state store on
                     # multiple databases.
@@ -85,7 +94,7 @@ class Databases:
                 self.databases.append(database)
 
-                logger.info("Database %r prepared", db_name)
+                logger.info("[database config %r]: prepared", db_name)
 
                 # Closing the context manager doesn't close the connection.
                 # psycopg will close the connection when the object gets GCed, but *only*
@@ -98,10 +107,10 @@ class Databases:
         # Sanity check that we have actually configured all the required stores.
         if not main:
-            raise Exception("No 'main' data store configured")
+            raise Exception("No 'main' database configured")
 
         if not state:
-            raise Exception("No 'state' data store configured")
+            raise Exception("No 'state' database configured")
 
         # We use local variables here to ensure that the databases do not have
         # optional types.
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 99890ffbf3..b315ae66d2 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -591,6 +591,7 @@ def check_database_before_upgrade(cur, database_engine, config: HomeServerConfig
     """Called before upgrading an existing database to check that it is broadly sane
     compared with the configuration.
     """
+    logger.info("Running sanity-checks on database...")
     domain = config.server_name
 
     sql = database_engine.convert_param_style(
diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py
index 93dca96476..ee60e2a718 100644
--- a/synapse/storage/prepare_database.py
+++ b/synapse/storage/prepare_database.py
@@ -82,10 +82,18 @@ def prepare_database(db_conn, database_engine, config, databases=["main", "state
     try:
         cur = db_conn.cursor()
+
+        logger.info("%r: Checking existing schema version", databases)
         version_info = _get_or_create_schema_state(cur, database_engine)
 
         if version_info:
             user_version, delta_files, upgraded = version_info
+            logger.info(
+                "%r: Existing schema is %i (+%i deltas)",
+                databases,
+                user_version,
+                len(delta_files),
+            )
 
             # config should only be None when we are preparing an in-memory SQLite db,
             # which should be empty.
@@ -111,6 +119,8 @@ def prepare_database(db_conn, database_engine, config, databases=["main", "state
                 databases=databases,
             )
         else:
+            logger.info("%r: Initialising new database", databases)
+
             # if it's a worker app, refuse to upgrade the database, to avoid multiple
             # workers doing it at once.
             if config and config.worker_app is not None:
diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py
index 76bc3afdfa..b7eb4f8ac9 100644
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@@ -15,6 +15,7 @@
 
 import contextlib
 import heapq
+import logging
 import threading
 from collections import deque
 from typing import Dict, List, Set
@@ -24,6 +25,8 @@ from typing_extensions import Deque
 from synapse.storage.database import DatabasePool, LoggingTransaction
 from synapse.storage.util.sequence import PostgresSequenceGenerator
 
+logger = logging.getLogger(__name__)
+
 
 class IdGenerator:
     def __init__(self, db_conn, table, column):
@@ -48,6 +51,8 @@ def _load_current_id(db_conn, table, column, step=1):
     Returns:
         int
     """
+    # debug logging for https://github.com/matrix-org/synapse/issues/7968
+    logger.info("initialising stream generator for %s(%s)", table, column)
     cur = db_conn.cursor()
     if step == 1:
         cur.execute("SELECT MAX(%s) FROM %s" % (column, table))
From a55e2707d73c19b555c18ebc19df3170a96055e2 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Mon, 7 Sep 2020 15:15:06 +0100
Subject: [PATCH 3/5] Fix unread count failing on NULL values (#8270)

Fix unread counts making sync fail if the value of the `unread_count`
column in `event_push_summary` is `None`.
---
 changelog.d/8270.feature                             | 1 +
 synapse/storage/databases/main/event_push_actions.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/8270.feature

diff --git a/changelog.d/8270.feature b/changelog.d/8270.feature
new file mode 100644
index 0000000000..feb02be234
--- /dev/null
+++ b/changelog.d/8270.feature
@@ -0,0 +1 @@
+Add unread messages count to sync responses, as specified in [MSC2654](https://github.com/matrix-org/matrix-doc/pull/2654).
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 001d06378d..50fac9e72e 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -177,7 +177,12 @@ class EventPushActionsWorkerStore(SQLBaseStore):
 
         if row:
             notif_count += row[0]
-            unread_count += row[1]
+
+            if row[1] is not None:
+                # The unread_count column of event_push_summary is NULLable, so we need
+                # to make sure we don't try increasing the unread counts if it's NULL
+                # for this row.
+                unread_count += row[1]
 
         return {
             "notify_count": notif_count,
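The crash fixed by patch 3 is the classic NULL-to-None trap: `unread_count` is a nullable column, the database driver surfaces SQL NULL as Python `None`, and `unread_count += None` raises a `TypeError` that fails the whole `/sync` request. A standalone sketch of the guarded accumulation (a hypothetical helper for illustration; Synapse does this inline):

```python
from typing import Optional, Tuple


def add_summary_row(
    notif_count: int,
    unread_count: int,
    row: Optional[Tuple[int, Optional[int]]],
) -> Tuple[int, int]:
    """Fold one summary row into the running counts."""
    if row:
        notif_count += row[0]
        if row[1] is not None:
            # The unread column is NULLable: rows written before the column
            # existed come back as None and must not be summed.
            unread_count += row[1]
    return notif_count, unread_count


print(add_summary_row(0, 0, (3, None)))  # (3, 0) -- previously a TypeError
print(add_summary_row(0, 0, (3, 5)))     # (3, 5)
```

An equivalent fix could live in the SQL itself (`COALESCE(unread_count, 0)`); guarding in Python keeps "no value recorded yet" distinguishable from a genuine zero.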
From ef2804d27c766595f9eae134422652d1d32df9a6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Mon, 7 Sep 2020 16:48:52 +0100
Subject: [PATCH 4/5] Avoid table-scanning users at startup (#8271)

This takes about 10 seconds in the best case; often more.
---
 changelog.d/8271.bugfix                    |  1 +
 synapse/storage/databases/main/__init__.py | 25 ++++++++++++----------
 2 files changed, 15 insertions(+), 11 deletions(-)
 create mode 100644 changelog.d/8271.bugfix

diff --git a/changelog.d/8271.bugfix b/changelog.d/8271.bugfix
new file mode 100644
index 0000000000..b75f07b03c
--- /dev/null
+++ b/changelog.d/8271.bugfix
@@ -0,0 +1 @@
+Fix slow start times for large servers by removing a table scan of the `users` table from startup code.
diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index b315ae66d2..2ae2fbd5d7 100644
--- a/synapse/storage/databases/main/__init__.py
+++ b/synapse/storage/databases/main/__init__.py
@@ -29,6 +29,7 @@ from synapse.storage.util.id_generators import (
     MultiWriterIdGenerator,
     StreamIdGenerator,
 )
+from synapse.types import get_domain_from_id
 from synapse.util.caches.stream_change_cache import StreamChangeCache
 
 from .account_data import AccountDataStore
@@ -591,22 +592,24 @@ def check_database_before_upgrade(cur, database_engine, config: HomeServerConfig
     """Called before upgrading an existing database to check that it is broadly sane
     compared with the configuration.
     """
-    logger.info("Running sanity-checks on database...")
-    domain = config.server_name
+    logger.info("Checking database for consistency with configuration...")
 
-    sql = database_engine.convert_param_style(
-        "SELECT COUNT(*) FROM users WHERE name NOT LIKE ?"
-    )
-    pat = "%:" + domain
-    cur.execute(sql, (pat,))
-    num_not_matching = cur.fetchall()[0][0]
-    if num_not_matching == 0:
+    # if there are any users in the database, check that the username matches our
+    # configured server name.
+
+    cur.execute("SELECT name FROM users LIMIT 1")
+    rows = cur.fetchall()
+    if not rows:
+        return
+
+    user_domain = get_domain_from_id(rows[0][0])
+    if user_domain == config.server_name:
         return
 
     raise Exception(
         "Found users in database not native to %s!\n"
-        "You cannot changed a synapse server_name after it's been configured"
-        % (domain,)
+        "You cannot change a synapse server_name after it's been configured"
+        % (config.server_name,)
     )
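Patch 4 turns an O(number-of-users) startup query into an O(1) one: the old check counted every row of `users` whose name did not end in the configured domain, while the new one fetches a single row and checks its domain, relying on the invariant that all local users share the server's domain. A sketch of the difference (using an in-memory sqlite3 database for illustration; the domain split is a simplified stand-in for `get_domain_from_id`):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT)")
conn.execute("INSERT INTO users VALUES ('@alice:example.com')")

# Old check: scans the whole table to count names not ending in the domain.
# On a large homeserver this is the ~10s table scan the patch removes:
#   SELECT COUNT(*) FROM users WHERE name NOT LIKE '%:example.com'

# New check: look at one arbitrary row and compare its domain.
row = conn.execute("SELECT name FROM users LIMIT 1").fetchone()
if row is not None:
    user_domain = row[0].split(":", 1)[1]  # simplified get_domain_from_id
    if user_domain != "example.com":
        raise Exception("server_name changed since this database was configured")
```

Sampling one row is sound here precisely because the check only exists to catch a changed `server_name`, which would affect every local user equally.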
From d8762cc116110ed0d4557b902414ed5ee9ccea96 Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Mon, 7 Sep 2020 16:56:27 +0100
Subject: [PATCH 5/5] Only add rows to the push actions table if the event
 notifies or should be marked unread (#8274)

---
 changelog.d/8274.feature                 | 1 +
 synapse/push/bulk_push_rule_evaluator.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/8274.feature

diff --git a/changelog.d/8274.feature b/changelog.d/8274.feature
new file mode 100644
index 0000000000..feb02be234
--- /dev/null
+++ b/changelog.d/8274.feature
@@ -0,0 +1 @@
+Add unread messages count to sync responses, as specified in [MSC2654](https://github.com/matrix-org/matrix-doc/pull/2654).
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 1bb8e346b9..c440f2545c 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -219,7 +219,12 @@ class BulkPushRuleEvaluator:
             if event.type == EventTypes.Member and event.state_key == uid:
                 display_name = event.content.get("displayname", None)
 
-            actions_by_user[uid] = []
+            if count_as_unread:
+                # Add an element for the current user if the event needs to be marked as
+                # unread, so that add_push_actions_to_staging iterates over it.
+                # If the event shouldn't be marked as unread but should notify the
+                # current user, it'll be added to the dict later.
+                actions_by_user[uid] = []
 
             for rule in rules:
                 if "enabled" in rule and not rule["enabled"]:
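After patch 5, `actions_by_user` only gains an entry for a user when there is something to record: the entry is seeded early only when the event counts as unread, and is otherwise created later if a push rule matches. A simplified model of the behaviour change (not the real evaluator; the `notifies` dict stands in for the push-rule loop):

```python
from typing import Dict, List


def evaluate(
    uids: List[str], count_as_unread: bool, notifies: Dict[str, list]
) -> Dict[str, list]:
    actions_by_user: Dict[str, list] = {}
    for uid in uids:
        if count_as_unread:
            # Seed an empty entry so the staging step still records "unread"
            # for this user even if no rule ends up notifying them.
            actions_by_user[uid] = []
        if uid in notifies:
            # A matching push rule adds (or overwrites) the user's actions,
            # whether or not the event counted as unread.
            actions_by_user[uid] = notifies[uid]
    return actions_by_user


# Before the patch every user got an entry unconditionally; now @b:hs, for
# whom the event neither notifies nor counts as unread, produces no row.
print(evaluate(["@a:hs", "@b:hs"], False, {"@a:hs": ["notify"]}))
# -> {'@a:hs': ['notify']}
```

The practical effect is fewer `event_push_actions` rows: previously every local user in the room got a row per event, even when nothing would ever notify them or mark the event unread.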