From cfe8c8ab8e412b6320e5963ced0670fbc7b00d1b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:24:06 +0100 Subject: [PATCH 1/8] Remove unused `start_background_update` This was only used in a unit test, so let's just inline it in the test. --- synapse/storage/background_updates.py | 21 --------------------- tests/storage/test_background_update.py | 14 +++++++++----- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index eb1a7e5002..d4e26eab6c 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -400,27 +400,6 @@ class BackgroundUpdater(object): self.register_background_update_handler(update_name, updater) - def start_background_update(self, update_name, progress): - """Starts a background update running. - - Args: - update_name: The update to set running. - progress: The initial state of the progress of the update. - - Returns: - A deferred that completes once the task has been added to the - queue. - """ - # Clear the background update queue so that we will pick up the new - # task on the next iteration of do_background_update. - self._background_update_queue = [] - progress_json = json.dumps(progress) - - return self.db.simple_insert( - "background_updates", - {"update_name": update_name, "progress_json": progress_json}, - ) - def _end_background_update(self, update_name): """Removes a completed background update task from the queue. diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index ae14fb407d..aca41eb215 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -25,12 +25,20 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): # the target runtime for each bg update target_background_update_duration_ms = 50000 + store = self.hs.get_datastore() + self.get_success( + store.db.simple_insert( + "background_updates", + values={"update_name": "test_update", "progress_json": '{"my_key": 1}'}, + ) + ) + # first step: make a bit of progress @defer.inlineCallbacks def update(progress, count): yield self.clock.sleep((count * duration_ms) / 1000) progress = {"my_key": progress["my_key"] + 1} - yield self.hs.get_datastore().db.runInteraction( + yield store.db.runInteraction( "update_progress", self.updates._background_update_progress_txn, "test_update", @@ -39,10 +47,6 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): return count self.update_handler.side_effect = update - - self.get_success( - self.updates.start_background_update("test_update", {"my_key": 1}) - ) self.update_handler.reset_mock() res = self.get_success( self.updates.do_next_background_update( From 26d17b9bdc0de51d5f1a7526e8ab70e7f7796e4d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:25:10 +0100 Subject: [PATCH 2/8] Make `has_completed_background_updates` async (Almost) everywhere that uses it is happy with an awaitable. --- synapse/storage/background_updates.py | 7 +++---- tests/storage/test_background_update.py | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index d4e26eab6c..79494bcdf5 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -119,12 +119,11 @@ class BackgroundUpdater(object): self._all_done = True return None - @defer.inlineCallbacks - def has_completed_background_updates(self): + async def has_completed_background_updates(self) -> bool: """Check if all the background updates have completed Returns: - Deferred[bool]: True if all background updates have completed + True if all background updates have completed """ # if we've previously determined that there is nothing left to do, that # is easy @@ -138,7 +137,7 @@ class BackgroundUpdater(object): # otherwise, check if there are updates to be run. This is important, # as we may be running on a worker which doesn't perform the bg updates # itself, but still wants to wait for them to happen. - updates = yield self.db.simple_select_onecol( + updates = await self.db.simple_select_onecol( "background_updates", keyvalues=None, retcol="1", diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index aca41eb215..e578de8acf 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -11,7 +11,9 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver): self.updates = self.hs.get_datastore().db.updates # type: BackgroundUpdater # the base test class should have run the real bg updates for us - self.assertTrue(self.updates.has_completed_background_updates()) + self.assertTrue( + self.get_success(self.updates.has_completed_background_updates()) + ) self.update_handler = Mock() self.updates.register_background_update_handler( From 51f4d52cb4663a056372d779b78488aeae45f554 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:27:56 +0100 Subject: [PATCH 3/8] Set a logging context while running the bg updates This mostly just reduces the amount of "running from sentinel context" spam during unittest setup. --- tests/unittest.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/unittest.py b/tests/unittest.py index d0406ca2fd..27af5228fe 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -40,6 +40,7 @@ from synapse.http.server import JsonResource from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import ( SENTINEL_CONTEXT, + LoggingContext, current_context, set_current_context, ) @@ -419,15 +420,17 @@ class HomeserverTestCase(TestCase): config_obj.parse_config_dict(config, "", "") kwargs["config"] = config_obj + async def run_bg_updates(): + with LoggingContext("run_bg_updates", request="run_bg_updates-1"): + while not await stor.db.updates.has_completed_background_updates(): + await stor.db.updates.do_next_background_update(1) + hs = setup_test_homeserver(self.addCleanup, *args, **kwargs) stor = hs.get_datastore() # Run the database background updates, when running against "master". if hs.__class__.__name__ == "TestHomeServer": - while not self.get_success( - stor.db.updates.has_completed_background_updates() - ): - self.get_success(stor.db.updates.do_next_background_update(1)) + self.get_success(run_bg_updates()) return hs From b4c22342320d7de86c02dfb36415a38c62bec88d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:31:32 +0100 Subject: [PATCH 4/8] Make do_next_background_update return a bool returning a None or an int that we don't use is confusing. --- synapse/storage/background_updates.py | 12 +++++------- tests/storage/test_background_update.py | 6 +++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 79494bcdf5..4a59132bf3 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -111,7 +111,7 @@ class BackgroundUpdater(object): except Exception: logger.exception("Error doing update") else: - if result is None: + if result: logger.info( "No more background updates to do." " Unscheduling background update task." @@ -169,9 +169,7 @@ class BackgroundUpdater(object): return not update_exists - async def do_next_background_update( - self, desired_duration_ms: float - ) -> Optional[int]: + async def do_next_background_update(self, desired_duration_ms: float) -> bool: """Does some amount of work on the next queued background update Returns once some amount of work is done. @@ -180,7 +178,7 @@ class BackgroundUpdater(object): desired_duration_ms(float): How long we want to spend updating. Returns: - None if there is no more work to do, otherwise an int + True if there is no more work to do, otherwise False """ if not self._background_update_queue: updates = await self.db.simple_select_list( @@ -195,14 +193,14 @@ class BackgroundUpdater(object): if not self._background_update_queue: # no work left to do - return None + return True # pop from the front, and add back to the back update_name = self._background_update_queue.pop(0) self._background_update_queue.append(update_name) res = await self._do_background_update(update_name, desired_duration_ms) - return res + return False async def _do_background_update( self, update_name: str, desired_duration_ms: float diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index e578de8acf..940b166129 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -56,7 +56,7 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): ), by=0.1, ) - self.assertIsNotNone(res) + self.assertFalse(res) # on the first call, we should get run with the default background update size self.update_handler.assert_called_once_with( @@ -79,7 +79,7 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): result = self.get_success( self.updates.do_next_background_update(target_background_update_duration_ms) ) - self.assertIsNotNone(result) + self.assertFalse(result) self.update_handler.assert_called_once() # third step: we don't expect to be called any more @@ -87,5 +87,5 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): result = self.get_success( self.updates.do_next_background_update(target_background_update_duration_ms) ) - self.assertIsNone(result) + self.assertTrue(result) self.assertFalse(self.update_handler.called) From 7b608cf4683c0df2dbb55aacd472c407a0f6b1fa Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:43:19 +0100 Subject: [PATCH 5/8] Only run one background update at a time --- synapse/storage/background_updates.py | 74 ++++++++++++------- synapse/storage/prepare_database.py | 2 +- .../delta/58/00background_update_ordering.sql | 19 +++++ 3 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 synapse/storage/schema/delta/58/00background_update_ordering.sql diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 4a59132bf3..0e430356cd 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -90,8 +90,10 @@ class BackgroundUpdater(object): self._clock = hs.get_clock() self.db = database + # if a background update is currently running, its name. + self._current_background_update = None # type: Optional[str] + self._background_update_performance = {} - self._background_update_queue = [] self._background_update_handlers = {} self._all_done = False @@ -131,7 +133,7 @@ class BackgroundUpdater(object): return True # obviously, if we have things in our queue, we're not done. - if self._background_update_queue: + if self._current_background_update: return False # otherwise, check if there are updates to be run. This is important, @@ -152,11 +154,10 @@ class BackgroundUpdater(object): async def has_completed_background_update(self, update_name) -> bool: """Check if the given background update has finished running. """ - if self._all_done: return True - if update_name in self._background_update_queue: + if update_name == self._current_background_update: return False update_exists = await self.db.simple_select_one_onecol( @@ -180,31 +181,49 @@ class BackgroundUpdater(object): Returns: True if there is no more work to do, otherwise False """ - if not self._background_update_queue: - updates = await self.db.simple_select_list( - "background_updates", - keyvalues=None, - retcols=("update_name", "depends_on"), + + def get_background_updates_txn(txn): + txn.execute( + """ + SELECT update_name, depends_on FROM background_updates + ORDER BY ordering, update_name + """ ) - in_flight = {update["update_name"] for update in updates} - for update in updates: - if update["depends_on"] not in in_flight: - self._background_update_queue.append(update["update_name"]) + return self.db.cursor_to_dict(txn) - if not self._background_update_queue: - # no work left to do - return True + if not self._current_background_update: + all_pending_updates = await self.db.runInteraction( + "background_updates", get_background_updates_txn, + ) + if not all_pending_updates: + # no work left to do + return True - # pop from the front, and add back to the back - update_name = self._background_update_queue.pop(0) - self._background_update_queue.append(update_name) + # find the first update which isn't dependent on another one in the queue. + pending = {update["update_name"] for update in all_pending_updates} + for upd in all_pending_updates: + depends_on = upd["depends_on"] + if not depends_on or depends_on not in pending: + break + logger.info( + "Not starting on bg update %s until %s is done", + upd["update_name"], + depends_on, + ) + else: + # if we get to the end of that for loop, there is a problem + raise Exception( + "Unable to find a background update which doesn't depend on " + "another: dependency cycle?" + ) - res = await self._do_background_update(update_name, desired_duration_ms) + self._current_background_update = upd["update_name"] + + await self._do_background_update(desired_duration_ms) return False - async def _do_background_update( - self, update_name: str, desired_duration_ms: float - ) -> int: + async def _do_background_update(self, desired_duration_ms: float) -> int: + update_name = self._current_background_update logger.info("Starting update batch on background update '%s'", update_name) update_handler = self._background_update_handlers[update_name] @@ -405,9 +424,12 @@ class BackgroundUpdater(object): Returns: A deferred that completes once the task is removed. """ - self._background_update_queue = [ - name for name in self._background_update_queue if name != update_name - ] + if update_name != self._current_background_update: + raise Exception( + "Cannot end background update %s which isn't currently running" + % update_name + ) + self._current_background_update = None return self.db.simple_delete_one( "background_updates", keyvalues={"update_name": update_name} ) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 6cb7d4b922..1712932f31 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) # Remember to update this number every time a change is made to database # schema files, so the users will be informed on server restarts. -SCHEMA_VERSION = 57 +SCHEMA_VERSION = 58 dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/synapse/storage/schema/delta/58/00background_update_ordering.sql b/synapse/storage/schema/delta/58/00background_update_ordering.sql new file mode 100644 index 0000000000..02dae587cc --- /dev/null +++ b/synapse/storage/schema/delta/58/00background_update_ordering.sql @@ -0,0 +1,19 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* add an "ordering" column to background_updates, which can be used to sort them + to achieve some level of consistency. */ + +ALTER TABLE background_updates ADD COLUMN ordering INT NOT NULL DEFAULT 0; From b413ab8aa64a3e1a01db8e3e6bce0c486f916618 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 31 Mar 2020 17:44:36 +0100 Subject: [PATCH 6/8] changelog --- changelog.d/7190.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/7190.misc diff --git a/changelog.d/7190.misc b/changelog.d/7190.misc new file mode 100644 index 0000000000..34348873f1 --- /dev/null +++ b/changelog.d/7190.misc @@ -0,0 +1 @@ +Only run one background database update at a time. From af47264b78c33698f6a70ce1ce3d32774d65de72 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 2 Apr 2020 12:04:55 +0100 Subject: [PATCH 7/8] review comment --- synapse/storage/background_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 0e430356cd..510963eb7f 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -132,7 +132,7 @@ class BackgroundUpdater(object): if self._all_done: return True - # obviously, if we have things in our queue, we're not done. + # obviously, if we are currently processing an update, we're not done. if self._current_background_update: return False From fcc2de7a0c0c7a02b935c4b35394f228a5d5a304 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 3 Apr 2020 10:51:32 +0100 Subject: [PATCH 8/8] Update docstring per review comments --- synapse/storage/background_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 510963eb7f..59f3394b0a 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -179,7 +179,7 @@ class BackgroundUpdater(object): desired_duration_ms(float): How long we want to spend updating. Returns: - True if there is no more work to do, otherwise False + True if we have finished running all the background updates, otherwise False """ def get_background_updates_txn(txn):