Merge pull request #7190 from matrix-org/rav/one_bg_update_at_a_time

Only run one background update at a time
This commit is contained in:
Richard van der Hoff 2020-04-03 13:17:30 +01:00 committed by GitHub
commit fd4c975b5b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 99 additions and 72 deletions

1
changelog.d/7190.misc Normal file
View file

@ -0,0 +1 @@
Only run one background database update at a time.

View file

@ -90,8 +90,10 @@ class BackgroundUpdater(object):
self._clock = hs.get_clock() self._clock = hs.get_clock()
self.db = database self.db = database
# if a background update is currently running, its name.
self._current_background_update = None # type: Optional[str]
self._background_update_performance = {} self._background_update_performance = {}
self._background_update_queue = []
self._background_update_handlers = {} self._background_update_handlers = {}
self._all_done = False self._all_done = False
@ -111,7 +113,7 @@ class BackgroundUpdater(object):
except Exception: except Exception:
logger.exception("Error doing update") logger.exception("Error doing update")
else: else:
if result is None: if result:
logger.info( logger.info(
"No more background updates to do." "No more background updates to do."
" Unscheduling background update task." " Unscheduling background update task."
@ -119,26 +121,25 @@ class BackgroundUpdater(object):
self._all_done = True self._all_done = True
return None return None
@defer.inlineCallbacks async def has_completed_background_updates(self) -> bool:
def has_completed_background_updates(self):
"""Check if all the background updates have completed """Check if all the background updates have completed
Returns: Returns:
Deferred[bool]: True if all background updates have completed True if all background updates have completed
""" """
# if we've previously determined that there is nothing left to do, that # if we've previously determined that there is nothing left to do, that
# is easy # is easy
if self._all_done: if self._all_done:
return True return True
# obviously, if we have things in our queue, we're not done. # obviously, if we are currently processing an update, we're not done.
if self._background_update_queue: if self._current_background_update:
return False return False
# otherwise, check if there are updates to be run. This is important, # otherwise, check if there are updates to be run. This is important,
# as we may be running on a worker which doesn't perform the bg updates # as we may be running on a worker which doesn't perform the bg updates
# itself, but still wants to wait for them to happen. # itself, but still wants to wait for them to happen.
updates = yield self.db.simple_select_onecol( updates = await self.db.simple_select_onecol(
"background_updates", "background_updates",
keyvalues=None, keyvalues=None,
retcol="1", retcol="1",
@ -153,11 +154,10 @@ class BackgroundUpdater(object):
async def has_completed_background_update(self, update_name) -> bool: async def has_completed_background_update(self, update_name) -> bool:
"""Check if the given background update has finished running. """Check if the given background update has finished running.
""" """
if self._all_done: if self._all_done:
return True return True
if update_name in self._background_update_queue: if update_name == self._current_background_update:
return False return False
update_exists = await self.db.simple_select_one_onecol( update_exists = await self.db.simple_select_one_onecol(
@ -170,9 +170,7 @@ class BackgroundUpdater(object):
return not update_exists return not update_exists
async def do_next_background_update( async def do_next_background_update(self, desired_duration_ms: float) -> bool:
self, desired_duration_ms: float
) -> Optional[int]:
"""Does some amount of work on the next queued background update """Does some amount of work on the next queued background update
Returns once some amount of work is done. Returns once some amount of work is done.
@ -181,33 +179,51 @@ class BackgroundUpdater(object):
desired_duration_ms(float): How long we want to spend desired_duration_ms(float): How long we want to spend
updating. updating.
Returns: Returns:
None if there is no more work to do, otherwise an int True if we have finished running all the background updates, otherwise False
""" """
if not self._background_update_queue:
updates = await self.db.simple_select_list( def get_background_updates_txn(txn):
"background_updates", txn.execute(
keyvalues=None, """
retcols=("update_name", "depends_on"), SELECT update_name, depends_on FROM background_updates
ORDER BY ordering, update_name
"""
) )
in_flight = {update["update_name"] for update in updates} return self.db.cursor_to_dict(txn)
for update in updates:
if update["depends_on"] not in in_flight:
self._background_update_queue.append(update["update_name"])
if not self._background_update_queue: if not self._current_background_update:
# no work left to do all_pending_updates = await self.db.runInteraction(
return None "background_updates", get_background_updates_txn,
)
if not all_pending_updates:
# no work left to do
return True
# pop from the front, and add back to the back # find the first update which isn't dependent on another one in the queue.
update_name = self._background_update_queue.pop(0) pending = {update["update_name"] for update in all_pending_updates}
self._background_update_queue.append(update_name) for upd in all_pending_updates:
depends_on = upd["depends_on"]
if not depends_on or depends_on not in pending:
break
logger.info(
"Not starting on bg update %s until %s is done",
upd["update_name"],
depends_on,
)
else:
# if we get to the end of that for loop, there is a problem
raise Exception(
"Unable to find a background update which doesn't depend on "
"another: dependency cycle?"
)
res = await self._do_background_update(update_name, desired_duration_ms) self._current_background_update = upd["update_name"]
return res
async def _do_background_update( await self._do_background_update(desired_duration_ms)
self, update_name: str, desired_duration_ms: float return False
) -> int:
async def _do_background_update(self, desired_duration_ms: float) -> int:
update_name = self._current_background_update
logger.info("Starting update batch on background update '%s'", update_name) logger.info("Starting update batch on background update '%s'", update_name)
update_handler = self._background_update_handlers[update_name] update_handler = self._background_update_handlers[update_name]
@ -400,27 +416,6 @@ class BackgroundUpdater(object):
self.register_background_update_handler(update_name, updater) self.register_background_update_handler(update_name, updater)
def start_background_update(self, update_name, progress):
"""Starts a background update running.
Args:
update_name: The update to set running.
progress: The initial state of the progress of the update.
Returns:
A deferred that completes once the task has been added to the
queue.
"""
# Clear the background update queue so that we will pick up the new
# task on the next iteration of do_background_update.
self._background_update_queue = []
progress_json = json.dumps(progress)
return self.db.simple_insert(
"background_updates",
{"update_name": update_name, "progress_json": progress_json},
)
def _end_background_update(self, update_name): def _end_background_update(self, update_name):
"""Removes a completed background update task from the queue. """Removes a completed background update task from the queue.
@ -429,9 +424,12 @@ class BackgroundUpdater(object):
Returns: Returns:
A deferred that completes once the task is removed. A deferred that completes once the task is removed.
""" """
self._background_update_queue = [ if update_name != self._current_background_update:
name for name in self._background_update_queue if name != update_name raise Exception(
] "Cannot end background update %s which isn't currently running"
% update_name
)
self._current_background_update = None
return self.db.simple_delete_one( return self.db.simple_delete_one(
"background_updates", keyvalues={"update_name": update_name} "background_updates", keyvalues={"update_name": update_name}
) )

View file

@ -29,7 +29,7 @@ logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database # Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts. # schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 57 SCHEMA_VERSION = 58
dir_path = os.path.abspath(os.path.dirname(__file__)) dir_path = os.path.abspath(os.path.dirname(__file__))

View file

@ -0,0 +1,19 @@
/* Copyright 2020 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* add an "ordering" column to background_updates, which can be used to sort them
to achieve some level of consistency. */
ALTER TABLE background_updates ADD COLUMN ordering INT NOT NULL DEFAULT 0;

View file

@ -11,7 +11,9 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
def prepare(self, reactor, clock, homeserver): def prepare(self, reactor, clock, homeserver):
self.updates = self.hs.get_datastore().db.updates # type: BackgroundUpdater self.updates = self.hs.get_datastore().db.updates # type: BackgroundUpdater
# the base test class should have run the real bg updates for us # the base test class should have run the real bg updates for us
self.assertTrue(self.updates.has_completed_background_updates()) self.assertTrue(
self.get_success(self.updates.has_completed_background_updates())
)
self.update_handler = Mock() self.update_handler = Mock()
self.updates.register_background_update_handler( self.updates.register_background_update_handler(
@ -25,12 +27,20 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
# the target runtime for each bg update # the target runtime for each bg update
target_background_update_duration_ms = 50000 target_background_update_duration_ms = 50000
store = self.hs.get_datastore()
self.get_success(
store.db.simple_insert(
"background_updates",
values={"update_name": "test_update", "progress_json": '{"my_key": 1}'},
)
)
# first step: make a bit of progress # first step: make a bit of progress
@defer.inlineCallbacks @defer.inlineCallbacks
def update(progress, count): def update(progress, count):
yield self.clock.sleep((count * duration_ms) / 1000) yield self.clock.sleep((count * duration_ms) / 1000)
progress = {"my_key": progress["my_key"] + 1} progress = {"my_key": progress["my_key"] + 1}
yield self.hs.get_datastore().db.runInteraction( yield store.db.runInteraction(
"update_progress", "update_progress",
self.updates._background_update_progress_txn, self.updates._background_update_progress_txn,
"test_update", "test_update",
@ -39,10 +49,6 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
return count return count
self.update_handler.side_effect = update self.update_handler.side_effect = update
self.get_success(
self.updates.start_background_update("test_update", {"my_key": 1})
)
self.update_handler.reset_mock() self.update_handler.reset_mock()
res = self.get_success( res = self.get_success(
self.updates.do_next_background_update( self.updates.do_next_background_update(
@ -50,7 +56,7 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
), ),
by=0.1, by=0.1,
) )
self.assertIsNotNone(res) self.assertFalse(res)
# on the first call, we should get run with the default background update size # on the first call, we should get run with the default background update size
self.update_handler.assert_called_once_with( self.update_handler.assert_called_once_with(
@ -73,7 +79,7 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
result = self.get_success( result = self.get_success(
self.updates.do_next_background_update(target_background_update_duration_ms) self.updates.do_next_background_update(target_background_update_duration_ms)
) )
self.assertIsNotNone(result) self.assertFalse(result)
self.update_handler.assert_called_once() self.update_handler.assert_called_once()
# third step: we don't expect to be called any more # third step: we don't expect to be called any more
@ -81,5 +87,5 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase):
result = self.get_success( result = self.get_success(
self.updates.do_next_background_update(target_background_update_duration_ms) self.updates.do_next_background_update(target_background_update_duration_ms)
) )
self.assertIsNone(result) self.assertTrue(result)
self.assertFalse(self.update_handler.called) self.assertFalse(self.update_handler.called)

View file

@ -40,6 +40,7 @@ from synapse.http.server import JsonResource
from synapse.http.site import SynapseRequest, SynapseSite from synapse.http.site import SynapseRequest, SynapseSite
from synapse.logging.context import ( from synapse.logging.context import (
SENTINEL_CONTEXT, SENTINEL_CONTEXT,
LoggingContext,
current_context, current_context,
set_current_context, set_current_context,
) )
@ -419,15 +420,17 @@ class HomeserverTestCase(TestCase):
config_obj.parse_config_dict(config, "", "") config_obj.parse_config_dict(config, "", "")
kwargs["config"] = config_obj kwargs["config"] = config_obj
async def run_bg_updates():
with LoggingContext("run_bg_updates", request="run_bg_updates-1"):
while not await stor.db.updates.has_completed_background_updates():
await stor.db.updates.do_next_background_update(1)
hs = setup_test_homeserver(self.addCleanup, *args, **kwargs) hs = setup_test_homeserver(self.addCleanup, *args, **kwargs)
stor = hs.get_datastore() stor = hs.get_datastore()
# Run the database background updates, when running against "master". # Run the database background updates, when running against "master".
if hs.__class__.__name__ == "TestHomeServer": if hs.__class__.__name__ == "TestHomeServer":
while not self.get_success( self.get_success(run_bg_updates())
stor.db.updates.has_completed_background_updates()
):
self.get_success(stor.db.updates.do_next_background_update(1))
return hs return hs