Fix SQL delta file taking a long time to run (#9516)

Fixes #9504
Erik Johnston 2021-03-02 14:05:01 +00:00 committed by GitHub
parent f00c4e7af0
commit 16108c579d
4 changed files with 60 additions and 5 deletions

changelog.d/9516.bugfix (new file)

@@ -0,0 +1 @@
+Fix a bug where users' pushers were not all deleted when they deactivated their account.

scripts/synapse_port_db

@@ -47,6 +47,7 @@ from synapse.storage.databases.main.events_bg_updates import (
 from synapse.storage.databases.main.media_repository import (
     MediaRepositoryBackgroundUpdateStore,
 )
+from synapse.storage.databases.main.pusher import PusherWorkerStore
 from synapse.storage.databases.main.registration import (
     RegistrationBackgroundUpdateStore,
     find_max_generated_user_id_localpart,
@@ -177,6 +178,7 @@ class Store(
     UserDirectoryBackgroundUpdateStore,
     EndToEndKeyBackgroundStore,
     StatsStore,
+    PusherWorkerStore,
 ):
     def execute(self, f, *args, **kwargs):
         return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs)
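
The port script assembles its `Store` from the store mixins so that any background updates still pending in the source database can be run during the port; without `PusherWorkerStore` mixed in, the script would have no handler registered for the new `remove_deactivated_pushers` update. A minimal, illustrative Python sketch of that registration-via-mixin idea (class and attribute names here are simplified stand-ins, not Synapse's real internals):

    # Each store mixin registers its background-update handlers when it is
    # constructed, so a combined Store that omits a mixin cannot run that
    # mixin's pending updates.
    class BaseStore:
        def __init__(self) -> None:
            self.background_handlers: dict = {}

    class PusherUpdatesMixin(BaseStore):
        def __init__(self) -> None:
            super().__init__()
            # stand-in for PusherWorkerStore registering its handler
            self.background_handlers["remove_deactivated_pushers"] = (
                lambda progress, batch_size: 0
            )

    class Store(PusherUpdatesMixin):  # mix in every store with pending updates
        pass

    assert "remove_deactivated_pushers" in Store().background_handlers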

synapse/storage/databases/main/pusher.py

@@ -39,6 +39,11 @@ class PusherWorkerStore(SQLBaseStore):
             db_conn, "pushers", "id", extra_tables=[("deleted_pushers", "stream_id")]
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "remove_deactivated_pushers",
+            self._remove_deactivated_pushers,
+        )
+
     def _decode_pushers_rows(self, rows: Iterable[dict]) -> Iterator[PusherConfig]:
         """JSON-decode the data in the rows returned from the `pushers` table
@@ -284,6 +289,54 @@ class PusherWorkerStore(SQLBaseStore):
             lock=False,
         )
 
+    async def _remove_deactivated_pushers(self, progress: dict, batch_size: int) -> int:
+        """A background update that deletes all pushers for deactivated users.
+
+        Note that we don't proactively tell the pusherpool that we've deleted
+        these (just because it's a bit of a faff to do from here), but they will
+        get cleaned up at the next restart.
+        """
+
+        last_user = progress.get("last_user", "")
+
+        def _delete_pushers(txn) -> int:
+
+            sql = """
+                SELECT name FROM users
+                WHERE deactivated = ? and name > ?
+                ORDER BY name ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (1, last_user, batch_size))
+            users = [row[0] for row in txn]
+
+            self.db_pool.simple_delete_many_txn(
+                txn,
+                table="pushers",
+                column="user_name",
+                iterable=users,
+                keyvalues={},
+            )
+
+            if users:
+                self.db_pool.updates._background_update_progress_txn(
+                    txn, "remove_deactivated_pushers", {"last_user": users[-1]}
+                )
+
+            return len(users)
+
+        number_deleted = await self.db_pool.runInteraction(
+            "_remove_deactivated_pushers", _delete_pushers
+        )
+
+        if number_deleted < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "remove_deactivated_pushers"
+            )
+
+        return number_deleted
+
 
 class PusherStore(PusherWorkerStore):
     def get_pushers_stream_token(self) -> int:
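
The handler above pages through deactivated users with keyset pagination: each batch selects the next `batch_size` user names after the last one processed, deletes their pushers, and records the new high-water mark in the update's progress. In plain SQL, one batch looks roughly like this (the parameter values are illustrative, and the exact DELETE that `simple_delete_many_txn` emits may differ by database):

    -- Fetch the next batch of deactivated users after the saved position.
    SELECT name FROM users
    WHERE deactivated = 1 AND name > '@last_processed:example.com'
    ORDER BY name ASC
    LIMIT 100;

    -- Delete those users' pushers; simple_delete_many_txn expands the
    -- returned names into an IN list.
    DELETE FROM pushers
    WHERE user_name IN ('@alice:example.com', '@bob:example.com' /* , ... */);

Because each transaction touches at most `batch_size` users, no single statement runs (or holds locks) for long, which is the point of the fix.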

synapse/storage/databases/main/schema/delta/59/08delete_pushers_for_deactivated_accounts.sql

@@ -14,8 +14,7 @@
  */
 
--- We may not have deleted all pushers for deactivated accounts. Do so now.
---
--- Note: We don't bother updating the `deleted_pushers` table as it's just use
--- to stop pushers on workers, and that will happen when they get next restarted.
-DELETE FROM pushers WHERE user_name IN (SELECT name FROM users WHERE deactivated = 1);
+-- We may not have deleted all pushers for deactivated accounts, so we set up a
+-- background job to delete them.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (5908, 'remove_deactivated_pushers', '{}');
 
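
Scheduling the work from the delta is just an INSERT: at startup the homeserver notices the row in `background_updates`, runs the handler registered under that name batch by batch, and removes the row when the update completes. (The `ordering` value 5908 matches schema delta 59/08, following Synapse's usual convention.) To check whether the update is still pending on a running server, one can query the table directly; an illustrative check:

    -- The row disappears once the background update has finished.
    SELECT update_name, progress_json
    FROM background_updates
    WHERE update_name = 'remove_deactivated_pushers';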