Refactor get_user_ids_changed to pull less from DB

When a client asks for users whose devices have changed since a token, we
used to pull *all* users with device changes since that token from the
database, which could easily be thousands of rows for old tokens.

This PR changes this to check for changes only for the users the client is
actually interested in, i.e. those who share a room with the requesting user.

Fixes #5553
This commit is contained in:
Erik Johnston 2019-06-26 11:56:52 +01:00
parent 0e97284dfa
commit a2f6d31a63
3 changed files with 58 additions and 29 deletions

View file

@ -101,9 +101,13 @@ class DeviceWorkerHandler(BaseHandler):
room_ids = yield self.store.get_rooms_for_user(user_id) room_ids = yield self.store.get_rooms_for_user(user_id)
# First we check if any devices have changed # First we check if any devices have changed for users that we share
# rooms with.
users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
)
changed = yield self.store.get_user_whose_devices_changed( changed = yield self.store.get_user_whose_devices_changed(
from_token.device_list_key from_token.device_list_key, users_who_share_room
) )
# Then work out if any users have since joined # Then work out if any users have since joined
@ -188,10 +192,6 @@ class DeviceWorkerHandler(BaseHandler):
break break
if possibly_changed or possibly_left: if possibly_changed or possibly_left:
users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id
)
# Take the intersection of the users whose devices may have changed # Take the intersection of the users whose devices may have changed
# and those that actually still share a room with the user # and those that actually still share a room with the user
possibly_joined = possibly_changed & users_who_share_room possibly_joined = possibly_changed & users_who_share_room

View file

@ -1062,10 +1062,6 @@ class SyncHandler(object):
since_token = sync_result_builder.since_token since_token = sync_result_builder.since_token
if since_token and since_token.device_list_key: if since_token and since_token.device_list_key:
changed = yield self.store.get_user_whose_devices_changed(
since_token.device_list_key
)
# TODO: Be more clever than this, i.e. remove users who we already # TODO: Be more clever than this, i.e. remove users who we already
# share a room with? # share a room with?
for room_id in newly_joined_rooms: for room_id in newly_joined_rooms:
@ -1076,21 +1072,23 @@ class SyncHandler(object):
left_users = yield self.state.get_current_users_in_room(room_id) left_users = yield self.state.get_current_users_in_room(room_id)
newly_left_users.update(left_users) newly_left_users.update(left_users)
# TODO: Check that these users are actually new, i.e. either they
# weren't in the previous sync *or* they left and rejoined.
changed.update(newly_joined_or_invited_users)
if not changed and not newly_left_users:
defer.returnValue(DeviceLists(changed=[], left=newly_left_users))
users_who_share_room = yield self.store.get_users_who_share_room_with_user( users_who_share_room = yield self.store.get_users_who_share_room_with_user(
user_id user_id
) )
# TODO: Check that these users are actually new, i.e. either they
# weren't in the previous sync *or* they left and rejoined.
changed = users_who_share_room & set(newly_joined_or_invited_users)
changed_users = yield self.store.get_user_whose_devices_changed(
since_token.device_list_key, users_who_share_room
)
changed.update(changed_users)
defer.returnValue( defer.returnValue(
DeviceLists( DeviceLists(
changed=users_who_share_room & changed, changed=changed, left=set(newly_left_users) - users_who_share_room
left=set(newly_left_users) - users_who_share_room,
) )
) )
else: else:

View file

@ -391,22 +391,53 @@ class DeviceWorkerStore(SQLBaseStore):
return now_stream_id, [] return now_stream_id, []
@defer.inlineCallbacks def get_user_whose_devices_changed(self, from_key, user_ids):
def get_user_whose_devices_changed(self, from_key): """Get set of users whose devices have changed since `from_key` that
"""Get set of users whose devices have changed since `from_key`. are in the given list of user_ids.
Args:
user_ids (Iterable[str])
from_key: The device lists stream token
Returns:
Deferred[set[str]]: The set of user_ids whose devices have changed
since `from_key`
""" """
from_key = int(from_key) from_key = int(from_key)
changed = self._device_list_stream_cache.get_all_entities_changed(from_key)
if changed is not None: # Get set of users who *may* have changed. Users not in the returned
defer.returnValue(set(changed)) # list have definitely not changed.
to_check = list(
self._device_list_stream_cache.get_entities_changed(user_ids, from_key)
)
if not to_check:
return defer.succeed(set())
# We now check the database for all users in `to_check`, in batches.
batch_size = 100
chunks = [
to_check[i : i + batch_size] for i in range(0, len(to_check), batch_size)
]
sql = """ sql = """
SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? SELECT DISTINCT user_id FROM device_lists_stream
WHERE stream_id > ?
AND user_id IN (%s)
""" """
rows = yield self._execute(
"get_user_whose_devices_changed", None, sql, from_key def _get_user_whose_devices_changed_txn(txn):
changes = set()
for chunk in chunks:
txn.execute(sql % (",".join("?" for _ in chunk),), [from_key] + chunk)
changes.update(user_id for user_id, in txn)
return changes
return self.runInteraction(
"get_user_whose_devices_changed", _get_user_whose_devices_changed_txn
) )
defer.returnValue(set(row[0] for row in rows))
def get_all_device_list_changes_for_remotes(self, from_key, to_key): def get_all_device_list_changes_for_remotes(self, from_key, to_key):
"""Return a list of `(stream_id, user_id, destination)` which is the """Return a list of `(stream_id, user_id, destination)` which is the