From 7be0f6594e2a6dd7c3dc745eb856025276ec7d1f Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 11 Feb 2015 15:53:56 +0000 Subject: [PATCH 1/3] First step of making user_rooms_intersect() faster - implement the intersection logic in Python code, in terms of a DB query that is cacheable per user --- synapse/storage/roommember.py | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index c69dd995c..d490a374e 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -240,28 +240,30 @@ class RoomMemberStore(SQLBaseStore): results = self._parse_events_txn(txn, rows) return results + @defer.inlineCallbacks def user_rooms_intersect(self, user_id_list): """ Checks whether all the users whose IDs are given in a list share a room. + + This is a "hot path" function that's called a lot, e.g. by presence for + generating the event stream. """ - def interaction(txn): - user_list_clause = " OR ".join(["m.user_id = ?"] * len(user_id_list)) - sql = ( - "SELECT m.room_id FROM room_memberships as m " - "INNER JOIN current_state_events as c " - "ON m.event_id = c.event_id " - "WHERE m.membership = 'join' " - "AND (%(clause)s) " - # TODO(paul): We've got duplicate rows in the database somewhere - # so we have to DISTINCT m.user_id here - "GROUP BY m.room_id HAVING COUNT(DISTINCT m.user_id) = ?" 
- ) % {"clause": user_list_clause} + if len(user_id_list) < 2: + defer.returnValue(True) - args = list(user_id_list) - args.append(len(user_id_list)) + deferreds = [ + self.get_rooms_for_user_where_membership_is( + u, membership_list=[Membership.JOIN], + ) + for u in user_id_list + ] - txn.execute(sql, args) + results = yield defer.DeferredList(deferreds) - return len(txn.fetchall()) > 0 + # A list of sets of strings giving room IDs for each user + room_id_lists = [set([r.room_id for r in result[1]]) for result in results] - return self.runInteraction("user_rooms_intersect", interaction) + # There isn't a setintersection(*list_of_sets) + ret = len(room_id_lists.pop(0).intersection(*room_id_lists)) > 0 + + defer.returnValue(ret) From 45b56609ae84e7ffc3713335e7d9abc315ad1725 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 11 Feb 2015 16:04:08 +0000 Subject: [PATCH 2/3] Cache the result of a get_rooms_for_user query, to make user_rooms_intersect() much lighter in the read-common case --- synapse/storage/roommember.py | 41 +++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index d490a374e..e05465bc1 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -35,6 +35,11 @@ RoomsForUser = namedtuple( class RoomMemberStore(SQLBaseStore): + def __init__(self, *args, **kw): + super(RoomMemberStore, self).__init__(*args, **kw) + + self._user_rooms_cache = {} + def _store_room_member_txn(self, txn, event): """Store a room member in the database. """ @@ -98,6 +103,8 @@ class RoomMemberStore(SQLBaseStore): txn.execute(sql, (event.room_id, domain)) + self.invalidate_rooms_for_user(target_user_id) + @defer.inlineCallbacks def get_room_member(self, user_id, room_id): """Retrieve the current state of a room member. 
@@ -240,23 +247,43 @@ class RoomMemberStore(SQLBaseStore): results = self._parse_events_txn(txn, rows) return results + # TODO(paul): Create a nice @cached decorator to do this + # @cached + # def get_foo(...) + # ... + # invalidate_foo = get_foo.invalidator + + @defer.inlineCallbacks + def get_rooms_for_user(self, user_id): + # TODO(paul): put some performance counters in here so we can easily + # track what impact this cache is having + if user_id in self._user_rooms_cache: + defer.returnValue(self._user_rooms_cache[user_id]) + + rooms = yield self.get_rooms_for_user_where_membership_is( + user_id, membership_list=[Membership.JOIN], + ) + + self._user_rooms_cache[user_id] = rooms + defer.returnValue(rooms) + + def invalidate_rooms_for_user(self, user_id): + if user_id in self._user_rooms_cache: + del self._user_rooms_cache[user_id] + @defer.inlineCallbacks def user_rooms_intersect(self, user_id_list): """ Checks whether all the users whose IDs are given in a list share a room. This is a "hot path" function that's called a lot, e.g. by presence for - generating the event stream. + generating the event stream. As such, it is implemented locally by + wrapping logic around heavily-cached database queries. 
""" if len(user_id_list) < 2: defer.returnValue(True) - deferreds = [ - self.get_rooms_for_user_where_membership_is( - u, membership_list=[Membership.JOIN], - ) - for u in user_id_list - ] + deferreds = [self.get_rooms_for_user(u) for u in user_id_list] results = yield defer.DeferredList(deferreds) From 7f47ba7383302fdbdaa3a10abef00d3710c77fce Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 11 Feb 2015 16:18:21 +0000 Subject: [PATCH 3/3] Added another TODO note --- synapse/storage/roommember.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index e05465bc1..779f9ce54 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -264,6 +264,9 @@ class RoomMemberStore(SQLBaseStore): user_id, membership_list=[Membership.JOIN], ) + # TODO(paul): Consider applying a maximum size; just evict things at + # random, or consider LRU? + self._user_rooms_cache[user_id] = rooms defer.returnValue(rooms)