diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. + state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. @@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned.