forked from MirrorHub/synapse
Don't unnecessarily block notifying of new events.
When persisting events we calculate new stream orderings up front. Before we notify about an event, all events with lower stream orderings must have finished being persisted. This PR moves the assignment of stream orderings to *after* we have calculated the new current state and split the batch of events into separate chunks for persistence. This means that if it takes a long time to calculate the new current state, it will not block events in other rooms from being notified about. This should help reduce some global pauses in the events stream, which can last for tens of seconds (if not longer) and are caused by particularly expensive state resolutions.
parent 8ed9e63432
commit bf4db42920
1 changed file with 135 additions and 121 deletions
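For intuition about why the up-front assignment hurts, here is a minimal, self-contained sketch of the gating rule the commit message describes. None of this is Synapse code: `reserve`, `mark_persisted`, and `notified_up_to` are made-up names that only model "notify once every lower stream ordering has been persisted".

import itertools

# Hypothetical model of the notification gate: we may only notify up to the
# highest ordering N such that every ordering <= N has finished persisting.
# Orderings reserved early by a slow batch therefore hold back notification
# of everything reserved after them, even if it has already been persisted.
ordering_gen = itertools.count(1)   # stand-in for the stream ID generator
persisted = set()                   # orderings whose events are fully persisted
notified_up_to = 0

def reserve(n):
    """Reserve n consecutive stream orderings up front."""
    return [next(ordering_gen) for _ in range(n)]

def mark_persisted(orderings):
    """Record persisted orderings and advance the notification point."""
    global notified_up_to
    persisted.update(orderings)
    while (notified_up_to + 1) in persisted:
        notified_up_to += 1

# A slow batch (e.g. an expensive state resolution) reserves orderings 1-3.
slow = reserve(3)
# A fast batch in another room reserves 4-5 and finishes immediately...
fast = reserve(2)
mark_persisted(fast)
print(notified_up_to)   # 0 -- still blocked behind the slow batch
mark_persisted(slow)
print(notified_up_to)   # 5 -- everything can now be notified

Assigning the orderings per chunk, immediately before persistence, means a slow batch only reserves its orderings once its expensive work is done, so it no longer sits in front of faster batches.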
@@ -364,147 +364,161 @@ class EventsStore(
         if not events_and_contexts:
             return

-        if backfilled:
-            stream_ordering_manager = self._backfill_id_gen.get_next_mult(
-                len(events_and_contexts)
-            )
-        else:
-            stream_ordering_manager = self._stream_id_gen.get_next_mult(
-                len(events_and_contexts)
-            )
-
-        with stream_ordering_manager as stream_orderings:
-            for (event, context), stream in zip(events_and_contexts, stream_orderings):
-                event.internal_metadata.stream_ordering = stream
-
-            chunks = [
-                events_and_contexts[x : x + 100]
-                for x in range(0, len(events_and_contexts), 100)
-            ]
-
-            for chunk in chunks:
-                # We can't easily parallelize these since different chunks
-                # might contain the same event. :(
-
-                # NB: Assumes that we are only persisting events for one room
-                # at a time.
-
-                # map room_id->list[event_ids] giving the new forward
-                # extremities in each room
-                new_forward_extremeties = {}
-
-                # map room_id->(type,state_key)->event_id tracking the full
-                # state in each room after adding these events.
-                # This is simply used to prefill the get_current_state_ids
-                # cache
-                current_state_for_room = {}
-
-                # map room_id->(to_delete, to_insert) where to_delete is a list
-                # of type/state keys to remove from current state, and to_insert
-                # is a map (type,key)->event_id giving the state delta in each
-                # room
-                state_delta_for_room = {}
-
-                if not backfilled:
-                    with Measure(self._clock, "_calculate_state_and_extrem"):
-                        # Work out the new "current state" for each room.
-                        # We do this by working out what the new extremities are and then
-                        # calculating the state from that.
-                        events_by_room = {}
-                        for event, context in chunk:
-                            events_by_room.setdefault(event.room_id, []).append(
-                                (event, context)
-                            )
-
-                        for room_id, ev_ctx_rm in iteritems(events_by_room):
-                            latest_event_ids = yield self.get_latest_event_ids_in_room(
-                                room_id
-                            )
-                            new_latest_event_ids = yield self._calculate_new_extremities(
-                                room_id, ev_ctx_rm, latest_event_ids
-                            )
-
-                            latest_event_ids = set(latest_event_ids)
-                            if new_latest_event_ids == latest_event_ids:
-                                # No change in extremities, so no change in state
-                                continue
-
-                            # there should always be at least one forward extremity.
-                            # (except during the initial persistence of the send_join
-                            # results, in which case there will be no existing
-                            # extremities, so we'll `continue` above and skip this bit.)
-                            assert new_latest_event_ids, "No forward extremities left!"
-
-                            new_forward_extremeties[room_id] = new_latest_event_ids
-
-                            len_1 = (
-                                len(latest_event_ids) == 1
-                                and len(new_latest_event_ids) == 1
-                            )
-                            if len_1:
-                                all_single_prev_not_state = all(
-                                    len(event.prev_event_ids()) == 1
-                                    and not event.is_state()
-                                    for event, ctx in ev_ctx_rm
-                                )
-                                # Don't bother calculating state if they're just
-                                # a long chain of single ancestor non-state events.
-                                if all_single_prev_not_state:
-                                    continue
-
-                            state_delta_counter.inc()
-                            if len(new_latest_event_ids) == 1:
-                                state_delta_single_event_counter.inc()
-
-                                # This is a fairly handwavey check to see if we could
-                                # have guessed what the delta would have been when
-                                # processing one of these events.
-                                # What we're interested in is if the latest extremities
-                                # were the same when we created the event as they are
-                                # now. When this server creates a new event (as opposed
-                                # to receiving it over federation) it will use the
-                                # forward extremities as the prev_events, so we can
-                                # guess this by looking at the prev_events and checking
-                                # if they match the current forward extremities.
-                                for ev, _ in ev_ctx_rm:
-                                    prev_event_ids = set(ev.prev_event_ids())
-                                    if latest_event_ids == prev_event_ids:
-                                        state_delta_reuse_delta_counter.inc()
-                                        break
-
-                            logger.info("Calculating state delta for room %s", room_id)
-                            with Measure(
-                                self._clock, "persist_events.get_new_state_after_events"
-                            ):
-                                res = yield self._get_new_state_after_events(
-                                    room_id,
-                                    ev_ctx_rm,
-                                    latest_event_ids,
-                                    new_latest_event_ids,
-                                )
-                                current_state, delta_ids = res
-
-                            # If either are not None then there has been a change,
-                            # and we need to work out the delta (or use that
-                            # given)
-                            if delta_ids is not None:
-                                # If there is a delta we know that we've
-                                # only added or replaced state, never
-                                # removed keys entirely.
-                                state_delta_for_room[room_id] = ([], delta_ids)
-                            elif current_state is not None:
-                                with Measure(
-                                    self._clock, "persist_events.calculate_state_delta"
-                                ):
-                                    delta = yield self._calculate_state_delta(
-                                        room_id, current_state
-                                    )
-                                state_delta_for_room[room_id] = delta
-
-                            # If we have the current_state then lets prefill
-                            # the cache with it.
-                            if current_state is not None:
-                                current_state_for_room[room_id] = current_state
+        chunks = [
+            events_and_contexts[x : x + 100]
+            for x in range(0, len(events_and_contexts), 100)
+        ]
+
+        for chunk in chunks:
+            # We can't easily parallelize these since different chunks
+            # might contain the same event. :(
+
+            # NB: Assumes that we are only persisting events for one room
+            # at a time.
+
+            # map room_id->list[event_ids] giving the new forward
+            # extremities in each room
+            new_forward_extremeties = {}
+
+            # map room_id->(type,state_key)->event_id tracking the full
+            # state in each room after adding these events.
+            # This is simply used to prefill the get_current_state_ids
+            # cache
+            current_state_for_room = {}
+
+            # map room_id->(to_delete, to_insert) where to_delete is a list
+            # of type/state keys to remove from current state, and to_insert
+            # is a map (type,key)->event_id giving the state delta in each
+            # room
+            state_delta_for_room = {}
+
+            if not backfilled:
+                with Measure(self._clock, "_calculate_state_and_extrem"):
+                    # Work out the new "current state" for each room.
+                    # We do this by working out what the new extremities are and then
+                    # calculating the state from that.
+                    events_by_room = {}
+                    for event, context in chunk:
+                        events_by_room.setdefault(event.room_id, []).append(
+                            (event, context)
+                        )
+
+                    for room_id, ev_ctx_rm in iteritems(events_by_room):
+                        latest_event_ids = yield self.get_latest_event_ids_in_room(
+                            room_id
+                        )
+                        new_latest_event_ids = yield self._calculate_new_extremities(
+                            room_id, ev_ctx_rm, latest_event_ids
+                        )
+
+                        latest_event_ids = set(latest_event_ids)
+                        if new_latest_event_ids == latest_event_ids:
+                            # No change in extremities, so no change in state
+                            continue
+
+                        # there should always be at least one forward extremity.
+                        # (except during the initial persistence of the send_join
+                        # results, in which case there will be no existing
+                        # extremities, so we'll `continue` above and skip this bit.)
+                        assert new_latest_event_ids, "No forward extremities left!"
+
+                        new_forward_extremeties[room_id] = new_latest_event_ids
+
+                        len_1 = (
+                            len(latest_event_ids) == 1
+                            and len(new_latest_event_ids) == 1
+                        )
+                        if len_1:
+                            all_single_prev_not_state = all(
+                                len(event.prev_event_ids()) == 1
+                                and not event.is_state()
+                                for event, ctx in ev_ctx_rm
+                            )
+                            # Don't bother calculating state if they're just
+                            # a long chain of single ancestor non-state events.
+                            if all_single_prev_not_state:
+                                continue
+
+                        state_delta_counter.inc()
+                        if len(new_latest_event_ids) == 1:
+                            state_delta_single_event_counter.inc()
+
+                            # This is a fairly handwavey check to see if we could
+                            # have guessed what the delta would have been when
+                            # processing one of these events.
+                            # What we're interested in is if the latest extremities
+                            # were the same when we created the event as they are
+                            # now. When this server creates a new event (as opposed
+                            # to receiving it over federation) it will use the
+                            # forward extremities as the prev_events, so we can
+                            # guess this by looking at the prev_events and checking
+                            # if they match the current forward extremities.
+                            for ev, _ in ev_ctx_rm:
+                                prev_event_ids = set(ev.prev_event_ids())
+                                if latest_event_ids == prev_event_ids:
+                                    state_delta_reuse_delta_counter.inc()
+                                    break
+
+                        logger.info("Calculating state delta for room %s", room_id)
+                        with Measure(
+                            self._clock, "persist_events.get_new_state_after_events"
+                        ):
+                            res = yield self._get_new_state_after_events(
+                                room_id,
+                                ev_ctx_rm,
+                                latest_event_ids,
+                                new_latest_event_ids,
+                            )
+                            current_state, delta_ids = res
+
+                        # If either are not None then there has been a change,
+                        # and we need to work out the delta (or use that
+                        # given)
+                        if delta_ids is not None:
+                            # If there is a delta we know that we've
+                            # only added or replaced state, never
+                            # removed keys entirely.
+                            state_delta_for_room[room_id] = ([], delta_ids)
+                        elif current_state is not None:
+                            with Measure(
+                                self._clock, "persist_events.calculate_state_delta"
+                            ):
+                                delta = yield self._calculate_state_delta(
+                                    room_id, current_state
+                                )
+                            state_delta_for_room[room_id] = delta
+
+                        # If we have the current_state then lets prefill
+                        # the cache with it.
+                        if current_state is not None:
+                            current_state_for_room[room_id] = current_state
+
+            # We want to calculate the stream orderings as late as possible, as
+            # we only notify after all events with a lesser stream ordering have
+            # been persisted. I.e. if we spend 10s inside the with block then
+            # that will delay all subsequent events from being notified about.
+            # Hence why we do it down here rather than wrapping the entire
+            # function.
+            #
+            # Its safe to do this after calculating the state deltas etc as we
+            # only need to protect the *persistence* of the events. This is to
+            # ensure that queries of the form "fetch events since X" don't
+            # return events and stream positions after events that are still in
+            # flight, as otherwise subsequent requests "fetch event since Y"
+            # will not return those events.
+            #
+            # Note: Multiple instances of this function cannot be in flight at
+            # the same time for the same room.
+            if backfilled:
+                stream_ordering_manager = self._backfill_id_gen.get_next_mult(
+                    len(chunk)
+                )
+            else:
+                stream_ordering_manager = self._stream_id_gen.get_next_mult(len(chunk))
+
+            with stream_ordering_manager as stream_orderings:
+                for (event, context), stream in zip(chunk, stream_orderings):
+                    event.internal_metadata.stream_ordering = stream

                 yield self.runInteraction(
                     "persist_events",
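Condensed into a runnable toy, the control flow after this change looks roughly as follows. `FakeIdGen`, the placeholder state calculation, and this `persist_events` are illustrative stand-ins rather than Synapse APIs; the point is only the ordering of steps, with the expensive per-chunk work done before any stream orderings for that chunk are reserved.

from contextlib import contextmanager
import itertools

class FakeIdGen:
    """Toy stand-in for the stream ID generator used above."""

    def __init__(self):
        self._counter = itertools.count(1)

    @contextmanager
    def get_next_mult(self, n):
        # Hand out n consecutive orderings for the duration of the block.
        yield [next(self._counter) for _ in range(n)]

def persist_events(events, id_gen, chunk_size=100):
    chunks = [events[x : x + chunk_size] for x in range(0, len(events), chunk_size)]
    for chunk in chunks:
        # Expensive work (extremity and state calculation) happens here,
        # *before* any stream orderings for this chunk are reserved.
        state_delta = sum(len(e) for e in chunk)  # placeholder for the real work
        # Orderings are reserved as late as possible, and only for this chunk,
        # so a slow chunk no longer gates notification for other chunks.
        with id_gen.get_next_mult(len(chunk)) as stream_orderings:
            for event, stream in zip(chunk, stream_orderings):
                print("persisting %r at stream ordering %d (state delta %d)"
                      % (event, stream, state_delta))

persist_events(["$a", "$b", "$c"], FakeIdGen(), chunk_size=2)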