Don't unnecessarily block notifying of new events.

When persisting events we calculate new stream orderings up front.
Before we notify about an event, all events with lower stream orderings
must have finished being persisted.
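
For intuition, here is a minimal, self-contained sketch of that
invariant (the StreamPosition class and its methods are invented for
illustration and are not Synapse's actual notifier code): the position
advertised to clients may only advance past an ordering once every
lower ordering has finished persisting.

class StreamPosition:
    """Toy model: only expose orderings with no gaps below them."""

    def __init__(self):
        self.persisted_upto = 0  # highest ordering safe to notify about
        self.in_flight = set()   # orderings allocated but not yet persisted
        self.finished = set()    # orderings persisted out of order

    def allocate(self, ordering):
        self.in_flight.add(ordering)

    def mark_persisted(self, ordering):
        self.in_flight.discard(ordering)
        self.finished.add(ordering)
        # Advance the public position only while there is no gap below it.
        while (self.persisted_upto + 1) in self.finished:
            self.persisted_upto += 1
            self.finished.remove(self.persisted_upto)

pos = StreamPosition()
pos.allocate(1)
pos.allocate(2)
pos.mark_persisted(2)             # event 2 done, but 1 still in flight...
assert pos.persisted_upto == 0    # ...so we cannot notify about 2 yet
pos.mark_persisted(1)
assert pos.persisted_upto == 2    # now both events can be notified about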

This PR moves the assignment of stream orderings to *after* we have
calculated the new current state, and splits the batch of events into
separate chunks for persistence. This means that if calculating the new
current state takes a long time, it no longer blocks events in other
rooms from being notified about.

This should help reduce the global pauses in the events stream, which
can last for tens of seconds (if not longer) and are caused by
particularly expensive state resolutions.
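
As a rough illustration of the new shape of the persistence loop, here
is a toy, self-contained sketch; the helpers allocate_stream_orderings,
calculate_state_and_extremities and persist_chunk are placeholders for
Synapse's real persistence machinery, not its actual API.

from contextlib import contextmanager

_next_ordering = 0

@contextmanager
def allocate_stream_orderings(n):
    # Hand out n consecutive stream orderings; the real id generator only
    # lets readers advance past them once this block has exited.
    global _next_ordering
    start = _next_ordering + 1
    _next_ordering += n
    yield range(start, start + n)

def calculate_state_and_extremities(chunk):
    # Placeholder for the expensive state resolution / extremity work.
    return {"rooms": {event["room_id"] for event, _ in chunk}}

def persist_chunk(chunk, state_delta):
    # Placeholder for the actual database writes.
    pass

def persist_events(events_and_contexts, backfilled=False):
    chunks = [
        events_and_contexts[x : x + 100]
        for x in range(0, len(events_and_contexts), 100)
    ]
    for chunk in chunks:
        # The expensive work happens first, before any stream orderings are
        # reserved, so a slow state resolution in one room no longer delays
        # notifications for events already persisted in other rooms.
        state_delta = None if backfilled else calculate_state_and_extremities(chunk)

        # Orderings are reserved as late as possible and only span the
        # database writes for this chunk.
        with allocate_stream_orderings(len(chunk)) as orderings:
            for (event, _context), ordering in zip(chunk, orderings):
                event["stream_ordering"] = ordering
            persist_chunk(chunk, state_delta)

persist_events([({"room_id": "!room:example.org"}, None) for _ in range(3)])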
Erik Johnston 2019-08-06 13:27:22 +01:00
parent 8ed9e63432
commit bf4db42920

@@ -364,147 +364,161 @@ class EventsStore(
         if not events_and_contexts:
             return

-        if backfilled:
-            stream_ordering_manager = self._backfill_id_gen.get_next_mult(
-                len(events_and_contexts)
-            )
-        else:
-            stream_ordering_manager = self._stream_id_gen.get_next_mult(
-                len(events_and_contexts)
-            )
-
-        with stream_ordering_manager as stream_orderings:
-            for (event, context), stream in zip(events_and_contexts, stream_orderings):
-                event.internal_metadata.stream_ordering = stream
-
-            chunks = [
-                events_and_contexts[x : x + 100]
-                for x in range(0, len(events_and_contexts), 100)
-            ]
-
-            for chunk in chunks:
-                # We can't easily parallelize these since different chunks
-                # might contain the same event. :(
-
-                # NB: Assumes that we are only persisting events for one room
-                # at a time.
-
-                # map room_id->list[event_ids] giving the new forward
-                # extremities in each room
-                new_forward_extremeties = {}
-
-                # map room_id->(type,state_key)->event_id tracking the full
-                # state in each room after adding these events.
-                # This is simply used to prefill the get_current_state_ids
-                # cache
-                current_state_for_room = {}
-
-                # map room_id->(to_delete, to_insert) where to_delete is a list
-                # of type/state keys to remove from current state, and to_insert
-                # is a map (type,key)->event_id giving the state delta in each
-                # room
-                state_delta_for_room = {}
-
-                if not backfilled:
-                    with Measure(self._clock, "_calculate_state_and_extrem"):
-                        # Work out the new "current state" for each room.
-                        # We do this by working out what the new extremities are and then
-                        # calculating the state from that.
-                        events_by_room = {}
-                        for event, context in chunk:
-                            events_by_room.setdefault(event.room_id, []).append(
-                                (event, context)
-                            )
-
-                        for room_id, ev_ctx_rm in iteritems(events_by_room):
-                            latest_event_ids = yield self.get_latest_event_ids_in_room(
-                                room_id
-                            )
-                            new_latest_event_ids = yield self._calculate_new_extremities(
-                                room_id, ev_ctx_rm, latest_event_ids
-                            )
-
-                            latest_event_ids = set(latest_event_ids)
-                            if new_latest_event_ids == latest_event_ids:
-                                # No change in extremities, so no change in state
-                                continue
-
-                            # there should always be at least one forward extremity.
-                            # (except during the initial persistence of the send_join
-                            # results, in which case there will be no existing
-                            # extremities, so we'll `continue` above and skip this bit.)
-                            assert new_latest_event_ids, "No forward extremities left!"
-
-                            new_forward_extremeties[room_id] = new_latest_event_ids
-
-                            len_1 = (
-                                len(latest_event_ids) == 1
-                                and len(new_latest_event_ids) == 1
-                            )
-                            if len_1:
-                                all_single_prev_not_state = all(
-                                    len(event.prev_event_ids()) == 1
-                                    and not event.is_state()
-                                    for event, ctx in ev_ctx_rm
-                                )
-                                # Don't bother calculating state if they're just
-                                # a long chain of single ancestor non-state events.
-                                if all_single_prev_not_state:
-                                    continue
-
-                            state_delta_counter.inc()
-                            if len(new_latest_event_ids) == 1:
-                                state_delta_single_event_counter.inc()
-
-                                # This is a fairly handwavey check to see if we could
-                                # have guessed what the delta would have been when
-                                # processing one of these events.
-                                # What we're interested in is if the latest extremities
-                                # were the same when we created the event as they are
-                                # now. When this server creates a new event (as opposed
-                                # to receiving it over federation) it will use the
-                                # forward extremities as the prev_events, so we can
-                                # guess this by looking at the prev_events and checking
-                                # if they match the current forward extremities.
-                                for ev, _ in ev_ctx_rm:
-                                    prev_event_ids = set(ev.prev_event_ids())
-                                    if latest_event_ids == prev_event_ids:
-                                        state_delta_reuse_delta_counter.inc()
-                                        break
-
-                            logger.info("Calculating state delta for room %s", room_id)
-                            with Measure(
-                                self._clock, "persist_events.get_new_state_after_events"
-                            ):
-                                res = yield self._get_new_state_after_events(
-                                    room_id,
-                                    ev_ctx_rm,
-                                    latest_event_ids,
-                                    new_latest_event_ids,
-                                )
-                                current_state, delta_ids = res
-
-                            # If either are not None then there has been a change,
-                            # and we need to work out the delta (or use that
-                            # given)
-                            if delta_ids is not None:
-                                # If there is a delta we know that we've
-                                # only added or replaced state, never
-                                # removed keys entirely.
-                                state_delta_for_room[room_id] = ([], delta_ids)
-                            elif current_state is not None:
-                                with Measure(
-                                    self._clock, "persist_events.calculate_state_delta"
-                                ):
-                                    delta = yield self._calculate_state_delta(
-                                        room_id, current_state
-                                    )
-                                state_delta_for_room[room_id] = delta
-
-                            # If we have the current_state then lets prefill
-                            # the cache with it.
-                            if current_state is not None:
-                                current_state_for_room[room_id] = current_state
+        chunks = [
+            events_and_contexts[x : x + 100]
+            for x in range(0, len(events_and_contexts), 100)
+        ]
+
+        for chunk in chunks:
+            # We can't easily parallelize these since different chunks
+            # might contain the same event. :(
+
+            # NB: Assumes that we are only persisting events for one room
+            # at a time.
+
+            # map room_id->list[event_ids] giving the new forward
+            # extremities in each room
+            new_forward_extremeties = {}
+
+            # map room_id->(type,state_key)->event_id tracking the full
+            # state in each room after adding these events.
+            # This is simply used to prefill the get_current_state_ids
+            # cache
+            current_state_for_room = {}
+
+            # map room_id->(to_delete, to_insert) where to_delete is a list
+            # of type/state keys to remove from current state, and to_insert
+            # is a map (type,key)->event_id giving the state delta in each
+            # room
+            state_delta_for_room = {}
+
+            if not backfilled:
+                with Measure(self._clock, "_calculate_state_and_extrem"):
+                    # Work out the new "current state" for each room.
+                    # We do this by working out what the new extremities are and then
+                    # calculating the state from that.
+                    events_by_room = {}
+                    for event, context in chunk:
+                        events_by_room.setdefault(event.room_id, []).append(
+                            (event, context)
+                        )
+
+                    for room_id, ev_ctx_rm in iteritems(events_by_room):
+                        latest_event_ids = yield self.get_latest_event_ids_in_room(
+                            room_id
+                        )
+                        new_latest_event_ids = yield self._calculate_new_extremities(
+                            room_id, ev_ctx_rm, latest_event_ids
+                        )
+
+                        latest_event_ids = set(latest_event_ids)
+                        if new_latest_event_ids == latest_event_ids:
+                            # No change in extremities, so no change in state
+                            continue
+
+                        # there should always be at least one forward extremity.
+                        # (except during the initial persistence of the send_join
+                        # results, in which case there will be no existing
+                        # extremities, so we'll `continue` above and skip this bit.)
+                        assert new_latest_event_ids, "No forward extremities left!"
+
+                        new_forward_extremeties[room_id] = new_latest_event_ids
+
+                        len_1 = (
+                            len(latest_event_ids) == 1
+                            and len(new_latest_event_ids) == 1
+                        )
+                        if len_1:
+                            all_single_prev_not_state = all(
+                                len(event.prev_event_ids()) == 1
+                                and not event.is_state()
+                                for event, ctx in ev_ctx_rm
+                            )
+                            # Don't bother calculating state if they're just
+                            # a long chain of single ancestor non-state events.
+                            if all_single_prev_not_state:
+                                continue
+
+                        state_delta_counter.inc()
+                        if len(new_latest_event_ids) == 1:
+                            state_delta_single_event_counter.inc()
+
+                            # This is a fairly handwavey check to see if we could
+                            # have guessed what the delta would have been when
+                            # processing one of these events.
+                            # What we're interested in is if the latest extremities
+                            # were the same when we created the event as they are
+                            # now. When this server creates a new event (as opposed
+                            # to receiving it over federation) it will use the
+                            # forward extremities as the prev_events, so we can
+                            # guess this by looking at the prev_events and checking
+                            # if they match the current forward extremities.
+                            for ev, _ in ev_ctx_rm:
+                                prev_event_ids = set(ev.prev_event_ids())
+                                if latest_event_ids == prev_event_ids:
+                                    state_delta_reuse_delta_counter.inc()
+                                    break
+
+                        logger.info("Calculating state delta for room %s", room_id)
+                        with Measure(
+                            self._clock, "persist_events.get_new_state_after_events"
+                        ):
+                            res = yield self._get_new_state_after_events(
+                                room_id,
+                                ev_ctx_rm,
+                                latest_event_ids,
+                                new_latest_event_ids,
+                            )
+                            current_state, delta_ids = res
+
+                        # If either are not None then there has been a change,
+                        # and we need to work out the delta (or use that
+                        # given)
+                        if delta_ids is not None:
+                            # If there is a delta we know that we've
+                            # only added or replaced state, never
+                            # removed keys entirely.
+                            state_delta_for_room[room_id] = ([], delta_ids)
+                        elif current_state is not None:
+                            with Measure(
+                                self._clock, "persist_events.calculate_state_delta"
+                            ):
+                                delta = yield self._calculate_state_delta(
+                                    room_id, current_state
+                                )
+                            state_delta_for_room[room_id] = delta
+
+                        # If we have the current_state then lets prefill
+                        # the cache with it.
+                        if current_state is not None:
+                            current_state_for_room[room_id] = current_state
+
+            # We want to calculate the stream orderings as late as possible, as
+            # we only notify after all events with a lesser stream ordering have
+            # been persisted. I.e. if we spend 10s inside the with block then
+            # that will delay all subsequent events from being notified about.
+            # Hence why we do it down here rather than wrapping the entire
+            # function.
+            #
+            # Its safe to do this after calculating the state deltas etc as we
+            # only need to protect the *persistence* of the events. This is to
+            # ensure that queries of the form "fetch events since X" don't
+            # return events and stream positions after events that are still in
+            # flight, as otherwise subsequent requests "fetch event since Y"
+            # will not return those events.
+            #
+            # Note: Multiple instances of this function cannot be in flight at
+            # the same time for the same room.
+            if backfilled:
+                stream_ordering_manager = self._backfill_id_gen.get_next_mult(
+                    len(chunk)
+                )
+            else:
+                stream_ordering_manager = self._stream_id_gen.get_next_mult(len(chunk))
+
+            with stream_ordering_manager as stream_orderings:
+                for (event, context), stream in zip(chunk, stream_orderings):
+                    event.internal_metadata.stream_ordering = stream

                 yield self.runInteraction(
                     "persist_events",