forked from MirrorHub/synapse
Merge pull request #3595 from matrix-org/erikj/use_deltas
Use deltas to calculate current state deltas
This commit is contained in:
commit
60a1d147a7
2 changed files with 118 additions and 52 deletions
1
changelog.d/3595.misc
Normal file
1
changelog.d/3595.misc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Attempt to reduce amount of state pulled out of DB during persist_events
|
|
@ -19,7 +19,7 @@ import logging
|
||||||
from collections import OrderedDict, deque, namedtuple
|
from collections import OrderedDict, deque, namedtuple
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
from six import iteritems, itervalues
|
from six import iteritems
|
||||||
from six.moves import range
|
from six.moves import range
|
||||||
|
|
||||||
from canonicaljson import json
|
from canonicaljson import json
|
||||||
|
@ -344,11 +344,14 @@ class EventsStore(EventsWorkerStore):
|
||||||
new_forward_extremeties = {}
|
new_forward_extremeties = {}
|
||||||
|
|
||||||
# map room_id->(type,state_key)->event_id tracking the full
|
# map room_id->(type,state_key)->event_id tracking the full
|
||||||
# state in each room after adding these events
|
# state in each room after adding these events.
|
||||||
|
# This is simply used to prefill the get_current_state_ids
|
||||||
|
# cache
|
||||||
current_state_for_room = {}
|
current_state_for_room = {}
|
||||||
|
|
||||||
# map room_id->(to_delete, to_insert) where each entry is
|
# map room_id->(to_delete, to_insert) where to_delete is a list
|
||||||
# a map (type,key)->event_id giving the state delta in each
|
# of type/state keys to remove from current state, and to_insert
|
||||||
|
# is a map (type,key)->event_id giving the state delta in each
|
||||||
# room
|
# room
|
||||||
state_delta_for_room = {}
|
state_delta_for_room = {}
|
||||||
|
|
||||||
|
@ -418,28 +421,40 @@ class EventsStore(EventsWorkerStore):
|
||||||
logger.info(
|
logger.info(
|
||||||
"Calculating state delta for room %s", room_id,
|
"Calculating state delta for room %s", room_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
with Measure(
|
with Measure(
|
||||||
self._clock,
|
self._clock,
|
||||||
"persist_events.get_new_state_after_events",
|
"persist_events.get_new_state_after_events",
|
||||||
):
|
):
|
||||||
current_state = yield self._get_new_state_after_events(
|
res = yield self._get_new_state_after_events(
|
||||||
room_id,
|
room_id,
|
||||||
ev_ctx_rm,
|
ev_ctx_rm,
|
||||||
latest_event_ids,
|
latest_event_ids,
|
||||||
new_latest_event_ids,
|
new_latest_event_ids,
|
||||||
)
|
)
|
||||||
|
current_state, delta_ids = res
|
||||||
|
|
||||||
if current_state is not None:
|
# If either is not None then there has been a change,
|
||||||
current_state_for_room[room_id] = current_state
|
# and we need to work out the delta (or use that
|
||||||
|
# given)
|
||||||
|
if delta_ids is not None:
|
||||||
|
# If there is a delta we know that we've
|
||||||
|
# only added or replaced state, never
|
||||||
|
# removed keys entirely.
|
||||||
|
state_delta_for_room[room_id] = ([], delta_ids)
|
||||||
|
elif current_state is not None:
|
||||||
with Measure(
|
with Measure(
|
||||||
self._clock,
|
self._clock,
|
||||||
"persist_events.calculate_state_delta",
|
"persist_events.calculate_state_delta",
|
||||||
):
|
):
|
||||||
delta = yield self._calculate_state_delta(
|
delta = yield self._calculate_state_delta(
|
||||||
room_id, current_state,
|
room_id, current_state,
|
||||||
)
|
)
|
||||||
state_delta_for_room[room_id] = delta
|
state_delta_for_room[room_id] = delta
|
||||||
|
|
||||||
|
# If we have the current_state then let's prefill
|
||||||
|
# the cache with it.
|
||||||
|
if current_state is not None:
|
||||||
|
current_state_for_room[room_id] = current_state
|
||||||
|
|
||||||
yield self.runInteraction(
|
yield self.runInteraction(
|
||||||
"persist_events",
|
"persist_events",
|
||||||
|
@ -538,9 +553,15 @@ class EventsStore(EventsWorkerStore):
|
||||||
the new forward extremities for the room.
|
the new forward extremities for the room.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Deferred[dict[(str,str), str]|None]:
|
Deferred[tuple[dict[(str,str), str]|None, dict[(str,str), str]|None]]:
|
||||||
None if there are no changes to the room state, or
|
Returns a tuple of two state maps, the first being the full new current
|
||||||
a dict of (type, state_key) -> event_id].
|
state and the second being the delta to the existing current state.
|
||||||
|
If both are None then there has been no change.
|
||||||
|
|
||||||
|
If there has been a change then we only return the delta if it's
|
||||||
|
already been calculated. Conversely if we do know the delta then
|
||||||
|
the new current state is only returned if we've already calculated
|
||||||
|
it.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not new_latest_event_ids:
|
if not new_latest_event_ids:
|
||||||
|
@ -548,6 +569,10 @@ class EventsStore(EventsWorkerStore):
|
||||||
|
|
||||||
# map from state_group to ((type, key) -> event_id) state map
|
# map from state_group to ((type, key) -> event_id) state map
|
||||||
state_groups_map = {}
|
state_groups_map = {}
|
||||||
|
|
||||||
|
# Map from (prev state group, new state group) -> delta state dict
|
||||||
|
state_group_deltas = {}
|
||||||
|
|
||||||
for ev, ctx in events_context:
|
for ev, ctx in events_context:
|
||||||
if ctx.state_group is None:
|
if ctx.state_group is None:
|
||||||
# I don't think this can happen, but let's double-check
|
# I don't think this can happen, but let's double-check
|
||||||
|
@ -566,6 +591,9 @@ class EventsStore(EventsWorkerStore):
|
||||||
if current_state_ids is not None:
|
if current_state_ids is not None:
|
||||||
state_groups_map[ctx.state_group] = current_state_ids
|
state_groups_map[ctx.state_group] = current_state_ids
|
||||||
|
|
||||||
|
if ctx.prev_group:
|
||||||
|
state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids
|
||||||
|
|
||||||
# We need to map the event_ids to their state groups. First, let's
|
# We need to map the event_ids to their state groups. First, let's
|
||||||
# check if the event is one we're persisting, in which case we can
|
# check if the event is one we're persisting, in which case we can
|
||||||
# pull the state group from its context.
|
# pull the state group from its context.
|
||||||
|
@ -607,7 +635,26 @@ class EventsStore(EventsWorkerStore):
|
||||||
# If the old and new groups are the same then we don't need to do
|
# If the old and new groups are the same then we don't need to do
|
||||||
# anything.
|
# anything.
|
||||||
if old_state_groups == new_state_groups:
|
if old_state_groups == new_state_groups:
|
||||||
return
|
defer.returnValue((None, None))
|
||||||
|
|
||||||
|
if len(new_state_groups) == 1 and len(old_state_groups) == 1:
|
||||||
|
# If we're going from one state group to another, let's check if
|
||||||
|
# we have a delta for that transition. If we do then we can just
|
||||||
|
# return that.
|
||||||
|
|
||||||
|
new_state_group = next(iter(new_state_groups))
|
||||||
|
old_state_group = next(iter(old_state_groups))
|
||||||
|
|
||||||
|
delta_ids = state_group_deltas.get(
|
||||||
|
(old_state_group, new_state_group,), None
|
||||||
|
)
|
||||||
|
if delta_ids is not None:
|
||||||
|
# We have a delta from the existing to new current state,
|
||||||
|
# so let's just return that. If we happen to already have
|
||||||
|
# the current state in memory then let's also return that,
|
||||||
|
# but it doesn't matter if we don't.
|
||||||
|
new_state = state_groups_map.get(new_state_group)
|
||||||
|
defer.returnValue((new_state, delta_ids))
|
||||||
|
|
||||||
# Now that we have calculated new_state_groups we need to get
|
# Now that we have calculated new_state_groups we need to get
|
||||||
# their state IDs so we can resolve to a single state set.
|
# their state IDs so we can resolve to a single state set.
|
||||||
|
@ -619,7 +666,7 @@ class EventsStore(EventsWorkerStore):
|
||||||
if len(new_state_groups) == 1:
|
if len(new_state_groups) == 1:
|
||||||
# If there is only one state group, then we know what the current
|
# If there is only one state group, then we know what the current
|
||||||
# state is.
|
# state is.
|
||||||
defer.returnValue(state_groups_map[new_state_groups.pop()])
|
defer.returnValue((state_groups_map[new_state_groups.pop()], None))
|
||||||
|
|
||||||
# Ok, we need to defer to the state handler to resolve our state sets.
|
# Ok, we need to defer to the state handler to resolve our state sets.
|
||||||
|
|
||||||
|
@ -638,7 +685,7 @@ class EventsStore(EventsWorkerStore):
|
||||||
room_id, state_groups, events_map, get_events
|
room_id, state_groups, events_map, get_events
|
||||||
)
|
)
|
||||||
|
|
||||||
defer.returnValue(res.state)
|
defer.returnValue((res.state, None))
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def _calculate_state_delta(self, room_id, current_state):
|
def _calculate_state_delta(self, room_id, current_state):
|
||||||
|
@ -647,17 +694,16 @@ class EventsStore(EventsWorkerStore):
|
||||||
Assumes that we are only persisting events for one room at a time.
|
Assumes that we are only persisting events for one room at a time.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
2-tuple (to_delete, to_insert) where both are state dicts,
|
tuple[list, dict] (to_delete, to_insert): where to_delete are the
|
||||||
i.e. (type, state_key) -> event_id. `to_delete` are the entries to
|
type/state_keys to remove from current_state_events and `to_insert`
|
||||||
first be deleted from current_state_events, `to_insert` are entries
|
are the updates to current_state_events.
|
||||||
to insert.
|
|
||||||
"""
|
"""
|
||||||
existing_state = yield self.get_current_state_ids(room_id)
|
existing_state = yield self.get_current_state_ids(room_id)
|
||||||
|
|
||||||
to_delete = {
|
to_delete = [
|
||||||
key: ev_id for key, ev_id in iteritems(existing_state)
|
key for key in existing_state
|
||||||
if ev_id != current_state.get(key)
|
if key not in current_state
|
||||||
}
|
]
|
||||||
|
|
||||||
to_insert = {
|
to_insert = {
|
||||||
key: ev_id for key, ev_id in iteritems(current_state)
|
key: ev_id for key, ev_id in iteritems(current_state)
|
||||||
|
@ -684,10 +730,10 @@ class EventsStore(EventsWorkerStore):
|
||||||
delete_existing (bool): True to purge existing table rows for the
|
delete_existing (bool): True to purge existing table rows for the
|
||||||
events from the database. This is useful when retrying due to
|
events from the database. This is useful when retrying due to
|
||||||
IntegrityError.
|
IntegrityError.
|
||||||
state_delta_for_room (dict[str, (list[str], list[str])]):
|
state_delta_for_room (dict[str, (list, dict)]):
|
||||||
The current-state delta for each room. For each room, a tuple
|
The current-state delta for each room. For each room, a tuple
|
||||||
(to_delete, to_insert), being a list of event ids to be removed
|
(to_delete, to_insert), being a list of type/state keys to be
|
||||||
from the current state, and a list of event ids to be added to
|
removed from the current state, and a state set to be added to
|
||||||
the current state.
|
the current state.
|
||||||
new_forward_extremeties (dict[str, list[str]]):
|
new_forward_extremeties (dict[str, list[str]]):
|
||||||
The new forward extremities for each room. For each room, a
|
The new forward extremities for each room. For each room, a
|
||||||
|
@ -765,9 +811,46 @@ class EventsStore(EventsWorkerStore):
|
||||||
def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order):
|
def _update_current_state_txn(self, txn, state_delta_by_room, max_stream_order):
|
||||||
for room_id, current_state_tuple in iteritems(state_delta_by_room):
|
for room_id, current_state_tuple in iteritems(state_delta_by_room):
|
||||||
to_delete, to_insert = current_state_tuple
|
to_delete, to_insert = current_state_tuple
|
||||||
|
|
||||||
|
# First we add entries to the current_state_delta_stream. We
|
||||||
|
# do this before updating the current_state_events table so
|
||||||
|
# that we can use it to calculate the `prev_event_id`. (This
|
||||||
|
# allows us to not have to pull out the existing state
|
||||||
|
# unnecessarily).
|
||||||
|
sql = """
|
||||||
|
INSERT INTO current_state_delta_stream
|
||||||
|
(stream_id, room_id, type, state_key, event_id, prev_event_id)
|
||||||
|
SELECT ?, ?, ?, ?, ?, (
|
||||||
|
SELECT event_id FROM current_state_events
|
||||||
|
WHERE room_id = ? AND type = ? AND state_key = ?
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
txn.executemany(sql, (
|
||||||
|
(
|
||||||
|
max_stream_order, room_id, etype, state_key, None,
|
||||||
|
room_id, etype, state_key,
|
||||||
|
)
|
||||||
|
for etype, state_key in to_delete
|
||||||
|
# We sanity check that we're deleting rather than updating
|
||||||
|
if (etype, state_key) not in to_insert
|
||||||
|
))
|
||||||
|
txn.executemany(sql, (
|
||||||
|
(
|
||||||
|
max_stream_order, room_id, etype, state_key, ev_id,
|
||||||
|
room_id, etype, state_key,
|
||||||
|
)
|
||||||
|
for (etype, state_key), ev_id in iteritems(to_insert)
|
||||||
|
))
|
||||||
|
|
||||||
|
# Now we actually update the current_state_events table
|
||||||
|
|
||||||
txn.executemany(
|
txn.executemany(
|
||||||
"DELETE FROM current_state_events WHERE event_id = ?",
|
"DELETE FROM current_state_events"
|
||||||
[(ev_id,) for ev_id in itervalues(to_delete)],
|
" WHERE room_id = ? AND type = ? AND state_key = ?",
|
||||||
|
(
|
||||||
|
(room_id, etype, state_key)
|
||||||
|
for etype, state_key in itertools.chain(to_delete, to_insert)
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
self._simple_insert_many_txn(
|
self._simple_insert_many_txn(
|
||||||
|
@ -784,25 +867,6 @@ class EventsStore(EventsWorkerStore):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
state_deltas = {key: None for key in to_delete}
|
|
||||||
state_deltas.update(to_insert)
|
|
||||||
|
|
||||||
self._simple_insert_many_txn(
|
|
||||||
txn,
|
|
||||||
table="current_state_delta_stream",
|
|
||||||
values=[
|
|
||||||
{
|
|
||||||
"stream_id": max_stream_order,
|
|
||||||
"room_id": room_id,
|
|
||||||
"type": key[0],
|
|
||||||
"state_key": key[1],
|
|
||||||
"event_id": ev_id,
|
|
||||||
"prev_event_id": to_delete.get(key, None),
|
|
||||||
}
|
|
||||||
for key, ev_id in iteritems(state_deltas)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
txn.call_after(
|
txn.call_after(
|
||||||
self._curr_state_delta_stream_cache.entity_has_changed,
|
self._curr_state_delta_stream_cache.entity_has_changed,
|
||||||
room_id, max_stream_order,
|
room_id, max_stream_order,
|
||||||
|
@ -816,7 +880,8 @@ class EventsStore(EventsWorkerStore):
|
||||||
# and which we have added, then we invalidate the caches for all
|
# and which we have added, then we invalidate the caches for all
|
||||||
# those users.
|
# those users.
|
||||||
members_changed = set(
|
members_changed = set(
|
||||||
state_key for ev_type, state_key in state_deltas
|
state_key
|
||||||
|
for ev_type, state_key in itertools.chain(to_delete, to_insert)
|
||||||
if ev_type == EventTypes.Member
|
if ev_type == EventTypes.Member
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue