forked from MirrorHub/synapse
Use dictionary cache to do group -> state fetching
This commit is contained in:
parent
c67ba143fa
commit
07507643cb
7 changed files with 195 additions and 110 deletions
|
@ -507,7 +507,7 @@ class FederationHandler(BaseHandler):
|
|||
event_ids = list(extremities.keys())
|
||||
|
||||
states = yield defer.gatherResults([
|
||||
self.state_handler.resolve_state_groups([e])
|
||||
self.state_handler.resolve_state_groups(room_id, [e])
|
||||
for e in event_ids
|
||||
])
|
||||
states = dict(zip(event_ids, [s[1] for s in states]))
|
||||
|
|
|
@ -96,7 +96,7 @@ class StateHandler(object):
|
|||
cache.ts = self.clock.time_msec()
|
||||
state = cache.state
|
||||
else:
|
||||
res = yield self.resolve_state_groups(event_ids)
|
||||
res = yield self.resolve_state_groups(room_id, event_ids)
|
||||
state = res[1]
|
||||
|
||||
if event_type:
|
||||
|
@ -155,13 +155,13 @@ class StateHandler(object):
|
|||
|
||||
if event.is_state():
|
||||
ret = yield self.resolve_state_groups(
|
||||
[e for e, _ in event.prev_events],
|
||||
event.room_id, [e for e, _ in event.prev_events],
|
||||
event_type=event.type,
|
||||
state_key=event.state_key,
|
||||
)
|
||||
else:
|
||||
ret = yield self.resolve_state_groups(
|
||||
[e for e, _ in event.prev_events],
|
||||
event.room_id, [e for e, _ in event.prev_events],
|
||||
)
|
||||
|
||||
group, curr_state, prev_state = ret
|
||||
|
@ -180,7 +180,7 @@ class StateHandler(object):
|
|||
|
||||
@defer.inlineCallbacks
|
||||
@log_function
|
||||
def resolve_state_groups(self, event_ids, event_type=None, state_key=""):
|
||||
def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""):
|
||||
""" Given a list of event_ids this method fetches the state at each
|
||||
event, resolves conflicts between them and returns them.
|
||||
|
||||
|
@ -205,7 +205,7 @@ class StateHandler(object):
|
|||
)
|
||||
|
||||
state_groups = yield self.store.get_state_groups(
|
||||
event_ids
|
||||
room_id, event_ids
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
|
|
|
@ -18,6 +18,7 @@ from synapse.api.errors import StoreError
|
|||
from synapse.util.logutils import log_function
|
||||
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
|
||||
from synapse.util.lrucache import LruCache
|
||||
from synapse.util.dictionary_cache import DictionaryCache
|
||||
import synapse.metrics
|
||||
|
||||
from util.id_generators import IdGenerator, StreamIdGenerator
|
||||
|
@ -87,23 +88,33 @@ class Cache(object):
|
|||
)
|
||||
|
||||
def get(self, *keyargs):
|
||||
if len(keyargs) != self.keylen:
|
||||
raise ValueError("Expected a key to have %d items", self.keylen)
|
||||
try:
|
||||
if len(keyargs) != self.keylen:
|
||||
raise ValueError("Expected a key to have %d items", self.keylen)
|
||||
|
||||
val = self.cache.get(keyargs, self.sentinel)
|
||||
if val is not self.sentinel:
|
||||
cache_counter.inc_hits(self.name)
|
||||
return val
|
||||
val = self.cache.get(keyargs, self.sentinel)
|
||||
if val is not self.sentinel:
|
||||
cache_counter.inc_hits(self.name)
|
||||
return val
|
||||
|
||||
cache_counter.inc_misses(self.name)
|
||||
raise KeyError()
|
||||
cache_counter.inc_misses(self.name)
|
||||
raise KeyError()
|
||||
except KeyError:
|
||||
raise
|
||||
except:
|
||||
logger.exception("Cache.get failed for %s" % (self.name,))
|
||||
raise
|
||||
|
||||
def update(self, sequence, *args):
|
||||
self.check_thread()
|
||||
if self.sequence == sequence:
|
||||
# Only update the cache if the caches sequence number matches the
|
||||
# number that the cache had before the SELECT was started (SYN-369)
|
||||
self.prefill(*args)
|
||||
try:
|
||||
self.check_thread()
|
||||
if self.sequence == sequence:
|
||||
# Only update the cache if the caches sequence number matches the
|
||||
# number that the cache had before the SELECT was started (SYN-369)
|
||||
self.prefill(*args)
|
||||
except:
|
||||
logger.exception("Cache.update failed for %s" % (self.name,))
|
||||
raise
|
||||
|
||||
def prefill(self, *args): # because I can't *keyargs, value
|
||||
keyargs = args[:-1]
|
||||
|
@ -327,6 +338,8 @@ class SQLBaseStore(object):
|
|||
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
|
||||
max_entries=hs.config.event_cache_size)
|
||||
|
||||
self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
|
||||
|
||||
self._event_fetch_lock = threading.Condition()
|
||||
self._event_fetch_list = []
|
||||
self._event_fetch_ongoing = 0
|
||||
|
|
|
@ -45,52 +45,38 @@ class StateStore(SQLBaseStore):
|
|||
"""
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def get_state_groups(self, event_ids):
|
||||
def get_state_groups(self, room_id, event_ids):
|
||||
""" Get the state groups for the given list of event_ids
|
||||
|
||||
The return value is a dict mapping group names to lists of events.
|
||||
"""
|
||||
|
||||
def f(txn):
|
||||
groups = set()
|
||||
for event_id in event_ids:
|
||||
group = self._simple_select_one_onecol_txn(
|
||||
txn,
|
||||
table="event_to_state_groups",
|
||||
keyvalues={"event_id": event_id},
|
||||
retcol="state_group",
|
||||
allow_none=True,
|
||||
)
|
||||
if group:
|
||||
groups.add(group)
|
||||
|
||||
res = {}
|
||||
for group in groups:
|
||||
state_ids = self._simple_select_onecol_txn(
|
||||
txn,
|
||||
table="state_groups_state",
|
||||
keyvalues={"state_group": group},
|
||||
retcol="event_id",
|
||||
)
|
||||
|
||||
res[group] = state_ids
|
||||
|
||||
return res
|
||||
|
||||
states = yield self.runInteraction(
|
||||
"get_state_groups",
|
||||
f,
|
||||
)
|
||||
|
||||
state_list = yield defer.gatherResults(
|
||||
event_and_groups = yield defer.gatherResults(
|
||||
[
|
||||
self._fetch_events_for_group(group, vals)
|
||||
for group, vals in states.items()
|
||||
self._get_state_group_for_event(
|
||||
room_id, event_id,
|
||||
).addCallback(lambda group, event_id: (event_id, group), event_id)
|
||||
for event_id in event_ids
|
||||
],
|
||||
consumeErrors=True,
|
||||
)
|
||||
).addErrback(unwrapFirstError)
|
||||
|
||||
defer.returnValue(dict(state_list))
|
||||
groups = set(group for _, group in event_and_groups if group)
|
||||
|
||||
group_to_state = yield defer.gatherResults(
|
||||
[
|
||||
self._get_state_for_group(
|
||||
group,
|
||||
).addCallback(lambda state_dict, group: (group, state_dict), group)
|
||||
for group in groups
|
||||
],
|
||||
consumeErrors=True,
|
||||
).addErrback(unwrapFirstError)
|
||||
|
||||
defer.returnValue({
|
||||
group: state_map.values()
|
||||
for group, state_map in group_to_state
|
||||
})
|
||||
|
||||
@cached(num_args=1)
|
||||
def _fetch_events_for_group(self, key, events):
|
||||
|
@ -207,16 +193,25 @@ class StateStore(SQLBaseStore):
|
|||
events = yield self._get_events(event_ids, get_prev_content=False)
|
||||
defer.returnValue(events)
|
||||
|
||||
@cached(num_args=3, lru=True)
|
||||
def _get_state_groups_from_group(self, room_id, group, types):
|
||||
@cached(num_args=2, lru=True, max_entries=10000)
|
||||
def _get_state_groups_from_group(self, group, types):
|
||||
def f(txn):
|
||||
if types is not None:
|
||||
where_clause = "AND (%s)" % (
|
||||
" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
|
||||
)
|
||||
else:
|
||||
where_clause = ""
|
||||
|
||||
sql = (
|
||||
"SELECT event_id FROM state_groups_state WHERE"
|
||||
" room_id = ? AND state_group = ? AND (%s)"
|
||||
) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),)
|
||||
" state_group = ? %s"
|
||||
) % (where_clause,)
|
||||
|
||||
args = [group]
|
||||
if types is not None:
|
||||
args.extend([i for typ in types for i in typ])
|
||||
|
||||
args = [room_id, group]
|
||||
args.extend([i for typ in types for i in typ])
|
||||
txn.execute(sql, args)
|
||||
|
||||
return group, [
|
||||
|
@ -229,7 +224,7 @@ class StateStore(SQLBaseStore):
|
|||
f,
|
||||
)
|
||||
|
||||
@cached(num_args=3, lru=True, max_entries=100000)
|
||||
@cached(num_args=3, lru=True, max_entries=20000)
|
||||
def _get_state_for_event_id(self, room_id, event_id, types):
|
||||
def f(txn):
|
||||
type_and_state_sql = " OR ".join([
|
||||
|
@ -280,40 +275,33 @@ class StateStore(SQLBaseStore):
|
|||
deferred: A list of dicts corresponding to the event_ids given.
|
||||
The dicts are mappings from (type, state_key) -> state_events
|
||||
"""
|
||||
set_types = frozenset(types)
|
||||
res = yield defer.gatherResults(
|
||||
event_and_groups = yield defer.gatherResults(
|
||||
[
|
||||
self._get_state_for_event_id(
|
||||
room_id, event_id, set_types,
|
||||
)
|
||||
self._get_state_group_for_event(
|
||||
room_id, event_id,
|
||||
).addCallback(lambda group, event_id: (event_id, group), event_id)
|
||||
for event_id in event_ids
|
||||
],
|
||||
consumeErrors=True,
|
||||
).addErrback(unwrapFirstError)
|
||||
|
||||
event_to_state_ids = dict(res)
|
||||
groups = set(group for _, group in event_and_groups)
|
||||
|
||||
event_dict = yield self._get_events(
|
||||
res = yield defer.gatherResults(
|
||||
[
|
||||
item
|
||||
for lst in event_to_state_ids.values()
|
||||
for item in lst
|
||||
self._get_state_for_group(
|
||||
group, types
|
||||
).addCallback(lambda state_dict, group: (group, state_dict), group)
|
||||
for group in groups
|
||||
],
|
||||
get_prev_content=False
|
||||
).addCallback(
|
||||
lambda evs: {ev.event_id: ev for ev in evs}
|
||||
)
|
||||
consumeErrors=True,
|
||||
).addErrback(unwrapFirstError)
|
||||
|
||||
group_to_state = dict(res)
|
||||
|
||||
event_to_state = {
|
||||
event_id: {
|
||||
(ev.type, ev.state_key): ev
|
||||
for ev in [
|
||||
event_dict[state_id]
|
||||
for state_id in state_ids
|
||||
if state_id in event_dict
|
||||
]
|
||||
}
|
||||
for event_id, state_ids in event_to_state_ids.items()
|
||||
event_id: group_to_state[group]
|
||||
for event_id, group in event_and_groups
|
||||
}
|
||||
|
||||
defer.returnValue([
|
||||
|
@ -321,6 +309,79 @@ class StateStore(SQLBaseStore):
|
|||
for event in event_ids
|
||||
])
|
||||
|
||||
@cached(num_args=2, lru=True, max_entries=100000)
def _get_state_group_for_event(self, room_id, event_id):
    """Look up the state group recorded for a single event.

    Args:
        room_id: Not used by the query itself; presumably it only forms
            part of the ``@cached`` key (``num_args=2``) -- TODO confirm.
        event_id: The event whose ``state_group`` column is selected from
            the ``event_to_state_groups`` table.

    Returns:
        Whatever ``self._simple_select_one_onecol`` returns for the
        ``state_group`` column. The lookup is made with
        ``allow_none=True``.
    """
    # Only the event id selects the row; room_id is deliberately absent.
    lookup = {"event_id": event_id}
    return self._simple_select_one_onecol(
        table="event_to_state_groups",
        keyvalues=lookup,
        retcol="state_group",
        allow_none=True,
        desc="_get_state_group_for_event",
    )
|
||||
|
||||
@defer.inlineCallbacks
def _get_state_for_group(self, group, types=None):
    """Fetch the state for a state group, consulting the dictionary cache.

    Args:
        group: Key used to look the group up in ``self._state_group_cache``
            and passed on to ``self._get_state_groups_from_group``.
        types: Either ``None`` (no filtering) or an iterable of
            ``(type, state_key)`` pairs. A ``state_key`` of ``None`` acts
            as a wildcard for that type when filtering cached entries.

    Returns:
        Via ``defer.returnValue``: a dict mapping ``(type, state_key)`` to
        the corresponding event. It comes from the cache when the cached
        entry can satisfy the request, otherwise from a fresh fetch (which
        is also handed to the cache's ``update``).
    """
    is_all, cached_state = self._state_group_cache.get(group)

    # type -> set of requested state_keys, or None for "any state_key".
    wanted_keys = {}
    missing_types = set()
    if types is not None:
        for typ, state_key in types:
            if state_key is None:
                # Wildcards are always recorded as missing, regardless of
                # what the cache currently holds for this type.
                wanted_keys[typ] = None
                missing_types.add((typ, state_key))
                continue

            # Once a wildcard is registered for a type it is kept as-is.
            if typ not in wanted_keys or wanted_keys[typ] is not None:
                wanted_keys.setdefault(typ, set()).add(state_key)

            if (typ, state_key) not in cached_state:
                missing_types.add((typ, state_key))

    if is_all and types is None:
        defer.returnValue(cached_state)

    if is_all or (types is not None and not missing_types):
        def include(typ, state_key):
            # A type that was never requested is filtered out.
            if typ not in wanted_keys:
                return False
            allowed = wanted_keys[typ]
            return allowed is None or state_key in allowed

        filtered = {}
        for key, event in cached_state.items():
            if include(key[0], key[1]):
                filtered[key] = event
        defer.returnValue(filtered)

    # The cache could not answer the request, so fetch the state. The
    # cache's sequence number is read before the fetch and handed to
    # update() together with the result.
    seq_before_fetch = self._state_group_cache.sequence
    requested = frozenset(types) if types else None
    _, state_ids = yield self._get_state_groups_from_group(group, requested)
    fetched_events = yield self._get_events(state_ids, get_prev_content=False)

    fetched_state = {}
    for ev in fetched_events:
        fetched_state[(ev.type, ev.state_key)] = ev

    # An unfiltered fetch is flagged as the full state of the group.
    self._state_group_cache.update(
        seq_before_fetch,
        key=group,
        value=fetched_state,
        full=(types is None),
    )

    defer.returnValue(fetched_state)
|
||||
|
||||
|
||||
def _make_group_id(clock):
    """Build a group identifier string.

    The result is the integer value of ``clock.time_msec()`` rendered as
    a string, immediately followed by ``random_string(5)``.
    """
    timestamp = int(clock.time_msec())
    suffix = random_string(5)
    return str(timestamp) + suffix
|
||||
|
|
|
@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore):
|
|||
defer.returnValue((events, token))
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def get_recent_events_for_room(self, room_id, limit, end_token,
|
||||
with_feedback=False, from_token=None):
|
||||
def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
|
||||
# TODO (erikj): Handle compressed feedback
|
||||
|
||||
end_token = RoomStreamToken.parse_stream_token(end_token)
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
from synapse.util.lrucache import LruCache
|
||||
from collections import namedtuple
|
||||
import threading
|
||||
import logging
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
|
||||
|
@ -47,21 +51,25 @@ class DictionaryCache(object):
|
|||
)
|
||||
|
||||
def get(self, key, dict_keys=None):
|
||||
entry = self.cache.get(key, self.sentinel)
|
||||
if entry is not self.sentinel:
|
||||
# cache_counter.inc_hits(self.name)
|
||||
try:
|
||||
entry = self.cache.get(key, self.sentinel)
|
||||
if entry is not self.sentinel:
|
||||
# cache_counter.inc_hits(self.name)
|
||||
|
||||
if dict_keys is None:
|
||||
return DictionaryEntry(entry.full, dict(entry.value))
|
||||
else:
|
||||
return DictionaryEntry(entry.full, {
|
||||
k: entry.value[k]
|
||||
for k in dict_keys
|
||||
if k in entry.value
|
||||
})
|
||||
if dict_keys is None:
|
||||
return DictionaryEntry(entry.full, dict(entry.value))
|
||||
else:
|
||||
return DictionaryEntry(entry.full, {
|
||||
k: entry.value[k]
|
||||
for k in dict_keys
|
||||
if k in entry.value
|
||||
})
|
||||
|
||||
# cache_counter.inc_misses(self.name)
|
||||
return DictionaryEntry(False, {})
|
||||
# cache_counter.inc_misses(self.name)
|
||||
return DictionaryEntry(False, {})
|
||||
except:
|
||||
logger.exception("get failed")
|
||||
raise
|
||||
|
||||
def invalidate(self, key):
|
||||
self.check_thread()
|
||||
|
@ -77,14 +85,18 @@ class DictionaryCache(object):
|
|||
self.cache.clear()
|
||||
|
||||
def update(self, sequence, key, value, full=False):
|
||||
self.check_thread()
|
||||
if self.sequence == sequence:
|
||||
# Only update the cache if the caches sequence number matches the
|
||||
# number that the cache had before the SELECT was started (SYN-369)
|
||||
if full:
|
||||
self._insert(key, value)
|
||||
else:
|
||||
self._update_or_insert(key, value)
|
||||
try:
|
||||
self.check_thread()
|
||||
if self.sequence == sequence:
|
||||
# Only update the cache if the caches sequence number matches the
|
||||
# number that the cache had before the SELECT was started (SYN-369)
|
||||
if full:
|
||||
self._insert(key, value)
|
||||
else:
|
||||
self._update_or_insert(key, value)
|
||||
except:
|
||||
logger.exception("update failed")
|
||||
raise
|
||||
|
||||
def _update_or_insert(self, key, value):
|
||||
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
|
||||
|
|
|
@ -69,7 +69,7 @@ class StateGroupStore(object):
|
|||
|
||||
self._next_group = 1
|
||||
|
||||
def get_state_groups(self, event_ids):
|
||||
def get_state_groups(self, room_id, event_ids):
|
||||
groups = {}
|
||||
for event_id in event_ids:
|
||||
group = self._event_to_state_group.get(event_id)
|
||||
|
|
Loading…
Reference in a new issue