forked from MirrorHub/synapse
Use dictionary cache to do group -> state fetching
This commit is contained in:
parent
c67ba143fa
commit
07507643cb
7 changed files with 195 additions and 110 deletions
|
@ -507,7 +507,7 @@ class FederationHandler(BaseHandler):
|
||||||
event_ids = list(extremities.keys())
|
event_ids = list(extremities.keys())
|
||||||
|
|
||||||
states = yield defer.gatherResults([
|
states = yield defer.gatherResults([
|
||||||
self.state_handler.resolve_state_groups([e])
|
self.state_handler.resolve_state_groups(room_id, [e])
|
||||||
for e in event_ids
|
for e in event_ids
|
||||||
])
|
])
|
||||||
states = dict(zip(event_ids, [s[1] for s in states]))
|
states = dict(zip(event_ids, [s[1] for s in states]))
|
||||||
|
|
|
@ -96,7 +96,7 @@ class StateHandler(object):
|
||||||
cache.ts = self.clock.time_msec()
|
cache.ts = self.clock.time_msec()
|
||||||
state = cache.state
|
state = cache.state
|
||||||
else:
|
else:
|
||||||
res = yield self.resolve_state_groups(event_ids)
|
res = yield self.resolve_state_groups(room_id, event_ids)
|
||||||
state = res[1]
|
state = res[1]
|
||||||
|
|
||||||
if event_type:
|
if event_type:
|
||||||
|
@ -155,13 +155,13 @@ class StateHandler(object):
|
||||||
|
|
||||||
if event.is_state():
|
if event.is_state():
|
||||||
ret = yield self.resolve_state_groups(
|
ret = yield self.resolve_state_groups(
|
||||||
[e for e, _ in event.prev_events],
|
event.room_id, [e for e, _ in event.prev_events],
|
||||||
event_type=event.type,
|
event_type=event.type,
|
||||||
state_key=event.state_key,
|
state_key=event.state_key,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
ret = yield self.resolve_state_groups(
|
ret = yield self.resolve_state_groups(
|
||||||
[e for e, _ in event.prev_events],
|
event.room_id, [e for e, _ in event.prev_events],
|
||||||
)
|
)
|
||||||
|
|
||||||
group, curr_state, prev_state = ret
|
group, curr_state, prev_state = ret
|
||||||
|
@ -180,7 +180,7 @@ class StateHandler(object):
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
@log_function
|
@log_function
|
||||||
def resolve_state_groups(self, event_ids, event_type=None, state_key=""):
|
def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""):
|
||||||
""" Given a list of event_ids this method fetches the state at each
|
""" Given a list of event_ids this method fetches the state at each
|
||||||
event, resolves conflicts between them and returns them.
|
event, resolves conflicts between them and returns them.
|
||||||
|
|
||||||
|
@ -205,7 +205,7 @@ class StateHandler(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
state_groups = yield self.store.get_state_groups(
|
state_groups = yield self.store.get_state_groups(
|
||||||
event_ids
|
room_id, event_ids
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|
|
@ -18,6 +18,7 @@ from synapse.api.errors import StoreError
|
||||||
from synapse.util.logutils import log_function
|
from synapse.util.logutils import log_function
|
||||||
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
|
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
|
||||||
from synapse.util.lrucache import LruCache
|
from synapse.util.lrucache import LruCache
|
||||||
|
from synapse.util.dictionary_cache import DictionaryCache
|
||||||
import synapse.metrics
|
import synapse.metrics
|
||||||
|
|
||||||
from util.id_generators import IdGenerator, StreamIdGenerator
|
from util.id_generators import IdGenerator, StreamIdGenerator
|
||||||
|
@ -87,23 +88,33 @@ class Cache(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
def get(self, *keyargs):
|
def get(self, *keyargs):
|
||||||
if len(keyargs) != self.keylen:
|
try:
|
||||||
raise ValueError("Expected a key to have %d items", self.keylen)
|
if len(keyargs) != self.keylen:
|
||||||
|
raise ValueError("Expected a key to have %d items", self.keylen)
|
||||||
|
|
||||||
val = self.cache.get(keyargs, self.sentinel)
|
val = self.cache.get(keyargs, self.sentinel)
|
||||||
if val is not self.sentinel:
|
if val is not self.sentinel:
|
||||||
cache_counter.inc_hits(self.name)
|
cache_counter.inc_hits(self.name)
|
||||||
return val
|
return val
|
||||||
|
|
||||||
cache_counter.inc_misses(self.name)
|
cache_counter.inc_misses(self.name)
|
||||||
raise KeyError()
|
raise KeyError()
|
||||||
|
except KeyError:
|
||||||
|
raise
|
||||||
|
except:
|
||||||
|
logger.exception("Cache.get failed for %s" % (self.name,))
|
||||||
|
raise
|
||||||
|
|
||||||
def update(self, sequence, *args):
|
def update(self, sequence, *args):
|
||||||
self.check_thread()
|
try:
|
||||||
if self.sequence == sequence:
|
self.check_thread()
|
||||||
# Only update the cache if the caches sequence number matches the
|
if self.sequence == sequence:
|
||||||
# number that the cache had before the SELECT was started (SYN-369)
|
# Only update the cache if the caches sequence number matches the
|
||||||
self.prefill(*args)
|
# number that the cache had before the SELECT was started (SYN-369)
|
||||||
|
self.prefill(*args)
|
||||||
|
except:
|
||||||
|
logger.exception("Cache.update failed for %s" % (self.name,))
|
||||||
|
raise
|
||||||
|
|
||||||
def prefill(self, *args): # because I can't *keyargs, value
|
def prefill(self, *args): # because I can't *keyargs, value
|
||||||
keyargs = args[:-1]
|
keyargs = args[:-1]
|
||||||
|
@ -327,6 +338,8 @@ class SQLBaseStore(object):
|
||||||
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
|
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
|
||||||
max_entries=hs.config.event_cache_size)
|
max_entries=hs.config.event_cache_size)
|
||||||
|
|
||||||
|
self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
|
||||||
|
|
||||||
self._event_fetch_lock = threading.Condition()
|
self._event_fetch_lock = threading.Condition()
|
||||||
self._event_fetch_list = []
|
self._event_fetch_list = []
|
||||||
self._event_fetch_ongoing = 0
|
self._event_fetch_ongoing = 0
|
||||||
|
|
|
@ -45,52 +45,38 @@ class StateStore(SQLBaseStore):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_state_groups(self, event_ids):
|
def get_state_groups(self, room_id, event_ids):
|
||||||
""" Get the state groups for the given list of event_ids
|
""" Get the state groups for the given list of event_ids
|
||||||
|
|
||||||
The return value is a dict mapping group names to lists of events.
|
The return value is a dict mapping group names to lists of events.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def f(txn):
|
event_and_groups = yield defer.gatherResults(
|
||||||
groups = set()
|
|
||||||
for event_id in event_ids:
|
|
||||||
group = self._simple_select_one_onecol_txn(
|
|
||||||
txn,
|
|
||||||
table="event_to_state_groups",
|
|
||||||
keyvalues={"event_id": event_id},
|
|
||||||
retcol="state_group",
|
|
||||||
allow_none=True,
|
|
||||||
)
|
|
||||||
if group:
|
|
||||||
groups.add(group)
|
|
||||||
|
|
||||||
res = {}
|
|
||||||
for group in groups:
|
|
||||||
state_ids = self._simple_select_onecol_txn(
|
|
||||||
txn,
|
|
||||||
table="state_groups_state",
|
|
||||||
keyvalues={"state_group": group},
|
|
||||||
retcol="event_id",
|
|
||||||
)
|
|
||||||
|
|
||||||
res[group] = state_ids
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
states = yield self.runInteraction(
|
|
||||||
"get_state_groups",
|
|
||||||
f,
|
|
||||||
)
|
|
||||||
|
|
||||||
state_list = yield defer.gatherResults(
|
|
||||||
[
|
[
|
||||||
self._fetch_events_for_group(group, vals)
|
self._get_state_group_for_event(
|
||||||
for group, vals in states.items()
|
room_id, event_id,
|
||||||
|
).addCallback(lambda group, event_id: (event_id, group), event_id)
|
||||||
|
for event_id in event_ids
|
||||||
],
|
],
|
||||||
consumeErrors=True,
|
consumeErrors=True,
|
||||||
)
|
).addErrback(unwrapFirstError)
|
||||||
|
|
||||||
defer.returnValue(dict(state_list))
|
groups = set(group for _, group in event_and_groups if group)
|
||||||
|
|
||||||
|
group_to_state = yield defer.gatherResults(
|
||||||
|
[
|
||||||
|
self._get_state_for_group(
|
||||||
|
group,
|
||||||
|
).addCallback(lambda state_dict, group: (group, state_dict), group)
|
||||||
|
for group in groups
|
||||||
|
],
|
||||||
|
consumeErrors=True,
|
||||||
|
).addErrback(unwrapFirstError)
|
||||||
|
|
||||||
|
defer.returnValue({
|
||||||
|
group: state_map.values()
|
||||||
|
for group, state_map in group_to_state
|
||||||
|
})
|
||||||
|
|
||||||
@cached(num_args=1)
|
@cached(num_args=1)
|
||||||
def _fetch_events_for_group(self, key, events):
|
def _fetch_events_for_group(self, key, events):
|
||||||
|
@ -207,16 +193,25 @@ class StateStore(SQLBaseStore):
|
||||||
events = yield self._get_events(event_ids, get_prev_content=False)
|
events = yield self._get_events(event_ids, get_prev_content=False)
|
||||||
defer.returnValue(events)
|
defer.returnValue(events)
|
||||||
|
|
||||||
@cached(num_args=3, lru=True)
|
@cached(num_args=2, lru=True, max_entries=10000)
|
||||||
def _get_state_groups_from_group(self, room_id, group, types):
|
def _get_state_groups_from_group(self, group, types):
|
||||||
def f(txn):
|
def f(txn):
|
||||||
|
if types is not None:
|
||||||
|
where_clause = "AND (%s)" % (
|
||||||
|
" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
where_clause = ""
|
||||||
|
|
||||||
sql = (
|
sql = (
|
||||||
"SELECT event_id FROM state_groups_state WHERE"
|
"SELECT event_id FROM state_groups_state WHERE"
|
||||||
" room_id = ? AND state_group = ? AND (%s)"
|
" state_group = ? %s"
|
||||||
) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),)
|
) % (where_clause,)
|
||||||
|
|
||||||
|
args = [group]
|
||||||
|
if types is not None:
|
||||||
|
args.extend([i for typ in types for i in typ])
|
||||||
|
|
||||||
args = [room_id, group]
|
|
||||||
args.extend([i for typ in types for i in typ])
|
|
||||||
txn.execute(sql, args)
|
txn.execute(sql, args)
|
||||||
|
|
||||||
return group, [
|
return group, [
|
||||||
|
@ -229,7 +224,7 @@ class StateStore(SQLBaseStore):
|
||||||
f,
|
f,
|
||||||
)
|
)
|
||||||
|
|
||||||
@cached(num_args=3, lru=True, max_entries=100000)
|
@cached(num_args=3, lru=True, max_entries=20000)
|
||||||
def _get_state_for_event_id(self, room_id, event_id, types):
|
def _get_state_for_event_id(self, room_id, event_id, types):
|
||||||
def f(txn):
|
def f(txn):
|
||||||
type_and_state_sql = " OR ".join([
|
type_and_state_sql = " OR ".join([
|
||||||
|
@ -280,40 +275,33 @@ class StateStore(SQLBaseStore):
|
||||||
deferred: A list of dicts corresponding to the event_ids given.
|
deferred: A list of dicts corresponding to the event_ids given.
|
||||||
The dicts are mappings from (type, state_key) -> state_events
|
The dicts are mappings from (type, state_key) -> state_events
|
||||||
"""
|
"""
|
||||||
set_types = frozenset(types)
|
event_and_groups = yield defer.gatherResults(
|
||||||
res = yield defer.gatherResults(
|
|
||||||
[
|
[
|
||||||
self._get_state_for_event_id(
|
self._get_state_group_for_event(
|
||||||
room_id, event_id, set_types,
|
room_id, event_id,
|
||||||
)
|
).addCallback(lambda group, event_id: (event_id, group), event_id)
|
||||||
for event_id in event_ids
|
for event_id in event_ids
|
||||||
],
|
],
|
||||||
consumeErrors=True,
|
consumeErrors=True,
|
||||||
).addErrback(unwrapFirstError)
|
).addErrback(unwrapFirstError)
|
||||||
|
|
||||||
event_to_state_ids = dict(res)
|
groups = set(group for _, group in event_and_groups)
|
||||||
|
|
||||||
event_dict = yield self._get_events(
|
res = yield defer.gatherResults(
|
||||||
[
|
[
|
||||||
item
|
self._get_state_for_group(
|
||||||
for lst in event_to_state_ids.values()
|
group, types
|
||||||
for item in lst
|
).addCallback(lambda state_dict, group: (group, state_dict), group)
|
||||||
|
for group in groups
|
||||||
],
|
],
|
||||||
get_prev_content=False
|
consumeErrors=True,
|
||||||
).addCallback(
|
).addErrback(unwrapFirstError)
|
||||||
lambda evs: {ev.event_id: ev for ev in evs}
|
|
||||||
)
|
group_to_state = dict(res)
|
||||||
|
|
||||||
event_to_state = {
|
event_to_state = {
|
||||||
event_id: {
|
event_id: group_to_state[group]
|
||||||
(ev.type, ev.state_key): ev
|
for event_id, group in event_and_groups
|
||||||
for ev in [
|
|
||||||
event_dict[state_id]
|
|
||||||
for state_id in state_ids
|
|
||||||
if state_id in event_dict
|
|
||||||
]
|
|
||||||
}
|
|
||||||
for event_id, state_ids in event_to_state_ids.items()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
defer.returnValue([
|
defer.returnValue([
|
||||||
|
@ -321,6 +309,79 @@ class StateStore(SQLBaseStore):
|
||||||
for event in event_ids
|
for event in event_ids
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@cached(num_args=2, lru=True, max_entries=100000)
def _get_state_group_for_event(self, room_id, event_id):
    """Look up the state group recorded against a single event.

    Selects the ``state_group`` column from the ``event_to_state_groups``
    table for the row whose ``event_id`` matches.

    Args:
        room_id: Not used by the query itself; only ``event_id`` is a
            lookup key. NOTE(review): presumably it participates in the
            ``@cached`` key because ``num_args=2`` -- confirm against the
            decorator.
        event_id: The event whose state group is wanted.

    Returns:
        Whatever ``_simple_select_one_onecol`` returns. ``allow_none=True``
        is passed, so presumably a missing row yields ``None`` rather than
        an error -- confirm against that helper.
    """
    lookup = {"event_id": event_id}
    return self._simple_select_one_onecol(
        table="event_to_state_groups",
        keyvalues=lookup,
        retcol="state_group",
        allow_none=True,
        desc="_get_state_group_for_event",
    )
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
def _get_state_for_group(self, group, types=None):
    """Fetch the state for one state group, consulting the dictionary cache.

    Args:
        group: The state group to look up; used as the key into
            ``self._state_group_cache``.
        types: Either ``None``, meaning all state is wanted, or an iterable
            of ``(type, state_key)`` pairs. A ``state_key`` of ``None`` is
            treated as "any state_key for that type" when filtering cached
            state.

    Returns:
        Via ``defer.returnValue``, a dict keyed by ``(type, state_key)``.
        On a cache hit this is the cached dict, filtered down to the
        requested types when ``types`` is given. Otherwise it is the dict
        built from the freshly fetched events, which is also written back
        to the cache.
    """
    is_all, cached_state = self._state_group_cache.get(group)
    want_everything = types is None

    # The cache holds the complete state and all of it was requested.
    if want_everything and is_all:
        defer.returnValue(cached_state)

    # type -> None (any state_key is acceptable) or a set of state_keys.
    wanted_keys = {}
    # Requested (type, state_key) pairs the cached dict cannot answer.
    uncached = set()
    for typ, state_key in (types if types is not None else ()):
        if state_key is None:
            # Wildcards are always counted as missing.
            wanted_keys[typ] = None
            uncached.add((typ, state_key))
            continue

        # Do not narrow a type that already has a wildcard recorded.
        if wanted_keys.get(typ, object()) is not None:
            wanted_keys.setdefault(typ, set()).add(state_key)

        if (typ, state_key) not in cached_state:
            uncached.add((typ, state_key))

    if is_all or (not want_everything and not uncached):
        # Everything requested is already cached: filter and return.
        absent = object()
        filtered = {}
        for key, event in cached_state.items():
            allowed = wanted_keys.get(key[0], absent)
            if allowed is absent:
                continue
            if allowed is None or key[1] in allowed:
                filtered[key] = event
        defer.returnValue(filtered)

    # Something is missing. Read the cache sequence number before fetching
    # so that ``update`` can compare it with the cache's current sequence.
    seq_before_fetch = self._state_group_cache.sequence
    type_filter = frozenset(types) if types else None
    _fetched_group, state_ids = yield self._get_state_groups_from_group(
        group,
        type_filter
    )
    events = yield self._get_events(state_ids, get_prev_content=False)

    fresh_state = {}
    for ev in events:
        fresh_state[(ev.type, ev.state_key)] = ev

    # Only an unfiltered fetch is marked as the full state for the group.
    self._state_group_cache.update(
        seq_before_fetch,
        key=group,
        value=fresh_state,
        full=want_everything,
    )

    defer.returnValue(fresh_state)
|
||||||
|
|
||||||
|
|
||||||
def _make_group_id(clock):
    """Build a group id string from the clock and a random suffix.

    The result is ``clock.time_msec()`` truncated to an int and rendered
    as a string, followed by ``random_string(5)``.
    """
    millis = int(clock.time_msec())
    suffix = random_string(5)
    return str(millis) + suffix
|
||||||
|
|
|
@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore):
|
||||||
defer.returnValue((events, token))
|
defer.returnValue((events, token))
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def get_recent_events_for_room(self, room_id, limit, end_token,
|
def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
|
||||||
with_feedback=False, from_token=None):
|
|
||||||
# TODO (erikj): Handle compressed feedback
|
# TODO (erikj): Handle compressed feedback
|
||||||
|
|
||||||
end_token = RoomStreamToken.parse_stream_token(end_token)
|
end_token = RoomStreamToken.parse_stream_token(end_token)
|
||||||
|
|
|
@ -16,6 +16,10 @@
|
||||||
from synapse.util.lrucache import LruCache
|
from synapse.util.lrucache import LruCache
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import threading
|
import threading
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
|
DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
|
||||||
|
@ -47,21 +51,25 @@ class DictionaryCache(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
def get(self, key, dict_keys=None):
|
def get(self, key, dict_keys=None):
|
||||||
entry = self.cache.get(key, self.sentinel)
|
try:
|
||||||
if entry is not self.sentinel:
|
entry = self.cache.get(key, self.sentinel)
|
||||||
# cache_counter.inc_hits(self.name)
|
if entry is not self.sentinel:
|
||||||
|
# cache_counter.inc_hits(self.name)
|
||||||
|
|
||||||
if dict_keys is None:
|
if dict_keys is None:
|
||||||
return DictionaryEntry(entry.full, dict(entry.value))
|
return DictionaryEntry(entry.full, dict(entry.value))
|
||||||
else:
|
else:
|
||||||
return DictionaryEntry(entry.full, {
|
return DictionaryEntry(entry.full, {
|
||||||
k: entry.value[k]
|
k: entry.value[k]
|
||||||
for k in dict_keys
|
for k in dict_keys
|
||||||
if k in entry.value
|
if k in entry.value
|
||||||
})
|
})
|
||||||
|
|
||||||
# cache_counter.inc_misses(self.name)
|
# cache_counter.inc_misses(self.name)
|
||||||
return DictionaryEntry(False, {})
|
return DictionaryEntry(False, {})
|
||||||
|
except:
|
||||||
|
logger.exception("get failed")
|
||||||
|
raise
|
||||||
|
|
||||||
def invalidate(self, key):
|
def invalidate(self, key):
|
||||||
self.check_thread()
|
self.check_thread()
|
||||||
|
@ -77,14 +85,18 @@ class DictionaryCache(object):
|
||||||
self.cache.clear()
|
self.cache.clear()
|
||||||
|
|
||||||
def update(self, sequence, key, value, full=False):
|
def update(self, sequence, key, value, full=False):
|
||||||
self.check_thread()
|
try:
|
||||||
if self.sequence == sequence:
|
self.check_thread()
|
||||||
# Only update the cache if the caches sequence number matches the
|
if self.sequence == sequence:
|
||||||
# number that the cache had before the SELECT was started (SYN-369)
|
# Only update the cache if the caches sequence number matches the
|
||||||
if full:
|
# number that the cache had before the SELECT was started (SYN-369)
|
||||||
self._insert(key, value)
|
if full:
|
||||||
else:
|
self._insert(key, value)
|
||||||
self._update_or_insert(key, value)
|
else:
|
||||||
|
self._update_or_insert(key, value)
|
||||||
|
except:
|
||||||
|
logger.exception("update failed")
|
||||||
|
raise
|
||||||
|
|
||||||
def _update_or_insert(self, key, value):
|
def _update_or_insert(self, key, value):
|
||||||
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
|
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
|
||||||
|
|
|
@ -69,7 +69,7 @@ class StateGroupStore(object):
|
||||||
|
|
||||||
self._next_group = 1
|
self._next_group = 1
|
||||||
|
|
||||||
def get_state_groups(self, event_ids):
|
def get_state_groups(self, room_id, event_ids):
|
||||||
groups = {}
|
groups = {}
|
||||||
for event_id in event_ids:
|
for event_id in event_ids:
|
||||||
group = self._event_to_state_group.get(event_id)
|
group = self._event_to_state_group.get(event_id)
|
||||||
|
|
Loading…
Reference in a new issue