Use dictionary cache to do group -> state fetching

This commit is contained in:
Erik Johnston 2015-08-05 15:06:51 +01:00
parent c67ba143fa
commit 07507643cb
7 changed files with 195 additions and 110 deletions

View file

@ -507,7 +507,7 @@ class FederationHandler(BaseHandler):
event_ids = list(extremities.keys()) event_ids = list(extremities.keys())
states = yield defer.gatherResults([ states = yield defer.gatherResults([
self.state_handler.resolve_state_groups([e]) self.state_handler.resolve_state_groups(room_id, [e])
for e in event_ids for e in event_ids
]) ])
states = dict(zip(event_ids, [s[1] for s in states])) states = dict(zip(event_ids, [s[1] for s in states]))

View file

@ -96,7 +96,7 @@ class StateHandler(object):
cache.ts = self.clock.time_msec() cache.ts = self.clock.time_msec()
state = cache.state state = cache.state
else: else:
res = yield self.resolve_state_groups(event_ids) res = yield self.resolve_state_groups(room_id, event_ids)
state = res[1] state = res[1]
if event_type: if event_type:
@ -155,13 +155,13 @@ class StateHandler(object):
if event.is_state(): if event.is_state():
ret = yield self.resolve_state_groups( ret = yield self.resolve_state_groups(
[e for e, _ in event.prev_events], event.room_id, [e for e, _ in event.prev_events],
event_type=event.type, event_type=event.type,
state_key=event.state_key, state_key=event.state_key,
) )
else: else:
ret = yield self.resolve_state_groups( ret = yield self.resolve_state_groups(
[e for e, _ in event.prev_events], event.room_id, [e for e, _ in event.prev_events],
) )
group, curr_state, prev_state = ret group, curr_state, prev_state = ret
@ -180,7 +180,7 @@ class StateHandler(object):
@defer.inlineCallbacks @defer.inlineCallbacks
@log_function @log_function
def resolve_state_groups(self, event_ids, event_type=None, state_key=""): def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""):
""" Given a list of event_ids this method fetches the state at each """ Given a list of event_ids this method fetches the state at each
event, resolves conflicts between them and returns them. event, resolves conflicts between them and returns them.
@ -205,7 +205,7 @@ class StateHandler(object):
) )
state_groups = yield self.store.get_state_groups( state_groups = yield self.store.get_state_groups(
event_ids room_id, event_ids
) )
logger.debug( logger.debug(

View file

@ -18,6 +18,7 @@ from synapse.api.errors import StoreError
from synapse.util.logutils import log_function from synapse.util.logutils import log_function
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
from synapse.util.lrucache import LruCache from synapse.util.lrucache import LruCache
from synapse.util.dictionary_cache import DictionaryCache
import synapse.metrics import synapse.metrics
from util.id_generators import IdGenerator, StreamIdGenerator from util.id_generators import IdGenerator, StreamIdGenerator
@ -87,23 +88,33 @@ class Cache(object):
) )
def get(self, *keyargs): def get(self, *keyargs):
if len(keyargs) != self.keylen: try:
raise ValueError("Expected a key to have %d items", self.keylen) if len(keyargs) != self.keylen:
raise ValueError("Expected a key to have %d items", self.keylen)
val = self.cache.get(keyargs, self.sentinel) val = self.cache.get(keyargs, self.sentinel)
if val is not self.sentinel: if val is not self.sentinel:
cache_counter.inc_hits(self.name) cache_counter.inc_hits(self.name)
return val return val
cache_counter.inc_misses(self.name) cache_counter.inc_misses(self.name)
raise KeyError() raise KeyError()
except KeyError:
raise
except:
logger.exception("Cache.get failed for %s" % (self.name,))
raise
def update(self, sequence, *args): def update(self, sequence, *args):
self.check_thread() try:
if self.sequence == sequence: self.check_thread()
# Only update the cache if the caches sequence number matches the if self.sequence == sequence:
# number that the cache had before the SELECT was started (SYN-369) # Only update the cache if the caches sequence number matches the
self.prefill(*args) # number that the cache had before the SELECT was started (SYN-369)
self.prefill(*args)
except:
logger.exception("Cache.update failed for %s" % (self.name,))
raise
def prefill(self, *args): # because I can't *keyargs, value def prefill(self, *args): # because I can't *keyargs, value
keyargs = args[:-1] keyargs = args[:-1]
@ -327,6 +338,8 @@ class SQLBaseStore(object):
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True, self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
max_entries=hs.config.event_cache_size) max_entries=hs.config.event_cache_size)
self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
self._event_fetch_lock = threading.Condition() self._event_fetch_lock = threading.Condition()
self._event_fetch_list = [] self._event_fetch_list = []
self._event_fetch_ongoing = 0 self._event_fetch_ongoing = 0

View file

@ -45,52 +45,38 @@ class StateStore(SQLBaseStore):
""" """
@defer.inlineCallbacks @defer.inlineCallbacks
def get_state_groups(self, event_ids): def get_state_groups(self, room_id, event_ids):
""" Get the state groups for the given list of event_ids """ Get the state groups for the given list of event_ids
The return value is a dict mapping group names to lists of events. The return value is a dict mapping group names to lists of events.
""" """
def f(txn): event_and_groups = yield defer.gatherResults(
groups = set()
for event_id in event_ids:
group = self._simple_select_one_onecol_txn(
txn,
table="event_to_state_groups",
keyvalues={"event_id": event_id},
retcol="state_group",
allow_none=True,
)
if group:
groups.add(group)
res = {}
for group in groups:
state_ids = self._simple_select_onecol_txn(
txn,
table="state_groups_state",
keyvalues={"state_group": group},
retcol="event_id",
)
res[group] = state_ids
return res
states = yield self.runInteraction(
"get_state_groups",
f,
)
state_list = yield defer.gatherResults(
[ [
self._fetch_events_for_group(group, vals) self._get_state_group_for_event(
for group, vals in states.items() room_id, event_id,
).addCallback(lambda group, event_id: (event_id, group), event_id)
for event_id in event_ids
], ],
consumeErrors=True, consumeErrors=True,
) ).addErrback(unwrapFirstError)
defer.returnValue(dict(state_list)) groups = set(group for _, group in event_and_groups if group)
group_to_state = yield defer.gatherResults(
[
self._get_state_for_group(
group,
).addCallback(lambda state_dict, group: (group, state_dict), group)
for group in groups
],
consumeErrors=True,
).addErrback(unwrapFirstError)
defer.returnValue({
group: state_map.values()
for group, state_map in group_to_state
})
@cached(num_args=1) @cached(num_args=1)
def _fetch_events_for_group(self, key, events): def _fetch_events_for_group(self, key, events):
@ -207,16 +193,25 @@ class StateStore(SQLBaseStore):
events = yield self._get_events(event_ids, get_prev_content=False) events = yield self._get_events(event_ids, get_prev_content=False)
defer.returnValue(events) defer.returnValue(events)
@cached(num_args=3, lru=True) @cached(num_args=2, lru=True, max_entries=10000)
def _get_state_groups_from_group(self, room_id, group, types): def _get_state_groups_from_group(self, group, types):
def f(txn): def f(txn):
if types is not None:
where_clause = "AND (%s)" % (
" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
)
else:
where_clause = ""
sql = ( sql = (
"SELECT event_id FROM state_groups_state WHERE" "SELECT event_id FROM state_groups_state WHERE"
" room_id = ? AND state_group = ? AND (%s)" " state_group = ? %s"
) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),) ) % (where_clause,)
args = [group]
if types is not None:
args.extend([i for typ in types for i in typ])
args = [room_id, group]
args.extend([i for typ in types for i in typ])
txn.execute(sql, args) txn.execute(sql, args)
return group, [ return group, [
@ -229,7 +224,7 @@ class StateStore(SQLBaseStore):
f, f,
) )
@cached(num_args=3, lru=True, max_entries=100000) @cached(num_args=3, lru=True, max_entries=20000)
def _get_state_for_event_id(self, room_id, event_id, types): def _get_state_for_event_id(self, room_id, event_id, types):
def f(txn): def f(txn):
type_and_state_sql = " OR ".join([ type_and_state_sql = " OR ".join([
@ -280,40 +275,33 @@ class StateStore(SQLBaseStore):
deferred: A list of dicts corresponding to the event_ids given. deferred: A list of dicts corresponding to the event_ids given.
The dicts are mappings from (type, state_key) -> state_events The dicts are mappings from (type, state_key) -> state_events
""" """
set_types = frozenset(types) event_and_groups = yield defer.gatherResults(
res = yield defer.gatherResults(
[ [
self._get_state_for_event_id( self._get_state_group_for_event(
room_id, event_id, set_types, room_id, event_id,
) ).addCallback(lambda group, event_id: (event_id, group), event_id)
for event_id in event_ids for event_id in event_ids
], ],
consumeErrors=True, consumeErrors=True,
).addErrback(unwrapFirstError) ).addErrback(unwrapFirstError)
event_to_state_ids = dict(res) groups = set(group for _, group in event_and_groups)
event_dict = yield self._get_events( res = yield defer.gatherResults(
[ [
item self._get_state_for_group(
for lst in event_to_state_ids.values() group, types
for item in lst ).addCallback(lambda state_dict, group: (group, state_dict), group)
for group in groups
], ],
get_prev_content=False consumeErrors=True,
).addCallback( ).addErrback(unwrapFirstError)
lambda evs: {ev.event_id: ev for ev in evs}
) group_to_state = dict(res)
event_to_state = { event_to_state = {
event_id: { event_id: group_to_state[group]
(ev.type, ev.state_key): ev for event_id, group in event_and_groups
for ev in [
event_dict[state_id]
for state_id in state_ids
if state_id in event_dict
]
}
for event_id, state_ids in event_to_state_ids.items()
} }
defer.returnValue([ defer.returnValue([
@ -321,6 +309,79 @@ class StateStore(SQLBaseStore):
for event in event_ids for event in event_ids
]) ])
@cached(num_args=2, lru=True, max_entries=100000)
def _get_state_group_for_event(self, room_id, event_id):
    """Look up the state group that the given event belongs to.

    Returns:
        Deferred: resolves to the state_group id from the
        ``event_to_state_groups`` table, or None if the event has no
        entry there (``allow_none=True``).
    """
    # NOTE(review): room_id is part of the cache key (num_args=2) but is
    # not used in the DB lookup itself -- presumably kept so cache
    # invalidation can be scoped per room; confirm against callers.
    return self._simple_select_one_onecol(
        table="event_to_state_groups",
        keyvalues={
            "event_id": event_id,
        },
        retcol="state_group",
        allow_none=True,
        desc="_get_state_group_for_event",
    )
@defer.inlineCallbacks
def _get_state_for_group(self, group, types=None):
    """Fetch the state for a single state group, using the
    DictionaryCache where possible and falling back to the database.

    Args:
        group: the state group id to fetch state for.
        types: optional iterable of (event_type, state_key) pairs to
            filter the returned state by. A state_key of None acts as a
            wildcard ("all state_keys of this type"). If types is None,
            the full state for the group is returned.

    Returns:
        Deferred: resolves to a dict of (type, state_key) -> state event.
    """
    # is_all is True only when the cache entry holds the *complete*
    # state for the group rather than a partial subset of keys.
    is_all, state_dict = self._state_group_cache.get(group)

    type_to_key = {}
    missing_types = set()
    if types is not None:
        for typ, state_key in types:
            if state_key is None:
                # Wildcard request for this type: a partial cache entry
                # can never be known to satisfy it, so mark it missing.
                type_to_key[typ] = None
                missing_types.add((typ, state_key))
            else:
                # Only collect concrete state_keys for this type if the
                # type hasn't already been widened to a wildcard (None).
                if type_to_key.get(typ, object()) is not None:
                    type_to_key.setdefault(typ, set()).add(state_key)

                if (typ, state_key) not in state_dict:
                    missing_types.add((typ, state_key))

    if is_all and types is None:
        # Full state requested and fully cached: return it directly.
        defer.returnValue(state_dict)

    if is_all or (types is not None and not missing_types):
        # Everything requested is already in the cache; filter the
        # cached dict down to the requested (type, state_key) pairs.
        def include(typ, state_key):
            sentinel = object()
            valid_state_keys = type_to_key.get(typ, sentinel)
            if valid_state_keys is sentinel:
                # Type was never requested.
                return False
            if valid_state_keys is None:
                # Wildcard type: include every state_key.
                return True
            if state_key in valid_state_keys:
                return True
            return False

        defer.returnValue({
            k: v
            for k, v in state_dict.items()
            if include(k[0], k[1])
        })

    # Okay, so we have some missing_types, lets fetch them.
    # Snapshot the cache sequence number *before* hitting the DB so the
    # update below is discarded if the cache gets invalidated while the
    # SELECT is in flight (same pattern as SYN-369 in Cache.update).
    cache_seq_num = self._state_group_cache.sequence
    # NOTE(review): an empty (falsy) types list is coerced to None here,
    # which fetches the full group state -- presumably intentional;
    # confirm no caller passes an empty filter expecting {}.
    _, state_ids = yield self._get_state_groups_from_group(
        group,
        frozenset(types) if types else None
    )
    state_events = yield self._get_events(state_ids, get_prev_content=False)
    state_dict = {
        (e.type, e.state_key): e
        for e in state_events
    }

    # Update the cache; full=True only when we fetched the entire group
    # state, so partial fetches can't be mistaken for complete entries.
    self._state_group_cache.update(
        cache_seq_num,
        key=group,
        value=state_dict,
        full=(types is None),
    )

    defer.returnValue(state_dict)
def _make_group_id(clock): def _make_group_id(clock):
return str(int(clock.time_msec())) + random_string(5) return str(int(clock.time_msec())) + random_string(5)

View file

@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore):
defer.returnValue((events, token)) defer.returnValue((events, token))
@defer.inlineCallbacks @defer.inlineCallbacks
def get_recent_events_for_room(self, room_id, limit, end_token, def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
with_feedback=False, from_token=None):
# TODO (erikj): Handle compressed feedback # TODO (erikj): Handle compressed feedback
end_token = RoomStreamToken.parse_stream_token(end_token) end_token = RoomStreamToken.parse_stream_token(end_token)

View file

@ -16,6 +16,10 @@
from synapse.util.lrucache import LruCache from synapse.util.lrucache import LruCache
from collections import namedtuple from collections import namedtuple
import threading import threading
import logging
logger = logging.getLogger(__name__)
DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value")) DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
@ -47,21 +51,25 @@ class DictionaryCache(object):
) )
def get(self, key, dict_keys=None): def get(self, key, dict_keys=None):
entry = self.cache.get(key, self.sentinel) try:
if entry is not self.sentinel: entry = self.cache.get(key, self.sentinel)
# cache_counter.inc_hits(self.name) if entry is not self.sentinel:
# cache_counter.inc_hits(self.name)
if dict_keys is None: if dict_keys is None:
return DictionaryEntry(entry.full, dict(entry.value)) return DictionaryEntry(entry.full, dict(entry.value))
else: else:
return DictionaryEntry(entry.full, { return DictionaryEntry(entry.full, {
k: entry.value[k] k: entry.value[k]
for k in dict_keys for k in dict_keys
if k in entry.value if k in entry.value
}) })
# cache_counter.inc_misses(self.name) # cache_counter.inc_misses(self.name)
return DictionaryEntry(False, {}) return DictionaryEntry(False, {})
except:
logger.exception("get failed")
raise
def invalidate(self, key): def invalidate(self, key):
self.check_thread() self.check_thread()
@ -77,14 +85,18 @@ class DictionaryCache(object):
self.cache.clear() self.cache.clear()
def update(self, sequence, key, value, full=False): def update(self, sequence, key, value, full=False):
self.check_thread() try:
if self.sequence == sequence: self.check_thread()
# Only update the cache if the caches sequence number matches the if self.sequence == sequence:
# number that the cache had before the SELECT was started (SYN-369) # Only update the cache if the caches sequence number matches the
if full: # number that the cache had before the SELECT was started (SYN-369)
self._insert(key, value) if full:
else: self._insert(key, value)
self._update_or_insert(key, value) else:
self._update_or_insert(key, value)
except:
logger.exception("update failed")
raise
def _update_or_insert(self, key, value): def _update_or_insert(self, key, value):
entry = self.cache.setdefault(key, DictionaryEntry(False, {})) entry = self.cache.setdefault(key, DictionaryEntry(False, {}))

View file

@ -69,7 +69,7 @@ class StateGroupStore(object):
self._next_group = 1 self._next_group = 1
def get_state_groups(self, event_ids): def get_state_groups(self, room_id, event_ids):
groups = {} groups = {}
for event_id in event_ids: for event_id in event_ids:
group = self._event_to_state_group.get(event_id) group = self._event_to_state_group.get(event_id)