forked from MirrorHub/synapse
Make get_state_groups_from_groups faster.
Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy.
This commit is contained in:
parent
9f430fa07f
commit
bbfe4e996c
3 changed files with 58 additions and 41 deletions
|
@ -563,20 +563,22 @@ class StateStore(SQLBaseStore):
|
|||
where a `state_key` of `None` matches all state_keys for the
|
||||
`type`.
|
||||
"""
|
||||
is_all, state_dict_ids = self._state_group_cache.get(group)
|
||||
is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
|
||||
|
||||
type_to_key = {}
|
||||
missing_types = set()
|
||||
|
||||
for typ, state_key in types:
|
||||
key = (typ, state_key)
|
||||
if state_key is None:
|
||||
type_to_key[typ] = None
|
||||
missing_types.add((typ, state_key))
|
||||
missing_types.add(key)
|
||||
else:
|
||||
if type_to_key.get(typ, object()) is not None:
|
||||
type_to_key.setdefault(typ, set()).add(state_key)
|
||||
|
||||
if (typ, state_key) not in state_dict_ids:
|
||||
missing_types.add((typ, state_key))
|
||||
if key not in state_dict_ids and key not in known_absent:
|
||||
missing_types.add(key)
|
||||
|
||||
sentinel = object()
|
||||
|
||||
|
@ -590,7 +592,7 @@ class StateStore(SQLBaseStore):
|
|||
return True
|
||||
return False
|
||||
|
||||
got_all = not (missing_types or types is None)
|
||||
got_all = is_all or not missing_types
|
||||
|
||||
return {
|
||||
k: v for k, v in state_dict_ids.iteritems()
|
||||
|
@ -607,7 +609,7 @@ class StateStore(SQLBaseStore):
|
|||
Args:
|
||||
group: The state group to lookup
|
||||
"""
|
||||
is_all, state_dict_ids = self._state_group_cache.get(group)
|
||||
is_all, _, state_dict_ids = self._state_group_cache.get(group)
|
||||
|
||||
return state_dict_ids, is_all
|
||||
|
||||
|
@ -624,7 +626,7 @@ class StateStore(SQLBaseStore):
|
|||
missing_groups = []
|
||||
if types is not None:
|
||||
for group in set(groups):
|
||||
state_dict_ids, missing_types, got_all = self._get_some_state_from_cache(
|
||||
state_dict_ids, _, got_all = self._get_some_state_from_cache(
|
||||
group, types
|
||||
)
|
||||
results[group] = state_dict_ids
|
||||
|
@ -653,18 +655,6 @@ class StateStore(SQLBaseStore):
|
|||
# Now we want to update the cache with all the things we fetched
|
||||
# from the database.
|
||||
for group, group_state_dict in group_to_state_dict.iteritems():
|
||||
if types:
|
||||
# We delibrately put key -> None mappings into the cache to
|
||||
# cache absence of the key, on the assumption that if we've
|
||||
# explicitly asked for some types then we will probably ask
|
||||
# for them again.
|
||||
state_dict = {
|
||||
(intern_string(etype), intern_string(state_key)): None
|
||||
for (etype, state_key) in types
|
||||
}
|
||||
state_dict.update(results[group])
|
||||
results[group] = state_dict
|
||||
else:
|
||||
state_dict = results[group]
|
||||
|
||||
state_dict.update(
|
||||
|
@ -677,17 +667,9 @@ class StateStore(SQLBaseStore):
|
|||
key=group,
|
||||
value=state_dict,
|
||||
full=(types is None),
|
||||
known_absent=types,
|
||||
)
|
||||
|
||||
# Remove all the entries with None values. The None values were just
|
||||
# used for bookkeeping in the cache.
|
||||
for group, state_dict in results.iteritems():
|
||||
results[group] = {
|
||||
key: event_id
|
||||
for key, event_id in state_dict.iteritems()
|
||||
if event_id
|
||||
}
|
||||
|
||||
defer.returnValue(results)
|
||||
|
||||
def get_next_state_group(self):
|
||||
|
|
|
@ -23,7 +23,17 @@ import logging
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
|
||||
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
|
||||
"""Returned when getting an entry from the cache
|
||||
|
||||
Attributes:
|
||||
full (bool): Whether the cache has the full or dict or just some keys.
|
||||
If not full then not all requested keys will necessarily be present
|
||||
in `value`
|
||||
known_absent (set): Keys that were looked up in the dict and were not
|
||||
there.
|
||||
value (dict): The full or partial dict value
|
||||
"""
|
||||
def __len__(self):
|
||||
return len(self.value)
|
||||
|
||||
|
@ -58,21 +68,31 @@ class DictionaryCache(object):
|
|||
)
|
||||
|
||||
def get(self, key, dict_keys=None):
|
||||
"""Fetch an entry out of the cache
|
||||
|
||||
Args:
|
||||
key
|
||||
dict_key(list): If given a set of keys then return only those keys
|
||||
that exist in the cache.
|
||||
|
||||
Returns:
|
||||
DictionaryEntry
|
||||
"""
|
||||
entry = self.cache.get(key, self.sentinel)
|
||||
if entry is not self.sentinel:
|
||||
self.metrics.inc_hits()
|
||||
|
||||
if dict_keys is None:
|
||||
return DictionaryEntry(entry.full, dict(entry.value))
|
||||
return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
|
||||
else:
|
||||
return DictionaryEntry(entry.full, {
|
||||
return DictionaryEntry(entry.full, entry.known_absent, {
|
||||
k: entry.value[k]
|
||||
for k in dict_keys
|
||||
if k in entry.value
|
||||
})
|
||||
|
||||
self.metrics.inc_misses()
|
||||
return DictionaryEntry(False, {})
|
||||
return DictionaryEntry(False, set(), {})
|
||||
|
||||
def invalidate(self, key):
|
||||
self.check_thread()
|
||||
|
@ -87,19 +107,34 @@ class DictionaryCache(object):
|
|||
self.sequence += 1
|
||||
self.cache.clear()
|
||||
|
||||
def update(self, sequence, key, value, full=False):
|
||||
def update(self, sequence, key, value, full=False, known_absent=None):
|
||||
"""Updates the entry in the cache
|
||||
|
||||
Args:
|
||||
sequence
|
||||
key
|
||||
value (dict): The value to update the cache with.
|
||||
full (bool): Whether the given value is the full dict, or just a
|
||||
partial subset there of. If not full then any existing entries
|
||||
for the key will be updated.
|
||||
known_absent (set): Set of keys that we know don't exist in the full
|
||||
dict.
|
||||
"""
|
||||
self.check_thread()
|
||||
if self.sequence == sequence:
|
||||
# Only update the cache if the caches sequence number matches the
|
||||
# number that the cache had before the SELECT was started (SYN-369)
|
||||
if known_absent is None:
|
||||
known_absent = set()
|
||||
if full:
|
||||
self._insert(key, value)
|
||||
self._insert(key, value, known_absent)
|
||||
else:
|
||||
self._update_or_insert(key, value)
|
||||
self._update_or_insert(key, value, known_absent)
|
||||
|
||||
def _update_or_insert(self, key, value):
|
||||
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
|
||||
def _update_or_insert(self, key, value, known_absent):
|
||||
entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
|
||||
entry.value.update(value)
|
||||
entry.known_absent.update(known_absent)
|
||||
|
||||
def _insert(self, key, value):
|
||||
self.cache[key] = DictionaryEntry(True, value)
|
||||
def _insert(self, key, value, known_absent):
|
||||
self.cache[key] = DictionaryEntry(True, known_absent, value)
|
||||
|
|
|
@ -28,7 +28,7 @@ class DictCacheTestCase(unittest.TestCase):
|
|||
key = "test_simple_cache_hit_full"
|
||||
|
||||
v = self.cache.get(key)
|
||||
self.assertEqual((False, {}), v)
|
||||
self.assertEqual((False, set(), {}), v)
|
||||
|
||||
seq = self.cache.sequence
|
||||
test_value = {"test": "test_simple_cache_hit_full"}
|
||||
|
|
Loading…
Reference in a new issue