Merge pull request #2230 from matrix-org/erikj/speed_up_get_state
Make get_state_groups_from_groups faster.
This commit is contained in:
commit
d9e3a4b5db
|
@ -563,20 +563,22 @@ class StateStore(SQLBaseStore):
|
||||||
where a `state_key` of `None` matches all state_keys for the
|
where a `state_key` of `None` matches all state_keys for the
|
||||||
`type`.
|
`type`.
|
||||||
"""
|
"""
|
||||||
is_all, state_dict_ids = self._state_group_cache.get(group)
|
is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
|
||||||
|
|
||||||
type_to_key = {}
|
type_to_key = {}
|
||||||
missing_types = set()
|
missing_types = set()
|
||||||
|
|
||||||
for typ, state_key in types:
|
for typ, state_key in types:
|
||||||
|
key = (typ, state_key)
|
||||||
if state_key is None:
|
if state_key is None:
|
||||||
type_to_key[typ] = None
|
type_to_key[typ] = None
|
||||||
missing_types.add((typ, state_key))
|
missing_types.add(key)
|
||||||
else:
|
else:
|
||||||
if type_to_key.get(typ, object()) is not None:
|
if type_to_key.get(typ, object()) is not None:
|
||||||
type_to_key.setdefault(typ, set()).add(state_key)
|
type_to_key.setdefault(typ, set()).add(state_key)
|
||||||
|
|
||||||
if (typ, state_key) not in state_dict_ids:
|
if key not in state_dict_ids and key not in known_absent:
|
||||||
missing_types.add((typ, state_key))
|
missing_types.add(key)
|
||||||
|
|
||||||
sentinel = object()
|
sentinel = object()
|
||||||
|
|
||||||
|
@ -590,7 +592,7 @@ class StateStore(SQLBaseStore):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
got_all = not (missing_types or types is None)
|
got_all = is_all or not missing_types
|
||||||
|
|
||||||
return {
|
return {
|
||||||
k: v for k, v in state_dict_ids.iteritems()
|
k: v for k, v in state_dict_ids.iteritems()
|
||||||
|
@ -607,7 +609,7 @@ class StateStore(SQLBaseStore):
|
||||||
Args:
|
Args:
|
||||||
group: The state group to lookup
|
group: The state group to lookup
|
||||||
"""
|
"""
|
||||||
is_all, state_dict_ids = self._state_group_cache.get(group)
|
is_all, _, state_dict_ids = self._state_group_cache.get(group)
|
||||||
|
|
||||||
return state_dict_ids, is_all
|
return state_dict_ids, is_all
|
||||||
|
|
||||||
|
@ -624,7 +626,7 @@ class StateStore(SQLBaseStore):
|
||||||
missing_groups = []
|
missing_groups = []
|
||||||
if types is not None:
|
if types is not None:
|
||||||
for group in set(groups):
|
for group in set(groups):
|
||||||
state_dict_ids, missing_types, got_all = self._get_some_state_from_cache(
|
state_dict_ids, _, got_all = self._get_some_state_from_cache(
|
||||||
group, types
|
group, types
|
||||||
)
|
)
|
||||||
results[group] = state_dict_ids
|
results[group] = state_dict_ids
|
||||||
|
@ -653,19 +655,7 @@ class StateStore(SQLBaseStore):
|
||||||
# Now we want to update the cache with all the things we fetched
|
# Now we want to update the cache with all the things we fetched
|
||||||
# from the database.
|
# from the database.
|
||||||
for group, group_state_dict in group_to_state_dict.iteritems():
|
for group, group_state_dict in group_to_state_dict.iteritems():
|
||||||
if types:
|
state_dict = results[group]
|
||||||
# We delibrately put key -> None mappings into the cache to
|
|
||||||
# cache absence of the key, on the assumption that if we've
|
|
||||||
# explicitly asked for some types then we will probably ask
|
|
||||||
# for them again.
|
|
||||||
state_dict = {
|
|
||||||
(intern_string(etype), intern_string(state_key)): None
|
|
||||||
for (etype, state_key) in types
|
|
||||||
}
|
|
||||||
state_dict.update(results[group])
|
|
||||||
results[group] = state_dict
|
|
||||||
else:
|
|
||||||
state_dict = results[group]
|
|
||||||
|
|
||||||
state_dict.update(
|
state_dict.update(
|
||||||
((intern_string(k[0]), intern_string(k[1])), to_ascii(v))
|
((intern_string(k[0]), intern_string(k[1])), to_ascii(v))
|
||||||
|
@ -677,17 +667,9 @@ class StateStore(SQLBaseStore):
|
||||||
key=group,
|
key=group,
|
||||||
value=state_dict,
|
value=state_dict,
|
||||||
full=(types is None),
|
full=(types is None),
|
||||||
|
known_absent=types,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove all the entries with None values. The None values were just
|
|
||||||
# used for bookkeeping in the cache.
|
|
||||||
for group, state_dict in results.iteritems():
|
|
||||||
results[group] = {
|
|
||||||
key: event_id
|
|
||||||
for key, event_id in state_dict.iteritems()
|
|
||||||
if event_id
|
|
||||||
}
|
|
||||||
|
|
||||||
defer.returnValue(results)
|
defer.returnValue(results)
|
||||||
|
|
||||||
def get_next_state_group(self):
|
def get_next_state_group(self):
|
||||||
|
|
|
@ -23,7 +23,17 @@ import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "value"))):
|
class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "value"))):
|
||||||
|
"""Returned when getting an entry from the cache
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
full (bool): Whether the cache has the full dict or just some keys.
|
||||||
|
If not full then not all requested keys will necessarily be present
|
||||||
|
in `value`
|
||||||
|
known_absent (set): Keys that were looked up in the dict and were not
|
||||||
|
there.
|
||||||
|
value (dict): The full or partial dict value
|
||||||
|
"""
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.value)
|
return len(self.value)
|
||||||
|
|
||||||
|
@ -58,21 +68,31 @@ class DictionaryCache(object):
|
||||||
)
|
)
|
||||||
|
|
||||||
def get(self, key, dict_keys=None):
|
def get(self, key, dict_keys=None):
|
||||||
|
"""Fetch an entry out of the cache
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key
|
||||||
|
dict_keys (list): If given a set of keys then return only those keys
|
||||||
|
that exist in the cache.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
DictionaryEntry
|
||||||
|
"""
|
||||||
entry = self.cache.get(key, self.sentinel)
|
entry = self.cache.get(key, self.sentinel)
|
||||||
if entry is not self.sentinel:
|
if entry is not self.sentinel:
|
||||||
self.metrics.inc_hits()
|
self.metrics.inc_hits()
|
||||||
|
|
||||||
if dict_keys is None:
|
if dict_keys is None:
|
||||||
return DictionaryEntry(entry.full, dict(entry.value))
|
return DictionaryEntry(entry.full, entry.known_absent, dict(entry.value))
|
||||||
else:
|
else:
|
||||||
return DictionaryEntry(entry.full, {
|
return DictionaryEntry(entry.full, entry.known_absent, {
|
||||||
k: entry.value[k]
|
k: entry.value[k]
|
||||||
for k in dict_keys
|
for k in dict_keys
|
||||||
if k in entry.value
|
if k in entry.value
|
||||||
})
|
})
|
||||||
|
|
||||||
self.metrics.inc_misses()
|
self.metrics.inc_misses()
|
||||||
return DictionaryEntry(False, {})
|
return DictionaryEntry(False, set(), {})
|
||||||
|
|
||||||
def invalidate(self, key):
|
def invalidate(self, key):
|
||||||
self.check_thread()
|
self.check_thread()
|
||||||
|
@ -87,19 +107,34 @@ class DictionaryCache(object):
|
||||||
self.sequence += 1
|
self.sequence += 1
|
||||||
self.cache.clear()
|
self.cache.clear()
|
||||||
|
|
||||||
def update(self, sequence, key, value, full=False):
|
def update(self, sequence, key, value, full=False, known_absent=None):
|
||||||
|
"""Updates the entry in the cache
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sequence
|
||||||
|
key
|
||||||
|
value (dict): The value to update the cache with.
|
||||||
|
full (bool): Whether the given value is the full dict, or just a
|
||||||
|
partial subset thereof. If not full then any existing entries
|
||||||
|
for the key will be updated.
|
||||||
|
known_absent (set): Set of keys that we know don't exist in the full
|
||||||
|
dict.
|
||||||
|
"""
|
||||||
self.check_thread()
|
self.check_thread()
|
||||||
if self.sequence == sequence:
|
if self.sequence == sequence:
|
||||||
# Only update the cache if the cache's sequence number matches the
|
# Only update the cache if the cache's sequence number matches the
|
||||||
# number that the cache had before the SELECT was started (SYN-369)
|
# number that the cache had before the SELECT was started (SYN-369)
|
||||||
|
if known_absent is None:
|
||||||
|
known_absent = set()
|
||||||
if full:
|
if full:
|
||||||
self._insert(key, value)
|
self._insert(key, value, known_absent)
|
||||||
else:
|
else:
|
||||||
self._update_or_insert(key, value)
|
self._update_or_insert(key, value, known_absent)
|
||||||
|
|
||||||
def _update_or_insert(self, key, value):
|
def _update_or_insert(self, key, value, known_absent):
|
||||||
entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
|
entry = self.cache.setdefault(key, DictionaryEntry(False, set(), {}))
|
||||||
entry.value.update(value)
|
entry.value.update(value)
|
||||||
|
entry.known_absent.update(known_absent)
|
||||||
|
|
||||||
def _insert(self, key, value):
|
def _insert(self, key, value, known_absent):
|
||||||
self.cache[key] = DictionaryEntry(True, value)
|
self.cache[key] = DictionaryEntry(True, known_absent, value)
|
||||||
|
|
|
@ -28,7 +28,7 @@ class DictCacheTestCase(unittest.TestCase):
|
||||||
key = "test_simple_cache_hit_full"
|
key = "test_simple_cache_hit_full"
|
||||||
|
|
||||||
v = self.cache.get(key)
|
v = self.cache.get(key)
|
||||||
self.assertEqual((False, {}), v)
|
self.assertEqual((False, set(), {}), v)
|
||||||
|
|
||||||
seq = self.cache.sequence
|
seq = self.cache.sequence
|
||||||
test_value = {"test": "test_simple_cache_hit_full"}
|
test_value = {"test": "test_simple_cache_hit_full"}
|
||||||
|
|
Loading…
Reference in New Issue