synapse-old/synapse/util/caches/dictionary_cache.py

# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum
import logging
import threading
from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar

import attr

from synapse.util.caches.lrucache import LruCache

logger = logging.getLogger(__name__)


# The type of the cache keys.
KT = TypeVar("KT")
# The type of the dictionary keys.
DKT = TypeVar("DKT")


@attr.s(slots=True)
class DictionaryEntry:
    """Returned when getting an entry from the cache

    Attributes:
        full: Whether the cache has the full dict or just some keys.
            If not full then not all requested keys will necessarily be present
            in `value`
        known_absent: Keys that were looked up in the dict and were not
            there. NOTE: `DictionaryCache.update` merges the whole
            `fetched_keys` set it is given into this set, so it can also
            contain keys that are present in `value`; check `value` first.
        value: The full or partial dict value
    """

    full = attr.ib(type=bool)
    known_absent = attr.ib()
    value = attr.ib()

    def __len__(self) -> int:
        # Only the stored dictionary keys count towards the length;
        # `known_absent` is not included. DictionaryCache hands `len` to its
        # LruCache as the `size_callback`, so this is presumably the size the
        # LRU accounts for each entry.
        return len(self.value)


class _Sentinel(enum.Enum):
    """Single-member enum whose member marks "no entry" in cache lookups."""

    # defining a sentinel in this way allows mypy to correctly handle the
    # type of a dictionary lookup.
    sentinel = object()


class DictionaryCache(Generic[KT, DKT]):
    """Caches key -> dictionary lookups, supporting caching partial dicts, i.e.
    fetching a subset of dictionary keys for a particular key.

    Every cache key maps to a `DictionaryEntry` stored in an `LruCache` whose
    per-entry size is `len(entry)`. Writers are expected to read `sequence`
    before starting their lookup and hand it back to `update`; any
    invalidation in between bumps `sequence` and causes the update to be
    dropped.
    """

    def __init__(self, name: str, max_entries: int = 1000):
        """
        Args:
            name: Stored on the instance and passed to the underlying
                LruCache as `cache_name`.
            max_entries: Passed to the underlying LruCache as `max_size`.
        """
        self.cache = LruCache(
            max_size=max_entries, cache_name=name, size_callback=len
        )  # type: LruCache[KT, DictionaryEntry]

        self.name = name
        self.sequence = 0
        self.thread = None  # type: Optional[threading.Thread]

    def check_thread(self) -> None:
        """Ensure all mutating calls come from a single thread.

        The first call records the calling thread; later calls must come from
        that same thread.

        Raises:
            ValueError: if called from a different thread than the one
                recorded by the first call.
        """
        current = threading.current_thread()
        if self.thread is None:
            self.thread = current
        elif self.thread is not current:
            raise ValueError(
                "Cache objects can only be accessed from the main thread"
            )

    def get(
        self, key: KT, dict_keys: Optional[Iterable[DKT]] = None
    ) -> DictionaryEntry:
        """Fetch an entry out of the cache

        Args:
            key
            dict_keys: If given a set of keys then return only those keys
                that exist in the cache.

        Returns:
            DictionaryEntry: a new entry object. Its `value` is always a
            fresh dict, but `known_absent` is the cached set itself (not a
            copy), so callers must not mutate it. If `key` is not cached an
            empty, non-full entry is returned.
        """
        entry = self.cache.get(key, _Sentinel.sentinel)
        if entry is _Sentinel.sentinel:
            return DictionaryEntry(False, set(), {})

        cached = entry.value
        if dict_keys is None:
            found = dict(cached)
        else:
            found = {k: cached[k] for k in dict_keys if k in cached}

        return DictionaryEntry(entry.full, entry.known_absent, found)

    def invalidate(self, key: KT) -> None:
        """Drop the entry for `key` (if any) and bump the sequence number."""
        self.check_thread()

        # Increment the sequence number so that any SELECT statements that
        # raced with the INSERT don't update the cache (SYN-369)
        self.sequence += 1
        self.cache.pop(key, None)

    def invalidate_all(self) -> None:
        """Drop every entry and bump the sequence number."""
        self.check_thread()
        self.sequence += 1
        self.cache.clear()

    def update(
        self,
        sequence: int,
        key: KT,
        value: Dict[DKT, Any],
        fetched_keys: Optional[Set[DKT]] = None,
    ) -> None:
        """Updates the entry in the cache

        Args:
            sequence: The value of `self.sequence` read before the lookup
                that produced `value` was started. If it no longer matches,
                the update is silently dropped.
            key
            value: The value to update the cache with. It is copied into the
                cache; the caller's dict is never stored or modified.
            fetched_keys: All of the dictionary keys which were
                fetched from the database.

                If None, this is the complete value for key K and replaces
                whatever was cached. Otherwise `value` is merged into the
                existing entry and every key in `fetched_keys` is added to
                the entry's `known_absent` set.
        """
        self.check_thread()

        # Only update the cache if the caches sequence number matches the
        # number that the cache had before the SELECT was started (SYN-369)
        if self.sequence != sequence:
            return

        if fetched_keys is None:
            self._insert(key, value, set())
        else:
            self._update_or_insert(key, value, fetched_keys)

    def _update_or_insert(
        self, key: KT, value: Dict[DKT, Any], known_absent: Set[DKT]
    ) -> None:
        """Merge `value` and `known_absent` into the entry for `key`,
        creating an empty non-full entry first if there is none.
        """
        # We pop and reinsert as we need to tell the cache the size may have
        # changed
        entry = self.cache.pop(key, None)

        # Compare against None explicitly: an entry holding no keys has
        # len() == 0 and is therefore falsy.
        if entry is None:
            entry = DictionaryEntry(False, set(), {})

        entry.value.update(value)
        entry.known_absent.update(known_absent)
        self.cache[key] = entry

    def _insert(self, key: KT, value: Dict[DKT, Any], known_absent: Set[DKT]) -> None:
        """Store `value` as the complete dict for `key`."""
        # Store a private copy. _update_or_insert later mutates the cached
        # dict in place, which would otherwise modify the caller's object
        # (and a caller mutating its dict would change the cached entry
        # without the cache being told).
        self.cache[key] = DictionaryEntry(True, known_absent, dict(value))
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`# -- coding: utf-8 --`
copyrights 2016-01-06 21:26:29 -07:00			`# Copyright 2015, 2016 OpenMarket Ltd`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
type annotations for LruCache 2020-10-16 08:56:39 -06:00			`import enum`
Use dictionary cache to do group -> state fetching 2015-08-05 08:06:51 -06:00			`import logging`
run isort 2018-07-09 00:09:20 -06:00			`import threading`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`from typing import Any, Dict, Generic, Iterable, Optional, Set, TypeVar`

			`import attr`
run isort 2018-07-09 00:09:20 -06:00
			`from synapse.util.caches.lrucache import LruCache`
Use dictionary cache to do group -> state fetching 2015-08-05 08:06:51 -06:00
			`logger = logging.getLogger(__name__)`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00

Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`# The type of the cache keys.`
			`KT = TypeVar("KT")`
			`# The type of the dictionary keys.`
			`DKT = TypeVar("DKT")`


			`@attr.s(slots=True)`
			`class DictionaryEntry:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`"""Returned when getting an entry from the cache`

			`Attributes:`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`full: Whether the cache has the full or dict or just some keys.`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`If not full then not all requested keys will necessarily be present`
			in `value`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`known_absent: Keys that were looked up in the dict and were not`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`there.`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`value: The full or partial dict value`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`"""`
Run Black. (#5482) 2019-06-20 03:32:02 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`full = attr.ib(type=bool)`
			`known_absent = attr.ib()`
			`value = attr.ib()`

Speed up cache size calculation Instead of calculating the size of the cache repeatedly, which can take a long time now that it can use a callback, instead cache the size and update that on insertion and deletion. This requires changing the cache descriptors to have two caches, one for pending deferreds and the other for the actual values. There's no reason to evict from the pending deferreds as they won't take up any more memory. 2017-01-17 04:18:13 -07:00			`def __len__(self):`
			`return len(self.value)`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00

type annotations for LruCache 2020-10-16 08:56:39 -06:00			`class _Sentinel(enum.Enum):`
			`# defining a sentinel in this way allows mypy to correctly handle the`
			`# type of a dictionary lookup.`
			`sentinel = object()`


Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`class DictionaryCache(Generic[KT, DKT]):`
Comments 2015-08-10 07:16:24 -06:00			`"""Caches key -> dictionary lookups, supporting caching partial dicts, i.e.`
			`fetching a subset of dictionary keys for a particular key.`
			`"""`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def __init__(self, name: str, max_entries: int = 1000):`
type annotations for LruCache 2020-10-16 08:56:39 -06:00			`self.cache = LruCache(`
			`max_size=max_entries, cache_name=name, size_callback=len`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`) # type: LruCache[KT, DictionaryEntry]`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
			`self.name = name`
			`self.sequence = 0`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`self.thread = None # type: Optional[threading.Thread]`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def check_thread(self) -> None:`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`expected_thread = self.thread`
			`if expected_thread is None:`
			`self.thread = threading.current_thread()`
			`else:`
			`if expected_thread is not threading.current_thread():`
			`raise ValueError(`
			`"Cache objects can only be accessed from the main thread"`
			`)`

Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def get(`
			`self, key: KT, dict_keys: Optional[Iterable[DKT]] = None`
			`) -> DictionaryEntry:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`"""Fetch an entry out of the cache`

			`Args:`
			`key`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`dict_key: If given a set of keys then return only those keys`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`that exist in the cache.`

			`Returns:`
			`DictionaryEntry`
			`"""`
type annotations for LruCache 2020-10-16 08:56:39 -06:00			`entry = self.cache.get(key, _Sentinel.sentinel)`
			`if entry is not _Sentinel.sentinel:`
Wire up the dictionarycache to the metrics 2015-08-12 03:13:35 -06:00			`if dict_keys is None:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`return DictionaryEntry(`
			`entry.full, entry.known_absent, dict(entry.value)`
Run Black. (#5482) 2019-06-20 03:32:02 -06:00			`)`
Wire up the dictionarycache to the metrics 2015-08-12 03:13:35 -06:00			`else:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`return DictionaryEntry(`
			`entry.full,`
			`entry.known_absent,`
Wire up the dictionarycache to the metrics 2015-08-12 03:13:35 -06:00			`{k: entry.value[k] for k in dict_keys if k in entry.value},`
			`)`

Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`return DictionaryEntry(False, set(), {})`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def invalidate(self, key: KT) -> None:`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`self.check_thread()`

			`# Increment the sequence number so that any SELECT statements that`
			`# raced with the INSERT don't update the cache (SYN-369)`
			`self.sequence += 1`
			`self.cache.pop(key, None)`

Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def invalidate_all(self) -> None:`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`self.check_thread()`
			`self.sequence += 1`
			`self.cache.clear()`

Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def update(`
			`self,`
			`sequence: int,`
			`key: KT,`
			`value: Dict[DKT, Any],`
			`fetched_keys: Optional[Set[DKT]] = None,`
			`) -> None:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`"""Updates the entry in the cache`

			`Args:`
			`sequence`
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`key`
			`value: The value to update the cache with.`
			`fetched_keys: All of the dictionary keys which were`
Disable partial state group caching for wildcard lookups When _get_state_for_groups is given a wildcard filter, just do a complete lookup. Hopefully this will give us the best of both worlds by not filling up the ram if we only need one or two keys, but also making the cache still work for the federation reader usecase. 2018-06-11 16:13:06 -06:00			`fetched from the database.`

			`If None, this is the complete value for key K. Otherwise, it`
			`is used to infer a list of keys which we know don't exist in`
			`the full dict.`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`"""`
Wire up the dictionarycache to the metrics 2015-08-12 03:13:35 -06:00			`self.check_thread()`
			`if self.sequence == sequence:`
			`# Only update the cache if the caches sequence number matches the`
			`# number that the cache had before the SELECT was started (SYN-369)`
Disable partial state group caching for wildcard lookups When _get_state_for_groups is given a wildcard filter, just do a complete lookup. Hopefully this will give us the best of both worlds by not filling up the ram if we only need one or two keys, but also making the cache still work for the federation reader usecase. 2018-06-11 16:13:06 -06:00			`if fetched_keys is None:`
			`self._insert(key, value, set())`
Wire up the dictionarycache to the metrics 2015-08-12 03:13:35 -06:00			`else:`
Disable partial state group caching for wildcard lookups When _get_state_for_groups is given a wildcard filter, just do a complete lookup. Hopefully this will give us the best of both worlds by not filling up the ram if we only need one or two keys, but also making the cache still work for the federation reader usecase. 2018-06-11 16:13:06 -06:00			`self._update_or_insert(key, value, fetched_keys)`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def _update_or_insert(`
			`self, key: KT, value: Dict[DKT, Any], known_absent: Set[DKT]`
			`) -> None:`
Fix bug where state cache used lots of memory The state cache bases its size on the sum of the size of entries. The size of the entry is calculated once on insertion, so it is important that the size of entries does not change. The DictionaryCache modified the entries size, which caused the state cache to incorrectly think it was smaller than it actually was. 2018-03-15 09:40:13 -06:00			`# We pop and reinsert as we need to tell the cache the size may have`
			`# changed`

			`entry = self.cache.pop(key, DictionaryEntry(False, set(), {}))`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00			`entry.value.update(value)`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`entry.known_absent.update(known_absent)`
Fix bug where state cache used lots of memory The state cache bases its size on the sum of the size of entries. The size of the entry is calculated once on insertion, so it is important that the size of entries does not change. The DictionaryCache modified the entries size, which caused the state cache to incorrectly think it was smaller than it actually was. 2018-03-15 09:40:13 -06:00			`self.cache[key] = entry`
Add basic dictionary cache 2015-08-04 08:56:56 -06:00
Add type hints to DictionaryCache and TTLCache. (#9442) 2021-03-29 10:15:33 -06:00			`def _insert(self, key: KT, value: Dict[DKT, Any], known_absent: Set[DKT]) -> None:`
Make get_state_groups_from_groups faster. Most of the time was spent copying a dict to filter out sentinel values that indicated that keys did not exist in the dict. The sentinel values were added to ensure that we cached the non-existence of keys. By updating DictionaryCache to keep track of which keys were known to not exist itself we can remove a dictionary copy. 2017-05-17 07:31:23 -06:00			`self.cache[key] = DictionaryEntry(True, known_absent, value)`