Speed up MultiWriterIdGenerator when lots of IDs are in flight. (#10755)

2021-09-03 18:23:46 +01:00 · 2021-09-03 18:23:46 +01:00 · 92b6ac31b2
parent 924276f482
commit 92b6ac31b2
4 changed files with 124 additions and 2 deletions
--- a/changelog.d/10755.misc
+++ b/changelog.d/10755.misc
@ -0,0 +1 @@
 Minor speed ups when joining large rooms over federation.
--- a/stubs/sortedcontainers/__init__.pyi
+++ b/stubs/sortedcontainers/__init__.pyi
@ -1,5 +1,6 @@
 from .sorteddict import SortedDict, SortedItemsView, SortedKeysView, SortedValuesView
 from .sortedlist import SortedKeyList, SortedList, SortedListWithKey
 from .sortedset import SortedSet
 __all__ = [
    "SortedDict",
@ -9,4 +10,5 @@ __all__ = [
    "SortedKeyList",
    "SortedList",
    "SortedListWithKey",
    "SortedSet",
 ]
--- a/stubs/sortedcontainers/sortedset.pyi
+++ b/stubs/sortedcontainers/sortedset.pyi
@ -0,0 +1,118 @@
 # stub for SortedSet. This is a lightly edited copy of
 # https://github.com/grantjenks/python-sortedcontainers/blob/d0a225d7fd0fb4c54532b8798af3cbeebf97e2d5/sortedcontainers/sortedset.pyi
 # (from https://github.com/grantjenks/python-sortedcontainers/pull/107)
 from typing import (
    AbstractSet,
    Any,
    Callable,
    Generic,
    Hashable,
    Iterable,
    Iterator,
    List,
    MutableSet,
    Optional,
    Sequence,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
    overload,
 )
 # --- Global
 # Type variables and aliases used by the SortedSet stub in this file.
 # Element type of the set; bounded by Hashable.
 _T = TypeVar("_T", bound=Hashable)
 # Element type of the other iterables passed to the set-algebra methods.
 _S = TypeVar("_S", bound=Hashable)
 # Self type used by copy()/__copy__() so their return annotation is the
 # caller's own class (bounded by SortedSet).
 _SS = TypeVar("_SS", bound=SortedSet)
 # Shape of a key function: takes one element and returns Any.
 _Key = Callable[[_T], Any]
 class SortedSet(MutableSet[_T], Sequence[_T]):
    # Type stub for SortedSet. It is declared as both a MutableSet and a
    # Sequence of its element type, so it exposes set mutation/algebra as well
    # as positional access. All bodies are `...` because this is a stub.

    # --- Construction
    def __init__(
        self,
        iterable: Optional[Iterable[_T]] = ...,
        key: Optional[_Key[_T]] = ...,
    ) -> None: ...
    @classmethod
    def _fromset(
        cls, values: Set[_T], key: Optional[_Key[_T]] = ...
    ) -> SortedSet[_T]: ...
    @property
    def key(self) -> Optional[_Key[_T]]: ...

    # --- Membership and positional access
    def __contains__(self, value: Any) -> bool: ...
    # An int index yields a single element; a slice yields a list of elements.
    @overload
    def __getitem__(self, index: int) -> _T: ...
    @overload
    def __getitem__(self, index: slice) -> List[_T]: ...
    def __delitem__(self, index: Union[int, slice]) -> None: ...

    # --- Comparisons
    # Equality accepts anything; the ordering comparisons take an iterable of
    # elements.
    def __eq__(self, other: Any) -> bool: ...
    def __ne__(self, other: Any) -> bool: ...
    def __lt__(self, other: Iterable[_T]) -> bool: ...
    def __gt__(self, other: Iterable[_T]) -> bool: ...
    def __le__(self, other: Iterable[_T]) -> bool: ...
    def __ge__(self, other: Iterable[_T]) -> bool: ...

    # --- Size and iteration
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[_T]: ...
    def __reversed__(self) -> Iterator[_T]: ...

    # --- Element-level mutation and copying
    def add(self, value: _T) -> None: ...
    def _add(self, value: _T) -> None: ...
    def clear(self) -> None: ...
    # copy()/__copy__() are typed with the self type so subclasses keep their
    # own type in the return annotation.
    def copy(self: _SS) -> _SS: ...
    def __copy__(self: _SS) -> _SS: ...
    def count(self, value: _T) -> int: ...
    def discard(self, value: _T) -> None: ...
    def _discard(self, value: _T) -> None: ...
    def pop(self, index: int = ...) -> _T: ...
    def remove(self, value: _T) -> None: ...

    # --- Set algebra
    # Each operation is declared alongside its operator form(s). The operands
    # may hold a different hashable element type (_S), and every result is
    # typed as a SortedSet over the union of both element types.
    def difference(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __sub__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def difference_update(
        self, *iterables: Iterable[_S]
    ) -> SortedSet[Union[_T, _S]]: ...
    def __isub__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def intersection(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __and__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __rand__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def intersection_update(
        self, *iterables: Iterable[_S]
    ) -> SortedSet[Union[_T, _S]]: ...
    def __iand__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def symmetric_difference(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __xor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __rxor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def symmetric_difference_update(
        self, other: Iterable[_S]
    ) -> SortedSet[Union[_T, _S]]: ...
    def __ixor__(self, other: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def union(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __or__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __ror__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def update(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def __ior__(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...
    def _update(self, *iterables: Iterable[_S]) -> SortedSet[Union[_T, _S]]: ...

    # --- Pickling, repr and internal checks
    def __reduce__(
        self,
    ) -> Tuple[Type[SortedSet[_T]], Set[_T], Callable[[_T], Any]]: ...
    def __repr__(self) -> str: ...
    def _check(self) -> None: ...

    # --- Sorted-order queries
    def bisect_left(self, value: _T) -> int: ...
    def bisect_right(self, value: _T) -> int: ...
    # `reverse` is annotated as a bool with an elided default, matching
    # `irange` below. Writing `reverse=bool` would instead make the `bool`
    # type object the default value and leave the parameter unannotated.
    def islice(
        self,
        start: Optional[int] = ...,
        stop: Optional[int] = ...,
        reverse: bool = ...,
    ) -> Iterator[_T]: ...
    def irange(
        self,
        minimum: Optional[_T] = ...,
        maximum: Optional[_T] = ...,
        inclusive: Tuple[bool, bool] = ...,
        reverse: bool = ...,
    ) -> Iterator[_T]: ...
    def index(
        self, value: _T, start: Optional[int] = ..., stop: Optional[int] = ...
    ) -> int: ...
    def _reset(self, load: int) -> None: ...
--- a/synapse/storage/util/id_generators.py
+++ b/synapse/storage/util/id_generators.py
@ -19,6 +19,7 @@ from contextlib import contextmanager
 from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
 import attr
 from sortedcontainers import SortedSet
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.storage.database import DatabasePool, LoggingTransaction
@ -240,7 +241,7 @@ class MultiWriterIdGenerator:
        # Set of local IDs that we're still processing. The current position
        # should be less than the minimum of this set (if not empty).
-        self._unfinished_ids: Set[int] = set()
+        self._unfinished_ids: SortedSet[int] = SortedSet()
        # Set of local IDs that we've processed that are larger than the current
        # position, due to there being smaller unpersisted IDs.
@ -473,7 +474,7 @@ class MultiWriterIdGenerator:
                finished = set()
-                min_unfinshed = min(self._unfinished_ids)
+                min_unfinshed = self._unfinished_ids[0]
                for s in self._finished_ids:
                    if s < min_unfinshed:
                        if new_cur is None or new_cur < s:
		`@ -0,0 +1 @@`
							`Minor speed ups when joining large rooms over federation.`