Improve the performance of calculating ignored users in large rooms (#9024)
This allows for efficiently finding which users ignore a particular user. Co-authored-by: Erik Johnston <erik@matrix.org>
This commit is contained in:
parent
1d5c021a45
commit
23d701864f
|
@ -0,0 +1 @@
|
||||||
|
Improved performance when calculating ignored users in large rooms.
|
|
@ -203,14 +203,18 @@ class BulkPushRuleEvaluator:
|
||||||
|
|
||||||
condition_cache = {} # type: Dict[str, bool]
|
condition_cache = {} # type: Dict[str, bool]
|
||||||
|
|
||||||
|
# If the event is not a state event check if any users ignore the sender.
|
||||||
|
if not event.is_state():
|
||||||
|
ignorers = await self.store.ignored_by(event.sender)
|
||||||
|
else:
|
||||||
|
ignorers = set()
|
||||||
|
|
||||||
for uid, rules in rules_by_user.items():
|
for uid, rules in rules_by_user.items():
|
||||||
if event.sender == uid:
|
if event.sender == uid:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not event.is_state():
|
if uid in ignorers:
|
||||||
is_ignored = await self.store.is_ignored_by(event.sender, uid)
|
continue
|
||||||
if is_ignored:
|
|
||||||
continue
|
|
||||||
|
|
||||||
display_name = None
|
display_name = None
|
||||||
profile_info = room_members.get(uid)
|
profile_info = room_members.get(uid)
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Set, Tuple
|
||||||
|
|
||||||
from synapse.api.constants import AccountDataTypes
|
from synapse.api.constants import AccountDataTypes
|
||||||
from synapse.storage._base import SQLBaseStore, db_to_json
|
from synapse.storage._base import SQLBaseStore, db_to_json
|
||||||
|
@ -24,7 +24,7 @@ from synapse.storage.database import DatabasePool
|
||||||
from synapse.storage.util.id_generators import StreamIdGenerator
|
from synapse.storage.util.id_generators import StreamIdGenerator
|
||||||
from synapse.types import JsonDict
|
from synapse.types import JsonDict
|
||||||
from synapse.util import json_encoder
|
from synapse.util import json_encoder
|
||||||
from synapse.util.caches.descriptors import _CacheContext, cached
|
from synapse.util.caches.descriptors import cached
|
||||||
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -287,23 +287,25 @@ class AccountDataWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
|
||||||
"get_updated_account_data_for_user", get_updated_account_data_for_user_txn
|
"get_updated_account_data_for_user", get_updated_account_data_for_user_txn
|
||||||
)
|
)
|
||||||
|
|
||||||
@cached(num_args=2, cache_context=True, max_entries=5000)
|
@cached(max_entries=5000, iterable=True)
|
||||||
async def is_ignored_by(
|
async def ignored_by(self, user_id: str) -> Set[str]:
|
||||||
self, ignored_user_id: str, ignorer_user_id: str, cache_context: _CacheContext
|
"""
|
||||||
) -> bool:
|
Get users which ignore the given user.
|
||||||
ignored_account_data = await self.get_global_account_data_by_type_for_user(
|
|
||||||
AccountDataTypes.IGNORED_USER_LIST,
|
|
||||||
ignorer_user_id,
|
|
||||||
on_invalidate=cache_context.invalidate,
|
|
||||||
)
|
|
||||||
if not ignored_account_data:
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
Params:
|
||||||
return ignored_user_id in ignored_account_data.get("ignored_users", {})
|
user_id: The user ID which might be ignored.
|
||||||
except TypeError:
|
|
||||||
# The type of the ignored_users field is invalid.
|
Return:
|
||||||
return False
|
The user IDs which ignore the given user.
|
||||||
|
"""
|
||||||
|
return set(
|
||||||
|
await self.db_pool.simple_select_onecol(
|
||||||
|
table="ignored_users",
|
||||||
|
keyvalues={"ignored_user_id": user_id},
|
||||||
|
retcol="ignorer_user_id",
|
||||||
|
desc="ignored_by",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AccountDataStore(AccountDataWorkerStore):
|
class AccountDataStore(AccountDataWorkerStore):
|
||||||
|
@ -390,18 +392,14 @@ class AccountDataStore(AccountDataWorkerStore):
|
||||||
Returns:
|
Returns:
|
||||||
The maximum stream ID.
|
The maximum stream ID.
|
||||||
"""
|
"""
|
||||||
content_json = json_encoder.encode(content)
|
|
||||||
|
|
||||||
async with self._account_data_id_gen.get_next() as next_id:
|
async with self._account_data_id_gen.get_next() as next_id:
|
||||||
# no need to lock here as account_data has a unique constraint on
|
await self.db_pool.runInteraction(
|
||||||
# (user_id, account_data_type) so simple_upsert will retry if
|
"add_user_account_data",
|
||||||
# there is a conflict.
|
self._add_account_data_for_user,
|
||||||
await self.db_pool.simple_upsert(
|
next_id,
|
||||||
desc="add_user_account_data",
|
user_id,
|
||||||
table="account_data",
|
account_data_type,
|
||||||
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
|
content,
|
||||||
values={"stream_id": next_id, "content": content_json},
|
|
||||||
lock=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# it's theoretically possible for the above to succeed and the
|
# it's theoretically possible for the above to succeed and the
|
||||||
|
@ -424,6 +422,71 @@ class AccountDataStore(AccountDataWorkerStore):
|
||||||
|
|
||||||
return self._account_data_id_gen.get_current_token()
|
return self._account_data_id_gen.get_current_token()
|
||||||
|
|
||||||
|
def _add_account_data_for_user(
|
||||||
|
self,
|
||||||
|
txn,
|
||||||
|
next_id: int,
|
||||||
|
user_id: str,
|
||||||
|
account_data_type: str,
|
||||||
|
content: JsonDict,
|
||||||
|
) -> None:
|
||||||
|
content_json = json_encoder.encode(content)
|
||||||
|
|
||||||
|
# no need to lock here as account_data has a unique constraint on
|
||||||
|
# (user_id, account_data_type) so simple_upsert will retry if
|
||||||
|
# there is a conflict.
|
||||||
|
self.db_pool.simple_upsert_txn(
|
||||||
|
txn,
|
||||||
|
table="account_data",
|
||||||
|
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
|
||||||
|
values={"stream_id": next_id, "content": content_json},
|
||||||
|
lock=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ignored users get denormalized into a separate table as an optimisation.
|
||||||
|
if account_data_type != AccountDataTypes.IGNORED_USER_LIST:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Insert / delete to sync the list of ignored users.
|
||||||
|
previously_ignored_users = set(
|
||||||
|
self.db_pool.simple_select_onecol_txn(
|
||||||
|
txn,
|
||||||
|
table="ignored_users",
|
||||||
|
keyvalues={"ignorer_user_id": user_id},
|
||||||
|
retcol="ignored_user_id",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# If the data is invalid, no one is ignored.
|
||||||
|
ignored_users_content = content.get("ignored_users", {})
|
||||||
|
if isinstance(ignored_users_content, dict):
|
||||||
|
currently_ignored_users = set(ignored_users_content)
|
||||||
|
else:
|
||||||
|
currently_ignored_users = set()
|
||||||
|
|
||||||
|
# Delete entries which are no longer ignored.
|
||||||
|
self.db_pool.simple_delete_many_txn(
|
||||||
|
txn,
|
||||||
|
table="ignored_users",
|
||||||
|
column="ignored_user_id",
|
||||||
|
iterable=previously_ignored_users - currently_ignored_users,
|
||||||
|
keyvalues={"ignorer_user_id": user_id},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add entries which are newly ignored.
|
||||||
|
self.db_pool.simple_insert_many_txn(
|
||||||
|
txn,
|
||||||
|
table="ignored_users",
|
||||||
|
values=[
|
||||||
|
{"ignorer_user_id": user_id, "ignored_user_id": u}
|
||||||
|
for u in currently_ignored_users - previously_ignored_users
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Invalidate the cache for any ignored users which were added or removed.
|
||||||
|
for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
|
||||||
|
self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
|
||||||
|
|
||||||
async def _update_max_stream_id(self, next_id: int) -> None:
|
async def _update_max_stream_id(self, next_id: int) -> None:
|
||||||
"""Update the max stream_id
|
"""Update the max stream_id
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,82 @@
|
||||||
|
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
This migration denormalises the account_data table into an ignored users table.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
from synapse.storage._base import db_to_json
|
||||||
|
from synapse.storage.engines import BaseDatabaseEngine
|
||||||
|
from synapse.storage.prepare_database import execute_statements_from_stream
|
||||||
|
from synapse.storage.types import Cursor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
|
||||||
|
logger.info("Creating ignored_users table")
|
||||||
|
execute_statements_from_stream(cur, StringIO(_create_commands))
|
||||||
|
|
||||||
|
# We now upgrade existing data, if any. We don't do this in `run_upgrade` as
|
||||||
|
# we a) want to run these before adding constraints and b) `run_upgrade` is
|
||||||
|
# not run on empty databases.
|
||||||
|
insert_sql = """
|
||||||
|
INSERT INTO ignored_users (ignorer_user_id, ignored_user_id) VALUES (?, ?)
|
||||||
|
"""
|
||||||
|
|
||||||
|
logger.info("Converting existing ignore lists")
|
||||||
|
cur.execute(
|
||||||
|
"SELECT user_id, content FROM account_data WHERE account_data_type = 'm.ignored_user_list'"
|
||||||
|
)
|
||||||
|
for user_id, content_json in cur.fetchall():
|
||||||
|
content = db_to_json(content_json)
|
||||||
|
|
||||||
|
# The content should be the form of a dictionary with a key
|
||||||
|
# "ignored_users" pointing to a dictionary with keys of ignored users.
|
||||||
|
#
|
||||||
|
# { "ignored_users": "@someone:example.org": {} }
|
||||||
|
ignored_users = content.get("ignored_users", {})
|
||||||
|
if isinstance(ignored_users, dict) and ignored_users:
|
||||||
|
cur.executemany(insert_sql, [(user_id, u) for u in ignored_users])
|
||||||
|
|
||||||
|
# Add indexes after inserting data for efficiency.
|
||||||
|
logger.info("Adding constraints to ignored_users table")
|
||||||
|
execute_statements_from_stream(cur, StringIO(_constraints_commands))
|
||||||
|
|
||||||
|
|
||||||
|
# there might be duplicates, so the easiest way to achieve this is to create a new
|
||||||
|
# table with the right data, and renaming it into place
|
||||||
|
|
||||||
|
_create_commands = """
|
||||||
|
-- Users which are ignored when calculating push notifications. This data is
|
||||||
|
-- denormalized from account data.
|
||||||
|
CREATE TABLE IF NOT EXISTS ignored_users(
|
||||||
|
ignorer_user_id TEXT NOT NULL, -- The user ID of the user who is ignoring another user. (This is a local user.)
|
||||||
|
ignored_user_id TEXT NOT NULL -- The user ID of the user who is being ignored. (This is a local or remote user.)
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
_constraints_commands = """
|
||||||
|
CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id);
|
||||||
|
|
||||||
|
-- Add an index on ignored_users since look-ups are done to get all ignorers of an ignored user.
|
||||||
|
CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id);
|
||||||
|
"""
|
|
@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
|
||||||
# XXX: If you're about to bump this to 59 (or higher) please create an update
|
# XXX: If you're about to bump this to 59 (or higher) please create an update
|
||||||
# that drops the unused `cache_invalidation_stream` table, as per #7436!
|
# that drops the unused `cache_invalidation_stream` table, as per #7436!
|
||||||
# XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
|
# XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
|
||||||
SCHEMA_VERSION = 58
|
SCHEMA_VERSION = 59
|
||||||
|
|
||||||
dir_path = os.path.abspath(os.path.dirname(__file__))
|
dir_path = os.path.abspath(os.path.dirname(__file__))
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,120 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from typing import Iterable, Set
|
||||||
|
|
||||||
|
from synapse.api.constants import AccountDataTypes
|
||||||
|
|
||||||
|
from tests import unittest
|
||||||
|
|
||||||
|
|
||||||
|
class IgnoredUsersTestCase(unittest.HomeserverTestCase):
|
||||||
|
def prepare(self, hs, reactor, clock):
|
||||||
|
self.store = self.hs.get_datastore()
|
||||||
|
self.user = "@user:test"
|
||||||
|
|
||||||
|
def _update_ignore_list(
|
||||||
|
self, *ignored_user_ids: Iterable[str], ignorer_user_id: str = None
|
||||||
|
) -> None:
|
||||||
|
"""Update the account data to block the given users."""
|
||||||
|
if ignorer_user_id is None:
|
||||||
|
ignorer_user_id = self.user
|
||||||
|
|
||||||
|
self.get_success(
|
||||||
|
self.store.add_account_data_for_user(
|
||||||
|
ignorer_user_id,
|
||||||
|
AccountDataTypes.IGNORED_USER_LIST,
|
||||||
|
{"ignored_users": {u: {} for u in ignored_user_ids}},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def assert_ignorers(
|
||||||
|
self, ignored_user_id: str, expected_ignorer_user_ids: Set[str]
|
||||||
|
) -> None:
|
||||||
|
self.assertEqual(
|
||||||
|
self.get_success(self.store.ignored_by(ignored_user_id)),
|
||||||
|
expected_ignorer_user_ids,
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_ignoring_users(self):
|
||||||
|
"""Basic adding/removing of users from the ignore list."""
|
||||||
|
self._update_ignore_list("@other:test", "@another:remote")
|
||||||
|
|
||||||
|
# Check a user which no one ignores.
|
||||||
|
self.assert_ignorers("@user:test", set())
|
||||||
|
|
||||||
|
# Check a local user which is ignored.
|
||||||
|
self.assert_ignorers("@other:test", {self.user})
|
||||||
|
|
||||||
|
# Check a remote user which is ignored.
|
||||||
|
self.assert_ignorers("@another:remote", {self.user})
|
||||||
|
|
||||||
|
# Add one user, remove one user, and leave one user.
|
||||||
|
self._update_ignore_list("@foo:test", "@another:remote")
|
||||||
|
|
||||||
|
# Check the removed user.
|
||||||
|
self.assert_ignorers("@other:test", set())
|
||||||
|
|
||||||
|
# Check the added user.
|
||||||
|
self.assert_ignorers("@foo:test", {self.user})
|
||||||
|
|
||||||
|
# Check the removed user.
|
||||||
|
self.assert_ignorers("@another:remote", {self.user})
|
||||||
|
|
||||||
|
def test_caching(self):
|
||||||
|
"""Ensure that caching works properly between different users."""
|
||||||
|
# The first user ignores a user.
|
||||||
|
self._update_ignore_list("@other:test")
|
||||||
|
self.assert_ignorers("@other:test", {self.user})
|
||||||
|
|
||||||
|
# The second user ignores them.
|
||||||
|
self._update_ignore_list("@other:test", ignorer_user_id="@second:test")
|
||||||
|
self.assert_ignorers("@other:test", {self.user, "@second:test"})
|
||||||
|
|
||||||
|
# The first user un-ignores them.
|
||||||
|
self._update_ignore_list()
|
||||||
|
self.assert_ignorers("@other:test", {"@second:test"})
|
||||||
|
|
||||||
|
def test_invalid_data(self):
|
||||||
|
"""Invalid data ends up clearing out the ignored users list."""
|
||||||
|
# Add some data and ensure it is there.
|
||||||
|
self._update_ignore_list("@other:test")
|
||||||
|
self.assert_ignorers("@other:test", {self.user})
|
||||||
|
|
||||||
|
# No ignored_users key.
|
||||||
|
self.get_success(
|
||||||
|
self.store.add_account_data_for_user(
|
||||||
|
self.user, AccountDataTypes.IGNORED_USER_LIST, {},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# No one ignores the user now.
|
||||||
|
self.assert_ignorers("@other:test", set())
|
||||||
|
|
||||||
|
# Add some data and ensure it is there.
|
||||||
|
self._update_ignore_list("@other:test")
|
||||||
|
self.assert_ignorers("@other:test", {self.user})
|
||||||
|
|
||||||
|
# Invalid data.
|
||||||
|
self.get_success(
|
||||||
|
self.store.add_account_data_for_user(
|
||||||
|
self.user,
|
||||||
|
AccountDataTypes.IGNORED_USER_LIST,
|
||||||
|
{"ignored_users": "unexpected"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# No one ignores the user now.
|
||||||
|
self.assert_ignorers("@other:test", set())
|
Loading…
Reference in New Issue