Improve the performance of calculating ignored users in large rooms (#9024)
This allows for efficiently finding which users ignore a particular user. Co-authored-by: Erik Johnston <erik@matrix.org>
This commit is contained in:
parent
1d5c021a45
commit
23d701864f
|
@ -0,0 +1 @@
|
|||
Improved performance when calculating ignored users in large rooms.
|
|
@ -203,14 +203,18 @@ class BulkPushRuleEvaluator:
|
|||
|
||||
condition_cache = {} # type: Dict[str, bool]
|
||||
|
||||
# If the event is not a state event check if any users ignore the sender.
|
||||
if not event.is_state():
|
||||
ignorers = await self.store.ignored_by(event.sender)
|
||||
else:
|
||||
ignorers = set()
|
||||
|
||||
for uid, rules in rules_by_user.items():
|
||||
if event.sender == uid:
|
||||
continue
|
||||
|
||||
if not event.is_state():
|
||||
is_ignored = await self.store.is_ignored_by(event.sender, uid)
|
||||
if is_ignored:
|
||||
continue
|
||||
if uid in ignorers:
|
||||
continue
|
||||
|
||||
display_name = None
|
||||
profile_info = room_members.get(uid)
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
import abc
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Set, Tuple
|
||||
|
||||
from synapse.api.constants import AccountDataTypes
|
||||
from synapse.storage._base import SQLBaseStore, db_to_json
|
||||
|
@ -24,7 +24,7 @@ from synapse.storage.database import DatabasePool
|
|||
from synapse.storage.util.id_generators import StreamIdGenerator
|
||||
from synapse.types import JsonDict
|
||||
from synapse.util import json_encoder
|
||||
from synapse.util.caches.descriptors import _CacheContext, cached
|
||||
from synapse.util.caches.descriptors import cached
|
||||
from synapse.util.caches.stream_change_cache import StreamChangeCache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -287,23 +287,25 @@ class AccountDataWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta):
|
|||
"get_updated_account_data_for_user", get_updated_account_data_for_user_txn
|
||||
)
|
||||
|
||||
@cached(num_args=2, cache_context=True, max_entries=5000)
|
||||
async def is_ignored_by(
|
||||
self, ignored_user_id: str, ignorer_user_id: str, cache_context: _CacheContext
|
||||
) -> bool:
|
||||
ignored_account_data = await self.get_global_account_data_by_type_for_user(
|
||||
AccountDataTypes.IGNORED_USER_LIST,
|
||||
ignorer_user_id,
|
||||
on_invalidate=cache_context.invalidate,
|
||||
)
|
||||
if not ignored_account_data:
|
||||
return False
|
||||
@cached(max_entries=5000, iterable=True)
|
||||
async def ignored_by(self, user_id: str) -> Set[str]:
|
||||
"""
|
||||
Get users which ignore the given user.
|
||||
|
||||
try:
|
||||
return ignored_user_id in ignored_account_data.get("ignored_users", {})
|
||||
except TypeError:
|
||||
# The type of the ignored_users field is invalid.
|
||||
return False
|
||||
Params:
|
||||
user_id: The user ID which might be ignored.
|
||||
|
||||
Return:
|
||||
The user IDs which ignore the given user.
|
||||
"""
|
||||
return set(
|
||||
await self.db_pool.simple_select_onecol(
|
||||
table="ignored_users",
|
||||
keyvalues={"ignored_user_id": user_id},
|
||||
retcol="ignorer_user_id",
|
||||
desc="ignored_by",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class AccountDataStore(AccountDataWorkerStore):
|
||||
|
@ -390,18 +392,14 @@ class AccountDataStore(AccountDataWorkerStore):
|
|||
Returns:
|
||||
The maximum stream ID.
|
||||
"""
|
||||
content_json = json_encoder.encode(content)
|
||||
|
||||
async with self._account_data_id_gen.get_next() as next_id:
|
||||
# no need to lock here as account_data has a unique constraint on
|
||||
# (user_id, account_data_type) so simple_upsert will retry if
|
||||
# there is a conflict.
|
||||
await self.db_pool.simple_upsert(
|
||||
desc="add_user_account_data",
|
||||
table="account_data",
|
||||
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
|
||||
values={"stream_id": next_id, "content": content_json},
|
||||
lock=False,
|
||||
await self.db_pool.runInteraction(
|
||||
"add_user_account_data",
|
||||
self._add_account_data_for_user,
|
||||
next_id,
|
||||
user_id,
|
||||
account_data_type,
|
||||
content,
|
||||
)
|
||||
|
||||
# it's theoretically possible for the above to succeed and the
|
||||
|
@ -424,6 +422,71 @@ class AccountDataStore(AccountDataWorkerStore):
|
|||
|
||||
return self._account_data_id_gen.get_current_token()
|
||||
|
||||
def _add_account_data_for_user(
|
||||
self,
|
||||
txn,
|
||||
next_id: int,
|
||||
user_id: str,
|
||||
account_data_type: str,
|
||||
content: JsonDict,
|
||||
) -> None:
|
||||
content_json = json_encoder.encode(content)
|
||||
|
||||
# no need to lock here as account_data has a unique constraint on
|
||||
# (user_id, account_data_type) so simple_upsert will retry if
|
||||
# there is a conflict.
|
||||
self.db_pool.simple_upsert_txn(
|
||||
txn,
|
||||
table="account_data",
|
||||
keyvalues={"user_id": user_id, "account_data_type": account_data_type},
|
||||
values={"stream_id": next_id, "content": content_json},
|
||||
lock=False,
|
||||
)
|
||||
|
||||
# Ignored users get denormalized into a separate table as an optimisation.
|
||||
if account_data_type != AccountDataTypes.IGNORED_USER_LIST:
|
||||
return
|
||||
|
||||
# Insert / delete to sync the list of ignored users.
|
||||
previously_ignored_users = set(
|
||||
self.db_pool.simple_select_onecol_txn(
|
||||
txn,
|
||||
table="ignored_users",
|
||||
keyvalues={"ignorer_user_id": user_id},
|
||||
retcol="ignored_user_id",
|
||||
)
|
||||
)
|
||||
|
||||
# If the data is invalid, no one is ignored.
|
||||
ignored_users_content = content.get("ignored_users", {})
|
||||
if isinstance(ignored_users_content, dict):
|
||||
currently_ignored_users = set(ignored_users_content)
|
||||
else:
|
||||
currently_ignored_users = set()
|
||||
|
||||
# Delete entries which are no longer ignored.
|
||||
self.db_pool.simple_delete_many_txn(
|
||||
txn,
|
||||
table="ignored_users",
|
||||
column="ignored_user_id",
|
||||
iterable=previously_ignored_users - currently_ignored_users,
|
||||
keyvalues={"ignorer_user_id": user_id},
|
||||
)
|
||||
|
||||
# Add entries which are newly ignored.
|
||||
self.db_pool.simple_insert_many_txn(
|
||||
txn,
|
||||
table="ignored_users",
|
||||
values=[
|
||||
{"ignorer_user_id": user_id, "ignored_user_id": u}
|
||||
for u in currently_ignored_users - previously_ignored_users
|
||||
],
|
||||
)
|
||||
|
||||
# Invalidate the cache for any ignored users which were added or removed.
|
||||
for ignored_user_id in previously_ignored_users ^ currently_ignored_users:
|
||||
self._invalidate_cache_and_stream(txn, self.ignored_by, (ignored_user_id,))
|
||||
|
||||
async def _update_max_stream_id(self, next_id: int) -> None:
|
||||
"""Update the max stream_id
|
||||
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
This migration denormalises the account_data table into an ignored users table.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from io import StringIO
|
||||
|
||||
from synapse.storage._base import db_to_json
|
||||
from synapse.storage.engines import BaseDatabaseEngine
|
||||
from synapse.storage.prepare_database import execute_statements_from_stream
|
||||
from synapse.storage.types import Cursor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_upgrade(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
|
||||
pass
|
||||
|
||||
|
||||
def run_create(cur: Cursor, database_engine: BaseDatabaseEngine, *args, **kwargs):
|
||||
logger.info("Creating ignored_users table")
|
||||
execute_statements_from_stream(cur, StringIO(_create_commands))
|
||||
|
||||
# We now upgrade existing data, if any. We don't do this in `run_upgrade` as
|
||||
# we a) want to run these before adding constraints and b) `run_upgrade` is
|
||||
# not run on empty databases.
|
||||
insert_sql = """
|
||||
INSERT INTO ignored_users (ignorer_user_id, ignored_user_id) VALUES (?, ?)
|
||||
"""
|
||||
|
||||
logger.info("Converting existing ignore lists")
|
||||
cur.execute(
|
||||
"SELECT user_id, content FROM account_data WHERE account_data_type = 'm.ignored_user_list'"
|
||||
)
|
||||
for user_id, content_json in cur.fetchall():
|
||||
content = db_to_json(content_json)
|
||||
|
||||
# The content should be the form of a dictionary with a key
|
||||
# "ignored_users" pointing to a dictionary with keys of ignored users.
|
||||
#
|
||||
# { "ignored_users": "@someone:example.org": {} }
|
||||
ignored_users = content.get("ignored_users", {})
|
||||
if isinstance(ignored_users, dict) and ignored_users:
|
||||
cur.executemany(insert_sql, [(user_id, u) for u in ignored_users])
|
||||
|
||||
# Add indexes after inserting data for efficiency.
|
||||
logger.info("Adding constraints to ignored_users table")
|
||||
execute_statements_from_stream(cur, StringIO(_constraints_commands))
|
||||
|
||||
|
||||
# there might be duplicates, so the easiest way to achieve this is to create a new
|
||||
# table with the right data, and renaming it into place
|
||||
|
||||
_create_commands = """
|
||||
-- Users which are ignored when calculating push notifications. This data is
|
||||
-- denormalized from account data.
|
||||
CREATE TABLE IF NOT EXISTS ignored_users(
|
||||
ignorer_user_id TEXT NOT NULL, -- The user ID of the user who is ignoring another user. (This is a local user.)
|
||||
ignored_user_id TEXT NOT NULL -- The user ID of the user who is being ignored. (This is a local or remote user.)
|
||||
);
|
||||
"""
|
||||
|
||||
_constraints_commands = """
|
||||
CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id);
|
||||
|
||||
-- Add an index on ignored_users since look-ups are done to get all ignorers of an ignored user.
|
||||
CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id);
|
||||
"""
|
|
@ -38,7 +38,7 @@ logger = logging.getLogger(__name__)
|
|||
# XXX: If you're about to bump this to 59 (or higher) please create an update
|
||||
# that drops the unused `cache_invalidation_stream` table, as per #7436!
|
||||
# XXX: Also add an update to drop `account_data_max_stream_id` as per #7656!
|
||||
SCHEMA_VERSION = 58
|
||||
SCHEMA_VERSION = 59
|
||||
|
||||
dir_path = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
|
|
@ -0,0 +1,120 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2021 The Matrix.org Foundation C.I.C.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from typing import Iterable, Set
|
||||
|
||||
from synapse.api.constants import AccountDataTypes
|
||||
|
||||
from tests import unittest
|
||||
|
||||
|
||||
class IgnoredUsersTestCase(unittest.HomeserverTestCase):
|
||||
def prepare(self, hs, reactor, clock):
|
||||
self.store = self.hs.get_datastore()
|
||||
self.user = "@user:test"
|
||||
|
||||
def _update_ignore_list(
|
||||
self, *ignored_user_ids: Iterable[str], ignorer_user_id: str = None
|
||||
) -> None:
|
||||
"""Update the account data to block the given users."""
|
||||
if ignorer_user_id is None:
|
||||
ignorer_user_id = self.user
|
||||
|
||||
self.get_success(
|
||||
self.store.add_account_data_for_user(
|
||||
ignorer_user_id,
|
||||
AccountDataTypes.IGNORED_USER_LIST,
|
||||
{"ignored_users": {u: {} for u in ignored_user_ids}},
|
||||
)
|
||||
)
|
||||
|
||||
def assert_ignorers(
|
||||
self, ignored_user_id: str, expected_ignorer_user_ids: Set[str]
|
||||
) -> None:
|
||||
self.assertEqual(
|
||||
self.get_success(self.store.ignored_by(ignored_user_id)),
|
||||
expected_ignorer_user_ids,
|
||||
)
|
||||
|
||||
def test_ignoring_users(self):
|
||||
"""Basic adding/removing of users from the ignore list."""
|
||||
self._update_ignore_list("@other:test", "@another:remote")
|
||||
|
||||
# Check a user which no one ignores.
|
||||
self.assert_ignorers("@user:test", set())
|
||||
|
||||
# Check a local user which is ignored.
|
||||
self.assert_ignorers("@other:test", {self.user})
|
||||
|
||||
# Check a remote user which is ignored.
|
||||
self.assert_ignorers("@another:remote", {self.user})
|
||||
|
||||
# Add one user, remove one user, and leave one user.
|
||||
self._update_ignore_list("@foo:test", "@another:remote")
|
||||
|
||||
# Check the removed user.
|
||||
self.assert_ignorers("@other:test", set())
|
||||
|
||||
# Check the added user.
|
||||
self.assert_ignorers("@foo:test", {self.user})
|
||||
|
||||
# Check the removed user.
|
||||
self.assert_ignorers("@another:remote", {self.user})
|
||||
|
||||
def test_caching(self):
|
||||
"""Ensure that caching works properly between different users."""
|
||||
# The first user ignores a user.
|
||||
self._update_ignore_list("@other:test")
|
||||
self.assert_ignorers("@other:test", {self.user})
|
||||
|
||||
# The second user ignores them.
|
||||
self._update_ignore_list("@other:test", ignorer_user_id="@second:test")
|
||||
self.assert_ignorers("@other:test", {self.user, "@second:test"})
|
||||
|
||||
# The first user un-ignores them.
|
||||
self._update_ignore_list()
|
||||
self.assert_ignorers("@other:test", {"@second:test"})
|
||||
|
||||
def test_invalid_data(self):
|
||||
"""Invalid data ends up clearing out the ignored users list."""
|
||||
# Add some data and ensure it is there.
|
||||
self._update_ignore_list("@other:test")
|
||||
self.assert_ignorers("@other:test", {self.user})
|
||||
|
||||
# No ignored_users key.
|
||||
self.get_success(
|
||||
self.store.add_account_data_for_user(
|
||||
self.user, AccountDataTypes.IGNORED_USER_LIST, {},
|
||||
)
|
||||
)
|
||||
|
||||
# No one ignores the user now.
|
||||
self.assert_ignorers("@other:test", set())
|
||||
|
||||
# Add some data and ensure it is there.
|
||||
self._update_ignore_list("@other:test")
|
||||
self.assert_ignorers("@other:test", {self.user})
|
||||
|
||||
# Invalid data.
|
||||
self.get_success(
|
||||
self.store.add_account_data_for_user(
|
||||
self.user,
|
||||
AccountDataTypes.IGNORED_USER_LIST,
|
||||
{"ignored_users": "unexpected"},
|
||||
)
|
||||
)
|
||||
|
||||
# No one ignores the user now.
|
||||
self.assert_ignorers("@other:test", set())
|
Loading…
Reference in New Issue