From 82c8799ec7f6676555033f5d804cbed443a1ea3e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Sat, 19 Oct 2019 09:06:15 +0100 Subject: [PATCH 01/85] Set room version default to 5 --- changelog.d/6220.feature | 1 + docs/sample_config.yaml | 2 +- synapse/config/server.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6220.feature diff --git a/changelog.d/6220.feature b/changelog.d/6220.feature new file mode 100644 index 0000000000..8343e9912b --- /dev/null +++ b/changelog.d/6220.feature @@ -0,0 +1 @@ +Increase default room version from 4 to 5, thereby enforcing server key validity period checks. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 8226978ba6..af3ca0f722 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -72,7 +72,7 @@ pid_file: DATADIR/homeserver.pid # For example, for room version 1, default_room_version should be set # to "1". # -#default_room_version: "4" +#default_room_version: "5" # The GC threshold parameters to pass to `gc.set_threshold`, if defined # diff --git a/synapse/config/server.py b/synapse/config/server.py index afc4d6a4ab..26e6d84c09 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -41,7 +41,7 @@ logger = logging.Logger(__name__) # in the list. DEFAULT_BIND_ADDRESSES = ["::", "0.0.0.0"] -DEFAULT_ROOM_VERSION = "4" +DEFAULT_ROOM_VERSION = "5" ROOM_COMPLEXITY_TOO_GREAT = ( "Your homeserver is unable to join rooms this large or complex. " From 7c8c97e635811609c5a7ae4c0bb94e6573c30753 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 30 Oct 2019 15:12:49 +0000 Subject: [PATCH 02/85] Split purge API into events vs state --- synapse/handlers/pagination.py | 7 +- synapse/storage/__init__.py | 2 + synapse/storage/data_stores/main/events.py | 328 ++++++++++----------- synapse/storage/data_stores/main/state.py | 23 ++ synapse/storage/purge_events.py | 117 ++++++++ tests/storage/test_purge.py | 15 +- 6 files changed, 308 insertions(+), 184 deletions(-) create mode 100644 synapse/storage/purge_events.py diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 5744f4579d..9088ba14cd 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -69,6 +69,7 @@ class PaginationHandler(object): self.hs = hs self.auth = hs.get_auth() self.store = hs.get_datastore() + self.storage = hs.get_storage() self.clock = hs.get_clock() self._server_name = hs.hostname @@ -125,7 +126,9 @@ class PaginationHandler(object): self._purges_in_progress_by_room.add(room_id) try: with (yield self.pagination_lock.write(room_id)): - yield self.store.purge_history(room_id, token, delete_local_events) + yield self.storage.purge_events.purge_history( + room_id, token, delete_local_events + ) logger.info("[purge] complete") self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE except Exception: @@ -168,7 +171,7 @@ class PaginationHandler(object): if joined: raise SynapseError(400, "Users are still joined to this room") - await self.store.purge_room(room_id) + await self.storage.purge_events.purge_room(room_id) @defer.inlineCallbacks def get_messages( diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py index a6429d17ed..3646ebd007 100644 --- a/synapse/storage/__init__.py +++ b/synapse/storage/__init__.py @@ -30,6 +30,7 @@ stored in `synapse.storage.schema`. from synapse.storage.data_stores import DataStores from synapse.storage.data_stores.main import DataStore from synapse.storage.persist_events import EventsPersistenceStorage +from synapse.storage.purge_events import PurgeEventsStorage __all__ = ["DataStores", "DataStore"] @@ -45,6 +46,7 @@ class Storage(object): self.main = stores.main self.persistence = EventsPersistenceStorage(hs, stores) + self.purge_events = PurgeEventsStorage(hs, stores) def are_all_users_on_domain(txn, database_engine, domain): diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 7c3607f308..4eacba8058 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1368,6 +1368,10 @@ class EventsStore( if True, we will delete local events as well as remote ones (instead of just marking them as outliers and deleting their state groups). + + Returns: + Deferred[set[int]]: The set of state groups that reference deleted + events. """ return self.runInteraction( @@ -1521,60 +1525,6 @@ class EventsStore( "[purge] found %i referenced state groups", len(referenced_state_groups) ) - logger.info("[purge] finding state groups that can be deleted") - - _ = self._find_unreferenced_groups_during_purge(txn, referenced_state_groups) - state_groups_to_delete, remaining_state_groups = _ - - logger.info( - "[purge] found %i state groups to delete", len(state_groups_to_delete) - ) - - logger.info( - "[purge] de-delta-ing %i remaining state groups", - len(remaining_state_groups), - ) - - # Now we turn the state groups that reference to-be-deleted state - # groups to non delta versions. - for sg in remaining_state_groups: - logger.info("[purge] de-delta-ing remaining state group %s", sg) - curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) - curr_state = curr_state[sg] - - self._simple_delete_txn( - txn, table="state_groups_state", keyvalues={"state_group": sg} - ) - - self._simple_delete_txn( - txn, table="state_group_edges", keyvalues={"state_group": sg} - ) - - self._simple_insert_many_txn( - txn, - table="state_groups_state", - values=[ - { - "state_group": sg, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } - for key, state_id in iteritems(curr_state) - ], - ) - - logger.info("[purge] removing redundant state groups") - txn.executemany( - "DELETE FROM state_groups_state WHERE state_group = ?", - ((sg,) for sg in state_groups_to_delete), - ) - txn.executemany( - "DELETE FROM state_groups WHERE id = ?", - ((sg,) for sg in state_groups_to_delete), - ) - logger.info("[purge] removing events from event_to_state_groups") txn.execute( "DELETE FROM event_to_state_groups " @@ -1661,87 +1611,7 @@ class EventsStore( logger.info("[purge] done") - def _find_unreferenced_groups_during_purge(self, txn, state_groups): - """Used when purging history to figure out which state groups can be - deleted and which need to be de-delta'ed (due to one of its prev groups - being scheduled for deletion). - - Args: - txn - state_groups (set[int]): Set of state groups referenced by events - that are going to be deleted. - - Returns: - tuple[set[int], set[int]]: The set of state groups that can be - deleted and the set of state groups that need to be de-delta'ed - """ - # Graph of state group -> previous group - graph = {} - - # Set of events that we have found to be referenced by events - referenced_groups = set() - - # Set of state groups we've already seen - state_groups_seen = set(state_groups) - - # Set of state groups to handle next. - next_to_search = set(state_groups) - while next_to_search: - # We bound size of groups we're looking up at once, to stop the - # SQL query getting too big - if len(next_to_search) < 100: - current_search = next_to_search - next_to_search = set() - else: - current_search = set(itertools.islice(next_to_search, 100)) - next_to_search -= current_search - - # Check if state groups are referenced - sql = """ - SELECT DISTINCT state_group FROM event_to_state_groups - LEFT JOIN events_to_purge AS ep USING (event_id) - WHERE ep.event_id IS NULL AND - """ - clause, args = make_in_list_sql_clause( - txn.database_engine, "state_group", current_search - ) - txn.execute(sql + clause, list(args)) - - referenced = set(sg for sg, in txn) - referenced_groups |= referenced - - # We don't continue iterating up the state group graphs for state - # groups that are referenced. - current_search -= referenced - - rows = self._simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=current_search, - keyvalues={}, - retcols=("prev_state_group", "state_group"), - ) - - prevs = set(row["state_group"] for row in rows) - # We don't bother re-handling groups we've already seen - prevs -= state_groups_seen - next_to_search |= prevs - state_groups_seen |= prevs - - for row in rows: - # Note: Each state group can have at most one prev group - graph[row["state_group"]] = row["prev_state_group"] - - to_delete = state_groups_seen - referenced_groups - - to_dedelta = set() - for sg in referenced_groups: - prev_sg = graph.get(sg) - if prev_sg and prev_sg in to_delete: - to_dedelta.add(sg) - - return to_delete, to_dedelta + return referenced_state_groups def purge_room(self, room_id): """Deletes all record of a room @@ -1753,46 +1623,7 @@ class EventsStore( return self.runInteraction("purge_room", self._purge_room_txn, room_id) def _purge_room_txn(self, txn, room_id): - # first we have to delete the state groups states - logger.info("[purge] removing %s from state_groups_state", room_id) - - txn.execute( - """ - DELETE FROM state_groups_state WHERE state_group IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # ... and the state group edges - logger.info("[purge] removing %s from state_group_edges", room_id) - - txn.execute( - """ - DELETE FROM state_group_edges WHERE state_group IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # ... and the state groups - logger.info("[purge] removing %s from state_groups", room_id) - - txn.execute( - """ - DELETE FROM state_groups WHERE id IN ( - SELECT state_group FROM events JOIN event_to_state_groups USING(event_id) - WHERE events.room_id=? - ) - """, - (room_id,), - ) - - # and then tables which lack an index on room_id but have one on event_id + # First delete tables which lack an index on room_id but have one on event_id for table in ( "event_auth", "event_edges", @@ -1881,6 +1712,153 @@ class EventsStore( logger.info("[purge] done") + def purge_unreferenced_state_groups(self, room_id, state_groups_to_delete): + """Deletes no longer referenced state groups and de-deltas any state + groups that reference them. + """ + + return self.runInteraction( + "purge_unreferenced_state_groups", + self._purge_unreferenced_state_groups, + room_id, + state_groups_to_delete, + ) + + def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete): + logger.info( + "[purge] found %i state groups to delete", len(state_groups_to_delete) + ) + + rows = self._simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=state_groups_to_delete, + keyvalues={}, + retcols=("state_group",), + ) + + remaining_state_groups = set( + row["state_group"] + for row in rows + if row["state_group"] not in state_groups_to_delete + ) + + logger.info( + "[purge] de-delta-ing %i remaining state groups", + len(remaining_state_groups), + ) + + # Now we turn the state groups that reference to-be-deleted state + # groups to non delta versions. + for sg in remaining_state_groups: + logger.info("[purge] de-delta-ing remaining state group %s", sg) + curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) + curr_state = curr_state[sg] + + self._simple_delete_txn( + txn, table="state_groups_state", keyvalues={"state_group": sg} + ) + + self._simple_delete_txn( + txn, table="state_group_edges", keyvalues={"state_group": sg} + ) + + self._simple_insert_many_txn( + txn, + table="state_groups_state", + values=[ + { + "state_group": sg, + "room_id": room_id, + "type": key[0], + "state_key": key[1], + "event_id": state_id, + } + for key, state_id in iteritems(curr_state) + ], + ) + + logger.info("[purge] removing redundant state groups") + txn.executemany( + "DELETE FROM state_groups_state WHERE state_group = ?", + ((sg,) for sg in state_groups_to_delete), + ) + txn.executemany( + "DELETE FROM state_groups WHERE id = ?", + ((sg,) for sg in state_groups_to_delete), + ) + + @defer.inlineCallbacks + def get_previous_state_groups(self, state_groups): + """Fetch the previous groups of the given state groups. + + Args: + state_groups (Iterable[int]) + + Returns: + Deferred[dict[int, int]]: mapping from state group to previous + state group. + """ + + rows = yield self._simple_select_many_batch( + table="state_group_edges", + column="prev_state_group", + iterable=state_groups, + keyvalues={}, + retcols=("prev_state_group", "state_group"), + desc="get_previous_state_groups", + ) + + return {row["state_group"]: row["prev_state_group"] for row in rows} + + def purge_room_state(self, room_id): + """Deletes all record of a room from state tables + + Args: + room_id (str): + """ + + return self.runInteraction( + "purge_room_state", self._purge_room_state_txn, room_id + ) + + def _purge_room_state_txn(self, txn, room_id): + # first we have to delete the state groups states + logger.info("[purge] removing %s from state_groups_state", room_id) + + txn.execute( + """ + DELETE FROM state_groups_state + INNER JOIN state_groups USING (event_id) + WHEREE state_groups.room_id = ? + """, + (room_id,), + ) + + # ... and the state group edges + logger.info("[purge] removing %s from state_group_edges", room_id) + + txn.execute( + """ + DELETE FROM state_group_edges + INNER JOIN state_groups USING (event_id) + WHEREE state_groups.room_id = ? + ) + """, + (room_id,), + ) + + # ... and the state groups + logger.info("[purge] removing %s from state_groups", room_id) + + txn.execute( + """ + DELETE FROM state_groups WHEREE room_id = ? + """, + (room_id,), + ) + async def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream """ diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 9b2207075b..36be8f0a9d 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -989,6 +989,29 @@ class StateGroupWorkerStore( return self.runInteraction("store_state_group", _store_state_group_txn) + @defer.inlineCallbacks + def get_referenced_state_groups(self, state_groups): + """Check if the state groups are referenced by events. + + Args: + state_groups (Iterable[int]) + + Returns: + Deferred[set[int]]: The subset of state groups that are + referenced. + """ + + rows = yield self._simple_select_many_batch( + table="event_to_state_groups", + column="state_group", + iterable=state_groups, + keyvalues={}, + retcols=("DISTINCT state_group",), + desc="get_referenced_state_groups", + ) + + return set(row["state_group"] for row in rows) + class StateBackgroundUpdateStore( StateGroupBackgroundUpdateStore, BackgroundUpdateStore diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py new file mode 100644 index 0000000000..dd45df0c88 --- /dev/null +++ b/synapse/storage/purge_events.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import itertools +import logging + +from twisted.internet import defer + +logger = logging.getLogger(__name__) + + +class PurgeEventsStorage(object): + """High level interface for purging rooms and event history. + """ + + def __init__(self, hs, stores): + self.stores = stores + + @defer.inlineCallbacks + def purge_room(self, room_id: str): + """Deletes all record of a room + """ + + yield self.stores.main.purge_room(room_id) + yield self.stores.main.purge_room_state(room_id) + + @defer.inlineCallbacks + def purge_history(self, room_id, token, delete_local_events): + """Deletes room history before a certain point + + Args: + room_id (str): + + token (str): A topological token to delete events before + + delete_local_events (bool): + if True, we will delete local events as well as remote ones + (instead of just marking them as outliers and deleting their + state groups). + """ + state_groups = yield self.stores.main.purge_history( + room_id, token, delete_local_events + ) + + logger.info("[purge] finding state groups that can be deleted") + + sg_to_delete = yield self._find_unreferenced_groups(state_groups) + + yield self.stores.main.purge_unreferenced_state_groups(room_id, sg_to_delete) + + @defer.inlineCallbacks + def _find_unreferenced_groups(self, state_groups): + """Used when purging history to figure out which state groups can be + deleted. + + Args: + state_groups (set[int]): Set of state groups referenced by events + that are going to be deleted. + + Returns: + Deferred[set[int]] The set of state groups that can be deleted. + """ + # Graph of state group -> previous group + graph = {} + + # Set of events that we have found to be referenced by events + referenced_groups = set() + + # Set of state groups we've already seen + state_groups_seen = set(state_groups) + + # Set of state groups to handle next. + next_to_search = set(state_groups) + while next_to_search: + # We bound size of groups we're looking up at once, to stop the + # SQL query getting too big + if len(next_to_search) < 100: + current_search = next_to_search + next_to_search = set() + else: + current_search = set(itertools.islice(next_to_search, 100)) + next_to_search -= current_search + + referenced = yield self.stores.main.get_referenced_state_groups( + current_search + ) + referenced_groups |= referenced + + # We don't continue iterating up the state group graphs for state + # groups that are referenced. + current_search -= referenced + + edges = yield self.stores.main.get_previous_state_groups(current_search) + + prevs = set(edges.values()) + # We don't bother re-handling groups we've already seen + prevs -= state_groups_seen + next_to_search |= prevs + state_groups_seen |= prevs + + graph.update(edges) + + to_delete = state_groups_seen - referenced_groups + + return to_delete diff --git a/tests/storage/test_purge.py b/tests/storage/test_purge.py index f671599cb8..b9fafaa1a6 100644 --- a/tests/storage/test_purge.py +++ b/tests/storage/test_purge.py @@ -40,23 +40,24 @@ class PurgeTests(HomeserverTestCase): third = self.helper.send(self.room_id, body="test3") last = self.helper.send(self.room_id, body="test4") - storage = self.hs.get_datastore() + store = self.hs.get_datastore() + storage = self.hs.get_storage() # Get the topological token - event = storage.get_topological_token_for_event(last["event_id"]) + event = store.get_topological_token_for_event(last["event_id"]) self.pump() event = self.successResultOf(event) # Purge everything before this topological token - purge = storage.purge_history(self.room_id, event, True) + purge = storage.purge_events.purge_history(self.room_id, event, True) self.pump() self.assertEqual(self.successResultOf(purge), None) # Try and get the events - get_first = storage.get_event(first["event_id"]) - get_second = storage.get_event(second["event_id"]) - get_third = storage.get_event(third["event_id"]) - get_last = storage.get_event(last["event_id"]) + get_first = store.get_event(first["event_id"]) + get_second = store.get_event(second["event_id"]) + get_third = store.get_event(third["event_id"]) + get_last = store.get_event(last["event_id"]) self.pump() # 1-3 should fail and last will succeed, meaning that 1-3 are deleted From ecfba89a784db12b48074ade3a44092267ba9cf7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 30 Oct 2019 15:14:29 +0000 Subject: [PATCH 03/85] Newsfile --- changelog.d/6295.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6295.misc diff --git a/changelog.d/6295.misc b/changelog.d/6295.misc new file mode 100644 index 0000000000..a3e6b8296e --- /dev/null +++ b/changelog.d/6295.misc @@ -0,0 +1 @@ +Split out state storage into separate data store. From 8f5bbdb987c92ee3e9b4170cc8ce296d87b1555d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:22:08 +0000 Subject: [PATCH 04/85] Fix purge room API --- synapse/storage/data_stores/main/events.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 63b09a09e8..a904a71570 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1829,8 +1829,10 @@ class EventsStore( txn.execute( """ DELETE FROM state_groups_state - INNER JOIN state_groups USING (event_id) - WHEREE state_groups.room_id = ? + WHERE state_group IN ( + SELECT state_group FROM state_groups + WHERE room_id = ? + ) """, (room_id,), ) @@ -1841,8 +1843,9 @@ class EventsStore( txn.execute( """ DELETE FROM state_group_edges - INNER JOIN state_groups USING (event_id) - WHEREE state_groups.room_id = ? + WHERE state_group IN ( + SELECT state_group FROM state_groups + WHERE room_id = ? ) """, (room_id,), @@ -1853,7 +1856,7 @@ class EventsStore( txn.execute( """ - DELETE FROM state_groups WHEREE room_id = ? + DELETE FROM state_groups WHERE room_id = ? """, (room_id,), ) From f91f2a1f92ee2eb5758184ef0df66490592587d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:25:21 +0000 Subject: [PATCH 05/85] Docstrings --- synapse/storage/data_stores/main/events.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index a904a71570..108b2e8bd5 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,6 +19,7 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps +from typing import Set from six import iteritems, text_type from six.moves import range @@ -1370,8 +1371,8 @@ class EventsStore( state groups). Returns: - Deferred[set[int]]: The set of state groups that reference deleted - events. + Deferred[set[int]]: The set of state groups that are referenced by + deleted events. """ return self.runInteraction( @@ -1711,9 +1712,16 @@ class EventsStore( logger.info("[purge] done") - def purge_unreferenced_state_groups(self, room_id, state_groups_to_delete): + def purge_unreferenced_state_groups( + self, room_id: str, state_groups_to_delete: Set[int] + ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. + + Args: + room_id: The room the state groups belong to (must all be in the + same room). + state_groups_to_delete: Set of all state groups to delete. """ return self.runInteraction( From 61be1a29262a9a3553537f257a4187ef355684f5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:39:26 +0000 Subject: [PATCH 06/85] Add state_groups.room_id index --- .../schema/delta/56/state_group_room_idx.sql | 17 +++++++++++++++++ synapse/storage/data_stores/main/state.py | 7 +++++++ 2 files changed, 24 insertions(+) create mode 100644 synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql diff --git a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql new file mode 100644 index 0000000000..7916ef18b2 --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('state_groups_room_id_idx', '{}'); diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 36be8f0a9d..bf6de4ca22 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1021,6 +1021,7 @@ class StateBackgroundUpdateStore( STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" + STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" def __init__(self, db_conn, hs): super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1044,6 +1045,12 @@ class StateBackgroundUpdateStore( table="event_to_state_groups", columns=["state_group"], ) + self.register_background_index_update( + self.STATE_GROUPS_ROOM_INDEX_UPDATE_NAME, + index_name="state_groups_room_id_idx", + table="state_groups", + columns=["room_id"], + ) @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): From fb1a6914cf953ed237235274a9aab62aafec5b4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 31 Oct 2019 15:45:48 +0000 Subject: [PATCH 07/85] Update log line to lie a little less --- synapse/storage/data_stores/main/events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 108b2e8bd5..3a9b6bed45 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1509,7 +1509,7 @@ class EventsStore( [(room_id, event_id) for event_id, in new_backwards_extrems], ) - logger.info("[purge] finding redundant state groups") + logger.info("[purge] finding state groups referenced by deleted events") # Get all state groups that are referenced by events that are to be # deleted. We then go and check if they are referenced by other events From 669b6cbda3e545f277def545954948090aec8980 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 1 Nov 2019 11:32:20 +0000 Subject: [PATCH 08/85] Fix up comment --- synapse/storage/data_stores/main/events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 3a9b6bed45..a71d7346d2 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1512,8 +1512,7 @@ class EventsStore( logger.info("[purge] finding state groups referenced by deleted events") # Get all state groups that are referenced by events that are to be - # deleted. We then go and check if they are referenced by other events - # or state groups, and if not we delete them. + # deleted. txn.execute( """ SELECT DISTINCT state_group FROM events_to_purge From c9a1b80a74863b98b5dce2954b8486a068a0151f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:41:28 +0000 Subject: [PATCH 09/85] MSC2326: Add background update to take previous events into account --- .../data_stores/main/events_bg_updates.py | 55 +++++++++++++++++++ .../56/event_labels_background_update.sql | 17 ++++++ 2 files changed, 72 insertions(+) create mode 100644 synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 51352b9966..e702fca149 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -21,6 +21,7 @@ from canonicaljson import json from twisted.internet import defer +from synapse.api.constants import LabelsField from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore @@ -85,6 +86,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "event_fix_redactions_bytes", self._event_fix_redactions_bytes ) + self.register_background_update_handler( + "event_store_labels", self._event_store_labels + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -503,3 +508,53 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): yield self._end_background_update("event_fix_redactions_bytes") return 1 + + @defer.inlineCallbacks + def _event_store_labels(self, progress, batch_size): + """Stores labels for events.""" + last_event_id = progress.get("last_event_id", "") + + def _event_store_labels_txn(txn): + txn.execute( + """ + SELECT event_id, json FROM event_json + WHERE event_id > ? + LIMIT ? + """, + (last_event_id, batch_size) + ) + + rows = txn.fetchall() + if not rows: + return True + + for row in rows: + event_id = row["event_id"] + event_json = json.loads(row["json"]) + + self._simple_insert_many_txn( + txn=txn, + table="event_labels", + values=[ + { + "event_id": event_id, + "label": label, + } + for label in event_json["content"].get(LabelsField) + ] + ) + + self._background_update_progress_txn( + txn, "event_store_labels", {"last_event_id": event_id} + ) + + return len(rows) == batch_size + + end = yield self.runInteraction( + desc="event_store_labels", func=_event_store_labels_txn + ) + + if end: + yield self._end_background_update("event_store_labels") + + return batch_size diff --git a/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql b/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql new file mode 100644 index 0000000000..5f5e0499ae --- /dev/null +++ b/synapse/storage/data_stores/main/schema/delta/56/event_labels_background_update.sql @@ -0,0 +1,17 @@ +/* Copyright 2019 The Matrix.org Foundation C.I.C. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('event_store_labels', '{}'); From a46574281d90d45e4a5f3ecede3261d17726719e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:46:16 +0000 Subject: [PATCH 10/85] Use the right format for rows --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index e702fca149..013242b04e 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -524,7 +524,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size) ) - rows = txn.fetchall() + rows = self.cursor_to_dict(txn) if not rows: return True From 07cb38e965a29624b3cc8a07d4c86f61dbe7b72a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:47:09 +0000 Subject: [PATCH 11/85] Use a sensible default value for labels --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 013242b04e..448048e11d 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -540,7 +540,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "event_id": event_id, "label": label, } - for label in event_json["content"].get(LabelsField) + for label in event_json["content"].get(LabelsField, []) ] ) From 911b03ca312160bd8f80f010da4eaf63d7f602da Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 14:49:41 +0000 Subject: [PATCH 12/85] Don't try to process events we already have a label for --- synapse/storage/data_stores/main/events_bg_updates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 448048e11d..c8168f6926 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -518,7 +518,8 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn.execute( """ SELECT event_id, json FROM event_json - WHERE event_id > ? + LEFT JOIN event_labels USING (event_id) + WHERE event_id > ? AND label IS NULL LIMIT ? """, (last_event_id, batch_size) From 416c7baee64e0d7145c2447c51326af1c901a176 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 15:00:40 +0000 Subject: [PATCH 13/85] Changelog --- changelog.d/6310.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6310.feature diff --git a/changelog.d/6310.feature b/changelog.d/6310.feature new file mode 100644 index 0000000000..b7ff3fad3b --- /dev/null +++ b/changelog.d/6310.feature @@ -0,0 +1 @@ +Implement label-based filtering. From 1c1268245d501e3edefbe3a4eb40c238124bb2e6 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 15:01:29 +0000 Subject: [PATCH 14/85] Lint --- synapse/storage/data_stores/main/events_bg_updates.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index c8168f6926..10ebc6d865 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -522,7 +522,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): WHERE event_id > ? AND label IS NULL LIMIT ? """, - (last_event_id, batch_size) + (last_event_id, batch_size), ) rows = self.cursor_to_dict(txn) @@ -537,12 +537,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn=txn, table="event_labels", values=[ - { - "event_id": event_id, - "label": label, - } + {"event_id": event_id, "label": label} for label in event_json["content"].get(LabelsField, []) - ] + ], ) self._background_update_progress_txn( From 1586f2c7e716fb066aa5551bdce654be8e35b01d Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 23:01:26 +0000 Subject: [PATCH 15/85] Fix exit condition --- synapse/storage/data_stores/main/events_bg_updates.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 10ebc6d865..72d600c51b 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -546,7 +546,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn, "event_store_labels", {"last_event_id": event_id} ) - return len(rows) == batch_size + # We want to return true (to end the background update) only when + # the query returned with less rows than we asked for. + return len(rows) != batch_size end = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn From 49008e674f6c5463168e970349ce84c488407834 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 31 Oct 2019 23:12:15 +0000 Subject: [PATCH 16/85] TODO --- synapse/storage/data_stores/main/events_bg_updates.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 72d600c51b..b7b390485b 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -548,6 +548,8 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. + # TODO: this currently doesn't work, i.e. it only runs once whereas + # its opposite does the whole thing, investigate. return len(rows) != batch_size end = yield self.runInteraction( From 824bba2f78f86a9364a87c6ce79a40bff0f73cbf Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 1 Nov 2019 12:03:04 +0000 Subject: [PATCH 17/85] Correctly order results --- synapse/storage/data_stores/main/events_bg_updates.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index b7b390485b..5ba1cff468 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -21,7 +21,7 @@ from canonicaljson import json from twisted.internet import defer -from synapse.api.constants import LabelsField +from synapse.api.constants import EventContentFields from synapse.storage._base import make_in_list_sql_clause from synapse.storage.background_updates import BackgroundUpdateStore @@ -520,7 +520,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): SELECT event_id, json FROM event_json LEFT JOIN event_labels USING (event_id) WHERE event_id > ? AND label IS NULL - LIMIT ? + ORDER BY event_id LIMIT ? """, (last_event_id, batch_size), ) @@ -538,7 +538,9 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): table="event_labels", values=[ {"event_id": event_id, "label": label} - for label in event_json["content"].get(LabelsField, []) + for label in event_json["content"].get( + EventContentFields.Labels, [] + ) ], ) @@ -548,8 +550,6 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. - # TODO: this currently doesn't work, i.e. it only runs once whereas - # its opposite does the whole thing, investigate. return len(rows) != batch_size end = yield self.runInteraction( From 3b29a73f9fc18912a6b775a083d9449d426fa790 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 1 Nov 2019 12:07:05 +0000 Subject: [PATCH 18/85] Print out the actual number of affected rows --- synapse/storage/data_stores/main/events_bg_updates.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 5ba1cff468..a703927471 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -527,7 +527,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): rows = self.cursor_to_dict(txn) if not rows: - return True + return True, 0 for row in rows: event_id = row["event_id"] @@ -550,13 +550,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): # We want to return true (to end the background update) only when # the query returned with less rows than we asked for. - return len(rows) != batch_size + return len(rows) != batch_size, len(rows) - end = yield self.runInteraction( + end, num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn ) if end: yield self._end_background_update("event_store_labels") - return batch_size + return num_rows From 7134ca7daa7ead8104e3c51ba4bc730d99d098e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Nov 2019 13:36:57 +0000 Subject: [PATCH 19/85] Change to not require a state_groups.room_id index. This does mean that we won't clean up orphaned state groups (i.e. state groups that were persisted but the associated event wasn't). --- synapse/storage/data_stores/main/events.py | 70 ++++++++++++------- .../schema/delta/56/state_group_room_idx.sql | 17 ----- synapse/storage/data_stores/main/state.py | 7 -- synapse/storage/purge_events.py | 4 +- 4 files changed, 45 insertions(+), 53 deletions(-) delete mode 100644 synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 68f27078c4..3049a21dc5 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1624,7 +1624,10 @@ class EventsStore( """Deletes all record of a room Args: - room_id (str): + room_id (str) + + Returns: + Deferred[List[int]]: The list of state groups to delete. """ return self.runInteraction("purge_room", self._purge_room_txn, room_id) @@ -1714,10 +1717,24 @@ class EventsStore( # index on them. In any case we should be clearing out 'stream' tables # periodically anyway (#5888) + # Now we fetch all the state groups that should be deleted. + txn.execute( + """ + SELECT DISTINCT state_group FROM events + INNER JOIN event_to_state_groups USING(event_id) + WHERE events.room_id = ? + """, + (room_id,), + ) + + state_groups = [row[0] for row in txn] + # TODO: we could probably usefully do a bunch of cache invalidation here logger.info("[purge] done") + return state_groups + def purge_unreferenced_state_groups( self, room_id: str, state_groups_to_delete: Set[int] ) -> defer.Deferred: @@ -1825,54 +1842,53 @@ class EventsStore( return {row["state_group"]: row["prev_state_group"] for row in rows} - def purge_room_state(self, room_id): + def purge_room_state(self, room_id, state_groups_to_delete): """Deletes all record of a room from state tables Args: room_id (str): + state_groups_to_delete (list[int]): State groups to delete """ return self.runInteraction( - "purge_room_state", self._purge_room_state_txn, room_id + "purge_room_state", + self._purge_room_state_txn, + room_id, + state_groups_to_delete, ) - def _purge_room_state_txn(self, txn, room_id): + def _purge_room_state_txn(self, txn, room_id, state_groups_to_delete): # first we have to delete the state groups states logger.info("[purge] removing %s from state_groups_state", room_id) - txn.execute( - """ - DELETE FROM state_groups_state - WHERE state_group IN ( - SELECT state_group FROM state_groups - WHERE room_id = ? - ) - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_groups_state", + column="state_group", + iterable=state_groups_to_delete, + keyvalues={}, ) # ... and the state group edges logger.info("[purge] removing %s from state_group_edges", room_id) - txn.execute( - """ - DELETE FROM state_group_edges - WHERE state_group IN ( - SELECT state_group FROM state_groups - WHERE room_id = ? - ) - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_group_edges", + column="state_group", + iterable=state_groups_to_delete, + keyvalues={}, ) # ... and the state groups logger.info("[purge] removing %s from state_groups", room_id) - txn.execute( - """ - DELETE FROM state_groups WHERE room_id = ? - """, - (room_id,), + self._simple_delete_many_txn( + txn, + table="state_groups", + column="id", + iterable=state_groups_to_delete, + keyvalues={}, ) async def is_event_after(self, event_id1, event_id2): diff --git a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql b/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql deleted file mode 100644 index 7916ef18b2..0000000000 --- a/synapse/storage/data_stores/main/schema/delta/56/state_group_room_idx.sql +++ /dev/null @@ -1,17 +0,0 @@ -/* Copyright 2019 The Matrix.org Foundation C.I.C. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -INSERT INTO background_updates (update_name, progress_json) VALUES - ('state_groups_room_id_idx', '{}'); diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index e1d3041c7c..5c5b15840e 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1023,7 +1023,6 @@ class StateBackgroundUpdateStore( STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" CURRENT_STATE_INDEX_UPDATE_NAME = "current_state_members_idx" EVENT_STATE_GROUP_INDEX_UPDATE_NAME = "event_to_state_groups_sg_index" - STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" def __init__(self, db_conn, hs): super(StateBackgroundUpdateStore, self).__init__(db_conn, hs) @@ -1047,12 +1046,6 @@ class StateBackgroundUpdateStore( table="event_to_state_groups", columns=["state_group"], ) - self.register_background_index_update( - self.STATE_GROUPS_ROOM_INDEX_UPDATE_NAME, - index_name="state_groups_room_id_idx", - table="state_groups", - columns=["room_id"], - ) @defer.inlineCallbacks def _background_deduplicate_state(self, progress, batch_size): diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py index dd45df0c88..a368182034 100644 --- a/synapse/storage/purge_events.py +++ b/synapse/storage/purge_events.py @@ -33,8 +33,8 @@ class PurgeEventsStorage(object): """Deletes all record of a room """ - yield self.stores.main.purge_room(room_id) - yield self.stores.main.purge_room_state(room_id) + state_groups_to_delete = yield self.stores.main.purge_room(room_id) + yield self.stores.main.purge_room_state(room_id, state_groups_to_delete) @defer.inlineCallbacks def purge_history(self, room_id, token, delete_local_events): From 0287d033eec86fb7f6bb84f929e756c99caf2113 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Nov 2019 18:08:50 +0000 Subject: [PATCH 20/85] Transfer upgraded rooms on groups --- changelog.d/6235.bugfix | 1 + synapse/handlers/room_member.py | 9 +++++++++ synapse/storage/data_stores/main/group_server.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 changelog.d/6235.bugfix diff --git a/changelog.d/6235.bugfix b/changelog.d/6235.bugfix new file mode 100644 index 0000000000..12718ba934 --- /dev/null +++ b/changelog.d/6235.bugfix @@ -0,0 +1 @@ +Remove a room from a server's public rooms list on room upgrade. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 06d09c2947..01c65ee222 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -514,6 +514,15 @@ class RoomMemberHandler(object): if old_room and old_room["is_public"]: yield self.store.set_room_is_public(old_room_id, False) yield self.store.set_room_is_public(room_id, True) + + # Check if any groups we own contain the predecessor room + local_group_ids = yield self.store.get_local_groups_for_room(old_room_id) + for group_id in local_group_ids: + # Add new the new room to those groups + yield self.store.add_room_to_group(group_id, room_id, old_room["is_public"]) + + # Remove the old room from those groups + yield self.store.remove_room_from_group(group_id, old_room_id) @defer.inlineCallbacks def copy_user_state_on_room_upgrade(self, old_room_id, new_room_id, user_ids): diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py index b3a2771f1b..13ad71a49c 100644 --- a/synapse/storage/data_stores/main/group_server.py +++ b/synapse/storage/data_stores/main/group_server.py @@ -552,6 +552,21 @@ class GroupServerStore(SQLBaseStore): keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, desc="remove_user_from_summary", ) + + def get_local_groups_for_room(self, room_id): + """Get all of the local group that contain a given room + Args: + room_id (str): The ID of a room + Returns: + Deferred[list[str]]: A twisted.Deferred containing a list of group ids + containing this room + """ + return self._simple_select_onecol( + table="group_rooms", + keyvalues={"room_id": room_id}, + retcol="group_id", + desc="get_local_groups_for_room", + ) def get_users_for_summary_by_role(self, group_id, include_private=False): """Get the users and roles that should be included in a summary request From c2203bea57fd34de9be994f5e117da2c24338708 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Nov 2019 18:17:11 +0000 Subject: [PATCH 21/85] Re-add docstring, with caveats detailed --- synapse/handlers/room_member.py | 2 +- synapse/storage/data_stores/main/group_server.py | 2 +- synapse/storage/data_stores/main/state.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 01c65ee222..6cfee4b361 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -514,7 +514,7 @@ class RoomMemberHandler(object): if old_room and old_room["is_public"]: yield self.store.set_room_is_public(old_room_id, False) yield self.store.set_room_is_public(room_id, True) - + # Check if any groups we own contain the predecessor room local_group_ids = yield self.store.get_local_groups_for_room(old_room_id) for group_id in local_group_ids: diff --git a/synapse/storage/data_stores/main/group_server.py b/synapse/storage/data_stores/main/group_server.py index 13ad71a49c..5ded539af8 100644 --- a/synapse/storage/data_stores/main/group_server.py +++ b/synapse/storage/data_stores/main/group_server.py @@ -552,7 +552,7 @@ class GroupServerStore(SQLBaseStore): keyvalues={"group_id": group_id, "role_id": role_id, "user_id": user_id}, desc="remove_user_from_summary", ) - + def get_local_groups_for_room(self, room_id): """Get all of the local group that contain a given room Args: diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 3132848034..a41cac7b36 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -285,7 +285,11 @@ class StateGroupWorkerStore( room_id (str) Returns: - Deferred[unicode|None]: predecessor room id + Deferred[dict|None]: A dictionary containing the structure of the predecessor + field from the room's create event. The structure is subject to other servers, + but it is expected to be: + * room_id (str): The room ID of the predecessor room + * event_id (str): The ID of the tombstone event in the predecessor room Raises: NotFoundError if the room is unknown From 408600282774391fade9e4a6606f4967184865c0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 5 Nov 2019 13:23:25 +0000 Subject: [PATCH 22/85] Improve documentation for EventContext fields (#6319) --- changelog.d/6319.misc | 1 + synapse/events/snapshot.py | 85 +++++++++++++++++++++++++++----------- synapse/state/__init__.py | 3 ++ 3 files changed, 66 insertions(+), 23 deletions(-) create mode 100644 changelog.d/6319.misc diff --git a/changelog.d/6319.misc b/changelog.d/6319.misc new file mode 100644 index 0000000000..9711ef21ed --- /dev/null +++ b/changelog.d/6319.misc @@ -0,0 +1 @@ +Improve documentation for EventContext fields. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index a269de5482..5f07f6fe4b 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional, Tuple, Union + from six import iteritems import attr @@ -19,45 +21,82 @@ from frozendict import frozendict from twisted.internet import defer +from synapse.appservice import ApplicationService from synapse.logging.context import make_deferred_yieldable, run_in_background @attr.s(slots=True) class EventContext: """ + Holds information relevant to persisting an event + Attributes: - state_group (int|None): state group id, if the state has been stored - as a state group. This is usually only None if e.g. the event is - an outlier. - rejected (bool|str): A rejection reason if the event was rejected, else - False + rejected: A rejection reason if the event was rejected, else False - prev_group (int): Previously persisted state group. ``None`` for an - outlier. - delta_ids (dict[(str, str), str]): Delta from ``prev_group``. - (type, state_key) -> event_id. ``None`` for an outlier. + state_group: The ID of the state group for this event. Note that state events + are persisted with a state group which includes the new event, so this is + effectively the state *after* the event in question. - app_service: FIXME + For a *rejected* state event, where the state of the rejected event is + ignored, this state_group should never make it into the + event_to_state_groups table. Indeed, inspecting this value for a rejected + state event is almost certainly incorrect. + + For an outlier, where we don't have the state at the event, this will be + None. + + prev_group: If it is known, ``state_group``'s prev_group. Note that this being + None does not necessarily mean that ``state_group`` does not have + a prev_group! + + If ``state_group`` is None (ie, the event is an outlier), ``prev_group`` + will always also be ``None``. + + Note that this *not* (necessarily) the state group associated with + ``_prev_state_ids``. + + delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group`` + and ``state_group``. + + app_service: If this event is being sent by a (local) application service, that + app service. + + _current_state_ids: The room state map, including this event - ie, the state + in ``state_group``. - _current_state_ids (dict[(str, str), str]|None): - The current state map including the current event. None if outlier - or we haven't fetched the state from DB yet. (type, state_key) -> event_id - _prev_state_ids (dict[(str, str), str]|None): - The current state map excluding the current event. None if outlier - or we haven't fetched the state from DB yet. + FIXME: what is this for an outlier? it seems ill-defined. It seems like + it could be either {}, or the state we were given by the remote + server, depending on $THINGS + + Note that this is a private attribute: it should be accessed via + ``get_current_state_ids``. _AsyncEventContext impl calculates this + on-demand: it will be None until that happens. + + _prev_state_ids: The room state map, excluding this event. For a non-state + event, this will be the same as _current_state_events. + + Note that it is a completely different thing to prev_group! + (type, state_key) -> event_id + + FIXME: again, what is this for an outlier? + + As with _current_state_ids, this is a private attribute. It should be + accessed via get_prev_state_ids. """ - state_group = attr.ib(default=None) - rejected = attr.ib(default=False) - prev_group = attr.ib(default=None) - delta_ids = attr.ib(default=None) - app_service = attr.ib(default=None) + rejected = attr.ib(default=False, type=Union[bool, str]) + state_group = attr.ib(default=None, type=Optional[int]) + prev_group = attr.ib(default=None, type=Optional[int]) + delta_ids = attr.ib(default=None, type=Optional[Dict[Tuple[str, str], str]]) + app_service = attr.ib(default=None, type=Optional[ApplicationService]) - _prev_state_ids = attr.ib(default=None) - _current_state_ids = attr.ib(default=None) + _current_state_ids = attr.ib( + default=None, type=Optional[Dict[Tuple[str, str], str]] + ) + _prev_state_ids = attr.ib(default=None, type=Optional[Dict[Tuple[str, str], str]]) @staticmethod def with_state( diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 4e91eb66fe..2c04ab1854 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -232,6 +232,9 @@ class StateHandler(object): # If this is an outlier, then we know it shouldn't have any current # state. Certainly store.get_current_state won't return any, and # persisting the event won't store the state group. + + # FIXME: why do we populate current_state_ids? I thought the point was + # that we weren't supposed to have any state for outliers? if old_state: prev_state_ids = {(s.type, s.state_key): s.event_id for s in old_state} if event.is_state(): From f5d8fdf0a71cadd4ded81e276cc57e9c0c195a2f Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Nov 2019 14:44:25 +0000 Subject: [PATCH 23/85] Update changelog --- changelog.d/6310.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6310.feature b/changelog.d/6310.feature index b7ff3fad3b..78a187a1dc 100644 --- a/changelog.d/6310.feature +++ b/changelog.d/6310.feature @@ -1 +1 @@ -Implement label-based filtering. +Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). From e9bfe719ba1928dc191cea93120c5c8a89584434 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 5 Nov 2019 15:45:17 +0000 Subject: [PATCH 24/85] Strip overlong OpenGraph data from url preview ... to stop people causing DoSes with malicious web pages --- changelog.d/6331.feature | 1 + synapse/rest/media/v1/preview_url_resource.py | 20 ++++++++++- tests/rest/media/v1/test_url_preview.py | 34 +++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6331.feature diff --git a/changelog.d/6331.feature b/changelog.d/6331.feature new file mode 100644 index 0000000000..eaf69ef3f6 --- /dev/null +++ b/changelog.d/6331.feature @@ -0,0 +1 @@ +Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0c68c3aad5..6d8c39a410 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -56,6 +56,9 @@ logger = logging.getLogger(__name__) _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) +OG_TAG_NAME_MAXLEN = 50 +OG_TAG_VALUE_MAXLEN = 1000 + class PreviewUrlResource(DirectServeResource): isLeaf = True @@ -167,7 +170,7 @@ class PreviewUrlResource(DirectServeResource): ts (int): Returns: - Deferred[str]: json-encoded og data + Deferred[bytes]: json-encoded og data """ # check the URL cache in the DB (which will also provide us with # historical previews, if we have any) @@ -268,6 +271,17 @@ class PreviewUrlResource(DirectServeResource): logger.warn("Failed to find any OG data in %s", url) og = {} + # filter out any stupidly long values + keys_to_remove = [] + for k, v in og.items(): + if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN: + logger.warning( + "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN] + ) + keys_to_remove.append(k) + for k in keys_to_remove: + del og[k] + logger.debug("Calculated OG for %s as %s" % (url, og)) jsonog = json.dumps(og) @@ -502,6 +516,10 @@ def _calc_og(tree, media_uri): og = {} for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): if "content" in tag.attrib: + # if we've got more than 50 tags, someone is taking the piss + if len(og) >= 50: + logger.warning("skipping OG for page with too many og: tags") + return {} og[tag.attrib["property"]] = tag.attrib["content"] # TODO: grab article: meta tags too, e.g.: diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 976652aee8..da19a8e86f 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -247,6 +247,40 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430") + def test_overlong_title(self): + self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")] + + end_content = ( + b"" + b"" + b"x" * 2000 + b"" + b'' + b"" + ) + + request, channel = self.make_request( + "GET", "url_preview?url=http://matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="windows-1251"\r\n\r\n' + ) + % (len(end_content),) + + end_content + ) + + self.pump() + self.assertEqual(channel.code, 200) + res = channel.json_body + self.assertCountEqual(["og:description"], res.keys()) + def test_ipaddr(self): """ IP addresses can be previewed directly. From e78167c94b3f63136f7d0e4f32a05ad1befdc0ec Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 5 Nov 2019 16:46:39 +0000 Subject: [PATCH 25/85] Apply suggestions from code review Co-Authored-By: Brendan Abolivier Co-Authored-By: Erik Johnston --- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_url_preview.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6d8c39a410..4d4b3c1462 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -518,7 +518,7 @@ def _calc_og(tree, media_uri): if "content" in tag.attrib: # if we've got more than 50 tags, someone is taking the piss if len(og) >= 50: - logger.warning("skipping OG for page with too many og: tags") + logger.warning("Skipping OG for page with too many 'og:' tags") return {} og[tag.attrib["property"]] = tag.attrib["content"] diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index da19a8e86f..852b8ab11c 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -279,6 +279,7 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.pump() self.assertEqual(channel.code, 200) res = channel.json_body + # We should only see the `og:description` field, as `title` is too long and should be stripped out self.assertCountEqual(["og:description"], res.keys()) def test_ipaddr(self): From 81d49cbb07a4dc5a673e31a8a626af6e8a18f801 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 5 Nov 2019 17:22:58 +0000 Subject: [PATCH 26/85] Fix exception when OpenGraph tag values are ints --- changelog.d/6334.feature | 1 + synapse/rest/media/v1/preview_url_resource.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6334.feature diff --git a/changelog.d/6334.feature b/changelog.d/6334.feature new file mode 100644 index 0000000000..eaf69ef3f6 --- /dev/null +++ b/changelog.d/6334.feature @@ -0,0 +1 @@ +Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4d4b3c1462..ec9c4619c9 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -274,7 +274,8 @@ class PreviewUrlResource(DirectServeResource): # filter out any stupidly long values keys_to_remove = [] for k, v in og.items(): - if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN: + # values can be numeric as well as strings, hence the cast to str + if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN: logger.warning( "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN] ) From 052513958de214eed9508e60fef707641fb34da4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Nov 2019 17:44:09 +0000 Subject: [PATCH 27/85] Fix phone home stats --- synapse/app/homeserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 00a7f8330e..73e2c29d06 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -636,7 +636,7 @@ def run(hs): if hs.config.report_stats: logger.info("Scheduling stats reporting for 3 hour intervals") - clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000, hs, stats) + clock.looping_call(start_phone_stats_home, 3 * 60 * 60 * 1000) # We need to defer this init for the cases that we daemonize # otherwise the process ID we get is that of the non-daemon process @@ -644,7 +644,7 @@ def run(hs): # We wait 5 minutes to send the first set of stats as the server can # be quite busy the first few minutes - clock.call_later(5 * 60, start_phone_stats_home, hs, stats) + clock.call_later(5 * 60, start_phone_stats_home) _base.start_reactor( "synapse-homeserver", From b437eb48b6bde1cd908d472893c5638e021c6a8f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Nov 2019 17:45:29 +0000 Subject: [PATCH 28/85] Newsfile --- changelog.d/6336.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6336.misc diff --git a/changelog.d/6336.misc b/changelog.d/6336.misc new file mode 100644 index 0000000000..63527ccef4 --- /dev/null +++ b/changelog.d/6336.misc @@ -0,0 +1 @@ +Remove the dependency on psutil and replace functionality with the stdlib `resource` module. From 0e3ab8afdc2b89ac2f47878112d93dd03d01f7ef Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 5 Nov 2019 22:13:37 +0000 Subject: [PATCH 29/85] Add some checks that we aren't using state from rejected events (#6330) * Raise an exception if accessing state for rejected events Add some sanity checks on accessing state_group etc for rejected events. * Skip calculating push actions for rejected events It didn't actually cause any bugs, because rejected events get filtered out at various later points, but there's not point in trying to calculate the push actions for a rejected event. --- changelog.d/6330.misc | 1 + synapse/events/snapshot.py | 49 ++++++++++++++++++++++++++++++---- synapse/handlers/federation.py | 6 ++++- 3 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 changelog.d/6330.misc diff --git a/changelog.d/6330.misc b/changelog.d/6330.misc new file mode 100644 index 0000000000..6239cba263 --- /dev/null +++ b/changelog.d/6330.misc @@ -0,0 +1 @@ +Add some checks that we aren't using state from rejected events. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 5f07f6fe4b..0f3c5989cb 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -33,7 +33,7 @@ class EventContext: Attributes: rejected: A rejection reason if the event was rejected, else False - state_group: The ID of the state group for this event. Note that state events + _state_group: The ID of the state group for this event. Note that state events are persisted with a state group which includes the new event, so this is effectively the state *after* the event in question. @@ -45,6 +45,9 @@ class EventContext: For an outlier, where we don't have the state at the event, this will be None. + Note that this is a private attribute: it should be accessed via + the ``state_group`` property. + prev_group: If it is known, ``state_group``'s prev_group. Note that this being None does not necessarily mean that ``state_group`` does not have a prev_group! @@ -88,7 +91,7 @@ class EventContext: """ rejected = attr.ib(default=False, type=Union[bool, str]) - state_group = attr.ib(default=None, type=Optional[int]) + _state_group = attr.ib(default=None, type=Optional[int]) prev_group = attr.ib(default=None, type=Optional[int]) delta_ids = attr.ib(default=None, type=Optional[Dict[Tuple[str, str], str]]) app_service = attr.ib(default=None, type=Optional[ApplicationService]) @@ -136,7 +139,7 @@ class EventContext: "prev_state_id": prev_state_id, "event_type": event.type, "event_state_key": event.state_key if event.is_state() else None, - "state_group": self.state_group, + "state_group": self._state_group, "rejected": self.rejected, "prev_group": self.prev_group, "delta_ids": _encode_state_dict(self.delta_ids), @@ -173,22 +176,52 @@ class EventContext: return context + @property + def state_group(self) -> Optional[int]: + """The ID of the state group for this event. + + Note that state events are persisted with a state group which includes the new + event, so this is effectively the state *after* the event in question. + + For an outlier, where we don't have the state at the event, this will be None. + + It is an error to access this for a rejected event, since rejected state should + not make it into the room state. Accessing this property will raise an exception + if ``rejected`` is set. + """ + if self.rejected: + raise RuntimeError("Attempt to access state_group of rejected event") + + return self._state_group + @defer.inlineCallbacks def get_current_state_ids(self, store): - """Gets the current state IDs + """ + Gets the room state map, including this event - ie, the state in ``state_group`` + + It is an error to access this for a rejected event, since rejected state should + not make it into the room state. This method will raise an exception if + ``rejected`` is set. Returns: Deferred[dict[(str, str), str]|None]: Returns None if state_group is None, which happens when the associated event is an outlier. + Maps a (type, state_key) to the event ID of the state event matching this tuple. """ + if self.rejected: + raise RuntimeError("Attempt to access state_ids of rejected event") + yield self._ensure_fetched(store) return self._current_state_ids @defer.inlineCallbacks def get_prev_state_ids(self, store): - """Gets the prev state IDs + """ + Gets the room state map, excluding this event. + + For a non-state event, this will be the same as get_current_state_ids(). Returns: Deferred[dict[(str, str), str]|None]: Returns None if state_group @@ -202,11 +235,17 @@ class EventContext: def get_cached_current_state_ids(self): """Gets the current state IDs if we have them already cached. + It is an error to access this for a rejected event, since rejected state should + not make it into the room state. This method will raise an exception if + ``rejected`` is set. + Returns: dict[(str, str), str]|None: Returns None if we haven't cached the state or if state_group is None, which happens when the associated event is an outlier. """ + if self.rejected: + raise RuntimeError("Attempt to access state_ids of rejected event") return self._current_state_ids diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 8cafcfdab0..b7916de909 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1688,7 +1688,11 @@ class FederationHandler(BaseHandler): # hack around with a try/finally instead. success = False try: - if not event.internal_metadata.is_outlier() and not backfilled: + if ( + not event.internal_metadata.is_outlier() + and not backfilled + and not context.rejected + ): yield self.action_generator.handle_push_actions_for_event( event, context ) From 807ec3bd99908d2991d2b3d0615b0862610c6dc3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 6 Nov 2019 10:01:39 +0000 Subject: [PATCH 30/85] Fix bug which caused rejected events to be stored with the wrong room state (#6320) Fixes a bug where rejected events were persisted with the wrong state group. Also fixes an occasional internal-server-error when receiving events over federation which are rejected and (possibly because they are backwards-extremities) have no prev_group. Fixes #6289. --- changelog.d/6320.bugfix | 1 + synapse/events/snapshot.py | 25 +++- synapse/handlers/federation.py | 1 + synapse/state/__init__.py | 168 +++++++++++----------- synapse/storage/data_stores/main/state.py | 2 +- tests/handlers/test_federation.py | 126 ++++++++++++++++ tests/test_state.py | 61 ++++++-- 7 files changed, 283 insertions(+), 101 deletions(-) create mode 100644 changelog.d/6320.bugfix diff --git a/changelog.d/6320.bugfix b/changelog.d/6320.bugfix new file mode 100644 index 0000000000..2c3fad5655 --- /dev/null +++ b/changelog.d/6320.bugfix @@ -0,0 +1 @@ +Fix bug which casued rejected events to be persisted with the wrong room state. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 0f3c5989cb..64e898f40c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -48,10 +48,21 @@ class EventContext: Note that this is a private attribute: it should be accessed via the ``state_group`` property. + state_group_before_event: The ID of the state group representing the state + of the room before this event. + + If this is a non-state event, this will be the same as ``state_group``. If + it's a state event, it will be the same as ``prev_group``. + + If ``state_group`` is None (ie, the event is an outlier), + ``state_group_before_event`` will always also be ``None``. + prev_group: If it is known, ``state_group``'s prev_group. Note that this being None does not necessarily mean that ``state_group`` does not have a prev_group! + If the event is a state event, this is normally the same as ``prev_group``. + If ``state_group`` is None (ie, the event is an outlier), ``prev_group`` will always also be ``None``. @@ -77,7 +88,8 @@ class EventContext: ``get_current_state_ids``. _AsyncEventContext impl calculates this on-demand: it will be None until that happens. - _prev_state_ids: The room state map, excluding this event. For a non-state + _prev_state_ids: The room state map, excluding this event - ie, the state + in ``state_group_before_event``. For a non-state event, this will be the same as _current_state_events. Note that it is a completely different thing to prev_group! @@ -92,6 +104,7 @@ class EventContext: rejected = attr.ib(default=False, type=Union[bool, str]) _state_group = attr.ib(default=None, type=Optional[int]) + state_group_before_event = attr.ib(default=None, type=Optional[int]) prev_group = attr.ib(default=None, type=Optional[int]) delta_ids = attr.ib(default=None, type=Optional[Dict[Tuple[str, str], str]]) app_service = attr.ib(default=None, type=Optional[ApplicationService]) @@ -103,12 +116,18 @@ class EventContext: @staticmethod def with_state( - state_group, current_state_ids, prev_state_ids, prev_group=None, delta_ids=None + state_group, + state_group_before_event, + current_state_ids, + prev_state_ids, + prev_group=None, + delta_ids=None, ): return EventContext( current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, state_group=state_group, + state_group_before_event=state_group_before_event, prev_group=prev_group, delta_ids=delta_ids, ) @@ -140,6 +159,7 @@ class EventContext: "event_type": event.type, "event_state_key": event.state_key if event.is_state() else None, "state_group": self._state_group, + "state_group_before_event": self.state_group_before_event, "rejected": self.rejected, "prev_group": self.prev_group, "delta_ids": _encode_state_dict(self.delta_ids), @@ -165,6 +185,7 @@ class EventContext: event_type=input["event_type"], event_state_key=input["event_state_key"], state_group=input["state_group"], + state_group_before_event=input["state_group_before_event"], prev_group=input["prev_group"], delta_ids=_decode_state_dict(input["delta_ids"]), rejected=input["rejected"], diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b7916de909..05dd8d2671 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2280,6 +2280,7 @@ class FederationHandler(BaseHandler): return EventContext.with_state( state_group=state_group, + state_group_before_event=context.state_group_before_event, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, prev_group=prev_group, diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 2c04ab1854..139beef8ed 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -16,6 +16,7 @@ import logging from collections import namedtuple +from typing import Iterable, Optional from six import iteritems, itervalues @@ -27,6 +28,7 @@ from twisted.internet import defer from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions +from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.logging.utils import log_function from synapse.state import v1, v2 @@ -212,15 +214,17 @@ class StateHandler(object): return joined_hosts @defer.inlineCallbacks - def compute_event_context(self, event, old_state=None): + def compute_event_context( + self, event: EventBase, old_state: Optional[Iterable[EventBase]] = None + ): """Build an EventContext structure for the event. This works out what the current state should be for the event, and generates a new state group if necessary. Args: - event (synapse.events.EventBase): - old_state (dict|None): The state at the event if it can't be + event: + old_state: The state at the event if it can't be calculated from existing events. This is normally only specified when receiving an event from federation where we don't have the prev events for, e.g. when backfilling. @@ -251,113 +255,103 @@ class StateHandler(object): # group for it. context = EventContext.with_state( state_group=None, + state_group_before_event=None, current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, ) return context + # + # first of all, figure out the state before the event + # + if old_state: - # We already have the state, so we don't need to calculate it. - # Let's just correctly fill out the context and create a - # new state group for it. + # if we're given the state before the event, then we use that + state_ids_before_event = { + (s.type, s.state_key): s.event_id for s in old_state + } + state_group_before_event = None + state_group_before_event_prev_group = None + deltas_to_state_group_before_event = None - prev_state_ids = {(s.type, s.state_key): s.event_id for s in old_state} + else: + # otherwise, we'll need to resolve the state across the prev_events. + logger.debug("calling resolve_state_groups from compute_event_context") - if event.is_state(): - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] - if replaces != event.event_id: # Paranoia check - event.unsigned["replaces_state"] = replaces - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id - else: - current_state_ids = prev_state_ids + entry = yield self.resolve_state_groups_for_events( + event.room_id, event.prev_event_ids() + ) - state_group = yield self.state_store.store_state_group( + state_ids_before_event = entry.state + state_group_before_event = entry.state_group + state_group_before_event_prev_group = entry.prev_group + deltas_to_state_group_before_event = entry.delta_ids + + # + # make sure that we have a state group at that point. If it's not a state event, + # that will be the state group for the new event. If it *is* a state event, + # it might get rejected (in which case we'll need to persist it with the + # previous state group) + # + + if not state_group_before_event: + state_group_before_event = yield self.state_store.store_state_group( event.event_id, event.room_id, - prev_group=None, - delta_ids=None, - current_state_ids=current_state_ids, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + current_state_ids=state_ids_before_event, ) - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + # XXX: can we update the state cache entry for the new state group? or + # could we set a flag on resolve_state_groups_for_events to tell it to + # always make a state group? + + # + # now if it's not a state event, we're done + # + + if not event.is_state(): + return EventContext.with_state( + state_group_before_event=state_group_before_event, + state_group=state_group_before_event, + current_state_ids=state_ids_before_event, + prev_state_ids=state_ids_before_event, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, ) - return context + # + # otherwise, we'll need to create a new state group for after the event + # - logger.debug("calling resolve_state_groups from compute_event_context") - - entry = yield self.resolve_state_groups_for_events( - event.room_id, event.prev_event_ids() - ) - - prev_state_ids = entry.state - prev_group = None - delta_ids = None - - if event.is_state(): - # If this is a state event then we need to create a new state - # group for the state after this event. - - key = (event.type, event.state_key) - if key in prev_state_ids: - replaces = prev_state_ids[key] + key = (event.type, event.state_key) + if key in state_ids_before_event: + replaces = state_ids_before_event[key] + if replaces != event.event_id: event.unsigned["replaces_state"] = replaces - current_state_ids = dict(prev_state_ids) - current_state_ids[key] = event.event_id + state_ids_after_event = dict(state_ids_before_event) + state_ids_after_event[key] = event.event_id + delta_ids = {key: event.event_id} - if entry.state_group: - # If the state at the event has a state group assigned then - # we can use that as the prev group - prev_group = entry.state_group - delta_ids = {key: event.event_id} - elif entry.prev_group: - # If the state at the event only has a prev group, then we can - # use that as a prev group too. - prev_group = entry.prev_group - delta_ids = dict(entry.delta_ids) - delta_ids[key] = event.event_id - - state_group = yield self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=prev_group, - delta_ids=delta_ids, - current_state_ids=current_state_ids, - ) - else: - current_state_ids = prev_state_ids - prev_group = entry.prev_group - delta_ids = entry.delta_ids - - if entry.state_group is None: - entry.state_group = yield self.state_store.store_state_group( - event.event_id, - event.room_id, - prev_group=entry.prev_group, - delta_ids=entry.delta_ids, - current_state_ids=current_state_ids, - ) - entry.state_id = entry.state_group - - state_group = entry.state_group - - context = EventContext.with_state( - state_group=state_group, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, - prev_group=prev_group, + state_group_after_event = yield self.state_store.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event, delta_ids=delta_ids, + current_state_ids=state_ids_after_event, ) - return context + return EventContext.with_state( + state_group=state_group_after_event, + state_group_before_event=state_group_before_event, + current_state_ids=state_ids_after_event, + prev_state_ids=state_ids_before_event, + prev_group=state_group_before_event, + delta_ids=delta_ids, + ) @measure_func() @defer.inlineCallbacks diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py index 3132848034..9e1541988e 100644 --- a/synapse/storage/data_stores/main/state.py +++ b/synapse/storage/data_stores/main/state.py @@ -1231,7 +1231,7 @@ class StateStore(StateGroupWorkerStore, StateBackgroundUpdateStore): # if the event was rejected, just give it the same state as its # predecessor. if context.rejected: - state_groups[event.event_id] = context.prev_group + state_groups[event.event_id] = context.state_group_before_event continue state_groups[event.event_id] = context.state_group diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index d56220f403..b4d92cf732 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -12,13 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging + from synapse.api.constants import EventTypes from synapse.api.errors import AuthError, Codes +from synapse.federation.federation_base import event_from_pdu_json +from synapse.logging.context import LoggingContext, run_in_background from synapse.rest import admin from synapse.rest.client.v1 import login, room from tests import unittest +logger = logging.getLogger(__name__) + class FederationTestCase(unittest.HomeserverTestCase): servlets = [ @@ -79,3 +85,123 @@ class FederationTestCase(unittest.HomeserverTestCase): self.assertEqual(failure.code, 403, failure) self.assertEqual(failure.errcode, Codes.FORBIDDEN, failure) self.assertEqual(failure.msg, "You are not invited to this room.") + + def test_rejected_message_event_state(self): + """ + Check that we store the state group correctly for rejected non-state events. + + Regression test for #6289. + """ + OTHER_SERVER = "otherserver" + OTHER_USER = "@otheruser:" + OTHER_SERVER + + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # pretend that another server has joined + join_event = self._build_and_send_join_event(OTHER_SERVER, OTHER_USER, room_id) + + # check the state group + sg = self.successResultOf( + self.store._get_state_group_for_event(join_event.event_id) + ) + + # build and send an event which will be rejected + ev = event_from_pdu_json( + { + "type": EventTypes.Message, + "content": {}, + "room_id": room_id, + "sender": "@yetanotheruser:" + OTHER_SERVER, + "depth": join_event["depth"] + 1, + "prev_events": [join_event.event_id], + "auth_events": [], + "origin_server_ts": self.clock.time_msec(), + }, + join_event.format_version, + ) + + with LoggingContext(request="send_rejected"): + d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) + self.get_success(d) + + # that should have been rejected + e = self.get_success(self.store.get_event(ev.event_id, allow_rejected=True)) + self.assertIsNotNone(e.rejected_reason) + + # ... and the state group should be the same as before + sg2 = self.successResultOf(self.store._get_state_group_for_event(ev.event_id)) + + self.assertEqual(sg, sg2) + + def test_rejected_state_event_state(self): + """ + Check that we store the state group correctly for rejected state events. + + Regression test for #6289. + """ + OTHER_SERVER = "otherserver" + OTHER_USER = "@otheruser:" + OTHER_SERVER + + # create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # pretend that another server has joined + join_event = self._build_and_send_join_event(OTHER_SERVER, OTHER_USER, room_id) + + # check the state group + sg = self.successResultOf( + self.store._get_state_group_for_event(join_event.event_id) + ) + + # build and send an event which will be rejected + ev = event_from_pdu_json( + { + "type": "org.matrix.test", + "state_key": "test_key", + "content": {}, + "room_id": room_id, + "sender": "@yetanotheruser:" + OTHER_SERVER, + "depth": join_event["depth"] + 1, + "prev_events": [join_event.event_id], + "auth_events": [], + "origin_server_ts": self.clock.time_msec(), + }, + join_event.format_version, + ) + + with LoggingContext(request="send_rejected"): + d = run_in_background(self.handler.on_receive_pdu, OTHER_SERVER, ev) + self.get_success(d) + + # that should have been rejected + e = self.get_success(self.store.get_event(ev.event_id, allow_rejected=True)) + self.assertIsNotNone(e.rejected_reason) + + # ... and the state group should be the same as before + sg2 = self.successResultOf(self.store._get_state_group_for_event(ev.event_id)) + + self.assertEqual(sg, sg2) + + def _build_and_send_join_event(self, other_server, other_user, room_id): + join_event = self.get_success( + self.handler.on_make_join_request(other_server, room_id, other_user) + ) + # the auth code requires that a signature exists, but doesn't check that + # signature... go figure. + join_event.signatures[other_server] = {"x": "y"} + with LoggingContext(request="send_join"): + d = run_in_background( + self.handler.on_send_join_request, other_server, join_event + ) + self.get_success(d) + + # sanity-check: the room should show that the new user is a member + r = self.get_success(self.store.get_current_state_ids(room_id)) + self.assertEqual(r[(EventTypes.Member, other_user)], join_event.event_id) + + return join_event diff --git a/tests/test_state.py b/tests/test_state.py index 38246555bd..176535947a 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -21,6 +21,7 @@ from synapse.api.auth import Auth from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import FrozenEvent +from synapse.events.snapshot import EventContext from synapse.state import StateHandler, StateResolutionHandler from tests import unittest @@ -198,16 +199,22 @@ class StateTestCase(unittest.TestCase): self.store.register_events(graph.walk()) - context_store = {} + context_store = {} # type: dict[str, EventContext] for event in graph.walk(): context = yield self.state.compute_event_context(event) self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + ctx_c = context_store["C"] + ctx_d = context_store["D"] + + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertEqual(2, len(prev_state_ids)) + self.assertEqual(ctx_c.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + @defer.inlineCallbacks def test_branch_basic_conflict(self): graph = Graph( @@ -241,12 +248,19 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + # C ends up winning the resolution between B and C + ctx_c = context_store["C"] + ctx_d = context_store["D"] + + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertSetEqual( {"START", "A", "C"}, {e_id for e_id in prev_state_ids.values()} ) + self.assertEqual(ctx_c.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + @defer.inlineCallbacks def test_branch_have_banned_conflict(self): graph = Graph( @@ -292,11 +306,18 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["E"].get_prev_state_ids(self.store) + # C ends up winning the resolution between C and D because bans win over other + # changes + ctx_c = context_store["C"] + ctx_e = context_store["E"] + + prev_state_ids = yield ctx_e.get_prev_state_ids(self.store) self.assertSetEqual( {"START", "A", "B", "C"}, {e for e in prev_state_ids.values()} ) + self.assertEqual(ctx_c.state_group, ctx_e.state_group_before_event) + self.assertEqual(ctx_e.state_group_before_event, ctx_e.state_group) @defer.inlineCallbacks def test_branch_have_perms_conflict(self): @@ -360,12 +381,20 @@ class StateTestCase(unittest.TestCase): self.store.register_event_context(event, context) context_store[event.event_id] = context - prev_state_ids = yield context_store["D"].get_prev_state_ids(self.store) + # B ends up winning the resolution between B and C because power levels + # win over other changes. + ctx_b = context_store["B"] + ctx_d = context_store["D"] + + prev_state_ids = yield ctx_d.get_prev_state_ids(self.store) self.assertSetEqual( {"A1", "A2", "A3", "A5", "B"}, {e for e in prev_state_ids.values()} ) + self.assertEqual(ctx_b.state_group, ctx_d.state_group_before_event) + self.assertEqual(ctx_d.state_group_before_event, ctx_d.state_group) + def _add_depths(self, nodes, edges): def _get_depth(ev): node = nodes[ev] @@ -390,13 +419,16 @@ class StateTestCase(unittest.TestCase): context = yield self.state.compute_event_context(event, old_state=old_state) - current_state_ids = yield context.get_current_state_ids(self.store) + prev_state_ids = yield context.get_prev_state_ids(self.store) + self.assertCountEqual((e.event_id for e in old_state), prev_state_ids.values()) - self.assertEqual( - set(e.event_id for e in old_state), set(current_state_ids.values()) + current_state_ids = yield context.get_current_state_ids(self.store) + self.assertCountEqual( + (e.event_id for e in old_state), current_state_ids.values() ) - self.assertIsNotNone(context.state_group) + self.assertIsNotNone(context.state_group_before_event) + self.assertEqual(context.state_group_before_event, context.state_group) @defer.inlineCallbacks def test_annotate_with_old_state(self): @@ -411,11 +443,18 @@ class StateTestCase(unittest.TestCase): context = yield self.state.compute_event_context(event, old_state=old_state) prev_state_ids = yield context.get_prev_state_ids(self.store) + self.assertCountEqual((e.event_id for e in old_state), prev_state_ids.values()) - self.assertEqual( - set(e.event_id for e in old_state), set(prev_state_ids.values()) + current_state_ids = yield context.get_current_state_ids(self.store) + self.assertCountEqual( + (e.event_id for e in old_state + [event]), current_state_ids.values() ) + self.assertIsNotNone(context.state_group_before_event) + self.assertNotEqual(context.state_group_before_event, context.state_group) + self.assertEqual(context.state_group_before_event, context.prev_group) + self.assertEqual({("state", ""): event.event_id}, context.delta_ids) + @defer.inlineCallbacks def test_trivial_annotate_message(self): prev_event_id = "prev_event_id" From feafd98aca3e72d27516c79f986a28ea39886ebc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 6 Nov 2019 10:02:23 +0000 Subject: [PATCH 31/85] 1.5.1 --- CHANGES.md | 9 +++++++++ changelog.d/6331.feature | 1 - changelog.d/6334.feature | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) delete mode 100644 changelog.d/6331.feature delete mode 100644 changelog.d/6334.feature diff --git a/CHANGES.md b/CHANGES.md index 6faa4b8dce..9312dc2941 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.5.1 (2019-11-06) +========================== + +Features +-------- + +- Limit the length of data returned by url previews, to prevent DoS attacks. ([\#6331](https://github.com/matrix-org/synapse/issues/6331), [\#6334](https://github.com/matrix-org/synapse/issues/6334)) + + Synapse 1.5.0 (2019-10-29) ========================== diff --git a/changelog.d/6331.feature b/changelog.d/6331.feature deleted file mode 100644 index eaf69ef3f6..0000000000 --- a/changelog.d/6331.feature +++ /dev/null @@ -1 +0,0 @@ -Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/changelog.d/6334.feature b/changelog.d/6334.feature deleted file mode 100644 index eaf69ef3f6..0000000000 --- a/changelog.d/6334.feature +++ /dev/null @@ -1 +0,0 @@ -Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/debian/changelog b/debian/changelog index acda7e5c63..c4415f460a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.5.1) stable; urgency=medium + + * New synapse release 1.5.1. + + -- Synapse Packaging team Wed, 06 Nov 2019 10:02:14 +0000 + matrix-synapse-py3 (1.5.0) stable; urgency=medium * New synapse release 1.5.0. diff --git a/synapse/__init__.py b/synapse/__init__.py index 8587ffa76f..ec16f54a49 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -36,7 +36,7 @@ try: except ImportError: pass -__version__ = "1.5.0" +__version__ = "1.5.1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 70d93cafdb4a3055849e7b84881cfd6225f0b6e5 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Nov 2019 10:59:03 +0000 Subject: [PATCH 32/85] Update insert --- synapse/storage/data_stores/main/events_bg_updates.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index a703927471..2fcb0c33dc 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -537,7 +537,12 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): txn=txn, table="event_labels", values=[ - {"event_id": event_id, "label": label} + { + "event_id": event_id, + "label": label, + "room_id": event_json["room_id"], + "topological_ordering": event_json["depth"], + } for label in event_json["content"].get( EventContentFields.Labels, [] ) From 24a214bd1bc396cbcc41c4c913938299ca9ffcf5 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Nov 2019 11:04:19 +0000 Subject: [PATCH 33/85] Fix field name --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 2fcb0c33dc..309bfcafe5 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -544,7 +544,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "topological_ordering": event_json["depth"], } for label in event_json["content"].get( - EventContentFields.Labels, [] + EventContentFields.LABELS, [] ) ], ) From 541f1b92d946093fef17ea8b95a7cb595fc5ffc4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Nov 2019 17:39:16 +0000 Subject: [PATCH 34/85] Only do `rc_login` ratelimiting on succesful login. We were doing this in a number of places which meant that some login code paths incremented the counter multiple times. It was also applying ratelimiting to UIA endpoints, which was probably not intentional. In particular, some custom auth modules were calling `check_user_exists`, which incremented the counters, meaning that people would fail to login sometimes. --- synapse/handlers/auth.py | 55 +--------------- synapse/rest/client/v1/login.py | 111 ++++++++++++++++++++++++++------ 2 files changed, 94 insertions(+), 72 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 7a0f54ca24..14c6387b6a 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,7 +35,6 @@ from synapse.api.errors import ( SynapseError, UserDeactivatedError, ) -from synapse.api.ratelimiting import Ratelimiter from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.logging.context import defer_to_thread @@ -102,9 +101,6 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types - self._account_ratelimiter = Ratelimiter() - self._failed_attempts_ratelimiter = Ratelimiter() - self._clock = self.hs.get_clock() @defer.inlineCallbacks @@ -501,11 +497,8 @@ class AuthHandler(BaseHandler): multiple matches Raises: - LimitExceededError if the ratelimiter's login requests count for this - user is too high too proceed. UserDeactivatedError if a user is found but is deactivated. """ - self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) if res is not None: return res[0] @@ -572,8 +565,6 @@ class AuthHandler(BaseHandler): StoreError if there was a problem accessing the database SynapseError if there was a problem with the request LoginError if there was an authentication problem. - LimitExceededError if the ratelimiter's login requests count for this - user is too high too proceed. """ if username.startswith("@"): @@ -581,8 +572,6 @@ class AuthHandler(BaseHandler): else: qualified_user_id = UserID(username, self.hs.hostname).to_string() - self.ratelimit_login_per_account(qualified_user_id) - login_type = login_submission.get("type") known_login_type = False @@ -650,15 +639,6 @@ class AuthHandler(BaseHandler): if not known_login_type: raise SynapseError(400, "Unknown login type %s" % login_type) - # unknown username or invalid password. - self._failed_attempts_ratelimiter.ratelimit( - qualified_user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=True, - ) - # We raise a 403 here, but note that if we're doing user-interactive # login, it turns all LoginErrors into a 401 anyway. raise LoginError(403, "Invalid password", errcode=Codes.FORBIDDEN) @@ -710,10 +690,6 @@ class AuthHandler(BaseHandler): Returns: Deferred[unicode] the canonical_user_id, or Deferred[None] if unknown user/bad password - - Raises: - LimitExceededError if the ratelimiter's login requests count for this - user is too high too proceed. """ lookupres = yield self._find_user_id_and_pwd_hash(user_id) if not lookupres: @@ -742,7 +718,7 @@ class AuthHandler(BaseHandler): auth_api.validate_macaroon(macaroon, "login", user_id) except Exception: raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) - self.ratelimit_login_per_account(user_id) + yield self.auth.check_auth_blocking(user_id) return user_id @@ -912,35 +888,6 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) - def ratelimit_login_per_account(self, user_id): - """Checks whether the process must be stopped because of ratelimiting. - - Checks against two ratelimiters: the generic one for login attempts per - account and the one specific to failed attempts. - - Args: - user_id (unicode): complete @user:id - - Raises: - LimitExceededError if one of the ratelimiters' login requests count - for this user is too high too proceed. - """ - self._failed_attempts_ratelimiter.ratelimit( - user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_failed_attempts.per_second, - burst_count=self.hs.config.rc_login_failed_attempts.burst_count, - update=False, - ) - - self._account_ratelimiter.ratelimit( - user_id.lower(), - time_now_s=self._clock.time(), - rate_hz=self.hs.config.rc_login_account.per_second, - burst_count=self.hs.config.rc_login_account.burst_count, - update=True, - ) - @attr.s class MacaroonGenerator(object): diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 24a0ce74f2..abc210da57 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -92,8 +92,11 @@ class LoginRestServlet(RestServlet): self.auth_handler = self.hs.get_auth_handler() self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() + self._clock = hs.get_clock() self._well_known_builder = WellKnownBuilder(hs) self._address_ratelimiter = Ratelimiter() + self._account_ratelimiter = Ratelimiter() + self._failed_attempts_ratelimiter = Ratelimiter() def on_GET(self, request): flows = [] @@ -202,6 +205,16 @@ class LoginRestServlet(RestServlet): # (See add_threepid in synapse/handlers/auth.py) address = address.lower() + # We also apply account rate limiting using the 3PID as a key, as + # otherwise using 3PID bypasses the ratelimiting based on user ID. + self._failed_attempts_ratelimiter.ratelimit( + (medium, address), + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + # Check for login providers that support 3pid login types ( canonical_user_id, @@ -211,7 +224,8 @@ class LoginRestServlet(RestServlet): ) if canonical_user_id: # Authentication through password provider and 3pid succeeded - result = yield self._register_device_with_callback( + + result = yield self._complete_login( canonical_user_id, login_submission, callback_3pid ) return result @@ -225,6 +239,21 @@ class LoginRestServlet(RestServlet): logger.warning( "unknown 3pid identifier medium %s, address %r", medium, address ) + # We mark that we've failed to log in here, as + # `check_password_provider_3pid` might have returned `None` due + # to an incorrect password, rather than the account not + # existing. + # + # If it returned None but the 3PID was bound then we won't hit + # this code path, which is fine as then the per-user ratelimit + # will kick in below. + self._failed_attempts_ratelimiter.can_do_action( + (medium, address), + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) raise LoginError(403, "", errcode=Codes.FORBIDDEN) identifier = {"type": "m.id.user", "user": user_id} @@ -236,29 +265,84 @@ class LoginRestServlet(RestServlet): if "user" not in identifier: raise SynapseError(400, "User identifier is missing 'user' key") - canonical_user_id, callback = yield self.auth_handler.validate_login( - identifier["user"], login_submission + if identifier["user"].startswith("@"): + qualified_user_id = identifier["user"] + else: + qualified_user_id = UserID(identifier["user"], self.hs.hostname).to_string() + + # Check if we've hit the failed ratelimit (but don't update it) + self._failed_attempts_ratelimiter.ratelimit( + qualified_user_id.lower(), + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, ) - result = yield self._register_device_with_callback( + try: + canonical_user_id, callback = yield self.auth_handler.validate_login( + identifier["user"], login_submission + ) + except LoginError: + # The user has failed to log in, so we need to update the rate + # limiter. Using `can_do_action` avoids us raising a ratelimit + # exception and masking the LoginError. The actual ratelimiting + # should have happened above. + self._failed_attempts_ratelimiter.can_do_action( + qualified_user_id.lower(), + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + raise + + result = yield self._complete_login( canonical_user_id, login_submission, callback ) return result @defer.inlineCallbacks - def _register_device_with_callback(self, user_id, login_submission, callback=None): - """ Registers a device with a given user_id. Optionally run a callback - function after registration has completed. + def _complete_login( + self, user_id, login_submission, callback=None, create_non_existant_users=False + ): + """Called when we've successfully authed the user and now need to + actually login them in (e.g. create devices). This gets called on + all succesful logins. + + Applies the ratelimiting for succesful login attempts against an + account. Args: user_id (str): ID of the user to register. login_submission (dict): Dictionary of login information. callback (func|None): Callback function to run after registration. + create_non_existant_users (bool): Whether to create the user if + they don't exist. Defaults to False. Returns: result (Dict[str,str]): Dictionary of account information after successful registration. """ + + # Before we actually log them in we check if they've already logged in + # too often. This happens here rather than before as we don't + # necessarily know the user before now. + self._account_ratelimiter.ratelimit( + user_id.lower(), + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + update=True, + ) + + if create_non_existant_users: + user_id = yield self.auth_handler.check_user_exists(user_id) + if not user_id: + user_id = yield self.registration_handler.register_user( + localpart=UserID.from_string(user_id).localpart + ) + device_id = login_submission.get("device_id") initial_display_name = login_submission.get("initial_device_display_name") device_id, access_token = yield self.registration_handler.register_device( @@ -285,7 +369,7 @@ class LoginRestServlet(RestServlet): token ) - result = yield self._register_device_with_callback(user_id, login_submission) + result = yield self._complete_login(user_id, login_submission) return result @defer.inlineCallbacks @@ -313,16 +397,7 @@ class LoginRestServlet(RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() - - registered_user_id = yield self.auth_handler.check_user_exists(user_id) - if not registered_user_id: - registered_user_id = yield self.registration_handler.register_user( - localpart=user - ) - - result = yield self._register_device_with_callback( - registered_user_id, login_submission - ) + result = yield self._complete_login(user_id, login_submission) return result From f697b4b4a2ca329a32105ddf83735737808306bf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Nov 2019 11:00:54 +0000 Subject: [PATCH 35/85] Add failed auth ratelimiting to UIA --- synapse/handlers/auth.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 14c6387b6a..20c62bd780 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( SynapseError, UserDeactivatedError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.handlers.ui_auth import INTERACTIVE_AUTH_CHECKERS from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.logging.context import defer_to_thread @@ -101,6 +102,10 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types + # Ratelimiter for failed auth during UIA. Uses same ratelimit config + # as per `rc_login.failed_attempts`. + self._failed_uia_attempts_ratelimiter = Ratelimiter() + self._clock = self.hs.get_clock() @defer.inlineCallbacks @@ -129,12 +134,38 @@ class AuthHandler(BaseHandler): AuthError if the client has completed a login flow, and it gives a different user to `requester` + + LimitExceededError if the ratelimiter's failed requests count for this + user is too high too proceed + """ + user_id = requester.user.to_string() + + # Check if we should be ratelimited due to too many previous failed attempts + self._failed_uia_attempts_ratelimiter.ratelimit( + user_id, + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + # build a list of supported flows flows = [[login_type] for login_type in self._supported_login_types] - result, params, _ = yield self.check_auth(flows, request_body, clientip) + try: + result, params, _ = yield self.check_auth(flows, request_body, clientip) + except LoginError: + # Update the ratelimite to say we failed (`can_do_action` doesn't raise). + self._failed_uia_attempts_ratelimiter.can_do_action( + user_id, + time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + raise # find the completed login type for login_type in self._supported_login_types: From 4fc53bf1fb97d52c19aa718e67f31d290218e3c1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Nov 2019 17:42:42 +0000 Subject: [PATCH 36/85] Newsfile --- changelog.d/6335.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6335.bugfix diff --git a/changelog.d/6335.bugfix b/changelog.d/6335.bugfix new file mode 100644 index 0000000000..a95f6b9eec --- /dev/null +++ b/changelog.d/6335.bugfix @@ -0,0 +1 @@ +Fix bug where `rc_login` ratelimiting would prematurely kick in. From b33c4f7a828e722d6115f73525e0456edb79a90f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 11:55:00 +0000 Subject: [PATCH 37/85] Numeric ID checker now checks @0, don't ratelimit on checking --- synapse/handlers/register.py | 41 +++++++++++-------- .../storage/data_stores/main/registration.py | 8 ++-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index cff6b0d375..3c142a4395 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -168,6 +168,7 @@ class RegistrationHandler(BaseHandler): Raises: RegistrationError if there was a problem registering. """ + yield self._check_registration_ratelimit(address) yield self.auth.check_auth_blocking(threepid=threepid) password_hash = None @@ -414,6 +415,30 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) + def _check_registration_ratelimit(self, address): + """A simple helper method to check whether the registration rate limit has been hit + for a given IP address + + Args: + address (str): the IP address used to perform the registration. + + Raises: + LimitExceededError: If the rate limit has been exceeded. + """ + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, + time_now_s=time_now, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)) + ) + def register_with_store( self, user_id, @@ -446,22 +471,6 @@ class RegistrationHandler(BaseHandler): Returns: Deferred """ - # Don't rate limit for app services - if appservice_id is None and address is not None: - time_now = self.clock.time() - - allowed, time_allowed = self.ratelimiter.can_do_action( - address, - time_now_s=time_now, - rate_hz=self.hs.config.rc_registration.per_second, - burst_count=self.hs.config.rc_registration.burst_count, - ) - - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now)) - ) - if self.hs.config.worker_app: return self._register_client( user_id=user_id, diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index f70d41ecab..ee1b2b2bbf 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -488,14 +488,14 @@ class RegistrationWorkerStore(SQLBaseStore): we can. Unfortunately, it's possible some of them are already taken by existing users, and there may be gaps in the already taken range. This function returns the start of the first allocatable gap. This is to - avoid the case of ID 10000000 being pre-allocated, so us wasting the - first (and shortest) many generated user IDs. + avoid the case of ID 1000 being pre-allocated and starting at 1001 while + 0-999 are available. """ def _find_next_generated_user_id(txn): - # We bound between '@1' and '@a' to avoid pulling the entire table + # We bound between '@0' and '@a' to avoid pulling the entire table # out. - txn.execute("SELECT name FROM users WHERE '@1' <= name AND name < '@a'") + txn.execute("SELECT name FROM users WHERE '@0' <= name AND name < '@a'") regex = re.compile(r"^@(\d+):") From 4059d61e2608ac823ef04fe37f23fcac2387a37b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 12:01:54 +0000 Subject: [PATCH 38/85] Don't forget to ratelimit calls outside of RegistrationHandler --- synapse/handlers/register.py | 4 ++-- synapse/replication/http/register.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 3c142a4395..8be82e3754 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -168,7 +168,7 @@ class RegistrationHandler(BaseHandler): Raises: RegistrationError if there was a problem registering. """ - yield self._check_registration_ratelimit(address) + yield self.check_registration_ratelimit(address) yield self.auth.check_auth_blocking(threepid=threepid) password_hash = None @@ -415,7 +415,7 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - def _check_registration_ratelimit(self, address): + def check_registration_ratelimit(self, address): """A simple helper method to check whether the registration rate limit has been hit for a given IP address diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 915cfb9430..6f4bba7aa4 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -75,6 +75,8 @@ class ReplicationRegisterServlet(ReplicationEndpoint): async def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) + await self.registration_handler.check_registration_ratelimit(content["address"]) + await self.registration_handler.register_with_store( user_id=user_id, password_hash=content["password_hash"], From d2f6a67cb4c8f1ea1a4ae563dd53139838b019c7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 12:03:12 +0000 Subject: [PATCH 39/85] Add changelog --- changelog.d/6338.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6338.bugfix diff --git a/changelog.d/6338.bugfix b/changelog.d/6338.bugfix new file mode 100644 index 0000000000..8e469f0fb6 --- /dev/null +++ b/changelog.d/6338.bugfix @@ -0,0 +1 @@ +Prevent the server taking a long time to start up when guest registration is enabled. \ No newline at end of file From 4257feb20f328c83ac7cb27113f779e844623e30 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 6 Nov 2019 13:35:56 +0000 Subject: [PATCH 40/85] build debs for eoan and bullseye --- scripts-dev/build_debian_packages | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages index 93305ee9b1..84eaec6a95 100755 --- a/scripts-dev/build_debian_packages +++ b/scripts-dev/build_debian_packages @@ -20,11 +20,13 @@ from concurrent.futures import ThreadPoolExecutor DISTS = ( "debian:stretch", "debian:buster", + "debian:bullseye", "debian:sid", "ubuntu:xenial", "ubuntu:bionic", "ubuntu:cosmic", "ubuntu:disco", + "ubuntu:eoan", ) DESC = '''\ From 1fe3cc2c9c59001a6d3f7b28f81bd6681c3c03ac Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 14:54:24 +0000 Subject: [PATCH 41/85] Address review comments --- synapse/handlers/register.py | 24 ++++++++++++------------ synapse/replication/http/register.py | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 8be82e3754..47b9ae8d7f 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,7 +24,6 @@ from synapse.api.errors import ( AuthError, Codes, ConsentNotGivenError, - LimitExceededError, RegistrationError, SynapseError, ) @@ -218,8 +217,8 @@ class RegistrationHandler(BaseHandler): else: # autogen a sequential user ID - user = None - while not user: + # Fail after being unable to find a suitable ID a few times + for x in range(10): localpart = yield self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() @@ -234,10 +233,12 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=default_display_name, address=address, ) + + # Successfully registered + break except SynapseError: # if user id is taken, just generate another - user = None - user_id = None + pass if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) @@ -420,25 +421,24 @@ class RegistrationHandler(BaseHandler): for a given IP address Args: - address (str): the IP address used to perform the registration. + address (str|None): the IP address used to perform the registration. If this is + None, no ratelimiting will be performed. Raises: LimitExceededError: If the rate limit has been exceeded. """ + if not address: + return + time_now = self.clock.time() - allowed, time_allowed = self.ratelimiter.can_do_action( + self.ratelimiter.ratelimit( address, time_now_s=time_now, rate_hz=self.hs.config.rc_registration.per_second, burst_count=self.hs.config.rc_registration.burst_count, ) - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now)) - ) - def register_with_store( self, user_id, diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 6f4bba7aa4..0c4aca1291 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -75,7 +75,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): async def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) - await self.registration_handler.check_registration_ratelimit(content["address"]) + self.registration_handler.check_registration_ratelimit(content["address"]) await self.registration_handler.register_with_store( user_id=user_id, From 55bc8d531e0dfe6623d98a9e81ee9a63d1c2799a Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 6 Nov 2019 16:52:54 +0000 Subject: [PATCH 42/85] raise exception after multiple failures --- synapse/handlers/register.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 47b9ae8d7f..235f11c322 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -217,8 +217,13 @@ class RegistrationHandler(BaseHandler): else: # autogen a sequential user ID - # Fail after being unable to find a suitable ID a few times - for x in range(10): + fail_count = 0 + user = None + while not user: + # Fail after being unable to find a suitable ID a few times + if fail_count > 10: + raise SynapseError(500, "Unable to find a suitable guest user ID") + localpart = yield self._generate_user_id() user = UserID(localpart, self.hs.hostname) user_id = user.to_string() @@ -238,7 +243,9 @@ class RegistrationHandler(BaseHandler): break except SynapseError: # if user id is taken, just generate another - pass + user = None + user_id = None + fail_count += 1 if not self.hs.config.user_consent_at_registration: yield self._auto_join_rooms(user_id) From 71f3bd734fe9f8f903c5a496720e7dcf926f2f4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Nov 2019 17:00:18 +0000 Subject: [PATCH 43/85] Use correct type annotation --- synapse/storage/data_stores/main/events.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 3049a21dc5..d69c59f5a1 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,7 +19,7 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps -from typing import Set +from typing import Collection from six import iteritems, text_type from six.moves import range @@ -1736,7 +1736,7 @@ class EventsStore( return state_groups def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete: Set[int] + self, room_id: str, state_groups_to_delete: Collection[int] ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. From 5c3363233cca7044a333b7e19ba239eaf5587ff8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Nov 2019 17:02:05 +0000 Subject: [PATCH 44/85] Fix deleting state groups during room purge. And fix the tests to actually test that things got deleted. --- synapse/storage/data_stores/main/events.py | 27 +++++++++++----------- tests/rest/admin/test_admin.py | 4 +++- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index d69c59f5a1..946823876a 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -1633,7 +1633,20 @@ class EventsStore( return self.runInteraction("purge_room", self._purge_room_txn, room_id) def _purge_room_txn(self, txn, room_id): - # First delete tables which lack an index on room_id but have one on event_id + # First we fetch all the state groups that should be deleted, before + # we delete that information. + txn.execute( + """ + SELECT DISTINCT state_group FROM events + INNER JOIN event_to_state_groups USING(event_id) + WHERE events.room_id = ? + """, + (room_id,), + ) + + state_groups = [row[0] for row in txn] + + # Now we delete tables which lack an index on room_id but have one on event_id for table in ( "event_auth", "event_edges", @@ -1717,18 +1730,6 @@ class EventsStore( # index on them. In any case we should be clearing out 'stream' tables # periodically anyway (#5888) - # Now we fetch all the state groups that should be deleted. - txn.execute( - """ - SELECT DISTINCT state_group FROM events - INNER JOIN event_to_state_groups USING(event_id) - WHERE events.room_id = ? - """, - (room_id,), - ) - - state_groups = [row[0] for row in txn] - # TODO: we could probably usefully do a bunch of cache invalidation here logger.info("[purge] done") diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 8e1ca8b738..d9f1b95cb0 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -628,10 +628,12 @@ class PurgeRoomTestCase(unittest.HomeserverTestCase): "local_invites", "room_account_data", "room_tags", + "state_groups", + "state_groups_state", ): count = self.get_success( self.store._simple_select_one_onecol( - table="events", + table=table, keyvalues={"room_id": room_id}, retcol="COUNT(*)", desc="test_purge_room", From affcc2cc3655531351048a4ad8ac67e22d1e398d Mon Sep 17 00:00:00 2001 From: V02460 Date: Thu, 7 Nov 2019 10:43:51 +0100 Subject: [PATCH 45/85] Fix LruCache callback deduplication (#6213) --- changelog.d/6213.bugfix | 1 + synapse/util/caches/descriptors.py | 48 +++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 changelog.d/6213.bugfix diff --git a/changelog.d/6213.bugfix b/changelog.d/6213.bugfix new file mode 100644 index 0000000000..072264fba3 --- /dev/null +++ b/changelog.d/6213.bugfix @@ -0,0 +1 @@ +Fix LruCache callback deduplication. diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 0e8da27f53..84f5ae22c3 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -17,8 +17,8 @@ import functools import inspect import logging import threading -from collections import namedtuple -from typing import Any, cast +from typing import Any, Tuple, Union, cast +from weakref import WeakValueDictionary from six import itervalues @@ -38,6 +38,8 @@ from . import register_cache logger = logging.getLogger(__name__) +CacheKey = Union[Tuple, Any] + class _CachedFunction(Protocol): invalidate = None # type: Any @@ -430,7 +432,7 @@ class CacheDescriptor(_CacheDescriptorBase): # Add our own `cache_context` to argument list if the wrapped function # has asked for one if self.add_cache_context: - kwargs["cache_context"] = _CacheContext(cache, cache_key) + kwargs["cache_context"] = _CacheContext.get_instance(cache, cache_key) try: cached_result_d = cache.get(cache_key, callback=invalidate_callback) @@ -624,14 +626,38 @@ class CacheListDescriptor(_CacheDescriptorBase): return wrapped -class _CacheContext(namedtuple("_CacheContext", ("cache", "key"))): - # We rely on _CacheContext implementing __eq__ and __hash__ sensibly, - # which namedtuple does for us (i.e. two _CacheContext are the same if - # their caches and keys match). This is important in particular to - # dedupe when we add callbacks to lru cache nodes, otherwise the number - # of callbacks would grow. - def invalidate(self): - self.cache.invalidate(self.key) +class _CacheContext: + """Holds cache information from the cached function higher in the calling order. + + Can be used to invalidate the higher level cache entry if something changes + on a lower level. + """ + + _cache_context_objects = ( + WeakValueDictionary() + ) # type: WeakValueDictionary[Tuple[Cache, CacheKey], _CacheContext] + + def __init__(self, cache, cache_key): # type: (Cache, CacheKey) -> None + self._cache = cache + self._cache_key = cache_key + + def invalidate(self): # type: () -> None + """Invalidates the cache entry referred to by the context.""" + self._cache.invalidate(self._cache_key) + + @classmethod + def get_instance(cls, cache, cache_key): # type: (Cache, CacheKey) -> _CacheContext + """Returns an instance constructed with the given arguments. + + A new instance is only created if none already exists. + """ + + # We make sure there are no identical _CacheContext instances. This is + # important in particular to dedupe when we add callbacks to lru cache + # nodes, otherwise the number of callbacks would grow. + return cls._cache_context_objects.setdefault( + (cache, cache_key), cls(cache, cache_key) + ) def cached( From b03cddaeb99437311094fbe617ae2a6bde4c5615 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 7 Nov 2019 09:46:25 +0000 Subject: [PATCH 46/85] tweak changelog --- changelog.d/6213.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/6213.bugfix b/changelog.d/6213.bugfix index 072264fba3..2bb2d08851 100644 --- a/changelog.d/6213.bugfix +++ b/changelog.d/6213.bugfix @@ -1 +1 @@ -Fix LruCache callback deduplication. +Fix LruCache callback deduplication for Python 3.8. Contributed by @V02460. From 3f9b61ff9504dd88cac17fb3cb097e319babd2a3 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 11:49:37 +0000 Subject: [PATCH 47/85] Fix the SQL SELECT query in _paginate_room_events_txn Doing a SELECT DISTINCT when paginating is quite expensive, because it requires the engine to do sorting on the entire events table. However, we only need to run it if we're filtering on 2+ labels, so this PR is changing the request so that DISTINCT is only used then. --- synapse/storage/data_stores/main/stream.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 616ef91d4e..ef0b1426d1 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -871,14 +871,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args.append(int(limit)) + # Using DISTINCT in this SELECT query is quite expensive, because it requires the + # engine to sort on the entire (not limited) result set, i.e. the entire events + # table. We only need to use it when we're filtering on more than two labels, + # because that's the only scenario in which we can possibly to get multiple times + # the same event ID in the results. + if event_filter.labels and len(event_filter.labels) > 1: + select_keywords = "SELECT DISTINCT" + + else: + select_keywords = "SELECT" + sql = ( - "SELECT DISTINCT event_id, topological_ordering, stream_ordering" + "%(select_keywords)s event_id, topological_ordering, stream_ordering" " FROM events" " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s LIMIT ?" - ) % {"bounds": bounds, "order": order} + ) % {"select_keywords": select_keywords, "bounds": bounds, "order": order} txn.execute(sql, args) From 4f519d556e32ac29a960977df4ed14c42290af5e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 11:51:54 +0000 Subject: [PATCH 48/85] Changelog --- changelog.d/6340.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6340.feature diff --git a/changelog.d/6340.feature b/changelog.d/6340.feature new file mode 100644 index 0000000000..78a187a1dc --- /dev/null +++ b/changelog.d/6340.feature @@ -0,0 +1 @@ +Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). From 15a1a02e70ca18d8af9a4faaf1bb40427ea6a643 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 12:04:37 +0000 Subject: [PATCH 49/85] Handle lack of filter --- synapse/storage/data_stores/main/stream.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index ef0b1426d1..bb70a0f38a 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -876,11 +876,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # table. We only need to use it when we're filtering on more than two labels, # because that's the only scenario in which we can possibly to get multiple times # the same event ID in the results. - if event_filter.labels and len(event_filter.labels) > 1: - select_keywords = "SELECT DISTINCT" - - else: - select_keywords = "SELECT" + select_keywords = "SELECT" + if event_filter and event_filter.labels and len(event_filter.labels) > 1: + select_keywords += "DISTINCT" sql = ( "%(select_keywords)s event_id, topological_ordering, stream_ordering" From 70804392ae42949109e55e4e51566e2545e1df63 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 14:55:10 +0000 Subject: [PATCH 50/85] Only join on event_labels if we're filtering on labels --- synapse/storage/data_stores/main/stream.py | 33 ++++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index bb70a0f38a..064f602a65 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -871,23 +871,38 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): args.append(int(limit)) - # Using DISTINCT in this SELECT query is quite expensive, because it requires the - # engine to sort on the entire (not limited) result set, i.e. the entire events - # table. We only need to use it when we're filtering on more than two labels, - # because that's the only scenario in which we can possibly to get multiple times - # the same event ID in the results. select_keywords = "SELECT" - if event_filter and event_filter.labels and len(event_filter.labels) > 1: - select_keywords += "DISTINCT" + join_clause = "" + if event_filter and event_filter.labels: + # If we're not filtering on a label, then joining on event_labels will + # return as many row for a single event as the number of labels it has. To + # avoid this, only join if we're filtering on at least one label. + join_clause = ( + "LEFT JOIN event_labels" + " USING (event_id, room_id, topological_ordering)" + ) + if len(event_filter.labels) > 1: + # Using DISTINCT in this SELECT query is quite expensive, because it + # requires the engine to sort on the entire (not limited) result set, + # i.e. the entire events table. We only need to use it when we're + # filtering on more than two labels, because that's the only scenario + # in which we can possibly to get multiple times the same event ID in + # the results. + select_keywords += "DISTINCT" sql = ( "%(select_keywords)s event_id, topological_ordering, stream_ordering" " FROM events" - " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)" + " %(join_clause)s" " WHERE outlier = ? AND room_id = ? AND %(bounds)s" " ORDER BY topological_ordering %(order)s," " stream_ordering %(order)s LIMIT ?" - ) % {"select_keywords": select_keywords, "bounds": bounds, "order": order} + ) % { + "select_keywords": select_keywords, + "join_clause": join_clause, + "bounds": bounds, + "order": order + } txn.execute(sql, args) From b9cba079628db11951fc5ed30b03e8825eb954d7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 14:57:15 +0000 Subject: [PATCH 51/85] Lint --- synapse/storage/data_stores/main/stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 064f602a65..90bb2d8e8f 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -876,7 +876,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if event_filter and event_filter.labels: # If we're not filtering on a label, then joining on event_labels will # return as many row for a single event as the number of labels it has. To - # avoid this, only join if we're filtering on at least one label. + # avoid this, only join if we're filtering on at least one label. join_clause = ( "LEFT JOIN event_labels" " USING (event_id, room_id, topological_ordering)" @@ -901,7 +901,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "select_keywords": select_keywords, "join_clause": join_clause, "bounds": bounds, - "order": order + "order": order, } txn.execute(sql, args) From bb78276bdc77c0462696529c27fd8a6062b37afc Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 15:25:27 +0000 Subject: [PATCH 52/85] Incorporate review --- .../data_stores/main/events_bg_updates.py | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 309bfcafe5..9714662c11 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,43 +525,38 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - rows = self.cursor_to_dict(txn) - if not rows: - return True, 0 - - for row in rows: - event_id = row["event_id"] - event_json = json.loads(row["json"]) - + nbrows = 0 + for (event_id, event_json) in txn: self._simple_insert_many_txn( txn=txn, table="event_labels", values=[ { "event_id": event_id, - "label": label, + "label": str(label), "room_id": event_json["room_id"], "topological_ordering": event_json["depth"], } for label in event_json["content"].get( EventContentFields.LABELS, [] ) + if label is not None ], ) + nbrows += 1 + self._background_update_progress_txn( txn, "event_store_labels", {"last_event_id": event_id} ) - # We want to return true (to end the background update) only when - # the query returned with less rows than we asked for. - return len(rows) != batch_size, len(rows) + return nbrows - end, num_rows = yield self.runInteraction( + num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn ) - if end: + if not num_rows: yield self._end_background_update("event_store_labels") return num_rows From ec2cb9f29829cf81f843124ef4289bc0eb88413e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:18:40 +0000 Subject: [PATCH 53/85] Initialise value before looping --- .../storage/data_stores/main/events_bg_updates.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 9714662c11..bee8c9cfa0 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -526,28 +526,32 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ) nbrows = 0 - for (event_id, event_json) in txn: + last_row_event_id = "" + for (event_id, event_json_raw) in txn: + event_json = json.loads(event_json_raw) + self._simple_insert_many_txn( txn=txn, table="event_labels", values=[ { "event_id": event_id, - "label": str(label), + "label": label, "room_id": event_json["room_id"], "topological_ordering": event_json["depth"], } for label in event_json["content"].get( EventContentFields.LABELS, [] ) - if label is not None + if label is not None and isinstance(label, str) ], ) nbrows += 1 + last_row_event_id = event_id self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": event_id} + txn, "event_store_labels", {"last_event_id": last_row_event_id} ) return nbrows From 1186612d6cd728e6b7ed7806579db3cea7410b54 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:46:41 +0000 Subject: [PATCH 54/85] Back to using cursor_to_dict --- .../data_stores/main/events_bg_updates.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index bee8c9cfa0..b858e3ac1a 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,10 +525,13 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - nbrows = 0 - last_row_event_id = "" - for (event_id, event_json_raw) in txn: - event_json = json.loads(event_json_raw) + rows = self.cursor_to_dict(txn) + if not len(rows): + return 0 + + for row in rows: + event_id = row["event_id"] + event_json = json.loads(row["event_json"]) self._simple_insert_many_txn( txn=txn, @@ -547,14 +550,11 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ], ) - nbrows += 1 - last_row_event_id = event_id - self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": last_row_event_id} + txn, "event_store_labels", {"last_event_id": event_id} ) - return nbrows + return len(rows) num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn From cd312012676d39da8d0383cba167d1211378fece Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:47:15 +0000 Subject: [PATCH 55/85] Revert "Back to using cursor_to_dict" This reverts commit 1186612d6cd728e6b7ed7806579db3cea7410b54. --- .../data_stores/main/events_bg_updates.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index b858e3ac1a..bee8c9cfa0 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,13 +525,10 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) - rows = self.cursor_to_dict(txn) - if not len(rows): - return 0 - - for row in rows: - event_id = row["event_id"] - event_json = json.loads(row["event_json"]) + nbrows = 0 + last_row_event_id = "" + for (event_id, event_json_raw) in txn: + event_json = json.loads(event_json_raw) self._simple_insert_many_txn( txn=txn, @@ -550,11 +547,14 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): ], ) + nbrows += 1 + last_row_event_id = event_id + self._background_update_progress_txn( - txn, "event_store_labels", {"last_event_id": event_id} + txn, "event_store_labels", {"last_event_id": last_row_event_id} ) - return len(rows) + return nbrows num_rows = yield self.runInteraction( desc="event_store_labels", func=_event_store_labels_txn From c9b27d00445f84ebfbd4115e3177a10513aadcfc Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 16:47:45 +0000 Subject: [PATCH 56/85] Copy results --- synapse/storage/data_stores/main/events_bg_updates.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index bee8c9cfa0..a4cb64f479 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -525,9 +525,11 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): (last_event_id, batch_size), ) + results = list(txn) + nbrows = 0 last_row_event_id = "" - for (event_id, event_json_raw) in txn: + for (event_id, event_json_raw) in results: event_json = json.loads(event_json_raw) self._simple_insert_many_txn( From 6d360f099f3ea09c90795e5a6c4fc07dbffd874e Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 17:01:43 +0000 Subject: [PATCH 57/85] Update synapse/storage/data_stores/main/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index a4cb64f479..18862adb9c 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -545,7 +545,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): for label in event_json["content"].get( EventContentFields.LABELS, [] ) - if label is not None and isinstance(label, str) + if isinstance(label, str) ], ) From dad8d68c9938e7d09eefd6aa1b633494ab89f719 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 7 Nov 2019 17:01:53 +0000 Subject: [PATCH 58/85] Update synapse/storage/data_stores/main/events_bg_updates.py Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/data_stores/main/events_bg_updates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 18862adb9c..0ed59ef48e 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -511,7 +511,7 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): @defer.inlineCallbacks def _event_store_labels(self, progress, batch_size): - """Stores labels for events.""" + """Background update handler which will store labels for existing events.""" last_event_id = progress.get("last_event_id", "") def _event_store_labels_txn(txn): From c5abb67e432b9279c838f7b9318144ca8f2b7c0d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 7 Nov 2019 17:14:13 +0000 Subject: [PATCH 59/85] Python 3.8 for tox (#6341) ... and update INSTALL.md to include py3.8. We'll also have to update the buildkite pipeline to run it --- INSTALL.md | 2 +- changelog.d/6341.misc | 1 + tox.ini | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6341.misc diff --git a/INSTALL.md b/INSTALL.md index e7b429c05d..29e0abafd3 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -36,7 +36,7 @@ that your email address is probably `user@example.com` rather than System requirements: - POSIX-compliant system (tested on Linux & OS X) -- Python 3.5, 3.6, or 3.7 +- Python 3.5, 3.6, 3.7 or 3.8. - At least 1GB of free RAM if you want to join large public rooms like #matrix:matrix.org Synapse is written in Python but some of the libraries it uses are written in diff --git a/changelog.d/6341.misc b/changelog.d/6341.misc new file mode 100644 index 0000000000..359b9bf1d7 --- /dev/null +++ b/changelog.d/6341.misc @@ -0,0 +1 @@ +Add continuous integration for python 3.8. \ No newline at end of file diff --git a/tox.ini b/tox.ini index afe9bc909b..62b350ea6a 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = packaging, py35, py36, py37, check_codestyle, check_isort +envlist = packaging, py35, py36, py37, py38, check_codestyle, check_isort [base] basepython = python3.7 From e4ec82ce0fdd17f912fad76defd53ec99f782528 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Nov 2019 09:50:48 +0000 Subject: [PATCH 60/85] Move type annotation into docstring --- synapse/storage/data_stores/main/events.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 946823876a..878f7568a6 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -19,7 +19,6 @@ import itertools import logging from collections import Counter as c_counter, OrderedDict, namedtuple from functools import wraps -from typing import Collection from six import iteritems, text_type from six.moves import range @@ -1737,7 +1736,7 @@ class EventsStore( return state_groups def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete: Collection[int] + self, room_id: str, state_groups_to_delete ) -> defer.Deferred: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. @@ -1745,7 +1744,8 @@ class EventsStore( Args: room_id: The room the state groups belong to (must all be in the same room). - state_groups_to_delete: Set of all state groups to delete. + state_groups_to_delete (Collection[int]): Set of all state groups + to delete. """ return self.runInteraction( From b16fa433860824560c64acf594aa6c891e5f1a0a Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 8 Nov 2019 10:34:09 +0000 Subject: [PATCH 61/85] Incorporate review --- synapse/storage/data_stores/main/stream.py | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 90bb2d8e8f..8780fdd989 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -877,10 +877,10 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # If we're not filtering on a label, then joining on event_labels will # return as many row for a single event as the number of labels it has. To # avoid this, only join if we're filtering on at least one label. - join_clause = ( - "LEFT JOIN event_labels" - " USING (event_id, room_id, topological_ordering)" - ) + join_clause = """ + LEFT JOIN event_labels + USING (event_id, room_id, topological_ordering) + """ if len(event_filter.labels) > 1: # Using DISTINCT in this SELECT query is quite expensive, because it # requires the engine to sort on the entire (not limited) result set, @@ -890,14 +890,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # the results. select_keywords += "DISTINCT" - sql = ( - "%(select_keywords)s event_id, topological_ordering, stream_ordering" - " FROM events" - " %(join_clause)s" - " WHERE outlier = ? AND room_id = ? AND %(bounds)s" - " ORDER BY topological_ordering %(order)s," - " stream_ordering %(order)s LIMIT ?" - ) % { + sql = """ + %(select_keywords)s event_id, topological_ordering, stream_ordering + FROM events + %(join_clause)s + WHERE outlier = ? AND room_id = ? AND %(bounds)s + ORDER BY topological_ordering %(order)s, + stream_ordering %(order)s LIMIT ? + """ % { "select_keywords": select_keywords, "join_clause": join_clause, "bounds": bounds, From bc29a19731c518dbd70f3adefc66061fb4629cee Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 12 Nov 2019 13:08:12 +0000 Subject: [PATCH 62/85] Replace instance variations of homeserver with correct case/spacing --- synapse/__init__.py | 2 +- synapse/_scripts/register_new_matrix_user.py | 6 +++--- synapse/api/errors.py | 2 +- synapse/config/captcha.py | 4 ++-- synapse/config/emailconfig.py | 2 +- synapse/config/server.py | 2 +- synapse/federation/federation_client.py | 6 +++--- synapse/federation/transport/__init__.py | 4 ++-- synapse/federation/transport/client.py | 6 +++--- synapse/federation/transport/server.py | 2 +- synapse/handlers/auth.py | 4 ++-- synapse/handlers/directory.py | 2 +- synapse/handlers/federation.py | 4 ++-- synapse/handlers/profile.py | 6 +++--- synapse/handlers/register.py | 2 +- synapse/handlers/typing.py | 4 ++-- synapse/http/matrixfederationclient.py | 2 +- synapse/util/httpresourcetree.py | 2 +- 18 files changed, 31 insertions(+), 31 deletions(-) diff --git a/synapse/__init__.py b/synapse/__init__.py index ec16f54a49..1c27d68009 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" This is a reference implementation of a Matrix home server. +""" This is a reference implementation of a Matrix homeserver. """ import os diff --git a/synapse/_scripts/register_new_matrix_user.py b/synapse/_scripts/register_new_matrix_user.py index bdcd915bbe..d528450c78 100644 --- a/synapse/_scripts/register_new_matrix_user.py +++ b/synapse/_scripts/register_new_matrix_user.py @@ -144,8 +144,8 @@ def main(): logging.captureWarnings(True) parser = argparse.ArgumentParser( - description="Used to register new users with a given home server when" - " registration has been disabled. The home server must be" + description="Used to register new users with a given homeserver when" + " registration has been disabled. The homeserver must be" " configured with the 'registration_shared_secret' option" " set." ) @@ -202,7 +202,7 @@ def main(): "server_url", default="https://localhost:8448", nargs="?", - help="URL to use to talk to the home server. Defaults to " + help="URL to use to talk to the homeserver. Defaults to " " 'https://localhost:8448'.", ) diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cca92c34ba..5853a54c95 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -457,7 +457,7 @@ def cs_error(msg, code=Codes.UNKNOWN, **kwargs): class FederationError(RuntimeError): - """ This class is used to inform remote home servers about erroneous + """ This class is used to inform remote homeservers about erroneous PDUs they sent us. FATAL: The remote server could not interpret the source event. diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index 44bd5c6799..f0171bb5b2 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -35,11 +35,11 @@ class CaptchaConfig(Config): ## Captcha ## # See docs/CAPTCHA_SETUP for full details of configuring this. - # This Home Server's ReCAPTCHA public key. + # This homeserver's ReCAPTCHA public key. # #recaptcha_public_key: "YOUR_PUBLIC_KEY" - # This Home Server's ReCAPTCHA private key. + # This homeserver's ReCAPTCHA private key. # #recaptcha_private_key: "YOUR_PRIVATE_KEY" diff --git a/synapse/config/emailconfig.py b/synapse/config/emailconfig.py index 39e7a1dddb..43fad0bf8b 100644 --- a/synapse/config/emailconfig.py +++ b/synapse/config/emailconfig.py @@ -305,7 +305,7 @@ class EmailConfig(Config): # smtp_user: "exampleusername" # smtp_pass: "examplepassword" # require_transport_security: false - # notif_from: "Your Friendly %(app)s Home Server " + # notif_from: "Your Friendly %(app)s homeserver " # app_name: Matrix # # # Enable email notifications by default diff --git a/synapse/config/server.py b/synapse/config/server.py index d556df308d..a04e600fda 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -781,7 +781,7 @@ class ServerConfig(Config): "--daemonize", action="store_true", default=None, - help="Daemonize the home server", + help="Daemonize the homeserver", ) server_group.add_argument( "--print-pidfile", diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 545d719652..27f6aff004 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -177,7 +177,7 @@ class FederationClient(FederationBase): given destination server. Args: - dest (str): The remote home server to ask. + dest (str): The remote homeserver to ask. room_id (str): The room_id to backfill. limit (int): The maximum number of PDUs to return. extremities (list): List of PDU id and origins of the first pdus @@ -227,7 +227,7 @@ class FederationClient(FederationBase): one succeeds. Args: - destinations (list): Which home servers to query + destinations (list): Which homeservers to query event_id (str): event to fetch room_version (str): version of the room outlier (bool): Indicates whether the PDU is an `outlier`, i.e. if @@ -312,7 +312,7 @@ class FederationClient(FederationBase): @defer.inlineCallbacks @log_function def get_state_for_room(self, destination, room_id, event_id): - """Requests all of the room state at a given event from a remote home server. + """Requests all of the room state at a given event from a remote homeserver. Args: destination (str): The remote homeserver to query for the state. diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py index d9fcc520a0..5db733af98 100644 --- a/synapse/federation/transport/__init__.py +++ b/synapse/federation/transport/__init__.py @@ -14,9 +14,9 @@ # limitations under the License. """The transport layer is responsible for both sending transactions to remote -home servers and receiving a variety of requests from other home servers. +homeservers and receiving a variety of requests from other homeservers. -By default this is done over HTTPS (and all home servers are required to +By default this is done over HTTPS (and all homeservers are required to support HTTPS), however individual pairings of servers may decide to communicate over a different (albeit still reliable) protocol. """ diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 920fa86853..dc95ab2113 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -44,7 +44,7 @@ class TransportLayerClient(object): given event. Args: - destination (str): The host name of the remote home server we want + destination (str): The host name of the remote homeserver we want to get the state from. context (str): The name of the context we want the state of event_id (str): The event we want the context at. @@ -68,7 +68,7 @@ class TransportLayerClient(object): given event. Returns the state's event_id's Args: - destination (str): The host name of the remote home server we want + destination (str): The host name of the remote homeserver we want to get the state from. context (str): The name of the context we want the state of event_id (str): The event we want the context at. @@ -91,7 +91,7 @@ class TransportLayerClient(object): """ Requests the pdu with give id and origin from the given server. Args: - destination (str): The host name of the remote home server we want + destination (str): The host name of the remote homeserver we want to get the state from. event_id (str): The id of the event being requested. timeout (int): How long to try (in ms) the destination for before diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index d6c23f22bd..09baa9c57d 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -714,7 +714,7 @@ class PublicRoomList(BaseFederationServlet): This API returns information in the same format as /publicRooms on the client API, but will only ever include local public rooms and hence is - intended for consumption by other home servers. + intended for consumption by other homeservers. GET /publicRooms HTTP/1.1 diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 7a0f54ca24..c9d0db4823 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -223,7 +223,7 @@ class AuthHandler(BaseHandler): # could continue registration from your phone having clicked the # email auth link on there). It's probably too open to abuse # because it lets unauthenticated clients store arbitrary objects - # on a home server. + # on a homeserver. # Revisit: Assumimg the REST APIs do sensible validation, the data # isn't arbintrary. session["clientdict"] = clientdict @@ -810,7 +810,7 @@ class AuthHandler(BaseHandler): @defer.inlineCallbacks def add_threepid(self, user_id, medium, address, validated_at): # 'Canonicalise' email addresses down to lower case. - # We've now moving towards the Home Server being the entity that + # We've now moving towards the homeserver being the entity that # is responsible for validating threepids used for resetting passwords # on accounts, so in future Synapse will gain knowledge of specific # types (mediums) of threepid. For now, we still use the existing diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index c4632f8984..69051101a6 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -283,7 +283,7 @@ class DirectoryHandler(BaseHandler): def on_directory_query(self, args): room_alias = RoomAlias.from_string(args["room_alias"]) if not self.hs.is_mine(room_alias): - raise SynapseError(400, "Room Alias is not hosted on this Home Server") + raise SynapseError(400, "Room Alias is not hosted on this homeserver") result = yield self.get_association_from_room_alias(room_alias) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 05dd8d2671..0e904f2da0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -97,9 +97,9 @@ class FederationHandler(BaseHandler): """Handles events that originated from federation. Responsible for: a) handling received Pdus before handing them on as Events to the rest - of the home server (including auth and state conflict resoultion) + of the homeserver (including auth and state conflict resoultion) b) converting events that were produced by local clients that may need - to be sent to remote home servers. + to be sent to remote homeservers. c) doing the necessary dances to invite remote users and join remote rooms. """ diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 22e0a04da4..1e5a4613c9 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -152,7 +152,7 @@ class BaseProfileHandler(BaseHandler): by_admin (bool): Whether this change was made by an administrator. """ if not self.hs.is_mine(target_user): - raise SynapseError(400, "User is not hosted on this Home Server") + raise SynapseError(400, "User is not hosted on this homeserver") if not by_admin and target_user != requester.user: raise AuthError(400, "Cannot set another user's displayname") @@ -207,7 +207,7 @@ class BaseProfileHandler(BaseHandler): """target_user is the user whose avatar_url is to be changed; auth_user is the user attempting to make this change.""" if not self.hs.is_mine(target_user): - raise SynapseError(400, "User is not hosted on this Home Server") + raise SynapseError(400, "User is not hosted on this homeserver") if not by_admin and target_user != requester.user: raise AuthError(400, "Cannot set another user's avatar_url") @@ -231,7 +231,7 @@ class BaseProfileHandler(BaseHandler): def on_profile_query(self, args): user = UserID.from_string(args["user_id"]) if not self.hs.is_mine(user): - raise SynapseError(400, "User is not hosted on this Home Server") + raise SynapseError(400, "User is not hosted on this homeserver") just_field = args.get("field", None) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 235f11c322..95806af41e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -630,7 +630,7 @@ class RegistrationHandler(BaseHandler): # And we add an email pusher for them by default, but only # if email notifications are enabled (so people don't start # getting mail spam where they weren't before if email - # notifs are set up on a home server) + # notifs are set up on a homeserver) if ( self.hs.config.email_enable_notifs and self.hs.config.email_notif_for_new_users diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index ca8ae9fb5b..856337b7e2 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -120,7 +120,7 @@ class TypingHandler(object): auth_user_id = auth_user.to_string() if not self.is_mine_id(target_user_id): - raise SynapseError(400, "User is not hosted on this Home Server") + raise SynapseError(400, "User is not hosted on this homeserver") if target_user_id != auth_user_id: raise AuthError(400, "Cannot set another user's typing state") @@ -150,7 +150,7 @@ class TypingHandler(object): auth_user_id = auth_user.to_string() if not self.is_mine_id(target_user_id): - raise SynapseError(400, "User is not hosted on this Home Server") + raise SynapseError(400, "User is not hosted on this homeserver") if target_user_id != auth_user_id: raise AuthError(400, "Cannot set another user's typing state") diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 691380abda..16765d54e0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -530,7 +530,7 @@ class MatrixFederationHttpClient(object): """ Builds the Authorization headers for a federation request Args: - destination (bytes|None): The desination home server of the request. + destination (bytes|None): The desination homeserver of the request. May be None if the destination is an identity server, in which case destination_is must be non-None. method (bytes): The HTTP method of the request diff --git a/synapse/util/httpresourcetree.py b/synapse/util/httpresourcetree.py index 1a20c596bf..3c0e8469f3 100644 --- a/synapse/util/httpresourcetree.py +++ b/synapse/util/httpresourcetree.py @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) def create_resource_tree(desired_tree, root_resource): - """Create the resource tree for this Home Server. + """Create the resource tree for this homeserver. This in unduly complicated because Twisted does not support putting child resources more than 1 level deep at a time. From 73d091be48c6bfa1c41a8d31068d21fc28c26f6e Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 12 Nov 2019 13:12:25 +0000 Subject: [PATCH 63/85] A couple more instances --- synapse/config/server.py | 2 +- synapse/logging/_terse_json.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/config/server.py b/synapse/config/server.py index a04e600fda..a1bd3c0ae7 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -721,7 +721,7 @@ class ServerConfig(Config): # Used by phonehome stats to group together related servers. #server_context: context - # Resource-constrained Homeserver Settings + # Resource-constrained homeserver Settings # # If limit_remote_rooms.enabled is True, the room complexity will be # checked before a user joins a new remote room. If it is above diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 0ebbde06f2..76ce7d8808 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -153,7 +153,7 @@ class TerseJSONToTCPLogObserver(object): An IObserver that writes JSON logs to a TCP target. Args: - hs (HomeServer): The Homeserver that is being logged for. + hs (HomeServer): The homeserver that is being logged for. host: The host of the logging target. port: The logging target's port. metadata: Metadata to be added to each log entry. From 85f172ef968a9726cd62bc61fe98e39cdf017e15 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 12 Nov 2019 13:13:19 +0000 Subject: [PATCH 64/85] Add changelog --- changelog.d/6357.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6357.misc diff --git a/changelog.d/6357.misc b/changelog.d/6357.misc new file mode 100644 index 0000000000..a68df0f384 --- /dev/null +++ b/changelog.d/6357.misc @@ -0,0 +1 @@ +Correct spacing/case of various instances of the word "homeserver". \ No newline at end of file From e1648dc5763bda2cf10daafa5beebb4fbdfd2cb5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 12 Nov 2019 13:15:59 +0000 Subject: [PATCH 65/85] sample config --- docs/sample_config.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index d2f4aff826..da7e5f2e21 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -287,7 +287,7 @@ listeners: # Used by phonehome stats to group together related servers. #server_context: context -# Resource-constrained Homeserver Settings +# Resource-constrained homeserver Settings # # If limit_remote_rooms.enabled is True, the room complexity will be # checked before a user joins a new remote room. If it is above @@ -743,11 +743,11 @@ uploads_path: "DATADIR/uploads" ## Captcha ## # See docs/CAPTCHA_SETUP for full details of configuring this. -# This Home Server's ReCAPTCHA public key. +# This homeserver's ReCAPTCHA public key. # #recaptcha_public_key: "YOUR_PUBLIC_KEY" -# This Home Server's ReCAPTCHA private key. +# This homeserver's ReCAPTCHA private key. # #recaptcha_private_key: "YOUR_PRIVATE_KEY" @@ -1270,7 +1270,7 @@ password_config: # smtp_user: "exampleusername" # smtp_pass: "examplepassword" # require_transport_security: false -# notif_from: "Your Friendly %(app)s Home Server " +# notif_from: "Your Friendly %(app)s homeserver " # app_name: Matrix # # # Enable email notifications by default From c350bc2f92d87e46a40f917f65c9e10e0f4999fc Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 13 Nov 2019 19:09:20 +0000 Subject: [PATCH 66/85] Blacklist PurgeRoomTestCase (#6361) --- changelog.d/6361.misc | 1 + tests/rest/admin/test_admin.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/6361.misc diff --git a/changelog.d/6361.misc b/changelog.d/6361.misc new file mode 100644 index 0000000000..324d74ebf9 --- /dev/null +++ b/changelog.d/6361.misc @@ -0,0 +1 @@ +Temporarily blacklist the failing unit test PurgeRoomTestCase.test_purge_room. diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index d9f1b95cb0..9575058252 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -641,3 +641,5 @@ class PurgeRoomTestCase(unittest.HomeserverTestCase): ) self.assertEqual(count, 0, msg="Rows not purged in {}".format(table)) + + test_purge_room.skip = "Disabled because it's currently broken" From 745a48625d9760374a7d683441185fa8bd2a2aac Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 14 Nov 2019 12:02:05 +0000 Subject: [PATCH 67/85] Fix guest -> real account upgrade with account validity enabled (#6359) --- changelog.d/6359.bugfix | 1 + synapse/storage/_base.py | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) create mode 100644 changelog.d/6359.bugfix diff --git a/changelog.d/6359.bugfix b/changelog.d/6359.bugfix new file mode 100644 index 0000000000..22bf5f642a --- /dev/null +++ b/changelog.d/6359.bugfix @@ -0,0 +1 @@ +Fix bug where upgrading a guest account to a full user would fail when account validity is enabled. \ No newline at end of file diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1a2b7ebe25..ab596fa68d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -361,14 +361,11 @@ class SQLBaseStore(object): expiration_ts, ) - self._simple_insert_txn( + self._simple_upsert_txn( txn, "account_validity", - values={ - "user_id": user_id, - "expiration_ts_ms": expiration_ts, - "email_sent": False, - }, + keyvalues={"user_id": user_id}, + values={"expiration_ts_ms": expiration_ts, "email_sent": False}, ) def start_profiling(self): From 53b6559a8906ba56f9f50469e0c2bec430614a4e Mon Sep 17 00:00:00 2001 From: James Date: Fri, 15 Nov 2019 05:42:46 +1100 Subject: [PATCH 68/85] Add optional python dependencies to snap packaging (#6317) Signed-off-by: James Hebden --- changelog.d/6317.misc | 1 + snap/snapcraft.yaml | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 changelog.d/6317.misc diff --git a/changelog.d/6317.misc b/changelog.d/6317.misc new file mode 100644 index 0000000000..a67d13fa72 --- /dev/null +++ b/changelog.d/6317.misc @@ -0,0 +1 @@ +Add optional python dependencies and dependant binary libraries to snapcraft packaging. diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index 1f7df71db2..9e644e8567 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -20,3 +20,23 @@ parts: source: . plugin: python python-version: python3 + python-packages: + - '.[all]' + build-packages: + - libffi-dev + - libturbojpeg0-dev + - libssl-dev + - libxslt1-dev + - libpq-dev + - zlib1g-dev + stage-packages: + - libasn1-8-heimdal + - libgssapi3-heimdal + - libhcrypto4-heimdal + - libheimbase1-heimdal + - libheimntlm0-heimdal + - libhx509-5-heimdal + - libkrb5-26-heimdal + - libldap-2.4-2 + - libpq5 + - libsasl2-2 From 657d614f6a53f3dbfd2858bd85d0f81563db0041 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 15 Nov 2019 14:02:34 +0000 Subject: [PATCH 69/85] Replace UPDATE with UPSERT on device_max_stream_id table (#6363) --- changelog.d/6363.bugfix | 1 + synapse/storage/data_stores/main/deviceinbox.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6363.bugfix diff --git a/changelog.d/6363.bugfix b/changelog.d/6363.bugfix new file mode 100644 index 0000000000..d023b49181 --- /dev/null +++ b/changelog.d/6363.bugfix @@ -0,0 +1 @@ +Fix `to_device` stream ID getting reset every time Synapse restarts, which had the potential to cause unable to decrypt errors. \ No newline at end of file diff --git a/synapse/storage/data_stores/main/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py index f04aad0743..96cd0fb77a 100644 --- a/synapse/storage/data_stores/main/deviceinbox.py +++ b/synapse/storage/data_stores/main/deviceinbox.py @@ -358,8 +358,21 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) def _add_messages_to_local_device_inbox_txn( self, txn, stream_id, messages_by_user_then_device ): - sql = "UPDATE device_max_stream_id" " SET stream_id = ?" " WHERE stream_id < ?" - txn.execute(sql, (stream_id, stream_id)) + # Compatible method of performing an upsert + sql = "SELECT stream_id FROM device_max_stream_id" + + txn.execute(sql) + rows = txn.fetchone() + if rows: + db_stream_id = rows[0] + if db_stream_id < stream_id: + # Insert the new stream_id + sql = "UPDATE device_max_stream_id SET stream_id = ?" + else: + # No rows, perform an insert + sql = "INSERT INTO device_max_stream_id (stream_id) VALUES (?)" + + txn.execute(sql, (stream_id,)) local_by_user_then_device = {} for user_id, messages_by_device in messages_by_user_then_device.items(): From c7376cdfe3efe05942964efcdf8886d66342383c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 18 Nov 2019 17:10:16 +0000 Subject: [PATCH 70/85] Apply suggestions from code review Co-Authored-By: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Co-Authored-By: Brendan Abolivier --- synapse/handlers/auth.py | 4 ++-- synapse/rest/client/v1/login.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 20c62bd780..0955cf9dba 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -135,8 +135,8 @@ class AuthHandler(BaseHandler): AuthError if the client has completed a login flow, and it gives a different user to `requester` - LimitExceededError if the ratelimiter's failed requests count for this - user is too high too proceed + LimitExceededError if the ratelimiter's failed request count for this + user is too high to proceed """ diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index abc210da57..f8d58afb29 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -397,7 +397,7 @@ class LoginRestServlet(RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() - result = yield self._complete_login(user_id, login_submission) + result = yield self._complete_login(user_id, login_submission, create_non_existant_users=True) return result From 271c322d08a3d13c986d97cbc40e72eef50e92ba Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 20 Nov 2019 09:29:48 +0000 Subject: [PATCH 71/85] Lint --- synapse/rest/client/v1/login.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index f8d58afb29..19eb15003d 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -397,7 +397,9 @@ class LoginRestServlet(RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() - result = yield self._complete_login(user_id, login_submission, create_non_existant_users=True) + result = yield self._complete_login( + user_id, login_submission, create_non_existant_users=True + ) return result From 4f5ca455bf17b52d70ab08be043178b4678cc4b8 Mon Sep 17 00:00:00 2001 From: Manuel Stahl <37705355+awesome-manuel@users.noreply.github.com> Date: Wed, 20 Nov 2019 12:49:11 +0100 Subject: [PATCH 72/85] Move admin endpoints into separate files (#6308) --- changelog.d/6308.misc | 1 + synapse/rest/admin/__init__.py | 567 +-------------------------------- synapse/rest/admin/groups.py | 46 +++ synapse/rest/admin/rooms.py | 157 +++++++++ synapse/rest/admin/users.py | 406 ++++++++++++++++++++++- 5 files changed, 622 insertions(+), 555 deletions(-) create mode 100644 changelog.d/6308.misc create mode 100644 synapse/rest/admin/groups.py create mode 100644 synapse/rest/admin/rooms.py diff --git a/changelog.d/6308.misc b/changelog.d/6308.misc new file mode 100644 index 0000000000..72be63ba4b --- /dev/null +++ b/changelog.d/6308.misc @@ -0,0 +1 @@ +Move admin endpoints into separate files. Contributed by Awesome Technologies Innovationslabor GmbH. \ No newline at end of file diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 5c2a2eb593..68a59a3424 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -14,62 +14,39 @@ # See the License for the specific language governing permissions and # limitations under the License. -import hashlib -import hmac import logging import platform import re -from six import text_type -from six.moves import http_client - import synapse -from synapse.api.constants import Membership, UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import JsonResource -from synapse.http.servlet import ( - RestServlet, - assert_params_in_dict, - parse_integer, - parse_json_object_from_request, - parse_string, -) +from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.rest.admin._base import ( assert_requester_is_admin, - assert_user_is_admin, historical_admin_path_patterns, ) +from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.purge_room_servlet import PurgeRoomServlet +from synapse.rest.admin.rooms import ShutdownRoomRestServlet from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet -from synapse.rest.admin.users import UserAdminServlet -from synapse.types import UserID, create_requester -from synapse.util.async_helpers import maybe_awaitable +from synapse.rest.admin.users import ( + AccountValidityRenewServlet, + DeactivateAccountRestServlet, + GetUsersPaginatedRestServlet, + ResetPasswordRestServlet, + SearchUsersRestServlet, + UserAdminServlet, + UserRegisterServlet, + UsersRestServlet, + WhoisRestServlet, +) from synapse.util.versionstring import get_version_string logger = logging.getLogger(__name__) -class UsersRestServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/users/(?P[^/]*)$") - - def __init__(self, hs): - self.hs = hs - self.auth = hs.get_auth() - self.handlers = hs.get_handlers() - - async def on_GET(self, request, user_id): - target_user = UserID.from_string(user_id) - await assert_requester_is_admin(self.auth, request) - - if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only users a local user") - - ret = await self.handlers.admin_handler.get_users() - - return 200, ret - - class VersionServlet(RestServlet): PATTERNS = (re.compile("^/_synapse/admin/v1/server_version$"),) @@ -83,159 +60,6 @@ class VersionServlet(RestServlet): return 200, self.res -class UserRegisterServlet(RestServlet): - """ - Attributes: - NONCE_TIMEOUT (int): Seconds until a generated nonce won't be accepted - nonces (dict[str, int]): The nonces that we will accept. A dict of - nonce to the time it was generated, in int seconds. - """ - - PATTERNS = historical_admin_path_patterns("/register") - NONCE_TIMEOUT = 60 - - def __init__(self, hs): - self.handlers = hs.get_handlers() - self.reactor = hs.get_reactor() - self.nonces = {} - self.hs = hs - - def _clear_old_nonces(self): - """ - Clear out old nonces that are older than NONCE_TIMEOUT. - """ - now = int(self.reactor.seconds()) - - for k, v in list(self.nonces.items()): - if now - v > self.NONCE_TIMEOUT: - del self.nonces[k] - - def on_GET(self, request): - """ - Generate a new nonce. - """ - self._clear_old_nonces() - - nonce = self.hs.get_secrets().token_hex(64) - self.nonces[nonce] = int(self.reactor.seconds()) - return 200, {"nonce": nonce} - - async def on_POST(self, request): - self._clear_old_nonces() - - if not self.hs.config.registration_shared_secret: - raise SynapseError(400, "Shared secret registration is not enabled") - - body = parse_json_object_from_request(request) - - if "nonce" not in body: - raise SynapseError(400, "nonce must be specified", errcode=Codes.BAD_JSON) - - nonce = body["nonce"] - - if nonce not in self.nonces: - raise SynapseError(400, "unrecognised nonce") - - # Delete the nonce, so it can't be reused, even if it's invalid - del self.nonces[nonce] - - if "username" not in body: - raise SynapseError( - 400, "username must be specified", errcode=Codes.BAD_JSON - ) - else: - if ( - not isinstance(body["username"], text_type) - or len(body["username"]) > 512 - ): - raise SynapseError(400, "Invalid username") - - username = body["username"].encode("utf-8") - if b"\x00" in username: - raise SynapseError(400, "Invalid username") - - if "password" not in body: - raise SynapseError( - 400, "password must be specified", errcode=Codes.BAD_JSON - ) - else: - if ( - not isinstance(body["password"], text_type) - or len(body["password"]) > 512 - ): - raise SynapseError(400, "Invalid password") - - password = body["password"].encode("utf-8") - if b"\x00" in password: - raise SynapseError(400, "Invalid password") - - admin = body.get("admin", None) - user_type = body.get("user_type", None) - - if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: - raise SynapseError(400, "Invalid user type") - - got_mac = body["mac"] - - want_mac = hmac.new( - key=self.hs.config.registration_shared_secret.encode(), - digestmod=hashlib.sha1, - ) - want_mac.update(nonce.encode("utf8")) - want_mac.update(b"\x00") - want_mac.update(username) - want_mac.update(b"\x00") - want_mac.update(password) - want_mac.update(b"\x00") - want_mac.update(b"admin" if admin else b"notadmin") - if user_type: - want_mac.update(b"\x00") - want_mac.update(user_type.encode("utf8")) - want_mac = want_mac.hexdigest() - - if not hmac.compare_digest(want_mac.encode("ascii"), got_mac.encode("ascii")): - raise SynapseError(403, "HMAC incorrect") - - # Reuse the parts of RegisterRestServlet to reduce code duplication - from synapse.rest.client.v2_alpha.register import RegisterRestServlet - - register = RegisterRestServlet(self.hs) - - user_id = await register.registration_handler.register_user( - localpart=body["username"].lower(), - password=body["password"], - admin=bool(admin), - user_type=user_type, - ) - - result = await register._create_registration_details(user_id, body) - return 200, result - - -class WhoisRestServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/whois/(?P[^/]*)") - - def __init__(self, hs): - self.hs = hs - self.auth = hs.get_auth() - self.handlers = hs.get_handlers() - - async def on_GET(self, request, user_id): - target_user = UserID.from_string(user_id) - requester = await self.auth.get_user_by_req(request) - auth_user = requester.user - - if target_user != auth_user: - await assert_user_is_admin(self.auth, auth_user) - - if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only whois a local user") - - ret = await self.handlers.admin_handler.get_whois(target_user) - - return 200, ret - - class PurgeHistoryRestServlet(RestServlet): PATTERNS = historical_admin_path_patterns( "/purge_history/(?P[^/]*)(/(?P[^/]+))?" @@ -342,369 +166,6 @@ class PurgeHistoryStatusRestServlet(RestServlet): return 200, purge_status.asdict() -class DeactivateAccountRestServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/deactivate/(?P[^/]*)") - - def __init__(self, hs): - self._deactivate_account_handler = hs.get_deactivate_account_handler() - self.auth = hs.get_auth() - - async def on_POST(self, request, target_user_id): - await assert_requester_is_admin(self.auth, request) - body = parse_json_object_from_request(request, allow_empty_body=True) - erase = body.get("erase", False) - if not isinstance(erase, bool): - raise SynapseError( - http_client.BAD_REQUEST, - "Param 'erase' must be a boolean, if given", - Codes.BAD_JSON, - ) - - UserID.from_string(target_user_id) - - result = await self._deactivate_account_handler.deactivate_account( - target_user_id, erase - ) - if result: - id_server_unbind_result = "success" - else: - id_server_unbind_result = "no-support" - - return 200, {"id_server_unbind_result": id_server_unbind_result} - - -class ShutdownRoomRestServlet(RestServlet): - """Shuts down a room by removing all local users from the room and blocking - all future invites and joins to the room. Any local aliases will be repointed - to a new room created by `new_room_user_id` and kicked users will be auto - joined to the new room. - """ - - PATTERNS = historical_admin_path_patterns("/shutdown_room/(?P[^/]+)") - - DEFAULT_MESSAGE = ( - "Sharing illegal content on this server is not permitted and rooms in" - " violation will be blocked." - ) - - def __init__(self, hs): - self.hs = hs - self.store = hs.get_datastore() - self.state = hs.get_state_handler() - self._room_creation_handler = hs.get_room_creation_handler() - self.event_creation_handler = hs.get_event_creation_handler() - self.room_member_handler = hs.get_room_member_handler() - self.auth = hs.get_auth() - - async def on_POST(self, request, room_id): - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) - - content = parse_json_object_from_request(request) - assert_params_in_dict(content, ["new_room_user_id"]) - new_room_user_id = content["new_room_user_id"] - - room_creator_requester = create_requester(new_room_user_id) - - message = content.get("message", self.DEFAULT_MESSAGE) - room_name = content.get("room_name", "Content Violation Notification") - - info = await self._room_creation_handler.create_room( - room_creator_requester, - config={ - "preset": "public_chat", - "name": room_name, - "power_level_content_override": {"users_default": -10}, - }, - ratelimit=False, - ) - new_room_id = info["room_id"] - - requester_user_id = requester.user.to_string() - - logger.info( - "Shutting down room %r, joining to new room: %r", room_id, new_room_id - ) - - # This will work even if the room is already blocked, but that is - # desirable in case the first attempt at blocking the room failed below. - await self.store.block_room(room_id, requester_user_id) - - users = await self.state.get_current_users_in_room(room_id) - kicked_users = [] - failed_to_kick_users = [] - for user_id in users: - if not self.hs.is_mine_id(user_id): - continue - - logger.info("Kicking %r from %r...", user_id, room_id) - - try: - target_requester = create_requester(user_id) - await self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=room_id, - action=Membership.LEAVE, - content={}, - ratelimit=False, - require_consent=False, - ) - - await self.room_member_handler.forget(target_requester.user, room_id) - - await self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=new_room_id, - action=Membership.JOIN, - content={}, - ratelimit=False, - require_consent=False, - ) - - kicked_users.append(user_id) - except Exception: - logger.exception( - "Failed to leave old room and join new room for %r", user_id - ) - failed_to_kick_users.append(user_id) - - await self.event_creation_handler.create_and_send_nonmember_event( - room_creator_requester, - { - "type": "m.room.message", - "content": {"body": message, "msgtype": "m.text"}, - "room_id": new_room_id, - "sender": new_room_user_id, - }, - ratelimit=False, - ) - - aliases_for_room = await maybe_awaitable( - self.store.get_aliases_for_room(room_id) - ) - - await self.store.update_aliases_for_room( - room_id, new_room_id, requester_user_id - ) - - return ( - 200, - { - "kicked_users": kicked_users, - "failed_to_kick_users": failed_to_kick_users, - "local_aliases": aliases_for_room, - "new_room_id": new_room_id, - }, - ) - - -class ResetPasswordRestServlet(RestServlet): - """Post request to allow an administrator reset password for a user. - This needs user to have administrator access in Synapse. - Example: - http://localhost:8008/_synapse/admin/v1/reset_password/ - @user:to_reset_password?access_token=admin_access_token - JsonBodyToSend: - { - "new_password": "secret" - } - Returns: - 200 OK with empty object if success otherwise an error. - """ - - PATTERNS = historical_admin_path_patterns( - "/reset_password/(?P[^/]*)" - ) - - def __init__(self, hs): - self.store = hs.get_datastore() - self.hs = hs - self.auth = hs.get_auth() - self._set_password_handler = hs.get_set_password_handler() - - async def on_POST(self, request, target_user_id): - """Post request to allow an administrator reset password for a user. - This needs user to have administrator access in Synapse. - """ - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) - - UserID.from_string(target_user_id) - - params = parse_json_object_from_request(request) - assert_params_in_dict(params, ["new_password"]) - new_password = params["new_password"] - - await self._set_password_handler.set_password( - target_user_id, new_password, requester - ) - return 200, {} - - -class GetUsersPaginatedRestServlet(RestServlet): - """Get request to get specific number of users from Synapse. - This needs user to have administrator access in Synapse. - Example: - http://localhost:8008/_synapse/admin/v1/users_paginate/ - @admin:user?access_token=admin_access_token&start=0&limit=10 - Returns: - 200 OK with json object {list[dict[str, Any]], count} or empty object. - """ - - PATTERNS = historical_admin_path_patterns( - "/users_paginate/(?P[^/]*)" - ) - - def __init__(self, hs): - self.store = hs.get_datastore() - self.hs = hs - self.auth = hs.get_auth() - self.handlers = hs.get_handlers() - - async def on_GET(self, request, target_user_id): - """Get request to get specific number of users from Synapse. - This needs user to have administrator access in Synapse. - """ - await assert_requester_is_admin(self.auth, request) - - target_user = UserID.from_string(target_user_id) - - if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only users a local user") - - order = "name" # order by name in user table - start = parse_integer(request, "start", required=True) - limit = parse_integer(request, "limit", required=True) - - logger.info("limit: %s, start: %s", limit, start) - - ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) - return 200, ret - - async def on_POST(self, request, target_user_id): - """Post request to get specific number of users from Synapse.. - This needs user to have administrator access in Synapse. - Example: - http://localhost:8008/_synapse/admin/v1/users_paginate/ - @admin:user?access_token=admin_access_token - JsonBodyToSend: - { - "start": "0", - "limit": "10 - } - Returns: - 200 OK with json object {list[dict[str, Any]], count} or empty object. - """ - await assert_requester_is_admin(self.auth, request) - UserID.from_string(target_user_id) - - order = "name" # order by name in user table - params = parse_json_object_from_request(request) - assert_params_in_dict(params, ["limit", "start"]) - limit = params["limit"] - start = params["start"] - logger.info("limit: %s, start: %s", limit, start) - - ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) - return 200, ret - - -class SearchUsersRestServlet(RestServlet): - """Get request to search user table for specific users according to - search term. - This needs user to have administrator access in Synapse. - Example: - http://localhost:8008/_synapse/admin/v1/search_users/ - @admin:user?access_token=admin_access_token&term=alice - Returns: - 200 OK with json object {list[dict[str, Any]], count} or empty object. - """ - - PATTERNS = historical_admin_path_patterns("/search_users/(?P[^/]*)") - - def __init__(self, hs): - self.store = hs.get_datastore() - self.hs = hs - self.auth = hs.get_auth() - self.handlers = hs.get_handlers() - - async def on_GET(self, request, target_user_id): - """Get request to search user table for specific users according to - search term. - This needs user to have a administrator access in Synapse. - """ - await assert_requester_is_admin(self.auth, request) - - target_user = UserID.from_string(target_user_id) - - # To allow all users to get the users list - # if not is_admin and target_user != auth_user: - # raise AuthError(403, "You are not a server admin") - - if not self.hs.is_mine(target_user): - raise SynapseError(400, "Can only users a local user") - - term = parse_string(request, "term", required=True) - logger.info("term: %s ", term) - - ret = await self.handlers.admin_handler.search_users(term) - return 200, ret - - -class DeleteGroupAdminRestServlet(RestServlet): - """Allows deleting of local groups - """ - - PATTERNS = historical_admin_path_patterns("/delete_group/(?P[^/]*)") - - def __init__(self, hs): - self.group_server = hs.get_groups_server_handler() - self.is_mine_id = hs.is_mine_id - self.auth = hs.get_auth() - - async def on_POST(self, request, group_id): - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) - - if not self.is_mine_id(group_id): - raise SynapseError(400, "Can only delete local groups") - - await self.group_server.delete_group(group_id, requester.user.to_string()) - return 200, {} - - -class AccountValidityRenewServlet(RestServlet): - PATTERNS = historical_admin_path_patterns("/account_validity/validity$") - - def __init__(self, hs): - """ - Args: - hs (synapse.server.HomeServer): server - """ - self.hs = hs - self.account_activity_handler = hs.get_account_validity_handler() - self.auth = hs.get_auth() - - async def on_POST(self, request): - await assert_requester_is_admin(self.auth, request) - - body = parse_json_object_from_request(request) - - if "user_id" not in body: - raise SynapseError(400, "Missing property 'user_id' in the request body") - - expiration_ts = await self.account_activity_handler.renew_account_for_user( - body["user_id"], - body.get("expiration_ts"), - not body.get("enable_renewal_emails", True), - ) - - res = {"expiration_ts": expiration_ts} - return 200, res - - ######################################################################################## # # please don't add more servlets here: this file is already long and unwieldy. Put diff --git a/synapse/rest/admin/groups.py b/synapse/rest/admin/groups.py new file mode 100644 index 0000000000..0b54ca09f4 --- /dev/null +++ b/synapse/rest/admin/groups.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from synapse.api.errors import SynapseError +from synapse.http.servlet import RestServlet +from synapse.rest.admin._base import ( + assert_user_is_admin, + historical_admin_path_patterns, +) + +logger = logging.getLogger(__name__) + + +class DeleteGroupAdminRestServlet(RestServlet): + """Allows deleting of local groups + """ + + PATTERNS = historical_admin_path_patterns("/delete_group/(?P[^/]*)") + + def __init__(self, hs): + self.group_server = hs.get_groups_server_handler() + self.is_mine_id = hs.is_mine_id + self.auth = hs.get_auth() + + async def on_POST(self, request, group_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + if not self.is_mine_id(group_id): + raise SynapseError(400, "Can only delete local groups") + + await self.group_server.delete_group(group_id, requester.user.to_string()) + return 200, {} diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py new file mode 100644 index 0000000000..f7cc5e9be9 --- /dev/null +++ b/synapse/rest/admin/rooms.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from synapse.api.constants import Membership +from synapse.http.servlet import ( + RestServlet, + assert_params_in_dict, + parse_json_object_from_request, +) +from synapse.rest.admin._base import ( + assert_user_is_admin, + historical_admin_path_patterns, +) +from synapse.types import create_requester +from synapse.util.async_helpers import maybe_awaitable + +logger = logging.getLogger(__name__) + + +class ShutdownRoomRestServlet(RestServlet): + """Shuts down a room by removing all local users from the room and blocking + all future invites and joins to the room. Any local aliases will be repointed + to a new room created by `new_room_user_id` and kicked users will be auto + joined to the new room. + """ + + PATTERNS = historical_admin_path_patterns("/shutdown_room/(?P[^/]+)") + + DEFAULT_MESSAGE = ( + "Sharing illegal content on this server is not permitted and rooms in" + " violation will be blocked." + ) + + def __init__(self, hs): + self.hs = hs + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + self._room_creation_handler = hs.get_room_creation_handler() + self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() + self.auth = hs.get_auth() + + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + content = parse_json_object_from_request(request) + assert_params_in_dict(content, ["new_room_user_id"]) + new_room_user_id = content["new_room_user_id"] + + room_creator_requester = create_requester(new_room_user_id) + + message = content.get("message", self.DEFAULT_MESSAGE) + room_name = content.get("room_name", "Content Violation Notification") + + info = await self._room_creation_handler.create_room( + room_creator_requester, + config={ + "preset": "public_chat", + "name": room_name, + "power_level_content_override": {"users_default": -10}, + }, + ratelimit=False, + ) + new_room_id = info["room_id"] + + requester_user_id = requester.user.to_string() + + logger.info( + "Shutting down room %r, joining to new room: %r", room_id, new_room_id + ) + + # This will work even if the room is already blocked, but that is + # desirable in case the first attempt at blocking the room failed below. + await self.store.block_room(room_id, requester_user_id) + + users = await self.state.get_current_users_in_room(room_id) + kicked_users = [] + failed_to_kick_users = [] + for user_id in users: + if not self.hs.is_mine_id(user_id): + continue + + logger.info("Kicking %r from %r...", user_id, room_id) + + try: + target_requester = create_requester(user_id) + await self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=room_id, + action=Membership.LEAVE, + content={}, + ratelimit=False, + require_consent=False, + ) + + await self.room_member_handler.forget(target_requester.user, room_id) + + await self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=new_room_id, + action=Membership.JOIN, + content={}, + ratelimit=False, + require_consent=False, + ) + + kicked_users.append(user_id) + except Exception: + logger.exception( + "Failed to leave old room and join new room for %r", user_id + ) + failed_to_kick_users.append(user_id) + + await self.event_creation_handler.create_and_send_nonmember_event( + room_creator_requester, + { + "type": "m.room.message", + "content": {"body": message, "msgtype": "m.text"}, + "room_id": new_room_id, + "sender": new_room_user_id, + }, + ratelimit=False, + ) + + aliases_for_room = await maybe_awaitable( + self.store.get_aliases_for_room(room_id) + ) + + await self.store.update_aliases_for_room( + room_id, new_room_id, requester_user_id + ) + + return ( + 200, + { + "kicked_users": kicked_users, + "failed_to_kick_users": failed_to_kick_users, + "local_aliases": aliases_for_room, + "new_room_id": new_room_id, + }, + ) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index d5d124a0dc..58a83f93af 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -12,17 +12,419 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import hashlib +import hmac +import logging import re -from synapse.api.errors import SynapseError +from six import text_type +from six.moves import http_client + +from synapse.api.constants import UserTypes +from synapse.api.errors import Codes, SynapseError from synapse.http.servlet import ( RestServlet, assert_params_in_dict, + parse_integer, parse_json_object_from_request, + parse_string, +) +from synapse.rest.admin._base import ( + assert_requester_is_admin, + assert_user_is_admin, + historical_admin_path_patterns, ) -from synapse.rest.admin import assert_requester_is_admin, assert_user_is_admin from synapse.types import UserID +logger = logging.getLogger(__name__) + + +class UsersRestServlet(RestServlet): + PATTERNS = historical_admin_path_patterns("/users/(?P[^/]*)$") + + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.admin_handler = hs.get_handlers().admin_handler + + async def on_GET(self, request, user_id): + target_user = UserID.from_string(user_id) + await assert_requester_is_admin(self.auth, request) + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Can only users a local user") + + ret = await self.admin_handler.get_users() + + return 200, ret + + +class GetUsersPaginatedRestServlet(RestServlet): + """Get request to get specific number of users from Synapse. + This needs user to have administrator access in Synapse. + Example: + http://localhost:8008/_synapse/admin/v1/users_paginate/ + @admin:user?access_token=admin_access_token&start=0&limit=10 + Returns: + 200 OK with json object {list[dict[str, Any]], count} or empty object. + """ + + PATTERNS = historical_admin_path_patterns( + "/users_paginate/(?P[^/]*)" + ) + + def __init__(self, hs): + self.store = hs.get_datastore() + self.hs = hs + self.auth = hs.get_auth() + self.handlers = hs.get_handlers() + + async def on_GET(self, request, target_user_id): + """Get request to get specific number of users from Synapse. + This needs user to have administrator access in Synapse. + """ + await assert_requester_is_admin(self.auth, request) + + target_user = UserID.from_string(target_user_id) + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Can only users a local user") + + order = "name" # order by name in user table + start = parse_integer(request, "start", required=True) + limit = parse_integer(request, "limit", required=True) + + logger.info("limit: %s, start: %s", limit, start) + + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) + return 200, ret + + async def on_POST(self, request, target_user_id): + """Post request to get specific number of users from Synapse.. + This needs user to have administrator access in Synapse. + Example: + http://localhost:8008/_synapse/admin/v1/users_paginate/ + @admin:user?access_token=admin_access_token + JsonBodyToSend: + { + "start": "0", + "limit": "10 + } + Returns: + 200 OK with json object {list[dict[str, Any]], count} or empty object. + """ + await assert_requester_is_admin(self.auth, request) + UserID.from_string(target_user_id) + + order = "name" # order by name in user table + params = parse_json_object_from_request(request) + assert_params_in_dict(params, ["limit", "start"]) + limit = params["limit"] + start = params["start"] + logger.info("limit: %s, start: %s", limit, start) + + ret = await self.handlers.admin_handler.get_users_paginate(order, start, limit) + return 200, ret + + +class UserRegisterServlet(RestServlet): + """ + Attributes: + NONCE_TIMEOUT (int): Seconds until a generated nonce won't be accepted + nonces (dict[str, int]): The nonces that we will accept. A dict of + nonce to the time it was generated, in int seconds. + """ + + PATTERNS = historical_admin_path_patterns("/register") + NONCE_TIMEOUT = 60 + + def __init__(self, hs): + self.handlers = hs.get_handlers() + self.reactor = hs.get_reactor() + self.nonces = {} + self.hs = hs + + def _clear_old_nonces(self): + """ + Clear out old nonces that are older than NONCE_TIMEOUT. + """ + now = int(self.reactor.seconds()) + + for k, v in list(self.nonces.items()): + if now - v > self.NONCE_TIMEOUT: + del self.nonces[k] + + def on_GET(self, request): + """ + Generate a new nonce. + """ + self._clear_old_nonces() + + nonce = self.hs.get_secrets().token_hex(64) + self.nonces[nonce] = int(self.reactor.seconds()) + return 200, {"nonce": nonce} + + async def on_POST(self, request): + self._clear_old_nonces() + + if not self.hs.config.registration_shared_secret: + raise SynapseError(400, "Shared secret registration is not enabled") + + body = parse_json_object_from_request(request) + + if "nonce" not in body: + raise SynapseError(400, "nonce must be specified", errcode=Codes.BAD_JSON) + + nonce = body["nonce"] + + if nonce not in self.nonces: + raise SynapseError(400, "unrecognised nonce") + + # Delete the nonce, so it can't be reused, even if it's invalid + del self.nonces[nonce] + + if "username" not in body: + raise SynapseError( + 400, "username must be specified", errcode=Codes.BAD_JSON + ) + else: + if ( + not isinstance(body["username"], text_type) + or len(body["username"]) > 512 + ): + raise SynapseError(400, "Invalid username") + + username = body["username"].encode("utf-8") + if b"\x00" in username: + raise SynapseError(400, "Invalid username") + + if "password" not in body: + raise SynapseError( + 400, "password must be specified", errcode=Codes.BAD_JSON + ) + else: + if ( + not isinstance(body["password"], text_type) + or len(body["password"]) > 512 + ): + raise SynapseError(400, "Invalid password") + + password = body["password"].encode("utf-8") + if b"\x00" in password: + raise SynapseError(400, "Invalid password") + + admin = body.get("admin", None) + user_type = body.get("user_type", None) + + if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: + raise SynapseError(400, "Invalid user type") + + got_mac = body["mac"] + + want_mac = hmac.new( + key=self.hs.config.registration_shared_secret.encode(), + digestmod=hashlib.sha1, + ) + want_mac.update(nonce.encode("utf8")) + want_mac.update(b"\x00") + want_mac.update(username) + want_mac.update(b"\x00") + want_mac.update(password) + want_mac.update(b"\x00") + want_mac.update(b"admin" if admin else b"notadmin") + if user_type: + want_mac.update(b"\x00") + want_mac.update(user_type.encode("utf8")) + want_mac = want_mac.hexdigest() + + if not hmac.compare_digest(want_mac.encode("ascii"), got_mac.encode("ascii")): + raise SynapseError(403, "HMAC incorrect") + + # Reuse the parts of RegisterRestServlet to reduce code duplication + from synapse.rest.client.v2_alpha.register import RegisterRestServlet + + register = RegisterRestServlet(self.hs) + + user_id = await register.registration_handler.register_user( + localpart=body["username"].lower(), + password=body["password"], + admin=bool(admin), + user_type=user_type, + ) + + result = await register._create_registration_details(user_id, body) + return 200, result + + +class WhoisRestServlet(RestServlet): + PATTERNS = historical_admin_path_patterns("/whois/(?P[^/]*)") + + def __init__(self, hs): + self.hs = hs + self.auth = hs.get_auth() + self.handlers = hs.get_handlers() + + async def on_GET(self, request, user_id): + target_user = UserID.from_string(user_id) + requester = await self.auth.get_user_by_req(request) + auth_user = requester.user + + if target_user != auth_user: + await assert_user_is_admin(self.auth, auth_user) + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Can only whois a local user") + + ret = await self.handlers.admin_handler.get_whois(target_user) + + return 200, ret + + +class DeactivateAccountRestServlet(RestServlet): + PATTERNS = historical_admin_path_patterns("/deactivate/(?P[^/]*)") + + def __init__(self, hs): + self._deactivate_account_handler = hs.get_deactivate_account_handler() + self.auth = hs.get_auth() + + async def on_POST(self, request, target_user_id): + await assert_requester_is_admin(self.auth, request) + body = parse_json_object_from_request(request, allow_empty_body=True) + erase = body.get("erase", False) + if not isinstance(erase, bool): + raise SynapseError( + http_client.BAD_REQUEST, + "Param 'erase' must be a boolean, if given", + Codes.BAD_JSON, + ) + + UserID.from_string(target_user_id) + + result = await self._deactivate_account_handler.deactivate_account( + target_user_id, erase + ) + if result: + id_server_unbind_result = "success" + else: + id_server_unbind_result = "no-support" + + return 200, {"id_server_unbind_result": id_server_unbind_result} + + +class AccountValidityRenewServlet(RestServlet): + PATTERNS = historical_admin_path_patterns("/account_validity/validity$") + + def __init__(self, hs): + """ + Args: + hs (synapse.server.HomeServer): server + """ + self.hs = hs + self.account_activity_handler = hs.get_account_validity_handler() + self.auth = hs.get_auth() + + async def on_POST(self, request): + await assert_requester_is_admin(self.auth, request) + + body = parse_json_object_from_request(request) + + if "user_id" not in body: + raise SynapseError(400, "Missing property 'user_id' in the request body") + + expiration_ts = await self.account_activity_handler.renew_account_for_user( + body["user_id"], + body.get("expiration_ts"), + not body.get("enable_renewal_emails", True), + ) + + res = {"expiration_ts": expiration_ts} + return 200, res + + +class ResetPasswordRestServlet(RestServlet): + """Post request to allow an administrator reset password for a user. + This needs user to have administrator access in Synapse. + Example: + http://localhost:8008/_synapse/admin/v1/reset_password/ + @user:to_reset_password?access_token=admin_access_token + JsonBodyToSend: + { + "new_password": "secret" + } + Returns: + 200 OK with empty object if success otherwise an error. + """ + + PATTERNS = historical_admin_path_patterns( + "/reset_password/(?P[^/]*)" + ) + + def __init__(self, hs): + self.store = hs.get_datastore() + self.hs = hs + self.auth = hs.get_auth() + self._set_password_handler = hs.get_set_password_handler() + + async def on_POST(self, request, target_user_id): + """Post request to allow an administrator reset password for a user. + This needs user to have administrator access in Synapse. + """ + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + UserID.from_string(target_user_id) + + params = parse_json_object_from_request(request) + assert_params_in_dict(params, ["new_password"]) + new_password = params["new_password"] + + await self._set_password_handler.set_password( + target_user_id, new_password, requester + ) + return 200, {} + + +class SearchUsersRestServlet(RestServlet): + """Get request to search user table for specific users according to + search term. + This needs user to have administrator access in Synapse. + Example: + http://localhost:8008/_synapse/admin/v1/search_users/ + @admin:user?access_token=admin_access_token&term=alice + Returns: + 200 OK with json object {list[dict[str, Any]], count} or empty object. + """ + + PATTERNS = historical_admin_path_patterns("/search_users/(?P[^/]*)") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.hs = hs + self.auth = hs.get_auth() + self.handlers = hs.get_handlers() + + async def on_GET(self, request, target_user_id): + """Get request to search user table for specific users according to + search term. + This needs user to have a administrator access in Synapse. + """ + await assert_requester_is_admin(self.auth, request) + + target_user = UserID.from_string(target_user_id) + + # To allow all users to get the users list + # if not is_admin and target_user != auth_user: + # raise AuthError(403, "You are not a server admin") + + if not self.hs.is_mine(target_user): + raise SynapseError(400, "Can only users a local user") + + term = parse_string(request, "term", required=True) + logger.info("term: %s ", term) + + ret = await self.handlers.admin_handler.search_users(term) + return 200, ret + class UserAdminServlet(RestServlet): """ From 6356f2088f0adb681fe24a8435955b19883fa3b4 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 20 Nov 2019 12:09:06 +0000 Subject: [PATCH 73/85] Test if a purge can make /messages return 500 responses --- tests/rest/client/v1/test_rooms.py | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 5e38fd6ced..ebaa67e899 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -25,7 +25,9 @@ from twisted.internet import defer import synapse.rest.admin from synapse.api.constants import EventContentFields, EventTypes, Membership +from synapse.handlers.pagination import PurgeStatus from synapse.rest.client.v1 import login, profile, room +from synapse.util.stringutils import random_string from tests import unittest @@ -910,6 +912,76 @@ class RoomMessageListTestCase(RoomBase): return channel.json_body["chunk"] + def test_room_messages_purge(self): + store = self.hs.get_datastore() + pagination_handler = self.hs.get_pagination_handler() + + # Send a first message in the room, which will be removed by the purge. + first_event_id = self.helper.send(self.room_id, "message 1")["event_id"] + first_token = self.get_success( + store.get_topological_token_for_event(first_event_id) + ) + + # Send a second message in the room, which won't be removed, and which we'll + # use as the marker to purge events before. + second_event_id = self.helper.send(self.room_id, "message 2")["event_id"] + second_token = self.get_success( + store.get_topological_token_for_event(second_event_id) + ) + + # Send a third event in the room to ensure we don't fall under any edge case + # due to our marker being the latest forward extremity in the room. + self.helper.send(self.room_id, "message 3") + + # Check that we get the first and second message when querying /messages. + request, channel = self.make_request( + "GET", + "/rooms/%s/messages?access_token=x&from=%s&dir=b&filter=%s" + % (self.room_id, second_token, json.dumps({"types": [EventTypes.Message]})), + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 2, [event["content"] for event in chunk]) + + # Purge every event before the second event. + purge_id = random_string(16) + pagination_handler._purges_by_id[purge_id] = PurgeStatus() + self.get_success(pagination_handler._purge_history( + purge_id=purge_id, + room_id=self.room_id, + token=second_token, + delete_local_events=True, + )) + + # Check that we only get the second message through /message now that the first + # has been purged. + request, channel = self.make_request( + "GET", + "/rooms/%s/messages?access_token=x&from=%s&dir=b&filter=%s" + % (self.room_id, second_token, json.dumps({"types": [EventTypes.Message]})), + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 1, [event["content"] for event in chunk]) + + # Check that we get no event, but also no error, when querying /messages with + # the token that was pointing at the first event, because we don't have it + # anymore. + request, channel = self.make_request( + "GET", + "/rooms/%s/messages?access_token=x&from=%s&dir=b&filter=%s" + % (self.room_id, first_token, json.dumps({"types": [EventTypes.Message]})), + ) + self.render(request) + self.assertEqual(channel.code, 200, channel.json_body) + + chunk = channel.json_body["chunk"] + self.assertEqual(len(chunk), 0, [event["content"] for event in chunk]) + class RoomSearchTestCase(unittest.HomeserverTestCase): servlets = [ From 234f55f3c4295f08399b725cda8a8aa4b559f1f5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 20 Nov 2019 13:32:31 +0000 Subject: [PATCH 74/85] Docker: Change permissions for data dir before attempting to write to it (#6389) --- changelog.d/6389.bugfix | 1 + docker/start.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/6389.bugfix diff --git a/changelog.d/6389.bugfix b/changelog.d/6389.bugfix new file mode 100644 index 0000000000..c553622b02 --- /dev/null +++ b/changelog.d/6389.bugfix @@ -0,0 +1 @@ +Fix permission denied error when trying to generate a config file with the docker image. \ No newline at end of file diff --git a/docker/start.py b/docker/start.py index 6e1cb807a1..97fd247f8f 100755 --- a/docker/start.py +++ b/docker/start.py @@ -169,11 +169,11 @@ def run_generate_config(environ, ownership): # log("running %s" % (args, )) if ownership is not None: - args = ["su-exec", ownership] + args - os.execv("/sbin/su-exec", args) - # make sure that synapse has perms to write to the data dir. subprocess.check_output(["chown", ownership, data_dir]) + + args = ["su-exec", ownership] + args + os.execv("/sbin/su-exec", args) else: os.execv("/usr/local/bin/python", args) From 41e4566682946fca8600214969c4e9562b5a9315 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 20 Nov 2019 14:12:42 +0000 Subject: [PATCH 75/85] 1.6.0rc1 --- CHANGES.md | 74 ++++++++++++++++++++++++++++++++++++++++ changelog.d/5727.feature | 1 - changelog.d/6140.misc | 1 - changelog.d/6164.doc | 1 - changelog.d/6213.bugfix | 1 - changelog.d/6218.misc | 1 - changelog.d/6220.feature | 1 - changelog.d/6232.bugfix | 1 - changelog.d/6235.bugfix | 1 - changelog.d/6238.feature | 1 - changelog.d/6240.misc | 1 - changelog.d/6250.misc | 1 - changelog.d/6251.misc | 1 - changelog.d/6253.bugfix | 1 - changelog.d/6254.bugfix | 1 - changelog.d/6257.doc | 1 - changelog.d/6259.misc | 1 - changelog.d/6263.misc | 1 - changelog.d/6269.misc | 1 - changelog.d/6270.misc | 1 - changelog.d/6271.misc | 1 - changelog.d/6272.doc | 1 - changelog.d/6273.doc | 1 - changelog.d/6274.misc | 1 - changelog.d/6275.misc | 1 - changelog.d/6276.misc | 1 - changelog.d/6277.misc | 1 - changelog.d/6278.bugfix | 1 - changelog.d/6279.misc | 1 - changelog.d/6280.misc | 1 - changelog.d/6284.bugfix | 1 - changelog.d/6291.misc | 1 - changelog.d/6294.misc | 1 - changelog.d/6295.misc | 1 - changelog.d/6298.misc | 1 - changelog.d/6300.misc | 1 - changelog.d/6301.feature | 1 - changelog.d/6304.misc | 1 - changelog.d/6305.misc | 1 - changelog.d/6306.bugfix | 1 - changelog.d/6307.bugfix | 1 - changelog.d/6308.misc | 1 - changelog.d/6310.feature | 1 - changelog.d/6312.misc | 1 - changelog.d/6313.bugfix | 1 - changelog.d/6314.misc | 1 - changelog.d/6317.misc | 1 - changelog.d/6318.misc | 1 - changelog.d/6319.misc | 1 - changelog.d/6320.bugfix | 1 - changelog.d/6330.misc | 1 - changelog.d/6335.bugfix | 1 - changelog.d/6336.misc | 1 - changelog.d/6338.bugfix | 1 - changelog.d/6340.feature | 1 - changelog.d/6341.misc | 1 - changelog.d/6357.misc | 1 - changelog.d/6359.bugfix | 1 - changelog.d/6361.misc | 1 - changelog.d/6363.bugfix | 1 - changelog.d/6389.bugfix | 1 - synapse/__init__.py | 2 +- 62 files changed, 75 insertions(+), 61 deletions(-) delete mode 100644 changelog.d/5727.feature delete mode 100644 changelog.d/6140.misc delete mode 100644 changelog.d/6164.doc delete mode 100644 changelog.d/6213.bugfix delete mode 100644 changelog.d/6218.misc delete mode 100644 changelog.d/6220.feature delete mode 100644 changelog.d/6232.bugfix delete mode 100644 changelog.d/6235.bugfix delete mode 100644 changelog.d/6238.feature delete mode 100644 changelog.d/6240.misc delete mode 100644 changelog.d/6250.misc delete mode 100644 changelog.d/6251.misc delete mode 100644 changelog.d/6253.bugfix delete mode 100644 changelog.d/6254.bugfix delete mode 100644 changelog.d/6257.doc delete mode 100644 changelog.d/6259.misc delete mode 100644 changelog.d/6263.misc delete mode 100644 changelog.d/6269.misc delete mode 100644 changelog.d/6270.misc delete mode 100644 changelog.d/6271.misc delete mode 100644 changelog.d/6272.doc delete mode 100644 changelog.d/6273.doc delete mode 100644 changelog.d/6274.misc delete mode 100644 changelog.d/6275.misc delete mode 100644 changelog.d/6276.misc delete mode 100644 changelog.d/6277.misc delete mode 100644 changelog.d/6278.bugfix delete mode 100644 changelog.d/6279.misc delete mode 100644 changelog.d/6280.misc delete mode 100644 changelog.d/6284.bugfix delete mode 100644 changelog.d/6291.misc delete mode 100644 changelog.d/6294.misc delete mode 100644 changelog.d/6295.misc delete mode 100644 changelog.d/6298.misc delete mode 100644 changelog.d/6300.misc delete mode 100644 changelog.d/6301.feature delete mode 100644 changelog.d/6304.misc delete mode 100644 changelog.d/6305.misc delete mode 100644 changelog.d/6306.bugfix delete mode 100644 changelog.d/6307.bugfix delete mode 100644 changelog.d/6308.misc delete mode 100644 changelog.d/6310.feature delete mode 100644 changelog.d/6312.misc delete mode 100644 changelog.d/6313.bugfix delete mode 100644 changelog.d/6314.misc delete mode 100644 changelog.d/6317.misc delete mode 100644 changelog.d/6318.misc delete mode 100644 changelog.d/6319.misc delete mode 100644 changelog.d/6320.bugfix delete mode 100644 changelog.d/6330.misc delete mode 100644 changelog.d/6335.bugfix delete mode 100644 changelog.d/6336.misc delete mode 100644 changelog.d/6338.bugfix delete mode 100644 changelog.d/6340.feature delete mode 100644 changelog.d/6341.misc delete mode 100644 changelog.d/6357.misc delete mode 100644 changelog.d/6359.bugfix delete mode 100644 changelog.d/6361.misc delete mode 100644 changelog.d/6363.bugfix delete mode 100644 changelog.d/6389.bugfix diff --git a/CHANGES.md b/CHANGES.md index 9312dc2941..f4f61db5d4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,77 @@ +Synapse 1.6.0rc1 (2019-11-20) +============================= + +Features +-------- + +- Add federation support for cross-signing. ([\#5727](https://github.com/matrix-org/synapse/issues/5727)) +- Increase default room version from 4 to 5, thereby enforcing server key validity period checks. ([\#6220](https://github.com/matrix-org/synapse/issues/6220)) +- Add support for outbound http proxying via http_proxy/HTTPS_PROXY env vars. ([\#6238](https://github.com/matrix-org/synapse/issues/6238)) +- Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). ([\#6301](https://github.com/matrix-org/synapse/issues/6301), [\#6310](https://github.com/matrix-org/synapse/issues/6310), [\#6340](https://github.com/matrix-org/synapse/issues/6340)) + + +Bugfixes +-------- + +- Fix LruCache callback deduplication for Python 3.8. Contributed by @V02460. ([\#6213](https://github.com/matrix-org/synapse/issues/6213)) +- Remove a room from a server's public rooms list on room upgrade. ([\#6232](https://github.com/matrix-org/synapse/issues/6232), [\#6235](https://github.com/matrix-org/synapse/issues/6235)) +- Delete keys from key backup when deleting backup versions. ([\#6253](https://github.com/matrix-org/synapse/issues/6253)) +- Make notification of cross-signing signatures work with workers. ([\#6254](https://github.com/matrix-org/synapse/issues/6254)) +- Fix exception when remote servers attempt to join a room that they're not allowed to join. ([\#6278](https://github.com/matrix-org/synapse/issues/6278)) +- Prevent errors from appearing on Synapse startup if `git` is not installed. ([\#6284](https://github.com/matrix-org/synapse/issues/6284)) +- Appservice requests will no longer contain a double slash prefix when the appservice url provided ends in a slash. ([\#6306](https://github.com/matrix-org/synapse/issues/6306)) +- Fix `/purge_room` admin API. ([\#6307](https://github.com/matrix-org/synapse/issues/6307)) +- Fix the `hidden` field in the `devices` table for SQLite versions prior to 3.23.0. ([\#6313](https://github.com/matrix-org/synapse/issues/6313)) +- Fix bug which casued rejected events to be persisted with the wrong room state. ([\#6320](https://github.com/matrix-org/synapse/issues/6320)) +- Fix bug where `rc_login` ratelimiting would prematurely kick in. ([\#6335](https://github.com/matrix-org/synapse/issues/6335)) +- Prevent the server taking a long time to start up when guest registration is enabled. ([\#6338](https://github.com/matrix-org/synapse/issues/6338)) +- Fix bug where upgrading a guest account to a full user would fail when account validity is enabled. ([\#6359](https://github.com/matrix-org/synapse/issues/6359)) +- Fix `to_device` stream ID getting reset every time Synapse restarts, which had the potential to cause unable to decrypt errors. ([\#6363](https://github.com/matrix-org/synapse/issues/6363)) +- Fix permission denied error when trying to generate a config file with the docker image. ([\#6389](https://github.com/matrix-org/synapse/issues/6389)) + + +Improved Documentation +---------------------- + +- Contributor documentation now mentions script to run linters. ([\#6164](https://github.com/matrix-org/synapse/issues/6164)) +- Modify CAPTCHA_SETUP.md to update the terms `private key` and `public key` to `secret key` and `site key` respectively. Contributed by Yash Jipkate. ([\#6257](https://github.com/matrix-org/synapse/issues/6257)) +- Update `INSTALL.md` Email section to talk about `account_threepid_delegates`. ([\#6272](https://github.com/matrix-org/synapse/issues/6272)) +- Fix a small typo in `account_threepid_delegates` configuration option. ([\#6273](https://github.com/matrix-org/synapse/issues/6273)) + + +Internal Changes +---------------- + +- Add a CI job to test the `synapse_port_db` script. ([\#6140](https://github.com/matrix-org/synapse/issues/6140), [\#6276](https://github.com/matrix-org/synapse/issues/6276)) +- Convert EventContext to an attrs. ([\#6218](https://github.com/matrix-org/synapse/issues/6218)) +- Move `persist_events` out from main data store. ([\#6240](https://github.com/matrix-org/synapse/issues/6240), [\#6300](https://github.com/matrix-org/synapse/issues/6300)) +- Reduce verbosity of user/room stats. ([\#6250](https://github.com/matrix-org/synapse/issues/6250)) +- Reduce impact of debug logging. ([\#6251](https://github.com/matrix-org/synapse/issues/6251)) +- Expose some homeserver functionality to spam checkers. ([\#6259](https://github.com/matrix-org/synapse/issues/6259)) +- Change cache descriptors to always return deferreds. ([\#6263](https://github.com/matrix-org/synapse/issues/6263), [\#6291](https://github.com/matrix-org/synapse/issues/6291)) +- Fix incorrect comment regarding the functionality of an `if` statement. ([\#6269](https://github.com/matrix-org/synapse/issues/6269)) +- Update CI to run `isort` over the `scripts` and `scripts-dev` directories. ([\#6270](https://github.com/matrix-org/synapse/issues/6270)) +- Replace every instance of `logger.warn` method with `logger.warning` as the former is deprecated. ([\#6271](https://github.com/matrix-org/synapse/issues/6271), [\#6314](https://github.com/matrix-org/synapse/issues/6314)) +- Port replication http server endpoints to async/await. ([\#6274](https://github.com/matrix-org/synapse/issues/6274)) +- Port room rest handlers to async/await. ([\#6275](https://github.com/matrix-org/synapse/issues/6275)) +- Remove redundant CLI parameters on CI's `flake8` step. ([\#6277](https://github.com/matrix-org/synapse/issues/6277)) +- Port `federation_server.py` to async/await. ([\#6279](https://github.com/matrix-org/synapse/issues/6279)) +- Port receipt and read markers to async/wait. ([\#6280](https://github.com/matrix-org/synapse/issues/6280)) +- Split out state storage into separate data store. ([\#6294](https://github.com/matrix-org/synapse/issues/6294), [\#6295](https://github.com/matrix-org/synapse/issues/6295)) +- Refactor EventContext for clarity. ([\#6298](https://github.com/matrix-org/synapse/issues/6298)) +- Update the version of black used to 19.10b0. ([\#6304](https://github.com/matrix-org/synapse/issues/6304)) +- Add some documentation about worker replication. ([\#6305](https://github.com/matrix-org/synapse/issues/6305)) +- Move admin endpoints into separate files. Contributed by Awesome Technologies Innovationslabor GmbH. ([\#6308](https://github.com/matrix-org/synapse/issues/6308)) +- Document the use of `lint.sh` for code style enforcement & extend it to run on specified paths only. ([\#6312](https://github.com/matrix-org/synapse/issues/6312)) +- Add optional python dependencies and dependant binary libraries to snapcraft packaging. ([\#6317](https://github.com/matrix-org/synapse/issues/6317)) +- Remove the dependency on psutil and replace functionality with the stdlib `resource` module. ([\#6318](https://github.com/matrix-org/synapse/issues/6318), [\#6336](https://github.com/matrix-org/synapse/issues/6336)) +- Improve documentation for EventContext fields. ([\#6319](https://github.com/matrix-org/synapse/issues/6319)) +- Add some checks that we aren't using state from rejected events. ([\#6330](https://github.com/matrix-org/synapse/issues/6330)) +- Add continuous integration for python 3.8. ([\#6341](https://github.com/matrix-org/synapse/issues/6341)) +- Correct spacing/case of various instances of the word "homeserver". ([\#6357](https://github.com/matrix-org/synapse/issues/6357)) +- Temporarily blacklist the failing unit test PurgeRoomTestCase.test_purge_room. ([\#6361](https://github.com/matrix-org/synapse/issues/6361)) + + Synapse 1.5.1 (2019-11-06) ========================== diff --git a/changelog.d/5727.feature b/changelog.d/5727.feature deleted file mode 100644 index 819bebf2d7..0000000000 --- a/changelog.d/5727.feature +++ /dev/null @@ -1 +0,0 @@ -Add federation support for cross-signing. diff --git a/changelog.d/6140.misc b/changelog.d/6140.misc deleted file mode 100644 index 0feb97ec61..0000000000 --- a/changelog.d/6140.misc +++ /dev/null @@ -1 +0,0 @@ -Add a CI job to test the `synapse_port_db` script. \ No newline at end of file diff --git a/changelog.d/6164.doc b/changelog.d/6164.doc deleted file mode 100644 index f9395b02b3..0000000000 --- a/changelog.d/6164.doc +++ /dev/null @@ -1 +0,0 @@ -Contributor documentation now mentions script to run linters. diff --git a/changelog.d/6213.bugfix b/changelog.d/6213.bugfix deleted file mode 100644 index 2bb2d08851..0000000000 --- a/changelog.d/6213.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix LruCache callback deduplication for Python 3.8. Contributed by @V02460. diff --git a/changelog.d/6218.misc b/changelog.d/6218.misc deleted file mode 100644 index 49d10c36cf..0000000000 --- a/changelog.d/6218.misc +++ /dev/null @@ -1 +0,0 @@ -Convert EventContext to an attrs. diff --git a/changelog.d/6220.feature b/changelog.d/6220.feature deleted file mode 100644 index 8343e9912b..0000000000 --- a/changelog.d/6220.feature +++ /dev/null @@ -1 +0,0 @@ -Increase default room version from 4 to 5, thereby enforcing server key validity period checks. diff --git a/changelog.d/6232.bugfix b/changelog.d/6232.bugfix deleted file mode 100644 index 12718ba934..0000000000 --- a/changelog.d/6232.bugfix +++ /dev/null @@ -1 +0,0 @@ -Remove a room from a server's public rooms list on room upgrade. \ No newline at end of file diff --git a/changelog.d/6235.bugfix b/changelog.d/6235.bugfix deleted file mode 100644 index 12718ba934..0000000000 --- a/changelog.d/6235.bugfix +++ /dev/null @@ -1 +0,0 @@ -Remove a room from a server's public rooms list on room upgrade. \ No newline at end of file diff --git a/changelog.d/6238.feature b/changelog.d/6238.feature deleted file mode 100644 index d225ac33b6..0000000000 --- a/changelog.d/6238.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for outbound http proxying via http_proxy/HTTPS_PROXY env vars. diff --git a/changelog.d/6240.misc b/changelog.d/6240.misc deleted file mode 100644 index 0b3d7a14a1..0000000000 --- a/changelog.d/6240.misc +++ /dev/null @@ -1 +0,0 @@ -Move `persist_events` out from main data store. diff --git a/changelog.d/6250.misc b/changelog.d/6250.misc deleted file mode 100644 index 12e3fe66b0..0000000000 --- a/changelog.d/6250.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce verbosity of user/room stats. diff --git a/changelog.d/6251.misc b/changelog.d/6251.misc deleted file mode 100644 index 371c6983be..0000000000 --- a/changelog.d/6251.misc +++ /dev/null @@ -1 +0,0 @@ -Reduce impact of debug logging. diff --git a/changelog.d/6253.bugfix b/changelog.d/6253.bugfix deleted file mode 100644 index 266fae381c..0000000000 --- a/changelog.d/6253.bugfix +++ /dev/null @@ -1 +0,0 @@ -Delete keys from key backup when deleting backup versions. diff --git a/changelog.d/6254.bugfix b/changelog.d/6254.bugfix deleted file mode 100644 index 3181484b88..0000000000 --- a/changelog.d/6254.bugfix +++ /dev/null @@ -1 +0,0 @@ -Make notification of cross-signing signatures work with workers. diff --git a/changelog.d/6257.doc b/changelog.d/6257.doc deleted file mode 100644 index e985afde0e..0000000000 --- a/changelog.d/6257.doc +++ /dev/null @@ -1 +0,0 @@ -Modify CAPTCHA_SETUP.md to update the terms `private key` and `public key` to `secret key` and `site key` respectively. Contributed by Yash Jipkate. diff --git a/changelog.d/6259.misc b/changelog.d/6259.misc deleted file mode 100644 index 3ff81b1ac7..0000000000 --- a/changelog.d/6259.misc +++ /dev/null @@ -1 +0,0 @@ -Expose some homeserver functionality to spam checkers. diff --git a/changelog.d/6263.misc b/changelog.d/6263.misc deleted file mode 100644 index 7b1bb4b679..0000000000 --- a/changelog.d/6263.misc +++ /dev/null @@ -1 +0,0 @@ -Change cache descriptors to always return deferreds. diff --git a/changelog.d/6269.misc b/changelog.d/6269.misc deleted file mode 100644 index 9fd333cc89..0000000000 --- a/changelog.d/6269.misc +++ /dev/null @@ -1 +0,0 @@ -Fix incorrect comment regarding the functionality of an `if` statement. \ No newline at end of file diff --git a/changelog.d/6270.misc b/changelog.d/6270.misc deleted file mode 100644 index d1c5811323..0000000000 --- a/changelog.d/6270.misc +++ /dev/null @@ -1 +0,0 @@ -Update CI to run `isort` over the `scripts` and `scripts-dev` directories. \ No newline at end of file diff --git a/changelog.d/6271.misc b/changelog.d/6271.misc deleted file mode 100644 index 2369760272..0000000000 --- a/changelog.d/6271.misc +++ /dev/null @@ -1 +0,0 @@ -Replace every instance of `logger.warn` method with `logger.warning` as the former is deprecated. \ No newline at end of file diff --git a/changelog.d/6272.doc b/changelog.d/6272.doc deleted file mode 100644 index 232180bcdc..0000000000 --- a/changelog.d/6272.doc +++ /dev/null @@ -1 +0,0 @@ -Update `INSTALL.md` Email section to talk about `account_threepid_delegates`. \ No newline at end of file diff --git a/changelog.d/6273.doc b/changelog.d/6273.doc deleted file mode 100644 index 21a41d987d..0000000000 --- a/changelog.d/6273.doc +++ /dev/null @@ -1 +0,0 @@ -Fix a small typo in `account_threepid_delegates` configuration option. \ No newline at end of file diff --git a/changelog.d/6274.misc b/changelog.d/6274.misc deleted file mode 100644 index eb4966124f..0000000000 --- a/changelog.d/6274.misc +++ /dev/null @@ -1 +0,0 @@ -Port replication http server endpoints to async/await. diff --git a/changelog.d/6275.misc b/changelog.d/6275.misc deleted file mode 100644 index f57e2c4adb..0000000000 --- a/changelog.d/6275.misc +++ /dev/null @@ -1 +0,0 @@ -Port room rest handlers to async/await. diff --git a/changelog.d/6276.misc b/changelog.d/6276.misc deleted file mode 100644 index 4a4428251e..0000000000 --- a/changelog.d/6276.misc +++ /dev/null @@ -1 +0,0 @@ -Add a CI job to test the `synapse_port_db` script. diff --git a/changelog.d/6277.misc b/changelog.d/6277.misc deleted file mode 100644 index 490713577f..0000000000 --- a/changelog.d/6277.misc +++ /dev/null @@ -1 +0,0 @@ -Remove redundant CLI parameters on CI's `flake8` step. \ No newline at end of file diff --git a/changelog.d/6278.bugfix b/changelog.d/6278.bugfix deleted file mode 100644 index c107270461..0000000000 --- a/changelog.d/6278.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix exception when remote servers attempt to join a room that they're not allowed to join. diff --git a/changelog.d/6279.misc b/changelog.d/6279.misc deleted file mode 100644 index 5f5144a9ee..0000000000 --- a/changelog.d/6279.misc +++ /dev/null @@ -1 +0,0 @@ -Port `federation_server.py` to async/await. diff --git a/changelog.d/6280.misc b/changelog.d/6280.misc deleted file mode 100644 index 96a0eb21b2..0000000000 --- a/changelog.d/6280.misc +++ /dev/null @@ -1 +0,0 @@ -Port receipt and read markers to async/wait. diff --git a/changelog.d/6284.bugfix b/changelog.d/6284.bugfix deleted file mode 100644 index cf15053d2d..0000000000 --- a/changelog.d/6284.bugfix +++ /dev/null @@ -1 +0,0 @@ -Prevent errors from appearing on Synapse startup if `git` is not installed. \ No newline at end of file diff --git a/changelog.d/6291.misc b/changelog.d/6291.misc deleted file mode 100644 index 7b1bb4b679..0000000000 --- a/changelog.d/6291.misc +++ /dev/null @@ -1 +0,0 @@ -Change cache descriptors to always return deferreds. diff --git a/changelog.d/6294.misc b/changelog.d/6294.misc deleted file mode 100644 index a3e6b8296e..0000000000 --- a/changelog.d/6294.misc +++ /dev/null @@ -1 +0,0 @@ -Split out state storage into separate data store. diff --git a/changelog.d/6295.misc b/changelog.d/6295.misc deleted file mode 100644 index a3e6b8296e..0000000000 --- a/changelog.d/6295.misc +++ /dev/null @@ -1 +0,0 @@ -Split out state storage into separate data store. diff --git a/changelog.d/6298.misc b/changelog.d/6298.misc deleted file mode 100644 index d4190730b2..0000000000 --- a/changelog.d/6298.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor EventContext for clarity. \ No newline at end of file diff --git a/changelog.d/6300.misc b/changelog.d/6300.misc deleted file mode 100644 index 0b3d7a14a1..0000000000 --- a/changelog.d/6300.misc +++ /dev/null @@ -1 +0,0 @@ -Move `persist_events` out from main data store. diff --git a/changelog.d/6301.feature b/changelog.d/6301.feature deleted file mode 100644 index 78a187a1dc..0000000000 --- a/changelog.d/6301.feature +++ /dev/null @@ -1 +0,0 @@ -Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). diff --git a/changelog.d/6304.misc b/changelog.d/6304.misc deleted file mode 100644 index 20372b4f7c..0000000000 --- a/changelog.d/6304.misc +++ /dev/null @@ -1 +0,0 @@ -Update the version of black used to 19.10b0. diff --git a/changelog.d/6305.misc b/changelog.d/6305.misc deleted file mode 100644 index f047fc3062..0000000000 --- a/changelog.d/6305.misc +++ /dev/null @@ -1 +0,0 @@ -Add some documentation about worker replication. diff --git a/changelog.d/6306.bugfix b/changelog.d/6306.bugfix deleted file mode 100644 index c7dcbcdce8..0000000000 --- a/changelog.d/6306.bugfix +++ /dev/null @@ -1 +0,0 @@ -Appservice requests will no longer contain a double slash prefix when the appservice url provided ends in a slash. diff --git a/changelog.d/6307.bugfix b/changelog.d/6307.bugfix deleted file mode 100644 index f2917c5053..0000000000 --- a/changelog.d/6307.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix `/purge_room` admin API. diff --git a/changelog.d/6308.misc b/changelog.d/6308.misc deleted file mode 100644 index 72be63ba4b..0000000000 --- a/changelog.d/6308.misc +++ /dev/null @@ -1 +0,0 @@ -Move admin endpoints into separate files. Contributed by Awesome Technologies Innovationslabor GmbH. \ No newline at end of file diff --git a/changelog.d/6310.feature b/changelog.d/6310.feature deleted file mode 100644 index 78a187a1dc..0000000000 --- a/changelog.d/6310.feature +++ /dev/null @@ -1 +0,0 @@ -Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). diff --git a/changelog.d/6312.misc b/changelog.d/6312.misc deleted file mode 100644 index 55e3e1654d..0000000000 --- a/changelog.d/6312.misc +++ /dev/null @@ -1 +0,0 @@ -Document the use of `lint.sh` for code style enforcement & extend it to run on specified paths only. diff --git a/changelog.d/6313.bugfix b/changelog.d/6313.bugfix deleted file mode 100644 index f4d4a97f00..0000000000 --- a/changelog.d/6313.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix the `hidden` field in the `devices` table for SQLite versions prior to 3.23.0. diff --git a/changelog.d/6314.misc b/changelog.d/6314.misc deleted file mode 100644 index 2369760272..0000000000 --- a/changelog.d/6314.misc +++ /dev/null @@ -1 +0,0 @@ -Replace every instance of `logger.warn` method with `logger.warning` as the former is deprecated. \ No newline at end of file diff --git a/changelog.d/6317.misc b/changelog.d/6317.misc deleted file mode 100644 index a67d13fa72..0000000000 --- a/changelog.d/6317.misc +++ /dev/null @@ -1 +0,0 @@ -Add optional python dependencies and dependant binary libraries to snapcraft packaging. diff --git a/changelog.d/6318.misc b/changelog.d/6318.misc deleted file mode 100644 index 63527ccef4..0000000000 --- a/changelog.d/6318.misc +++ /dev/null @@ -1 +0,0 @@ -Remove the dependency on psutil and replace functionality with the stdlib `resource` module. diff --git a/changelog.d/6319.misc b/changelog.d/6319.misc deleted file mode 100644 index 9711ef21ed..0000000000 --- a/changelog.d/6319.misc +++ /dev/null @@ -1 +0,0 @@ -Improve documentation for EventContext fields. diff --git a/changelog.d/6320.bugfix b/changelog.d/6320.bugfix deleted file mode 100644 index 2c3fad5655..0000000000 --- a/changelog.d/6320.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug which casued rejected events to be persisted with the wrong room state. diff --git a/changelog.d/6330.misc b/changelog.d/6330.misc deleted file mode 100644 index 6239cba263..0000000000 --- a/changelog.d/6330.misc +++ /dev/null @@ -1 +0,0 @@ -Add some checks that we aren't using state from rejected events. diff --git a/changelog.d/6335.bugfix b/changelog.d/6335.bugfix deleted file mode 100644 index a95f6b9eec..0000000000 --- a/changelog.d/6335.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where `rc_login` ratelimiting would prematurely kick in. diff --git a/changelog.d/6336.misc b/changelog.d/6336.misc deleted file mode 100644 index 63527ccef4..0000000000 --- a/changelog.d/6336.misc +++ /dev/null @@ -1 +0,0 @@ -Remove the dependency on psutil and replace functionality with the stdlib `resource` module. diff --git a/changelog.d/6338.bugfix b/changelog.d/6338.bugfix deleted file mode 100644 index 8e469f0fb6..0000000000 --- a/changelog.d/6338.bugfix +++ /dev/null @@ -1 +0,0 @@ -Prevent the server taking a long time to start up when guest registration is enabled. \ No newline at end of file diff --git a/changelog.d/6340.feature b/changelog.d/6340.feature deleted file mode 100644 index 78a187a1dc..0000000000 --- a/changelog.d/6340.feature +++ /dev/null @@ -1 +0,0 @@ -Implement label-based filtering on `/sync` and `/messages` ([MSC2326](https://github.com/matrix-org/matrix-doc/pull/2326)). diff --git a/changelog.d/6341.misc b/changelog.d/6341.misc deleted file mode 100644 index 359b9bf1d7..0000000000 --- a/changelog.d/6341.misc +++ /dev/null @@ -1 +0,0 @@ -Add continuous integration for python 3.8. \ No newline at end of file diff --git a/changelog.d/6357.misc b/changelog.d/6357.misc deleted file mode 100644 index a68df0f384..0000000000 --- a/changelog.d/6357.misc +++ /dev/null @@ -1 +0,0 @@ -Correct spacing/case of various instances of the word "homeserver". \ No newline at end of file diff --git a/changelog.d/6359.bugfix b/changelog.d/6359.bugfix deleted file mode 100644 index 22bf5f642a..0000000000 --- a/changelog.d/6359.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix bug where upgrading a guest account to a full user would fail when account validity is enabled. \ No newline at end of file diff --git a/changelog.d/6361.misc b/changelog.d/6361.misc deleted file mode 100644 index 324d74ebf9..0000000000 --- a/changelog.d/6361.misc +++ /dev/null @@ -1 +0,0 @@ -Temporarily blacklist the failing unit test PurgeRoomTestCase.test_purge_room. diff --git a/changelog.d/6363.bugfix b/changelog.d/6363.bugfix deleted file mode 100644 index d023b49181..0000000000 --- a/changelog.d/6363.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix `to_device` stream ID getting reset every time Synapse restarts, which had the potential to cause unable to decrypt errors. \ No newline at end of file diff --git a/changelog.d/6389.bugfix b/changelog.d/6389.bugfix deleted file mode 100644 index c553622b02..0000000000 --- a/changelog.d/6389.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix permission denied error when trying to generate a config file with the docker image. \ No newline at end of file diff --git a/synapse/__init__.py b/synapse/__init__.py index 1c27d68009..1d962f5dc8 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -36,7 +36,7 @@ try: except ImportError: pass -__version__ = "1.5.1" +__version__ = "1.6.0rc1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 486be06f4877842dfb109caac42ab052e09fd5b0 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 20 Nov 2019 15:08:03 +0000 Subject: [PATCH 76/85] Changelog --- changelog.d/6392.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6392.misc diff --git a/changelog.d/6392.misc b/changelog.d/6392.misc new file mode 100644 index 0000000000..a00257944f --- /dev/null +++ b/changelog.d/6392.misc @@ -0,0 +1 @@ +Add a test scenario to make sure room history purges don't break `/messages` in the future. From e2a20326e8141fdf9304434901da38c64b917a78 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 20 Nov 2019 15:08:47 +0000 Subject: [PATCH 77/85] Lint --- tests/rest/client/v1/test_rooms.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index ebaa67e899..e84e578f99 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -948,12 +948,14 @@ class RoomMessageListTestCase(RoomBase): # Purge every event before the second event. purge_id = random_string(16) pagination_handler._purges_by_id[purge_id] = PurgeStatus() - self.get_success(pagination_handler._purge_history( - purge_id=purge_id, - room_id=self.room_id, - token=second_token, - delete_local_events=True, - )) + self.get_success( + pagination_handler._purge_history( + purge_id=purge_id, + room_id=self.room_id, + token=second_token, + delete_local_events=True, + ) + ) # Check that we only get the second message through /message now that the first # has been purged. From 9cc168e42e14481387b4e6de73bfe1f8f9a2a508 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 20 Nov 2019 18:44:45 +0000 Subject: [PATCH 78/85] update macOS installation instructions --- INSTALL.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 29e0abafd3..9b7360f0ef 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -133,9 +133,9 @@ sudo yum install libtiff-devel libjpeg-devel libzip-devel freetype-devel \ sudo yum groupinstall "Development Tools" ``` -#### Mac OS X +#### macOS -Installing prerequisites on Mac OS X: +Installing prerequisites on macOS: ``` xcode-select --install @@ -144,6 +144,14 @@ sudo pip install virtualenv brew install pkg-config libffi ``` +On macOS Catalina (10.15) you may need to explicitly install OpenSSL +via brew and inform `pip` about it so that `psycopg2` builds: + +``` +brew install openssl@1.1 +export LDFLAGS=-L/usr/local/Cellar/openssl\@1.1/1.1.1d/lib/ +``` + #### OpenSUSE Installing prerequisites on openSUSE: From 3916e1b97a1ffc481dfdf66f7da58201a52140a9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 21 Nov 2019 12:00:14 +0000 Subject: [PATCH 79/85] Clean up newline quote marks around the codebase (#6362) --- changelog.d/6362.misc | 1 + synapse/app/federation_sender.py | 2 +- synapse/appservice/api.py | 2 +- synapse/config/appservice.py | 2 +- synapse/config/room_directory.py | 2 +- synapse/config/server.py | 6 +++--- synapse/federation/persistence.py | 4 ++-- synapse/federation/sender/__init__.py | 2 +- synapse/federation/sender/transaction_manager.py | 4 ++-- synapse/handlers/directory.py | 2 +- synapse/http/servlet.py | 2 +- synapse/push/httppusher.py | 5 ++--- synapse/push/mailer.py | 4 ++-- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/server_notices/consent_server_notices.py | 2 +- synapse/storage/_base.py | 2 +- synapse/storage/data_stores/main/deviceinbox.py | 2 +- synapse/storage/data_stores/main/end_to_end_keys.py | 6 +++--- synapse/storage/data_stores/main/events.py | 8 +++----- synapse/storage/data_stores/main/filtering.py | 2 +- synapse/storage/data_stores/main/media_repository.py | 6 +++--- synapse/storage/data_stores/main/registration.py | 4 +--- synapse/storage/data_stores/main/stream.py | 2 +- synapse/storage/data_stores/main/tags.py | 4 +--- synapse/storage/prepare_database.py | 2 +- synapse/streams/config.py | 9 ++++++--- 26 files changed, 43 insertions(+), 46 deletions(-) create mode 100644 changelog.d/6362.misc diff --git a/changelog.d/6362.misc b/changelog.d/6362.misc new file mode 100644 index 0000000000..b79a5bea99 --- /dev/null +++ b/changelog.d/6362.misc @@ -0,0 +1 @@ +Clean up some unnecessary quotation marks around the codebase. \ No newline at end of file diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 139221ad34..448e45e00f 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -69,7 +69,7 @@ class FederationSenderSlaveStore( self.federation_out_pos_startup = self._get_federation_out_pos(db_conn) def _get_federation_out_pos(self, db_conn): - sql = "SELECT stream_id FROM federation_stream_position" " WHERE type = ?" + sql = "SELECT stream_id FROM federation_stream_position WHERE type = ?" sql = self.database_engine.convert_param_style(sql) txn = db_conn.cursor() diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 3e25bf5747..57174da021 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -185,7 +185,7 @@ class ApplicationServiceApi(SimpleHttpClient): if not _is_valid_3pe_metadata(info): logger.warning( - "query_3pe_protocol to %s did not return a" " valid result", uri + "query_3pe_protocol to %s did not return a valid result", uri ) return None diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index e77d3387ff..ca43e96bd1 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -134,7 +134,7 @@ def _load_appservice(hostname, as_info, config_filename): for regex_obj in as_info["namespaces"][ns]: if not isinstance(regex_obj, dict): raise ValueError( - "Expected namespace entry in %s to be an object," " but got %s", + "Expected namespace entry in %s to be an object, but got %s", ns, regex_obj, ) diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 7c9f05bde4..7ac7699676 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -170,7 +170,7 @@ class _RoomDirectoryRule(object): self.action = action else: raise ConfigError( - "%s rules can only have action of 'allow'" " or 'deny'" % (option_name,) + "%s rules can only have action of 'allow' or 'deny'" % (option_name,) ) self._alias_matches_all = alias == "*" diff --git a/synapse/config/server.py b/synapse/config/server.py index 00d01c43af..11336d7549 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -223,7 +223,7 @@ class ServerConfig(Config): self.federation_ip_range_blacklist.update(["0.0.0.0", "::"]) except Exception as e: raise ConfigError( - "Invalid range(s) provided in " "federation_ip_range_blacklist: %s" % e + "Invalid range(s) provided in federation_ip_range_blacklist: %s" % e ) if self.public_baseurl is not None: @@ -787,14 +787,14 @@ class ServerConfig(Config): "--print-pidfile", action="store_true", default=None, - help="Print the path to the pidfile just" " before daemonizing", + help="Print the path to the pidfile just before daemonizing", ) server_group.add_argument( "--manhole", metavar="PORT", dest="manhole", type=int, - help="Turn on the twisted telnet manhole" " service on the given port.", + help="Turn on the twisted telnet manhole service on the given port.", ) diff --git a/synapse/federation/persistence.py b/synapse/federation/persistence.py index 44edcabed4..d68b4bd670 100644 --- a/synapse/federation/persistence.py +++ b/synapse/federation/persistence.py @@ -44,7 +44,7 @@ class TransactionActions(object): response code and response body. """ if not transaction.transaction_id: - raise RuntimeError("Cannot persist a transaction with no " "transaction_id") + raise RuntimeError("Cannot persist a transaction with no transaction_id") return self.store.get_received_txn_response(transaction.transaction_id, origin) @@ -56,7 +56,7 @@ class TransactionActions(object): Deferred """ if not transaction.transaction_id: - raise RuntimeError("Cannot persist a transaction with no " "transaction_id") + raise RuntimeError("Cannot persist a transaction with no transaction_id") return self.store.set_received_txn_response( transaction.transaction_id, origin, code, response diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 2b2ee8612a..4ebb0e8bc0 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -49,7 +49,7 @@ sent_pdus_destination_dist_count = Counter( sent_pdus_destination_dist_total = Counter( "synapse_federation_client_sent_pdu_destinations:total", - "" "Total number of PDUs queued for sending across all destinations", + "Total number of PDUs queued for sending across all destinations", ) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 67b3e1ab6e..5fed626d5b 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -84,7 +84,7 @@ class TransactionManager(object): txn_id = str(self._next_txn_id) logger.debug( - "TX [%s] {%s} Attempting new transaction" " (pdus: %d, edus: %d)", + "TX [%s] {%s} Attempting new transaction (pdus: %d, edus: %d)", destination, txn_id, len(pdus), @@ -103,7 +103,7 @@ class TransactionManager(object): self._next_txn_id += 1 logger.info( - "TX [%s] {%s} Sending transaction [%s]," " (PDUs: %d, EDUs: %d)", + "TX [%s] {%s} Sending transaction [%s], (PDUs: %d, EDUs: %d)", destination, txn_id, transaction.transaction_id, diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 69051101a6..a07d2f1a17 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -119,7 +119,7 @@ class DirectoryHandler(BaseHandler): if not service.is_interested_in_alias(room_alias.to_string()): raise SynapseError( 400, - "This application service has not reserved" " this kind of alias.", + "This application service has not reserved this kind of alias.", errcode=Codes.EXCLUSIVE, ) else: diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index e9a5e46ced..13fcb408a6 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -96,7 +96,7 @@ def parse_boolean_from_args(args, name, default=None, required=False): return {b"true": True, b"false": False}[args[name][0]] except Exception: message = ( - "Boolean query parameter %r must be one of" " ['true', 'false']" + "Boolean query parameter %r must be one of ['true', 'false']" ) % (name,) raise SynapseError(400, message) else: diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index e994037be6..d0879b0490 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -246,7 +246,7 @@ class HttpPusher(object): # fixed, we don't suddenly deliver a load # of old notifications. logger.warning( - "Giving up on a notification to user %s, " "pushkey %s", + "Giving up on a notification to user %s, pushkey %s", self.user_id, self.pushkey, ) @@ -299,8 +299,7 @@ class HttpPusher(object): # for sanity, we only remove the pushkey if it # was the one we actually sent... logger.warning( - ("Ignoring rejected pushkey %s because we" " didn't send it"), - pk, + ("Ignoring rejected pushkey %s because we didn't send it"), pk, ) else: logger.info("Pushkey %s was rejected: removing", pk) diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 1d15a06a58..b13b646bfd 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -43,7 +43,7 @@ logger = logging.getLogger(__name__) MESSAGE_FROM_PERSON_IN_ROOM = ( - "You have a message on %(app)s from %(person)s " "in the %(room)s room..." + "You have a message on %(app)s from %(person)s in the %(room)s room..." ) MESSAGE_FROM_PERSON = "You have a message on %(app)s from %(person)s..." MESSAGES_FROM_PERSON = "You have messages on %(app)s from %(person)s..." @@ -55,7 +55,7 @@ MESSAGES_FROM_PERSON_AND_OTHERS = ( "You have messages on %(app)s from %(person)s and others..." ) INVITE_FROM_PERSON_TO_ROOM = ( - "%(person)s has invited you to join the " "%(room)s room on %(app)s..." + "%(person)s has invited you to join the %(room)s room on %(app)s..." ) INVITE_FROM_PERSON = "%(person)s has invited you to chat on %(app)s..." diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 15c15a12f5..a23d6f5c75 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -122,7 +122,7 @@ class PreviewUrlResource(DirectServeResource): pattern = entry[attrib] value = getattr(url_tuple, attrib) logger.debug( - "Matching attrib '%s' with value '%s' against" " pattern '%s'", + "Matching attrib '%s' with value '%s' against pattern '%s'", attrib, value, pattern, diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py index 415e9c17d8..5736c56032 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py @@ -54,7 +54,7 @@ class ConsentServerNotices(object): ) if "body" not in self._server_notice_content: raise ConfigError( - "user_consent server_notice_consent must contain a 'body' " "key." + "user_consent server_notice_consent must contain a 'body' key." ) self._consent_uri_builder = ConsentURIBuilder(hs.config) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index ab596fa68d..6b8a9cd89a 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -851,7 +851,7 @@ class SQLBaseStore(object): allvalues.update(values) latter = "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) - sql = ("INSERT INTO %s (%s) VALUES (%s) " "ON CONFLICT (%s) DO %s") % ( + sql = ("INSERT INTO %s (%s) VALUES (%s) ON CONFLICT (%s) DO %s") % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), diff --git a/synapse/storage/data_stores/main/deviceinbox.py b/synapse/storage/data_stores/main/deviceinbox.py index 96cd0fb77a..a23744f11c 100644 --- a/synapse/storage/data_stores/main/deviceinbox.py +++ b/synapse/storage/data_stores/main/deviceinbox.py @@ -380,7 +380,7 @@ class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore) devices = list(messages_by_device.keys()) if len(devices) == 1 and devices[0] == "*": # Handle wildcard device_ids. - sql = "SELECT device_id FROM devices" " WHERE user_id = ?" + sql = "SELECT device_id FROM devices WHERE user_id = ?" txn.execute(sql, (user_id,)) message_json = json.dumps(messages_by_device["*"]) for row in txn: diff --git a/synapse/storage/data_stores/main/end_to_end_keys.py b/synapse/storage/data_stores/main/end_to_end_keys.py index 073412a78d..d8ad59ad93 100644 --- a/synapse/storage/data_stores/main/end_to_end_keys.py +++ b/synapse/storage/data_stores/main/end_to_end_keys.py @@ -138,9 +138,9 @@ class EndToEndKeyWorkerStore(SQLBaseStore): result.setdefault(user_id, {})[device_id] = None # get signatures on the device - signature_sql = ( - "SELECT * " " FROM e2e_cross_signing_signatures " " WHERE %s" - ) % (" OR ".join("(" + q + ")" for q in signature_query_clauses)) + signature_sql = ("SELECT * FROM e2e_cross_signing_signatures WHERE %s") % ( + " OR ".join("(" + q + ")" for q in signature_query_clauses) + ) txn.execute(signature_sql, signature_query_params) rows = self.cursor_to_dict(txn) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index 878f7568a6..627c0b67f1 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -713,9 +713,7 @@ class EventsStore( metadata_json = encode_json(event.internal_metadata.get_dict()) - sql = ( - "UPDATE event_json SET internal_metadata = ?" " WHERE event_id = ?" - ) + sql = "UPDATE event_json SET internal_metadata = ? WHERE event_id = ?" txn.execute(sql, (metadata_json, event.event_id)) # Add an entry to the ex_outlier_stream table to replicate the @@ -732,7 +730,7 @@ class EventsStore( }, ) - sql = "UPDATE events SET outlier = ?" " WHERE event_id = ?" + sql = "UPDATE events SET outlier = ? WHERE event_id = ?" txn.execute(sql, (False, event.event_id)) # Update the event_backward_extremities table now that this @@ -1479,7 +1477,7 @@ class EventsStore( # We do joins against events_to_purge for e.g. calculating state # groups to purge, etc., so lets make an index. - txn.execute("CREATE INDEX events_to_purge_id" " ON events_to_purge(event_id)") + txn.execute("CREATE INDEX events_to_purge_id ON events_to_purge(event_id)") txn.execute("SELECT event_id, should_delete FROM events_to_purge") event_rows = txn.fetchall() diff --git a/synapse/storage/data_stores/main/filtering.py b/synapse/storage/data_stores/main/filtering.py index a2a2a67927..f05ace299a 100644 --- a/synapse/storage/data_stores/main/filtering.py +++ b/synapse/storage/data_stores/main/filtering.py @@ -55,7 +55,7 @@ class FilteringStore(SQLBaseStore): if filter_id_response is not None: return filter_id_response[0] - sql = "SELECT MAX(filter_id) FROM user_filters " "WHERE user_id = ?" + sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?" txn.execute(sql, (user_localpart,)) max_id = txn.fetchone()[0] if max_id is None: diff --git a/synapse/storage/data_stores/main/media_repository.py b/synapse/storage/data_stores/main/media_repository.py index 84b5f3ad5e..0f2887bdce 100644 --- a/synapse/storage/data_stores/main/media_repository.py +++ b/synapse/storage/data_stores/main/media_repository.py @@ -337,7 +337,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): if len(media_ids) == 0: return - sql = "DELETE FROM local_media_repository_url_cache" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?" def _delete_url_cache_txn(txn): txn.executemany(sql, [(media_id,) for media_id in media_ids]) @@ -365,11 +365,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): return def _delete_url_cache_media_txn(txn): - sql = "DELETE FROM local_media_repository" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository WHERE media_id = ?" txn.executemany(sql, [(media_id,) for media_id in media_ids]) - sql = "DELETE FROM local_media_repository_thumbnails" " WHERE media_id = ?" + sql = "DELETE FROM local_media_repository_thumbnails WHERE media_id = ?" txn.executemany(sql, [(media_id,) for media_id in media_ids]) diff --git a/synapse/storage/data_stores/main/registration.py b/synapse/storage/data_stores/main/registration.py index ee1b2b2bbf..6a594c160c 100644 --- a/synapse/storage/data_stores/main/registration.py +++ b/synapse/storage/data_stores/main/registration.py @@ -377,9 +377,7 @@ class RegistrationWorkerStore(SQLBaseStore): """ def f(txn): - sql = ( - "SELECT name, password_hash FROM users" " WHERE lower(name) = lower(?)" - ) + sql = "SELECT name, password_hash FROM users WHERE lower(name) = lower(?)" txn.execute(sql, (user_id,)) return dict(txn) diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py index 8780fdd989..9ae4a913a1 100644 --- a/synapse/storage/data_stores/main/stream.py +++ b/synapse/storage/data_stores/main/stream.py @@ -616,7 +616,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): def _get_max_topological_txn(self, txn, room_id): txn.execute( - "SELECT MAX(topological_ordering) FROM events" " WHERE room_id = ?", + "SELECT MAX(topological_ordering) FROM events WHERE room_id = ?", (room_id,), ) diff --git a/synapse/storage/data_stores/main/tags.py b/synapse/storage/data_stores/main/tags.py index 10d1887f75..aa24339717 100644 --- a/synapse/storage/data_stores/main/tags.py +++ b/synapse/storage/data_stores/main/tags.py @@ -83,9 +83,7 @@ class TagsWorkerStore(AccountDataWorkerStore): ) def get_tag_content(txn, tag_ids): - sql = ( - "SELECT tag, content" " FROM room_tags" " WHERE user_id=? AND room_id=?" - ) + sql = "SELECT tag, content FROM room_tags WHERE user_id=? AND room_id=?" results = [] for stream_id, user_id, room_id in tag_ids: txn.execute(sql, (user_id, room_id)) diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 2e7753820e..731e1c9d9c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -447,7 +447,7 @@ def _apply_module_schema_files(cur, database_engine, modname, names_and_streams) # Mark as done. cur.execute( database_engine.convert_param_style( - "INSERT INTO applied_module_schemas (module_name, file)" " VALUES (?,?)" + "INSERT INTO applied_module_schemas (module_name, file) VALUES (?,?)" ), (modname, name), ) diff --git a/synapse/streams/config.py b/synapse/streams/config.py index 02994ab2a5..cd56cd91ed 100644 --- a/synapse/streams/config.py +++ b/synapse/streams/config.py @@ -88,9 +88,12 @@ class PaginationConfig(object): raise SynapseError(400, "Invalid request.") def __repr__(self): - return ( - "PaginationConfig(from_tok=%r, to_tok=%r," " direction=%r, limit=%r)" - ) % (self.from_token, self.to_token, self.direction, self.limit) + return ("PaginationConfig(from_tok=%r, to_tok=%r, direction=%r, limit=%r)") % ( + self.from_token, + self.to_token, + self.direction, + self.limit, + ) def get_source_config(self, source_name): keyname = "%s_key" % source_name From 24cc31ee967e5c387a137e22b428dcea17fc9fa5 Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Thu, 21 Nov 2019 11:38:14 -0600 Subject: [PATCH 80/85] Fix link to user_dir_populate.sql in the user directory docs (#6388) --- changelog.d/6388.doc | 1 + docs/user_directory.md | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/6388.doc diff --git a/changelog.d/6388.doc b/changelog.d/6388.doc new file mode 100644 index 0000000000..c777cb6b8f --- /dev/null +++ b/changelog.d/6388.doc @@ -0,0 +1 @@ +Fix link in the user directory documentation. diff --git a/docs/user_directory.md b/docs/user_directory.md index e64aa453cc..37dc71e751 100644 --- a/docs/user_directory.md +++ b/docs/user_directory.md @@ -7,7 +7,6 @@ who are present in a publicly viewable room present on the server. The directory info is stored in various tables, which can (typically after DB corruption) get stale or out of sync. If this happens, for now the -solution to fix it is to execute the SQL here -https://github.com/matrix-org/synapse/blob/master/synapse/storage/schema/delta/53/user_dir_populate.sql +solution to fix it is to execute the SQL [here](../synapse/storage/data_stores/main/schema/delta/53/user_dir_populate.sql) and then restart synapse. This should then start a background task to flush the current tables and regenerate the directory. From 265c0bd2fe54db7f8a7dab05f41b27ce9a450563 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 22 Nov 2019 19:54:05 +0000 Subject: [PATCH 81/85] Add working build command for docker image (#6390) * Add working build command for docker image * Add changelog --- changelog.d/6390.doc | 1 + docker/README.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 changelog.d/6390.doc diff --git a/changelog.d/6390.doc b/changelog.d/6390.doc new file mode 100644 index 0000000000..093411bec1 --- /dev/null +++ b/changelog.d/6390.doc @@ -0,0 +1 @@ +Add build instructions to the docker readme. \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index 24dfa77dcc..9f112a01d0 100644 --- a/docker/README.md +++ b/docker/README.md @@ -130,3 +130,15 @@ docker run -it --rm \ This will generate the same configuration file as the legacy mode used, but will store it in `/data/homeserver.yaml` instead of a temporary location. You can then use it as shown above at [Running synapse](#running-synapse). + +## Building the image + +If you need to build the image from a Synapse checkout, use the following `docker + build` command from the repo's root: + +``` +docker build -t matrixdotorg/synapse -f docker/Dockerfile . +``` + +You can choose to build a different docker image by changing the value of the `-f` flag to +point to another Dockerfile. From b7367c339db153824fb47728d3eebe2f944530e6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 25 Nov 2019 13:26:59 +0000 Subject: [PATCH 82/85] Fix exceptions from background database update for event labels. (#6407) Add some exception handling here so that events whose json cannot be parsed are ignored rather than getting us stuck in a loop. Fixes #6404. --- changelog.d/6407.bugfix | 1 + .../data_stores/main/events_bg_updates.py | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) create mode 100644 changelog.d/6407.bugfix diff --git a/changelog.d/6407.bugfix b/changelog.d/6407.bugfix new file mode 100644 index 0000000000..0fdbf2a781 --- /dev/null +++ b/changelog.d/6407.bugfix @@ -0,0 +1 @@ +Fix a bug which could cause the background database update hander for event labels to get stuck in a loop raising exceptions. diff --git a/synapse/storage/data_stores/main/events_bg_updates.py b/synapse/storage/data_stores/main/events_bg_updates.py index 0ed59ef48e..aa87f9abc5 100644 --- a/synapse/storage/data_stores/main/events_bg_updates.py +++ b/synapse/storage/data_stores/main/events_bg_updates.py @@ -530,24 +530,31 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): nbrows = 0 last_row_event_id = "" for (event_id, event_json_raw) in results: - event_json = json.loads(event_json_raw) + try: + event_json = json.loads(event_json_raw) - self._simple_insert_many_txn( - txn=txn, - table="event_labels", - values=[ - { - "event_id": event_id, - "label": label, - "room_id": event_json["room_id"], - "topological_ordering": event_json["depth"], - } - for label in event_json["content"].get( - EventContentFields.LABELS, [] - ) - if isinstance(label, str) - ], - ) + self._simple_insert_many_txn( + txn=txn, + table="event_labels", + values=[ + { + "event_id": event_id, + "label": label, + "room_id": event_json["room_id"], + "topological_ordering": event_json["depth"], + } + for label in event_json["content"].get( + EventContentFields.LABELS, [] + ) + if isinstance(label, str) + ], + ) + except Exception as e: + logger.warning( + "Unable to load event %s (no labels will be imported): %s", + event_id, + e, + ) nbrows += 1 last_row_event_id = event_id From f9c9e1f07646262cb064782d4bf427dd1634617f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 25 Nov 2019 13:28:12 +0000 Subject: [PATCH 83/85] 1.6.0rc2 --- CHANGES.md | 9 +++++++++ changelog.d/6407.bugfix | 1 - synapse/__init__.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) delete mode 100644 changelog.d/6407.bugfix diff --git a/CHANGES.md b/CHANGES.md index f4f61db5d4..d26bc7a86f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.6.0rc2 (2019-11-25) +============================= + +Bugfixes +-------- + +- Fix a bug which could cause the background database update hander for event labels to get stuck in a loop raising exceptions. ([\#6407](https://github.com/matrix-org/synapse/issues/6407)) + + Synapse 1.6.0rc1 (2019-11-20) ============================= diff --git a/changelog.d/6407.bugfix b/changelog.d/6407.bugfix deleted file mode 100644 index 0fdbf2a781..0000000000 --- a/changelog.d/6407.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug which could cause the background database update hander for event labels to get stuck in a loop raising exceptions. diff --git a/synapse/__init__.py b/synapse/__init__.py index 1d962f5dc8..051c83774e 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -36,7 +36,7 @@ try: except ImportError: pass -__version__ = "1.6.0rc1" +__version__ = "1.6.0rc2" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 9eebd46048d0b34767047b2156760a1467f19ae6 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 26 Nov 2019 03:45:50 +1100 Subject: [PATCH 84/85] Improve the performance of structured logging (#6322) --- changelog.d/6322.misc | 1 + synapse/logging/_structured.py | 14 ++++- synapse/logging/_terse_json.py | 104 ++++++++++++++++++++++++--------- tests/server.py | 2 + 4 files changed, 92 insertions(+), 29 deletions(-) create mode 100644 changelog.d/6322.misc diff --git a/changelog.d/6322.misc b/changelog.d/6322.misc new file mode 100644 index 0000000000..70ef36ca80 --- /dev/null +++ b/changelog.d/6322.misc @@ -0,0 +1 @@ +Improve the performance of outputting structured logging. diff --git a/synapse/logging/_structured.py b/synapse/logging/_structured.py index 334ddaf39a..ffa7b20ca8 100644 --- a/synapse/logging/_structured.py +++ b/synapse/logging/_structured.py @@ -261,6 +261,18 @@ def parse_drain_configs( ) +class StoppableLogPublisher(LogPublisher): + """ + A log publisher that can tell its observers to shut down any external + communications. + """ + + def stop(self): + for obs in self._observers: + if hasattr(obs, "stop"): + obs.stop() + + def setup_structured_logging( hs, config, @@ -336,7 +348,7 @@ def setup_structured_logging( # We should never get here, but, just in case, throw an error. raise ConfigError("%s drain type cannot be configured" % (observer.type,)) - publisher = LogPublisher(*observers) + publisher = StoppableLogPublisher(*observers) log_filter = LogLevelFilterPredicate() for namespace, namespace_config in log_config.get( diff --git a/synapse/logging/_terse_json.py b/synapse/logging/_terse_json.py index 76ce7d8808..05fc64f409 100644 --- a/synapse/logging/_terse_json.py +++ b/synapse/logging/_terse_json.py @@ -17,25 +17,29 @@ Log formatters that output terse JSON. """ +import json import sys +import traceback from collections import deque from ipaddress import IPv4Address, IPv6Address, ip_address from math import floor -from typing import IO +from typing import IO, Optional import attr -from simplejson import dumps from zope.interface import implementer from twisted.application.internet import ClientService +from twisted.internet.defer import Deferred from twisted.internet.endpoints import ( HostnameEndpoint, TCP4ClientEndpoint, TCP6ClientEndpoint, ) +from twisted.internet.interfaces import IPushProducer, ITransport from twisted.internet.protocol import Factory, Protocol from twisted.logger import FileLogObserver, ILogObserver, Logger -from twisted.python.failure import Failure + +_encoder = json.JSONEncoder(ensure_ascii=False, separators=(",", ":")) def flatten_event(event: dict, metadata: dict, include_time: bool = False): @@ -141,11 +145,49 @@ def TerseJSONToConsoleLogObserver(outFile: IO[str], metadata: dict) -> FileLogOb def formatEvent(_event: dict) -> str: flattened = flatten_event(_event, metadata) - return dumps(flattened, ensure_ascii=False, separators=(",", ":")) + "\n" + return _encoder.encode(flattened) + "\n" return FileLogObserver(outFile, formatEvent) +@attr.s +@implementer(IPushProducer) +class LogProducer(object): + """ + An IPushProducer that writes logs from its buffer to its transport when it + is resumed. + + Args: + buffer: Log buffer to read logs from. + transport: Transport to write to. + """ + + transport = attr.ib(type=ITransport) + _buffer = attr.ib(type=deque) + _paused = attr.ib(default=False, type=bool, init=False) + + def pauseProducing(self): + self._paused = True + + def stopProducing(self): + self._paused = True + self._buffer = None + + def resumeProducing(self): + self._paused = False + + while self._paused is False and (self._buffer and self.transport.connected): + try: + event = self._buffer.popleft() + self.transport.write(_encoder.encode(event).encode("utf8")) + self.transport.write(b"\n") + except Exception: + # Something has gone wrong writing to the transport -- log it + # and break out of the while. + traceback.print_exc(file=sys.__stderr__) + break + + @attr.s @implementer(ILogObserver) class TerseJSONToTCPLogObserver(object): @@ -165,8 +207,9 @@ class TerseJSONToTCPLogObserver(object): metadata = attr.ib(type=dict) maximum_buffer = attr.ib(type=int) _buffer = attr.ib(default=attr.Factory(deque), type=deque) - _writer = attr.ib(default=None) + _connection_waiter = attr.ib(default=None, type=Optional[Deferred]) _logger = attr.ib(default=attr.Factory(Logger)) + _producer = attr.ib(default=None, type=Optional[LogProducer]) def start(self) -> None: @@ -187,38 +230,43 @@ class TerseJSONToTCPLogObserver(object): factory = Factory.forProtocol(Protocol) self._service = ClientService(endpoint, factory, clock=self.hs.get_reactor()) self._service.startService() + self._connect() - def _write_loop(self) -> None: + def stop(self): + self._service.stopService() + + def _connect(self) -> None: """ - Implement the write loop. + Triggers an attempt to connect then write to the remote if not already writing. """ - if self._writer: + if self._connection_waiter: return - self._writer = self._service.whenConnected() + self._connection_waiter = self._service.whenConnected(failAfterFailures=1) - @self._writer.addBoth + @self._connection_waiter.addErrback + def fail(r): + r.printTraceback(file=sys.__stderr__) + self._connection_waiter = None + self._connect() + + @self._connection_waiter.addCallback def writer(r): - if isinstance(r, Failure): - r.printTraceback(file=sys.__stderr__) - self._writer = None - self.hs.get_reactor().callLater(1, self._write_loop) + # We have a connection. If we already have a producer, and its + # transport is the same, just trigger a resumeProducing. + if self._producer and r.transport is self._producer.transport: + self._producer.resumeProducing() return - try: - for event in self._buffer: - r.transport.write( - dumps(event, ensure_ascii=False, separators=(",", ":")).encode( - "utf8" - ) - ) - r.transport.write(b"\n") - self._buffer.clear() - except Exception as e: - sys.__stderr__.write("Failed writing out logs with %s\n" % (str(e),)) + # If the producer is still producing, stop it. + if self._producer: + self._producer.stopProducing() - self._writer = False - self.hs.get_reactor().callLater(1, self._write_loop) + # Make a new producer and start it. + self._producer = LogProducer(buffer=self._buffer, transport=r.transport) + r.transport.registerProducer(self._producer, True) + self._producer.resumeProducing() + self._connection_waiter = None def _handle_pressure(self) -> None: """ @@ -277,4 +325,4 @@ class TerseJSONToTCPLogObserver(object): self._logger.failure("Failed clearing backpressure") # Try and write immediately. - self._write_loop() + self._connect() diff --git a/tests/server.py b/tests/server.py index f878aeaada..2b7cf4242e 100644 --- a/tests/server.py +++ b/tests/server.py @@ -379,6 +379,7 @@ class FakeTransport(object): disconnecting = False disconnected = False + connected = True buffer = attr.ib(default=b"") producer = attr.ib(default=None) autoflush = attr.ib(default=True) @@ -402,6 +403,7 @@ class FakeTransport(object): "FakeTransport: Delaying disconnect until buffer is flushed" ) else: + self.connected = False self.disconnected = True def abortConnection(self): From c01d5435843ad4af3d520851e86d9938b47b2d12 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 25 Nov 2019 21:03:17 +0000 Subject: [PATCH 85/85] Make sure that we close cursors before returning from a query (#6408) There are lots of words in the comment as to why this is a good idea. Fixes #6403. --- changelog.d/6408.bugfix | 1 + synapse/storage/_base.py | 51 ++++++++++++++++---- synapse/storage/data_stores/main/receipts.py | 2 +- 3 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 changelog.d/6408.bugfix diff --git a/changelog.d/6408.bugfix b/changelog.d/6408.bugfix new file mode 100644 index 0000000000..c9babe599b --- /dev/null +++ b/changelog.d/6408.bugfix @@ -0,0 +1 @@ +Fix an intermittent exception when handling read-receipts. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 6b8a9cd89a..459901ac60 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -409,16 +409,15 @@ class SQLBaseStore(object): i = 0 N = 5 while True: + cursor = LoggingTransaction( + conn.cursor(), + name, + self.database_engine, + after_callbacks, + exception_callbacks, + ) try: - txn = conn.cursor() - txn = LoggingTransaction( - txn, - name, - self.database_engine, - after_callbacks, - exception_callbacks, - ) - r = func(txn, *args, **kwargs) + r = func(cursor, *args, **kwargs) conn.commit() return r except self.database_engine.module.OperationalError as e: @@ -456,6 +455,40 @@ class SQLBaseStore(object): ) continue raise + finally: + # we're either about to retry with a new cursor, or we're about to + # release the connection. Once we release the connection, it could + # get used for another query, which might do a conn.rollback(). + # + # In the latter case, even though that probably wouldn't affect the + # results of this transaction, python's sqlite will reset all + # statements on the connection [1], which will make our cursor + # invalid [2]. + # + # In any case, continuing to read rows after commit()ing seems + # dubious from the PoV of ACID transactional semantics + # (sqlite explicitly says that once you commit, you may see rows + # from subsequent updates.) + # + # In psycopg2, cursors are essentially a client-side fabrication - + # all the data is transferred to the client side when the statement + # finishes executing - so in theory we could go on streaming results + # from the cursor, but attempting to do so would make us + # incompatible with sqlite, so let's make sure we're not doing that + # by closing the cursor. + # + # (*named* cursors in psycopg2 are different and are proper server- + # side things, but (a) we don't use them and (b) they are implicitly + # closed by ending the transaction anyway.) + # + # In short, if we haven't finished with the cursor yet, that's a + # problem waiting to bite us. + # + # TL;DR: we're done with the cursor, so we can close it. + # + # [1]: https://github.com/python/cpython/blob/v3.8.0/Modules/_sqlite/connection.c#L465 + # [2]: https://github.com/python/cpython/blob/v3.8.0/Modules/_sqlite/cursor.c#L236 + cursor.close() except Exception as e: logger.debug("[TXN FAIL] {%s} %s", name, e) raise diff --git a/synapse/storage/data_stores/main/receipts.py b/synapse/storage/data_stores/main/receipts.py index 0c24430f28..8b17334ff4 100644 --- a/synapse/storage/data_stores/main/receipts.py +++ b/synapse/storage/data_stores/main/receipts.py @@ -280,7 +280,7 @@ class ReceiptsWorkerStore(SQLBaseStore): args.append(limit) txn.execute(sql, args) - return (r[0:5] + (json.loads(r[5]),) for r in txn) + return list(r[0:5] + (json.loads(r[5]),) for r in txn) return self.runInteraction( "get_all_updated_receipts", get_all_updated_receipts_txn