diff --git a/changelog.d/4927.feature b/changelog.d/4927.feature new file mode 100644 index 0000000000..8d74262250 --- /dev/null +++ b/changelog.d/4927.feature @@ -0,0 +1 @@ +Batch up outgoing read-receipts to reduce federation traffic. diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 55630ba9a7..e16fad5261 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -223,14 +223,25 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): return # Now lets try and call on_ function - try: - run_as_background_process( - "replication-" + cmd.get_logcontext_id(), - getattr(self, "on_%s" % (cmd_name,)), - cmd, - ) - except Exception: - logger.exception("[%s] Failed to handle line: %r", self.id(), line) + run_as_background_process( + "replication-" + cmd.get_logcontext_id(), + self.handle_command, + cmd, + ) + + def handle_command(self, cmd): + """Handle a command we have received over the replication stream. + + By default delegates to on_ + + Args: + cmd (synapse.replication.tcp.commands.Command): received command + + Returns: + Deferred + """ + handler = getattr(self, "on_%s" % (cmd.NAME,)) + return handler(cmd) def close(self): logger.warn("[%s] Closing connection", self.id()) diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py index c1e626be3f..e23084baae 100644 --- a/synapse/replication/tcp/streams.py +++ b/synapse/replication/tcp/streams.py @@ -23,7 +23,7 @@ Each stream is defined by the following information: current_token: The function that returns the current token for the stream update_function: The function that returns a list of updates between two tokens """ - +import itertools import logging from collections import namedtuple @@ -195,8 +195,8 @@ class Stream(object): limit=MAX_EVENTS_BEHIND + 1, ) - if len(rows) >= MAX_EVENTS_BEHIND: - raise Exception("stream %s has fallen behind" % (self.NAME)) + # never turn more than MAX_EVENTS_BEHIND + 1 into updates. + rows = itertools.islice(rows, MAX_EVENTS_BEHIND + 1) else: rows = yield self.update_function( from_token, current_token, @@ -204,6 +204,11 @@ class Stream(object): updates = [(row[0], self.ROW_TYPE(*row[1:])) for row in rows] + # check we didn't get more rows than the limit. + # doing it like this allows the update_function to be a generator. + if self._LIMITED and len(updates) >= MAX_EVENTS_BEHIND: + raise Exception("stream %s has fallen behind" % (self.NAME)) + defer.returnValue((updates, current_token)) def current_token(self): diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0fd1ccc40a..89a1f7e3d7 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -301,7 +301,9 @@ class ReceiptsWorkerStore(SQLBaseStore): args.append(limit) txn.execute(sql, args) - return txn.fetchall() + return ( + r[0:5] + (json.loads(r[5]), ) for r in txn + ) return self.runInteraction( "get_all_updated_receipts", get_all_updated_receipts_txn ) diff --git a/tests/replication/tcp/__init__.py b/tests/replication/tcp/__init__.py new file mode 100644 index 0000000000..1453d04571 --- /dev/null +++ b/tests/replication/tcp/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/replication/tcp/streams/__init__.py b/tests/replication/tcp/streams/__init__.py new file mode 100644 index 0000000000..1453d04571 --- /dev/null +++ b/tests/replication/tcp/streams/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/replication/tcp/streams/_base.py b/tests/replication/tcp/streams/_base.py new file mode 100644 index 0000000000..38b368a972 --- /dev/null +++ b/tests/replication/tcp/streams/_base.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.replication.tcp.commands import ReplicateCommand +from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol +from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory + +from tests import unittest +from tests.server import FakeTransport + + +class BaseStreamTestCase(unittest.HomeserverTestCase): + """Base class for tests of the replication streams""" + def prepare(self, reactor, clock, hs): + # build a replication server + server_factory = ReplicationStreamProtocolFactory(self.hs) + self.streamer = server_factory.streamer + server = server_factory.buildProtocol(None) + + # build a replication client, with a dummy handler + self.test_handler = TestReplicationClientHandler() + self.client = ClientReplicationStreamProtocol( + "client", "test", clock, self.test_handler + ) + + # wire them together + self.client.makeConnection(FakeTransport(server, reactor)) + server.makeConnection(FakeTransport(self.client, reactor)) + + def replicate(self): + """Tell the master side of replication that something has happened, and then + wait for the replication to occur. + """ + self.streamer.on_notifier_poke() + self.pump(0.1) + + def replicate_stream(self, stream, token="NOW"): + """Make the client end a REPLICATE command to set up a subscription to a stream""" + self.client.send_command(ReplicateCommand(stream, token)) + + +class TestReplicationClientHandler(object): + """Drop-in for ReplicationClientHandler which just collects RDATA rows""" + def __init__(self): + self.received_rdata_rows = [] + + def get_streams_to_replicate(self): + return {} + + def get_currently_syncing_users(self): + return [] + + def update_connection(self, connection): + pass + + def finished_connecting(self): + pass + + def on_rdata(self, stream_name, token, rows): + for r in rows: + self.received_rdata_rows.append( + (stream_name, token, r) + ) diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py new file mode 100644 index 0000000000..9aa9dfe82e --- /dev/null +++ b/tests/replication/tcp/streams/test_receipts.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.replication.tcp.streams import ReceiptsStreamRow + +from tests.replication.tcp.streams._base import BaseStreamTestCase + +USER_ID = "@feeling:blue" +ROOM_ID = "!room:blue" +EVENT_ID = "$event:blue" + + +class ReceiptsStreamTestCase(BaseStreamTestCase): + def test_receipt(self): + # make the client subscribe to the receipts stream + self.replicate_stream("receipts", "NOW") + + # tell the master to send a new receipt + self.get_success( + self.hs.get_datastore().insert_receipt( + ROOM_ID, "m.read", USER_ID, [EVENT_ID], {"a": 1} + ) + ) + self.replicate() + + # there should be one RDATA command + rdata_rows = self.test_handler.received_rdata_rows + self.assertEqual(1, len(rdata_rows)) + self.assertEqual(rdata_rows[0][0], "receipts") + row = rdata_rows[0][2] # type: ReceiptsStreamRow + self.assertEqual(ROOM_ID, row.room_id) + self.assertEqual("m.read", row.receipt_type) + self.assertEqual(USER_ID, row.user_id) + self.assertEqual(EVENT_ID, row.event_id) + self.assertEqual({"a": 1}, row.data)