Reduce replication traffic due to reflected cache stream POSITION (#16557)

Erik Johnston 2023-10-27 12:51:08 +01:00 committed by GitHub
parent c02406ac71
commit 0680d76659
2 changed files with 19 additions and 1 deletion

changelog.d/16557.bugfix Normal file

@@ -0,0 +1 @@
Fix a long-standing, exceedingly rare edge case where the first event persisted by a new event persister worker might not be sent down `/sync`.

synapse/replication/tcp/resource.py

@@ -27,7 +27,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.replication.tcp.commands import PositionCommand
from synapse.replication.tcp.protocol import ServerReplicationStreamProtocol
from synapse.replication.tcp.streams import EventsStream
-from synapse.replication.tcp.streams._base import StreamRow, Token
+from synapse.replication.tcp.streams._base import CachesStream, StreamRow, Token
from synapse.util.metrics import Measure
if TYPE_CHECKING:
@@ -204,6 +204,23 @@ class ReplicationStreamer:
# The token has advanced but there is no data to
# send, so we send a `POSITION` to inform other
# workers of the updated position.
#
# There are two reasons for this: 1) this instance
# requested a stream ID but didn't use it, or 2)
# this instance advanced its own stream position due
# to receiving notifications about other instances
# advancing their stream position.

# We skip sending `POSITION` for the `caches` stream
# for the second case as a) it generates a lot of
# traffic as every worker would echo each write, and
# b) nothing cares if a given worker's caches stream
# position lags.
if stream.NAME == CachesStream.NAME:
    # If there haven't been any writes since the
    # `last_token` then we're in the second case.
    if stream.minimal_local_current_token() <= last_token:
        continue

# Note: `last_token` may not *actually* be the
# last token we sent out in a RDATA or POSITION.
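
To illustrate the check above in isolation, here is a minimal, hypothetical sketch rather than Synapse's actual implementation: the function `should_send_position`, the constant `CACHES_STREAM_NAME`, and the example token values are invented stand-ins for the `stream.NAME == CachesStream.NAME` test and the `minimal_local_current_token() <= last_token` comparison.

CACHES_STREAM_NAME = "caches"

def should_send_position(stream_name: str, current_token: int, last_token: int) -> bool:
    """Decide whether to broadcast a POSITION for a stream that advanced with no RDATA to send."""
    if stream_name == CACHES_STREAM_NAME:
        # No local writes since `last_token`: the position only advanced because
        # other workers advanced theirs, so skip echoing it back.
        if current_token <= last_token:
            return False
    return True

# The caches stream advanced only via remote notifications: skip POSITION.
assert should_send_position("caches", current_token=10, last_token=10) is False
# A local cache invalidation produced a newer token: send POSITION.
assert should_send_position("caches", current_token=11, last_token=10) is True
# Other streams still announce their new position.
assert should_send_position("events", current_token=10, last_token=10) is True

The key point mirrors the comment in the diff: when the caches stream's locally written token has not moved past `last_token`, the advance was only a reflection of other workers' writes, so broadcasting a POSITION would just add replication traffic.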