More efficiently handle no-op POSITION (#16640)

We may receive `POSITION` commands where we already know that worker has
advanced past that position, so there is no point in handling it.
This commit is contained in:
Erik Johnston 2023-11-16 12:32:17 +00:00 committed by GitHub
parent 830988ae72
commit 898655fd12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 0 deletions

1
changelog.d/16640.misc Normal file
View File

@ -0,0 +1 @@
More efficiently handle no-op `POSITION` over replication.

View File

@ -588,6 +588,21 @@ class ReplicationCommandHandler:
logger.debug("Handling '%s %s'", cmd.NAME, cmd.to_line())
# Check if we can early discard this position. We can only do so for
# connected streams.
stream = self._streams[cmd.stream_name]
if stream.can_discard_position(
cmd.instance_name, cmd.prev_token, cmd.new_token
) and self.is_stream_connected(conn, cmd.stream_name):
logger.debug(
"Discarding redundant POSITION %s/%s %s %s",
cmd.instance_name,
cmd.stream_name,
cmd.prev_token,
cmd.new_token,
)
return
self._add_command_to_stream_queue(conn, cmd)
async def _process_position(
@ -599,6 +614,18 @@ class ReplicationCommandHandler:
"""
stream = self._streams[stream_name]
if stream.can_discard_position(
cmd.instance_name, cmd.prev_token, cmd.new_token
) and self.is_stream_connected(conn, cmd.stream_name):
logger.debug(
"Discarding redundant POSITION %s/%s %s %s",
cmd.instance_name,
cmd.stream_name,
cmd.prev_token,
cmd.new_token,
)
return
# We're about to go and catch up with the stream, so remove from set
# of connected streams.
for streams in self._streams_by_connection.values():
@ -657,6 +684,13 @@ class ReplicationCommandHandler:
self._streams_by_connection.setdefault(conn, set()).add(stream_name)
def is_stream_connected(
self, conn: IReplicationConnection, stream_name: str
) -> bool:
"""Return if stream has been successfully connected and is ready to
receive updates"""
return stream_name in self._streams_by_connection.get(conn, ())
def on_REMOTE_SERVER_UP(
self, conn: IReplicationConnection, cmd: RemoteServerUpCommand
) -> None:

View File

@ -144,6 +144,16 @@ class Stream:
"""
raise NotImplementedError()
def can_discard_position(
self, instance_name: str, prev_token: int, new_token: int
) -> bool:
"""Whether or not a position command for this stream can be discarded.
Useful for streams that can never go backwards and where we already know
the stream ID for the instance has advanced.
"""
return False
def discard_updates_and_advance(self) -> None:
"""Called when the stream should advance but the updates would be discarded,
e.g. when there are no currently connected workers.
@ -221,6 +231,14 @@ class _StreamFromIdGen(Stream):
def minimal_local_current_token(self) -> Token:
return self._stream_id_gen.get_minimal_local_current_token()
def can_discard_position(
self, instance_name: str, prev_token: int, new_token: int
) -> bool:
# These streams can't go backwards, so we know we can ignore any
# positions where the tokens are from before the current token.
return new_token <= self.current_token(instance_name)
def current_token_without_instance(
current_token: Callable[[], int]