Handle old staged inbound events (#10303)
We might have events in the staging area if the service was restarted while there were unhandled events in the staging area. Fixes #10295
This commit is contained in:
parent
d7a94a7dcc
commit
c65067d673
|
@ -0,0 +1 @@
|
||||||
|
Ensure that inbound events from federation that were being processed when Synapse was restarted get promptly processed on start up.
|
|
@ -148,6 +148,41 @@ class FederationServer(FederationBase):
|
||||||
|
|
||||||
self._room_prejoin_state_types = hs.config.api.room_prejoin_state
|
self._room_prejoin_state_types = hs.config.api.room_prejoin_state
|
||||||
|
|
||||||
|
# Whether we have started handling old events in the staging area.
|
||||||
|
self._started_handling_of_staged_events = False
|
||||||
|
|
||||||
|
@wrap_as_background_process("_handle_old_staged_events")
|
||||||
|
async def _handle_old_staged_events(self) -> None:
|
||||||
|
"""Handle old staged events by fetching all rooms that have staged
|
||||||
|
events and start the processing of each of those rooms.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get all the rooms IDs with staged events.
|
||||||
|
room_ids = await self.store.get_all_rooms_with_staged_incoming_events()
|
||||||
|
|
||||||
|
# We then shuffle them so that if there are multiple instances doing
|
||||||
|
# this work they're less likely to collide.
|
||||||
|
random.shuffle(room_ids)
|
||||||
|
|
||||||
|
for room_id in room_ids:
|
||||||
|
room_version = await self.store.get_room_version(room_id)
|
||||||
|
|
||||||
|
# Try and acquire the processing lock for the room, if we get it start a
|
||||||
|
# background process for handling the events in the room.
|
||||||
|
lock = await self.store.try_acquire_lock(
|
||||||
|
_INBOUND_EVENT_HANDLING_LOCK_NAME, room_id
|
||||||
|
)
|
||||||
|
if lock:
|
||||||
|
logger.info("Handling old staged inbound events in %s", room_id)
|
||||||
|
self._process_incoming_pdus_in_room_inner(
|
||||||
|
room_id,
|
||||||
|
room_version,
|
||||||
|
lock,
|
||||||
|
)
|
||||||
|
|
||||||
|
# We pause a bit so that we don't start handling all rooms at once.
|
||||||
|
await self._clock.sleep(random.uniform(0, 0.1))
|
||||||
|
|
||||||
async def on_backfill_request(
|
async def on_backfill_request(
|
||||||
self, origin: str, room_id: str, versions: List[str], limit: int
|
self, origin: str, room_id: str, versions: List[str], limit: int
|
||||||
) -> Tuple[int, Dict[str, Any]]:
|
) -> Tuple[int, Dict[str, Any]]:
|
||||||
|
@ -166,6 +201,12 @@ class FederationServer(FederationBase):
|
||||||
async def on_incoming_transaction(
|
async def on_incoming_transaction(
|
||||||
self, origin: str, transaction_data: JsonDict
|
self, origin: str, transaction_data: JsonDict
|
||||||
) -> Tuple[int, Dict[str, Any]]:
|
) -> Tuple[int, Dict[str, Any]]:
|
||||||
|
# If we receive a transaction we should make sure that kick off handling
|
||||||
|
# any old events in the staging area.
|
||||||
|
if not self._started_handling_of_staged_events:
|
||||||
|
self._started_handling_of_staged_events = True
|
||||||
|
self._handle_old_staged_events()
|
||||||
|
|
||||||
# keep this as early as possible to make the calculated origin ts as
|
# keep this as early as possible to make the calculated origin ts as
|
||||||
# accurate as possible.
|
# accurate as possible.
|
||||||
request_time = self._clock.time_msec()
|
request_time = self._clock.time_msec()
|
||||||
|
@ -882,25 +923,28 @@ class FederationServer(FederationBase):
|
||||||
room_id: str,
|
room_id: str,
|
||||||
room_version: RoomVersion,
|
room_version: RoomVersion,
|
||||||
lock: Lock,
|
lock: Lock,
|
||||||
latest_origin: str,
|
latest_origin: Optional[str] = None,
|
||||||
latest_event: EventBase,
|
latest_event: Optional[EventBase] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Process events in the staging area for the given room.
|
"""Process events in the staging area for the given room.
|
||||||
|
|
||||||
The latest_origin and latest_event args are the latest origin and event
|
The latest_origin and latest_event args are the latest origin and event
|
||||||
received.
|
received (or None to simply pull the next event from the database).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# The common path is for the event we just received be the only event in
|
# The common path is for the event we just received be the only event in
|
||||||
# the room, so instead of pulling the event out of the DB and parsing
|
# the room, so instead of pulling the event out of the DB and parsing
|
||||||
# the event we just pull out the next event ID and check if that matches.
|
# the event we just pull out the next event ID and check if that matches.
|
||||||
next_origin, next_event_id = await self.store.get_next_staged_event_id_for_room(
|
if latest_event is not None and latest_origin is not None:
|
||||||
room_id
|
(
|
||||||
)
|
next_origin,
|
||||||
if next_origin == latest_origin and next_event_id == latest_event.event_id:
|
next_event_id,
|
||||||
origin = latest_origin
|
) = await self.store.get_next_staged_event_id_for_room(room_id)
|
||||||
event = latest_event
|
if next_origin != latest_origin or next_event_id != latest_event.event_id:
|
||||||
else:
|
latest_origin = None
|
||||||
|
latest_event = None
|
||||||
|
|
||||||
|
if latest_origin is None or latest_event is None:
|
||||||
next = await self.store.get_next_staged_event_for_room(
|
next = await self.store.get_next_staged_event_for_room(
|
||||||
room_id, room_version
|
room_id, room_version
|
||||||
)
|
)
|
||||||
|
@ -908,6 +952,9 @@ class FederationServer(FederationBase):
|
||||||
return
|
return
|
||||||
|
|
||||||
origin, event = next
|
origin, event = next
|
||||||
|
else:
|
||||||
|
origin = latest_origin
|
||||||
|
event = latest_event
|
||||||
|
|
||||||
# We loop round until there are no more events in the room in the
|
# We loop round until there are no more events in the room in the
|
||||||
# staging area, or we fail to get the lock (which means another process
|
# staging area, or we fail to get the lock (which means another process
|
||||||
|
|
|
@ -1207,6 +1207,15 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
|
||||||
|
|
||||||
return origin, event
|
return origin, event
|
||||||
|
|
||||||
|
async def get_all_rooms_with_staged_incoming_events(self) -> List[str]:
|
||||||
|
"""Get the room IDs of all events currently staged."""
|
||||||
|
return await self.db_pool.simple_select_onecol(
|
||||||
|
table="federation_inbound_events_staging",
|
||||||
|
keyvalues={},
|
||||||
|
retcol="DISTINCT room_id",
|
||||||
|
desc="get_all_rooms_with_staged_incoming_events",
|
||||||
|
)
|
||||||
|
|
||||||
@wrap_as_background_process("_get_stats_for_federation_staging")
|
@wrap_as_background_process("_get_stats_for_federation_staging")
|
||||||
async def _get_stats_for_federation_staging(self):
|
async def _get_stats_for_federation_staging(self):
|
||||||
"""Update the prometheus metrics for the inbound federation staging area."""
|
"""Update the prometheus metrics for the inbound federation staging area."""
|
||||||
|
|
Loading…
Reference in New Issue