Don't lock up when joining large rooms (#16903)

Co-authored-by: Andrew Morgan <andrew@amorgan.xyz>
2024-02-20 14:29:18 +00:00 · 2024-02-20 14:29:18 +00:00 · cdbbf3653d
parent c51a2240d1
commit cdbbf3653d
2 changed files with 18 additions and 9 deletions
--- a/changelog.d/16903.bugfix
+++ b/changelog.d/16903.bugfix
@@ -0,0 +1 @@
+Fix performance issue when joining very large rooms that can cause the server to lock up. Introduced in v1.100.0.
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1757,17 +1757,25 @@ class FederationEventHandler:
            events_and_contexts_to_persist.append((event, context))
-        for event in sorted_auth_events:
+        for i, event in enumerate(sorted_auth_events):
            await prep(event)
-        await self.persist_events_and_notify(
-            room_id,
-            events_and_contexts_to_persist,
-            # Mark these events backfilled as they're historic events that will
-            # eventually be backfilled. For example, missing events we fetch
-            # during backfill should be marked as backfilled as well.
-            backfilled=True,
-        )
+            # The above function is typically not async, and so won't yield to
+            # the reactor. For large rooms let's yield to the reactor
+            # occasionally to ensure we don't block other work.
+            if (i + 1) % 1000 == 0:
+                await self._clock.sleep(0)
+
+        # Also persist the new event in batches for similar reasons as above.
+        for batch in batch_iter(events_and_contexts_to_persist, 1000):
+            await self.persist_events_and_notify(
+                room_id,
+                batch,
+                # Mark these events as backfilled as they're historic events that will
+                # eventually be backfilled. For example, missing events we fetch
+                # during backfill should be marked as backfilled as well.
+                backfilled=True,
+            )
    @trace
    async def _check_event_auth(
		`@@ -0,0 +1 @@`
							`Fix performance issue when joining very large rooms that can cause the server to lock up. Introduced in v1.100.0.`