Fix AssertionErrors after purging events (#11642)

* Fix AssertionErrors after purging events

  If you purged a bunch of events from your database, and then restarted Synapse without receiving more events, then you would get a bunch of AssertionErrors on restart. This fixes the situation by rewinding the stream processors.

* `check-newsfragment`: ignore deleted newsfiles
2022-01-04 16:36:33 +00:00 · 2022-01-04 16:36:33 +00:00 · b38bdae3a2
parent 878aa55293
commit b38bdae3a2
6 changed files with 30 additions and 6 deletions
--- a/changelog.d/11536.bugfix
+++ b/changelog.d/11536.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database.
--- a/changelog.d/11536.misc
+++ b/changelog.d/11536.misc
@@ -1 +0,0 @@
-Improvements to log messages around handling stream ids.
--- a/changelog.d/11642.bugfix
+++ b/changelog.d/11642.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database.
--- a/scripts-dev/check-newsfragment
+++ b/scripts-dev/check-newsfragment
@@ -42,8 +42,8 @@ echo "--------------------------"
 echo

 matched=0
-for f in $(git diff --name-only FETCH_HEAD... -- changelog.d); do
-    # check that any modified newsfiles on this branch end with a full stop.
+for f in $(git diff --diff-filter=d --name-only FETCH_HEAD... -- changelog.d); do
+    # check that any added newsfiles on this branch end with a full stop.
    lastchar=$(tr -d '\n' < "$f" | tail -c 1)
    if [ "$lastchar" != '.' ] && [ "$lastchar" != '!' ]; then
        echo -e "\e[31mERROR: newsfragment $f does not end with a '.' or '!'\e[39m" >&2
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -80,6 +80,17 @@ class StatsHandler:
        # If self.pos is None then means we haven't fetched it from DB
        if self.pos is None:
            self.pos = await self.store.get_stats_positions()
+            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+            if self.pos > room_max_stream_ordering:
+                # apparently, we've processed more events than exist in the database!
+                # this can happen if events are removed with history purge or similar.
+                logger.warning(
+                    "Event stream ordering appears to have gone backwards (%i -> %i): "
+                    "rewinding stats processor",
+                    self.pos,
+                    room_max_stream_ordering,
+                )
+                self.pos = room_max_stream_ordering

        # Loop round handling deltas until we're up to date

--- a/synapse/handlers/user_directory.py
+++ b/synapse/handlers/user_directory.py
@@ -148,9 +148,21 @@ class UserDirectoryHandler(StateDeltasHandler):
        if self.pos is None:
            self.pos = await self.store.get_user_directory_stream_pos()

-        # If still None then the initial background update hasn't happened yet.
-        if self.pos is None:
-            return None
+            # If still None then the initial background update hasn't happened yet.
+            if self.pos is None:
+                return None
+
+            room_max_stream_ordering = self.store.get_room_max_stream_ordering()
+            if self.pos > room_max_stream_ordering:
+                # apparently, we've processed more events than exist in the database!
+                # this can happen if events are removed with history purge or similar.
+                logger.warning(
+                    "Event stream ordering appears to have gone backwards (%i -> %i): "
+                    "rewinding user directory processor",
+                    self.pos,
+                    room_max_stream_ordering,
+                )
+                self.pos = room_max_stream_ordering

        # Loop round handling deltas until we're up to date
        while True:
				`@ -0,0 +1 @@`
				Fix a long-standing bug which could cause `AssertionError`s to be written to the log when Synapse was restarted after purging events from the database.
				`@ -1 +0,0 @@`
				`Improvements to log messages around handling stream ids.`