Merge pull request #3856 from matrix-org/erikj/speed_up_purge

Make purge history slightly faster
2018-09-13 16:14:46 +01:00 · 2018-09-13 16:14:46 +01:00 · 6c0f8d9d50
parent cb64fe2cb7 ed5331a627
commit 6c0f8d9d50
2 changed files with 29 additions and 16 deletions
--- a/changelog.d/3856.misc
+++ b/changelog.d/3856.misc
@@ -0,0 +1 @@
 Speed up purge history for rooms that have been previously purged
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1890,20 +1890,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
            ")"
        )
        # create an index on should_delete because later we'll be looking for
        # the should_delete / shouldn't_delete subsets
        txn.execute(
            "CREATE INDEX events_to_purge_should_delete"
            " ON events_to_purge(should_delete)",
        )
        # We do joins against events_to_purge for e.g. calculating state
        # groups to purge, etc., so let's make an index.
        txn.execute(
            "CREATE INDEX events_to_purge_id"
            " ON events_to_purge(event_id)",
        )
        # First ensure that we're not about to delete all the forward extremities
        txn.execute(
            "SELECT e.event_id, e.depth FROM events as e "
@@ -1930,19 +1916,45 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
        should_delete_params = ()
        if not delete_local_events:
            should_delete_expr += " AND event_id NOT LIKE ?"
-            should_delete_params += ("%:" + self.hs.hostname, )
+
            # We include the parameter twice since we use the expression twice
            should_delete_params += (
                "%:" + self.hs.hostname,
                "%:" + self.hs.hostname,
            )
        should_delete_params += (room_id, token.topological)
        # Note that we insert events that are outliers and aren't going to be
        # deleted, as nothing will happen to them.
        txn.execute(
            "INSERT INTO events_to_purge"
            " SELECT event_id, %s"
            " FROM events AS e LEFT JOIN state_events USING (event_id)"
-            " WHERE e.room_id = ? AND topological_ordering < ?" % (
+            " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
            % (
                should_delete_expr,
                should_delete_expr,
            ),
            should_delete_params,
        )
        # We create the indices *after* insertion as that's a lot faster.
        # create an index on should_delete because later we'll be looking for
        # the should_delete / shouldn't_delete subsets
        txn.execute(
            "CREATE INDEX events_to_purge_should_delete"
            " ON events_to_purge(should_delete)",
        )
        # We do joins against events_to_purge for e.g. calculating state
        # groups to purge, etc., so let's make an index.
        txn.execute(
            "CREATE INDEX events_to_purge_id"
            " ON events_to_purge(event_id)",
        )
        txn.execute(
            "SELECT event_id, should_delete FROM events_to_purge"
        )
		`@ -0,0 +1 @@`
							`Speed up purge history for rooms that have been previously purged`