From 5a5abd55e8b47a7c1620c298a72817ccf73f90b0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Feb 2020 09:39:27 +0000 Subject: [PATCH] Limit size of get_auth_chain_ids query (#6947) --- changelog.d/6947.misc | 1 + .../data_stores/main/event_federation.py | 39 +++++++++++-------- 2 files changed, 23 insertions(+), 17 deletions(-) create mode 100644 changelog.d/6947.misc diff --git a/changelog.d/6947.misc b/changelog.d/6947.misc new file mode 100644 index 0000000000..6d00e58654 --- /dev/null +++ b/changelog.d/6947.misc @@ -0,0 +1 @@ +Increase perf of `get_auth_chain_ids` used in state res v2. diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py index 1746f40adf..dcc375b840 100644 --- a/synapse/storage/data_stores/main/event_federation.py +++ b/synapse/storage/data_stores/main/event_federation.py @@ -62,32 +62,37 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas ) def _get_auth_chain_ids_txn(self, txn, event_ids, include_given): + if include_given: + results = set(event_ids) + else: + results = set() + if isinstance(self.database_engine, PostgresEngine): # For efficiency we make the database do this if we can. - sql = """ - WITH RECURSIVE auth_chain(event_id) AS ( - SELECT auth_id FROM event_auth WHERE event_id = ANY(?) - UNION - SELECT auth_id FROM event_auth - INNER JOIN auth_chain USING (event_id) - ) - SELECT event_id FROM auth_chain - """ - txn.execute(sql, (list(event_ids),)) - results = set(event_id for event_id, in txn) + # We need to be a little careful with querying large amounts at + # once, for some reason postgres really doesn't like it. We do this + # by only asking for auth chain of 500 events at a time. + event_ids = list(event_ids) + chunks = [event_ids[x : x + 500] for x in range(0, len(event_ids), 500)] + for chunk in chunks: + sql = """ + WITH RECURSIVE auth_chain(event_id) AS ( + SELECT auth_id FROM event_auth WHERE event_id = ANY(?) + UNION + SELECT auth_id FROM event_auth + INNER JOIN auth_chain USING (event_id) + ) + SELECT event_id FROM auth_chain + """ + txn.execute(sql, (chunk,)) - if include_given: - results.update(event_ids) + results.update(event_id for event_id, in txn) return list(results) # Database doesn't necessarily support recursive CTE, so we fall # back to do doing it manually. - if include_given: - results = set(event_ids) - else: - results = set() base_sql = "SELECT auth_id FROM event_auth WHERE "