From 57174db6e029fb9ea01e59ca8b91f773ef8824f4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 29 Jun 2022 12:37:40 +0200 Subject: [PATCH] Fix archive not responding in big rooms because homeserver times out on `/context` request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `filter={"lazy_load_members":true}` so that `/context` responds without timing out by returning just the state for the sender of the included event. Otherwise, the homeserver returns all state in the room at that point in time which, in big rooms, can be 100k member events that we don't care about anyway. Synapse seems to time out at around the 5k state event mark. ## Dev notes Without `filter={"lazy_load_members":true}`, Synapse can only handle ~4k member events (probably state in general) before `/context` times out without ever responding within the 180 second window. I'm only looking at the member count as a rough proxy, but the actual number of member events plus other state will be higher. 
Loads | Member count | Alias | Matrix.to | API request --- | --- | --- | --- | --- :x: | 39k | `#matrix:matrix.org` | [🔗 ](https://matrix.to/#/!OGEhHVWSdvArJzumhm:matrix.org/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!OGEhHVWSdvArJzumhm:matrix.org/context/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?limit=0` :x: | 31k | `#openwisp_general:gitter.im` | [🔗 ](https://matrix.to/#/!RBzfoBeqYcCwLAAenz:gitter.im/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?via=matrix.org) |`https://matrix-client.matrix.org/_matrix/client/r0/rooms/!RBzfoBeqYcCwLAAenz:gitter.im/context/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?limit=0` :x: | 16k | `#raspberrypi:matrix.org` | [🔗 ](https://matrix.to/#/!wOlkWNmgkAZFxbTaqj:matrix.org/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!wOlkWNmgkAZFxbTaqj:matrix.org/context/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?limit=0` Loads with warm cache | 7.2k | `#element-android:matrix.org` | [🔗 ](https://matrix.to/#/!AZozoWghOYSIAfaZjJ:matrix.org/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!AZozoWghOYSIAfaZjJ:matrix.org/context/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?limit=0` ✅ | 4k | `#nim-science:envs.net` | [🔗 ](https://matrix.to/#/!IpFtPSbgfrZrVcVyti:envs.net/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!IpFtPSbgfrZrVcVyti:envs.net/context/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?limit=0` ✅ | 2.3k | `#nim-webdev:matrix.org` | [🔗 ](https://matrix.to/#/!EoyccMaVGwdqyfKMAL:matrix.org/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!EoyccMaVGwdqyfKMAL:matrix.org/context/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?limit=0` --- server/fetch-events-in-range.js | 16 
++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/server/fetch-events-in-range.js b/server/fetch-events-in-range.js index f8cca37..58bee0f 100644 --- a/server/fetch-events-in-range.js +++ b/server/fetch-events-in-range.js @@ -47,16 +47,28 @@ async function fetchEventsFromTimestampBackwards(accessToken, roomId, ts, limit) assert(eventIdForTimestamp); //console.log('eventIdForTimestamp', eventIdForTimestamp); + // We only use this endpoint to get a pagination token we can use with `/messages`. + // + // We add `limit=0` here because we only want the pagination token, not events. + // + // Add `filter={"lazy_load_members":true}` so that this endpoint responds + // without timing out. Otherwise, the homeserver returns all state in the room + // at that point in time which, in big rooms, can be 100k member events that we + // don't care about anyway. Synapse seems to time out at around the 5k state + // event mark. const contextEndpoint = urlJoin( matrixServerUrl, - `_matrix/client/r0/rooms/${roomId}/context/${eventIdForTimestamp}?limit=0` + `_matrix/client/r0/rooms/${roomId}/context/${eventIdForTimestamp}?limit=0&filter={"lazy_load_members":true}` ); const contextResData = await fetchEndpointAsJson(contextEndpoint, { accessToken, }); //console.log('contextResData', contextResData); - // Add filter={"lazy_load_members":true,"include_redundant_members":true} to get member state events included + // TODO: Do we need `"include_redundant_members":true` here? + // + // Add `filter={"lazy_load_members":true,"include_redundant_members":true}` to + // get member state events included const messagesEndpoint = urlJoin( matrixServerUrl, `_matrix/client/r0/rooms/${roomId}/messages?dir=b&from=${contextResData.end}&limit=${limit}&filter={"lazy_load_members":true,"include_redundant_members":true}`