matrix-public-archive/server/lib/matrix-utils/fetch-events-from-timestamp...

115 lines
3.9 KiB
JavaScript
Raw Normal View History

2022-02-24 02:27:53 -07:00
'use strict';
const assert = require('assert');
const urlJoin = require('url-join');
const { fetchEndpointAsJson } = require('../fetch-endpoint');
const timestampToEvent = require('./timestamp-to-event');
const { traceFunction } = require('../../tracing/trace-utilities');
2022-02-24 02:27:53 -07:00
const config = require('../config');
2022-02-24 02:27:53 -07:00
const matrixServerUrl = config.get('matrixServerUrl');
assert(matrixServerUrl);
// Find an event right ahead of where we are trying to look. Then paginate
// /messages backwards. This makes sure that we can get events for the day when
// the room started. And it ensures that the `/messages` backfill kicks in
// properly since it only works to fill in the gaps going backwards.
2022-02-24 02:27:53 -07:00
//
// Consider this scenario: dayStart(fromTs) <- msg1 <- msg2 <- msg3 <- dayEnd(toTs)
2022-02-24 02:27:53 -07:00
// - ❌ If we start from dayStart and look backwards, we will find nothing.
// - ❌ If we start from dayStart and look forwards, we will find msg1, but
// federated backfill won't be able to paginate forwards
// - ✅ If we start from dayEnd and look backwards, we will find msg3 and
// federation backfill can paginate backwards
2022-02-24 02:27:53 -07:00
// - ❌ If we start from dayEnd and look forwards, we will find nothing
//
// Returns events in reverse-chronological order.
async function fetchEventsFromTimestampBackwards({ accessToken, roomId, ts, limit }) {
2022-02-24 02:27:53 -07:00
assert(accessToken);
assert(roomId);
assert(ts);
// Synapse has a max `/messages` limit of 1000
assert(
limit <= 1000,
'We can only get 1000 messages at a time from Synapse. If you need more messages, we will have to implement pagination'
);
2022-02-24 02:27:53 -07:00
let eventIdForTimestamp;
try {
const { eventId } = await timestampToEvent({
accessToken,
roomId,
ts,
direction: 'b',
});
eventIdForTimestamp = eventId;
} catch (err) {
const allowedErrorCodes = [
// Allow `404: Unable to find event xxx in direction x`
// so we can just display an empty placeholder with no events.
404,
];
if (!allowedErrorCodes.includes(err?.response?.status)) {
throw err;
}
}
if (!eventIdForTimestamp) {
return {
stateEventMap: {},
events: [],
};
}
2022-02-24 02:27:53 -07:00
// We only use this endpoint to get a pagination token we can use with
// `/messages`.
//
// We add `limit=0` here because we want to grab the pagination token right
// (before/after) the event.
Fix archive not responding in big rooms because homeserver times out on `/context` request Add `filter={"lazy_load_members":true}` so that `/context` responds without timing out by returning just the state for the sender of the included event. Otherwise, the homeserver returns all state in the room at that point in time which in big rooms, can be 100k member events that we don't care about anyway. Synapse seems to timeout at about the ~5k state event mark. ## Dev notes Without `filter={"lazy_load_members":true}`, Synapse can only handle ~4k member events (probably state in general) before `/context` times out before it ever responds in the 180 second window. I'm only looking at the member count as a rough proxy but the number of member events + state will be more. Loads | Member count | Alias | Matrix.to | API request --- | --- | --- | --- | --- :x: | 39k | `#matrix:matrix.org` | [🔗 ](https://matrix.to/#/!OGEhHVWSdvArJzumhm:matrix.org/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!OGEhHVWSdvArJzumhm:matrix.org/context/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?limit=0` :x: | 31k | `#openwisp_general:gitter.im` | [🔗 ](https://matrix.to/#/!RBzfoBeqYcCwLAAenz:gitter.im/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?via=matrix.org) |`https://matrix-client.matrix.org/_matrix/client/r0/rooms/!RBzfoBeqYcCwLAAenz:gitter.im/context/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?limit=0` :x: | 16k | `#raspberrypi:matrix.org` | [🔗 ](https://matrix.to/#/!wOlkWNmgkAZFxbTaqj:matrix.org/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!wOlkWNmgkAZFxbTaqj:matrix.org/context/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?limit=0` Loads with warm cache | 7.2k | `#element-android:matrix.org` | [🔗 ](https://matrix.to/#/!AZozoWghOYSIAfaZjJ:matrix.org/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!AZozoWghOYSIAfaZjJ:matrix.org/context/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?limit=0` ✅ | 4k | `#nim-science:envs.net` | [🔗 ](https://matrix.to/#/!IpFtPSbgfrZrVcVyti:envs.net/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!IpFtPSbgfrZrVcVyti:envs.net/context/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?limit=0` ✅ | 2.3k | `#nim-webdev:matrix.org` | [🔗 ](https://matrix.to/#/!EoyccMaVGwdqyfKMAL:matrix.org/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!EoyccMaVGwdqyfKMAL:matrix.org/context/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?limit=0`
2022-06-29 04:37:40 -06:00
//
// Add `filter={"lazy_load_members":true}` so that this endpoint responds
// without timing out by returning just the state for the sender of the
// included event. Otherwise, the homeserver returns all state in the room at
// that point in time which in big rooms, can be 100k member events that we
Fix archive not responding in big rooms because homeserver times out on `/context` request Add `filter={"lazy_load_members":true}` so that `/context` responds without timing out by returning just the state for the sender of the included event. Otherwise, the homeserver returns all state in the room at that point in time which in big rooms, can be 100k member events that we don't care about anyway. Synapse seems to timeout at about the ~5k state event mark. ## Dev notes Without `filter={"lazy_load_members":true}`, Synapse can only handle ~4k member events (probably state in general) before `/context` times out before it ever responds in the 180 second window. I'm only looking at the member count as a rough proxy but the number of member events + state will be more. Loads | Member count | Alias | Matrix.to | API request --- | --- | --- | --- | --- :x: | 39k | `#matrix:matrix.org` | [🔗 ](https://matrix.to/#/!OGEhHVWSdvArJzumhm:matrix.org/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!OGEhHVWSdvArJzumhm:matrix.org/context/$xo-tESRNP1Vg1RxLXIfmMeO6dA6-u9XuE2lv6toeKcw?limit=0` :x: | 31k | `#openwisp_general:gitter.im` | [🔗 ](https://matrix.to/#/!RBzfoBeqYcCwLAAenz:gitter.im/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?via=matrix.org) |`https://matrix-client.matrix.org/_matrix/client/r0/rooms/!RBzfoBeqYcCwLAAenz:gitter.im/context/$YFjmbmH0NRPmfVsPyxhM0jcK4RFR_CdCigtHSgCTLSc?limit=0` :x: | 16k | `#raspberrypi:matrix.org` | [🔗 ](https://matrix.to/#/!wOlkWNmgkAZFxbTaqj:matrix.org/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!wOlkWNmgkAZFxbTaqj:matrix.org/context/$WMh_QauoyW6cjFDuUeNgsnUSgDn7C2XlrUnlijhdmdk?limit=0` Loads with warm cache | 7.2k | `#element-android:matrix.org` | [🔗 ](https://matrix.to/#/!AZozoWghOYSIAfaZjJ:matrix.org/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!AZozoWghOYSIAfaZjJ:matrix.org/context/$Qmb8vmaD91PIM6ROfh-2jApPoJgH2Q7NnjiwRdiEPZE?limit=0` ✅ | 4k | `#nim-science:envs.net` | [🔗 ](https://matrix.to/#/!IpFtPSbgfrZrVcVyti:envs.net/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!IpFtPSbgfrZrVcVyti:envs.net/context/$bDRop4yvjFOl3HMouAyx4mtau-JaAxvBUJ-MqftAm7E?limit=0` ✅ | 2.3k | `#nim-webdev:matrix.org` | [🔗 ](https://matrix.to/#/!EoyccMaVGwdqyfKMAL:matrix.org/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?via=matrix.org) | `https://matrix-client.matrix.org/_matrix/client/r0/rooms/!EoyccMaVGwdqyfKMAL:matrix.org/context/$VdRyNb1F-gLdDP9nDYK7xAh_LcE822fEfDYWz5re8dE?limit=0`
2022-06-29 04:37:40 -06:00
// don't care about anyway. Synapse seems to timeout at about the ~5k state
// event mark.
2022-02-24 02:27:53 -07:00
const contextEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/context/${encodeURIComponent(
eventIdForTimestamp
)}?limit=0&filter={"lazy_load_members":true}`
2022-02-24 02:27:53 -07:00
);
const contextResData = await fetchEndpointAsJson(contextEndpoint, {
accessToken,
});
// Add `filter={"lazy_load_members":true}` to only get member state events for
// the messages included in the response
2022-02-24 02:27:53 -07:00
const messagesEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/rooms/${encodeURIComponent(roomId)}/messages?dir=b&from=${encodeURIComponent(
contextResData.end
)}&limit=${limit}&filter={"lazy_load_members":true}`
2022-02-24 02:27:53 -07:00
);
const messageResData = await fetchEndpointAsJson(messagesEndpoint, {
accessToken,
});
const stateEventMap = {};
for (const stateEvent of messageResData.state || []) {
if (stateEvent.type === 'm.room.member') {
stateEventMap[stateEvent.state_key] = stateEvent;
}
}
const chronologicalEvents = messageResData?.chunk?.reverse() || [];
2022-02-24 02:27:53 -07:00
return {
stateEventMap,
events: chronologicalEvents,
2022-02-24 02:27:53 -07:00
};
}
module.exports = traceFunction(fetchEventsFromTimestampBackwards);