From 79a468b81ffcb566474924747de62982c5108d00 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Jun 2023 20:13:42 -0500 Subject: [PATCH] Start of `retryFnIfNotJoined` --- .../create-retry-fn-if-not-joined.js | 94 +++++++++++++++++++ server/lib/matrix-utils/ensure-room-joined.js | 2 + server/lib/matrix-utils/resolve-room-alias.js | 33 +++++++ .../matrix-utils/resolve-room-id-or-alias.js | 40 ++++++++ server/routes/room-routes.js | 57 ++++++----- 5 files changed, 201 insertions(+), 25 deletions(-) create mode 100644 server/lib/matrix-utils/create-retry-fn-if-not-joined.js create mode 100644 server/lib/matrix-utils/resolve-room-alias.js create mode 100644 server/lib/matrix-utils/resolve-room-id-or-alias.js diff --git a/server/lib/matrix-utils/create-retry-fn-if-not-joined.js b/server/lib/matrix-utils/create-retry-fn-if-not-joined.js new file mode 100644 index 0000000..7820a6f --- /dev/null +++ b/server/lib/matrix-utils/create-retry-fn-if-not-joined.js @@ -0,0 +1,94 @@ +'use strict'; + +const assert = require('assert'); + +const { HTTPResponseError } = require('../fetch-endpoint'); +const ensureRoomJoined = require('./ensure-room-joined'); + +const JOIN_STATES = { + unknown: 'unknown', + joining: 'joining', + joined: 'joined', + failed: 'failed', +}; +const joinStateValues = Object.values(JOIN_STATES); + +// Optimistically use the Matrix API assuming you're already joined to the room or +// accessing a `world_readable` room that doesn't require joining. If we see a 403 +// Forbidden, then try joining the room and retrying the API call. +// +// Usage: Call this once to first create a helper utility that will retry a given +// function appropriately. +function createRetryFnIfNotJoined( + accessToken, + roomIdOrAlias, + { viaServers = new Set(), abortSignal } = {} +) { + assert(accessToken); + assert(roomIdOrAlias); + // We use a `Set` to ensure that we don't have duplicate servers in the list + assert(viaServers instanceof Set); + + let joinState = JOIN_STATES.unknown; + let joinPromise = null; + + return async function retryFnIfNotJoined(fn) { + assert( + joinStateValues.includes(joinState), + `Unexpected internal join state when using createRetryFnIfNotJoined(...) (joinState=${joinState}). ` + + `This is a bug in the Matrix Public Archive. Please report` + ); + + if (joinState === JOIN_STATES.joining) { + // Wait for the join to finish before trying + await joinPromise; + } else if (joinState === JOIN_STATES.failed) { + // If we failed to join the room, then there is no way any other call is going + // to succeed so just immediately return an error. We return `joinPromise` + // which will resolve to the join error that occured + return joinPromise; + } + + try { + return await Promise.resolve(fn()); + } catch (errFromFn) { + const isForbiddenError = + errFromFn instanceof HTTPResponseError && errFromFn.response.status === 403; + + // If we're in the middle of joining, try again + if (joinState === JOIN_STATES.joining) { + return await retryFnIfNotJoined(fn); + } + // Try joining the room if we see a 403 Forbidden error as we may just not + // be part of the room yet. We can't distinguish between a room that has + // banned us vs a room we haven't joined yet so we just try joining the + // room in any case. + else if ( + isForbiddenError && + // Only try joining if we haven't tried joining yet + joinState === JOIN_STATES.unknown + ) { + joinState = JOIN_STATES.joining; + joinPromise = ensureRoomJoined(accessToken, roomIdOrAlias, { + viaServers, + abortSignal, + }); + + try { + await joinPromise; + joinState = JOIN_STATES.joined; + console.log('retryAfterJoin'); + return await retryFnIfNotJoined(fn); + } catch (err) { + console.log('FAILED retryAfterJoin'); + joinState = JOIN_STATES.failed; + throw err; + } + } + + throw errFromFn; + } + }; +} + +module.exports = createRetryFnIfNotJoined; diff --git a/server/lib/matrix-utils/ensure-room-joined.js b/server/lib/matrix-utils/ensure-room-joined.js index f192d76..02474c9 100644 --- a/server/lib/matrix-utils/ensure-room-joined.js +++ b/server/lib/matrix-utils/ensure-room-joined.js @@ -21,6 +21,8 @@ async function ensureRoomJoined( roomIdOrAlias, { viaServers = new Set(), abortSignal } = {} ) { + assert(accessToken); + assert(roomIdOrAlias); // We use a `Set` to ensure that we don't have duplicate servers in the list assert(viaServers instanceof Set); diff --git a/server/lib/matrix-utils/resolve-room-alias.js b/server/lib/matrix-utils/resolve-room-alias.js new file mode 100644 index 0000000..d5d45bb --- /dev/null +++ b/server/lib/matrix-utils/resolve-room-alias.js @@ -0,0 +1,33 @@ +'use strict'; + +const assert = require('assert'); +const urlJoin = require('url-join'); + +const { fetchEndpointAsJson } = require('../fetch-endpoint'); +const { traceFunction } = require('../../tracing/trace-utilities'); + +const config = require('../config'); +const matrixServerUrl = config.get('matrixServerUrl'); +assert(matrixServerUrl); + +async function resolveRoomAlias({ accessToken, roomAlias, abortSignal }) { + assert(accessToken); + assert(roomAlias); + + // GET /_matrix/client/r0/directory/room/{roomAlias} -> { room_id, servers } + const resolveRoomAliasEndpoint = urlJoin( + matrixServerUrl, + `_matrix/client/r0/directory/room/${encodeURIComponent(roomAlias)}` + ); + const { data: resolveRoomAliasResData } = await fetchEndpointAsJson(resolveRoomAliasEndpoint, { + accessToken, + abortSignal, + }); + + return { + roomId: resolveRoomAliasResData.room_id, + viaServers: new Set(resolveRoomAliasResData.servers || []), + }; +} + +module.exports = traceFunction(resolveRoomAlias); diff --git a/server/lib/matrix-utils/resolve-room-id-or-alias.js b/server/lib/matrix-utils/resolve-room-id-or-alias.js new file mode 100644 index 0000000..ffece54 --- /dev/null +++ b/server/lib/matrix-utils/resolve-room-id-or-alias.js @@ -0,0 +1,40 @@ +'use strict'; + +const { + VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP, +} = require('matrix-public-archive-shared/lib/reference-values'); +const resolveRoomAlias = require('./resolve-room-alias'); + +// Given a room ID or alias, return the room ID and the set of servers we should try to +// join from. Does not attempt to join the room. +async function resolveRoomIdOrAlias({ + accessToken, + roomIdOrAlias, + viaServers = new Set(), + abortSignal, +} = {}) { + const isRoomId = roomIdOrAlias.startsWith(VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP.roomid); + const isRoomAlias = roomIdOrAlias.startsWith(VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP.r); + + if (isRoomId) { + const roomId = roomIdOrAlias; + return { roomId, viaServers }; + } else if (isRoomAlias) { + const roomAlias = roomIdOrAlias; + + const { roomId, viaServers: moreViaServers } = await resolveRoomAlias({ + accessToken, + roomAlias, + abortSignal: abortSignal, + }); + return { roomId, viaServers: new Set([...viaServers, ...moreViaServers]) }; + } + + throw new Error( + `resolveRoomIdOrAlias: Unknown roomIdOrAlias=${roomIdOrAlias} does not start with valid sigil (${Object.values( + VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP + )})` + ); +} + +module.exports = resolveRoomIdOrAlias; diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js index c58dd7e..7119724 100644 --- a/server/routes/room-routes.js +++ b/server/routes/room-routes.js @@ -19,6 +19,8 @@ const { } = require('../lib/matrix-utils/fetch-room-data'); const fetchEventsFromTimestampBackwards = require('../lib/matrix-utils/fetch-events-from-timestamp-backwards'); const ensureRoomJoined = require('../lib/matrix-utils/ensure-room-joined'); +const createRetryFnIfNotJoined = require('../lib/matrix-utils/create-retry-fn-if-not-joined'); +const resolveRoomIdOrAlias = require('../lib/matrix-utils/resolve-room-id-or-alias'); const timestampToEvent = require('../lib/matrix-utils/timestamp-to-event'); const { removeMe_fetchRoomCreateEventId } = require('../lib/matrix-utils/fetch-room-data'); const getMessagesResponseFromEventId = require('../lib/matrix-utils/get-messages-response-from-event-id'); @@ -788,17 +790,18 @@ router.get( ); } - // We have to wait for the room join to happen first before we can fetch - // any of the additional room info or messages. - // - // XXX: It would be better if we just tried fetching first and assume that we are - // already joined and only join after we see a 403 Forbidden error (we should do - // this for all places we `ensureRoomJoined`). But we need the `roomId` for use with - // the various Matrix API's anyway and `/join/{roomIdOrAlias}` -> `{ room_id }` is a - // great way to get it (see - // https://github.com/matrix-org/matrix-public-archive/issues/50). - const viaServers = parseViaServersFromUserInput(req.query.via); - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, { + // Resolve the room ID without joining the room (optimistically assume that we're + // already joined) + let viaServers = parseViaServersFromUserInput(req.query.via); + let roomId; + ({ roomId, viaServers } = await resolveRoomIdOrAlias({ + accessToken: matrixAccessToken, + roomIdOrAlias, + viaServers, + abortSignal: req.abortSignal, + })); + + const retryFnIfNotJoined = createRetryFnIfNotJoined(matrixAccessToken, roomIdOrAlias, { viaServers, abortSignal: req.abortSignal, }); @@ -806,26 +809,30 @@ router.get( // Do these in parallel to avoid the extra time in sequential round-trips // (we want to display the archive page faster) const [roomData, { events, stateEventMap }] = await Promise.all([ - fetchRoomData(matrixAccessToken, roomId, { abortSignal: req.abortSignal }), + retryFnIfNotJoined(() => + fetchRoomData(matrixAccessToken, roomId, { abortSignal: req.abortSignal }) + ), // We over-fetch messages outside of the range of the given day so that we // can display messages from surrounding days (currently only from days // before) so that the quiet rooms don't feel as desolate and broken. // // When given a bare date like `2022/11/16`, we want to paginate from the end of that // day backwards. This is why we use the `toTimestamp` here and fetch backwards. - fetchEventsFromTimestampBackwards({ - accessToken: matrixAccessToken, - roomId, - ts: toTimestamp, - // We fetch one more than the `archiveMessageLimit` so that we can see if there - // are too many messages from the given day. If we have over the - // `archiveMessageLimit` number of messages fetching from the given day, it's - // acceptable to have them be from surrounding days. But if all 500 messages - // (for example) are from the same day, let's redirect to a smaller hour range - // to display. - limit: archiveMessageLimit + 1, - abortSignal: req.abortSignal, - }), + retryFnIfNotJoined(() => + fetchEventsFromTimestampBackwards({ + accessToken: matrixAccessToken, + roomId, + ts: toTimestamp, + // We fetch one more than the `archiveMessageLimit` so that we can see if there + // are too many messages from the given day. If we have over the + // `archiveMessageLimit` number of messages fetching from the given day, it's + // acceptable to have them be from surrounding days. But if all 500 messages + // (for example) are from the same day, let's redirect to a smaller hour range + // to display. + limit: archiveMessageLimit + 1, + abortSignal: req.abortSignal, + }) + ), ]); // Only `world_readable` or `shared` rooms that are `public` are viewable in the archive