From 6c789eae6967b0e3a04cb4658b638921c7c34169 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 11 Apr 2023 15:09:44 -0500 Subject: [PATCH] Do our best to get the user to the right place and try joining `via` derived server name (#168) Split out from https://github.com/matrix-org/matrix-public-archive/pull/167 --- server/lib/matrix-utils/ensure-room-joined.js | 18 +++++- ...erver-name-from-matrix-room-id-or-alias.js | 22 +++++++ .../lib/parse-via-servers-from-user-input.js | 30 ++++++++++ server/routes/room-routes.js | 21 +++++-- ...name-from-matrix-room-id-or-alias-tests.js | 58 +++++++++++++++++++ 5 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias.js create mode 100644 server/lib/parse-via-servers-from-user-input.js create mode 100644 test/server/matrix-utils/get-server-name-from-matrix-room-id-or-alias-tests.js diff --git a/server/lib/matrix-utils/ensure-room-joined.js b/server/lib/matrix-utils/ensure-room-joined.js index e24f06d..6a57c57 100644 --- a/server/lib/matrix-utils/ensure-room-joined.js +++ b/server/lib/matrix-utils/ensure-room-joined.js @@ -4,15 +4,29 @@ const assert = require('assert'); const urlJoin = require('url-join'); const { fetchEndpointAsJson } = require('../fetch-endpoint'); +const getServerNameFromMatrixRoomIdOrAlias = require('./get-server-name-from-matrix-room-id-or-alias'); const config = require('../config'); const StatusError = require('../status-error'); const matrixServerUrl = config.get('matrixServerUrl'); assert(matrixServerUrl); -async function ensureRoomJoined(accessToken, roomIdOrAlias, viaServers = []) { +async function ensureRoomJoined(accessToken, roomIdOrAlias, viaServers = new Set()) { + // We use a `Set` to ensure that we don't have duplicate servers in the list + assert(viaServers instanceof Set); + + // Let's do our best for the user to join the room. 
Since room IDs are + // unroutable on their own and won't be found if the server doesn't already + // know about the room, we'll try to join the room via the server name that + // we derived from the room ID or alias. + const viaServersWithAssumptions = new Set(viaServers); + const derivedServerName = getServerNameFromMatrixRoomIdOrAlias(roomIdOrAlias); + if (derivedServerName) { + viaServersWithAssumptions.add(derivedServerName); + } + let qs = new URLSearchParams(); - [].concat(viaServers).forEach((viaServer) => { + Array.from(viaServersWithAssumptions).forEach((viaServer) => { qs.append('server_name', viaServer); }); diff --git a/server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias.js b/server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias.js new file mode 100644 index 0000000..1ddcf5a --- /dev/null +++ b/server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias.js @@ -0,0 +1,22 @@ +'use strict'; + +const assert = require('assert'); + +// See https://spec.matrix.org/v1.5/appendices/#server-name +function getServerNameFromMatrixRoomIdOrAlias(roomIdOrAlias) { + assert(roomIdOrAlias); + + const pieces = roomIdOrAlias.split(':'); + // We can only derive the server name if there is a colon in the string. Since room + // IDs are supposed to be treated as opaque strings, there is a future possibility + // that they will not contain a colon. 
+ if (pieces.length < 2) { + return null; + } + + const servername = pieces.slice(1).join(':'); + + return servername; +} + +module.exports = getServerNameFromMatrixRoomIdOrAlias; diff --git a/server/lib/parse-via-servers-from-user-input.js b/server/lib/parse-via-servers-from-user-input.js new file mode 100644 index 0000000..631c6d5 --- /dev/null +++ b/server/lib/parse-via-servers-from-user-input.js @@ -0,0 +1,30 @@ +'use strict'; + +const StatusError = require('../lib/status-error'); + +function parseViaServersFromUserInput(rawViaServers) { + // `rawViaServers` could be an array, a single string, or undefined. Turn it into an + // array no matter what + const rawViaServerList = [].concat(rawViaServers || []); + if (rawViaServerList.length === 0) { + return new Set(); + } + + const viaServerList = rawViaServerList.map((viaServer) => { + // Sanity check to ensure that the via servers are strings (valid enough looking + // host names) + if (typeof viaServer !== 'string') { + throw new StatusError( + 400, + `?via server must be a string, got ${viaServer} (${typeof viaServer})` + ); + } + + return viaServer; + }); + + // We use a `Set` to ensure that we don't have duplicate servers in the list + return new Set(viaServerList); +} + +module.exports = parseViaServersFromUserInput; diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js index 7dc457a..26faf50 100644 --- a/server/routes/room-routes.js +++ b/server/routes/room-routes.js @@ -11,6 +11,7 @@ const timeoutMiddleware = require('./timeout-middleware'); const redirectToCorrectArchiveUrlIfBadSigil = require('./redirect-to-correct-archive-url-if-bad-sigil-middleware'); const { HTTPResponseError } = require('../lib/fetch-endpoint'); +const parseViaServersFromUserInput = require('../lib/parse-via-servers-from-user-input'); const fetchRoomData = require('../lib/matrix-utils/fetch-room-data'); const fetchEventsFromTimestampBackwards = require('../lib/matrix-utils/fetch-events-from-timestamp-backwards'); 
const ensureRoomJoined = require('../lib/matrix-utils/ensure-room-joined'); @@ -173,7 +174,11 @@ router.get( // We have to wait for the room join to happen first before we can fetch // any of the additional room info or messages. - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via); + const roomId = await ensureRoomJoined( + matrixAccessToken, + roomIdOrAlias, + parseViaServersFromUserInput(req.query.via) + ); // Find the closest day to the current time with messages const { originServerTs } = await timestampToEvent({ @@ -192,7 +197,7 @@ router.get( // We can avoid passing along the `via` query parameter because we already // joined the room above (see `ensureRoomJoined`). // - //viaServers: req.query.via, + //viaServers: parseViaServersFromUserInput(req.query.via), }) ); }) @@ -245,7 +250,11 @@ router.get( // We have to wait for the room join to happen first before we can use the jump to // date endpoint - const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via); + const roomId = await ensureRoomJoined( + matrixAccessToken, + roomIdOrAlias, + parseViaServersFromUserInput(req.query.via) + ); let eventIdForClosestEvent; let tsForClosestEvent; @@ -536,7 +545,11 @@ router.get( // We have to wait for the room join to happen first before we can fetch // any of the additional room info or messages. 
- const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via); + const roomId = await ensureRoomJoined( + matrixAccessToken, + roomIdOrAlias, + parseViaServersFromUserInput(req.query.via) + ); // Do these in parallel to avoid the extra time in sequential round-trips // (we want to display the archive page faster) diff --git a/test/server/matrix-utils/get-server-name-from-matrix-room-id-or-alias-tests.js b/test/server/matrix-utils/get-server-name-from-matrix-room-id-or-alias-tests.js new file mode 100644 index 0000000..8eaa992 --- /dev/null +++ b/test/server/matrix-utils/get-server-name-from-matrix-room-id-or-alias-tests.js @@ -0,0 +1,58 @@ +'use strict'; + +const assert = require('assert'); + +const getServerNameFromMatrixRoomIdOrAlias = require('../../../server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias'); + +describe('getServerNameFromMatrixRoomIdOrAlias', () => { + // Some examples from https://spec.matrix.org/v1.5/appendices/#server-name + const testCases = [ + { + name: 'can parse normal looking domain name', + input: '!foo:matrix.org', + expected: 'matrix.org', + }, + { + name: 'can parse sub-domain', + input: '!foo:archive.matrix.org', + expected: 'archive.matrix.org', + }, + { + name: 'can parse domain with port', + input: '!foo:matrix.org:8888', + expected: 'matrix.org:8888', + }, + { + name: 'can parse IPv4 address', + input: '!foo:192.168.1.1', + expected: '192.168.1.1', + }, + { + name: 'can parse IPv4 address with port', + input: '!foo:192.168.1.1:1234', + expected: '192.168.1.1:1234', + }, + { + name: 'can parse IPv6 address', + input: '!foo:[1234:5678::abcd]', + expected: '[1234:5678::abcd]', + }, + { + name: 'can parse IPv6 address with port', + input: '!foo:[1234:5678::abcd]:1234', + expected: '[1234:5678::abcd]:1234', + }, + { + name: `opaque room ID is *NOT* parsed and we can't derive a server name`, + input: '!foobarbaz', + expected: null, + }, + ]; + + testCases.forEach((testCaseMeta) => { + 
it(testCaseMeta.name, () => { + const actual = getServerNameFromMatrixRoomIdOrAlias(testCaseMeta.input); + assert.strictEqual(actual, testCaseMeta.expected); + }); + }); +});