diff --git a/server/hydrogen-render/render-hydrogen-vm-render-script-to-page-html.js b/server/hydrogen-render/render-hydrogen-vm-render-script-to-page-html.js
index ef28a87..91a6b63 100644
--- a/server/hydrogen-render/render-hydrogen-vm-render-script-to-page-html.js
+++ b/server/hydrogen-render/render-hydrogen-vm-render-script-to-page-html.js
@@ -27,11 +27,18 @@ async function renderHydrogenVmRenderScriptToPageHtml(
const serializableSpans = getSerializableSpans();
const serializedSpans = JSON.stringify(serializableSpans);
+ // Tell search engines not to index (or follow links from) pages flagged with `noIndex`
+ let maybeNoIndexHtml = '';
+ if (pageOptions.noIndex) {
+ maybeNoIndexHtml = `<meta name="robots" content="noindex, nofollow">`;
+ }
+
const pageHtml = `
+ ${maybeNoIndexHtml}
${sanitizeHtml(`${pageOptions.title}`)}
${pageOptions.styles
.map((styleUrl) => ``)
diff --git a/server/lib/matrix-utils/ensure-room-joined.js b/server/lib/matrix-utils/ensure-room-joined.js
index 82440fd..78d8df0 100644
--- a/server/lib/matrix-utils/ensure-room-joined.js
+++ b/server/lib/matrix-utils/ensure-room-joined.js
@@ -6,6 +6,7 @@ const urlJoin = require('url-join');
const { fetchEndpointAsJson } = require('../fetch-endpoint');
const config = require('../config');
+const StatusError = require('../status-error');
const matrixServerUrl = config.get('matrixServerUrl');
assert(matrixServerUrl);
@@ -15,18 +16,18 @@ async function ensureRoomJoined(accessToken, roomId, viaServers = []) {
qs.append('server_name', viaServer);
});
- // TODO: Only join world_readable rooms. Perhaps we want to serve public rooms
- // where we have been invited. GET
- // /_matrix/client/v3/directory/list/room/{roomId} (Gets the visibility of a
- // given room on the server’s public room directory.)
const joinEndpoint = urlJoin(
matrixServerUrl,
`_matrix/client/r0/join/${roomId}?${qs.toString()}`
);
- await fetchEndpointAsJson(joinEndpoint, {
- method: 'POST',
- accessToken,
- });
+ try {
+ await fetchEndpointAsJson(joinEndpoint, {
+ method: 'POST',
+ accessToken,
+ });
+ } catch (err) {
+ throw new StatusError(403, `Archiver is unable to join room: ${err.message}`);
+ }
}
module.exports = ensureRoomJoined;
diff --git a/server/lib/matrix-utils/fetch-room-data.js b/server/lib/matrix-utils/fetch-room-data.js
index 05bfdb3..5fb9435 100644
--- a/server/lib/matrix-utils/fetch-room-data.js
+++ b/server/lib/matrix-utils/fetch-room-data.js
@@ -22,14 +22,33 @@ async function fetchRoomData(accessToken, roomId) {
matrixServerUrl,
`_matrix/client/r0/rooms/${roomId}/state/m.room.avatar`
);
+ const stateHistoryVisibilityEndpoint = urlJoin(
+ matrixServerUrl,
+ `_matrix/client/r0/rooms/${roomId}/state/m.room.history_visibility`
+ );
+ const stateJoinRulesEndpoint = urlJoin(
+ matrixServerUrl,
+ `_matrix/client/r0/rooms/${roomId}/state/m.room.join_rules`
+ );
- const [stateNameResDataOutcome, stateAvatarResDataOutcome] = await Promise.allSettled([
+ const [
+ stateNameResDataOutcome,
+ stateAvatarResDataOutcome,
+ stateHistoryVisibilityResDataOutcome,
+ stateJoinRulesResDataOutcome,
+ ] = await Promise.allSettled([
fetchEndpointAsJson(stateNameEndpoint, {
accessToken,
}),
fetchEndpointAsJson(stateAvatarEndpoint, {
accessToken,
}),
+ fetchEndpointAsJson(stateHistoryVisibilityEndpoint, {
+ accessToken,
+ }),
+ fetchEndpointAsJson(stateJoinRulesEndpoint, {
+ accessToken,
+ }),
]);
let name;
@@ -42,10 +61,22 @@ async function fetchRoomData(accessToken, roomId) {
avatarUrl = stateAvatarResDataOutcome.value.url;
}
+ let historyVisibility;
+ if (stateHistoryVisibilityResDataOutcome.reason === undefined) {
+ historyVisibility = stateHistoryVisibilityResDataOutcome.value.history_visibility;
+ }
+
+ let joinRule;
+ if (stateJoinRulesResDataOutcome.reason === undefined) {
+ joinRule = stateJoinRulesResDataOutcome.value.join_rule;
+ }
+
return {
id: roomId,
name,
avatarUrl,
+ historyVisibility,
+ joinRule,
};
}
diff --git a/server/lib/status-error.js b/server/lib/status-error.js
index bac4b11..3d34e39 100644
--- a/server/lib/status-error.js
+++ b/server/lib/status-error.js
@@ -9,7 +9,7 @@ function StatusError(status, inputMessage) {
message = http.STATUS_CODES[status] || http.STATUS_CODES['500'];
}
- this.message = message;
+ this.message = `${status} - ${message}`;
this.status = status;
this.name = 'StatusError';
Error.captureStackTrace(this, StatusError);
diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js
index ad27056..094d452 100644
--- a/server/routes/room-routes.js
+++ b/server/routes/room-routes.js
@@ -176,6 +176,18 @@ router.get(
),
]);
+ // Only `world_readable` or `shared` rooms that are `public` are viewable in the archive
+ const allowedToViewRoom =
+ roomData?.historyVisibility === 'world_readable' ||
+ (roomData?.historyVisibility === 'shared' && roomData?.joinRule === 'public');
+
+ if (!allowedToViewRoom) {
+ throw new StatusError(
+ 403,
+ `Only \`world_readable\` or \`shared\` rooms that are \`public\` can be viewed in the archive. ${roomData?.id} has m.room.history_visibility=${roomData?.historyVisibility} m.room.join_rules=${roomData?.joinRule}`
+ );
+ }
+
if (events.length >= archiveMessageLimit) {
throw new Error('TODO: Redirect user to smaller hour range');
}
@@ -200,6 +212,8 @@ router.get(
title: `${roomData.name} - Matrix Public Archive`,
styles: [hydrogenStylesUrl, stylesUrl],
scripts: [jsBundleUrl],
+ // We only allow search engines to index `world_readable` rooms
+ noIndex: roomData?.historyVisibility !== `world_readable`,
}
);
diff --git a/test/client-utils.js b/test/client-utils.js
index edb42cb..b13e7b3 100644
--- a/test/client-utils.js
+++ b/test/client-utils.js
@@ -73,7 +73,7 @@ async function getTestClientForHs(testMatrixServerUrl) {
}
// Create a public room to test in
-async function createTestRoom(client) {
+async function createTestRoom(client, overrideCreateOptions) {
let qs = new URLSearchParams();
if (client.applicationServiceUserIdOverride) {
qs.append('user_id', client.applicationServiceUserIdOverride);
@@ -95,6 +95,7 @@ async function createTestRoom(client) {
},
},
],
+ ...overrideCreateOptions,
},
accessToken: client.accessToken,
}
diff --git a/test/e2e-tests.js b/test/e2e-tests.js
index f824bd9..0522af1 100644
--- a/test/e2e-tests.js
+++ b/test/e2e-tests.js
@@ -506,5 +506,61 @@ describe('matrix-public-archive', () => {
it(
`will render a room with a sparse amount of messages (a few per day) with no contamination between days`
);
+
+ describe('access controls', () => {
+ it('not allowed to view private room even when the archiver user is in the room', async () => {
+ const client = await getTestClientForHs(testMatrixServerUrl1);
+ const roomId = await createTestRoom(client, {
+ preset: 'private_chat',
+ initial_state: [],
+ });
+ // Capture the error and assert after the `try`: an assertion thrown inside it would be caught by our own `catch`
+ let fetchError;
+ try {
+ archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
+ await fetchEndpointAsText(archiveUrl);
+ } catch (err) {
+ fetchError = err;
+ }
+ assert.ok(fetchError, 'We expect the request to fail with a 403 since the archive should not be able to view a private room');
+ assert.strictEqual(fetchError.response?.status, 403);
+ });
+
+ it('search engines allowed to index `world_readable` room', async () => {
+ const client = await getTestClientForHs(testMatrixServerUrl1);
+ const roomId = await createTestRoom(client);
+
+ archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
+ const archivePageHtml = await fetchEndpointAsText(archiveUrl);
+
+ const dom = parseHTML(archivePageHtml);
+
+ // Make sure the `<meta name="robots" content="noindex, nofollow">` tag does NOT exist on the
+ // page telling search engines not to index it
+ assert.strictEqual(dom.document.querySelector(`meta[name="robots"]`), null);
+ });
+
+ it('search engines not allowed to index `public` room', async () => {
+ const client = await getTestClientForHs(testMatrixServerUrl1);
+ const roomId = await createTestRoom(client, {
+ // The default options for the test rooms add a
+ // `m.room.history_visibility` state event so we override that here so
+ // it's only a public room.
+ initial_state: [],
+ });
+
+ archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
+ const archivePageHtml = await fetchEndpointAsText(archiveUrl);
+
+ const dom = parseHTML(archivePageHtml);
+
+ // Make sure the `<meta name="robots" content="noindex, nofollow">` tag exists on the page
+ // telling search engines not to index it
+ assert.strictEqual(
+ dom.document.querySelector(`meta[name="robots"]`)?.getAttribute('content'),
+ 'noindex, nofollow'
+ );
+ });
+ });
});
});