Only show `world_readable` or `public` rooms in the archive. Only index `world_readable` (#66)
Only show `world_readable` or `public` rooms in the archive. Only allow `world_readable` rooms to be indexed by search engines. Related to https://github.com/matrix-org/matrix-public-archive/issues/47
This commit is contained in:
parent
65a371910a
commit
32c77ecffe
|
@ -27,11 +27,18 @@ async function renderHydrogenVmRenderScriptToPageHtml(
|
||||||
const serializableSpans = getSerializableSpans();
|
const serializableSpans = getSerializableSpans();
|
||||||
const serializedSpans = JSON.stringify(serializableSpans);
|
const serializedSpans = JSON.stringify(serializableSpans);
|
||||||
|
|
||||||
|
// We shouldn't let some pages be indexed by search engines
|
||||||
|
let maybeNoIndexHtml = '';
|
||||||
|
if (pageOptions.noIndex) {
|
||||||
|
maybeNoIndexHtml = `<meta name="robots" content="noindex, nofollow" />`;
|
||||||
|
}
|
||||||
|
|
||||||
const pageHtml = `
|
const pageHtml = `
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
|
${maybeNoIndexHtml}
|
||||||
${sanitizeHtml(`<title>${pageOptions.title}</title>`)}
|
${sanitizeHtml(`<title>${pageOptions.title}</title>`)}
|
||||||
${pageOptions.styles
|
${pageOptions.styles
|
||||||
.map((styleUrl) => `<link href="${styleUrl}" rel="stylesheet">`)
|
.map((styleUrl) => `<link href="${styleUrl}" rel="stylesheet">`)
|
||||||
|
|
|
@ -6,6 +6,7 @@ const urlJoin = require('url-join');
|
||||||
const { fetchEndpointAsJson } = require('../fetch-endpoint');
|
const { fetchEndpointAsJson } = require('../fetch-endpoint');
|
||||||
|
|
||||||
const config = require('../config');
|
const config = require('../config');
|
||||||
|
const StatusError = require('../status-error');
|
||||||
const matrixServerUrl = config.get('matrixServerUrl');
|
const matrixServerUrl = config.get('matrixServerUrl');
|
||||||
assert(matrixServerUrl);
|
assert(matrixServerUrl);
|
||||||
|
|
||||||
|
@ -15,18 +16,18 @@ async function ensureRoomJoined(accessToken, roomId, viaServers = []) {
|
||||||
qs.append('server_name', viaServer);
|
qs.append('server_name', viaServer);
|
||||||
});
|
});
|
||||||
|
|
||||||
// TODO: Only join world_readable rooms. Perhaps we want to serve public rooms
|
|
||||||
// where we have been invited. GET
|
|
||||||
// /_matrix/client/v3/directory/list/room/{roomId} (Gets the visibility of a
|
|
||||||
// given room on the server’s public room directory.)
|
|
||||||
const joinEndpoint = urlJoin(
|
const joinEndpoint = urlJoin(
|
||||||
matrixServerUrl,
|
matrixServerUrl,
|
||||||
`_matrix/client/r0/join/${roomId}?${qs.toString()}`
|
`_matrix/client/r0/join/${roomId}?${qs.toString()}`
|
||||||
);
|
);
|
||||||
await fetchEndpointAsJson(joinEndpoint, {
|
try {
|
||||||
method: 'POST',
|
await fetchEndpointAsJson(joinEndpoint, {
|
||||||
accessToken,
|
method: 'POST',
|
||||||
});
|
accessToken,
|
||||||
|
});
|
||||||
|
} catch (err) {
|
||||||
|
throw new StatusError(403, `Archiver is unable to join room: ${err.message}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = ensureRoomJoined;
|
module.exports = ensureRoomJoined;
|
||||||
|
|
|
@ -22,14 +22,33 @@ async function fetchRoomData(accessToken, roomId) {
|
||||||
matrixServerUrl,
|
matrixServerUrl,
|
||||||
`_matrix/client/r0/rooms/${roomId}/state/m.room.avatar`
|
`_matrix/client/r0/rooms/${roomId}/state/m.room.avatar`
|
||||||
);
|
);
|
||||||
|
const stateHistoryVisibilityEndpoint = urlJoin(
|
||||||
|
matrixServerUrl,
|
||||||
|
`_matrix/client/r0/rooms/${roomId}/state/m.room.history_visibility`
|
||||||
|
);
|
||||||
|
const stateJoinRulesEndpoint = urlJoin(
|
||||||
|
matrixServerUrl,
|
||||||
|
`_matrix/client/r0/rooms/${roomId}/state/m.room.join_rules`
|
||||||
|
);
|
||||||
|
|
||||||
const [stateNameResDataOutcome, stateAvatarResDataOutcome] = await Promise.allSettled([
|
const [
|
||||||
|
stateNameResDataOutcome,
|
||||||
|
stateAvatarResDataOutcome,
|
||||||
|
stateHistoryVisibilityResDataOutcome,
|
||||||
|
stateJoinRulesResDataOutcome,
|
||||||
|
] = await Promise.allSettled([
|
||||||
fetchEndpointAsJson(stateNameEndpoint, {
|
fetchEndpointAsJson(stateNameEndpoint, {
|
||||||
accessToken,
|
accessToken,
|
||||||
}),
|
}),
|
||||||
fetchEndpointAsJson(stateAvatarEndpoint, {
|
fetchEndpointAsJson(stateAvatarEndpoint, {
|
||||||
accessToken,
|
accessToken,
|
||||||
}),
|
}),
|
||||||
|
fetchEndpointAsJson(stateHistoryVisibilityEndpoint, {
|
||||||
|
accessToken,
|
||||||
|
}),
|
||||||
|
fetchEndpointAsJson(stateJoinRulesEndpoint, {
|
||||||
|
accessToken,
|
||||||
|
}),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
let name;
|
let name;
|
||||||
|
@ -42,10 +61,22 @@ async function fetchRoomData(accessToken, roomId) {
|
||||||
avatarUrl = stateAvatarResDataOutcome.value.url;
|
avatarUrl = stateAvatarResDataOutcome.value.url;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let historyVisibility;
|
||||||
|
if (stateHistoryVisibilityResDataOutcome.reason === undefined) {
|
||||||
|
historyVisibility = stateHistoryVisibilityResDataOutcome.value.history_visibility;
|
||||||
|
}
|
||||||
|
|
||||||
|
let joinRule;
|
||||||
|
if (stateJoinRulesResDataOutcome.reason === undefined) {
|
||||||
|
joinRule = stateJoinRulesResDataOutcome.value.join_rule;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: roomId,
|
id: roomId,
|
||||||
name,
|
name,
|
||||||
avatarUrl,
|
avatarUrl,
|
||||||
|
historyVisibility,
|
||||||
|
joinRule,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ function StatusError(status, inputMessage) {
|
||||||
message = http.STATUS_CODES[status] || http.STATUS_CODES['500'];
|
message = http.STATUS_CODES[status] || http.STATUS_CODES['500'];
|
||||||
}
|
}
|
||||||
|
|
||||||
this.message = message;
|
this.message = `${status} - ${message}`;
|
||||||
this.status = status;
|
this.status = status;
|
||||||
this.name = 'StatusError';
|
this.name = 'StatusError';
|
||||||
Error.captureStackTrace(this, StatusError);
|
Error.captureStackTrace(this, StatusError);
|
||||||
|
|
|
@ -176,6 +176,18 @@ router.get(
|
||||||
),
|
),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
// Only `world_readable` or `shared` rooms that are `public` are viewable in the archive
|
||||||
|
const allowedToViewRoom =
|
||||||
|
roomData?.historyVisibility === 'world_readable' ||
|
||||||
|
(roomData?.historyVisibility === 'shared' && roomData?.joinRule === 'public');
|
||||||
|
|
||||||
|
if (!allowedToViewRoom) {
|
||||||
|
throw new StatusError(
|
||||||
|
403,
|
||||||
|
`Only \`world_readable\` or \`shared\` rooms that are \`public\` can be viewed in the archive. ${roomData.id} has m.room.history_visiblity=${roomData?.historyVisibility} m.room.join_rules=${roomData?.joinRule}`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if (events.length >= archiveMessageLimit) {
|
if (events.length >= archiveMessageLimit) {
|
||||||
throw new Error('TODO: Redirect user to smaller hour range');
|
throw new Error('TODO: Redirect user to smaller hour range');
|
||||||
}
|
}
|
||||||
|
@ -200,6 +212,8 @@ router.get(
|
||||||
title: `${roomData.name} - Matrix Public Archive`,
|
title: `${roomData.name} - Matrix Public Archive`,
|
||||||
styles: [hydrogenStylesUrl, stylesUrl],
|
styles: [hydrogenStylesUrl, stylesUrl],
|
||||||
scripts: [jsBundleUrl],
|
scripts: [jsBundleUrl],
|
||||||
|
// We only allow search engines to index `world_readable` rooms
|
||||||
|
noIndex: roomData?.historyVisibility !== `world_readable`,
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -73,7 +73,7 @@ async function getTestClientForHs(testMatrixServerUrl) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a public room to test in
|
// Create a public room to test in
|
||||||
async function createTestRoom(client) {
|
async function createTestRoom(client, overrideCreateOptions) {
|
||||||
let qs = new URLSearchParams();
|
let qs = new URLSearchParams();
|
||||||
if (client.applicationServiceUserIdOverride) {
|
if (client.applicationServiceUserIdOverride) {
|
||||||
qs.append('user_id', client.applicationServiceUserIdOverride);
|
qs.append('user_id', client.applicationServiceUserIdOverride);
|
||||||
|
@ -95,6 +95,7 @@ async function createTestRoom(client) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
...overrideCreateOptions,
|
||||||
},
|
},
|
||||||
accessToken: client.accessToken,
|
accessToken: client.accessToken,
|
||||||
}
|
}
|
||||||
|
|
|
@ -506,5 +506,61 @@ describe('matrix-public-archive', () => {
|
||||||
it(
|
it(
|
||||||
`will render a room with a sparse amount of messages (a few per day) with no contamination between days`
|
`will render a room with a sparse amount of messages (a few per day) with no contamination between days`
|
||||||
);
|
);
|
||||||
|
|
||||||
|
describe('access controls', () => {
|
||||||
|
it('not allowed to view private room even when the archiver user is in the room', async () => {
|
||||||
|
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||||
|
const roomId = await createTestRoom(client, {
|
||||||
|
preset: 'private_chat',
|
||||||
|
initial_state: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
|
||||||
|
await fetchEndpointAsText(archiveUrl);
|
||||||
|
assert.fail(
|
||||||
|
'We expect the request to fail with a 403 since the archive should not be able to view a private room'
|
||||||
|
);
|
||||||
|
} catch (err) {
|
||||||
|
assert.strictEqual(err.response.status, 403);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('search engines allowed to index `world_readable` room', async () => {
|
||||||
|
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||||
|
const roomId = await createTestRoom(client);
|
||||||
|
|
||||||
|
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
|
||||||
|
const archivePageHtml = await fetchEndpointAsText(archiveUrl);
|
||||||
|
|
||||||
|
const dom = parseHTML(archivePageHtml);
|
||||||
|
|
||||||
|
// Make sure the `<meta name="robots" ...>` tag does NOT exist on the
|
||||||
|
// page telling search engines not to index it
|
||||||
|
assert.strictEqual(dom.document.querySelector(`meta[name="robots"]`), null);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('search engines not allowed to index `public` room', async () => {
|
||||||
|
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||||
|
const roomId = await createTestRoom(client, {
|
||||||
|
// The default options for the test rooms add a
|
||||||
|
// `m.room.history_visibility` state event so we override that here so
|
||||||
|
// it's only a public room.
|
||||||
|
initial_state: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
|
||||||
|
const archivePageHtml = await fetchEndpointAsText(archiveUrl);
|
||||||
|
|
||||||
|
const dom = parseHTML(archivePageHtml);
|
||||||
|
|
||||||
|
// Make sure the `<meta name="robots" ...>` tag exists on the page
|
||||||
|
// telling search engines not to index it
|
||||||
|
assert.strictEqual(
|
||||||
|
dom.document.querySelector(`meta[name="robots"]`)?.getAttribute('content'),
|
||||||
|
'noindex, nofollow'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue