This commit is contained in:
commit
7879837f25
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -2,12 +2,22 @@
|
|||
|
||||
- Prevent Cloudflare from overriding our own 504 timeout page, https://github.com/matrix-org/matrix-public-archive/pull/228
|
||||
- Catch NSFW rooms with underscores, https://github.com/matrix-org/matrix-public-archive/pull/231
|
||||
- Fix `18+` false positives with NSFW check, https://github.com/matrix-org/matrix-public-archive/pull/279
|
||||
- Fix room cards sorting in the wrong direction on Firefox, https://github.com/matrix-org/matrix-public-archive/pull/261
|
||||
- Remove `libera.chat` as a default since their rooms are not accessible in the archive, https://github.com/matrix-org/matrix-public-archive/pull/263
|
||||
- Add reason why the archive bot is joining the room, https://github.com/matrix-org/matrix-public-archive/pull/262
|
||||
- Add `/faq` redirect, https://github.com/matrix-org/matrix-public-archive/pull/265
|
||||
- Use `rel=canonical` link to de-duplicate event permalinks, https://github.com/matrix-org/matrix-public-archive/pull/266, https://github.com/matrix-org/matrix-public-archive/pull/269
|
||||
- Prevent join event spam with stable `reason`, https://github.com/matrix-org/matrix-public-archive/pull/268
|
||||
- Don't allow previewing `shared` history rooms, https://github.com/matrix-org/matrix-public-archive/pull/239
|
||||
- Contributed by [@tulir](https://github.com/tulir)
|
||||
- Update FAQ to explain `world_readable` only, https://github.com/matrix-org/matrix-public-archive/pull/277
|
||||
- Indicate when the room was set to `world_readable` and by who, https://github.com/matrix-org/matrix-public-archive/pull/278
|
||||
- Only show `world_readable` rooms in the room directory, https://github.com/matrix-org/matrix-public-archive/pull/276
|
||||
|
||||
Developer facing:
|
||||
|
||||
- Fix eslint trying to look at `node_modules/`, https://github.com/matrix-org/matrix-public-archive/pull/275
|
||||
|
||||
# 0.1.0 - 2023-05-11
|
||||
|
||||
|
|
34
docs/faq.md
34
docs/faq.md
|
@ -19,40 +19,22 @@ messages from any given date and day-by-day navigation.
|
|||
|
||||
## Why did the archive bot join my room?
|
||||
|
||||
Only public Matrix rooms with `shared` or `world_readable` [history
|
||||
visibility](https://spec.matrix.org/latest/client-server-api/#room-history-visibility) are
|
||||
accessible in the Matrix Public Archive. In some clients like Element, the `shared`
|
||||
option equates to "Members only (since the point in time of selecting this option)" and
|
||||
`world_readable` to "Anyone" under the **room settings** -> **Security & Privacy** ->
|
||||
**Who can read history?**.
|
||||
Only Matrix rooms with `world_readable` [history
|
||||
visibility](https://spec.matrix.org/latest/client-server-api/#room-history-visibility)
|
||||
are accessible in the Matrix Public Archive and indexed by search engines.
|
||||
|
||||
But the archive bot (`@archive:matrix.org`) will join any public room because it doesn't
|
||||
know the history visibility without first joining. Any room without `world_readable` or
|
||||
`shared` history visibility will lead a `403 Forbidden`. And if the public room is in
|
||||
the room directory, it will be listed in the archive but will still lead to a `403
|
||||
Forbidden` in that case.
|
||||
know the history visibility without first joining. Any room that doesn't have
|
||||
`world_readable` history visibility will lead a `403 Forbidden`.
|
||||
|
||||
The Matrix Public Archive doesn't hold onto any data (it's
|
||||
stateless) and requests the messages from the homeserver every time. The
|
||||
[matrix-archive.evulid.cc](https://matrix-archive.evulid.cc/) instance has some caching in place, 5
|
||||
minutes for the current day, and 2 days for past content.
|
||||
|
||||
The Matrix Public Archive only allows rooms with `world_readable` history visibility to
|
||||
be indexed by search engines. See the [opt
|
||||
out](#how-do-i-opt-out-and-keep-my-room-from-being-indexed-by-search-engines) topic
|
||||
below for more details.
|
||||
|
||||
### Why does the archive user join rooms instead of browsing them as a guest?
|
||||
|
||||
Guests require `m.room.guest_access` to access a room. Most public rooms do not allow
|
||||
guests because even the `public_chat` preset when creating a room does not allow guest
|
||||
access. Not being able to view most public rooms is the major blocker on being able to
|
||||
use guest access. The idea is if I can view the messages from a Matrix client as a
|
||||
random user, I should also be able to see the messages in the archive.
|
||||
|
||||
Guest access is also a much different ask than read-only access since guests can also
|
||||
send messages in the room which isn't always desirable. The archive bot is read-only and
|
||||
does not send messages.
|
||||
See the [opt out
|
||||
section](#how-do-i-opt-out-and-keep-my-room-from-being-indexed-by-search-engines) below
|
||||
for more details.
|
||||
|
||||
## Technical details
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
"url": "https://github.com/matrix-org/matrix-public-archive"
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "eslint **/*.js",
|
||||
"lint": "eslint \"**/*.js\"",
|
||||
"build": "node ./build-scripts/do-client-build.js",
|
||||
"start": "node server/server.js",
|
||||
"start-dev": "node server/start-dev.js",
|
||||
|
|
|
@ -0,0 +1,191 @@
|
|||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
|
||||
const urlJoin = require('url-join');
|
||||
const { DIRECTION } = require('matrix-public-archive-shared/lib/reference-values');
|
||||
const { fetchEndpointAsJson } = require('../fetch-endpoint');
|
||||
const { traceFunction } = require('../../tracing/trace-utilities');
|
||||
|
||||
const config = require('../config');
|
||||
const matrixServerUrl = config.get('matrixServerUrl');
|
||||
assert(matrixServerUrl);
|
||||
|
||||
// The number of requests we should make to try to fill the limit before bailing out
|
||||
const NUM_MAX_REQUESTS = 10;
|
||||
|
||||
async function requestPublicRooms(
|
||||
accessToken,
|
||||
{ server, searchTerm, paginationToken, limit, abortSignal } = {}
|
||||
) {
|
||||
let qs = new URLSearchParams();
|
||||
if (server) {
|
||||
qs.append('server', server);
|
||||
}
|
||||
|
||||
const publicRoomsEndpoint = urlJoin(
|
||||
matrixServerUrl,
|
||||
`_matrix/client/v3/publicRooms?${qs.toString()}`
|
||||
);
|
||||
|
||||
const { data: publicRoomsRes } = await fetchEndpointAsJson(publicRoomsEndpoint, {
|
||||
method: 'POST',
|
||||
body: {
|
||||
include_all_networks: true,
|
||||
filter: {
|
||||
generic_search_term: searchTerm,
|
||||
},
|
||||
since: paginationToken,
|
||||
limit,
|
||||
},
|
||||
accessToken,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
return publicRoomsRes;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line complexity, max-statements
|
||||
async function fetchAccessibleRooms(
|
||||
accessToken,
|
||||
{
|
||||
server,
|
||||
searchTerm,
|
||||
// Direction is baked into the pagination token but we're unable to decipher it from
|
||||
// the opaque token, we also have to pass it in explicitly.
|
||||
paginationToken,
|
||||
direction = DIRECTION.forward,
|
||||
limit,
|
||||
abortSignal,
|
||||
} = {}
|
||||
) {
|
||||
assert(accessToken);
|
||||
assert([DIRECTION.forward, DIRECTION.backward].includes(direction), 'direction must be [f|b]');
|
||||
|
||||
// Based off of the matrix.org room directory, only 42% of rooms are world_readable,
|
||||
// which means our best bet to fill up the results to the limit is to request at least
|
||||
// 2.4 times as many. I've doubled and rounded it up to 5 times as many so we can have
|
||||
// less round-trips.
|
||||
const bulkPaginationLimit = Math.ceil(5 * limit);
|
||||
|
||||
let accessibleRooms = [];
|
||||
|
||||
let firstResponse;
|
||||
let lastResponse;
|
||||
|
||||
let loopToken = paginationToken;
|
||||
let lastLoopToken;
|
||||
let continuationIndex;
|
||||
let currentRequestCount = 0;
|
||||
while (
|
||||
// Stop if we have reached the limit of rooms we want to fetch
|
||||
accessibleRooms.length < limit &&
|
||||
// And bail if we're already gone through a bunch of pages to try to fill the limit
|
||||
currentRequestCount < NUM_MAX_REQUESTS &&
|
||||
// And bail if we've reached the end of the pagination
|
||||
// Always do the first request
|
||||
(currentRequestCount === 0 ||
|
||||
// If we have a next token, we can do another request
|
||||
(currentRequestCount > 0 && loopToken))
|
||||
) {
|
||||
const publicRoomsRes = await requestPublicRooms(accessToken, {
|
||||
server,
|
||||
searchTerm,
|
||||
paginationToken: loopToken,
|
||||
limit: bulkPaginationLimit,
|
||||
abortSignal,
|
||||
});
|
||||
lastLoopToken = loopToken;
|
||||
lastResponse = publicRoomsRes;
|
||||
|
||||
if (currentRequestCount === 0) {
|
||||
firstResponse = publicRoomsRes;
|
||||
}
|
||||
|
||||
// Get the token ready for the next loop
|
||||
loopToken =
|
||||
direction === DIRECTION.forward ? publicRoomsRes.next_batch : publicRoomsRes.prev_batch;
|
||||
|
||||
const fetchedRooms = publicRoomsRes.chunk;
|
||||
const fetchedRoomsInDirection =
|
||||
direction === DIRECTION.forward ? fetchedRooms : fetchedRooms.reverse();
|
||||
|
||||
// We only want to see world_readable rooms in the archive
|
||||
let index = 0;
|
||||
for (let room of fetchedRoomsInDirection) {
|
||||
if (room.world_readable || room.shared) {
|
||||
if (direction === DIRECTION.forward) {
|
||||
accessibleRooms.push(room);
|
||||
} else if (direction === DIRECTION.backward) {
|
||||
accessibleRooms.unshift(room);
|
||||
} else {
|
||||
throw new Error(`Invalid direction: ${direction}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (accessibleRooms.length === limit && !continuationIndex) {
|
||||
continuationIndex = index;
|
||||
}
|
||||
|
||||
// Stop after we've reached the limit
|
||||
if (accessibleRooms.length >= limit) {
|
||||
break;
|
||||
}
|
||||
|
||||
index += 1;
|
||||
}
|
||||
|
||||
currentRequestCount += 1;
|
||||
}
|
||||
|
||||
// Back-track to get the perfect continuation point and show exactly the limit of
|
||||
// rooms in the grid.
|
||||
//
|
||||
// Alternatively, we could just not worry about and show more than the limit of rooms
|
||||
//
|
||||
// XXX: Since the room directory order is not stable, this is slightly flawed as the
|
||||
// results could have shifted slightly from when we made the last request to now but
|
||||
// we assume it's good enough.
|
||||
let nextPaginationToken;
|
||||
let prevPaginationToken;
|
||||
if (continuationIndex) {
|
||||
const publicRoomsRes = await requestPublicRooms(accessToken, {
|
||||
server,
|
||||
searchTerm,
|
||||
// Start from the last request
|
||||
paginationToken: lastLoopToken,
|
||||
// Then only go out as far out as the continuation index (the point when we filled
|
||||
// the limit)
|
||||
limit: continuationIndex + 1,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
if (direction === DIRECTION.forward) {
|
||||
prevPaginationToken = firstResponse.prev_batch;
|
||||
nextPaginationToken = publicRoomsRes.next_batch;
|
||||
} else if (direction === DIRECTION.backward) {
|
||||
prevPaginationToken = publicRoomsRes.prev_batch;
|
||||
nextPaginationToken = firstResponse.next_batch;
|
||||
} else {
|
||||
throw new Error(`Invalid direction: ${direction}`);
|
||||
}
|
||||
} else {
|
||||
if (direction === DIRECTION.forward) {
|
||||
prevPaginationToken = firstResponse.prev_batch;
|
||||
nextPaginationToken = lastResponse.next_batch;
|
||||
} else if (direction === DIRECTION.backward) {
|
||||
prevPaginationToken = lastResponse.prev_batch;
|
||||
nextPaginationToken = firstResponse.next_batch;
|
||||
} else {
|
||||
throw new Error(`Invalid direction: ${direction}`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
rooms: accessibleRooms,
|
||||
prevPaginationToken,
|
||||
nextPaginationToken,
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = traceFunction(fetchAccessibleRooms);
|
|
@ -1,57 +0,0 @@
|
|||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
|
||||
const urlJoin = require('url-join');
|
||||
const { fetchEndpointAsJson } = require('../fetch-endpoint');
|
||||
const { traceFunction } = require('../../tracing/trace-utilities');
|
||||
|
||||
const config = require('../config');
|
||||
const matrixServerUrl = config.get('matrixServerUrl');
|
||||
assert(matrixServerUrl);
|
||||
|
||||
async function fetchPublicRooms(
|
||||
accessToken,
|
||||
{ server, searchTerm, paginationToken, limit, abortSignal } = {}
|
||||
) {
|
||||
assert(accessToken);
|
||||
|
||||
let qs = new URLSearchParams();
|
||||
if (server) {
|
||||
qs.append('server', server);
|
||||
}
|
||||
|
||||
const publicRoomsEndpoint = urlJoin(
|
||||
matrixServerUrl,
|
||||
`_matrix/client/v3/publicRooms?${qs.toString()}`
|
||||
);
|
||||
|
||||
const { data: publicRoomsRes } = await fetchEndpointAsJson(publicRoomsEndpoint, {
|
||||
method: 'POST',
|
||||
body: {
|
||||
include_all_networks: true,
|
||||
filter: {
|
||||
generic_search_term: searchTerm,
|
||||
},
|
||||
since: paginationToken,
|
||||
limit,
|
||||
},
|
||||
accessToken,
|
||||
abortSignal,
|
||||
});
|
||||
|
||||
// We only want to see public rooms in the archive
|
||||
const accessibleRooms = publicRoomsRes.chunk.filter((room) => {
|
||||
// `room.world_readable` is also accessible here but we only use history
|
||||
// `world_readable` to determine search indexing.
|
||||
return room.join_rule === 'public';
|
||||
});
|
||||
|
||||
return {
|
||||
rooms: accessibleRooms,
|
||||
nextPaginationToken: publicRoomsRes.next_batch,
|
||||
prevPaginationToken: publicRoomsRes.prev_batch,
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = traceFunction(fetchPublicRooms);
|
|
@ -155,7 +155,6 @@ const fetchRoomData = traceFunction(async function (
|
|||
stateCanonicalAliasResDataOutcome,
|
||||
stateAvatarResDataOutcome,
|
||||
stateHistoryVisibilityResDataOutcome,
|
||||
stateJoinRulesResDataOutcome,
|
||||
predecessorInfoOutcome,
|
||||
successorInfoOutcome,
|
||||
] = await Promise.allSettled([
|
||||
|
@ -182,10 +181,6 @@ const fetchRoomData = traceFunction(async function (
|
|||
abortSignal,
|
||||
}
|
||||
),
|
||||
fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.join_rules'), {
|
||||
accessToken: matrixAccessToken,
|
||||
abortSignal,
|
||||
}),
|
||||
fetchPredecessorInfo(matrixAccessToken, roomId, { abortSignal }),
|
||||
fetchSuccessorInfo(matrixAccessToken, roomId, { abortSignal }),
|
||||
]);
|
||||
|
@ -215,15 +210,15 @@ const fetchRoomData = traceFunction(async function (
|
|||
}
|
||||
|
||||
let historyVisibility;
|
||||
let historyVisibilityEventMeta;
|
||||
if (stateHistoryVisibilityResDataOutcome.reason === undefined) {
|
||||
const { data } = stateHistoryVisibilityResDataOutcome.value;
|
||||
historyVisibility = data?.content?.history_visibility;
|
||||
}
|
||||
|
||||
let joinRule;
|
||||
if (stateJoinRulesResDataOutcome.reason === undefined) {
|
||||
const { data } = stateJoinRulesResDataOutcome.value;
|
||||
joinRule = data?.content?.join_rule;
|
||||
historyVisibilityEventMeta = {
|
||||
historyVisibility,
|
||||
sender: data?.sender,
|
||||
originServerTs: data?.origin_server_ts,
|
||||
};
|
||||
}
|
||||
|
||||
let roomCreationTs;
|
||||
|
@ -251,7 +246,7 @@ const fetchRoomData = traceFunction(async function (
|
|||
canonicalAlias,
|
||||
avatarUrl,
|
||||
historyVisibility,
|
||||
joinRule,
|
||||
historyVisibilityEventMeta,
|
||||
roomCreationTs,
|
||||
predecessorRoomId,
|
||||
predecessorLastKnownEventId,
|
||||
|
|
|
@ -6,10 +6,11 @@ const urlJoin = require('url-join');
|
|||
const express = require('express');
|
||||
const asyncHandler = require('../lib/express-async-handler');
|
||||
|
||||
const { DIRECTION } = require('matrix-public-archive-shared/lib/reference-values');
|
||||
const RouteTimeoutAbortError = require('../lib/errors/route-timeout-abort-error');
|
||||
const UserClosedConnectionAbortError = require('../lib/errors/user-closed-connection-abort-error');
|
||||
const identifyRoute = require('../middleware/identify-route-middleware');
|
||||
const fetchPublicRooms = require('../lib/matrix-utils/fetch-public-rooms');
|
||||
const fetchAccessibleRooms = require('../lib/matrix-utils/fetch-accessible-rooms');
|
||||
const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/render-hydrogen-vm-render-script-to-page-html');
|
||||
const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets');
|
||||
|
||||
|
@ -22,7 +23,6 @@ const matrixServerName = config.get('matrixServerName');
|
|||
assert(matrixServerName);
|
||||
const matrixAccessToken = config.get('matrixAccessToken');
|
||||
assert(matrixAccessToken);
|
||||
const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing');
|
||||
|
||||
const router = express.Router({
|
||||
caseSensitive: true,
|
||||
|
@ -34,9 +34,19 @@ router.get(
|
|||
'/',
|
||||
identifyRoute('app-room-directory-index'),
|
||||
asyncHandler(async function (req, res) {
|
||||
const paginationToken = req.query.page;
|
||||
const searchTerm = req.query.search;
|
||||
const homeserver = req.query.homeserver;
|
||||
const paginationToken = req.query.page;
|
||||
const direction = req.query.dir;
|
||||
|
||||
// You must provide both `paginationToken` and `direction` if either is defined
|
||||
if (paginationToken || direction) {
|
||||
assert(
|
||||
[DIRECTION.forward, DIRECTION.backward].includes(direction),
|
||||
'?dir query parameter must be [f|b]'
|
||||
);
|
||||
assert(paginationToken, '?page query parameter must be defined if ?dir is defined');
|
||||
}
|
||||
|
||||
// It would be good to grab more rooms than we display in case we need
|
||||
// to filter any out but then the pagination tokens with the homeserver
|
||||
|
@ -49,12 +59,13 @@ router.get(
|
|||
let prevPaginationToken;
|
||||
let roomFetchError;
|
||||
try {
|
||||
({ rooms, nextPaginationToken, prevPaginationToken } = await fetchPublicRooms(
|
||||
({ rooms, nextPaginationToken, prevPaginationToken } = await fetchAccessibleRooms(
|
||||
matrixAccessToken,
|
||||
{
|
||||
server: homeserver,
|
||||
searchTerm,
|
||||
paginationToken,
|
||||
direction,
|
||||
limit,
|
||||
abortSignal: req.abortSignal,
|
||||
}
|
||||
|
@ -71,7 +82,8 @@ router.get(
|
|||
}
|
||||
|
||||
// We index the room directory unless the config says we shouldn't index anything
|
||||
const shouldIndex = !stopSearchEngineIndexing;
|
||||
const stopSearchEngineIndexingFromConfig = config.get('stopSearchEngineIndexing');
|
||||
const shouldIndex = !stopSearchEngineIndexingFromConfig;
|
||||
|
||||
const pageOptions = {
|
||||
title: `Matrix Public Archive`,
|
||||
|
|
|
@ -57,7 +57,6 @@ const matrixServerUrl = config.get('matrixServerUrl');
|
|||
assert(matrixServerUrl);
|
||||
const matrixAccessToken = config.get('matrixAccessToken');
|
||||
assert(matrixAccessToken);
|
||||
const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing');
|
||||
|
||||
const matrixPublicArchiveURLCreator = new MatrixPublicArchiveURLCreator(basePath);
|
||||
|
||||
|
@ -828,15 +827,16 @@ router.get(
|
|||
}),
|
||||
]);
|
||||
|
||||
// Only `world_readable` or `shared` rooms that are `public` are viewable in the archive
|
||||
const allowedToViewRoom =
|
||||
roomData.historyVisibility === 'world_readable' ||
|
||||
(roomData.historyVisibility === 'shared' && roomData.joinRule === 'public');
|
||||
// Only `world_readable` rooms are viewable in the archive
|
||||
const allowedToViewRoom = true;
|
||||
|
||||
if (!allowedToViewRoom) {
|
||||
throw new StatusError(
|
||||
403,
|
||||
`Only \`world_readable\` or \`shared\` rooms that are \`public\` can be viewed in the archive. ${roomData.id} has m.room.history_visiblity=${roomData.historyVisibility} m.room.join_rules=${roomData.joinRule}`
|
||||
`Only \`world_readable\` rooms can be viewed in the archive. ` +
|
||||
`${roomData.id} has m.room.history_visiblity=${roomData.historyVisibility} ` +
|
||||
`(set by ${roomData.historyVisibilityEventMeta?.sender} on ` +
|
||||
`${new Date(roomData.historyVisibilityEventMeta?.originServerTs).toISOString()})`
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -891,7 +891,8 @@ router.get(
|
|||
|
||||
// Default to no indexing (safe default)
|
||||
let shouldIndex = false;
|
||||
if (stopSearchEngineIndexing) {
|
||||
const stopSearchEngineIndexingFromConfig = config.get('stopSearchEngineIndexing');
|
||||
if (stopSearchEngineIndexingFromConfig) {
|
||||
shouldIndex = false;
|
||||
} else {
|
||||
// Otherwise we only allow search engines to index `world_readable` rooms
|
||||
|
|
|
@ -118,6 +118,7 @@ async function mountHydrogen() {
|
|||
events,
|
||||
stateEventMap,
|
||||
shouldIndex,
|
||||
historyVisibilityEventMeta: roomData.historyVisibilityEventMeta,
|
||||
basePath: config.basePath,
|
||||
});
|
||||
|
||||
|
|
|
@ -1,7 +1,13 @@
|
|||
'use strict';
|
||||
|
||||
const escapeStringRegexp = require('escape-string-regexp');
|
||||
|
||||
const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];
|
||||
const NSFW_REGEXES = NSFW_WORDS.map((word) => new RegExp(`(\\b|_)${word}(\\b|_)`, 'i'));
|
||||
const NSFW_REGEXES = NSFW_WORDS.map(
|
||||
// We use `(\b|_|-|\s|^)` instead of just `(\b|_)` because the word boundary doesn't
|
||||
// match next to the `+` sign in `18+`
|
||||
(word) => new RegExp(`(\\b|_|-|\\s|^)${escapeStringRegexp(word)}(\\b|_|-|\\s|$)`, 'i')
|
||||
);
|
||||
|
||||
// A very basic check for NSFW content that just looks for some keywords in the given
|
||||
// text
|
||||
|
|
|
@ -3,7 +3,10 @@
|
|||
const urlJoin = require('url-join');
|
||||
|
||||
const assert = require('matrix-public-archive-shared/lib/assert');
|
||||
const { TIME_PRECISION_VALUES } = require('matrix-public-archive-shared/lib/reference-values');
|
||||
const {
|
||||
DIRECTION,
|
||||
TIME_PRECISION_VALUES,
|
||||
} = require('matrix-public-archive-shared/lib/reference-values');
|
||||
|
||||
function qsToUrlPiece(qs) {
|
||||
if (qs.toString()) {
|
||||
|
@ -25,7 +28,16 @@ class URLCreator {
|
|||
return `https://matrix.to/#/${roomIdOrAlias}`;
|
||||
}
|
||||
|
||||
roomDirectoryUrl({ searchTerm, homeserver, paginationToken } = {}) {
|
||||
roomDirectoryUrl({ searchTerm, homeserver, paginationToken, direction } = {}) {
|
||||
// You must provide both `paginationToken` and `direction` if either is defined
|
||||
if (paginationToken || direction) {
|
||||
assert(
|
||||
[DIRECTION.forward, DIRECTION.backward].includes(direction),
|
||||
'direction must be [f|b]'
|
||||
);
|
||||
assert(paginationToken);
|
||||
}
|
||||
|
||||
let qs = new URLSearchParams();
|
||||
if (searchTerm) {
|
||||
qs.append('search', searchTerm);
|
||||
|
@ -36,6 +48,9 @@ class URLCreator {
|
|||
if (paginationToken) {
|
||||
qs.append('page', paginationToken);
|
||||
}
|
||||
if (direction) {
|
||||
qs.append('dir', direction);
|
||||
}
|
||||
|
||||
return `${this._basePath}${qsToUrlPiece(qs)}`;
|
||||
}
|
||||
|
|
|
@ -75,6 +75,7 @@ class ArchiveRoomViewModel extends ViewModel {
|
|||
events,
|
||||
stateEventMap,
|
||||
shouldIndex,
|
||||
historyVisibilityEventMeta,
|
||||
basePath,
|
||||
} = options;
|
||||
assert(homeserverUrl);
|
||||
|
@ -85,6 +86,9 @@ class ArchiveRoomViewModel extends ViewModel {
|
|||
assert(events);
|
||||
assert(stateEventMap);
|
||||
assert(shouldIndex !== undefined);
|
||||
assert(historyVisibilityEventMeta.historyVisibility);
|
||||
assert(historyVisibilityEventMeta.sender);
|
||||
assert(historyVisibilityEventMeta.originServerTs);
|
||||
assert(events);
|
||||
|
||||
this._room = room;
|
||||
|
@ -213,6 +217,7 @@ class ArchiveRoomViewModel extends ViewModel {
|
|||
shouldShowTimeSelector,
|
||||
timeSelectorViewModel: this._timeSelectorViewModel,
|
||||
shouldIndex,
|
||||
historyVisibilityEventMeta,
|
||||
get developerOptionsUrl() {
|
||||
return urlRouter.urlForSegments([
|
||||
navigation.segment('room', room.id),
|
||||
|
|
|
@ -9,6 +9,7 @@ const ModalViewModel = require('matrix-public-archive-shared/viewmodels/ModalVie
|
|||
const HomeserverSelectionModalContentViewModel = require('matrix-public-archive-shared/viewmodels/HomeserverSelectionModalContentViewModel');
|
||||
const RoomCardViewModel = require('matrix-public-archive-shared/viewmodels/RoomCardViewModel');
|
||||
const checkTextForNsfw = require('matrix-public-archive-shared/lib/check-text-for-nsfw');
|
||||
const { DIRECTION } = require('../lib/reference-values');
|
||||
|
||||
const DEFAULT_SERVER_LIST = ['matrix.org', 'gitter.im'];
|
||||
|
||||
|
@ -304,6 +305,7 @@ class RoomDirectoryViewModel extends ViewModel {
|
|||
homeserver: this.homeserverSelection,
|
||||
searchTerm: this.searchTerm,
|
||||
paginationToken: this._nextPaginationToken,
|
||||
direction: DIRECTION.forward,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -316,6 +318,7 @@ class RoomDirectoryViewModel extends ViewModel {
|
|||
homeserver: this.homeserverSelection,
|
||||
searchTerm: this.searchTerm,
|
||||
paginationToken: this._prevPaginationToken,
|
||||
direction: DIRECTION.backward,
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -10,12 +10,28 @@ class RightPanelContentView extends TemplateView {
|
|||
render(t, vm) {
|
||||
assert(vm.shouldIndex !== undefined);
|
||||
assert(vm.shouldShowTimeSelector !== undefined);
|
||||
assert(vm.historyVisibilityEventMeta.historyVisibility);
|
||||
assert(vm.historyVisibilityEventMeta.sender);
|
||||
assert(vm.historyVisibilityEventMeta.originServerTs);
|
||||
|
||||
let maybeIndexedMessage = 'This room is not being indexed by search engines ';
|
||||
if (vm.shouldIndex) {
|
||||
maybeIndexedMessage = 'This room is being indexed by search engines ';
|
||||
maybeIndexedMessage = 'This room is being indexed by search engines';
|
||||
}
|
||||
|
||||
const historyVisibilitySender = vm.historyVisibilityEventMeta.sender;
|
||||
|
||||
let historyVisibilityDisplayValue = vm.historyVisibilityEventMeta.historyVisibility;
|
||||
if (vm.historyVisibilityEventMeta.historyVisibility === 'world_readable' || vm.historyVisibilityEventMeta.historyVisibility === 'shared') {
|
||||
historyVisibilityDisplayValue = vm.historyVisibilityEventMeta.historyVisibility.replace('_', ' ');
|
||||
}
|
||||
|
||||
const [historyVisibilitySetDatePiece, _timePiece] = new Date(
|
||||
vm.historyVisibilityEventMeta.originServerTs
|
||||
)
|
||||
.toISOString()
|
||||
.split('T');
|
||||
|
||||
return t.div(
|
||||
{
|
||||
className: 'RightPanelContentView',
|
||||
|
@ -33,9 +49,13 @@ class RightPanelContentView extends TemplateView {
|
|||
className: 'RightPanelContentView_footer',
|
||||
},
|
||||
[
|
||||
t.p([
|
||||
`This room is accessible in the archive because it was set to ` +
|
||||
`${historyVisibilityDisplayValue} by ${historyVisibilitySender} on ${historyVisibilitySetDatePiece}.`,
|
||||
]),
|
||||
t.p([
|
||||
maybeIndexedMessage,
|
||||
'(',
|
||||
' (',
|
||||
t.a(
|
||||
{
|
||||
className: 'external-link RightPanelContentView_footerLink',
|
||||
|
|
|
@ -322,10 +322,21 @@ class RoomDirectoryView extends TemplateView {
|
|||
t.view(roomList),
|
||||
t.div({ className: 'RoomDirectoryView_paginationButtonCombo' }, [
|
||||
t.a(
|
||||
{ className: 'RoomDirectoryView_paginationButton', href: vm.prevPageUrl },
|
||||
{
|
||||
className: 'RoomDirectoryView_paginationButton',
|
||||
href: vm.prevPageUrl,
|
||||
'data-testid': 'room-directory-prev-link',
|
||||
},
|
||||
'Previous'
|
||||
),
|
||||
t.a({ className: 'RoomDirectoryView_paginationButton', href: vm.nextPageUrl }, 'Next'),
|
||||
t.a(
|
||||
{
|
||||
className: 'RoomDirectoryView_paginationButton',
|
||||
href: vm.nextPageUrl,
|
||||
'data-testid': 'room-directory-next-link',
|
||||
},
|
||||
'Next'
|
||||
),
|
||||
]),
|
||||
]),
|
||||
t.if(
|
||||
|
|
|
@ -3,12 +3,7 @@
|
|||
#
|
||||
# Currently this is based on Complement Synapse images which are based on the
|
||||
# published 'synapse:latest' image -- ie, the most recent Synapse release.
|
||||
|
||||
# FIXME: We're pinning the version to `v1.79.0` until
|
||||
# https://github.com/matrix-org/synapse/issues/15526 is fixed. Feel free to update back
|
||||
# to `latest` once that issue is resolved. More context:
|
||||
# https://github.com/matrix-org/matrix-public-archive/pull/208#discussion_r1183294630
|
||||
ARG SYNAPSE_VERSION=v1.79.0
|
||||
ARG SYNAPSE_VERSION=latest
|
||||
|
||||
FROM matrixdotorg/synapse:${SYNAPSE_VERSION}
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ const {
|
|||
sendMessage,
|
||||
createMessagesInRoom,
|
||||
getMessagesInRoom,
|
||||
waitForResultsInHomeserverRoomDirectory,
|
||||
updateProfile,
|
||||
uploadContent,
|
||||
} = require('./test-utils/client-utils');
|
||||
|
@ -2507,15 +2508,33 @@ describe('matrix-public-archive', () => {
|
|||
// test runs against the same homeserver
|
||||
const timeToken = Date.now();
|
||||
const roomPlanetPrefix = `planet-${timeToken}`;
|
||||
const roomSaturnName = `${roomPlanetPrefix}-saturn`;
|
||||
const roomSaturnId = await createTestRoom(client, {
|
||||
name: `${roomPlanetPrefix}-saturn`,
|
||||
name: roomSaturnName,
|
||||
});
|
||||
const roomMarsName = `${roomPlanetPrefix}-mars`;
|
||||
const roomMarsId = await createTestRoom(client, {
|
||||
name: `${roomPlanetPrefix}-mars`,
|
||||
name: roomMarsName,
|
||||
});
|
||||
|
||||
// Browse the room directory without search to see many rooms
|
||||
//
|
||||
// (we set this here in case we timeout while waiting for the test rooms to
|
||||
// appear in the room directory)
|
||||
archiveUrl = matrixPublicArchiveURLCreator.roomDirectoryUrl();
|
||||
|
||||
// Try to avoid flakey tests where the homeserver hasn't added the rooms to the
|
||||
// room directory yet. This isn't completely robust as it doesn't check that the
|
||||
// random room at the start is in the directory but should be good enough.
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: roomSaturnName,
|
||||
});
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: roomMarsName,
|
||||
});
|
||||
|
||||
const { data: roomDirectoryPageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||
const dom = parseHTML(roomDirectoryPageHtml);
|
||||
|
||||
|
@ -2556,17 +2575,33 @@ describe('matrix-public-archive', () => {
|
|||
// test runs against the same homeserver
|
||||
const timeToken = Date.now();
|
||||
const roomPlanetPrefix = `remote-planet-${timeToken}`;
|
||||
const roomXName = `${roomPlanetPrefix}-x`;
|
||||
const roomXId = await createTestRoom(hs2Client, {
|
||||
name: `${roomPlanetPrefix}-x`,
|
||||
name: roomXName,
|
||||
});
|
||||
const roomYname = `${roomPlanetPrefix}-y`;
|
||||
const roomYId = await createTestRoom(hs2Client, {
|
||||
name: `${roomPlanetPrefix}-y`,
|
||||
name: roomYname,
|
||||
});
|
||||
|
||||
// (we set this here in case we timeout while waiting for the test rooms to
|
||||
// appear in the room directory)
|
||||
archiveUrl = matrixPublicArchiveURLCreator.roomDirectoryUrl({
|
||||
homeserver: HOMESERVER_URL_TO_PRETTY_NAME_MAP[testMatrixServerUrl2],
|
||||
searchTerm: roomPlanetPrefix,
|
||||
});
|
||||
|
||||
// Try to avoid flakey tests where the homeserver hasn't added the rooms to the
|
||||
// room directory yet.
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client: hs2Client,
|
||||
searchTerm: roomXName,
|
||||
});
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client: hs2Client,
|
||||
searchTerm: roomYname,
|
||||
});
|
||||
|
||||
const { data: roomDirectoryWithSearchPageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||
const domWithSearch = parseHTML(roomDirectoryWithSearchPageHtml);
|
||||
|
||||
|
@ -2601,21 +2636,39 @@ describe('matrix-public-archive', () => {
|
|||
// test runs against the same homeserver
|
||||
const timeToken = Date.now();
|
||||
const roomPlanetPrefix = `planet-${timeToken}`;
|
||||
const roomUranusName = `${roomPlanetPrefix}-uranus-nsfw`;
|
||||
const roomUranusId = await createTestRoom(client, {
|
||||
// NSFW in title
|
||||
name: `${roomPlanetPrefix}-uranus-nsfw`,
|
||||
name: roomUranusName,
|
||||
});
|
||||
const roomMarsName = `${roomPlanetPrefix}-mars`;
|
||||
const roomMarsId = await createTestRoom(client, {
|
||||
name: `${roomPlanetPrefix}-mars`,
|
||||
name: roomMarsName,
|
||||
// NSFW in room topic/description
|
||||
topic: 'Get your ass to mars (NSFW)',
|
||||
});
|
||||
|
||||
// Browse the room directory searching the room directory for those NSFW rooms
|
||||
// (narrowing down results).
|
||||
//
|
||||
// (we set this here in case we timeout while waiting for the test rooms to
|
||||
// appear in the room directory)
|
||||
archiveUrl = matrixPublicArchiveURLCreator.roomDirectoryUrl({
|
||||
searchTerm: roomPlanetPrefix,
|
||||
});
|
||||
|
||||
// Try to avoid flakey tests where the homeserver hasn't added the rooms to the
|
||||
// room directory yet. This isn't completely robust as it doesn't check that the
|
||||
// random room at the start is in the directory but should be good enough.
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: roomUranusName,
|
||||
});
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: roomMarsName,
|
||||
});
|
||||
|
||||
const { data: roomDirectoryWithSearchPageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||
const domWithSearch = parseHTML(roomDirectoryWithSearchPageHtml);
|
||||
|
||||
|
@ -2644,6 +2697,173 @@ describe('matrix-public-archive', () => {
|
|||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('pagination is seamless', async () => {
|
||||
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||
// We use a `timeToken` so that we can namespace these rooms away from other
|
||||
// test runs against the same homeserver
|
||||
const timeToken = Date.now();
|
||||
const roomPlanetPrefix = `planet-${timeToken}`;
|
||||
|
||||
// Fill up the room room directory with multiple pages of rooms
|
||||
const visibleRoomConfigurations = [];
|
||||
const roomsConfigurationsToCreate = [];
|
||||
for (let i = 0; i < 40; i++) {
|
||||
const roomCreateOptions = {
|
||||
name: `${roomPlanetPrefix}-room-${i}`,
|
||||
};
|
||||
|
||||
// Sprinkle in some rooms every so often that should not appear in the room directory
|
||||
if (i % 3 === 0) {
|
||||
roomCreateOptions.name = `${roomPlanetPrefix}-room-not-world-readable-${i}`;
|
||||
roomCreateOptions.initial_state = [
|
||||
{
|
||||
type: 'm.room.history_visibility',
|
||||
state_key: '',
|
||||
content: {
|
||||
history_visibility: 'joined',
|
||||
},
|
||||
},
|
||||
{
|
||||
type: 'm.room.topic',
|
||||
state_key: '',
|
||||
content: {
|
||||
// Just a specific token we can search for in the DOM to make sure
|
||||
// this room does not appear in the room directory.
|
||||
topic: 'should-not-be-visible-in-archive-room-directory',
|
||||
},
|
||||
},
|
||||
];
|
||||
} else {
|
||||
visibleRoomConfigurations.push(roomCreateOptions);
|
||||
}
|
||||
|
||||
roomsConfigurationsToCreate.push(roomCreateOptions);
|
||||
}
|
||||
|
||||
// Doing all of these create room requests in parallel is about 2x faster than
|
||||
// doing them serially and the room directory doesn't return the rooms in any
|
||||
// particular order so it doesn't make the test any more clear doing them
|
||||
// serially anyway.
|
||||
const createdRoomsIds = await Promise.all(
|
||||
roomsConfigurationsToCreate.map((roomCreateOptions) =>
|
||||
createTestRoom(client, roomCreateOptions)
|
||||
)
|
||||
);
|
||||
|
||||
function roomIdToRoomName(expectedRoomId) {
|
||||
const roomIndex = createdRoomsIds.findIndex((roomId) => {
|
||||
return roomId === expectedRoomId;
|
||||
});
|
||||
assert(
|
||||
roomIndex > 0,
|
||||
`Expected to find expectedRoomId=${expectedRoomId} in the list of created rooms createdRoomsIds=${createdRoomsIds}`
|
||||
);
|
||||
|
||||
const roomConfig = roomsConfigurationsToCreate[roomIndex];
|
||||
assert(
|
||||
roomConfig,
|
||||
`Expected to find room config for roomIndex=${roomIndex} in the list of roomsConfigurationsToCreate (length ${roomsConfigurationsToCreate.length})}`
|
||||
);
|
||||
|
||||
return roomConfig.name;
|
||||
}
|
||||
|
||||
async function checkRoomsOnPage(archiveUrl) {
|
||||
const { data: roomDirectoryWithSearchPageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||
const dom = parseHTML(roomDirectoryWithSearchPageHtml);
|
||||
|
||||
const roomsCardsOnPageWithSearch = [
|
||||
...dom.document.querySelectorAll(`[data-testid="room-card"]`),
|
||||
];
|
||||
|
||||
const roomsIdsOnPage = roomsCardsOnPageWithSearch.map((roomCardEl) => {
|
||||
return roomCardEl.getAttribute('data-room-id');
|
||||
});
|
||||
|
||||
// Sanity check that we don't see any non-world_readable rooms.
|
||||
roomsCardsOnPageWithSearch.forEach((roomCardEl) => {
|
||||
assert.match(
|
||||
roomCardEl.innerHTML,
|
||||
/^((?!should-not-be-visible-in-archive-room-directory).)*$/,
|
||||
`Expected not to see any non-world_readable rooms on the page but saw ${roomCardEl.getAttribute(
|
||||
'data-room-id'
|
||||
)} which has "should-not-be-visible-in-archive-room-directory" in the room topic`
|
||||
);
|
||||
});
|
||||
|
||||
// Find the pagination buttons and grab the links to the previous and next pages
|
||||
const previousLinkElement = dom.document.querySelector(
|
||||
`[data-testid="room-directory-prev-link"]`
|
||||
);
|
||||
const nextLinkElement = dom.document.querySelector(
|
||||
`[data-testid="room-directory-next-link"]`
|
||||
);
|
||||
|
||||
const previousPaginationLink = previousLinkElement.getAttribute('href');
|
||||
const nextPaginationLink = nextLinkElement.getAttribute('href');
|
||||
|
||||
return {
|
||||
archiveUrl,
|
||||
roomsIdsOnPage,
|
||||
previousPaginationLink,
|
||||
nextPaginationLink,
|
||||
};
|
||||
}
|
||||
|
||||
// Browse the room directory with the search prefix so we only see rooms
|
||||
// relevant to this test.
|
||||
//
|
||||
// (we set this here in case we timeout while waiting for the test rooms to
|
||||
// appear in the room directory)
|
||||
archiveUrl = matrixPublicArchiveURLCreator.roomDirectoryUrl({
|
||||
searchTerm: roomPlanetPrefix,
|
||||
});
|
||||
|
||||
// Try to avoid flakey tests where the homeserver hasn't added the rooms
|
||||
// to the room directory yet. This isn't completely robust as it doesn't check
|
||||
// that all rooms are visible but it's better than nothing.
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: visibleRoomConfigurations[0].name,
|
||||
});
|
||||
await waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm: visibleRoomConfigurations[visibleRoomConfigurations.length - 1].name,
|
||||
});
|
||||
|
||||
// Visit a sequence of pages using the pagination links: 1 -> 2 -> 3 -> 2 -> 1
|
||||
const firstPage = await checkRoomsOnPage(archiveUrl);
|
||||
const secondPage = await checkRoomsOnPage(firstPage.nextPaginationLink);
|
||||
const thirdPage = await checkRoomsOnPage(secondPage.nextPaginationLink);
|
||||
const backtrackSecondPage = await checkRoomsOnPage(thirdPage.previousPaginationLink);
|
||||
const backtrackFirstPage = await checkRoomsOnPage(
|
||||
backtrackSecondPage.previousPaginationLink
|
||||
);
|
||||
|
||||
// Ensure that we saw all of the visible rooms paginating through the directory
|
||||
assert.deepStrictEqual(
|
||||
[...firstPage.roomsIdsOnPage, ...secondPage.roomsIdsOnPage, ...thirdPage.roomsIdsOnPage]
|
||||
.map(roomIdToRoomName)
|
||||
.sort(),
|
||||
visibleRoomConfigurations.map((roomConfig) => roomConfig.name).sort(),
|
||||
'Make sure we saw all visible rooms paginating through the directory'
|
||||
);
|
||||
|
||||
// Ensure that we see the same rooms in the same order going backward that we saw going forward
|
||||
archiveUrl = backtrackSecondPage.archiveUrl;
|
||||
assert.deepStrictEqual(
|
||||
backtrackSecondPage.roomsIdsOnPage.map(roomIdToRoomName),
|
||||
secondPage.roomsIdsOnPage.map(roomIdToRoomName),
|
||||
'From the third page, going backward to second page should show the same rooms that we saw on the second page when going forward'
|
||||
);
|
||||
archiveUrl = backtrackFirstPage.archiveUrl;
|
||||
assert.deepStrictEqual(
|
||||
backtrackFirstPage.roomsIdsOnPage.map(roomIdToRoomName),
|
||||
firstPage.roomsIdsOnPage.map(roomIdToRoomName),
|
||||
'From the second page, going backward to first page should show the same rooms that we saw on first page when going forward'
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('access controls', () => {
|
||||
|
@ -2688,15 +2908,50 @@ describe('matrix-public-archive', () => {
|
|||
assert.strictEqual(dom.document.querySelector(`meta[name="robots"]`), null);
|
||||
});
|
||||
|
||||
it('search engines not allowed to index `public` room', async () => {
|
||||
it('search engines not allowed to access public room with non-`world_readable` history visibility', async () => {
|
||||
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||
const roomId = await createTestRoom(client, {
|
||||
// The default options for the test rooms adds a
|
||||
// `m.room.history_visiblity` state event so we override that here so
|
||||
// it's only a public room.
|
||||
initial_state: [],
|
||||
// Set as `shared` since it's the next most permissive history visibility
|
||||
// after `world_readable` but still not allowed to be accesible in the
|
||||
// archive.
|
||||
initial_state: [
|
||||
{
|
||||
type: 'm.room.history_visibility',
|
||||
state_key: '',
|
||||
content: {
|
||||
history_visibility: 'shared',
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
try {
|
||||
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
|
||||
await fetchEndpointAsText(archiveUrl);
|
||||
assert.fail(
|
||||
new TestError(
|
||||
'We expect the request to fail with a 403 since the archive should not be able to view a non-world_readable room but it succeeded'
|
||||
)
|
||||
);
|
||||
} catch (err) {
|
||||
if (err instanceof TestError) {
|
||||
throw err;
|
||||
}
|
||||
assert.strictEqual(
|
||||
err.response.status,
|
||||
403,
|
||||
`Expected err.response.status=${err?.response?.status} to be 403 but error was: ${err.stack}`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('Configuring `stopSearchEngineIndexing` will stop search engine indexing', async () => {
|
||||
// Disable search engine indexing across the entire instance
|
||||
config.set('stopSearchEngineIndexing', true);
|
||||
|
||||
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||
const roomId = await createTestRoom(client);
|
||||
|
||||
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForRoom(roomId);
|
||||
const { data: archivePageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||
|
||||
|
|
|
@ -13,6 +13,11 @@ describe('checkTextForNsfw', () => {
|
|||
NSFW_foo: true,
|
||||
'NSFW-foo': true,
|
||||
'NSFW:foo': true,
|
||||
'18+ only': true,
|
||||
// Previous false positives that we ran into in the wild that should not be flagged
|
||||
// as NSFW
|
||||
'1888-great-blizzard': false,
|
||||
'argon-18-element': false,
|
||||
}).forEach(([inputText, expectedNsfw]) => {
|
||||
it(`should return ${expectedNsfw} for '${inputText}'`, () => {
|
||||
assert.strictEqual(
|
||||
|
|
|
@ -4,6 +4,8 @@ const assert = require('assert');
|
|||
const urlJoin = require('url-join');
|
||||
const { fetchEndpointAsJson, fetchEndpoint } = require('../../server/lib/fetch-endpoint');
|
||||
const getServerNameFromMatrixRoomIdOrAlias = require('../../server/lib/matrix-utils/get-server-name-from-matrix-room-id-or-alias');
|
||||
const { MS_LOOKUP } = require('matrix-public-archive-shared/lib/reference-values');
|
||||
const { ONE_SECOND_IN_MS } = MS_LOOKUP;
|
||||
|
||||
const config = require('../../server/lib/config');
|
||||
const matrixAccessToken = config.get('matrixAccessToken');
|
||||
|
@ -14,7 +16,7 @@ assert(testMatrixServerUrl1);
|
|||
let txnCount = 0;
|
||||
function getTxnId() {
|
||||
txnCount++;
|
||||
return `${new Date().getTime()}--${txnCount}`;
|
||||
return `txn${txnCount}-${new Date().getTime()}`;
|
||||
}
|
||||
|
||||
// Basic slugify function, plenty of edge cases and should not be used for
|
||||
|
@ -150,7 +152,7 @@ async function createTestRoom(client, overrideCreateOptions = {}) {
|
|||
}
|
||||
|
||||
const roomName = overrideCreateOptions.name || 'the hangout spot';
|
||||
const roomAlias = slugify(roomName + getTxnId());
|
||||
const roomAlias = slugify(roomName + '-' + getTxnId());
|
||||
|
||||
const { data: createRoomResponse } = await fetchEndpointAsJson(
|
||||
urlJoin(client.homeserverUrl, `/_matrix/client/v3/createRoom?${qs.toString()}`),
|
||||
|
@ -421,6 +423,50 @@ async function uploadContent({ client, roomId, data, fileName, contentType }) {
|
|||
return mxcUri;
|
||||
}
|
||||
|
||||
// This can be removed after https://github.com/matrix-org/synapse/issues/15526 is solved
|
||||
async function waitForResultsInHomeserverRoomDirectory({
|
||||
client,
|
||||
searchTerm,
|
||||
timeoutMs = 10 * ONE_SECOND_IN_MS,
|
||||
}) {
|
||||
assert(client);
|
||||
assert(searchTerm !== undefined);
|
||||
|
||||
const roomDirectoryEndpoint = urlJoin(client.homeserverUrl, `_matrix/client/v3/publicRooms`);
|
||||
|
||||
// eslint-disable-next-line no-async-promise-executor
|
||||
await new Promise(async (resolve, reject) => {
|
||||
try {
|
||||
setTimeout(() => {
|
||||
reject(new Error('Timed out waiting for rooms to appear in the room directory'));
|
||||
}, timeoutMs);
|
||||
|
||||
let foundResults = false;
|
||||
while (!foundResults) {
|
||||
const { data: publicRoomsRes } = await fetchEndpointAsJson(roomDirectoryEndpoint, {
|
||||
method: 'POST',
|
||||
body: {
|
||||
include_all_networks: true,
|
||||
filter: {
|
||||
generic_search_term: searchTerm,
|
||||
},
|
||||
limit: 1,
|
||||
},
|
||||
accessToken: client.accessToken,
|
||||
});
|
||||
|
||||
if (publicRoomsRes.chunk.length > 0) {
|
||||
foundResults = true;
|
||||
resolve();
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
ensureUserRegistered,
|
||||
getTestClientForAs,
|
||||
|
@ -433,6 +479,7 @@ module.exports = {
|
|||
sendMessage,
|
||||
createMessagesInRoom,
|
||||
getMessagesInRoom,
|
||||
waitForResultsInHomeserverRoomDirectory,
|
||||
updateProfile,
|
||||
uploadContent,
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue