Add config to disable search engine indexing (#127)

This commit is contained in:
Eric Eastwood 2022-11-08 22:41:58 -06:00 committed by GitHub
parent b3c553a863
commit dc85e839a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 19 additions and 2 deletions

View File

@ -8,7 +8,11 @@
"archiveMessageLimit": 100,
"requestTimeoutMs": 25000,
"logOutputFromChildProcesses": false,
//"stopSearchEngineIndexing": true,
// Tracing
//"jaegerTracesEndpoint": "http://localhost:14268/api/traces",
// Testing
"testMatrixServerUrl1": "http://localhost:11008/",
"testMatrixServerUrl2": "http://localhost:12008/",

View File

@ -18,6 +18,7 @@ const matrixServerName = config.get('matrixServerName');
assert(matrixServerName);
const matrixAccessToken = config.get('matrixAccessToken');
assert(matrixAccessToken);
// Unlike the settings above, this value is not asserted, so it is presumably
// optional (TODO confirm how `config.get` treats a missing key). The route
// handler in this module negates it to derive `shouldIndex`.
const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing');
const router = express.Router({
caseSensitive: true,
@ -56,6 +57,9 @@ router.get(
roomFetchError = err;
}
// We index the room directory unless the config says we shouldn't index anything
const shouldIndex = !stopSearchEngineIndexing;
const hydrogenStylesUrl = urlJoin(basePath, '/hydrogen-styles.css');
const stylesUrl = urlJoin(basePath, '/css/styles.css');
const roomDirectoryStylesUrl = urlJoin(basePath, '/css/room-directory.css');
@ -89,6 +93,7 @@ router.get(
title: `Matrix Public Archive`,
styles: [hydrogenStylesUrl, stylesUrl, roomDirectoryStylesUrl],
scripts: [jsBundleUrl],
shouldIndex,
cspNonce: res.locals.cspNonce,
}
);

View File

@ -26,6 +26,7 @@ const matrixServerUrl = config.get('matrixServerUrl');
assert(matrixServerUrl);
const matrixAccessToken = config.get('matrixAccessToken');
assert(matrixAccessToken);
// Unlike the settings above, this value is not asserted, so it is presumably
// optional (TODO confirm how `config.get` treats a missing key). When truthy,
// the date route forces `shouldIndex` to false regardless of the room's
// history visibility.
const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing');
const matrixPublicArchiveURLCreator = new MatrixPublicArchiveURLCreator(basePath);
@ -270,6 +271,7 @@ router.get(
router.get(
'/date/:yyyy(\\d{4})/:mm(\\d{2})/:dd(\\d{2})/:hourRange(\\d\\d?-\\d\\d?)?',
timeoutMiddleware,
// eslint-disable-next-line max-statements
asyncHandler(async function (req, res) {
const roomIdOrAlias = getRoomIdOrAliasFromReq(req);
@ -359,8 +361,14 @@ router.get(
);
}
// We only allow search engines to index `world_readable` rooms
const shouldIndex = roomData?.historyVisibility === `world_readable`;
// Default to no indexing (safe default)
let shouldIndex = false;
if (stopSearchEngineIndexing) {
shouldIndex = false;
} else {
// Otherwise we only allow search engines to index `world_readable` rooms
shouldIndex = roomData?.historyVisibility === `world_readable`;
}
// If we have over the `archiveMessageLimit` number of messages fetching
// from the given day, it's acceptable to have them be from surrounding