From dc85e839a12945371da588fb84b540db0712a94e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 8 Nov 2022 22:41:58 -0600 Subject: [PATCH] Add config to disable search engine indexing (#127) --- config/config.default.json | 4 ++++ server/routes/room-directory-routes.js | 5 +++++ server/routes/room-routes.js | 12 ++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/config/config.default.json b/config/config.default.json index 05dda90..ae93f35 100644 --- a/config/config.default.json +++ b/config/config.default.json @@ -8,7 +8,11 @@ "archiveMessageLimit": 100, "requestTimeoutMs": 25000, "logOutputFromChildProcesses": false, + //"stopSearchEngineIndexing": true, + // Tracing //"jaegerTracesEndpoint": "http://localhost:14268/api/traces", + + // Testing "testMatrixServerUrl1": "http://localhost:11008/", "testMatrixServerUrl2": "http://localhost:12008/", diff --git a/server/routes/room-directory-routes.js b/server/routes/room-directory-routes.js index cc81a08..2be031f 100644 --- a/server/routes/room-directory-routes.js +++ b/server/routes/room-directory-routes.js @@ -18,6 +18,7 @@ const matrixServerName = config.get('matrixServerName'); assert(matrixServerName); const matrixAccessToken = config.get('matrixAccessToken'); assert(matrixAccessToken); +const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing'); const router = express.Router({ caseSensitive: true, @@ -56,6 +57,9 @@ router.get( roomFetchError = err; } + // We index the room directory unless the config says we shouldn't index anything + const shouldIndex = !stopSearchEngineIndexing; + const hydrogenStylesUrl = urlJoin(basePath, '/hydrogen-styles.css'); const stylesUrl = urlJoin(basePath, '/css/styles.css'); const roomDirectoryStylesUrl = urlJoin(basePath, '/css/room-directory.css'); @@ -89,6 +93,7 @@ router.get( title: `Matrix Public Archive`, styles: [hydrogenStylesUrl, stylesUrl, roomDirectoryStylesUrl], scripts: [jsBundleUrl], + shouldIndex, cspNonce: res.locals.cspNonce, } ); diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js index ee9cd2a..1f19b92 100644 --- a/server/routes/room-routes.js +++ b/server/routes/room-routes.js @@ -26,6 +26,7 @@ const matrixServerUrl = config.get('matrixServerUrl'); assert(matrixServerUrl); const matrixAccessToken = config.get('matrixAccessToken'); assert(matrixAccessToken); +const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing'); const matrixPublicArchiveURLCreator = new MatrixPublicArchiveURLCreator(basePath); @@ -270,6 +271,7 @@ router.get( router.get( '/date/:yyyy(\\d{4})/:mm(\\d{2})/:dd(\\d{2})/:hourRange(\\d\\d?-\\d\\d?)?', timeoutMiddleware, + // eslint-disable-next-line max-statements asyncHandler(async function (req, res) { const roomIdOrAlias = getRoomIdOrAliasFromReq(req); @@ -359,8 +361,14 @@ router.get( ); } - // We only allow search engines to index `world_readable` rooms - const shouldIndex = roomData?.historyVisibility === `world_readable`; + // Default to no indexing (safe default) + let shouldIndex = false; + if (stopSearchEngineIndexing) { + shouldIndex = false; + } else { + // Otherwise we only allow search engines to index `world_readable` rooms + shouldIndex = roomData?.historyVisibility === `world_readable`; + } // If we have over the `archiveMessageLimit` number of messages fetching // from the given day, it's acceptable to have them be from surrounding