diff --git a/server/hydrogen-render/render-page-html.js b/server/hydrogen-render/render-page-html.js
index e03a58f..fced81d 100644
--- a/server/hydrogen-render/render-page-html.js
+++ b/server/hydrogen-render/render-page-html.js
@@ -35,7 +35,14 @@ function renderPageHtml({
// We shouldn't let some pages be indexed by search engines
let maybeNoIndexHtml = '';
if (!pageOptions.shouldIndex) {
- maybeNoIndexHtml = ``;
+ maybeNoIndexHtml = ``;
+ }
+
+  // We should tell search engines that some pages are NSFW, see
+  // https://developers.google.com/search/docs/crawling-indexing/safesearch
+  let maybeAdultMeta = '';
+  if (pageOptions.blockedBySafeSearch) {
+    maybeAdultMeta = `<meta name="rating" content="adult">`;
   }
const faviconMap = getFaviconAssetUrls();
@@ -45,6 +52,7 @@ function renderPageHtml({
${maybeNoIndexHtml}
+ ${maybeAdultMeta}
${sanitizeHtml(`${pageOptions.title}`)}
${sanitizeHtml(``)}
diff --git a/server/lib/matrix-utils/fetch-room-data.js b/server/lib/matrix-utils/fetch-room-data.js
index 624b1c3..91c702e 100644
--- a/server/lib/matrix-utils/fetch-room-data.js
+++ b/server/lib/matrix-utils/fetch-room-data.js
@@ -151,6 +151,7 @@ const fetchRoomData = traceFunction(async function (
const [
stateNameResDataOutcome,
+ stateTopicResDataOutcome,
stateCanonicalAliasResDataOutcome,
stateAvatarResDataOutcome,
stateHistoryVisibilityResDataOutcome,
@@ -162,6 +163,10 @@ const fetchRoomData = traceFunction(async function (
accessToken: matrixAccessToken,
abortSignal,
}),
+ fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.topic'), {
+ accessToken: matrixAccessToken,
+ abortSignal,
+ }),
fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.canonical_alias'), {
accessToken: matrixAccessToken,
abortSignal,
@@ -197,6 +202,12 @@ const fetchRoomData = traceFunction(async function (
canonicalAlias = data?.content?.alias;
}
+ let topic;
+ if (stateTopicResDataOutcome.reason === undefined) {
+ const { data } = stateTopicResDataOutcome.value;
+ topic = data?.content?.topic;
+ }
+
let avatarUrl;
if (stateAvatarResDataOutcome.reason === undefined) {
const { data } = stateAvatarResDataOutcome.value;
@@ -236,6 +247,7 @@ const fetchRoomData = traceFunction(async function (
return {
id: roomId,
name,
+ topic,
canonicalAlias,
avatarUrl,
historyVisibility,
diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js
index ff22295..a0f3989 100644
--- a/server/routes/room-routes.js
+++ b/server/routes/room-routes.js
@@ -26,6 +26,7 @@ const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/rende
const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets');
const setHeadersForDateTemporalContext = require('../lib/set-headers-for-date-temporal-context');
const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator');
+const checkTextForNsfw = require('matrix-public-archive-shared/lib/check-text-for-nsfw');
const {
MS_LOOKUP,
TIME_PRECISION_VALUES,
@@ -896,9 +897,16 @@ router.get(
shouldIndex = roomData?.historyVisibility === `world_readable`;
}
+ const isNsfw = checkTextForNsfw(
+ // We concat the name, topic, etc together to simply do a single check against
+ // all of the text.
+ `${roomData.name} --- ${roomData.canonicalAlias} --- ${roomData.topic} `
+ );
+
const pageOptions = {
title: `${roomData.name} - Matrix Public Archive`,
description: `View the history of ${roomData.name} in the Matrix Public Archive`,
+ blockedBySafeSearch: isNsfw,
entryPoint: 'client/js/entry-client-hydrogen.js',
locationHref: urlJoin(basePath, req.originalUrl),
shouldIndex,
diff --git a/shared/lib/check-text-for-nsfw.js b/shared/lib/check-text-for-nsfw.js
new file mode 100644
index 0000000..ef65518
--- /dev/null
+++ b/shared/lib/check-text-for-nsfw.js
@@ -0,0 +1,27 @@
+'use strict';
+
+const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];
+
+// Escape regex metacharacters so that every keyword is matched literally (the `+` in
+// `18+` would otherwise be parsed as a quantifier and make a bare `18` match).
+function escapeRegExp(text) {
+  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+// `\b` only works next to word characters, so it can't sit after the `+` in `18+`.
+// Instead we require a non-word character (or the start/end of the text) on either
+// side of the keyword, which is equivalent to `\b` for the plain-word keywords.
+const NSFW_REGEXES = NSFW_WORDS.map(
+  (word) => new RegExp(`(?:^|\\W)${escapeRegExp(word)}(?!\\w)`, 'i')
+);
+
+// A very basic check for NSFW content that just looks for some keywords in the given
+// text.
+//
+// @param {string} text - Text to scan (ex. a room name, alias and topic joined together)
+// @returns {boolean} `true` when any NSFW keyword appears as a whole word
+function checkTextForNsfw(text) {
+  return NSFW_REGEXES.some((regex) => regex.test(text));
+}
+
+module.exports = checkTextForNsfw;
diff --git a/shared/viewmodels/RoomDirectoryViewModel.js b/shared/viewmodels/RoomDirectoryViewModel.js
index 90742b8..1b28022 100644
--- a/shared/viewmodels/RoomDirectoryViewModel.js
+++ b/shared/viewmodels/RoomDirectoryViewModel.js
@@ -8,12 +8,10 @@ const LOCAL_STORAGE_KEYS = require('matrix-public-archive-shared/lib/local-stora
const ModalViewModel = require('matrix-public-archive-shared/viewmodels/ModalViewModel');
const HomeserverSelectionModalContentViewModel = require('matrix-public-archive-shared/viewmodels/HomeserverSelectionModalContentViewModel');
const RoomCardViewModel = require('matrix-public-archive-shared/viewmodels/RoomCardViewModel');
+const checkTextForNsfw = require('matrix-public-archive-shared/lib/check-text-for-nsfw');
const DEFAULT_SERVER_LIST = ['matrix.org', 'gitter.im', 'libera.chat'];
-const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];
-const NSFW_REGEXES = NSFW_WORDS.map((word) => new RegExp(`\\b${word}\\b`, 'i'));
-
class RoomDirectoryViewModel extends ViewModel {
constructor(options) {
super(options);
@@ -267,9 +265,7 @@ class RoomDirectoryViewModel extends ViewModel {
this._roomCardViewModelsFilterMap.setApply((roomId, vm) => {
// We concat the name, topic, etc together to simply do a single check against
// all of the text.
- const isNsfw = NSFW_REGEXES.some((regex) =>
- regex.test(vm.name + ' ---- ' + vm.canonicalAlias + ' --- ' + vm.topic)
- );
+ const isNsfw = checkTextForNsfw(vm.name + ' --- ' + vm.canonicalAlias + ' --- ' + vm.topic);
vm.setBlockedBySafeSearch(isNsfw);
});
} else {
diff --git a/test/e2e-tests.js b/test/e2e-tests.js
index 526f489..9f21fdf 100644
--- a/test/e2e-tests.js
+++ b/test/e2e-tests.js
@@ -633,6 +633,58 @@ describe('matrix-public-archive', () => {
}
});
+  describe('safe search', () => {
+    [
+      {
+        testName: 'nsfw words in title',
+        createRoomOptions: {
+          name: `uranus-nsfw`,
+        },
+      },
+      {
+        testName: 'nsfw words in topic',
+        createRoomOptions: {
+          name: `mars`,
+          topic: 'Get your ass to mars (NSFW)',
+        },
+      },
+    ].forEach((testCase) => {
+      it(`${testCase.testName} is correctly blocked/marked by safe search`, async () => {
+        const client = await getTestClientForHs(testMatrixServerUrl1);
+        const roomId = await createTestRoom(client, testCase.createRoomOptions);
+
+        archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(roomId, archiveDate);
+        const { data: archivePageHtml } = await fetchEndpointAsText(archiveUrl);
+        const dom = parseHTML(archivePageHtml);
+
+        // Make sure the `<meta name="rating" content="adult">` tag exists on the page
+        // telling search engines that this is an adult page.
+        const metaElements = Array.from(dom.document.querySelectorAll('meta'));
+        assert.strictEqual(
+          dom.document.querySelector(`meta[name="rating"]`)?.getAttribute('content'),
+          'adult',
+          `Unable to find <meta name="rating" content="adult"> on the page. We found these meta elements though:${metaElements
+            // eslint-disable-next-line max-nested-callbacks
+            .map((metaElement) => `\n \`${metaElement.outerHTML}\``)
+            .join('')}`
+        );
+      });
+    });
+
+    it('normal room is not blocked/marked by safe search', async () => {
+      const client = await getTestClientForHs(testMatrixServerUrl1);
+      const roomId = await createTestRoom(client);
+
+      archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(roomId, archiveDate);
+      const { data: archivePageHtml } = await fetchEndpointAsText(archiveUrl);
+      const dom = parseHTML(archivePageHtml);
+
+      // Make sure the `<meta name="rating" content="adult">` tag does NOT exist on the
+      // page telling search engines that this is an adult page.
+      assert.strictEqual(dom.document.querySelector(`meta[name="rating"]`), null);
+    });
+  });
+
describe('time selector', () => {
it('shows time selector when there are too many messages from the same day', async () => {
// Set this low so it's easy to hit the limit