Mark NSFW room pages with `<meta name="rating" content="adult">` (#216)
Related docs:
- https://developers.google.com/search/docs/crawling-indexing/safesearch
- https://developers.google.com/search/docs/crawling-indexing/special-tags
This commit is contained in:
parent
aeceb195e2
commit
198e8c09be
|
@ -35,7 +35,14 @@ function renderPageHtml({
|
||||||
// We shouldn't let some pages be indexed by search engines
|
// We shouldn't let some pages be indexed by search engines
|
||||||
let maybeNoIndexHtml = '';
|
let maybeNoIndexHtml = '';
|
||||||
if (!pageOptions.shouldIndex) {
|
if (!pageOptions.shouldIndex) {
|
||||||
maybeNoIndexHtml = `<meta name="robots" content="noindex, nofollow" />`;
|
maybeNoIndexHtml = `<meta name="robots" content="noindex, nofollow">`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should tell search engines that some pages are NSFW, see
|
||||||
|
// https://developers.google.com/search/docs/crawling-indexing/safesearch
|
||||||
|
let maybeAdultMeta = '';
|
||||||
|
if (pageOptions.blockedBySafeSearch) {
|
||||||
|
maybeAdultMeta = `<meta name="rating" content="adult">`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const faviconMap = getFaviconAssetUrls();
|
const faviconMap = getFaviconAssetUrls();
|
||||||
|
@ -45,6 +52,7 @@ function renderPageHtml({
|
||||||
<head>
|
<head>
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||||
${maybeNoIndexHtml}
|
${maybeNoIndexHtml}
|
||||||
|
${maybeAdultMeta}
|
||||||
${sanitizeHtml(`<title>${pageOptions.title}</title>`)}
|
${sanitizeHtml(`<title>${pageOptions.title}</title>`)}
|
||||||
${sanitizeHtml(`<meta name="description" content="${pageOptions.description}">`)}
|
${sanitizeHtml(`<meta name="description" content="${pageOptions.description}">`)}
|
||||||
<link rel="icon" href="${faviconMap.ico}" sizes="any">
|
<link rel="icon" href="${faviconMap.ico}" sizes="any">
|
||||||
|
|
|
@ -151,6 +151,7 @@ const fetchRoomData = traceFunction(async function (
|
||||||
|
|
||||||
const [
|
const [
|
||||||
stateNameResDataOutcome,
|
stateNameResDataOutcome,
|
||||||
|
stateTopicResDataOutcome,
|
||||||
stateCanonicalAliasResDataOutcome,
|
stateCanonicalAliasResDataOutcome,
|
||||||
stateAvatarResDataOutcome,
|
stateAvatarResDataOutcome,
|
||||||
stateHistoryVisibilityResDataOutcome,
|
stateHistoryVisibilityResDataOutcome,
|
||||||
|
@ -162,6 +163,10 @@ const fetchRoomData = traceFunction(async function (
|
||||||
accessToken: matrixAccessToken,
|
accessToken: matrixAccessToken,
|
||||||
abortSignal,
|
abortSignal,
|
||||||
}),
|
}),
|
||||||
|
fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.topic'), {
|
||||||
|
accessToken: matrixAccessToken,
|
||||||
|
abortSignal,
|
||||||
|
}),
|
||||||
fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.canonical_alias'), {
|
fetchEndpointAsJson(getStateEndpointForRoomIdAndEventType(roomId, 'm.room.canonical_alias'), {
|
||||||
accessToken: matrixAccessToken,
|
accessToken: matrixAccessToken,
|
||||||
abortSignal,
|
abortSignal,
|
||||||
|
@ -197,6 +202,12 @@ const fetchRoomData = traceFunction(async function (
|
||||||
canonicalAlias = data?.content?.alias;
|
canonicalAlias = data?.content?.alias;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let topic;
|
||||||
|
if (stateTopicResDataOutcome.reason === undefined) {
|
||||||
|
const { data } = stateTopicResDataOutcome.value;
|
||||||
|
topic = data?.content?.topic;
|
||||||
|
}
|
||||||
|
|
||||||
let avatarUrl;
|
let avatarUrl;
|
||||||
if (stateAvatarResDataOutcome.reason === undefined) {
|
if (stateAvatarResDataOutcome.reason === undefined) {
|
||||||
const { data } = stateAvatarResDataOutcome.value;
|
const { data } = stateAvatarResDataOutcome.value;
|
||||||
|
@ -236,6 +247,7 @@ const fetchRoomData = traceFunction(async function (
|
||||||
return {
|
return {
|
||||||
id: roomId,
|
id: roomId,
|
||||||
name,
|
name,
|
||||||
|
topic,
|
||||||
canonicalAlias,
|
canonicalAlias,
|
||||||
avatarUrl,
|
avatarUrl,
|
||||||
historyVisibility,
|
historyVisibility,
|
||||||
|
|
|
@ -26,6 +26,7 @@ const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/rende
|
||||||
const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets');
|
const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets');
|
||||||
const setHeadersForDateTemporalContext = require('../lib/set-headers-for-date-temporal-context');
|
const setHeadersForDateTemporalContext = require('../lib/set-headers-for-date-temporal-context');
|
||||||
const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator');
|
const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator');
|
||||||
|
const checkTextForNsfw = require('matrix-public-archive-shared/lib/check-text-for-nsfw');
|
||||||
const {
|
const {
|
||||||
MS_LOOKUP,
|
MS_LOOKUP,
|
||||||
TIME_PRECISION_VALUES,
|
TIME_PRECISION_VALUES,
|
||||||
|
@ -896,9 +897,16 @@ router.get(
|
||||||
shouldIndex = roomData?.historyVisibility === `world_readable`;
|
shouldIndex = roomData?.historyVisibility === `world_readable`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const isNsfw = checkTextForNsfw(
|
||||||
|
// We concat the name, topic, etc together to simply do a single check against
|
||||||
|
// all of the text.
|
||||||
|
`${roomData.name} --- ${roomData.canonicalAlias} --- ${roomData.topic} `
|
||||||
|
);
|
||||||
|
|
||||||
const pageOptions = {
|
const pageOptions = {
|
||||||
title: `${roomData.name} - Matrix Public Archive`,
|
title: `${roomData.name} - Matrix Public Archive`,
|
||||||
description: `View the history of ${roomData.name} in the Matrix Public Archive`,
|
description: `View the history of ${roomData.name} in the Matrix Public Archive`,
|
||||||
|
blockedBySafeSearch: isNsfw,
|
||||||
entryPoint: 'client/js/entry-client-hydrogen.js',
|
entryPoint: 'client/js/entry-client-hydrogen.js',
|
||||||
locationHref: urlJoin(basePath, req.originalUrl),
|
locationHref: urlJoin(basePath, req.originalUrl),
|
||||||
shouldIndex,
|
shouldIndex,
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
// Keywords that mark a piece of text as NSFW. Matching is case-insensitive and
// only counts when the keyword stands on its own (not embedded in a longer word).
const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];

// Escape regex metacharacters so a keyword like `18+` is matched literally
// instead of `+` being interpreted as a "one or more" quantifier (which would
// make a plain `18` or `1888` count as NSFW).
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

// We spell out the token edges instead of using `\b` because `\b` misbehaves
// when a keyword ends in a non-word character (after the `+` in `18+`, `\b`
// would demand that a word character follows). For keywords made only of word
// characters this is equivalent to `\bword\b`. A lookbehind is avoided on
// purpose since this module is also loaded in browsers.
const NSFW_REGEXES = NSFW_WORDS.map(
  (word) => new RegExp(`(?:^|\\W)${escapeRegExp(word)}(?!\\w)`, 'i')
);

/**
 * A very basic check for NSFW content that just looks for some keywords in the
 * given text.
 *
 * @param {string} text - Text to scan (e.g. room name, alias and topic joined together).
 * @returns {boolean} `true` when any NSFW keyword appears as a standalone token.
 */
function checkTextForNsfw(text) {
  return NSFW_REGEXES.some((regex) => regex.test(text));
}
|
||||||
|
|
||||||
|
module.exports = checkTextForNsfw;
|
|
@ -8,12 +8,10 @@ const LOCAL_STORAGE_KEYS = require('matrix-public-archive-shared/lib/local-stora
|
||||||
const ModalViewModel = require('matrix-public-archive-shared/viewmodels/ModalViewModel');
|
const ModalViewModel = require('matrix-public-archive-shared/viewmodels/ModalViewModel');
|
||||||
const HomeserverSelectionModalContentViewModel = require('matrix-public-archive-shared/viewmodels/HomeserverSelectionModalContentViewModel');
|
const HomeserverSelectionModalContentViewModel = require('matrix-public-archive-shared/viewmodels/HomeserverSelectionModalContentViewModel');
|
||||||
const RoomCardViewModel = require('matrix-public-archive-shared/viewmodels/RoomCardViewModel');
|
const RoomCardViewModel = require('matrix-public-archive-shared/viewmodels/RoomCardViewModel');
|
||||||
|
const checkTextForNsfw = require('matrix-public-archive-shared/lib/check-text-for-nsfw');
|
||||||
|
|
||||||
const DEFAULT_SERVER_LIST = ['matrix.org', 'gitter.im', 'libera.chat'];
|
const DEFAULT_SERVER_LIST = ['matrix.org', 'gitter.im', 'libera.chat'];
|
||||||
|
|
||||||
const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];
|
|
||||||
const NSFW_REGEXES = NSFW_WORDS.map((word) => new RegExp(`\\b${word}\\b`, 'i'));
|
|
||||||
|
|
||||||
class RoomDirectoryViewModel extends ViewModel {
|
class RoomDirectoryViewModel extends ViewModel {
|
||||||
constructor(options) {
|
constructor(options) {
|
||||||
super(options);
|
super(options);
|
||||||
|
@ -267,9 +265,7 @@ class RoomDirectoryViewModel extends ViewModel {
|
||||||
this._roomCardViewModelsFilterMap.setApply((roomId, vm) => {
|
this._roomCardViewModelsFilterMap.setApply((roomId, vm) => {
|
||||||
// We concat the name, topic, etc together to simply do a single check against
|
// We concat the name, topic, etc together to simply do a single check against
|
||||||
// all of the text.
|
// all of the text.
|
||||||
const isNsfw = NSFW_REGEXES.some((regex) =>
|
const isNsfw = checkTextForNsfw(vm.name + ' --- ' + vm.canonicalAlias + ' --- ' + vm.topic);
|
||||||
regex.test(vm.name + ' ---- ' + vm.canonicalAlias + ' --- ' + vm.topic)
|
|
||||||
);
|
|
||||||
vm.setBlockedBySafeSearch(isNsfw);
|
vm.setBlockedBySafeSearch(isNsfw);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -633,6 +633,58 @@ describe('matrix-public-archive', () => {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('safe search', () => {
|
||||||
|
[
|
||||||
|
{
|
||||||
|
testName: 'nsfw words in title',
|
||||||
|
createRoomOptions: {
|
||||||
|
name: `uranus-nsfw`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
testName: 'nsfw words in topic',
|
||||||
|
createRoomOptions: {
|
||||||
|
name: `mars`,
|
||||||
|
topic: 'Get your ass to mars (NSFW)',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
].forEach((testCase) => {
|
||||||
|
it(`${testCase.testName} is correctly blocked/marked by safe search`, async () => {
|
||||||
|
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||||
|
const roomId = await createTestRoom(client, testCase.createRoomOptions);
|
||||||
|
|
||||||
|
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(roomId, archiveDate);
|
||||||
|
const { data: archivePageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||||
|
const dom = parseHTML(archivePageHtml);
|
||||||
|
|
||||||
|
// Make sure the `<meta name="rating" ...>` tag exists on the page
|
||||||
|
// telling search engines that this is an adult page.
|
||||||
|
const metaElements = Array.from(dom.document.querySelectorAll('meta'));
|
||||||
|
assert.strictEqual(
|
||||||
|
dom.document.querySelector(`meta[name="rating"]`)?.getAttribute('content'),
|
||||||
|
'adult',
|
||||||
|
`Unable to find <meta name="rating" content="adult"> on the page. We found these meta elements though:${metaElements
|
||||||
|
// eslint-disable-next-line max-nested-callbacks
|
||||||
|
.map((metaElement) => `\n \`${metaElement.outerHTML}\``)
|
||||||
|
.join('')}`
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('normal room is not blocked/marked by safe search', async () => {
|
||||||
|
const client = await getTestClientForHs(testMatrixServerUrl1);
|
||||||
|
const roomId = await createTestRoom(client);
|
||||||
|
|
||||||
|
archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(roomId, archiveDate);
|
||||||
|
const { data: archivePageHtml } = await fetchEndpointAsText(archiveUrl);
|
||||||
|
const dom = parseHTML(archivePageHtml);
|
||||||
|
|
||||||
|
// Make sure the `<meta name="rating" ...>` tag does NOT exist on the
|
||||||
|
// page telling search engines that this is an adult page.
|
||||||
|
assert.strictEqual(dom.document.querySelector(`meta[name="rating"]`), null);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('time selector', () => {
|
describe('time selector', () => {
|
||||||
it('shows time selector when there are too many messages from the same day', async () => {
|
it('shows time selector when there are too many messages from the same day', async () => {
|
||||||
// Set this low so it's easy to hit the limit
|
// Set this low so it's easy to hit the limit
|
||||||
|
|
Loading…
Reference in New Issue