From b7185c8231a95e66f42186196eb96220045f0d7c Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 29 Jun 2023 18:49:25 -0500
Subject: [PATCH] Fix `18+` false positives with NSFW check

Was noticing this with our test room names like: `planet-1688081266353-room-18`
---
 shared/lib/check-text-for-nsfw.js            | 8 +++++++-
 test/shared/lib/check-text-for-nsfw-tests.js | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/shared/lib/check-text-for-nsfw.js b/shared/lib/check-text-for-nsfw.js
index 445e557..1c20820 100644
--- a/shared/lib/check-text-for-nsfw.js
+++ b/shared/lib/check-text-for-nsfw.js
@@ -1,7 +1,13 @@
 'use strict';
 
+const escapeStringRegexp = require('escape-string-regexp');
+
 const NSFW_WORDS = ['nsfw', 'porn', 'nudes', 'sex', '18+'];
-const NSFW_REGEXES = NSFW_WORDS.map((word) => new RegExp(`(\\b|_)${word}(\\b|_)`, 'i'));
+const NSFW_REGEXES = NSFW_WORDS.map(
+  // We use `(\b|_|-|\s|^)` instead of just `(\b|_)` because the word boundary doesn't
+  // match next to the `+` sign in `18+`
+  (word) => new RegExp(`(\\b|_|-|\\s|^)${escapeStringRegexp(word)}(\\b|_|-|\\s|$)`, 'i')
+);
 
 // A very basic check for NSFW content that just looks for some keywords in the given
 // text
diff --git a/test/shared/lib/check-text-for-nsfw-tests.js b/test/shared/lib/check-text-for-nsfw-tests.js
index 9a19179..3d7ba04 100644
--- a/test/shared/lib/check-text-for-nsfw-tests.js
+++ b/test/shared/lib/check-text-for-nsfw-tests.js
@@ -13,6 +13,11 @@ describe('checkTextForNsfw', () => {
     NSFW_foo: true,
     'NSFW-foo': true,
     'NSFW:foo': true,
+    '18+ only': true,
+    // Previous false positives that we ran into in the wild that should not be flagged
+    // as NSFW
+    '1888-great-blizzard': false,
+    'argon-18-element': false,
   }).forEach(([inputText, expectedNsfw]) => {
     it(`should return ${expectedNsfw} for '${inputText}'`, () => {
       assert.strictEqual(