Use `rel=canonical` link to de-duplicate event permalinks (#266)

Fix https://github.com/matrix-org/matrix-public-archive/issues/251
This commit is contained in:
Eric Eastwood 2023-06-22 01:50:55 -05:00 committed by GitHub
parent 3414fcf7b2
commit 0f522bed20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 24 additions and 6 deletions

View File

@@ -69,14 +69,14 @@ async function _renderHydrogenToStringUnsafe(renderOptions) {
assert(renderOptions.vmRenderScriptFilePath);
assert(renderOptions.vmRenderContext);
assert(renderOptions.pageOptions);
assert(renderOptions.pageOptions.locationHref);
assert(renderOptions.pageOptions.locationUrl);
assert(renderOptions.pageOptions.cspNonce);
const { dom, vmContext } = createDomAndSetupVmContext();
// A small `window.location` stub
if (!dom.window.location) {
const locationUrl = new URL(renderOptions.pageOptions.locationHref);
const locationUrl = new URL(renderOptions.pageOptions.locationUrl);
dom.window.location = {};
[
'hash',

View File

@@ -71,6 +71,11 @@ function renderPageHtml({
metaImageUrl = pageOptions.imageUrl;
}
let maybeRelCanonical = '';
if (pageOptions.canonicalUrl) {
maybeRelCanonical = sanitizeHtml(`<link rel="canonical" href="${pageOptions.canonicalUrl}">`);
}
const pageHtml = `
<!doctype html>
<html lang="en">
@@ -83,6 +88,7 @@ function renderPageHtml({
${sanitizeHtml(`<meta property="og:image" content="${metaImageUrl}">`)}
<link rel="icon" href="${pageAssetUrls.faviconIco}" sizes="any">
<link rel="icon" href="${pageAssetUrls.faviconSvg}" type="image/svg+xml">
${maybeRelCanonical}
${styles
.map(
(styleUrl) =>

View File

@@ -86,7 +86,7 @@ async function timeoutMiddleware(req, res, next) {
title: `Server timeout - Matrix Public Archive`,
description: `Unable to respond in time (${requestTimeoutMs / 1000}s)`,
entryPoint: 'client/js/entry-client-timeout.js',
locationHref: urlJoin(basePath, req.originalUrl),
locationUrl: urlJoin(basePath, req.originalUrl),
// We don't have a Matrix room so we don't know whether or not to index. Just choose
// a safe-default of false.
shouldIndex: false,

View File

@@ -17,7 +17,7 @@ function clientSideRoomAliasHashRedirectRoute(req, res) {
title: `Page not found - Matrix Public Archive`,
description: `This page does not exist but we may be able to redirect you to the right place.`,
entryPoint: 'client/js/entry-client-room-alias-hash-redirect.js',
locationHref: urlJoin(basePath, req.originalUrl),
locationUrl: urlJoin(basePath, req.originalUrl),
// We don't have a Matrix room so we don't know whether or not to index. Just choose
// a safe-default of false.
shouldIndex: false,

View File

@@ -78,7 +78,7 @@ router.get(
description:
'Browse thousands of rooms using Matrix. The new portal into the Matrix ecosystem.',
entryPoint: 'client/js/entry-client-room-directory.js',
locationHref: urlJoin(basePath, req.originalUrl),
locationUrl: urlJoin(basePath, req.originalUrl),
shouldIndex,
cspNonce: res.locals.cspNonce,
};

View File

@@ -916,7 +916,19 @@ router.get(
}),
blockedBySafeSearch: isNsfw,
entryPoint: 'client/js/entry-client-hydrogen.js',
locationHref: urlJoin(basePath, req.originalUrl),
locationUrl: urlJoin(basePath, req.originalUrl),
canonicalUrl: matrixPublicArchiveURLCreator.archiveUrlForDate(
roomIdOrAlias,
new Date(toTimestamp),
{
preferredPrecision: precisionFromUrl,
// We purposely omit `scrollStartEventId` here because the canonical location
// for any given event ID is the page it resides on.
//
// We can avoid passing along the `viaServers` because we already joined the
// room above (see `ensureRoomJoined`).
}
),
shouldIndex,
cspNonce: res.locals.cspNonce,
};