From 9b067f8637e71992400a908eab71c1050c68be74 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 May 2023 13:42:59 -0500 Subject: [PATCH] Set `X-Date-Temporal-Context` header for easy cache rules (#209) Set `X-Date-Temporal-Context: [past|present|future]` header for easy cache rules: - Cache `past` things heavily - Cache `present`/`future` things for 5 minutes This accomplishes the goal we set out for: > - We can cache all responses except for the latest UTC day (and anything in the future). ex. `/!aMzLHLvScQCGKDNqCB:gitter.im/date/2022/10/13` > - For the latest day, we could set the cache expire after 5 minutes or so > > *-- [Matrix Public Archive deployment issue](https://github.com/vector-im/sre-internal/issues/2079)* And this way we don't have to do any fancy date parsing and comparison from the URL which is probably not even possible with Cloudflare cache rules. --- .../set-headers-for-date-temporal-context.js | 34 ++++++++ server/lib/set-headers-to-preload-assets.js | 2 +- server/routes/room-routes.js | 16 +++- test/e2e-tests.js | 83 +++++++++++++++++-- 4 files changed, 127 insertions(+), 8 deletions(-) create mode 100644 server/lib/set-headers-for-date-temporal-context.js diff --git a/server/lib/set-headers-for-date-temporal-context.js b/server/lib/set-headers-for-date-temporal-context.js new file mode 100644 index 0000000..a72f63f --- /dev/null +++ b/server/lib/set-headers-for-date-temporal-context.js @@ -0,0 +1,34 @@ +'use strict'; + +const assert = require('assert'); + +const { getUtcStartOfDayTs } = require('matrix-public-archive-shared/lib/timestamp-utilities'); + +// `X-Date-Temporal-Context` indicates the temporal context of the content, whether it +// is related to a past, present, or future *day*. +// +// This is useful for caching purposes so you can heavily cache past content, but not +// present/future. 
+function setHeadersForDateTemporalContext({ res, nowTs, comparedToUrlDate: { yyyy, mm, dd } }) { + assert(res); + assert(Number.isInteger(nowTs)); + assert(Number.isInteger(yyyy)); + assert(Number.isInteger(mm)); + assert(Number.isInteger(dd)); + + // We use the start of the UTC day so we can compare apples to apples with a new date + // constructed with yyyy-mm-dd (no time occured since the start of the day) + const startOfTodayTs = getUtcStartOfDayTs(nowTs); + const compareTs = Date.UTC(yyyy, mm, dd); + + let temporalContext = 'present'; + if (compareTs < startOfTodayTs) { + temporalContext = 'past'; + } else if (compareTs > startOfTodayTs) { + temporalContext = 'future'; + } + + res.set('X-Date-Temporal-Context', temporalContext); +} + +module.exports = setHeadersForDateTemporalContext; diff --git a/server/lib/set-headers-to-preload-assets.js b/server/lib/set-headers-to-preload-assets.js index 9a173f7..2732992 100644 --- a/server/lib/set-headers-to-preload-assets.js +++ b/server/lib/set-headers-to-preload-assets.js @@ -70,7 +70,7 @@ function setHeadersToPreloadAssets(res, pageOptions) { return `<${scriptUrl}>; rel=modulepreload`; }); - res.append('Link', [].concat(styleLinks, fontLinks, imageLinks, scriptLinks).join(', ')); + res.set('Link', [].concat(styleLinks, fontLinks, imageLinks, scriptLinks).join(', ')); } module.exports = setHeadersToPreloadAssets; diff --git a/server/routes/room-routes.js b/server/routes/room-routes.js index 4cc6d11..7edbe32 100644 --- a/server/routes/room-routes.js +++ b/server/routes/room-routes.js @@ -24,6 +24,7 @@ const { removeMe_fetchRoomCreateEventId } = require('../lib/matrix-utils/fetch-r const getMessagesResponseFromEventId = require('../lib/matrix-utils/get-messages-response-from-event-id'); const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/render-hydrogen-vm-render-script-to-page-html'); const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets'); +const 
setHeadersForDateTemporalContext = require('../lib/set-headers-for-date-temporal-context'); const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator'); const { MS_LOOKUP, @@ -764,7 +765,8 @@ router.get( 'archiveMessageLimit needs to be in range [1, 999]. We can only get 1000 messages at a time from Synapse and we need a buffer of at least one to see if there are too many messages on a given day so you can only configure a max of 999. If you need more messages, we will have to implement pagination' ); - const { toTimestamp, timeDefined, secondsDefined } = parseArchiveRangeFromReq(req); + const { toTimestamp, yyyy, mm, dd, timeDefined, secondsDefined } = + parseArchiveRangeFromReq(req); let precisionFromUrl = TIME_PRECISION_VALUES.none; if (secondsDefined) { @@ -929,6 +931,18 @@ router.get( setHeadersToPreloadAssets(res, pageOptions); + // This is useful for caching purposes so you can heavily cache past content, but + // not present/future. + setHeadersForDateTemporalContext({ + res, + nowTs, + comparedToUrlDate: { + yyyy, + mm, + dd, + }, + }); + res.set('Content-Type', 'text/html'); res.send(pageHtml); }) diff --git a/test/e2e-tests.js b/test/e2e-tests.js index 47fd8eb..660c51c 100644 --- a/test/e2e-tests.js +++ b/test/e2e-tests.js @@ -21,6 +21,10 @@ const { DIRECTION, } = require('matrix-public-archive-shared/lib/reference-values'); const { ONE_DAY_IN_MS, ONE_HOUR_IN_MS, ONE_MINUTE_IN_MS, ONE_SECOND_IN_MS } = MS_LOOKUP; +const { + roundUpTimestampToUtcDay, + getUtcStartOfDayTs, +} = require('matrix-public-archive-shared/lib/timestamp-utilities'); const { getTestClientForAs, @@ -2358,6 +2362,78 @@ describe('matrix-public-archive', () => { }); }); + describe('Ensure setHeadersForDateTemporalContext(...) is being set properly (useful for caching)', async () => { + // We can just use `new Date()` here but this just makes it more obvious what + // our intention is here. 
+ const nowDate = new Date(Date.now()); + + const testCases = [ + { + testName: 'now is present', + archiveDate: nowDate, + expectedTemporalContext: 'present', + }, + { + testName: 'start of today is present', + archiveDate: new Date(getUtcStartOfDayTs(nowDate)), + expectedTemporalContext: 'present', + }, + { + testName: 'some time today is present', + archiveDate: new Date( + getUtcStartOfDayTs(nowDate) + + 12 * ONE_HOUR_IN_MS + + 30 * ONE_MINUTE_IN_MS + + 30 * ONE_SECOND_IN_MS + ), + expectedTemporalContext: 'present', + }, + { + testName: 'past is in the past', + archiveDate: new Date('2020-01-01'), + expectedTemporalContext: 'past', + }, + ]; + + let roomId; + before(async () => { + const client = await getTestClientForHs(testMatrixServerUrl1); + roomId = await createTestRoom(client); + }); + + testCases.forEach((testCase) => { + assert(testCase.testName); + assert(testCase.archiveDate); + assert(testCase.expectedTemporalContext); + + // Warn if it's close to the end of the UTC day. This test could be a flakey and + // cause a failure if `expectedTemporalContext` was created just before midnight + // (UTC) and we visit the archive after midnight (UTC). The + // `X-Date-Temporal-Context` would read as `past` when we expect `present`. + if (roundUpTimestampToUtcDay(nowDate) - nowDate.getTime() < 30 * 1000 /* 30 seconds */) { + // eslint-disable-next-line no-console + console.warn( + `Test is being run at the end of the UTC day. This could result in a flakey ` + + `failure if \`expectedTemporalContext\` was created just before midnight (UTC) ` + + `and we visit the archive after midnight (UTC). Since ` + + `this is an e2e test we can't control the date/time exactly.` + ); + } + + it(testCase.testName, async () => { + // Fetch the given page. 
+ archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate( + roomId, + testCase.archiveDate + ); + const { res } = await fetchEndpointAsText(archiveUrl); + + const dateTemporalContextHeader = res.headers.get('X-Date-Temporal-Context'); + assert.strictEqual(dateTemporalContextHeader, testCase.expectedTemporalContext); + }); + }); + }); + describe('Room directory', () => { it('room search narrows down results', async () => { const client = await getTestClientForHs(testMatrixServerUrl1); @@ -2626,12 +2702,7 @@ describe('matrix-public-archive', () => { // from ourselves which may be less faff than this big warning but 🤷 - that's // kinda like making sure `/timestamp_to_event` works by using // `/timestamp_to_event`. - const utcMidnightOfNowDay = Date.UTC( - nowDate.getUTCFullYear(), - nowDate.getUTCMonth(), - nowDate.getUTCDate() + 1 - ); - if (utcMidnightOfNowDay - nowDate.getTime() < 30 * 1000 /* 30 seconds */) { + if (roundUpTimestampToUtcDay(nowDate) - nowDate.getTime() < 30 * 1000 /* 30 seconds */) { // eslint-disable-next-line no-console console.warn( `Test is being run at the end of the UTC day. This could result in a flakey ` +