Set `X-Date-Temporal-Context` header for easy cache rules (#209)

Set `X-Date-Temporal-Context: [past|present|future]` header for easy cache rules:

 - Cache `past` things heavily
 - Cache `present`/`future` things for 5 minutes
 
This accomplishes the goal we set out for:

> - We can cache all responses except for the latest UTC day (and anything in the future). ex. `/!aMzLHLvScQCGKDNqCB:gitter.im/date/2022/10/13`
>    - For the latest day, we could set the cache expire after 5 minutes or so
>
> *-- [Matrix Public Archive deployment issue](https://github.com/vector-im/sre-internal/issues/2079)*

And this way we don't have to do any fancy date parsing and comparison from the URL, which is probably not even possible with Cloudflare cache rules.
This commit is contained in:
Eric Eastwood 2023-05-04 13:42:59 -05:00 committed by GitHub
parent b70439e95b
commit 9b067f8637
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 127 additions and 8 deletions

View File

@ -0,0 +1,34 @@
'use strict';
const assert = require('assert');
const { getUtcStartOfDayTs } = require('matrix-public-archive-shared/lib/timestamp-utilities');
/**
 * Sets the `X-Date-Temporal-Context` response header to `past`, `present`, or
 * `future`, describing how the *day* being served relates to the current UTC day.
 *
 * This is meant for cache rules: content for past days can be cached heavily,
 * whereas present/future days should not be.
 *
 * @param {object} options
 * @param {object} options.res - Response object; only `res.set(name, value)` is called.
 * @param {number} options.nowTs - The current time as an integer timestamp.
 * @param {object} options.comparedToUrlDate - Date parts of the day being served.
 * @param {number} options.comparedToUrlDate.yyyy - Full year (integer).
 * @param {number} options.comparedToUrlDate.mm - Month (integer). It is handed straight
 *   to `Date.UTC`, which treats months as zero-based.
 *   NOTE(review): confirm the caller supplies a zero-based month.
 * @param {number} options.comparedToUrlDate.dd - Day of the month (integer).
 * @throws {AssertionError} When `res` is falsy or any numeric input is not an integer.
 */
function setHeadersForDateTemporalContext({ res, nowTs, comparedToUrlDate: { yyyy, mm, dd } }) {
  assert(res);
  assert(Number.isInteger(nowTs));
  assert(Number.isInteger(yyyy));
  assert(Number.isInteger(mm));
  assert(Number.isInteger(dd));

  // Both sides of the comparison are pinned to the very start of a UTC day: "now" is
  // truncated to the start of its day, and the URL date is built from yyyy-mm-dd only
  // (so no time has elapsed since the start of that day).
  const todayStartTs = getUtcStartOfDayTs(nowTs);
  const urlDayTs = Date.UTC(yyyy, mm, dd);

  let label;
  if (urlDayTs > todayStartTs) {
    label = 'future';
  } else if (urlDayTs < todayStartTs) {
    label = 'past';
  } else {
    // Same UTC day (this is also where a non-comparable timestamp ends up)
    label = 'present';
  }

  res.set('X-Date-Temporal-Context', label);
}
module.exports = setHeadersForDateTemporalContext;

View File

@ -70,7 +70,7 @@ function setHeadersToPreloadAssets(res, pageOptions) {
return `<${scriptUrl}>; rel=modulepreload`;
});
res.append('Link', [].concat(styleLinks, fontLinks, imageLinks, scriptLinks).join(', '));
res.set('Link', [].concat(styleLinks, fontLinks, imageLinks, scriptLinks).join(', '));
}
module.exports = setHeadersToPreloadAssets;

View File

@ -24,6 +24,7 @@ const { removeMe_fetchRoomCreateEventId } = require('../lib/matrix-utils/fetch-r
const getMessagesResponseFromEventId = require('../lib/matrix-utils/get-messages-response-from-event-id');
const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/render-hydrogen-vm-render-script-to-page-html');
const setHeadersToPreloadAssets = require('../lib/set-headers-to-preload-assets');
const setHeadersForDateTemporalContext = require('../lib/set-headers-for-date-temporal-context');
const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator');
const {
MS_LOOKUP,
@ -764,7 +765,8 @@ router.get(
'archiveMessageLimit needs to be in range [1, 999]. We can only get 1000 messages at a time from Synapse and we need a buffer of at least one to see if there are too many messages on a given day so you can only configure a max of 999. If you need more messages, we will have to implement pagination'
);
const { toTimestamp, timeDefined, secondsDefined } = parseArchiveRangeFromReq(req);
const { toTimestamp, yyyy, mm, dd, timeDefined, secondsDefined } =
parseArchiveRangeFromReq(req);
let precisionFromUrl = TIME_PRECISION_VALUES.none;
if (secondsDefined) {
@ -929,6 +931,18 @@ router.get(
setHeadersToPreloadAssets(res, pageOptions);
// This is useful for caching purposes so you can heavily cache past content, but
// not present/future.
setHeadersForDateTemporalContext({
res,
nowTs,
comparedToUrlDate: {
yyyy,
mm,
dd,
},
});
res.set('Content-Type', 'text/html');
res.send(pageHtml);
})

View File

@ -21,6 +21,10 @@ const {
DIRECTION,
} = require('matrix-public-archive-shared/lib/reference-values');
const { ONE_DAY_IN_MS, ONE_HOUR_IN_MS, ONE_MINUTE_IN_MS, ONE_SECOND_IN_MS } = MS_LOOKUP;
const {
roundUpTimestampToUtcDay,
getUtcStartOfDayTs,
} = require('matrix-public-archive-shared/lib/timestamp-utilities');
const {
getTestClientForAs,
@ -2358,6 +2362,78 @@ describe('matrix-public-archive', () => {
});
});
// End-to-end check that archive pages come back with the right
// `X-Date-Temporal-Context` header (`past`/`present`/`future`) for the day requested.
//
// The suite callback is intentionally synchronous: Mocha runs suite callbacks
// synchronously to collect the tests and does not wait on anything they return.
describe('Ensure setHeadersForDateTemporalContext(...) is being set properly (useful for caching)', () => {
  // We can just use `new Date()` here but this just makes it more obvious what
  // our intention is here.
  const nowDate = new Date(Date.now());

  // Each case visits the archive for `archiveDate` and expects the response header to
  // equal `expectedTemporalContext`.
  const testCases = [
    {
      testName: 'now is present',
      archiveDate: nowDate,
      expectedTemporalContext: 'present',
    },
    {
      testName: 'start of today is present',
      archiveDate: new Date(getUtcStartOfDayTs(nowDate)),
      expectedTemporalContext: 'present',
    },
    {
      testName: 'some time today is present',
      archiveDate: new Date(
        getUtcStartOfDayTs(nowDate) +
          12 * ONE_HOUR_IN_MS +
          30 * ONE_MINUTE_IN_MS +
          30 * ONE_SECOND_IN_MS
      ),
      expectedTemporalContext: 'present',
    },
    {
      testName: 'past is in the past',
      archiveDate: new Date('2020-01-01'),
      expectedTemporalContext: 'past',
    },
  ];

  // A single room is shared by every test case in this suite
  let roomId;
  before(async () => {
    const client = await getTestClientForHs(testMatrixServerUrl1);
    roomId = await createTestRoom(client);
  });

  // Warn if it's close to the end of the UTC day. This test could be flaky and cause
  // a failure if `expectedTemporalContext` was created just before midnight (UTC) and
  // we visit the archive after midnight (UTC). The `X-Date-Temporal-Context` would
  // read as `past` when we expect `present`.
  //
  // The condition only depends on `nowDate` (not on the individual test case), so we
  // check it once for the whole suite instead of once per test case.
  if (roundUpTimestampToUtcDay(nowDate) - nowDate.getTime() < 30 * 1000 /* 30 seconds */) {
    // eslint-disable-next-line no-console
    console.warn(
      `Test is being run at the end of the UTC day. This could result in a flakey ` +
        `failure if \`expectedTemporalContext\` was created just before midnight (UTC) ` +
        `and we visit the archive after midnight (UTC). Since ` +
        `this is an e2e test we can't control the date/time exactly.`
    );
  }

  testCases.forEach((testCase) => {
    // Sanity check that the test case itself is fully specified
    assert(testCase.testName);
    assert(testCase.archiveDate);
    assert(testCase.expectedTemporalContext);

    it(testCase.testName, async () => {
      // Fetch the given page.
      archiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(
        roomId,
        testCase.archiveDate
      );
      const { res } = await fetchEndpointAsText(archiveUrl);

      const dateTemporalContextHeader = res.headers.get('X-Date-Temporal-Context');
      assert.strictEqual(dateTemporalContextHeader, testCase.expectedTemporalContext);
    });
  });
});
describe('Room directory', () => {
it('room search narrows down results', async () => {
const client = await getTestClientForHs(testMatrixServerUrl1);
@ -2626,12 +2702,7 @@ describe('matrix-public-archive', () => {
// from ourselves which may be less faff than this big warning but 🤷 - that's
// kinda like making sure `/timestamp_to_event` works by using
// `/timestamp_to_event`.
const utcMidnightOfNowDay = Date.UTC(
nowDate.getUTCFullYear(),
nowDate.getUTCMonth(),
nowDate.getUTCDate() + 1
);
if (utcMidnightOfNowDay - nowDate.getTime() < 30 * 1000 /* 30 seconds */) {
if (roundUpTimestampToUtcDay(nowDate) - nowDate.getTime() < 30 * 1000 /* 30 seconds */) {
// eslint-disable-next-line no-console
console.warn(
`Test is being run at the end of the UTC day. This could result in a flakey ` +