Jump forward and backward seamlessly (#121)

Fix https://github.com/matrix-org/matrix-public-archive/issues/120
Follow-up to https://github.com/matrix-org/matrix-public-archive/pull/114

 - Uses event permalinks (`?at=$xxx`) so the scroll continues from where you should start reading again.
 - When we jump forwards, we back-track to the nearest earlier day boundary so that there is never more than a page limit's worth of messages between where we jumped from and where the next page starts, meaning no messages are lost in a gap (see the sketch below).
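
A minimal sketch of that forward-jump back-track (illustrative only; the `backTrackToDayBoundary` helper below is hypothetical and not part of this commit, which does the equivalent date math inline in the `/jump` route):

```js
// Illustrative sketch, not the actual route code: back-track a forward jump's
// landing timestamp to the very end of the previous UTC day, so that the next
// page, rendered backwards from that point, overlaps the page we jumped from
// and no messages can fall into a gap.
function backTrackToDayBoundary(timestampOfLastMessage) {
  const dateOfLastMessage = new Date(timestampOfLastMessage);

  // UTC midnight at the start of the day the last message landed on
  const utcMidnight = Date.UTC(
    dateOfLastMessage.getUTCFullYear(),
    dateOfLastMessage.getUTCMonth(),
    dateOfLastMessage.getUTCDate()
  );

  // 1ms before that midnight, i.e. the end of the day before
  return new Date(utcMidnight - 1);
}

// e.g. a forward jump that lands at 2022-11-03T14:05:00Z renders the archive
// from 2022-11-02T23:59:59.999Z backwards instead
backTrackToDayBoundary(Date.parse('2022-11-03T14:05:00Z')).toISOString();
// -> '2022-11-02T23:59:59.999Z'
```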
Eric Eastwood 2022-11-03 05:06:53 -05:00 committed by GitHub
parent 5bae040d72
commit 026a08a77a
2 changed files with 125 additions and 54 deletions


@@ -152,6 +152,7 @@ router.get(
router.get(
'/jump',
// eslint-disable-next-line max-statements
asyncHandler(async function (req, res) {
const roomIdOrAlias = getRoomIdOrAliasFromReq(req);
@@ -159,15 +160,13 @@ router.get(
assert(!Number.isNaN(ts), '?ts query parameter must be a number');
const dir = req.query.dir;
assert(['f', 'b'].includes(dir), '?dir query parameter must be [f|b]');
const scrollStartPosition = req.query.continue;
// We have to wait for the room join to happen first before we can use the jump to
// date endpoint
const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via);
let originServerTs;
let eventIdForTimestamp;
let originServerTs;
try {
// Find the closest day to today with messages
({ eventId: eventIdForTimestamp, originServerTs } = await timestampToEvent({
@@ -178,14 +177,15 @@ router.get(
}));
// The goal is to go forward 100 messages, so that when we view the room at that
// point going backwards 100 message, we end up at the perfect sam continuation spot
// in the room.
// point going backwards 100 messages, we end up at the same perfect continuation
// spot in the room.
//
// XXX: This is flawed in the fact that when we go `/messages?dir=b` it could
// backfill messages which will fill up the response before we perfectly connect and
// continue from the position they were jumping from before. When `/messages?dir=f`
// backfills, we won't have this problem anymore because any messages backfilled in
// the forwards direction would be picked up the same going backwards.
// XXX: This is flawed in the fact that when we go `/messages?dir=b` later, it
// could backfill messages which will fill up the response before we perfectly
// connect and continue from the position they were jumping from before. When
// `/messages?dir=f` backfills, we won't have this problem anymore because any
// messages backfilled in the forwards direction would be picked up the same going
// backwards.
if (dir === 'f') {
// Use `/messages?dir=f` and get the `end` pagination token to paginate from. And
// then start the scroll from the top of the page so they can continue.
@@ -198,7 +198,36 @@ router.get(
limit: archiveMessageLimit,
});
originServerTs = messageResData.chunk[messageResData.chunk.length - 1]?.origin_server_ts;
if (!messageResData.chunk?.length) {
throw new StatusError(
404,
`/messages response didn't contain any more messages to jump to`
);
}
const timestampOfLastMessage =
messageResData.chunk[messageResData.chunk.length - 1].origin_server_ts;
const dateOfLastMessage = new Date(timestampOfLastMessage);
// Back track from the last message timestamp to the date boundary. This will
// guarantee some overlap with the previous page we jumped from so we don't lose
// any messages in the gap.
//
// XXX: This date boundary logic may need to change once we introduce hour
// chunks or time slices. For example if we reached into the next day but it has
// too many messages to show for a given page, we would want to back track until
// a suitable time slice boundary. Maybe we need to add a new URL parameter here
// `?time-slice=true` to indicate that it's okay to break it up by time slice
// based on previously having to view by time slice. We wouldn't want to give
const utcMidnightOfDayBefore = Date.UTC(
dateOfLastMessage.getUTCFullYear(),
dateOfLastMessage.getUTCMonth(),
dateOfLastMessage.getUTCDate()
);
// We subtract 1ms from UTC midnight to get to the end of the day before
const endOfDayBeforeDate = new Date(utcMidnightOfDayBefore - 1);
originServerTs = endOfDayBeforeDate;
}
} catch (err) {
const is404Error = err instanceof HTTPResponseError && err.response.status === 404;
@@ -227,7 +256,8 @@ router.get(
// TODO: Add query parameter that causes the client to start the scroll at the top
// when jumping forwards so they can continue reading where they left off.
matrixPublicArchiveURLCreator.archiveUrlForDate(roomIdOrAlias, new Date(originServerTs), {
scrollStartPosition,
// Start the scroll at the next event from where they jumped from (seamless navigation)
scrollStartEventId: eventIdForTimestamp,
})
);
})
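
The `matrixPublicArchiveURLCreator.archiveUrlForDate` implementation is not part of this diff; as a rough, hypothetical sketch (the helper name, base URL, and path shape below are made up for illustration), the `scrollStartEventId` option could surface as the `?at=$eventId` permalink parameter roughly like this:

```js
// Hypothetical sketch, not the real matrixPublicArchiveURLCreator: attach the
// event to continue the scroll from as an `?at=$eventId` query parameter so
// the client knows which event to start reading from on the next page.
function withScrollStartEvent(archiveDateUrl, scrollStartEventId) {
  const url = new URL(archiveDateUrl);
  if (scrollStartEventId) {
    // `$` gets percent-encoded to `%24` here; `searchParams.get('at')` on the
    // receiving side decodes it back to the raw event ID (which is what the
    // tests below assert against).
    url.searchParams.set('at', scrollStartEventId);
  }
  return url.toString();
}

// e.g. (made-up URL shape, purely illustrative)
withScrollStartEvent('https://archive.example/r/!room:server/date/2022/11/02', '$someEventId');
// -> 'https://archive.example/r/!room:server/date/2022/11/02?at=%24someEventId'
```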


@@ -661,7 +661,7 @@ describe('matrix-public-archive', () => {
let previousDayToEventMap;
beforeEach(async () => {
// Set this low so we can easily create more than the limit
config.set('archiveMessageLimit', 3);
config.set('archiveMessageLimit', 4);
client = await getTestClientForHs(testMatrixServerUrl1);
roomId = await createTestRoom(client);
@@ -670,12 +670,12 @@ describe('matrix-public-archive', () => {
// but don't overflow the limit on a single day basis.
//
// We create 4 days of messages so we can see a seamless continuation from
// page1 to page2. The page limit is 3 but each page will show 4 messages
// because we fetch one extra to determine overflow.
// page1 to page2. The page limit is 4 but each page will show up to 5
// messages because we fetch one extra to determine overflow.
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8 <-- 9 <-- 10 <-- 11 <-- 12
// [day 1 ] [day 2 ] [day 3 ] [day 4 ]
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8
// [day 1] [day 2] [day 3] [day 4]
// [1st page ] [2nd page ]
previousDayToEventMap = new Map();
for (let i = 1; i < 5; i++) {
// The date should be just past midnight so we don't run into inclusive
@@ -688,7 +688,7 @@ describe('matrix-public-archive', () => {
const eventIds = await createMessagesInRoom({
client,
roomId,
numMessages: 2,
numMessages: 3,
prefix: `day ${i} - events in room`,
timestamp: previousArchiveDate.getTime(),
});
@@ -697,24 +697,37 @@ describe('matrix-public-archive', () => {
});
it('can jump forward to the next activity', async () => {
// `previousDayToEventMap` maps each day to the events in that day (2 events
// per day). The page limit is 3 but each page will show 4 messages because we
// Test to make sure we can jump from the 1st page to the 2nd page forwards.
//
// `previousDayToEventMap` maps each day to the events in that day (3 events
// per day). The page limit is 4 but each page will show 5 messages because we
// fetch one extra to determine overflow.
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8
// [day 1] [day 2] [day 3] [day 4]
// [1st page ] [2nd page ]
// In order to jump from the 1st page to the 2nd, we first jump forward 4
// messages, then back-track to the first date boundary which is day 3. We do
// this so that we don't start from day 4 backwards which would miss messages
// because there are more than 5 messages in between day 4 and day 2.
//
// Even though there is overlap between the pages, our scroll continues from
// the event where the 1st page starts.
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8 <-- 9 <-- 10 <-- 11 <-- 12
// [day 1 ] [day 2 ] [day 3 ] [day 4 ]
// [1st page ]
// |--jump-fwd-4-messages-->|
// [2nd page ]
const previousArchiveDates = Array.from(previousDayToEventMap.keys());
assert.strictEqual(
previousArchiveDates.length,
4,
`This test expects to work with 4 days of history, each with 2 messages and a page limit of 3 messages previousArchiveDates=${previousArchiveDates}`
`This test expects to work with 4 days of history, each with 3 messages and a page limit of 4 messages previousArchiveDates=${previousArchiveDates}`
);
// Fetch messages for the 1st page (day 2 backwards)
const day2Date = previousArchiveDates[1];
const firstPageArchiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(
roomId,
previousArchiveDates[1]
day2Date
);
// Set this for debugging if the test fails here
archiveUrl = firstPageArchiveUrl;
@@ -730,26 +743,37 @@ describe('matrix-public-archive', () => {
return eventId.startsWith('$');
});
// Assert that the first page contains 4 events (day 2 and day 1)
// Assert that the first page contains 5 events (day 2 and day 1)
assert.deepEqual(eventIdsOnFirstPage, [
// All of day 1
...previousDayToEventMap.get(previousArchiveDates[0]),
// Some of day 1
...previousDayToEventMap.get(previousArchiveDates[0]).slice(-2),
// All of day 2
...previousDayToEventMap.get(previousArchiveDates[1]),
]);
// Follow the next activity link. Aka, fetch messages for the 2nd page (day 3
// onwards, seamless continuation from the 1st page).
// Follow the next activity link. Aka, fetch messages for the 2nd page
const nextActivityLinkEl = firstPageDom.document.querySelector(
'[data-testid="jump-to-next-activity-link"]'
);
const nextActivityLink = nextActivityLinkEl.getAttribute('href');
// Set this for debugging if the test fails here
archiveUrl = nextActivityLink;
const { data: nextActivityArchivePageHtml } = await fetchEndpointAsText(nextActivityLink);
const { data: nextActivityArchivePageHtml, res: nextActivityRes } =
await fetchEndpointAsText(nextActivityLink);
const nextActivityDom = parseHTML(nextActivityArchivePageHtml);
// Assert that it's a smooth continuation to more messages with no overlap
// Assert that it's a smooth continuation to more messages
//
// First by checking where the scroll is going to start from
const urlObj = new URL(nextActivityRes.url, basePath);
const qs = new URLSearchParams(urlObj.search);
assert.strictEqual(
qs.get('at'),
// Continuing from the first event of day 3
previousDayToEventMap.get(previousArchiveDates[2])[0]
);
// Then check the events are on the page correctly
const eventIdsOnNextDay = [
...nextActivityDom.document.querySelectorAll(`[data-event-id]`),
]
@@ -761,34 +785,42 @@ describe('matrix-public-archive', () => {
return eventId.startsWith('$');
});
// Assert that the 2nd page contains 4 events (day 3 and day 4)
// Assert that the 2nd page contains 5 events (day 3 and day 2)
assert.deepEqual(eventIdsOnNextDay, [
// Some of day 2
...previousDayToEventMap.get(previousArchiveDates[1]).slice(-2),
// All of day 3
...previousDayToEventMap.get(previousArchiveDates[2]),
// All of day 4
...previousDayToEventMap.get(previousArchiveDates[3]),
]);
});
it('can jump backward to the previous activity', async () => {
// `previousDayToEventMap` maps each day to the events in that day (2 events
// per day). The page limit is 3 but each page will show 4 messages because we
// Test to make sure we can jump from the 1st page to the 2nd page backwards
//
// `previousDayToEventMap` maps each day to the events in that day (3 events
// per day). The page limit is 4 but each page will show 5 messages because we
// fetch one extra to determine overflow.
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8
// [day 1] [day 2] [day 3] [day 4]
// [2nd page ] [1st page ]
// The 2nd page continues from the *day* where the 1st page starts. Even
// though there is overlap between the pages, our scroll continues from the
// event where the 1st page starts.
//
// 1 <-- 2 <-- 3 <-- 4 <-- 5 <-- 6 <-- 7 <-- 8 <-- 9 <-- 10 <-- 11 <-- 12
// [day 1 ] [day 2 ] [day 3 ] [day 4 ]
// [1st page ]
// [2nd page ]
const previousArchiveDates = Array.from(previousDayToEventMap.keys());
assert.strictEqual(
previousArchiveDates.length,
4,
`This test expects to work with 4 days of history, each with 2 messages and a page limit of 3 messages previousArchiveDates=${previousArchiveDates}`
`This test expects to work with 4 days of history, each with 3 messages and a page limit of 4 messages previousArchiveDates=${previousArchiveDates}`
);
// Fetch messages for the 1st page (day 4 backwards)
// Fetch messages for the 1st page (day 3 backwards)
const day3Date = previousArchiveDates[2];
const firstPageArchiveUrl = matrixPublicArchiveURLCreator.archiveUrlForDate(
roomId,
previousArchiveDates[3]
day3Date
);
// Set this for debugging if the test fails here
archiveUrl = firstPageArchiveUrl;
@@ -804,28 +836,37 @@ describe('matrix-public-archive', () => {
return eventId.startsWith('$');
});
// Assert that the first page contains 4 events (day 4 and day 3)
// Assert that the first page contains 5 events (day 3 and day 2)
assert.deepEqual(eventIdsOnFirstPage, [
// Some of day 2
...previousDayToEventMap.get(previousArchiveDates[1]).slice(-2),
// All of day 3
...previousDayToEventMap.get(previousArchiveDates[2]),
// All of day 4
...previousDayToEventMap.get(previousArchiveDates[3]),
]);
// Follow the previous activity link. Aka, fetch messages for the 2nd page (day 2
// backwards, seamless continuation from the 1st page).
// Follow the previous activity link
const previousActivityLinkEl = firstPageDom.document.querySelector(
'[data-testid="jump-to-previous-activity-link"]'
);
const previousActivityLink = previousActivityLinkEl.getAttribute('href');
// Set this for debugging if the test fails here
archiveUrl = previousActivityLink;
const { data: previousActivityArchivePageHtml } = await fetchEndpointAsText(
previousActivityLink
);
const { data: previousActivityArchivePageHtml, res: previousActivityRes } =
await fetchEndpointAsText(previousActivityLink);
const previousActivityDom = parseHTML(previousActivityArchivePageHtml);
// Assert that it's a smooth continuation to more messages with no overlap
// Assert that it's a smooth continuation to more messages
//
// First by checking where the scroll is going to start from
const urlObj = new URL(previousActivityRes.url, basePath);
const qs = new URLSearchParams(urlObj.search);
assert.strictEqual(
qs.get('at'),
// Continuing from the first event of day 2
previousDayToEventMap.get(previousArchiveDates[1])[0]
);
// Then check the events are on the page correctly
const eventIdsOnPreviousDay = [
...previousActivityDom.document.querySelectorAll(`[data-event-id]`),
]
@@ -840,7 +881,7 @@ describe('matrix-public-archive', () => {
// Assert that the 2nd page contains 5 events (day 2 and day 1)
assert.deepEqual(eventIdsOnPreviousDay, [
// All of day 1
...previousDayToEventMap.get(previousArchiveDates[0]),
...previousDayToEventMap.get(previousArchiveDates[0]).slice(-2),
// All of day 2
...previousDayToEventMap.get(previousArchiveDates[1]),
]);