'use strict';

// Node built-ins and third-party dependencies
const assert = require('assert');
const path = require('path');
const urlJoin = require('url-join');
const express = require('express');

// Project-local helpers and middleware
const asyncHandler = require('../lib/express-async-handler');
const StatusError = require('../lib/status-error');
const timeoutMiddleware = require('./timeout-middleware');
const redirectToCorrectArchiveUrlIfBadSigil = require('./redirect-to-correct-archive-url-if-bad-sigil-middleware');

// Matrix homeserver utilities
const { HTTPResponseError } = require('../lib/fetch-endpoint');
const fetchRoomData = require('../lib/matrix-utils/fetch-room-data');
const fetchEventsFromTimestampBackwards = require('../lib/matrix-utils/fetch-events-from-timestamp-backwards');
const ensureRoomJoined = require('../lib/matrix-utils/ensure-room-joined');
const timestampToEvent = require('../lib/matrix-utils/timestamp-to-event');
const getMessagesResponseFromEventId = require('../lib/matrix-utils/get-messages-response-from-event-id');

// Rendering and URL helpers
const renderHydrogenVmRenderScriptToPageHtml = require('../hydrogen-render/render-hydrogen-vm-render-script-to-page-html');
const MatrixPublicArchiveURLCreator = require('matrix-public-archive-shared/lib/url-creator');

// Shared reference values and timestamp helpers
const {
  MS_LOOKUP,
  TIME_PRECISION_VALUES,
  DIRECTION,
} = require('matrix-public-archive-shared/lib/reference-values');
const { ONE_DAY_IN_MS, ONE_HOUR_IN_MS, ONE_MINUTE_IN_MS, ONE_SECOND_IN_MS } = MS_LOOKUP;
const {
  roundUpTimestampToUtcDay,
  roundUpTimestampToUtcHour,
  roundUpTimestampToUtcMinute,
  roundUpTimestampToUtcSecond,
  getUtcStartOfDayTs,
  getUtcStartOfHourTs,
  getUtcStartOfMinuteTs,
  getUtcStartOfSecondTs,
  areTimestampsFromSameUtcDay,
  areTimestampsFromSameUtcHour,
  areTimestampsFromSameUtcMinute,
  areTimestampsFromSameUtcSecond,
} = require('matrix-public-archive-shared/lib/timestamp-utilities');
// Required configuration. We fail fast at module load time if any of the
// mandatory values are missing.
const config = require('../lib/config');
const basePath = config.get('basePath');
assert(basePath);
const matrixServerUrl = config.get('matrixServerUrl');
assert(matrixServerUrl);
const matrixAccessToken = config.get('matrixAccessToken');
assert(matrixAccessToken);
// Optional: when truthy, the `/date/...` route never marks a page as indexable.
const stopSearchEngineIndexing = config.get('stopSearchEngineIndexing');

// Builds archive URLs relative to the configured `basePath`
const matrixPublicArchiveURLCreator = new MatrixPublicArchiveURLCreator(basePath);

const router = express.Router({
  caseSensitive: true,
  // Preserve the req.params values from the parent router.
  mergeParams: true,
});
// Maps the entity descriptor used in the URL to the sigil that prefixes the
// corresponding Matrix identifier (`#` for room aliases, `!` for room IDs).
const VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP = Object.freeze({
  r: '#',
  roomid: '!',
});
const validSigilList = Object.values(VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP);
// Matches a single valid sigil at the very start of a string so it can be
// stripped. The pattern must not contain any padding whitespace, otherwise it
// would require literal spaces around the sigil and never match.
const sigilRe = new RegExp(`^(${validSigilList.join('|')})`);
/**
 * Build the user-facing explanation shown when more messages than we can
 * display were all sent within the same second.
 *
 * @param {number} archiveMessageLimit - The configured message limit, interpolated into the text.
 * @returns {string} A single-line message with no leading/trailing whitespace.
 */
function getErrorStringForTooManyMessages(archiveMessageLimit) {
  // Joined with single spaces so the sentences flow without any stray padding
  const message = [
    `Too many messages were sent all within a second for us to display`,
    `(more than ${archiveMessageLimit} in one second). We're unable to redirect you to`,
    `a smaller time range to view them without losing a few between each page.`,
    `Since this is probably pretty rare, we've decided not to support it for now.`,
  ].join(' ');

  return message;
}
/**
 * Derive the full room ID or alias (with its sigil) from the request params.
 *
 * @param {object} req - Express request. Reads `req.params.entityDescriptor`
 *   and `req.params.roomIdOrAliasDirty`.
 * @returns {string} The sigil for the entity descriptor followed by the
 *   room ID/alias with any leading sigil stripped.
 * @throws {Error} When `entityDescriptor` is not a key of
 *   `VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP`.
 */
function getRoomIdOrAliasFromReq(req) {
  const entityDescriptor = req.params.entityDescriptor;
  // This could be with or without the sigil. Although the correct thing here
  // is to have no sigil. We will try to correct it for them in any case.
  const roomIdOrAliasDirty = req.params.roomIdOrAliasDirty;
  const roomIdOrAliasWithoutSigil = roomIdOrAliasDirty.replace(sigilRe, '');

  // Only consider the map's own keys so that inherited properties (like
  // `toString` or `constructor`) are never mistaken for a sigil.
  const isKnownEntityDescriptor = Object.prototype.hasOwnProperty.call(
    VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP,
    entityDescriptor
  );
  const sigil = isKnownEntityDescriptor
    ? VALID_ENTITY_DESCRIPTOR_TO_SIGIL_MAP[entityDescriptor]
    : undefined;
  if (!sigil) {
    throw new Error(
      `Unknown entityDescriptor=${entityDescriptor} has no sigil. This is an error with the Matrix Public Archive itself (please open an issue).`
    );
  }

  return `${sigil}${roomIdOrAliasWithoutSigil}`;
}
/**
 * Parse the date (and optional time) route params into the timestamp that the
 * archive page should display messages up to.
 *
 * @param {object} req - Express request. Reads `req.params.yyyy`, `mm`, `dd`
 *   and the optional `time` param (`T23:59` or `T23:59:59`).
 * @returns {{toTimestamp: number, yyyy: number, mm: number, dd: number,
 *   timeDefined: boolean, secondsDefined: boolean}} `mm` is zero-based.
 *   `toTimestamp` is the given UTC date+time when a time was specified, or
 *   the last millisecond of the given UTC day otherwise.
 * @throws {StatusError} 404 when the time can't be parsed or is out of range.
 */
// eslint-disable-next-line max-statements, complexity
function parseArchiveRangeFromReq(req) {
  const yyyy = parseInt(req.params.yyyy, 10);
  // Month is the only zero-based index in this group
  const mm = parseInt(req.params.mm, 10) - 1;
  const dd = parseInt(req.params.dd, 10);

  const timeString = req.params.time;
  let timeInMs = 0;
  let timeDefined = false;
  let secondsDefined = false;
  if (timeString) {
    const timeMatches = timeString.match(/^T(\d\d?):(\d\d?)(?::(\d\d?))?$/);

    if (!timeMatches) {
      throw new StatusError(
        404,
        'Time was unable to be parsed from URL. It should be in 24-hour format 23:59:59'
      );
    }

    // The hour and minute groups are mandatory in the pattern; seconds are optional
    const hour = parseInt(timeMatches[1], 10);
    const minute = parseInt(timeMatches[2], 10);
    const second = timeMatches[3] ? parseInt(timeMatches[3], 10) : 0;

    timeDefined = true;
    // Whether the timestamp included seconds
    secondsDefined = !!timeMatches[3];

    if (Number.isNaN(hour) || hour < 0 || hour > 23) {
      throw new StatusError(404, `Hour can only be in range 0-23 -> ${hour}`);
    }
    if (Number.isNaN(minute) || minute < 0 || minute > 59) {
      throw new StatusError(404, `Minute can only be in range 0-59 -> ${minute}`);
    }
    if (Number.isNaN(second) || second < 0 || second > 59) {
      throw new StatusError(404, `Second can only be in range 0-59 -> ${second}`);
    }

    const hourInMs = hour * ONE_HOUR_IN_MS;
    const minuteInMs = minute * ONE_MINUTE_IN_MS;
    const secondInMs = second * ONE_SECOND_IN_MS;

    timeInMs = hourInMs + minuteInMs + secondInMs;
  }

  let toTimestamp;
  // We have to check whether a time was given at all instead of the truthiness
  // of `timeInMs` because `T00:00` is a perfectly valid time that adds up to
  // `0` ms and must not be confused with "no time specified".
  if (timeDefined) {
    const startOfDayTimestamp = Date.UTC(yyyy, mm, dd);
    toTimestamp = startOfDayTimestamp + timeInMs;
  }
  // If no time specified, then we assume end-of-day
  else {
    // We `- 1` from UTC midnight to get the timestamp that is a millisecond before the
    // next day T23:59:59.999
    toTimestamp = Date.UTC(yyyy, mm, dd + 1) - 1;
  }

  return {
    toTimestamp,
    yyyy,
    mm,
    dd,
    // Whether the req included time `T23:59`
    timeDefined,
    // Whether the req included seconds in the time `T23:59:59`
    secondsDefined,
  };
}
// Run the bad-sigil redirect middleware before any of the routes below.
// NOTE(review): going by its name, it presumably redirects URLs that include a
// sigil to the corrected archive URL; confirm against the middleware itself.
router.use(redirectToCorrectArchiveUrlIfBadSigil);
// Room root: redirects to the archive page for the most recent day that has
// messages (looking backwards from now).
router.get(
  '/',
  asyncHandler(async function (req, res) {
    const roomIdOrAlias = getRoomIdOrAliasFromReq(req);

    // In case we're joining a new room for the first time,
    // let's avoid redirecting to our join event by getting
    // the time before we join and looking backwards.
    const dateBeforeJoin = Date.now();

    // We have to wait for the room join to happen first before we can fetch
    // any of the additional room info or messages.
    const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via);

    // Find the closest day to the current time with messages
    const { originServerTs } = await timestampToEvent({
      accessToken: matrixAccessToken,
      roomId,
      ts: dateBeforeJoin,
      direction: DIRECTION.backward,
    });
    if (!originServerTs) {
      throw new StatusError(404, 'Unable to find day with history');
    }

    // Redirect to a day with messages
    res.redirect(
      matrixPublicArchiveURLCreator.archiveUrlForDate(roomIdOrAlias, new Date(originServerTs), {
        // We can avoid passing along the `via` query parameter because we already
        // joined the room above (see `ensureRoomJoined`).
        //
        //viaServers: req.query.via,
      })
    );
  })
);
// Placeholder for event permalinks; currently always responds with `todo`.
router.get(
  '/event/:eventId',
  asyncHandler(async (req, res) => {
    // TODO: Fetch event to get `origin_server_ts` and redirect to
    // /!roomId/2022/01/01?at=$eventId
    res.send('todo');
  })
);
// Jump to the next chunk of messages before/after the currently displayed
// range. Expects the `?currentRangeStartTs`, `?currentRangeEndTs` and `?dir`
// query parameters and responds with a redirect to the archive URL that
// represents the next chunk (or with a 501 when too many messages share the
// same second).
router.get(
  '/jump',
  // eslint-disable-next-line max-statements, complexity
  asyncHandler(async function (req, res) {
    const roomIdOrAlias = getRoomIdOrAliasFromReq(req);

    const currentRangeStartTs = parseInt(req.query.currentRangeStartTs, 10);
    assert(
      !Number.isNaN(currentRangeStartTs),
      '?currentRangeStartTs query parameter must be a number'
    );
    const currentRangeEndTs = parseInt(req.query.currentRangeEndTs, 10);
    assert(!Number.isNaN(currentRangeEndTs), '?currentRangeEndTs query parameter must be a number');
    const dir = req.query.dir;
    assert(
      [DIRECTION.forward, DIRECTION.backward].includes(dir),
      '?dir query parameter must be [f|b]'
    );

    let ts;
    if (dir === DIRECTION.backward) {
      // We `- 1` so we don't jump to the same event because the endpoint is inclusive.
      //
      // XXX: This is probably an edge-case flaw when there could be multiple events at
      // the same timestamp
      ts = currentRangeStartTs - 1;
    } else if (dir === DIRECTION.forward) {
      // We `+ 1` so we don't jump to the same event because the endpoint is inclusive
      //
      // XXX: This is probably an edge-case flaw when there could be multiple events at
      // the same timestamp
      ts = currentRangeEndTs + 1;
    } else {
      throw new Error(`Unable to handle unknown dir=${dir} in /jump`);
    }

    // We have to wait for the room join to happen first before we can use the jump to
    // date endpoint
    const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via);

    let eventIdForClosestEvent;
    let tsForClosestEvent;
    let newOriginServerTs;
    let preferredPrecision = null;
    try {
      // We pull this fresh from the config for each request to ensure we have an
      // updated value between each e2e test
      const archiveMessageLimit = config.get('archiveMessageLimit');

      // Find the closest event to the given timestamp
      ({ eventId: eventIdForClosestEvent, originServerTs: tsForClosestEvent } =
        await timestampToEvent({
          accessToken: matrixAccessToken,
          roomId,
          ts,
          direction: dir,
        }));

      // Based on what we found was the closest, figure out the URL that will represent
      // the next chunk in the desired direction.
      // ==============================
      //
      // When jumping backwards, since a given room archive URL represents the end of
      // the day/time-period looking backward (scroll is also anchored to the bottom),
      // we just need to get the user to the previous time-period.
      //
      // We are trying to avoid sending the user to the same time period they were just
      // viewing. i.e, if they were visiting `/2020/01/02T16:00:00` (displays messages
      // backwards from that time up to the limit), which had more messages than we
      // could display in that day, jumping backwards from the earliest displayed event
      // in the displayed range, say `T12:00:05` would still give us the same day
      // `/2020/01/02` and we want to redirect them to previous chunk from that same
      // day, like `/2020/01/02T12:00:00`
      if (dir === DIRECTION.backward) {
        const fromSameDay =
          tsForClosestEvent && areTimestampsFromSameUtcDay(currentRangeEndTs, tsForClosestEvent);
        const fromSameHour =
          tsForClosestEvent && areTimestampsFromSameUtcHour(currentRangeEndTs, tsForClosestEvent);
        const fromSameMinute =
          tsForClosestEvent && areTimestampsFromSameUtcMinute(currentRangeEndTs, tsForClosestEvent);
        const fromSameSecond =
          tsForClosestEvent && areTimestampsFromSameUtcSecond(currentRangeEndTs, tsForClosestEvent);

        // The closest event is from the same second we tried to jump from. Since we
        // can't represent something smaller than a second in the URL yet (we could do
        // ms but it's a conscious choice to make the URL cleaner,
        // #support-ms-time-slice), we will need to just return the timestamp with a
        // precision of seconds and hope that there aren't too many messages in this
        // same second.
        //
        // XXX: If there are too many messages all within the same second, people will
        // be stuck visiting the same page over and over every time they try to jump
        // backwards from that range.
        if (fromSameSecond) {
          newOriginServerTs = tsForClosestEvent;
          preferredPrecision = TIME_PRECISION_VALUES.seconds;
        }
        // The closest event is from the same minute we tried to jump from, we will need
        // to round up to the nearest second so that the URL encompasses the closest
        // event looking backwards
        else if (fromSameMinute) {
          newOriginServerTs = roundUpTimestampToUtcSecond(tsForClosestEvent);
          preferredPrecision = TIME_PRECISION_VALUES.seconds;
        }
        // The closest event is from the same hour we tried to jump from, we will need
        // to round up to the nearest minute so that the URL encompasses the closest
        // event looking backwards
        else if (fromSameHour) {
          newOriginServerTs = roundUpTimestampToUtcMinute(tsForClosestEvent);
          preferredPrecision = TIME_PRECISION_VALUES.minutes;
        }
        // The closest event is from the same day we tried to jump from, we will need to
        // round up to the nearest hour so that the URL encompasses the closest event
        // looking backwards
        else if (fromSameDay) {
          newOriginServerTs = roundUpTimestampToUtcHour(tsForClosestEvent);
          preferredPrecision = TIME_PRECISION_VALUES.minutes;
        }
        // We don't need to do anything. The next closest event is far enough away
        // (greater than 1 day) where we don't need to worry about the URL at all and
        // can just render whatever day that the closest event is from because the
        // archives biggest time-period represented in the URL is a day.
        //
        // We can display more than a day of content at a given URL (imagine lots of
        // quiet days in a room), but the URL will never represent a time-period
        // greater than a day, ex. `/2023/01/01`. We don't allow someone to just
        // specify the month like `/2023/01` ❌
        else {
          newOriginServerTs = tsForClosestEvent;
        }
      }

      // When jumping forwards, the goal is to go forward 100 messages, so that when we
      // view the room at that point going backwards 100 messages (which is how the
      // archive works for any given date from the archive URL), we end up at the
      // perfect continuation spot in the room (seamless).
      //
      // XXX: This is flawed in the fact that when we go `/messages?dir=b` later, it
      // could backfill messages which will fill up the response before we perfectly
      // connect and continue from the position they were jumping from before. When
      // `/messages?dir=f` backfills, we won't have this problem anymore because any
      // messages backfilled in the forwards direction would be picked up the same going
      // backwards.
      if (dir === DIRECTION.forward) {
        // Use `/messages?dir=f` and get the `end` pagination token to paginate from. And
        // then start the scroll from the top of the page so they can continue.
        //
        // XXX: It would be cool to somehow cache this response and re-use our work here
        // for the actual room display that we redirect to from this route. No need for
        // us go out 100 messages, only for us to go backwards 100 messages again in the
        // next route.
        const messageResData = await getMessagesResponseFromEventId({
          accessToken: matrixAccessToken,
          roomId,
          eventId: eventIdForClosestEvent,
          dir: DIRECTION.forward,
          limit: archiveMessageLimit,
        });

        if (!messageResData.chunk?.length) {
          throw new StatusError(
            404,
            `/jump?dir=${dir}: /messages response didn't contain any more messages to jump to`
          );
        }

        const timestampOfLastMessage =
          messageResData.chunk[messageResData.chunk.length - 1].origin_server_ts;
        const dateOfLastMessage = new Date(timestampOfLastMessage);

        // Back-track from the last message timestamp to the nearest date boundary.
        // Because we're back-tracking a couple events here, when we paginate back out
        // by the `archiveMessageLimit` later in the room route, it will guarantee some
        // overlap with the previous page we jumped from so we don't lose any messages
        // in the gap.
        //
        // We could choose to jump to the exact timestamp of the last message instead of
        // back-tracking but then we get ugly URL's every time you jump instead of being
        // able to back-track and round down to the nearest hour in a lot of cases. The
        // other reason not to return the exact date is maybe there are multiple messages
        // at the same timestamp and we will lose messages in the gap if it displays more
        // than we thought.
        const msGapFromJumpPointToLastMessage = timestampOfLastMessage - ts;
        const moreThanDayGap = msGapFromJumpPointToLastMessage > ONE_DAY_IN_MS;
        const moreThanHourGap = msGapFromJumpPointToLastMessage > ONE_HOUR_IN_MS;
        const moreThanMinuteGap = msGapFromJumpPointToLastMessage > ONE_MINUTE_IN_MS;
        const moreThanSecondGap = msGapFromJumpPointToLastMessage > ONE_SECOND_IN_MS;

        // If the `/messages` response returns less than the `archiveMessageLimit`
        // looking forwards, it means we're looking at the latest events in the room. We
        // can simply just display the day that the latest event occurred on or given
        // rangeEnd (whichever is later).
        const haveReachedLatestMessagesInRoom = messageResData.chunk.length < archiveMessageLimit;
        if (haveReachedLatestMessagesInRoom) {
          const latestDesiredTs = Math.max(currentRangeEndTs, timestampOfLastMessage);
          const latestDesiredDate = new Date(latestDesiredTs);
          const utcMidnightTs = getUtcStartOfDayTs(latestDesiredDate);
          newOriginServerTs = utcMidnightTs;
          preferredPrecision = TIME_PRECISION_VALUES.none;
        }
        // More than a day gap here, so we can just back-track to the nearest day
        else if (moreThanDayGap) {
          const utcMidnightOfDayBefore = getUtcStartOfDayTs(dateOfLastMessage);
          // We `- 1` from UTC midnight to get the timestamp that is a millisecond
          // before the next day but we choose a no time precision so we jump to just
          // the bare date without a time. A bare date in the `/date/2022/12/16`
          // endpoint represents the end of that day looking backwards so this is
          // exactly what we want.
          const endOfDayBeforeTs = utcMidnightOfDayBefore - 1;
          newOriginServerTs = endOfDayBeforeTs;
          preferredPrecision = TIME_PRECISION_VALUES.none;
        }
        // More than a hour gap here, we will need to back-track to the nearest hour
        else if (moreThanHourGap) {
          const utcTopOfHourBefore = getUtcStartOfHourTs(dateOfLastMessage);
          newOriginServerTs = utcTopOfHourBefore;
          preferredPrecision = TIME_PRECISION_VALUES.minutes;
        }
        // More than a minute gap here, we will need to back-track to the nearest minute
        else if (moreThanMinuteGap) {
          const utcTopOfMinuteBefore = getUtcStartOfMinuteTs(dateOfLastMessage);
          newOriginServerTs = utcTopOfMinuteBefore;
          preferredPrecision = TIME_PRECISION_VALUES.minutes;
        }
        // More than a second gap here, we will need to back-track to the nearest second
        else if (moreThanSecondGap) {
          const utcTopOfSecondBefore = getUtcStartOfSecondTs(dateOfLastMessage);
          newOriginServerTs = utcTopOfSecondBefore;
          preferredPrecision = TIME_PRECISION_VALUES.seconds;
        }
        // Less than a second gap here, we will give up.
        //
        // XXX: Maybe we can support ms here (#support-ms-time-slice)
        else {
          // 501 Not Implemented: the server does not support the functionality required
          // to fulfill the request
          res.status(501);
          res.send(
            `/jump ran into a problem: ${getErrorStringForTooManyMessages(archiveMessageLimit)}`
          );
          return;
        }
      }
    } catch (err) {
      const is404Error = err instanceof HTTPResponseError && err.response.status === 404;
      // Only throw if it's something other than a 404 error. 404 errors are fine, they
      // just mean there are no more messages to paginate in that room and we were
      // already viewing the latest in the room.
      if (!is404Error) {
        throw err;
      }
    }

    // If we can't find any more messages to paginate to, just progress the date by a
    // day in whatever direction they wanted to go so we can display the empty view for
    // that day.
    if (!newOriginServerTs) {
      // `dir` was validated above so it can only be backward or forward here
      const tsAtRangeBoundaryInDirection =
        dir === DIRECTION.backward ? currentRangeStartTs : currentRangeEndTs;

      const dateAtRangeBoundaryInDirection = new Date(tsAtRangeBoundaryInDirection);
      const yyyy = dateAtRangeBoundaryInDirection.getUTCFullYear();
      const mm = dateAtRangeBoundaryInDirection.getUTCMonth();
      const dd = dateAtRangeBoundaryInDirection.getUTCDate();

      const newDayDelta = dir === DIRECTION.forward ? 1 : -1;
      newOriginServerTs = Date.UTC(yyyy, mm, dd + newDayDelta);
    }

    // Redirect to a day with messages
    const archiveUrlToRedirectTo = matrixPublicArchiveURLCreator.archiveUrlForDate(
      roomIdOrAlias,
      new Date(newOriginServerTs),
      {
        // Start the scroll at the next event from where they jumped from (seamless navigation)
        scrollStartEventId: eventIdForClosestEvent,
        preferredPrecision,
      }
    );
    res.redirect(archiveUrlToRedirectTo);
  })
);
// Shows messages from the given date/time looking backwards up to the limit.
//
// Responds with the rendered archive page HTML. Throws a 404 `StatusError` when
// the requested date is in the future and a 403 `StatusError` when the room's
// history visibility/join rules don't allow it to be viewed in the archive.
router.get(
  // The extra set of parenthesis around `((:\\d\\d?)?)` is to work around a
  // `path-to-regex` bug where the `?` wasn't attaching to the capture group, see
  // https://github.com/pillarjs/path-to-regexp/issues/287
  '/date/:yyyy(\\d{4})/:mm(\\d{2})/:dd(\\d{2}):time(T\\d\\d?:\\d\\d?((:\\d\\d?)?))?',
  timeoutMiddleware,
  // eslint-disable-next-line max-statements, complexity
  asyncHandler(async function (req, res) {
    const roomIdOrAlias = getRoomIdOrAliasFromReq(req);

    // We pull this fresh from the config for each request to ensure we have an
    // updated value between each e2e test
    const archiveMessageLimit = config.get('archiveMessageLimit');
    assert(archiveMessageLimit);
    // Synapse has a max `/messages` limit of 1000
    assert(
      archiveMessageLimit <= 999,
      'archiveMessageLimit needs to be in range [1, 999]. We can only get 1000 messages at a time from Synapse and we need a buffer of at least one to see if there are too many messages on a given day so you can only configure a max of 999. If you need more messages, we will have to implement pagination'
    );

    const { toTimestamp, timeDefined, secondsDefined } = parseArchiveRangeFromReq(req);

    // The precision the visitor used in the URL (bare date, `T23:59` or `T23:59:59`)
    let precisionFromUrl = TIME_PRECISION_VALUES.none;
    if (secondsDefined) {
      precisionFromUrl = TIME_PRECISION_VALUES.seconds;
    } else if (timeDefined) {
      precisionFromUrl = TIME_PRECISION_VALUES.minutes;
    }

    // Just 404 if anyone is trying to view the future, no need to waste resources on that
    const nowTs = Date.now();
    if (toTimestamp > roundUpTimestampToUtcDay(nowTs)) {
      throw new StatusError(
        404,
        `You can't view the history of a room on a future day (${new Date(
          toTimestamp
        ).toISOString()} > ${new Date(nowTs).toISOString()}). Go back`
      );
    }

    // We have to wait for the room join to happen first before we can fetch
    // any of the additional room info or messages.
    const roomId = await ensureRoomJoined(matrixAccessToken, roomIdOrAlias, req.query.via);

    // Do these in parallel to avoid the extra time in sequential round-trips
    // (we want to display the archive page faster)
    const [roomData, { events, stateEventMap }] = await Promise.all([
      fetchRoomData(matrixAccessToken, roomId),
      // We over-fetch messages outside of the range of the given day so that we
      // can display messages from surrounding days (currently only from days
      // before) so that the quiet rooms don't feel as desolate and broken.
      //
      // When given a bare date like `2022/11/16`, we want to paginate from the end of that
      // day backwards. This is why we use the `toTimestamp` here and fetch backwards.
      fetchEventsFromTimestampBackwards({
        accessToken: matrixAccessToken,
        roomId,
        ts: toTimestamp,
        // We fetch one more than the `archiveMessageLimit` so that we can see if there
        // are too many messages from the given day. If we have over the
        // `archiveMessageLimit` number of messages fetching from the given day, it's
        // acceptable to have them be from surrounding days. But if all 500 messages
        // (for example) are from the same day, let's redirect to a smaller hour range
        // to display.
        limit: archiveMessageLimit + 1,
      }),
    ]);

    // Only `world_readable` or `shared` rooms that are `public` are viewable in the archive
    const allowedToViewRoom =
      roomData?.historyVisibility === 'world_readable' ||
      (roomData?.historyVisibility === 'shared' && roomData?.joinRule === 'public');

    if (!allowedToViewRoom) {
      // `roomData` may be nullish here so every access in the message is optional.
      // Otherwise, a `TypeError` would mask the 403 we actually want to send.
      throw new StatusError(
        403,
        `Only \`world_readable\` or \`shared\` rooms that are \`public\` can be viewed in the archive. ${roomData?.id} has m.room.history_visibility=${roomData?.historyVisibility} m.room.join_rules=${roomData?.joinRule}`
      );
    }

    // No indexing when `stopSearchEngineIndexing` is configured. Otherwise we only
    // allow search engines to index `world_readable` rooms.
    const shouldIndex =
      !stopSearchEngineIndexing && roomData?.historyVisibility === `world_readable`;

    const hydrogenStylesUrl = urlJoin(basePath, '/hydrogen-styles.css');
    const stylesUrl = urlJoin(basePath, '/css/styles.css');
    const jsBundleUrl = urlJoin(basePath, '/js/entry-client-hydrogen.es.js');

    const pageHtml = await renderHydrogenVmRenderScriptToPageHtml(
      path.resolve(__dirname, '../../shared/hydrogen-vm-render-script.js'),
      {
        toTimestamp,
        precisionFromUrl,
        roomData: {
          ...roomData,
          // The `canonicalAlias` will take precedence over the `roomId` when present so we only
          // want to use it if that's what the user originally browsed to. We shouldn't
          // try to switch someone over to the room alias if they browsed from the room
          // ID or vice versa.
          canonicalAlias:
            roomIdOrAlias === roomData.canonicalAlias ? roomData.canonicalAlias : undefined,
        },
        events,
        stateEventMap,
        shouldIndex,
        config: {
          basePath: basePath,
          matrixServerUrl: matrixServerUrl,
        },
      },
      {
        title: `${roomData.name} - Matrix Public Archive`,
        styles: [hydrogenStylesUrl, stylesUrl],
        scripts: [jsBundleUrl],
        locationHref: urlJoin(basePath, req.originalUrl),
        shouldIndex,
        cspNonce: res.locals.cspNonce,
      }
    );

    res.set('Content-Type', 'text/html');
    res.send(pageHtml);
  })
);
// Export the configured router (created with `mergeParams` so it can be
// mounted under a parent router that supplies the room params).
module . exports = router ;