From f738dbc1da572e772e78fbf89ae522da5abe0f00 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 5 Jul 2022 17:30:52 -0500 Subject: [PATCH] Stop Hydrogen from running in the background after we get our SSR HTML render data (#36) We now run the Hydrogen render in a `child_process` so we can exit the whole render process. We still use the `vm` to setup the browser-like globals. With a `vm`, everything continues to run even after it returns and there isn't a way to clean up, stop, kill, terminate the vm script or context so we need this extra `child_process` now to clean up. I don't like the complexity necessary for this though. I wish the `vm` API allowed for this use case. The only way to stop a `vm` is the `timeout` and we want to stop as soon as we return. Fix https://github.com/matrix-org/matrix-public-archive/issues/34 --- public/js/entry-client.js | 2 +- .../1-render-hydrogen-to-string.js | 97 +++++++++++++++++ ...2-render-hydrogen-to-string-fork-script.js | 38 +++++++ .../3-render-hydrogen-to-string-unsafe.js | 100 ++++++++++++++++++ server/render-hydrogen-to-string.js | 98 ----------------- server/routes/install-routes.js | 2 +- ...ript.js => 4-hydrogen-vm-render-script.js} | 5 + 7 files changed, 242 insertions(+), 100 deletions(-) create mode 100644 server/hydrogen-render/1-render-hydrogen-to-string.js create mode 100644 server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js create mode 100644 server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js delete mode 100644 server/render-hydrogen-to-string.js rename shared/{hydrogen-vm-render-script.js => 4-hydrogen-vm-render-script.js} (97%) diff --git a/public/js/entry-client.js b/public/js/entry-client.js index 251663e..101b2db 100644 --- a/public/js/entry-client.js +++ b/public/js/entry-client.js @@ -1,2 +1,2 @@ -import mounted from 'matrix-public-archive-shared/hydrogen-vm-render-script'; +import mounted from 'matrix-public-archive-shared/4-hydrogen-vm-render-script'; console.log('mounted', mounted); diff --git a/server/hydrogen-render/1-render-hydrogen-to-string.js b/server/hydrogen-render/1-render-hydrogen-to-string.js new file mode 100644 index 0000000..afd072c --- /dev/null +++ b/server/hydrogen-render/1-render-hydrogen-to-string.js @@ -0,0 +1,97 @@ +'use strict'; + +// We use a child_process because we want to be able to exit the process after +// we receive the SSR results. We don't want Hydrogen to keep running after we +// get our initial rendered HTML. + +const fork = require('child_process').fork; + +const RethrownError = require('../lib/rethrown-error'); + +// The render should be fast. If it's taking more than 5 seconds, something has +// gone really wrong. +const RENDER_TIMEOUT = 5000; + +async function renderHydrogenToString(options) { + try { + let data = ''; + let childErrors = []; + + const controller = new AbortController(); + const { signal } = controller; + // We use a child_process because we want to be able to exit the process after + // we receive the SSR results. + const child = fork( + require.resolve('./2-render-hydrogen-to-string-fork-script'), + [JSON.stringify(options)], + { + signal, + //cwd: process.cwd(), + } + ); + + // Stops the child process if it takes too long + setTimeout(() => { + controller.abort(); + }, RENDER_TIMEOUT); + + // Collect the data passed back by the child + child.on('message', function (result) { + if (result.error) { + // De-serialize the error + const childError = new Error(); + childError.name = result.name; + childError.message = result.message; + childError.stack = result.stack; + // We shouldn't really run into a situation where there are multiple + // errors but since this is just a message bus, it's possible. + childErrors.push(childError); + } else { + data += result.data; + } + }); + + await new Promise((resolve, reject) => { + child.on('close', (exitCode) => { + // Exited successfully + if (exitCode === 0) { + resolve(data); + } else { + let extraErrorsMessage = ''; + if (childErrors.length > 1) { + extraErrorsMessage = ` (somehow we saw ${childErrors.length} errors but we really always expect 1 error)`; + } + + const error = new RethrownError( + `Child process failed with exit code ${exitCode}${extraErrorsMessage}`, + childErrors[0] + ); + reject(error); + } + }); + + // When a problem occurs when spawning the process or gets aborted + child.on('error', (err) => { + if (err.name === 'AbortError') { + throw new RethrownError( + `Timed out while rendering Hydrogen to string so we aborted the child process after ${RENDER_TIMEOUT}ms`, + err + ); + } + + reject(err); + }); + }); + + return data; + } catch (err) { + throw new RethrownError( + `Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify( + arguments[0] + )}`, + err + ); + } +} + +module.exports = renderHydrogenToString; diff --git a/server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js b/server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js new file mode 100644 index 0000000..6740e0d --- /dev/null +++ b/server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js @@ -0,0 +1,38 @@ +'use strict'; + +// Called by `child_process` `fork` in `render-hydrogen-to-string.js` so we can +// get the data and exit the process cleanly. We don't want Hydrogen to keep +// running after we get our initial rendered HTML. + +const assert = require('assert'); + +const _renderHydrogenToStringUnsafe = require('./3-render-hydrogen-to-string-unsafe'); + +(async () => { + try { + assert( + process.argv[2], + 'No command-line arguments passed to `render-hydrogen-to-string-fork-script.js`. Make sure these are being passed in when we spawn the new process.' + ); + const options = JSON.parse(process.argv[2]); + const resultantHtml = await _renderHydrogenToStringUnsafe(options); + + // Send back the data we need + process.send({ + data: resultantHtml, + }); + // End the process gracefully. We got all the data we need. + process.exit(0); + } catch (err) { + // Serialize the error and send it back up to the parent process so we can + // interact with it and know what happened when the process exits. + process.send({ + error: true, + name: err.name, + message: err.message, + stack: err.stack, + }); + // Throw the error so the process fails and exits + throw err; + } +})(); diff --git a/server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js b/server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js new file mode 100644 index 0000000..fc7adf7 --- /dev/null +++ b/server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js @@ -0,0 +1,100 @@ +'use strict'; + +// Server-side render Hydrogen to a string using a browser-like context thanks +// to `linkedom`. We use a VM so we can put all of the browser-like globals in +// place. +// +// Note: This is marked as unsafe because the render script is run in a VM which +// doesn't exit after we get the result (Hydrogen keeps running). There isn't a +// way to stop, terminate, or kill a vm script or vm context so in order to be +// safe, we need to run this inside of a child_process which we can kill after. +// This is why we have the `1-render-hydrogen-to-string.js` layer to handle +// this. + +const assert = require('assert'); +const vm = require('vm'); +const path = require('path'); +const { readFile } = require('fs').promises; +const crypto = require('crypto'); +const { parseHTML } = require('linkedom'); + +const config = require('../lib/config'); + +async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, stateEventMap }) { + assert(fromTimestamp); + assert(roomData); + assert(events); + assert(stateEventMap); + + const dom = parseHTML(` + + + + +
+ + + `); + + if (!dom.requestAnimationFrame) { + dom.requestAnimationFrame = function (cb) { + setTimeout(cb, 0); + }; + } + + // Define this for the SSR context + dom.window.matrixPublicArchiveContext = { + fromTimestamp, + roomData, + events, + stateEventMap, + config: { + basePort: config.get('basePort'), + basePath: config.get('basePath'), + matrixServerUrl: config.get('matrixServerUrl'), + }, + }; + // Serialize it for when we run this again client-side + dom.document.body.insertAdjacentHTML( + 'beforeend', + ` + + ` + ); + + const vmContext = vm.createContext(dom); + // Make the dom properties available in sub-`require(...)` calls + vmContext.global.window = dom.window; + vmContext.global.document = dom.document; + vmContext.global.Node = dom.Node; + vmContext.global.navigator = dom.navigator; + vmContext.global.DOMParser = dom.DOMParser; + // Make sure `webcrypto` exists since it was only introduced in Node.js v17 + assert(crypto.webcrypto); + vmContext.global.crypto = crypto.webcrypto; + + // So require(...) works in the vm + vmContext.global.require = require; + // So we can see logs from the underlying vm + vmContext.global.console = console; + + const hydrogenRenderScriptCode = await readFile( + path.resolve(__dirname, '../../shared/4-hydrogen-vm-render-script.js'), + 'utf8' + ); + const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, { + filename: '4-hydrogen-vm-render-script.js', + }); + // Note: The VM does not exit after the result is returned here + const vmResult = hydrogenRenderScript.runInContext(vmContext); + // Wait for everything to render + // (waiting on the promise returned from `4-hydrogen-vm-render-script.js`) + await vmResult; + + const documentString = dom.document.body.toString(); + return documentString; +} + +module.exports = _renderHydrogenToStringUnsafe; diff --git a/server/render-hydrogen-to-string.js b/server/render-hydrogen-to-string.js deleted file mode 100644 index f8dd127..0000000 --- a/server/render-hydrogen-to-string.js +++ /dev/null @@ -1,98 +0,0 @@ -'use strict'; - -const assert = require('assert'); -const vm = require('vm'); -const path = require('path'); -const { readFile } = require('fs').promises; -const crypto = require('crypto'); -const { parseHTML } = require('linkedom'); - -const RethrownError = require('./lib/rethrown-error'); -const config = require('./lib/config'); - -async function renderHydrogenToString({ fromTimestamp, roomData, events, stateEventMap }) { - try { - assert(fromTimestamp); - assert(roomData); - assert(events); - assert(stateEventMap); - - const dom = parseHTML(` - - - - -
- - - `); - - if (!dom.requestAnimationFrame) { - dom.requestAnimationFrame = function (cb) { - setTimeout(cb, 0); - }; - } - - // Define this for the SSR context - dom.window.matrixPublicArchiveContext = { - fromTimestamp, - roomData, - events, - stateEventMap, - config: { - basePort: config.get('basePort'), - basePath: config.get('basePath'), - matrixServerUrl: config.get('matrixServerUrl'), - }, - }; - // Serialize it for when we run this again client-side - dom.document.body.insertAdjacentHTML( - 'beforeend', - ` - - ` - ); - - const vmContext = vm.createContext(dom); - // Make the dom properties available in sub-`require(...)` calls - vmContext.global.window = dom.window; - vmContext.global.document = dom.document; - vmContext.global.Node = dom.Node; - vmContext.global.navigator = dom.navigator; - vmContext.global.DOMParser = dom.DOMParser; - // Make sure `webcrypto` exists since it was only introduced in Node.js v17 - assert(crypto.webcrypto); - vmContext.global.crypto = crypto.webcrypto; - - // So require(...) works in the vm - vmContext.global.require = require; - // So we can see logs from the underlying vm - vmContext.global.console = console; - - const hydrogenRenderScriptCode = await readFile( - path.resolve(__dirname, '../shared/hydrogen-vm-render-script.js'), - 'utf8' - ); - const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, { - filename: 'hydrogen-vm-render-script.js', - }); - const vmResult = hydrogenRenderScript.runInContext(vmContext); - // Wait for everything to render - // (waiting on the promise returned from `hydrogen-render-script.js`) - await vmResult; - - const documentString = dom.document.body.toString(); - return documentString; - } catch (err) { - throw new RethrownError( - `Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify( - arguments[0] - )}`, - err - ); - } -} - -module.exports = renderHydrogenToString; diff --git a/server/routes/install-routes.js b/server/routes/install-routes.js index a405a51..97b9ac5 100644 --- a/server/routes/install-routes.js +++ b/server/routes/install-routes.js @@ -9,7 +9,7 @@ const StatusError = require('../lib/status-error'); const fetchRoomData = require('../fetch-room-data'); const fetchEventsInRange = require('../fetch-events-in-range'); -const renderHydrogenToString = require('../render-hydrogen-to-string'); +const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string'); const config = require('../lib/config'); const basePath = config.get('basePath'); diff --git a/shared/hydrogen-vm-render-script.js b/shared/4-hydrogen-vm-render-script.js similarity index 97% rename from shared/hydrogen-vm-render-script.js rename to shared/4-hydrogen-vm-render-script.js index 4cda967..e134081 100644 --- a/shared/hydrogen-vm-render-script.js +++ b/shared/4-hydrogen-vm-render-script.js @@ -1,5 +1,10 @@ 'use strict'; +// Isomorphic script that runs in the browser and on the server for SSR (needs +// browser context) that renders Hydrogen to the `document.body`. +// +// Data is passed in via `window.matrixPublicArchiveContext` + const assert = require('matrix-public-archive-shared/lib/assert'); const { Platform,