From 02b86a840538aae712a9b744dd46a08a2395a776 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 2 Sep 2022 20:49:06 -0500 Subject: [PATCH] Render pipeline separation of concerns (#64) Follow-up to https://github.com/matrix-org/matrix-public-archive/pull/36 Render pipeline separation of concerns: 1. Run in `child_process` 2. Hydrogen render It's now just a generic `child_process` runner that runs the Hydrogen render in it. This eliminates the windy path of the 1-4 steps that was only held together by the file names themselves. --- public/js/entry-client.js | 2 +- .../child-fork-script.js} | 28 ++++++--- .../run-in-child-process.js} | 61 +++++++++---------- ...js => render-hydrogen-to-string-unsafe.js} | 31 +++------- .../render-hydrogen-to-string.js | 54 ++++++++++++++++ server/routes/install-routes.js | 30 ++++----- server/start-dev.js | 1 + ...script.js => hydrogen-vm-render-script.js} | 0 8 files changed, 128 insertions(+), 79 deletions(-) rename server/{hydrogen-render/2-render-hydrogen-to-string-fork-script.js => child-process-runner/child-fork-script.js} (79%) rename server/{hydrogen-render/1-render-hydrogen-to-string.js => child-process-runner/run-in-child-process.js} (75%) rename server/hydrogen-render/{3-render-hydrogen-to-string-unsafe.js => render-hydrogen-to-string-unsafe.js} (80%) create mode 100644 server/hydrogen-render/render-hydrogen-to-string.js rename shared/{4-hydrogen-vm-render-script.js => hydrogen-vm-render-script.js} (100%) diff --git a/public/js/entry-client.js b/public/js/entry-client.js index 101b2db..251663e 100644 --- a/public/js/entry-client.js +++ b/public/js/entry-client.js @@ -1,2 +1,2 @@ -import mounted from 'matrix-public-archive-shared/4-hydrogen-vm-render-script'; +import mounted from 'matrix-public-archive-shared/hydrogen-vm-render-script'; console.log('mounted', mounted); diff --git a/server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js b/server/child-process-runner/child-fork-script.js similarity index 79% rename from server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js rename to server/child-process-runner/child-fork-script.js index c2fe6d6..4f55bb9 100644 --- a/server/hydrogen-render/2-render-hydrogen-to-string-fork-script.js +++ b/server/child-process-runner/child-fork-script.js @@ -1,13 +1,11 @@ 'use strict'; -// Called by `child_process` `fork` in `render-hydrogen-to-string.js` so we can -// get the data and exit the process cleanly. We don't want Hydrogen to keep -// running after we get our initial rendered HTML. +// Called by `child_process` `fork` in `run-in-child-process.js` so we can +// get the data and exit the process cleanly. const assert = require('assert'); const RethrownError = require('../lib/rethrown-error'); -const _renderHydrogenToStringUnsafe = require('./3-render-hydrogen-to-string-unsafe'); // Serialize the error and send it back up to the parent process so we can // interact with it and know what happened when the process exits. @@ -42,11 +40,12 @@ async function serializeError(err) { } // We don't exit the process after encountering one of these because maybe it -// doesn't matter to the main render process in Hydrogen. +// doesn't matter to the main-line process in the module. // // If we don't listen for these events, the child will exit with status code 1 // (error) when they occur. process.on('uncaughtException', async (err /*, origin*/) => { + console.log('2 uncaughtException', err); await serializeError(new RethrownError('uncaughtException in child process', err)); }); @@ -57,17 +56,28 @@ process.on('unhandledRejection', async (reason /*, promise*/) => { // Only kick everything off once we receive the options. We pass in the options // this way instead of argv because we will run into `Error: spawn E2BIG` and // `Error: spawn ENAMETOOLONG` with argv. -process.on('message', async (renderOptions) => { +process.on('message', async (runArguments) => { try { - const resultantHtml = await _renderHydrogenToStringUnsafe(renderOptions); + assert(runArguments); - assert(resultantHtml, `No HTML returned from _renderHydrogenToStringUnsafe.`); + // Require the module that we're supposed to run + const modulePath = process.argv[2]; + assert( + modulePath, + 'Expected `modulePath` to be passed into `child-fork-script.js` via argv[2]' + ); + const moduleToRun = require(modulePath); + + // Run the module + const result = await moduleToRun(runArguments); + + assert(result, `No result returned from module we ran (${modulePath}).`); // Send back the data we need to the parent. await new Promise((resolve, reject) => { process.send( { - data: resultantHtml, + data: result, }, (err) => { if (err) { diff --git a/server/hydrogen-render/1-render-hydrogen-to-string.js b/server/child-process-runner/run-in-child-process.js similarity index 75% rename from server/hydrogen-render/1-render-hydrogen-to-string.js rename to server/child-process-runner/run-in-child-process.js index c6e1a02..5d58690 100644 --- a/server/hydrogen-render/1-render-hydrogen-to-string.js +++ b/server/child-process-runner/run-in-child-process.js @@ -1,22 +1,23 @@ 'use strict'; -// We use a child_process because we want to be able to exit the process after -// we receive the SSR results. We don't want Hydrogen to keep running after we -// get our initial rendered HTML. - -const fork = require('child_process').fork; +// Generic `child_process` runner that handles running the given module with the +// given `runArguments` and returning the async result. Handles the complexity +// error handling, passing large argument objects, and timeouts. +// +// Error handling includes main-line errors seen while waiting the async result, +// as well as keeping track of out of band `uncaughtException` and +// `unhandledRejection` to give more context if the process exits with code 1 +// (error) or timesout. const assert = require('assert'); +const fork = require('child_process').fork; + const RethrownError = require('../lib/rethrown-error'); const { traceFunction } = require('../tracing/trace-utilities'); const config = require('../lib/config'); const logOutputFromChildProcesses = config.get('logOutputFromChildProcesses'); -// The render should be fast. If it's taking more than 5 seconds, something has -// gone really wrong. -const RENDER_TIMEOUT = 5000; - if (!logOutputFromChildProcesses) { console.warn( `Silencing logs from child processes (config.logOutputFromChildProcesses = ${logOutputFromChildProcesses})` @@ -60,7 +61,7 @@ function assembleErrorAfterChildExitsWithErrors(exitCode, childErrors) { return childErrorSummary; } -async function renderHydrogenToString(renderOptions) { +async function runInChildProcess(modulePath, runArguments, { timeout }) { let abortTimeoutId; try { let childErrors = []; @@ -68,9 +69,10 @@ async function renderHydrogenToString(renderOptions) { const controller = new AbortController(); const { signal } = controller; - // We use a child_process because we want to be able to exit the process after - // we receive the SSR results. - const child = fork(require.resolve('./2-render-hydrogen-to-string-fork-script'), [], { + // We use a child_process because we want to be able to exit the process + // after we receive the results. We use `fork` instead of `exec`/`spawn` so + // that we can pass a module instead of running a command. + const child = fork(require.resolve('./child-fork-script'), [modulePath], { signal, // Default to silencing logs from the child process. We already have // proper instrumentation of any errors that might occur. @@ -92,15 +94,17 @@ async function renderHydrogenToString(renderOptions) { }); } - // Pass the renderOptions to the child by sending instead of via argv because we - // will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG` with - // argv. - child.send(renderOptions); + // Pass the runArguments to the child by sending instead of via argv because + // we will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG` + // with argv. + child.send(runArguments); // Stops the child process if it takes too long - abortTimeoutId = setTimeout(() => { - controller.abort(); - }, RENDER_TIMEOUT); + if (timeout) { + abortTimeoutId = setTimeout(() => { + controller.abort(); + }, timeout); + } const returnedData = await new Promise((resolve, reject) => { let data = ''; @@ -112,8 +116,8 @@ async function renderHydrogenToString(renderOptions) { childError.name = result.name; childError.message = result.message; childError.stack = result.stack; - // When an error happens while rendering Hydrogen, we only expect one - // error to come through here from the main line to render Hydrogen. + // When an error happens while running the module, we only expect one + // error to come through here from the main-line to run the module. // But it's possible to get multiple errors from async out of context // places since we also listen to `uncaughtException` and // `unhandledRejection`. @@ -146,7 +150,7 @@ async function renderHydrogenToString(renderOptions) { ); reject( new RethrownError( - `Timed out while rendering Hydrogen to string so we aborted the child process after ${RENDER_TIMEOUT}ms. Any child errors? (${childErrors.length})`, + `Timed out while running ${modulePath} so we aborted the child process after ${timeout}ms. Any child errors? (${childErrors.length})`, childErrorSummary ) ); @@ -159,19 +163,12 @@ async function renderHydrogenToString(renderOptions) { if (!returnedData) { const childErrorSummary = assembleErrorAfterChildExitsWithErrors(childExitCode, childErrors); throw new RethrownError( - `No HTML sent from child process to render Hydrogen. Any child errors? (${childErrors.length})`, + `No \`returnedData\` sent from child process while running the module (${modulePath}). Any child errors? (${childErrors.length})`, childErrorSummary ); } return returnedData; - } catch (err) { - throw new RethrownError( - `Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify( - renderOptions - )}`, - err - ); } finally { // We don't have to add a undefined/null check here because `clearTimeout` // works with any value you give it and doesn't throw an error. @@ -179,4 +176,4 @@ async function renderHydrogenToString(renderOptions) { } } -module.exports = traceFunction(renderHydrogenToString); +module.exports = traceFunction(runInChildProcess); diff --git a/server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js b/server/hydrogen-render/render-hydrogen-to-string-unsafe.js similarity index 80% rename from server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js rename to server/hydrogen-render/render-hydrogen-to-string-unsafe.js index ec8c280..7cdf6d9 100644 --- a/server/hydrogen-render/3-render-hydrogen-to-string-unsafe.js +++ b/server/hydrogen-render/render-hydrogen-to-string-unsafe.js @@ -18,8 +18,6 @@ const { readFile } = require('fs').promises; const crypto = require('crypto'); const { parseHTML } = require('linkedom'); -const config = require('../lib/config'); - // Setup the DOM context with any necessary shims/polyfills and ensure the VM // context global has everything that a normal document does so Hydrogen can // render. @@ -62,25 +60,16 @@ function createDomAndSetupVmContext() { }; } -async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, stateEventMap }) { - assert(fromTimestamp); - assert(roomData); - assert(events); - assert(stateEventMap); +async function _renderHydrogenToStringUnsafe(renderOptions) { + assert(renderOptions); + assert(renderOptions.vmRenderScriptFilePath); + assert(renderOptions.vmRenderContext); const { dom, vmContext } = createDomAndSetupVmContext(); // Define this for the SSR context dom.window.matrixPublicArchiveContext = { - fromTimestamp, - roomData, - events, - stateEventMap, - config: { - basePort: config.get('basePort'), - basePath: config.get('basePath'), - matrixServerUrl: config.get('matrixServerUrl'), - }, + ...renderOptions.vmRenderContext, }; // Serialize it for when we run this again client-side dom.document.body.insertAdjacentHTML( @@ -92,18 +81,16 @@ async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, ` ); - const hydrogenRenderScriptCode = await readFile( - path.resolve(__dirname, '../../shared/4-hydrogen-vm-render-script.js'), - 'utf8' - ); + const vmRenderScriptFilePath = renderOptions.vmRenderScriptFilePath; + const hydrogenRenderScriptCode = await readFile(vmRenderScriptFilePath, 'utf8'); const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, { - filename: '4-hydrogen-vm-render-script.js', + filename: path.basename(vmRenderScriptFilePath), }); // Note: The VM does not exit after the result is returned here and is why // this should be run in a `child_process` that we can exit. const vmResult = hydrogenRenderScript.runInContext(vmContext); // Wait for everything to render - // (waiting on the promise returned from `4-hydrogen-vm-render-script.js`) + // (waiting on the promise returned from the VM render script) await vmResult; const documentString = dom.document.body.toString(); diff --git a/server/hydrogen-render/render-hydrogen-to-string.js b/server/hydrogen-render/render-hydrogen-to-string.js new file mode 100644 index 0000000..bb25f92 --- /dev/null +++ b/server/hydrogen-render/render-hydrogen-to-string.js @@ -0,0 +1,54 @@ +'use strict'; + +// Server-side render Hydrogen to a string. +// +// We use a `child_process` because we want to be able to exit the process after +// we receive the SSR results. We don't want Hydrogen to keep running after we +// get our initial rendered HTML. + +const assert = require('assert'); +const RethrownError = require('../lib/rethrown-error'); +const runInChildProcess = require('../child-process-runner/run-in-child-process'); + +// The render should be fast. If it's taking more than 5 seconds, something has +// gone really wrong. +const RENDER_TIMEOUT = 5000; + +async function renderHydrogenToString(renderOptions) { + assert(renderOptions.vmRenderScriptFilePath); + assert(renderOptions.vmRenderContext); + + try { + // In development, if you're running into a hard to track down error with + // the render hydrogen stack and fighting against the multiple layers of + // complexity with `child_process `and `vm`; you can get away with removing + // the `child_process` part of it by using + // `render-hydrogen-to-string-unsafe` directly. + // ```js + // const _renderHydrogenToStringUnsafe = require('../hydrogen-render/render-hydrogen-to-string-unsafe'); + // const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe(renderOptions); + // ``` + // + // We use a child_process because we want to be able to exit the process after + // we receive the SSR results. We don't want Hydrogen to keep running after we + // get our initial rendered HTML. + const hydrogenHtmlOutput = await runInChildProcess( + require.resolve('./render-hydrogen-to-string-unsafe'), + renderOptions, + { + timeout: RENDER_TIMEOUT, + } + ); + + return hydrogenHtmlOutput; + } catch (err) { + throw new RethrownError( + `Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderHydrogenToString arguments: ${JSON.stringify( + renderOptions + )}`, + err + ); + } +} + +module.exports = renderHydrogenToString; diff --git a/server/routes/install-routes.js b/server/routes/install-routes.js index 92d122c..e1538e9 100644 --- a/server/routes/install-routes.js +++ b/server/routes/install-routes.js @@ -13,7 +13,7 @@ const timeoutMiddleware = require('./timeout-middleware'); const fetchRoomData = require('../fetch-room-data'); const fetchEventsInRange = require('../fetch-events-in-range'); const ensureRoomJoined = require('../ensure-room-joined'); -const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string'); +const renderHydrogenToString = require('../hydrogen-render/render-hydrogen-to-string'); const sanitizeHtml = require('../lib/sanitize-html'); const safeJson = require('../lib/safe-json'); @@ -182,21 +182,21 @@ function installRoutes(app) { throw new Error('TODO: Redirect user to smaller hour range'); } - // In development, if you're running into a hard to track down error with - // the render hydrogen stack and fighting against the multiple layers of - // complexity with `child_process `and `vm`; you can get away with removing - // the `child_process` part of it by using - // `3-render-hydrogen-to-string-unsafe` directly. - // ```js - // const _renderHydrogenToStringUnsafe = require('../hydrogen-render/3-render-hydrogen-to-string-unsafe'); - // const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe({ /* renderData */ }); - // ``` - // const hydrogenHtmlOutput = await renderHydrogenToString({ - fromTimestamp, - roomData, - events, - stateEventMap, + vmRenderScriptFilePath: path.resolve( + __dirname, + '../../shared/hydrogen-vm-render-script.js' + ), + vmRenderContext: { + fromTimestamp, + roomData, + events, + stateEventMap, + config: { + basePath: config.get('basePath'), + matrixServerUrl: config.get('matrixServerUrl'), + }, + }, }); const serializableSpans = getSerializableSpans(); diff --git a/server/start-dev.js b/server/start-dev.js index e3115c3..48abde4 100644 --- a/server/start-dev.js +++ b/server/start-dev.js @@ -19,6 +19,7 @@ build( }) ); +// Pass through some args const args = []; if (process.argv.includes('--tracing')) { args.push('--tracing'); diff --git a/shared/4-hydrogen-vm-render-script.js b/shared/hydrogen-vm-render-script.js similarity index 100% rename from shared/4-hydrogen-vm-render-script.js rename to shared/hydrogen-vm-render-script.js