Render pipeline separation of concerns (#64)

Follow-up to https://github.com/matrix-org/matrix-public-archive/pull/36

Render pipeline separation of concerns:

 1. Run in `child_process`
 2. Hydrogen render
 
It's now just a generic `child_process` runner that runs the Hydrogen render in it. This eliminates the winding path of steps 1-4, which was only held together by the file names themselves.
This commit is contained in:
Eric Eastwood 2022-09-02 20:49:06 -05:00 committed by GitHub
parent f6bd581f77
commit 02b86a8405
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 128 additions and 79 deletions

View File

@ -1,2 +1,2 @@
import mounted from 'matrix-public-archive-shared/4-hydrogen-vm-render-script'; import mounted from 'matrix-public-archive-shared/hydrogen-vm-render-script';
console.log('mounted', mounted); console.log('mounted', mounted);

View File

@ -1,13 +1,11 @@
'use strict'; 'use strict';
// Called by `child_process` `fork` in `render-hydrogen-to-string.js` so we can // Called by `child_process` `fork` in `run-in-child-process.js` so we can
// get the data and exit the process cleanly. We don't want Hydrogen to keep // get the data and exit the process cleanly.
// running after we get our initial rendered HTML.
const assert = require('assert'); const assert = require('assert');
const RethrownError = require('../lib/rethrown-error'); const RethrownError = require('../lib/rethrown-error');
const _renderHydrogenToStringUnsafe = require('./3-render-hydrogen-to-string-unsafe');
// Serialize the error and send it back up to the parent process so we can // Serialize the error and send it back up to the parent process so we can
// interact with it and know what happened when the process exits. // interact with it and know what happened when the process exits.
@ -42,11 +40,12 @@ async function serializeError(err) {
} }
// We don't exit the process after encountering one of these because maybe it // We don't exit the process after encountering one of these because maybe it
// doesn't matter to the main render process in Hydrogen. // doesn't matter to the main-line process in the module.
// //
// If we don't listen for these events, the child will exit with status code 1 // If we don't listen for these events, the child will exit with status code 1
// (error) when they occur. // (error) when they occur.
process.on('uncaughtException', async (err /*, origin*/) => { process.on('uncaughtException', async (err /*, origin*/) => {
console.log('2 uncaughtException', err);
await serializeError(new RethrownError('uncaughtException in child process', err)); await serializeError(new RethrownError('uncaughtException in child process', err));
}); });
@ -57,17 +56,28 @@ process.on('unhandledRejection', async (reason /*, promise*/) => {
// Only kick everything off once we receive the options. We pass in the options // Only kick everything off once we receive the options. We pass in the options
// this way instead of argv because we will run into `Error: spawn E2BIG` and // this way instead of argv because we will run into `Error: spawn E2BIG` and
// `Error: spawn ENAMETOOLONG` with argv. // `Error: spawn ENAMETOOLONG` with argv.
process.on('message', async (renderOptions) => { process.on('message', async (runArguments) => {
try { try {
const resultantHtml = await _renderHydrogenToStringUnsafe(renderOptions); assert(runArguments);
assert(resultantHtml, `No HTML returned from _renderHydrogenToStringUnsafe.`); // Require the module that we're supposed to run
const modulePath = process.argv[2];
assert(
modulePath,
'Expected `modulePath` to be passed into `child-fork-script.js` via argv[2]'
);
const moduleToRun = require(modulePath);
// Run the module
const result = await moduleToRun(runArguments);
assert(result, `No result returned from module we ran (${modulePath}).`);
// Send back the data we need to the parent. // Send back the data we need to the parent.
await new Promise((resolve, reject) => { await new Promise((resolve, reject) => {
process.send( process.send(
{ {
data: resultantHtml, data: result,
}, },
(err) => { (err) => {
if (err) { if (err) {

View File

@ -1,22 +1,23 @@
'use strict'; 'use strict';
// We use a child_process because we want to be able to exit the process after // Generic `child_process` runner that handles running the given module with the
// we receive the SSR results. We don't want Hydrogen to keep running after we // given `runArguments` and returning the async result. Handles the complexity
// get our initial rendered HTML. // of error handling, passing large argument objects, and timeouts.
//
const fork = require('child_process').fork; // Error handling includes main-line errors seen while waiting for the async result,
// as well as keeping track of out of band `uncaughtException` and
// `unhandledRejection` to give more context if the process exits with code 1
// (error) or times out.
const assert = require('assert'); const assert = require('assert');
const fork = require('child_process').fork;
const RethrownError = require('../lib/rethrown-error'); const RethrownError = require('../lib/rethrown-error');
const { traceFunction } = require('../tracing/trace-utilities'); const { traceFunction } = require('../tracing/trace-utilities');
const config = require('../lib/config'); const config = require('../lib/config');
const logOutputFromChildProcesses = config.get('logOutputFromChildProcesses'); const logOutputFromChildProcesses = config.get('logOutputFromChildProcesses');
// The render should be fast. If it's taking more than 5 seconds, something has
// gone really wrong.
const RENDER_TIMEOUT = 5000;
if (!logOutputFromChildProcesses) { if (!logOutputFromChildProcesses) {
console.warn( console.warn(
`Silencing logs from child processes (config.logOutputFromChildProcesses = ${logOutputFromChildProcesses})` `Silencing logs from child processes (config.logOutputFromChildProcesses = ${logOutputFromChildProcesses})`
@ -60,7 +61,7 @@ function assembleErrorAfterChildExitsWithErrors(exitCode, childErrors) {
return childErrorSummary; return childErrorSummary;
} }
async function renderHydrogenToString(renderOptions) { async function runInChildProcess(modulePath, runArguments, { timeout }) {
let abortTimeoutId; let abortTimeoutId;
try { try {
let childErrors = []; let childErrors = [];
@ -68,9 +69,10 @@ async function renderHydrogenToString(renderOptions) {
const controller = new AbortController(); const controller = new AbortController();
const { signal } = controller; const { signal } = controller;
// We use a child_process because we want to be able to exit the process after // We use a child_process because we want to be able to exit the process
// we receive the SSR results. // after we receive the results. We use `fork` instead of `exec`/`spawn` so
const child = fork(require.resolve('./2-render-hydrogen-to-string-fork-script'), [], { // that we can pass a module instead of running a command.
const child = fork(require.resolve('./child-fork-script'), [modulePath], {
signal, signal,
// Default to silencing logs from the child process. We already have // Default to silencing logs from the child process. We already have
// proper instrumentation of any errors that might occur. // proper instrumentation of any errors that might occur.
@ -92,15 +94,17 @@ async function renderHydrogenToString(renderOptions) {
}); });
} }
// Pass the renderOptions to the child by sending instead of via argv because we // Pass the runArguments to the child by sending instead of via argv because
// will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG` with // we will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG`
// argv. // with argv.
child.send(renderOptions); child.send(runArguments);
// Stops the child process if it takes too long // Stops the child process if it takes too long
abortTimeoutId = setTimeout(() => { if (timeout) {
controller.abort(); abortTimeoutId = setTimeout(() => {
}, RENDER_TIMEOUT); controller.abort();
}, timeout);
}
const returnedData = await new Promise((resolve, reject) => { const returnedData = await new Promise((resolve, reject) => {
let data = ''; let data = '';
@ -112,8 +116,8 @@ async function renderHydrogenToString(renderOptions) {
childError.name = result.name; childError.name = result.name;
childError.message = result.message; childError.message = result.message;
childError.stack = result.stack; childError.stack = result.stack;
// When an error happens while rendering Hydrogen, we only expect one // When an error happens while running the module, we only expect one
// error to come through here from the main line to render Hydrogen. // error to come through here from the main-line to run the module.
// But it's possible to get multiple errors from async out of context // But it's possible to get multiple errors from async out of context
// places since we also listen to `uncaughtException` and // places since we also listen to `uncaughtException` and
// `unhandledRejection`. // `unhandledRejection`.
@ -146,7 +150,7 @@ async function renderHydrogenToString(renderOptions) {
); );
reject( reject(
new RethrownError( new RethrownError(
`Timed out while rendering Hydrogen to string so we aborted the child process after ${RENDER_TIMEOUT}ms. Any child errors? (${childErrors.length})`, `Timed out while running ${modulePath} so we aborted the child process after ${timeout}ms. Any child errors? (${childErrors.length})`,
childErrorSummary childErrorSummary
) )
); );
@ -159,19 +163,12 @@ async function renderHydrogenToString(renderOptions) {
if (!returnedData) { if (!returnedData) {
const childErrorSummary = assembleErrorAfterChildExitsWithErrors(childExitCode, childErrors); const childErrorSummary = assembleErrorAfterChildExitsWithErrors(childExitCode, childErrors);
throw new RethrownError( throw new RethrownError(
`No HTML sent from child process to render Hydrogen. Any child errors? (${childErrors.length})`, `No \`returnedData\` sent from child process while running the module (${modulePath}). Any child errors? (${childErrors.length})`,
childErrorSummary childErrorSummary
); );
} }
return returnedData; return returnedData;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify(
renderOptions
)}`,
err
);
} finally { } finally {
// We don't have to add a undefined/null check here because `clearTimeout` // We don't have to add a undefined/null check here because `clearTimeout`
// works with any value you give it and doesn't throw an error. // works with any value you give it and doesn't throw an error.
@ -179,4 +176,4 @@ async function renderHydrogenToString(renderOptions) {
} }
} }
module.exports = traceFunction(renderHydrogenToString); module.exports = traceFunction(runInChildProcess);

View File

@ -18,8 +18,6 @@ const { readFile } = require('fs').promises;
const crypto = require('crypto'); const crypto = require('crypto');
const { parseHTML } = require('linkedom'); const { parseHTML } = require('linkedom');
const config = require('../lib/config');
// Setup the DOM context with any necessary shims/polyfills and ensure the VM // Setup the DOM context with any necessary shims/polyfills and ensure the VM
// context global has everything that a normal document does so Hydrogen can // context global has everything that a normal document does so Hydrogen can
// render. // render.
@ -62,25 +60,16 @@ function createDomAndSetupVmContext() {
}; };
} }
async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, stateEventMap }) { async function _renderHydrogenToStringUnsafe(renderOptions) {
assert(fromTimestamp); assert(renderOptions);
assert(roomData); assert(renderOptions.vmRenderScriptFilePath);
assert(events); assert(renderOptions.vmRenderContext);
assert(stateEventMap);
const { dom, vmContext } = createDomAndSetupVmContext(); const { dom, vmContext } = createDomAndSetupVmContext();
// Define this for the SSR context // Define this for the SSR context
dom.window.matrixPublicArchiveContext = { dom.window.matrixPublicArchiveContext = {
fromTimestamp, ...renderOptions.vmRenderContext,
roomData,
events,
stateEventMap,
config: {
basePort: config.get('basePort'),
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
}; };
// Serialize it for when we run this again client-side // Serialize it for when we run this again client-side
dom.document.body.insertAdjacentHTML( dom.document.body.insertAdjacentHTML(
@ -92,18 +81,16 @@ async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events,
` `
); );
const hydrogenRenderScriptCode = await readFile( const vmRenderScriptFilePath = renderOptions.vmRenderScriptFilePath;
path.resolve(__dirname, '../../shared/4-hydrogen-vm-render-script.js'), const hydrogenRenderScriptCode = await readFile(vmRenderScriptFilePath, 'utf8');
'utf8'
);
const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, { const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, {
filename: '4-hydrogen-vm-render-script.js', filename: path.basename(vmRenderScriptFilePath),
}); });
// Note: The VM does not exit after the result is returned here and is why // Note: The VM does not exit after the result is returned here and is why
// this should be run in a `child_process` that we can exit. // this should be run in a `child_process` that we can exit.
const vmResult = hydrogenRenderScript.runInContext(vmContext); const vmResult = hydrogenRenderScript.runInContext(vmContext);
// Wait for everything to render // Wait for everything to render
// (waiting on the promise returned from `4-hydrogen-vm-render-script.js`) // (waiting on the promise returned from the VM render script)
await vmResult; await vmResult;
const documentString = dom.document.body.toString(); const documentString = dom.document.body.toString();

View File

@ -0,0 +1,54 @@
'use strict';
// Server-side render Hydrogen to a string.
//
// We use a `child_process` because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const assert = require('assert');
const RethrownError = require('../lib/rethrown-error');
const runInChildProcess = require('../child-process-runner/run-in-child-process');
// The render should be fast. If it's taking more than 5 seconds, something has
// gone really wrong.
const RENDER_TIMEOUT = 5000;
async function renderHydrogenToString(renderOptions) {
assert(renderOptions.vmRenderScriptFilePath);
assert(renderOptions.vmRenderContext);
try {
// In development, if you're running into a hard to track down error with
// the render hydrogen stack and fighting against the multiple layers of
// complexity with `child_process `and `vm`; you can get away with removing
// the `child_process` part of it by using
// `render-hydrogen-to-string-unsafe` directly.
// ```js
// const _renderHydrogenToStringUnsafe = require('../hydrogen-render/render-hydrogen-to-string-unsafe');
// const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe(renderOptions);
// ```
//
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const hydrogenHtmlOutput = await runInChildProcess(
require.resolve('./render-hydrogen-to-string-unsafe'),
renderOptions,
{
timeout: RENDER_TIMEOUT,
}
);
return hydrogenHtmlOutput;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderHydrogenToString arguments: ${JSON.stringify(
renderOptions
)}`,
err
);
}
}
module.exports = renderHydrogenToString;

View File

@ -13,7 +13,7 @@ const timeoutMiddleware = require('./timeout-middleware');
const fetchRoomData = require('../fetch-room-data'); const fetchRoomData = require('../fetch-room-data');
const fetchEventsInRange = require('../fetch-events-in-range'); const fetchEventsInRange = require('../fetch-events-in-range');
const ensureRoomJoined = require('../ensure-room-joined'); const ensureRoomJoined = require('../ensure-room-joined');
const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string'); const renderHydrogenToString = require('../hydrogen-render/render-hydrogen-to-string');
const sanitizeHtml = require('../lib/sanitize-html'); const sanitizeHtml = require('../lib/sanitize-html');
const safeJson = require('../lib/safe-json'); const safeJson = require('../lib/safe-json');
@ -182,21 +182,21 @@ function installRoutes(app) {
throw new Error('TODO: Redirect user to smaller hour range'); throw new Error('TODO: Redirect user to smaller hour range');
} }
// In development, if you're running into a hard to track down error with
// the render hydrogen stack and fighting against the multiple layers of
// complexity with `child_process `and `vm`; you can get away with removing
// the `child_process` part of it by using
// `3-render-hydrogen-to-string-unsafe` directly.
// ```js
// const _renderHydrogenToStringUnsafe = require('../hydrogen-render/3-render-hydrogen-to-string-unsafe');
// const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe({ /* renderData */ });
// ```
//
const hydrogenHtmlOutput = await renderHydrogenToString({ const hydrogenHtmlOutput = await renderHydrogenToString({
fromTimestamp, vmRenderScriptFilePath: path.resolve(
roomData, __dirname,
events, '../../shared/hydrogen-vm-render-script.js'
stateEventMap, ),
vmRenderContext: {
fromTimestamp,
roomData,
events,
stateEventMap,
config: {
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
},
}); });
const serializableSpans = getSerializableSpans(); const serializableSpans = getSerializableSpans();

View File

@ -19,6 +19,7 @@ build(
}) })
); );
// Pass through some args
const args = []; const args = [];
if (process.argv.includes('--tracing')) { if (process.argv.includes('--tracing')) {
args.push('--tracing'); args.push('--tracing');