Render pipeline separation of concerns (#64)

Follow-up to https://github.com/matrix-org/matrix-public-archive/pull/36

Render pipeline separation of concerns:

 1. Run in `child_process`
 2. Hydrogen render
 
It's now just a generic `child_process` runner that runs the Hydrogen render in it. This eliminates the windy path of the 1-4 steps that was only held together by the file names themselves.
This commit is contained in:
Eric Eastwood 2022-09-02 20:49:06 -05:00 committed by GitHub
parent f6bd581f77
commit 02b86a8405
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 128 additions and 79 deletions

View File

@ -1,2 +1,2 @@
import mounted from 'matrix-public-archive-shared/4-hydrogen-vm-render-script';
import mounted from 'matrix-public-archive-shared/hydrogen-vm-render-script';
console.log('mounted', mounted);

View File

@ -1,13 +1,11 @@
'use strict';
// Called by `child_process` `fork` in `render-hydrogen-to-string.js` so we can
// get the data and exit the process cleanly. We don't want Hydrogen to keep
// running after we get our initial rendered HTML.
// Called by `child_process` `fork` in `run-in-child-process.js` so we can
// get the data and exit the process cleanly.
const assert = require('assert');
const RethrownError = require('../lib/rethrown-error');
const _renderHydrogenToStringUnsafe = require('./3-render-hydrogen-to-string-unsafe');
// Serialize the error and send it back up to the parent process so we can
// interact with it and know what happened when the process exits.
@ -42,11 +40,12 @@ async function serializeError(err) {
}
// We don't exit the process after encountering one of these because maybe it
// doesn't matter to the main render process in Hydrogen.
// doesn't matter to the main-line process in the module.
//
// If we don't listen for these events, the child will exit with status code 1
// (error) when they occur.
process.on('uncaughtException', async (err /*, origin*/) => {
console.log('2 uncaughtException', err);
await serializeError(new RethrownError('uncaughtException in child process', err));
});
@ -57,17 +56,28 @@ process.on('unhandledRejection', async (reason /*, promise*/) => {
// Only kick everything off once we receive the options. We pass in the options
// this way instead of argv because we will run into `Error: spawn E2BIG` and
// `Error: spawn ENAMETOOLONG` with argv.
process.on('message', async (renderOptions) => {
process.on('message', async (runArguments) => {
try {
const resultantHtml = await _renderHydrogenToStringUnsafe(renderOptions);
assert(runArguments);
assert(resultantHtml, `No HTML returned from _renderHydrogenToStringUnsafe.`);
// Require the module that we're supposed to run
const modulePath = process.argv[2];
assert(
modulePath,
'Expected `modulePath` to be passed into `child-fork-script.js` via argv[2]'
);
const moduleToRun = require(modulePath);
// Run the module
const result = await moduleToRun(runArguments);
assert(result, `No result returned from module we ran (${modulePath}).`);
// Send back the data we need to the parent.
await new Promise((resolve, reject) => {
process.send(
{
data: resultantHtml,
data: result,
},
(err) => {
if (err) {

View File

@ -1,22 +1,23 @@
'use strict';
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const fork = require('child_process').fork;
// Generic `child_process` runner that handles running the given module with the
// given `runArguments` and returning the async result. Handles the complexity
// error handling, passing large argument objects, and timeouts.
//
// Error handling includes main-line errors seen while waiting the async result,
// as well as keeping track of out of band `uncaughtException` and
// `unhandledRejection` to give more context if the process exits with code 1
// (error) or timesout.
const assert = require('assert');
const fork = require('child_process').fork;
const RethrownError = require('../lib/rethrown-error');
const { traceFunction } = require('../tracing/trace-utilities');
const config = require('../lib/config');
const logOutputFromChildProcesses = config.get('logOutputFromChildProcesses');
// The render should be fast. If it's taking more than 5 seconds, something has
// gone really wrong.
const RENDER_TIMEOUT = 5000;
if (!logOutputFromChildProcesses) {
console.warn(
`Silencing logs from child processes (config.logOutputFromChildProcesses = ${logOutputFromChildProcesses})`
@ -60,7 +61,7 @@ function assembleErrorAfterChildExitsWithErrors(exitCode, childErrors) {
return childErrorSummary;
}
async function renderHydrogenToString(renderOptions) {
async function runInChildProcess(modulePath, runArguments, { timeout }) {
let abortTimeoutId;
try {
let childErrors = [];
@ -68,9 +69,10 @@ async function renderHydrogenToString(renderOptions) {
const controller = new AbortController();
const { signal } = controller;
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results.
const child = fork(require.resolve('./2-render-hydrogen-to-string-fork-script'), [], {
// We use a child_process because we want to be able to exit the process
// after we receive the results. We use `fork` instead of `exec`/`spawn` so
// that we can pass a module instead of running a command.
const child = fork(require.resolve('./child-fork-script'), [modulePath], {
signal,
// Default to silencing logs from the child process. We already have
// proper instrumentation of any errors that might occur.
@ -92,15 +94,17 @@ async function renderHydrogenToString(renderOptions) {
});
}
// Pass the renderOptions to the child by sending instead of via argv because we
// will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG` with
// argv.
child.send(renderOptions);
// Pass the runArguments to the child by sending instead of via argv because
// we will run into `Error: spawn E2BIG` and `Error: spawn ENAMETOOLONG`
// with argv.
child.send(runArguments);
// Stops the child process if it takes too long
abortTimeoutId = setTimeout(() => {
controller.abort();
}, RENDER_TIMEOUT);
if (timeout) {
abortTimeoutId = setTimeout(() => {
controller.abort();
}, timeout);
}
const returnedData = await new Promise((resolve, reject) => {
let data = '';
@ -112,8 +116,8 @@ async function renderHydrogenToString(renderOptions) {
childError.name = result.name;
childError.message = result.message;
childError.stack = result.stack;
// When an error happens while rendering Hydrogen, we only expect one
// error to come through here from the main line to render Hydrogen.
// When an error happens while running the module, we only expect one
// error to come through here from the main-line to run the module.
// But it's possible to get multiple errors from async out of context
// places since we also listen to `uncaughtException` and
// `unhandledRejection`.
@ -146,7 +150,7 @@ async function renderHydrogenToString(renderOptions) {
);
reject(
new RethrownError(
`Timed out while rendering Hydrogen to string so we aborted the child process after ${RENDER_TIMEOUT}ms. Any child errors? (${childErrors.length})`,
`Timed out while running ${modulePath} so we aborted the child process after ${timeout}ms. Any child errors? (${childErrors.length})`,
childErrorSummary
)
);
@ -159,19 +163,12 @@ async function renderHydrogenToString(renderOptions) {
if (!returnedData) {
const childErrorSummary = assembleErrorAfterChildExitsWithErrors(childExitCode, childErrors);
throw new RethrownError(
`No HTML sent from child process to render Hydrogen. Any child errors? (${childErrors.length})`,
`No \`returnedData\` sent from child process while running the module (${modulePath}). Any child errors? (${childErrors.length})`,
childErrorSummary
);
}
return returnedData;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify(
renderOptions
)}`,
err
);
} finally {
// We don't have to add a undefined/null check here because `clearTimeout`
// works with any value you give it and doesn't throw an error.
@ -179,4 +176,4 @@ async function renderHydrogenToString(renderOptions) {
}
}
module.exports = traceFunction(renderHydrogenToString);
module.exports = traceFunction(runInChildProcess);

View File

@ -18,8 +18,6 @@ const { readFile } = require('fs').promises;
const crypto = require('crypto');
const { parseHTML } = require('linkedom');
const config = require('../lib/config');
// Setup the DOM context with any necessary shims/polyfills and ensure the VM
// context global has everything that a normal document does so Hydrogen can
// render.
@ -62,25 +60,16 @@ function createDomAndSetupVmContext() {
};
}
async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, stateEventMap }) {
assert(fromTimestamp);
assert(roomData);
assert(events);
assert(stateEventMap);
async function _renderHydrogenToStringUnsafe(renderOptions) {
assert(renderOptions);
assert(renderOptions.vmRenderScriptFilePath);
assert(renderOptions.vmRenderContext);
const { dom, vmContext } = createDomAndSetupVmContext();
// Define this for the SSR context
dom.window.matrixPublicArchiveContext = {
fromTimestamp,
roomData,
events,
stateEventMap,
config: {
basePort: config.get('basePort'),
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
...renderOptions.vmRenderContext,
};
// Serialize it for when we run this again client-side
dom.document.body.insertAdjacentHTML(
@ -92,18 +81,16 @@ async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events,
`
);
const hydrogenRenderScriptCode = await readFile(
path.resolve(__dirname, '../../shared/4-hydrogen-vm-render-script.js'),
'utf8'
);
const vmRenderScriptFilePath = renderOptions.vmRenderScriptFilePath;
const hydrogenRenderScriptCode = await readFile(vmRenderScriptFilePath, 'utf8');
const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, {
filename: '4-hydrogen-vm-render-script.js',
filename: path.basename(vmRenderScriptFilePath),
});
// Note: The VM does not exit after the result is returned here and is why
// this should be run in a `child_process` that we can exit.
const vmResult = hydrogenRenderScript.runInContext(vmContext);
// Wait for everything to render
// (waiting on the promise returned from `4-hydrogen-vm-render-script.js`)
// (waiting on the promise returned from the VM render script)
await vmResult;
const documentString = dom.document.body.toString();

View File

@ -0,0 +1,54 @@
'use strict';
// Server-side render Hydrogen to a string.
//
// We use a `child_process` because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const assert = require('assert');
const RethrownError = require('../lib/rethrown-error');
const runInChildProcess = require('../child-process-runner/run-in-child-process');
// The render should be fast. If it's taking more than 5 seconds, something has
// gone really wrong.
const RENDER_TIMEOUT = 5000;
async function renderHydrogenToString(renderOptions) {
assert(renderOptions.vmRenderScriptFilePath);
assert(renderOptions.vmRenderContext);
try {
// In development, if you're running into a hard to track down error with
// the render hydrogen stack and fighting against the multiple layers of
// complexity with `child_process `and `vm`; you can get away with removing
// the `child_process` part of it by using
// `render-hydrogen-to-string-unsafe` directly.
// ```js
// const _renderHydrogenToStringUnsafe = require('../hydrogen-render/render-hydrogen-to-string-unsafe');
// const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe(renderOptions);
// ```
//
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const hydrogenHtmlOutput = await runInChildProcess(
require.resolve('./render-hydrogen-to-string-unsafe'),
renderOptions,
{
timeout: RENDER_TIMEOUT,
}
);
return hydrogenHtmlOutput;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderHydrogenToString arguments: ${JSON.stringify(
renderOptions
)}`,
err
);
}
}
module.exports = renderHydrogenToString;

View File

@ -13,7 +13,7 @@ const timeoutMiddleware = require('./timeout-middleware');
const fetchRoomData = require('../fetch-room-data');
const fetchEventsInRange = require('../fetch-events-in-range');
const ensureRoomJoined = require('../ensure-room-joined');
const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string');
const renderHydrogenToString = require('../hydrogen-render/render-hydrogen-to-string');
const sanitizeHtml = require('../lib/sanitize-html');
const safeJson = require('../lib/safe-json');
@ -182,21 +182,21 @@ function installRoutes(app) {
throw new Error('TODO: Redirect user to smaller hour range');
}
// In development, if you're running into a hard to track down error with
// the render hydrogen stack and fighting against the multiple layers of
// complexity with `child_process `and `vm`; you can get away with removing
// the `child_process` part of it by using
// `3-render-hydrogen-to-string-unsafe` directly.
// ```js
// const _renderHydrogenToStringUnsafe = require('../hydrogen-render/3-render-hydrogen-to-string-unsafe');
// const hydrogenHtmlOutput = await _renderHydrogenToStringUnsafe({ /* renderData */ });
// ```
//
const hydrogenHtmlOutput = await renderHydrogenToString({
fromTimestamp,
roomData,
events,
stateEventMap,
vmRenderScriptFilePath: path.resolve(
__dirname,
'../../shared/hydrogen-vm-render-script.js'
),
vmRenderContext: {
fromTimestamp,
roomData,
events,
stateEventMap,
config: {
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
},
});
const serializableSpans = getSerializableSpans();

View File

@ -19,6 +19,7 @@ build(
})
);
// Pass through some args
const args = [];
if (process.argv.includes('--tracing')) {
args.push('--tracing');