Stop Hydrogen from running in the background after we get our SSR HTML render data (#36)

We now run the Hydrogen render in a `child_process` so we can exit the whole render process. We still use the `vm` to setup the browser-like globals. With a `vm`, everything continues to run even after it returns and there isn't a way to clean up, stop, kill, terminate the vm script or context so we need this extra `child_process` now to clean up. I don't like the complexity necessary for this though. I wish the `vm` API allowed for this use case. The only way to stop a `vm` is the `timeout` and we want to stop as soon as we return.

Fix https://github.com/matrix-org/matrix-public-archive/issues/34
This commit is contained in:
Eric Eastwood 2022-07-05 17:30:52 -05:00 committed by GitHub
parent 9c8f980e2a
commit f738dbc1da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 242 additions and 100 deletions

View File

@ -1,2 +1,2 @@
import mounted from 'matrix-public-archive-shared/hydrogen-vm-render-script';
import mounted from 'matrix-public-archive-shared/4-hydrogen-vm-render-script';
console.log('mounted', mounted);

View File

@ -0,0 +1,97 @@
'use strict';
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results. We don't want Hydrogen to keep running after we
// get our initial rendered HTML.
const fork = require('child_process').fork;
const RethrownError = require('../lib/rethrown-error');
// The render should be fast. If it's taking more than 5 seconds, something has
// gone really wrong.
const RENDER_TIMEOUT = 5000;
async function renderHydrogenToString(options) {
try {
let data = '';
let childErrors = [];
const controller = new AbortController();
const { signal } = controller;
// We use a child_process because we want to be able to exit the process after
// we receive the SSR results.
const child = fork(
require.resolve('./2-render-hydrogen-to-string-fork-script'),
[JSON.stringify(options)],
{
signal,
//cwd: process.cwd(),
}
);
// Stops the child process if it takes too long
setTimeout(() => {
controller.abort();
}, RENDER_TIMEOUT);
// Collect the data passed back by the child
child.on('message', function (result) {
if (result.error) {
// De-serialize the error
const childError = new Error();
childError.name = result.name;
childError.message = result.message;
childError.stack = result.stack;
// We shouldn't really run into a situation where there are multiple
// errors but since this is just a message bus, it's possible.
childErrors.push(childError);
} else {
data += result.data;
}
});
await new Promise((resolve, reject) => {
child.on('close', (exitCode) => {
// Exited successfully
if (exitCode === 0) {
resolve(data);
} else {
let extraErrorsMessage = '';
if (childErrors.length > 1) {
extraErrorsMessage = ` (somehow we saw ${childErrors.length} errors but we really always expect 1 error)`;
}
const error = new RethrownError(
`Child process failed with exit code ${exitCode}${extraErrorsMessage}`,
childErrors[0]
);
reject(error);
}
});
// When a problem occurs when spawning the process or gets aborted
child.on('error', (err) => {
if (err.name === 'AbortError') {
throw new RethrownError(
`Timed out while rendering Hydrogen to string so we aborted the child process after ${RENDER_TIMEOUT}ms`,
err
);
}
reject(err);
});
});
return data;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify(
arguments[0]
)}`,
err
);
}
}
module.exports = renderHydrogenToString;

View File

@ -0,0 +1,38 @@
'use strict';
// Called by `child_process` `fork` in `render-hydrogen-to-string.js` so we can
// get the data and exit the process cleanly. We don't want Hydrogen to keep
// running after we get our initial rendered HTML.
const assert = require('assert');
const _renderHydrogenToStringUnsafe = require('./3-render-hydrogen-to-string-unsafe');
(async () => {
try {
assert(
process.argv[2],
'No command-line arguments passed to `render-hydrogen-to-string-fork-script.js`. Make sure these are being passed in when we spawn the new process.'
);
const options = JSON.parse(process.argv[2]);
const resultantHtml = await _renderHydrogenToStringUnsafe(options);
// Send back the data we need
process.send({
data: resultantHtml,
});
// End the process gracefully. We got all the data we need.
process.exit(0);
} catch (err) {
// Serialize the error and send it back up to the parent process so we can
// interact with it and know what happened when the process exits.
process.send({
error: true,
name: err.name,
message: err.message,
stack: err.stack,
});
// Throw the error so the process fails and exits
throw err;
}
})();

View File

@ -0,0 +1,100 @@
'use strict';
// Server-side render Hydrogen to a string using a browser-like context thanks
// to `linkedom`. We use a VM so we can put all of the browser-like globals in
// place.
//
// Note: This is marked as unsafe because the render script is run in a VM which
// doesn't exit after we get the result (Hydrogen keeps running). There isn't a
// way to stop, terminate, or kill a vm script or vm context so in order to be
// safe, we need to run this inside of a child_process which we can kill after.
// This is why we have the `1-render-hydrogen-to-string.js` layer to handle
// this.
const assert = require('assert');
const vm = require('vm');
const path = require('path');
const { readFile } = require('fs').promises;
const crypto = require('crypto');
const { parseHTML } = require('linkedom');
const config = require('../lib/config');
async function _renderHydrogenToStringUnsafe({ fromTimestamp, roomData, events, stateEventMap }) {
assert(fromTimestamp);
assert(roomData);
assert(events);
assert(stateEventMap);
const dom = parseHTML(`
<!doctype html>
<html>
<head></head>
<body>
<div id="app" class="hydrogen"></div>
</body>
</html>
`);
if (!dom.requestAnimationFrame) {
dom.requestAnimationFrame = function (cb) {
setTimeout(cb, 0);
};
}
// Define this for the SSR context
dom.window.matrixPublicArchiveContext = {
fromTimestamp,
roomData,
events,
stateEventMap,
config: {
basePort: config.get('basePort'),
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
};
// Serialize it for when we run this again client-side
dom.document.body.insertAdjacentHTML(
'beforeend',
`
<script type="text/javascript">
window.matrixPublicArchiveContext = ${JSON.stringify(dom.window.matrixPublicArchiveContext)}
</script>
`
);
const vmContext = vm.createContext(dom);
// Make the dom properties available in sub-`require(...)` calls
vmContext.global.window = dom.window;
vmContext.global.document = dom.document;
vmContext.global.Node = dom.Node;
vmContext.global.navigator = dom.navigator;
vmContext.global.DOMParser = dom.DOMParser;
// Make sure `webcrypto` exists since it was only introduced in Node.js v17
assert(crypto.webcrypto);
vmContext.global.crypto = crypto.webcrypto;
// So require(...) works in the vm
vmContext.global.require = require;
// So we can see logs from the underlying vm
vmContext.global.console = console;
const hydrogenRenderScriptCode = await readFile(
path.resolve(__dirname, '../../shared/4-hydrogen-vm-render-script.js'),
'utf8'
);
const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, {
filename: '4-hydrogen-vm-render-script.js',
});
// Note: The VM does not exit after the result is returned here
const vmResult = hydrogenRenderScript.runInContext(vmContext);
// Wait for everything to render
// (waiting on the promise returned from `4-hydrogen-vm-render-script.js`)
await vmResult;
const documentString = dom.document.body.toString();
return documentString;
}
module.exports = _renderHydrogenToStringUnsafe;

View File

@ -1,98 +0,0 @@
'use strict';
const assert = require('assert');
const vm = require('vm');
const path = require('path');
const { readFile } = require('fs').promises;
const crypto = require('crypto');
const { parseHTML } = require('linkedom');
const RethrownError = require('./lib/rethrown-error');
const config = require('./lib/config');
async function renderHydrogenToString({ fromTimestamp, roomData, events, stateEventMap }) {
try {
assert(fromTimestamp);
assert(roomData);
assert(events);
assert(stateEventMap);
const dom = parseHTML(`
<!doctype html>
<html>
<head></head>
<body>
<div id="app" class="hydrogen"></div>
</body>
</html>
`);
if (!dom.requestAnimationFrame) {
dom.requestAnimationFrame = function (cb) {
setTimeout(cb, 0);
};
}
// Define this for the SSR context
dom.window.matrixPublicArchiveContext = {
fromTimestamp,
roomData,
events,
stateEventMap,
config: {
basePort: config.get('basePort'),
basePath: config.get('basePath'),
matrixServerUrl: config.get('matrixServerUrl'),
},
};
// Serialize it for when we run this again client-side
dom.document.body.insertAdjacentHTML(
'beforeend',
`
<script type="text/javascript">
window.matrixPublicArchiveContext = ${JSON.stringify(dom.window.matrixPublicArchiveContext)}
</script>
`
);
const vmContext = vm.createContext(dom);
// Make the dom properties available in sub-`require(...)` calls
vmContext.global.window = dom.window;
vmContext.global.document = dom.document;
vmContext.global.Node = dom.Node;
vmContext.global.navigator = dom.navigator;
vmContext.global.DOMParser = dom.DOMParser;
// Make sure `webcrypto` exists since it was only introduced in Node.js v17
assert(crypto.webcrypto);
vmContext.global.crypto = crypto.webcrypto;
// So require(...) works in the vm
vmContext.global.require = require;
// So we can see logs from the underlying vm
vmContext.global.console = console;
const hydrogenRenderScriptCode = await readFile(
path.resolve(__dirname, '../shared/hydrogen-vm-render-script.js'),
'utf8'
);
const hydrogenRenderScript = new vm.Script(hydrogenRenderScriptCode, {
filename: 'hydrogen-vm-render-script.js',
});
const vmResult = hydrogenRenderScript.runInContext(vmContext);
// Wait for everything to render
// (waiting on the promise returned from `hydrogen-render-script.js`)
await vmResult;
const documentString = dom.document.body.toString();
return documentString;
} catch (err) {
throw new RethrownError(
`Failed to render Hydrogen to string. In order to reproduce, feed in these arguments into \`renderHydrogenToString(...)\`:\n renderToString arguments: ${JSON.stringify(
arguments[0]
)}`,
err
);
}
}
module.exports = renderHydrogenToString;

View File

@ -9,7 +9,7 @@ const StatusError = require('../lib/status-error');
const fetchRoomData = require('../fetch-room-data');
const fetchEventsInRange = require('../fetch-events-in-range');
const renderHydrogenToString = require('../render-hydrogen-to-string');
const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string');
const config = require('../lib/config');
const basePath = config.get('basePath');

View File

@ -1,5 +1,10 @@
'use strict';
// Isomorphic script that runs in the browser and on the server for SSR (needs
// browser context) that renders Hydrogen to the `document.body`.
//
// Data is passed in via `window.matrixPublicArchiveContext`
const assert = require('matrix-public-archive-shared/lib/assert');
const {
Platform,