OpenTelemetry tracing so we can see spans where the app is taking time (#27)
OpenTelemetry tracing so we can see spans where the app is taking time. For the user, we specifically show the spans for the external API HTTP requests that are slow (so we know when the Matrix API is being slow). Enable tracing: - `npm run start -- --tracing` - `npm run start-dev -- --tracing` What does this PR change: - Adds OpenTelemetry tracing with some of the automatic instrumentation (includes HTTP and express) - We ignore traces for serving static assets (just noise) - Adds `X-Trace-Id` to the response headers - Adds `window.tracingSpansForRequest` which includes the external HTTP API requests made during the request - Adds a fancy 504 timeout page that includes trace details and lists the slow HTTP requests - Adds `jaegerTracesEndpoint` configuration to export tracing spans to Jaeger - Related to, https://github.com/matrix-org/matrix-public-archive/issues/26
This commit is contained in:
parent
13eb92b067
commit
ddfe94beab
|
@ -3,6 +3,8 @@
|
||||||
"basePath": "http://localhost:3050",
|
"basePath": "http://localhost:3050",
|
||||||
"matrixServerUrl": "http://localhost:8008/",
|
"matrixServerUrl": "http://localhost:8008/",
|
||||||
"archiveMessageLimit": 500,
|
"archiveMessageLimit": 500,
|
||||||
|
"requestTimeoutMs": 25000,
|
||||||
|
"UNCOMMENT_jaegerTracesEndpoint": "http://localhost:14268/api/traces",
|
||||||
"testMatrixServerUrl1": "http://localhost:11008/",
|
"testMatrixServerUrl1": "http://localhost:11008/",
|
||||||
"testMatrixServerUrl2": "http://localhost:12008/",
|
"testMatrixServerUrl2": "http://localhost:12008/",
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
11
package.json
11
package.json
|
@ -30,6 +30,17 @@
|
||||||
"vite": "^2.9.6"
|
"vite": "^2.9.6"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@opentelemetry/api": "^1.1.0",
|
||||||
|
"@opentelemetry/auto-instrumentations-node": "^0.31.0",
|
||||||
|
"@opentelemetry/context-async-hooks": "^1.4.0",
|
||||||
|
"@opentelemetry/core": "^1.4.0",
|
||||||
|
"@opentelemetry/exporter-jaeger": "^1.3.1",
|
||||||
|
"@opentelemetry/instrumentation": "^0.30.0",
|
||||||
|
"@opentelemetry/propagator-ot-trace": "^0.26.0",
|
||||||
|
"@opentelemetry/resources": "^1.3.1",
|
||||||
|
"@opentelemetry/sdk-trace-base": "^1.3.1",
|
||||||
|
"@opentelemetry/semantic-conventions": "^1.3.1",
|
||||||
|
"dompurify": "^2.3.9",
|
||||||
"express": "^4.17.2",
|
"express": "^4.17.2",
|
||||||
"hydrogen-view-sdk": "npm:@mlm/hydrogen-view-sdk@^0.0.13-scratch",
|
"hydrogen-view-sdk": "npm:@mlm/hydrogen-view-sdk@^0.0.13-scratch",
|
||||||
"linkedom": "^0.14.1",
|
"linkedom": "^0.14.1",
|
||||||
|
|
|
@ -153,3 +153,26 @@ body {
|
||||||
.CalendarView_dayLink_disabled {
|
.CalendarView_dayLink_disabled {
|
||||||
opacity: 0.5;
|
opacity: 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Error pages */
|
||||||
|
|
||||||
|
.heading-sub-detail {
|
||||||
|
font-weight: normal;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tracing-span-list {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
.tracing-span-list-item {
|
||||||
|
margin-bottom: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tracing-span-item-http-details {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
.tracing-span-item-sub-details {
|
||||||
|
margin-left: 4ch;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
## Tracing
|
||||||
|
|
||||||
|
Run the app with the OpenTelemetry tracing.
|
||||||
|
|
||||||
|
```
|
||||||
|
npm run start -- --tracing
|
||||||
|
# or
|
||||||
|
npm run start-dev -- --tracing
|
||||||
|
```
|
||||||
|
|
||||||
|
Manually:
|
||||||
|
|
||||||
|
```
|
||||||
|
node --require './server/tracing.js' server/server.js
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Traces are made up of many spans. Each span defines a `traceId` which it is associated with.
|
||||||
|
|
||||||
|
## Viewing traces in Jaeger
|
||||||
|
|
||||||
|
- Dashboard where users can see visualisations -> http://localhost:16686
|
||||||
|
|
||||||
|
via https://www.jaegertracing.io/docs/1.35/getting-started/
|
||||||
|
|
||||||
|
```
|
||||||
|
docker run -d --name jaeger \
|
||||||
|
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||||
|
-e COLLECTOR_OTLP_ENABLED=true \
|
||||||
|
-p 6831:6831/udp \
|
||||||
|
-p 6832:6832/udp \
|
||||||
|
-p 5778:5778 \
|
||||||
|
-p 16686:16686 \
|
||||||
|
-p 4317:4317 \
|
||||||
|
-p 4318:4318 \
|
||||||
|
-p 14250:14250 \
|
||||||
|
-p 14268:14268 \
|
||||||
|
-p 14269:14269 \
|
||||||
|
-p 9411:9411 \
|
||||||
|
jaegertracing/all-in-one:1.35
|
||||||
|
```
|
||||||
|
|
||||||
|
| Port | Protocol | Component | Function |
|
||||||
|
| ----- | -------- | --------- | -------------------------------------------------------------------------------------------- |
|
||||||
|
| 6831 | UDP | agent | accept jaeger.thrift over Thrift-compact protocol (used by most SDKs) |
|
||||||
|
| 6832 | UDP | agent | accept jaeger.thrift over Thrift-binary protocol (used by Node.js SDK) |
|
||||||
|
| 5775 | UDP | agent | (deprecated) accept zipkin.thrift over compact Thrift protocol (used by legacy clients only) |
|
||||||
|
| 5778 | HTTP | agent | serve configs (sampling, etc.) |
|
||||||
|
| 16686 | HTTP | query | serve frontend |
|
||||||
|
| 4317 | HTTP | collector | accept OpenTelemetry Protocol (OTLP) over gRPC, if enabled |
|
||||||
|
| 4318 | HTTP | collector | accept OpenTelemetry Protocol (OTLP) over HTTP, if enabled |
|
||||||
|
| 14268 | HTTP | collector | accept jaeger.thrift directly from clients |
|
||||||
|
| 14250 | HTTP | collector | accept model.proto |
|
||||||
|
| 9411 | HTTP | collector | Zipkin compatible endpoint (optional) |
|
||||||
|
|
||||||
|
With Service Performance Monitoring (SPM)
|
|
@ -0,0 +1,10 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
// via https://gitlab.com/gitterHQ/webapp/-/blob/615c78d0b0a314c2c9e4098b8d2ba0471d16961b/modules/templates/lib/safe-json.js
|
||||||
|
function safeJson(string) {
|
||||||
|
if (!string) return string;
|
||||||
|
// From http://benalpert.com/2012/08/03/preventing-xss-json.html
|
||||||
|
return string.replace(/<\//g, '<\\/');
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = safeJson;
|
|
@ -0,0 +1,21 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const createDOMPurify = require('dompurify');
|
||||||
|
const { parseHTML } = require('linkedom');
|
||||||
|
|
||||||
|
const dom = parseHTML(`
|
||||||
|
<!doctype html>
|
||||||
|
<html>
|
||||||
|
<head></head>
|
||||||
|
<body></body>
|
||||||
|
</html>
|
||||||
|
`);
|
||||||
|
|
||||||
|
const DOMPurify = createDOMPurify(dom.window);
|
||||||
|
|
||||||
|
function sanitizeHtml(dirtyHtml) {
|
||||||
|
const cleanHtml = DOMPurify.sanitize(dirtyHtml);
|
||||||
|
return cleanHtml;
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = sanitizeHtml;
|
|
@ -7,9 +7,14 @@ const express = require('express');
|
||||||
const asyncHandler = require('../lib/express-async-handler');
|
const asyncHandler = require('../lib/express-async-handler');
|
||||||
const StatusError = require('../lib/status-error');
|
const StatusError = require('../lib/status-error');
|
||||||
|
|
||||||
|
const { handleTracingMiddleware, getSerializableSpans } = require('../tracing/tracing-middleware');
|
||||||
|
const timeoutMiddleware = require('./timeout-middleware');
|
||||||
|
|
||||||
const fetchRoomData = require('../fetch-room-data');
|
const fetchRoomData = require('../fetch-room-data');
|
||||||
const fetchEventsInRange = require('../fetch-events-in-range');
|
const fetchEventsInRange = require('../fetch-events-in-range');
|
||||||
const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string');
|
const renderHydrogenToString = require('../hydrogen-render/1-render-hydrogen-to-string');
|
||||||
|
const sanitizeHtml = require('../lib/sanitize-html');
|
||||||
|
const safeJson = require('../lib/safe-json');
|
||||||
|
|
||||||
const config = require('../lib/config');
|
const config = require('../lib/config');
|
||||||
const basePath = config.get('basePath');
|
const basePath = config.get('basePath');
|
||||||
|
@ -63,9 +68,14 @@ function parseArchiveRangeFromReq(req) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function installRoutes(app) {
|
function installRoutes(app) {
|
||||||
app.get('/health-check', async function (req, res) {
|
app.use(handleTracingMiddleware);
|
||||||
res.send('{ "ok": true }');
|
|
||||||
});
|
app.get(
|
||||||
|
'/health-check',
|
||||||
|
asyncHandler(async function (req, res) {
|
||||||
|
res.send('{ "ok": true }');
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
// We have to disable no-missing-require lint because it doesn't take into
|
// We have to disable no-missing-require lint because it doesn't take into
|
||||||
// account `package.json`. `exports`, see
|
// account `package.json`. `exports`, see
|
||||||
|
@ -73,25 +83,34 @@ function installRoutes(app) {
|
||||||
// eslint-disable-next-line node/no-missing-require
|
// eslint-disable-next-line node/no-missing-require
|
||||||
app.use(express.static(path.dirname(require.resolve('hydrogen-view-sdk/assets/main.js'))));
|
app.use(express.static(path.dirname(require.resolve('hydrogen-view-sdk/assets/main.js'))));
|
||||||
|
|
||||||
app.get('/hydrogen-styles.css', async function (req, res) {
|
app.get(
|
||||||
res.set('Content-Type', 'text/css');
|
'/hydrogen-styles.css',
|
||||||
// We have to disable no-missing-require lint because it doesn't take into
|
asyncHandler(async function (req, res) {
|
||||||
// account `package.json`. `exports`, see
|
res.set('Content-Type', 'text/css');
|
||||||
// https://github.com/mysticatea/eslint-plugin-node/issues/255
|
// We have to disable no-missing-require lint because it doesn't take into
|
||||||
// eslint-disable-next-line node/no-missing-require
|
// account `package.json`. `exports`, see
|
||||||
res.sendFile(require.resolve('hydrogen-view-sdk/assets/theme-element-light.css'));
|
// https://github.com/mysticatea/eslint-plugin-node/issues/255
|
||||||
});
|
// eslint-disable-next-line node/no-missing-require
|
||||||
|
res.sendFile(require.resolve('hydrogen-view-sdk/assets/theme-element-light.css'));
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
// Our own archive app styles
|
// Our own archive app styles
|
||||||
app.get('/styles.css', async function (req, res) {
|
app.get(
|
||||||
res.set('Content-Type', 'text/css');
|
'/styles.css',
|
||||||
res.sendFile(path.join(__dirname, '../../public/styles/styles.css'));
|
asyncHandler(async function (req, res) {
|
||||||
});
|
res.set('Content-Type', 'text/css');
|
||||||
|
res.sendFile(path.join(__dirname, '../../public/styles/styles.css'));
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
app.get('/matrix-public-archive.js', async function (req, res) {
|
app.get(
|
||||||
res.set('Content-Type', 'text/css');
|
'/matrix-public-archive.js',
|
||||||
res.sendFile(path.join(__dirname, '../../dist/matrix-public-archive.es.js'));
|
asyncHandler(async function (req, res) {
|
||||||
});
|
res.set('Content-Type', 'text/css');
|
||||||
|
res.sendFile(path.join(__dirname, '../../dist/matrix-public-archive.es.js'));
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
app.get(
|
app.get(
|
||||||
'/:roomIdOrAlias/event/:eventId',
|
'/:roomIdOrAlias/event/:eventId',
|
||||||
|
@ -106,6 +125,7 @@ function installRoutes(app) {
|
||||||
// https://gitlab.com/gitterHQ/webapp/-/blob/14954e05c905e8c7cb675efebb89116c07cfaab5/server/handlers/app/archive.js#L190-297
|
// https://gitlab.com/gitterHQ/webapp/-/blob/14954e05c905e8c7cb675efebb89116c07cfaab5/server/handlers/app/archive.js#L190-297
|
||||||
app.get(
|
app.get(
|
||||||
'/:roomIdOrAlias/date/:yyyy(\\d{4})/:mm(\\d{2})/:dd(\\d{2})/:hourRange(\\d\\d?-\\d\\d?)?',
|
'/:roomIdOrAlias/date/:yyyy(\\d{4})/:mm(\\d{2})/:dd(\\d{2})/:hourRange(\\d\\d?-\\d\\d?)?',
|
||||||
|
timeoutMiddleware,
|
||||||
asyncHandler(async function (req, res) {
|
asyncHandler(async function (req, res) {
|
||||||
const roomIdOrAlias = req.params.roomIdOrAlias;
|
const roomIdOrAlias = req.params.roomIdOrAlias;
|
||||||
assert(roomIdOrAlias.startsWith('!') || roomIdOrAlias.startsWith('#'));
|
assert(roomIdOrAlias.startsWith('!') || roomIdOrAlias.startsWith('#'));
|
||||||
|
@ -157,6 +177,9 @@ function installRoutes(app) {
|
||||||
stateEventMap,
|
stateEventMap,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const serializableSpans = getSerializableSpans();
|
||||||
|
const serializedSpans = JSON.stringify(serializableSpans);
|
||||||
|
|
||||||
const hydrogenStylesUrl = urlJoin(basePath, 'hydrogen-styles.css');
|
const hydrogenStylesUrl = urlJoin(basePath, 'hydrogen-styles.css');
|
||||||
const stylesUrl = urlJoin(basePath, 'styles.css');
|
const stylesUrl = urlJoin(basePath, 'styles.css');
|
||||||
const jsBundleUrl = urlJoin(basePath, 'matrix-public-archive.js');
|
const jsBundleUrl = urlJoin(basePath, 'matrix-public-archive.js');
|
||||||
|
@ -164,15 +187,19 @@ function installRoutes(app) {
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<link href="${hydrogenStylesUrl}" rel="stylesheet">
|
${sanitizeHtml(`<title>${roomData.name} - Matrix Public Archive</title>`)}
|
||||||
<link href="${stylesUrl}" rel="stylesheet">
|
<link href="${hydrogenStylesUrl}" rel="stylesheet">
|
||||||
|
<link href="${stylesUrl}" rel="stylesheet">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
${hydrogenHtmlOutput}
|
${hydrogenHtmlOutput}
|
||||||
<script type="text/javascript" src="${jsBundleUrl}"></script>
|
<script type="text/javascript" src="${jsBundleUrl}"></script>
|
||||||
|
<script type="text/javascript">window.tracingSpansForRequest = ${safeJson(
|
||||||
|
serializedSpans
|
||||||
|
)};</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
res.set('Content-Type', 'text/html');
|
res.set('Content-Type', 'text/html');
|
||||||
res.send(pageHtml);
|
res.send(pageHtml);
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const urlJoin = require('url-join');
|
||||||
|
const asyncHandler = require('../lib/express-async-handler');
|
||||||
|
const { getSerializableSpans, getActiveTraceId } = require('../tracing/tracing-middleware');
|
||||||
|
const sanitizeHtml = require('../lib/sanitize-html');
|
||||||
|
const safeJson = require('../lib/safe-json');
|
||||||
|
|
||||||
|
const config = require('../lib/config');
|
||||||
|
const basePath = config.get('basePath');
|
||||||
|
assert(basePath);
|
||||||
|
const requestTimeoutMs = config.get('requestTimeoutMs');
|
||||||
|
assert(requestTimeoutMs);
|
||||||
|
|
||||||
|
// Based off of the `connect-timeout` middleware,
|
||||||
|
// https://github.com/expressjs/timeout/blob/f2f520f335f2f2ae255d4778e908e8d38e3a4e68/index.js
|
||||||
|
async function timeoutMiddleware(req, res, next) {
|
||||||
|
const timeoutId = setTimeout(() => {
|
||||||
|
const traceId = getActiveTraceId();
|
||||||
|
const serializableSpans = getSerializableSpans();
|
||||||
|
const serializedSpans = JSON.stringify(serializableSpans);
|
||||||
|
|
||||||
|
let humanReadableSpans;
|
||||||
|
if (serializableSpans.length > 0) {
|
||||||
|
humanReadableSpans = serializableSpans.map((serializableSpan) => {
|
||||||
|
const method = serializableSpan.attributes['http.method'];
|
||||||
|
const url = serializableSpan.attributes['http.url'];
|
||||||
|
const statusCode = serializableSpan.attributes['http.status_code'];
|
||||||
|
|
||||||
|
let durationString = `request is still running (${
|
||||||
|
Date.now() - serializableSpan.startTimeInMs
|
||||||
|
}ms so far)`;
|
||||||
|
if (serializableSpan.durationInMs) {
|
||||||
|
durationString = `took ${serializableSpan.durationInMs}ms`;
|
||||||
|
}
|
||||||
|
|
||||||
|
return `<li class="tracing-span-list-item">
|
||||||
|
<div class="tracing-span-item-http-details">${statusCode ?? '🏃'}: ${method} ${url}</div>
|
||||||
|
<div class="tracing-span-item-sub-details">${durationString}</div>
|
||||||
|
</li>`;
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
const noTracingDataAvailableItem = `<li class="tracing-span-list-item">
|
||||||
|
<div class="tracing-span-item-http-details">No tracing data available</div>
|
||||||
|
</li>`;
|
||||||
|
|
||||||
|
humanReadableSpans = [noTracingDataAvailableItem];
|
||||||
|
}
|
||||||
|
|
||||||
|
const hydrogenStylesUrl = urlJoin(basePath, 'hydrogen-styles.css');
|
||||||
|
const stylesUrl = urlJoin(basePath, 'styles.css');
|
||||||
|
|
||||||
|
const pageHtml = `
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<title>Server timeout - Matrix Public Archive</title>
|
||||||
|
<link href="${hydrogenStylesUrl}" rel="stylesheet">
|
||||||
|
<link href="${stylesUrl}" rel="stylesheet">
|
||||||
|
</head>
|
||||||
|
${/* We add the .hydrogen class here just to get normal body styles */ ''}
|
||||||
|
<body class="hydrogen">
|
||||||
|
<h1>504: Server timeout</h1>
|
||||||
|
<p>Server was unable to respond in time (${requestTimeoutMs / 1000}s)</p>
|
||||||
|
<h3>These are the external API requests that made it slow:</h3>
|
||||||
|
${sanitizeHtml(`<ul class="tracing-span-list">
|
||||||
|
${humanReadableSpans.join('\n')}
|
||||||
|
</ul>`)}
|
||||||
|
|
||||||
|
${sanitizeHtml(
|
||||||
|
`<h2>Trace ID: <span class="heading-sub-detail">${
|
||||||
|
traceId ?? `none (tracing is probably not enabled)`
|
||||||
|
}</span></h2>`
|
||||||
|
)}
|
||||||
|
|
||||||
|
<script type="text/javascript">window.tracingSpansForRequest = ${safeJson(
|
||||||
|
serializedSpans
|
||||||
|
)};</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
// 504 Gateway timeout
|
||||||
|
res.status(504);
|
||||||
|
res.set('Content-Type', 'text/html');
|
||||||
|
|
||||||
|
res.send(pageHtml);
|
||||||
|
}, requestTimeoutMs);
|
||||||
|
|
||||||
|
res.on('finish', function () {
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
});
|
||||||
|
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = asyncHandler(timeoutMiddleware);
|
|
@ -2,6 +2,12 @@
|
||||||
|
|
||||||
console.log('server process.env.NODE_ENV', process.env.NODE_ENV);
|
console.log('server process.env.NODE_ENV', process.env.NODE_ENV);
|
||||||
|
|
||||||
|
if (process.argv.includes('--tracing')) {
|
||||||
|
console.log('Tracing is active 🕵️');
|
||||||
|
const { startTracing } = require('./tracing/tracing.js');
|
||||||
|
startTracing();
|
||||||
|
}
|
||||||
|
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
|
|
||||||
const installRoutes = require('./routes/install-routes');
|
const installRoutes = require('./routes/install-routes');
|
||||||
|
|
|
@ -19,6 +19,11 @@ build(
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const args = [];
|
||||||
|
if (process.argv.includes('--tracing')) {
|
||||||
|
args.push('--tracing');
|
||||||
|
}
|
||||||
|
|
||||||
// Listen for any changes to files and restart the Node.js server process
|
// Listen for any changes to files and restart the Node.js server process
|
||||||
//
|
//
|
||||||
// For API docs, see
|
// For API docs, see
|
||||||
|
@ -28,6 +33,7 @@ nodemon({
|
||||||
ext: 'js json',
|
ext: 'js json',
|
||||||
ignoreRoot: ['.git'],
|
ignoreRoot: ['.git'],
|
||||||
ignore: [path.join(__dirname, '../dist/*')],
|
ignore: [path.join(__dirname, '../dist/*')],
|
||||||
|
args,
|
||||||
});
|
});
|
||||||
|
|
||||||
nodemon
|
nodemon
|
||||||
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
//const { SpanProcessor } = require('@opentelemetry/sdk-trace-base');
|
||||||
|
const { suppressTracing } = require('@opentelemetry/core');
|
||||||
|
const { context } = require('@opentelemetry/api');
|
||||||
|
|
||||||
|
// 1. Keeps track of all spans for a given trace after calling
|
||||||
|
// `trackSpansInTrace(traceId)` (call this in a middleware before any other
|
||||||
|
// routes).
|
||||||
|
// 2. Then during the request, you can see all spans for a given trace with
|
||||||
|
// `getSpansInTrace(traceId)`.
|
||||||
|
// 3. Don't forget to clean up with `dropSpansInTrace(traceId)` after you're
|
||||||
|
// done with the spans (should be done in the `res.on('finish', ...)`
|
||||||
|
// callback).
|
||||||
|
//
|
||||||
|
// NoopSpanProcessor.ts reference:
|
||||||
|
// https://github.com/open-telemetry/opentelemetry-js/blob/747c404fb1295d54eb628a434ebf782d2f180bfb/packages/opentelemetry-sdk-trace-base/src/export/NoopSpanProcessor.ts
|
||||||
|
class CaptureSpanProcessor {
|
||||||
|
// Map from traceId to spans in the trace
|
||||||
|
traceMap = {};
|
||||||
|
|
||||||
|
// We capture when the span starts so that we get any ongoing spans if the
|
||||||
|
// request times out and we want to show what it was stuck on.
|
||||||
|
onStart(span /*, ctx*/) {
|
||||||
|
// prevent downstream exporter calls from generating spans
|
||||||
|
context.with(suppressTracing(context.active()), () => {
|
||||||
|
const traceIdsToTrack = Object.keys(this.traceMap);
|
||||||
|
|
||||||
|
const traceId = span.spanContext().traceId;
|
||||||
|
|
||||||
|
if (traceIdsToTrack.includes(traceId)) {
|
||||||
|
this.traceMap[traceId].push(span);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
onEnd(/*span*/) {
|
||||||
|
/* noop */
|
||||||
|
}
|
||||||
|
|
||||||
|
shutdown() {
|
||||||
|
/* noop */
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
forceFlush() {
|
||||||
|
/* noop */
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all spans for a given trace.
|
||||||
|
getSpansInTrace(traceId) {
|
||||||
|
return this.traceMap[traceId];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keeps track of all spans for a given trace after calling
|
||||||
|
// `trackSpansInTrace(traceId)` (call this in a middleware before any other
|
||||||
|
// routes).
|
||||||
|
trackSpansInTrace(traceId) {
|
||||||
|
this.traceMap[traceId] = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't forget to clean up with `dropSpansInTrace(traceId)` after you're done
|
||||||
|
// with the spans (should be done in the `res.on('finish', ...)` callback).
|
||||||
|
//
|
||||||
|
// alias: Dispose
|
||||||
|
dropSpansInTrace(traceId) {
|
||||||
|
delete this.traceMap[traceId];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = CaptureSpanProcessor;
|
|
@ -0,0 +1,44 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const {
|
||||||
|
hrTimeToMilliseconds,
|
||||||
|
//hrTimeToMicroseconds
|
||||||
|
} = require('@opentelemetry/core');
|
||||||
|
|
||||||
|
const SAFE_ATTRIBUTES = ['http.method', 'http.url', 'http.status_code', 'http.target'];
|
||||||
|
|
||||||
|
// Convert a `Span` object to a plain old JavaScript object with only the info
|
||||||
|
// we care about and that is safe to share. We want something we can JSON
|
||||||
|
// serialize.
|
||||||
|
function serializeSpan(span) {
|
||||||
|
const spanContext = span.spanContext();
|
||||||
|
|
||||||
|
const safeAttributes = {};
|
||||||
|
SAFE_ATTRIBUTES.forEach((safeAttribute) => {
|
||||||
|
safeAttributes[safeAttribute] = span.attributes[safeAttribute];
|
||||||
|
});
|
||||||
|
|
||||||
|
const startTimeInMs = hrTimeToMilliseconds(span.startTime);
|
||||||
|
const endTimeInMs = hrTimeToMilliseconds(span.endTime);
|
||||||
|
|
||||||
|
let durationInMs = null;
|
||||||
|
if (startTimeInMs && endTimeInMs) {
|
||||||
|
durationInMs = endTimeInMs - startTimeInMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
name: span.name,
|
||||||
|
spanContext: {
|
||||||
|
traceId: spanContext.traceId,
|
||||||
|
spanId: spanContext.spanId,
|
||||||
|
},
|
||||||
|
//parentSpanId: span.parentSpanId,
|
||||||
|
startTimeInMs,
|
||||||
|
endTimeInMs,
|
||||||
|
durationInMs,
|
||||||
|
attributes: safeAttributes,
|
||||||
|
//span.links
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = serializeSpan;
|
|
@ -0,0 +1,72 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const opentelemetryApi = require('@opentelemetry/api');
|
||||||
|
|
||||||
|
const asyncHandler = require('../lib/express-async-handler');
|
||||||
|
const { captureSpanProcessor } = require('./tracing');
|
||||||
|
const serializeSpan = require('./serialize-span');
|
||||||
|
|
||||||
|
// From the current active context, grab the `traceId`. The `traceId` will be
|
||||||
|
// shared for the whole request because all spans live under the root span.
|
||||||
|
function getActiveTraceId() {
|
||||||
|
//const rootCtx = opentelemetryApi.ROOT_CONTEXT;
|
||||||
|
const activeCtx = opentelemetryApi.context.active();
|
||||||
|
if (activeCtx) {
|
||||||
|
const span = opentelemetryApi.trace.getSpan(activeCtx);
|
||||||
|
if (span) {
|
||||||
|
const traceId = span.spanContext().traceId;
|
||||||
|
|
||||||
|
return traceId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handles keeping track of tracing spans for each request.
|
||||||
|
// Also adds the traceId to the to the `X-Trace-Id` response header.
|
||||||
|
async function handleTracingMiddleware(req, res, next) {
|
||||||
|
const traceId = getActiveTraceId();
|
||||||
|
if (traceId) {
|
||||||
|
// Add the OpenTelemetry trace ID to the `X-Trace-Id` response header so
|
||||||
|
// we can cross-reference. We can use this to lookup the request in
|
||||||
|
// Jaeger.
|
||||||
|
res.set('X-Trace-Id', traceId);
|
||||||
|
|
||||||
|
// Start keeping track of all of spans that happen during the request
|
||||||
|
captureSpanProcessor.trackSpansInTrace(traceId);
|
||||||
|
|
||||||
|
// Cleanup after the request is done
|
||||||
|
res.on('finish', function () {
|
||||||
|
captureSpanProcessor.dropSpansInTrace(traceId);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
next();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all of spans we're willing to show to the user.
|
||||||
|
//
|
||||||
|
// We only care about showing the external API HTTP requests to the user so they
|
||||||
|
// can tell what part of the Matrix API is being so slow.
|
||||||
|
function getSerializableSpans() {
|
||||||
|
const traceId = getActiveTraceId();
|
||||||
|
if (traceId) {
|
||||||
|
const spans = captureSpanProcessor.getSpansInTrace(traceId) ?? [];
|
||||||
|
|
||||||
|
// We only care about showing the external API HTTP requests to the user
|
||||||
|
const filteredSpans = spans.filter((span) => {
|
||||||
|
return span.instrumentationLibrary.name === '@opentelemetry/instrumentation-http';
|
||||||
|
});
|
||||||
|
|
||||||
|
const serializableSpans = filteredSpans.map((span) => serializeSpan(span));
|
||||||
|
|
||||||
|
return serializableSpans;
|
||||||
|
}
|
||||||
|
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
handleTracingMiddleware: asyncHandler(handleTracingMiddleware),
|
||||||
|
getSerializableSpans,
|
||||||
|
getActiveTraceId,
|
||||||
|
};
|
|
@ -0,0 +1,107 @@
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const { registerInstrumentations } = require('@opentelemetry/instrumentation');
|
||||||
|
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
|
||||||
|
const { diag, DiagConsoleLogger, DiagLogLevel } = require('@opentelemetry/api');
|
||||||
|
const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
|
||||||
|
const {
|
||||||
|
BasicTracerProvider,
|
||||||
|
// ConsoleSpanExporter,
|
||||||
|
// SimpleSpanProcessor,
|
||||||
|
BatchSpanProcessor,
|
||||||
|
} = require('@opentelemetry/sdk-trace-base');
|
||||||
|
const { AsyncLocalStorageContextManager } = require('@opentelemetry/context-async-hooks');
|
||||||
|
const { OTTracePropagator } = require('@opentelemetry/propagator-ot-trace');
|
||||||
|
const { Resource } = require('@opentelemetry/resources');
|
||||||
|
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');
|
||||||
|
|
||||||
|
const CaptureSpanProcessor = require('./capture-span-processor');
|
||||||
|
|
||||||
|
const config = require('../lib/config');
|
||||||
|
const jaegerTracesEndpoint = config.get('jaegerTracesEndpoint');
|
||||||
|
|
||||||
|
const packageInfo = require('../../package.json');
|
||||||
|
assert(packageInfo.name);
|
||||||
|
|
||||||
|
// (Diagnostics) For troubleshooting, set the log level to DiagLogLevel.DEBUG.
|
||||||
|
//
|
||||||
|
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.INFO);
|
||||||
|
|
||||||
|
const provider = new BasicTracerProvider({
|
||||||
|
resource: new Resource({
|
||||||
|
[SemanticResourceAttributes.SERVICE_NAME]: packageInfo.name,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
// Export spans to console (useful for debugging).
|
||||||
|
// provider.addSpanProcessor(new SimpleSpanProcessor(new ConsoleSpanExporter()));
|
||||||
|
|
||||||
|
// Export spans to Jaeger
|
||||||
|
let isExportingToJaeger = false;
|
||||||
|
if (jaegerTracesEndpoint) {
|
||||||
|
const exporter = new JaegerExporter({
|
||||||
|
tags: [],
|
||||||
|
endpoint: jaegerTracesEndpoint,
|
||||||
|
});
|
||||||
|
// Use this for immediate span submission
|
||||||
|
//provider.addSpanProcessor(new SimpleSpanProcessor(exporter));
|
||||||
|
// Otherwise, we should just use the more performant batched processor
|
||||||
|
provider.addSpanProcessor(new BatchSpanProcessor(exporter));
|
||||||
|
|
||||||
|
isExportingToJaeger = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
const captureSpanProcessor = new CaptureSpanProcessor();
|
||||||
|
provider.addSpanProcessor(captureSpanProcessor);
|
||||||
|
|
||||||
|
function startTracing() {
|
||||||
|
if (!isExportingToJaeger) {
|
||||||
|
console.warn(
|
||||||
|
`⚠ Tracing was started but \`jaegerTracesEndpoint\` was not configured so we are not exporting anything.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
provider.register({
|
||||||
|
contextManager: new AsyncLocalStorageContextManager(),
|
||||||
|
propagator: new OTTracePropagator(),
|
||||||
|
});
|
||||||
|
|
||||||
|
registerInstrumentations({
|
||||||
|
instrumentations: [
|
||||||
|
getNodeAutoInstrumentations({
|
||||||
|
'@opentelemetry/instrumentation-http': {
|
||||||
|
// Docs:
|
||||||
|
// https://github.com/open-telemetry/opentelemetry-js/tree/51afd54bd63e46d5d530266761144c7be2f6b3a7/experimental/packages/opentelemetry-instrumentation-http#http-instrumentation-options
|
||||||
|
//
|
||||||
|
// This is the place to ignore root level spans for Express routes. My
|
||||||
|
// first guess would be in `ignoreLayers` in
|
||||||
|
// `@opentelemetry/instrumentation-express` but that's not the case, see
|
||||||
|
// https://github.com/open-telemetry/opentelemetry-js-contrib/issues/1034#issuecomment-1158435392
|
||||||
|
ignoreIncomingRequestHook: (req) => {
|
||||||
|
// Ignore spans from static assets.
|
||||||
|
//
|
||||||
|
// FIXME: Ideally, all of the assets would all be served under
|
||||||
|
// `/static/` so we could ignore that way instead. In Hydrogen, this
|
||||||
|
// is tracked by https://github.com/vector-im/hydrogen-web/issues/757
|
||||||
|
const isStaticAsset = !!req.url.match(/\.(css|js|svg|woff2)(\?.*?)?$/);
|
||||||
|
return isStaticAsset;
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
process.on('SIGTERM', () => {
|
||||||
|
provider
|
||||||
|
.shutdown()
|
||||||
|
.then(() => console.log('Tracing terminated'))
|
||||||
|
.catch((error) => console.log('Error terminating tracing', error))
|
||||||
|
.finally(() => process.exit(0));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
startTracing,
|
||||||
|
provider,
|
||||||
|
captureSpanProcessor,
|
||||||
|
};
|
Loading…
Reference in New Issue