From 5d281c10dd3d4d1f96635e92d803a74e3880d6b7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 21 Apr 2021 10:03:31 +0100 Subject: [PATCH] Stop BackgroundProcessLoggingContext making new prometheus timeseries (#9854) This undoes part of b076bc276e881b262048307b6a226061d96c4a8d. --- changelog.d/9854.bugfix | 1 + synapse/metrics/background_process_metrics.py | 20 +++++++++++++++---- synapse/replication/tcp/protocol.py | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 changelog.d/9854.bugfix diff --git a/changelog.d/9854.bugfix b/changelog.d/9854.bugfix new file mode 100644 index 0000000000..e39a3f9915 --- /dev/null +++ b/changelog.d/9854.bugfix @@ -0,0 +1 @@ +Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances. diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 78e9cfbc26..3f621539f3 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -16,7 +16,7 @@ import logging import threading from functools import wraps -from typing import TYPE_CHECKING, Dict, Optional, Set +from typing import TYPE_CHECKING, Dict, Optional, Set, Union from prometheus_client.core import REGISTRY, Counter, Gauge @@ -199,7 +199,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar _background_process_start_count.labels(desc).inc() _background_process_in_flight_count.labels(desc).inc() - with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context: + with BackgroundProcessLoggingContext(desc, count) as context: try: ctx = noop_context_manager() if bg_start_span: @@ -244,8 +244,20 @@ class BackgroundProcessLoggingContext(LoggingContext): __slots__ = ["_proc"] - def __init__(self, name: str): - super().__init__(name) + def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None): + """ + + Args: + name: The name of the background process. Each distinct `name` gets a + separate prometheus time series. + + instance_id: an identifer to add to `name` to distinguish this instance of + the named background process in the logs. If this is `None`, one is + made up based on id(self). + """ + if instance_id is None: + instance_id = id(self) + super().__init__("%s-%s" % (name, instance_id)) self._proc = _BackgroundProcess(name, self) def start(self, rusage: "Optional[resource._RUsage]"): diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index ba753318bd..d10d574246 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -185,7 +185,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # a logcontext which we use for processing incoming commands. We declare it as a # background process so that the CPU stats get reported to prometheus. self._logging_context = BackgroundProcessLoggingContext( - "replication-conn-%s" % (self.conn_id,) + "replication-conn", self.conn_id ) def connectionMade(self):