From 8a4b3738f3dcf49f70c08204ad457559dcef4112 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 4 Mar 2021 16:40:18 +0000 Subject: [PATCH] Replace `last_*_pdu_age` metrics with timestamps (#9540) Following the advice at https://prometheus.io/docs/practices/instrumentation/#timestamps-not-time-since, it's preferable to export unix timestamps, not ages. There doesn't seem to be any particular naming convention for timestamp metrics. --- changelog.d/9540.feature | 1 + changelog.d/9540.removal | 1 + synapse/federation/federation_server.py | 10 ++++------ synapse/federation/sender/transaction_manager.py | 11 +++++------ 4 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 changelog.d/9540.feature create mode 100644 changelog.d/9540.removal diff --git a/changelog.d/9540.feature b/changelog.d/9540.feature new file mode 100644 index 0000000000..5417e51b93 --- /dev/null +++ b/changelog.d/9540.feature @@ -0,0 +1 @@ +Add `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time` prometheus metrics, which monitor federation delays by reporting the timestamps of messages sent to and received from a set of remote servers. diff --git a/changelog.d/9540.removal b/changelog.d/9540.removal new file mode 100644 index 0000000000..d54f553cb9 --- /dev/null +++ b/changelog.d/9540.removal @@ -0,0 +1 @@ +The `synapse_federation_last_sent_pdu_age` and `synapse_federation_last_received_pdu_age` prometheus metrics have been removed. They are replaced by `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time`. 
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 2f832b47f6..362895bf42 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -90,10 +90,9 @@ pdu_process_time = Histogram( "Time taken to process an event", ) - -last_pdu_age_metric = Gauge( - "synapse_federation_last_received_pdu_age", - "The age (in seconds) of the last PDU successfully received from the given domain", +last_pdu_ts_metric = Gauge( + "synapse_federation_last_received_pdu_time", + "The timestamp of the last PDU which was successfully received from the given domain", labelnames=("server_name",), ) @@ -369,8 +368,7 @@ class FederationServer(FederationBase): ) if newest_pdu_ts and origin in self._federation_metrics_domains: - newest_pdu_age = self._clock.time_msec() - newest_pdu_ts - last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000) + last_pdu_ts_metric.labels(server_name=origin).set(newest_pdu_ts / 1000) return pdu_results diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 763aff296c..2a9cd063c4 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -36,9 +36,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -last_pdu_age_metric = Gauge( - "synapse_federation_last_sent_pdu_age", - "The age (in seconds) of the last PDU successfully sent to the given domain", +last_pdu_ts_metric = Gauge( + "synapse_federation_last_sent_pdu_time", + "The timestamp of the last PDU which was successfully sent to the given domain", labelnames=("server_name",), ) @@ -187,9 +187,8 @@ class TransactionManager: if success and pdus and destination in self._federation_metrics_domains: last_pdu = pdus[-1] - last_pdu_age = self.clock.time_msec() - last_pdu.origin_server_ts - last_pdu_age_metric.labels(server_name=destination).set( - last_pdu_age / 1000 + 
last_pdu_ts_metric.labels(server_name=destination).set( + last_pdu.origin_server_ts / 1000 ) set_tag(tags.ERROR, not success)