Replace `last_*_pdu_age` metrics with timestamps (#9540)
Following the advice at https://prometheus.io/docs/practices/instrumentation/#timestamps-not-time-since, it's preferable to export Unix timestamps, not ages. There doesn't seem to be any particular naming convention for timestamp metrics.
This commit is contained in:
parent
df425c2c63
commit
8a4b3738f3
|
@ -0,0 +1 @@
|
||||||
|
Add `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time` prometheus metrics, which monitor federation delays by reporting the timestamps of messages sent to and received from a set of remote servers.
|
|
@ -0,0 +1 @@
|
||||||
|
The `synapse_federation_last_sent_pdu_age` and `synapse_federation_last_received_pdu_age` prometheus metrics have been removed. They are replaced by `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time`.
|
|
@ -90,10 +90,9 @@ pdu_process_time = Histogram(
|
||||||
"Time taken to process an event",
|
"Time taken to process an event",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
last_pdu_ts_metric = Gauge(
|
||||||
last_pdu_age_metric = Gauge(
|
"synapse_federation_last_received_pdu_time",
|
||||||
"synapse_federation_last_received_pdu_age",
|
"The timestamp of the last PDU which was successfully received from the given domain",
|
||||||
"The age (in seconds) of the last PDU successfully received from the given domain",
|
|
||||||
labelnames=("server_name",),
|
labelnames=("server_name",),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -369,8 +368,7 @@ class FederationServer(FederationBase):
|
||||||
)
|
)
|
||||||
|
|
||||||
if newest_pdu_ts and origin in self._federation_metrics_domains:
|
if newest_pdu_ts and origin in self._federation_metrics_domains:
|
||||||
newest_pdu_age = self._clock.time_msec() - newest_pdu_ts
|
last_pdu_ts_metric.labels(server_name=origin).set(newest_pdu_ts / 1000)
|
||||||
last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000)
|
|
||||||
|
|
||||||
return pdu_results
|
return pdu_results
|
||||||
|
|
||||||
|
|
|
@ -36,9 +36,9 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
last_pdu_age_metric = Gauge(
|
last_pdu_ts_metric = Gauge(
|
||||||
"synapse_federation_last_sent_pdu_age",
|
"synapse_federation_last_sent_pdu_time",
|
||||||
"The age (in seconds) of the last PDU successfully sent to the given domain",
|
"The timestamp of the last PDU which was successfully sent to the given domain",
|
||||||
labelnames=("server_name",),
|
labelnames=("server_name",),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -187,9 +187,8 @@ class TransactionManager:
|
||||||
|
|
||||||
if success and pdus and destination in self._federation_metrics_domains:
|
if success and pdus and destination in self._federation_metrics_domains:
|
||||||
last_pdu = pdus[-1]
|
last_pdu = pdus[-1]
|
||||||
last_pdu_age = self.clock.time_msec() - last_pdu.origin_server_ts
|
last_pdu_ts_metric.labels(server_name=destination).set(
|
||||||
last_pdu_age_metric.labels(server_name=destination).set(
|
last_pdu.origin_server_ts / 1000
|
||||||
last_pdu_age / 1000
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set_tag(tags.ERROR, not success)
|
set_tag(tags.ERROR, not success)
|
||||||
|
|
Loading…
Reference in New Issue