Fix opentracing and Prometheus metrics for replication requests (#10996)
This commit fixes two bugs to do with decorators not instrumenting `ReplicationEndpoint`'s `send_request` correctly. There are two decorators on `send_request`: Prometheus' `Gauge.track_inprogress()` and Synapse's `opentracing.trace`.

`Gauge.track_inprogress()` does not have any support for async functions when used as a decorator. Since async functions behave like regular functions that return coroutines, only the creation of the coroutine was covered by the metric and none of the actual body of `send_request`. `Gauge.track_inprogress()` returns a regular, non-async function wrapping `send_request`, which is the source of the next bug.

The `opentracing.trace` decorator would normally handle async functions correctly, but since the wrapped `send_request` is a non-async function, the decorator ends up suffering from the same issue as `Gauge.track_inprogress()`: the opentracing span only measures the creation of the coroutine and none of the actual function body.

Using `Gauge.track_inprogress()` as a context manager instead of a decorator resolves both bugs.
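To illustrate the first bug, here is a minimal sketch using `prometheus_client` directly. The gauge and function names are made up for illustration, and it assumes a `prometheus_client` version without async awareness in its decorators, which is what the commit message describes:

```python
import asyncio

from prometheus_client import Gauge

# Hypothetical gauge, for illustration only; not the one Synapse uses.
inflight = Gauge("example_requests_inprogress", "Example in-flight requests")


# Buggy: the decorator wraps the synchronous call that merely *creates*
# the coroutine, so the gauge goes up and back down before any of the
# function body has run.
@inflight.track_inprogress()
async def send_request_buggy():
    await asyncio.sleep(1)  # the gauge already reads 0 while this runs


# Fixed: entering the context manager inside the coroutine body keeps
# the gauge incremented for the full duration of the request.
async def send_request_fixed():
    with inflight.track_inprogress():
        await asyncio.sleep(1)  # the gauge reads 1 while this runs
```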
Parent: 406f7bfa17
Commit: 6b18eb4430
New changelog entry:

@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.21.0 that causes opentracing and Prometheus metrics for replication requests to be measured incorrectly.
synapse/logging/opentracing.py:

@@ -807,6 +807,14 @@ def trace(func=None, opname=None):
                         result.addCallbacks(call_back, err_back)

                     else:
+                        if inspect.isawaitable(result):
+                            logger.error(
+                                "@trace may not have wrapped %s correctly! "
+                                "The function is not async but returned a %s.",
+                                func.__qualname__,
+                                type(result).__name__,
+                            )
+
                         scope.__exit__(None, None, None)

                     return result
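The new `inspect.isawaitable` check catches exactly the shape that caused this bug: a plain, non-async function that returns a coroutine. A self-contained sketch of such a shape (the wrapper and function names here are illustrative, not Synapse's):

```python
import asyncio
import functools
import inspect


def sync_wrapper(func):
    """An async-unaware decorator, like Gauge.track_inprogress() was."""

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        return func(*args, **kwargs)  # returns a coroutine, unawaited

    return wrapped


@sync_wrapper
async def send_request():
    await asyncio.sleep(1)


# The wrapper hides the async nature of send_request, so a decorator
# like @trace would take its synchronous branch and close the span
# immediately...
print(inspect.iscoroutinefunction(send_request))  # False
# ...even though calling it returns an awaitable, which the new
# logger.error() call now reports instead of failing silently.
result = send_request()
print(inspect.isawaitable(result))  # True
asyncio.run(result)  # await it to avoid a "never awaited" warning
```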
synapse/replication/http/_base.py:

@@ -182,8 +182,8 @@ class ReplicationEndpoint(metaclass=abc.ABCMeta):
         )

         @trace(opname="outgoing_replication_request")
-        @outgoing_gauge.track_inprogress()
         async def send_request(*, instance_name="master", **kwargs):
-            if instance_name == local_instance_name:
-                raise Exception("Trying to send HTTP request to self")
-            if instance_name == "master":
+            with outgoing_gauge.track_inprogress():
+                if instance_name == local_instance_name:
+                    raise Exception("Trying to send HTTP request to self")
+                if instance_name == "master":
@@ -229,13 +229,15 @@

             try:
                 # We keep retrying the same request for timeouts. This is so that we
-                # have a good idea that the request has either succeeded or failed on
-                # the master, and so whether we should clean up or not.
+                # have a good idea that the request has either succeeded or failed
+                # on the master, and so whether we should clean up or not.
                 while True:
                     headers: Dict[bytes, List[bytes]] = {}
                     # Add an authorization header, if configured.
                     if replication_secret:
-                        headers[b"Authorization"] = [b"Bearer " + replication_secret]
+                        headers[b"Authorization"] = [
+                            b"Bearer " + replication_secret
+                        ]
                     opentracing.inject_header_dict(headers, check_destination=False)
                     try:
                         result = await request_func(uri, data, headers=headers)
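For context, the reflowed `Authorization` header uses the shape Twisted's HTTP machinery expects: byte-string keys mapping to lists of byte-string values. A trivial sketch (the secret value is made up; in Synapse it comes from configuration):

```python
from typing import Dict, List

# Hypothetical shared secret, for illustration only.
replication_secret = b"example-secret"

headers: Dict[bytes, List[bytes]] = {}
if replication_secret:
    # Values are lists because HTTP headers can be repeated.
    headers[b"Authorization"] = [b"Bearer " + replication_secret]

print(headers)  # {b'Authorization': [b'Bearer example-secret']}
```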