Make opentracing trace into event persistence (#10134)
* Trace event persistence When we persist a batch of events, set the parent opentracing span to the that from the request, so that we can trace all the way in. * changelog * When we force tracing, set a baggage item ... so that we can check again later. * Link in both directions between persist_events spans
This commit is contained in:
parent
d09e24a52d
commit
9e405034e5
|
@ -0,0 +1 @@
|
||||||
|
Improve OpenTracing for event persistence.
|
|
@ -207,7 +207,7 @@ class Auth:
|
||||||
|
|
||||||
request.requester = user_id
|
request.requester = user_id
|
||||||
if user_id in self._force_tracing_for_users:
|
if user_id in self._force_tracing_for_users:
|
||||||
opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
|
opentracing.force_tracing()
|
||||||
opentracing.set_tag("authenticated_entity", user_id)
|
opentracing.set_tag("authenticated_entity", user_id)
|
||||||
opentracing.set_tag("user_id", user_id)
|
opentracing.set_tag("user_id", user_id)
|
||||||
opentracing.set_tag("appservice_id", app_service.id)
|
opentracing.set_tag("appservice_id", app_service.id)
|
||||||
|
@ -260,7 +260,7 @@ class Auth:
|
||||||
|
|
||||||
request.requester = requester
|
request.requester = requester
|
||||||
if user_info.token_owner in self._force_tracing_for_users:
|
if user_info.token_owner in self._force_tracing_for_users:
|
||||||
opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
|
opentracing.force_tracing()
|
||||||
opentracing.set_tag("authenticated_entity", user_info.token_owner)
|
opentracing.set_tag("authenticated_entity", user_info.token_owner)
|
||||||
opentracing.set_tag("user_id", user_info.user_id)
|
opentracing.set_tag("user_id", user_info.user_id)
|
||||||
if device_id:
|
if device_id:
|
||||||
|
|
|
@ -168,7 +168,7 @@ import inspect
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Pattern, Type
|
from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Pattern, Type
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
|
||||||
|
@ -278,6 +278,10 @@ class SynapseTags:
|
||||||
DB_TXN_ID = "db.txn_id"
|
DB_TXN_ID = "db.txn_id"
|
||||||
|
|
||||||
|
|
||||||
|
class SynapseBaggage:
|
||||||
|
FORCE_TRACING = "synapse-force-tracing"
|
||||||
|
|
||||||
|
|
||||||
# Block everything by default
|
# Block everything by default
|
||||||
# A regex which matches the server_names to expose traces for.
|
# A regex which matches the server_names to expose traces for.
|
||||||
# None means 'block everything'.
|
# None means 'block everything'.
|
||||||
|
@ -285,6 +289,8 @@ _homeserver_whitelist = None # type: Optional[Pattern[str]]
|
||||||
|
|
||||||
# Util methods
|
# Util methods
|
||||||
|
|
||||||
|
Sentinel = object()
|
||||||
|
|
||||||
|
|
||||||
def only_if_tracing(func):
|
def only_if_tracing(func):
|
||||||
"""Executes the function only if we're tracing. Otherwise returns None."""
|
"""Executes the function only if we're tracing. Otherwise returns None."""
|
||||||
|
@ -447,12 +453,28 @@ def start_active_span(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def start_active_span_follows_from(operation_name, contexts):
|
def start_active_span_follows_from(
|
||||||
|
operation_name: str, contexts: Collection, inherit_force_tracing=False
|
||||||
|
):
|
||||||
|
"""Starts an active opentracing span, with additional references to previous spans
|
||||||
|
|
||||||
|
Args:
|
||||||
|
operation_name: name of the operation represented by the new span
|
||||||
|
contexts: the previous spans to inherit from
|
||||||
|
inherit_force_tracing: if set, and any of the previous contexts have had tracing
|
||||||
|
forced, the new span will also have tracing forced.
|
||||||
|
"""
|
||||||
if opentracing is None:
|
if opentracing is None:
|
||||||
return noop_context_manager()
|
return noop_context_manager()
|
||||||
|
|
||||||
references = [opentracing.follows_from(context) for context in contexts]
|
references = [opentracing.follows_from(context) for context in contexts]
|
||||||
scope = start_active_span(operation_name, references=references)
|
scope = start_active_span(operation_name, references=references)
|
||||||
|
|
||||||
|
if inherit_force_tracing and any(
|
||||||
|
is_context_forced_tracing(ctx) for ctx in contexts
|
||||||
|
):
|
||||||
|
force_tracing(scope.span)
|
||||||
|
|
||||||
return scope
|
return scope
|
||||||
|
|
||||||
|
|
||||||
|
@ -551,6 +573,10 @@ def start_active_span_from_edu(
|
||||||
|
|
||||||
|
|
||||||
# Opentracing setters for tags, logs, etc
|
# Opentracing setters for tags, logs, etc
|
||||||
|
@only_if_tracing
|
||||||
|
def active_span():
|
||||||
|
"""Get the currently active span, if any"""
|
||||||
|
return opentracing.tracer.active_span
|
||||||
|
|
||||||
|
|
||||||
@ensure_active_span("set a tag")
|
@ensure_active_span("set a tag")
|
||||||
|
@ -571,6 +597,33 @@ def set_operation_name(operation_name):
|
||||||
opentracing.tracer.active_span.set_operation_name(operation_name)
|
opentracing.tracer.active_span.set_operation_name(operation_name)
|
||||||
|
|
||||||
|
|
||||||
|
@only_if_tracing
|
||||||
|
def force_tracing(span=Sentinel) -> None:
|
||||||
|
"""Force sampling for the active/given span and its children.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
span: span to force tracing for. By default, the active span.
|
||||||
|
"""
|
||||||
|
if span is Sentinel:
|
||||||
|
span = opentracing.tracer.active_span
|
||||||
|
if span is None:
|
||||||
|
logger.error("No active span in force_tracing")
|
||||||
|
return
|
||||||
|
|
||||||
|
span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
|
||||||
|
|
||||||
|
# also set a bit of baggage, so that we have a way of figuring out if
|
||||||
|
# it is enabled later
|
||||||
|
span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1")
|
||||||
|
|
||||||
|
|
||||||
|
def is_context_forced_tracing(span_context) -> bool:
|
||||||
|
"""Check if sampling has been force for the given span context."""
|
||||||
|
if span_context is None:
|
||||||
|
return False
|
||||||
|
return span_context.baggage.get(SynapseBaggage.FORCE_TRACING) is not None
|
||||||
|
|
||||||
|
|
||||||
# Injection and extraction
|
# Injection and extraction
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ import itertools
|
||||||
import logging
|
import logging
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from typing import (
|
from typing import (
|
||||||
|
Any,
|
||||||
Awaitable,
|
Awaitable,
|
||||||
Callable,
|
Callable,
|
||||||
Collection,
|
Collection,
|
||||||
|
@ -40,6 +41,7 @@ from twisted.internet import defer
|
||||||
from synapse.api.constants import EventTypes, Membership
|
from synapse.api.constants import EventTypes, Membership
|
||||||
from synapse.events import EventBase
|
from synapse.events import EventBase
|
||||||
from synapse.events.snapshot import EventContext
|
from synapse.events.snapshot import EventContext
|
||||||
|
from synapse.logging import opentracing
|
||||||
from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
|
from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
|
||||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||||
from synapse.storage.databases import Databases
|
from synapse.storage.databases import Databases
|
||||||
|
@ -103,12 +105,18 @@ times_pruned_extremities = Counter(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@attr.s(auto_attribs=True, frozen=True, slots=True)
|
@attr.s(auto_attribs=True, slots=True)
|
||||||
class _EventPersistQueueItem:
|
class _EventPersistQueueItem:
|
||||||
events_and_contexts: List[Tuple[EventBase, EventContext]]
|
events_and_contexts: List[Tuple[EventBase, EventContext]]
|
||||||
backfilled: bool
|
backfilled: bool
|
||||||
deferred: ObservableDeferred
|
deferred: ObservableDeferred
|
||||||
|
|
||||||
|
parent_opentracing_span_contexts: List = []
|
||||||
|
"""A list of opentracing spans waiting for this batch"""
|
||||||
|
|
||||||
|
opentracing_span_context: Any = None
|
||||||
|
"""The opentracing span under which the persistence actually happened"""
|
||||||
|
|
||||||
|
|
||||||
_PersistResult = TypeVar("_PersistResult")
|
_PersistResult = TypeVar("_PersistResult")
|
||||||
|
|
||||||
|
@ -171,9 +179,27 @@ class _EventPeristenceQueue(Generic[_PersistResult]):
|
||||||
)
|
)
|
||||||
queue.append(end_item)
|
queue.append(end_item)
|
||||||
|
|
||||||
|
# add our events to the queue item
|
||||||
end_item.events_and_contexts.extend(events_and_contexts)
|
end_item.events_and_contexts.extend(events_and_contexts)
|
||||||
|
|
||||||
|
# also add our active opentracing span to the item so that we get a link back
|
||||||
|
span = opentracing.active_span()
|
||||||
|
if span:
|
||||||
|
end_item.parent_opentracing_span_contexts.append(span.context)
|
||||||
|
|
||||||
|
# start a processor for the queue, if there isn't one already
|
||||||
self._handle_queue(room_id)
|
self._handle_queue(room_id)
|
||||||
return await make_deferred_yieldable(end_item.deferred.observe())
|
|
||||||
|
# wait for the queue item to complete
|
||||||
|
res = await make_deferred_yieldable(end_item.deferred.observe())
|
||||||
|
|
||||||
|
# add another opentracing span which links to the persist trace.
|
||||||
|
with opentracing.start_active_span_follows_from(
|
||||||
|
"persist_event_batch_complete", (end_item.opentracing_span_context,)
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
def _handle_queue(self, room_id):
|
def _handle_queue(self, room_id):
|
||||||
"""Attempts to handle the queue for a room if not already being handled.
|
"""Attempts to handle the queue for a room if not already being handled.
|
||||||
|
@ -200,9 +226,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]):
|
||||||
queue = self._get_drainining_queue(room_id)
|
queue = self._get_drainining_queue(room_id)
|
||||||
for item in queue:
|
for item in queue:
|
||||||
try:
|
try:
|
||||||
ret = await self._per_item_callback(
|
with opentracing.start_active_span_follows_from(
|
||||||
item.events_and_contexts, item.backfilled
|
"persist_event_batch",
|
||||||
)
|
item.parent_opentracing_span_contexts,
|
||||||
|
inherit_force_tracing=True,
|
||||||
|
) as scope:
|
||||||
|
if scope:
|
||||||
|
item.opentracing_span_context = scope.span.context
|
||||||
|
|
||||||
|
ret = await self._per_item_callback(
|
||||||
|
item.events_and_contexts, item.backfilled
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
with PreserveLoggingContext():
|
with PreserveLoggingContext():
|
||||||
item.deferred.errback()
|
item.deferred.errback()
|
||||||
|
@ -252,6 +286,7 @@ class EventsPersistenceStorage:
|
||||||
self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch)
|
self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch)
|
||||||
self._state_resolution_handler = hs.get_state_resolution_handler()
|
self._state_resolution_handler = hs.get_state_resolution_handler()
|
||||||
|
|
||||||
|
@opentracing.trace
|
||||||
async def persist_events(
|
async def persist_events(
|
||||||
self,
|
self,
|
||||||
events_and_contexts: Iterable[Tuple[EventBase, EventContext]],
|
events_and_contexts: Iterable[Tuple[EventBase, EventContext]],
|
||||||
|
@ -307,6 +342,7 @@ class EventsPersistenceStorage:
|
||||||
self.main_store.get_room_max_token(),
|
self.main_store.get_room_max_token(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@opentracing.trace
|
||||||
async def persist_event(
|
async def persist_event(
|
||||||
self, event: EventBase, context: EventContext, backfilled: bool = False
|
self, event: EventBase, context: EventContext, backfilled: bool = False
|
||||||
) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]:
|
) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]:
|
||||||
|
|
Loading…
Reference in New Issue