From c3c29aa19625324ac9a7a8ebcdd58a9c0b457f74 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 Jul 2018 16:12:36 +0100 Subject: [PATCH] Attempt to include db threads in cpu usage stats (#3496) Let's try to include time spent in the DB threads in the per-request/block cpu usage metrics. --- changelog.d/3496.feature | 1 + synapse/storage/_base.py | 32 +++++++++++++++----------------- synapse/storage/events_worker.py | 3 ++- synapse/util/logcontext.py | 23 +++++++++++++++++++++-- 4 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 changelog.d/3496.feature diff --git a/changelog.d/3496.feature b/changelog.d/3496.feature new file mode 100644 index 0000000000..6a06a7e755 --- /dev/null +++ b/changelog.d/3496.feature @@ -0,0 +1 @@ +Include CPU time from database threads in request/block metrics. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1fd5d8f162..98dde77431 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -220,7 +220,7 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, - logging_context, func, *args, **kwargs): + func, *args, **kwargs): start = time.time() txn_id = self._TXN_ID @@ -284,8 +284,7 @@ class SQLBaseStore(object): end = time.time() duration = end - start - if logging_context is not None: - logging_context.add_database_transaction(duration) + LoggingContext.current_context().add_database_transaction(duration) transaction_logger.debug("[TXN END] {%s} %f sec", name, duration) @@ -309,19 +308,15 @@ class SQLBaseStore(object): Returns: Deferred: The result of func """ - current_context = LoggingContext.current_context() - after_callbacks = [] exception_callbacks = [] - def inner_func(conn, *args, **kwargs): - return self._new_transaction( - conn, desc, after_callbacks, exception_callbacks, current_context, - func, *args, **kwargs - ) - try: - result = yield self.runWithConnection(inner_func, *args, **kwargs) + result = yield self.runWithConnection( + self._new_transaction, + desc, after_callbacks, exception_callbacks, func, + *args, **kwargs + ) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -346,22 +341,25 @@ class SQLBaseStore(object): Returns: Deferred: The result of func """ - current_context = LoggingContext.current_context() + parent_context = LoggingContext.current_context() + if parent_context == LoggingContext.sentinel: + logger.warn( + "Running db txn from sentinel context: metrics will be lost", + ) + parent_context = None start_time = time.time() def inner_func(conn, *args, **kwargs): - with LoggingContext("runWithConnection") as context: + with LoggingContext("runWithConnection", parent_context) as context: sched_duration_sec = time.time() - start_time sql_scheduling_timer.observe(sched_duration_sec) - current_context.add_database_scheduled(sched_duration_sec) + context.add_database_scheduled(sched_duration_sec) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") conn.reconnect() - current_context.copy_to(context) - return func(conn, *args, **kwargs) with PreserveLoggingContext(): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index fa2659403d..67433606c6 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -261,7 +261,8 @@ class EventsWorkerStore(SQLBaseStore): ] rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + conn, "do_fetch", [], [], + self._fetch_event_rows, event_ids, ) row_dict = { diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 44a0a56818..f6c7175f74 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -137,12 +137,18 @@ class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. + If a parent is given when creating a new context, then: + - logging fields are copied from the parent to the new context on entry + - when the new context exits, the cpu usage stats are copied from the + child to the parent + Args: name (str): Name for the context for debugging. + parent_context (LoggingContext|None): The parent of the new context """ __slots__ = [ - "previous_context", "name", + "previous_context", "name", "parent_context", "_resource_usage", "usage_start", "main_thread", "alive", @@ -183,7 +189,7 @@ class LoggingContext(object): sentinel = Sentinel() - def __init__(self, name=None): + def __init__(self, name=None, parent_context=None): self.previous_context = LoggingContext.current_context() self.name = name @@ -199,6 +205,8 @@ class LoggingContext(object): self.tag = "" self.alive = True + self.parent_context = parent_context + def __str__(self): return "%s@%x" % (self.name, id(self)) @@ -236,6 +244,10 @@ class LoggingContext(object): self.previous_context, old_context ) self.alive = True + + if self.parent_context is not None: + self.parent_context.copy_to(self) + return self def __exit__(self, type, value, traceback): @@ -257,6 +269,13 @@ class LoggingContext(object): self.previous_context = None self.alive = False + # if we have a parent, pass our CPU usage stats on + if self.parent_context is not None: + self.parent_context._resource_usage += self._resource_usage + + # reset them in case we get entered again + self._resource_usage.reset() + def copy_to(self, record): """Copy logging fields from this context to a log record or another LoggingContext