2016-01-06 21:26:29 -07:00
|
|
|
# Copyright 2014-2016 OpenMarket Ltd
|
2019-09-18 14:55:37 -06:00
|
|
|
# Copyright 2019 The Matrix.org Foundation C.I.C.
|
2015-01-06 06:21:39 -07:00
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2017-03-17 09:11:26 -06:00
|
|
|
""" Thread-local-alike tracking of log contexts within synapse
|
|
|
|
|
|
|
|
This module provides objects and utilities for tracking contexts through
|
|
|
|
synapse code, so that log lines can include a request identifier, and so that
|
|
|
|
CPU and database activity can be accounted for against the request that caused
|
|
|
|
them.
|
|
|
|
|
|
|
|
See doc/log_contexts.rst for details on how this works.
|
|
|
|
"""
|
|
|
|
|
2019-12-10 04:22:12 -07:00
|
|
|
import inspect
|
2014-10-29 19:21:33 -06:00
|
|
|
import logging
|
2018-07-09 00:09:20 -06:00
|
|
|
import threading
|
2019-07-02 12:01:28 -06:00
|
|
|
import types
|
2020-03-07 10:57:26 -07:00
|
|
|
from typing import TYPE_CHECKING, Optional, Tuple, TypeVar, Union
|
|
|
|
|
|
|
|
from typing_extensions import Literal
|
2018-07-09 00:09:20 -06:00
|
|
|
|
2018-10-23 06:12:32 -06:00
|
|
|
from twisted.internet import defer, threads
|
2014-10-29 19:21:33 -06:00
|
|
|
|
2020-03-07 10:57:26 -07:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from synapse.logging.scopecontextmanager import _LogContextScope
|
|
|
|
|
2014-11-20 10:10:37 -07:00
|
|
|
# Module-level logger, named after this module, used for all warnings and
# debug output emitted by the log-context machinery below.
logger = logging.getLogger(__name__)
|
|
|
|
|
2015-12-03 14:03:01 -07:00
|
|
|
# Work out, once at import time, whether per-thread resource usage can be
# measured on this platform, and define `get_thread_resource_usage`
# accordingly. Any failure inside the `try` (missing `resource` module or an
# unsupported RUSAGE_THREAD) selects the no-op fallback in the `except`.
try:
    import resource

    # Python doesn't ship with a definition of RUSAGE_THREAD but it's defined
    # to be 1 on linux so we hard code it.
    # NOTE(review): newer Python versions may expose resource.RUSAGE_THREAD
    # directly -- confirm before relying on the hard-coded value elsewhere.
    RUSAGE_THREAD = 1

    # If the system doesn't support RUSAGE_THREAD then this should throw an
    # exception.
    resource.getrusage(RUSAGE_THREAD)

    is_thread_resource_usage_supported = True

    def get_thread_resource_usage():
        """Return the result of `resource.getrusage(RUSAGE_THREAD)`."""
        return resource.getrusage(RUSAGE_THREAD)


except Exception:
    # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
    # won't track resource usage.
    is_thread_resource_usage_supported = False

    def get_thread_resource_usage():
        """Fallback used when thread resource usage is unsupported: always None."""
        return None
|
|
|
|
|
2014-10-30 04:13:46 -06:00
|
|
|
|
2019-07-03 06:40:45 -06:00
|
|
|
# Get an id for the current thread.
#
# threading.get_ident doesn't actually return an OS-level tid, and annoyingly,
# on Linux it actually returns the same value either side of a fork() call.
# However, we only fork in one place, so it's not worth the hoop-jumping to get
# a real tid.
#
get_thread_id = threading.get_ident
|
|
|
|
|
|
|
|
|
2018-07-10 06:56:07 -06:00
|
|
|
class ContextResourceUsage(object):
    """Accumulator for the resources consumed by a log context.

    Instances support ``+``, ``-``, ``+=`` and ``-=``, which combine the
    counters field by field.

    Attributes:
        ru_utime (float): user CPU time (in seconds)
        ru_stime (float): system CPU time (in seconds)
        db_txn_count (int): number of database transactions done
        db_sched_duration_sec (float): amount of time spent waiting for a
            database connection
        db_txn_duration_sec (float): amount of time spent doing database
            transactions (excluding scheduling time)
        evt_db_fetch_count (int): number of events requested from the database
    """

    __slots__ = [
        "ru_stime",
        "ru_utime",
        "db_txn_count",
        "db_txn_duration_sec",
        "db_sched_duration_sec",
        "evt_db_fetch_count",
    ]

    def __init__(self, copy_from: "Optional[ContextResourceUsage]" = None) -> None:
        """Build a usage record, either zeroed or cloned from another one.

        Args:
            copy_from (ContextResourceUsage|None): when given, every counter
                is initialised from this object; otherwise all counters start
                at zero.
        """
        if copy_from is not None:
            # FIXME: mypy can't infer the types set via reset(), so specify
            # them explicitly for now
            self.ru_utime = copy_from.ru_utime  # type: float
            self.ru_stime = copy_from.ru_stime  # type: float
            self.db_txn_count = copy_from.db_txn_count  # type: int

            self.db_txn_duration_sec = copy_from.db_txn_duration_sec  # type: float
            self.db_sched_duration_sec = copy_from.db_sched_duration_sec  # type: float
            self.evt_db_fetch_count = copy_from.evt_db_fetch_count  # type: int
            return

        self.reset()

    def copy(self) -> "ContextResourceUsage":
        """Return a new, independent record holding the same counter values."""
        duplicate = ContextResourceUsage(copy_from=self)
        return duplicate

    def reset(self) -> None:
        """Set every counter back to zero."""
        self.ru_stime = self.ru_utime = 0.0
        self.db_txn_count = 0

        self.db_txn_duration_sec = self.db_sched_duration_sec = 0.0
        self.evt_db_fetch_count = 0

    def __repr__(self) -> str:
        template = (
            "<ContextResourceUsage ru_stime='%r', ru_utime='%r', "
            "db_txn_count='%r', db_txn_duration_sec='%r', "
            "db_sched_duration_sec='%r', evt_db_fetch_count='%r'>"
        )
        values = (
            self.ru_stime,
            self.ru_utime,
            self.db_txn_count,
            self.db_txn_duration_sec,
            self.db_sched_duration_sec,
            self.evt_db_fetch_count,
        )
        return template % values

    def __iadd__(self, other: "ContextResourceUsage") -> "ContextResourceUsage":
        """Fold another record's counters into this one, in place.

        Args:
            other (ContextResourceUsage): the record whose counters are added
        """
        for field in (
            "ru_utime",
            "ru_stime",
            "db_txn_count",
            "db_txn_duration_sec",
            "db_sched_duration_sec",
            "evt_db_fetch_count",
        ):
            setattr(self, field, getattr(self, field) + getattr(other, field))
        return self

    def __isub__(self, other: "ContextResourceUsage") -> "ContextResourceUsage":
        """Subtract another record's counters from this one, in place.

        Args:
            other (ContextResourceUsage): the record whose counters are removed
        """
        for field in (
            "ru_utime",
            "ru_stime",
            "db_txn_count",
            "db_txn_duration_sec",
            "db_sched_duration_sec",
            "evt_db_fetch_count",
        ):
            setattr(self, field, getattr(self, field) - getattr(other, field))
        return self

    def __add__(self, other: "ContextResourceUsage") -> "ContextResourceUsage":
        """Return a new record holding the field-wise sum; operands are untouched."""
        total = ContextResourceUsage(copy_from=self)
        total += other
        return total

    def __sub__(self, other: "ContextResourceUsage") -> "ContextResourceUsage":
        """Return a new record holding the field-wise difference; operands are untouched."""
        remainder = ContextResourceUsage(copy_from=self)
        remainder -= other
        return remainder
|
|
|
|
|
|
|
|
|
2020-03-07 10:57:26 -07:00
|
|
|
# Type alias for "either a real LoggingContext or the LoggingContext.Sentinel
# root context". Both names are given as strings because LoggingContext is
# defined further down the module.
LoggingContextOrSentinel = Union["LoggingContext", "LoggingContext.Sentinel"]
|
|
|
|
|
|
|
|
|
2014-10-29 19:21:33 -06:00
|
|
|
class LoggingContext(object):
    """Additional context for log formatting. Contexts are scoped within a
    "with" block.

    The currently active context is held in thread-local storage; when no
    context has been set, the `sentinel` instance is reported instead.

    If a parent is given when creating a new context, then:
        - logging fields are copied from the parent to the new context when
          the new context is constructed
        - when the new context exits, its resource usage stats are added to
          the parent's (provided the parent tracks resource usage)

    Args:
        name (str): Name for the context for debugging.
        parent_context (LoggingContext|None): The parent of the new context
        request: if not None, overrides the `request` field copied from the
            parent context
    """

    __slots__ = [
        "previous_context",
        "name",
        "parent_context",
        "_resource_usage",
        "usage_start",
        "main_thread",
        "alive",
        "request",
        "tag",
        "scope",
    ]

    # Per-thread storage holding the `current_context` attribute.
    thread_local = threading.local()

    class Sentinel(object):
        """Sentinel to represent the root context"""

        __slots__ = ["previous_context", "alive", "request", "scope"]

        def __init__(self) -> None:
            # Minimal set for compatibility with LoggingContext
            self.previous_context = None
            self.alive = None
            self.request = None
            self.scope = None

        def __str__(self) -> str:
            return "sentinel"

        def copy_to(self, record) -> None:
            """No-op: the sentinel has no logging fields to copy."""
            pass

        def copy_to_twisted_log_entry(self, record) -> None:
            """Set the `request` and `scope` keys of `record` to None."""
            record["request"] = None
            record["scope"] = None

        def start(self) -> None:
            """No-op counterpart of LoggingContext.start."""
            pass

        def stop(self) -> None:
            """No-op counterpart of LoggingContext.stop."""
            pass

        def add_database_transaction(self, duration_sec) -> None:
            """No-op: the sentinel does not track database transactions."""
            pass

        def add_database_scheduled(self, sched_sec) -> None:
            """No-op: the sentinel does not track database scheduling time."""
            pass

        def record_event_fetch(self, event_count) -> None:
            """No-op: the sentinel does not track event fetches."""
            pass

        def __nonzero__(self) -> bool:
            # The sentinel is always falsy, so `if context:` distinguishes it
            # from a real LoggingContext.
            return False

        __bool__ = __nonzero__  # python3

    # The single shared sentinel instance, returned by current_context() when
    # no context has been set on the calling thread.
    sentinel = Sentinel()

    def __init__(self, name=None, parent_context=None, request=None) -> None:
        # Remember whichever context is active at construction time; it is
        # what __exit__ restores.
        self.previous_context = LoggingContext.current_context()
        self.name = name

        # track the resources used by this context so far
        self._resource_usage = ContextResourceUsage()

        # While the context is active, holds the thread resource usage from
        # when the logcontext last became active; None otherwise (set by
        # start(), cleared by stop()).
        self.usage_start = None

        # The thread this context was created on; start()/stop() refuse to do
        # any accounting when called from a different thread.
        self.main_thread = get_thread_id()
        self.request = None
        self.tag = ""
        self.alive = True
        self.scope = None  # type: Optional[_LogContextScope]

        self.parent_context = parent_context

        if self.parent_context is not None:
            # inherit the parent's logging fields (request, scope)
            self.parent_context.copy_to(self)

        if request is not None:
            # the request param overrides the request from the parent context
            self.request = request

    def __str__(self) -> str:
        # Prefer the request identifier; otherwise fall back to "name@<id in hex>".
        if self.request:
            return str(self.request)
        return "%s@%x" % (self.name, id(self))

    @classmethod
    def current_context(cls) -> LoggingContextOrSentinel:
        """Get the current logging context from thread local storage

        Returns:
            LoggingContext: the current logging context, or the sentinel if
            none has been set on this thread
        """
        return getattr(cls.thread_local, "current_context", cls.sentinel)

    @classmethod
    def set_current_context(
        cls, context: LoggingContextOrSentinel
    ) -> LoggingContextOrSentinel:
        """Set the current logging context in thread local storage

        If `context` is not already the current context, the current one is
        stopped and `context` is started.

        Args:
            context(LoggingContext): The context to activate.
        Returns:
            The context that was previously active
        """
        current = cls.current_context()

        if current is not context:
            current.stop()
            cls.thread_local.current_context = context
            context.start()
        return current

    def __enter__(self) -> "LoggingContext":
        """Enters this logging context into thread local storage"""
        old_context = self.set_current_context(self)
        # The context active now should be the one seen at construction time;
        # anything else suggests a context was leaked or lost in between.
        if self.previous_context != old_context:
            logger.warning(
                "Expected previous context %r, found %r",
                self.previous_context,
                old_context,
            )
        self.alive = True

        return self

    def __exit__(self, type, value, traceback) -> None:
        """Restore the logging context in thread local storage to the state it
        was before this context was entered.
        Returns:
            None to avoid suppressing any exceptions that were thrown.
        """
        current = self.set_current_context(self.previous_context)
        if current is not self:
            if current is self.sentinel:
                logger.warning("Expected logging context %s was lost", self)
            else:
                logger.warning(
                    "Expected logging context %s but found %s", self, current
                )
        self.alive = False

        # if we have a parent, pass our CPU usage stats on
        if self.parent_context is not None and hasattr(
            self.parent_context, "_resource_usage"
        ):
            self.parent_context._resource_usage += self._resource_usage

            # reset them in case we get entered again
            self._resource_usage.reset()

    def copy_to(self, record) -> None:
        """Copy logging fields from this context to a log record or
        another LoggingContext
        """

        # we track the current request
        record.request = self.request

        # we also track the current scope:
        record.scope = self.scope

    def copy_to_twisted_log_entry(self, record) -> None:
        """
        Copy logging fields from this context to a Twisted log record.
        """
        record["request"] = self.request
        record["scope"] = self.scope

    def start(self) -> None:
        """Begin CPU accounting for this context on the current thread.

        Logs a warning and does nothing if called from a thread other than
        the one the context was created on.
        """
        if get_thread_id() != self.main_thread:
            logger.warning("Started logcontext %s on different thread", self)
            return

        # If we haven't already started record the thread resource usage so
        # far
        if not self.usage_start:
            self.usage_start = get_thread_resource_usage()

    def stop(self) -> None:
        """Stop CPU accounting, adding the CPU time used since start() to this
        context's resource usage.

        Logs a warning and does nothing if called from a thread other than
        the one the context was created on.
        """
        if get_thread_id() != self.main_thread:
            logger.warning("Stopped logcontext %s on different thread", self)
            return

        # When we stop, let's record the cpu used since we started
        if not self.usage_start:
            # Log a warning on platforms that support thread usage tracking
            if is_thread_resource_usage_supported:
                logger.warning(
                    "Called stop on logcontext %s without calling start", self
                )
            return

        utime_delta, stime_delta = self._get_cputime()
        self._resource_usage.ru_utime += utime_delta
        self._resource_usage.ru_stime += stime_delta

        # mark the context as no longer running
        self.usage_start = None

    def get_resource_usage(self) -> ContextResourceUsage:
        """Get resources used by this logcontext so far.

        Returns:
            ContextResourceUsage: a *copy* of the object tracking resource
                usage so far
        """
        # we always return a copy, for consistency
        res = self._resource_usage.copy()

        # If we are on the correct thread and we're currently running then we
        # can include resource usage so far.
        is_main_thread = get_thread_id() == self.main_thread
        if self.alive and self.usage_start and is_main_thread:
            utime_delta, stime_delta = self._get_cputime()
            res.ru_utime += utime_delta
            res.ru_stime += stime_delta

        return res

    def _get_cputime(self) -> Tuple[float, float]:
        """Get the cpu usage time since this context last became active

        Must only be called while `usage_start` is set.

        Returns: Tuple[float, float]: seconds in user mode, seconds in system mode
        """
        assert self.usage_start is not None

        current = get_thread_resource_usage()

        # Indicate to mypy that we know that self.usage_start is not None.
        assert self.usage_start is not None

        utime_delta = current.ru_utime - self.usage_start.ru_utime
        stime_delta = current.ru_stime - self.usage_start.ru_stime

        # sanity check: clamp negative deltas to zero rather than recording
        # negative CPU time
        if utime_delta < 0:
            logger.error(
                "utime went backwards! %f < %f",
                current.ru_utime,
                self.usage_start.ru_utime,
            )
            utime_delta = 0

        if stime_delta < 0:
            logger.error(
                "stime went backwards! %f < %f",
                current.ru_stime,
                self.usage_start.ru_stime,
            )
            stime_delta = 0

        return utime_delta, stime_delta

    def add_database_transaction(self, duration_sec: float) -> None:
        """Record one completed database transaction

        Args:
            duration_sec (float): time spent in the transaction, in seconds

        Raises:
            ValueError: if duration_sec is negative
        """
        if duration_sec < 0:
            raise ValueError("DB txn time can only be non-negative")
        self._resource_usage.db_txn_count += 1
        self._resource_usage.db_txn_duration_sec += duration_sec

    def add_database_scheduled(self, sched_sec: float) -> None:
        """Record a use of the database pool

        Args:
            sched_sec (float): number of seconds it took us to get a
                connection

        Raises:
            ValueError: if sched_sec is negative
        """
        if sched_sec < 0:
            raise ValueError("DB scheduling time can only be non-negative")
        self._resource_usage.db_sched_duration_sec += sched_sec

    def record_event_fetch(self, event_count: int) -> None:
        """Record a number of events being fetched from the db

        Args:
            event_count (int): number of events being fetched
        """
        self._resource_usage.evt_db_fetch_count += event_count
|
2018-06-22 03:42:28 -06:00
|
|
|
|
2014-10-29 19:21:33 -06:00
|
|
|
|
|
|
|
class LoggingContextFilter(logging.Filter):
    """Logging filter that adds values from the current logging context to each
    record.

    Args:
        **defaults: Default values to avoid formatters complaining about
            missing fields
    """

    def __init__(self, **defaults) -> None:
        # Initialise the base Filter so its standard attributes (`name`,
        # `nlen`) exist on the instance; skipping this leaves the object
        # half-constructed for anything that touches the base-class API.
        super().__init__()
        self.defaults = defaults

    def filter(self, record) -> Literal[True]:
        """Add each field from the current logging context to the record.

        The configured defaults are applied first, so that fields the context
        does not set still exist on the record; the context's own values are
        then copied over the top.

        Args:
            record: the log record to annotate (modified in place)

        Returns:
            True to include the record in the log output.
        """
        context = LoggingContext.current_context()
        for key, value in self.defaults.items():
            setattr(record, key, value)

        # context should never be None, but if it somehow ends up being, then
        # we end up in a death spiral of infinite loops, so let's check, for
        # robustness' sake.
        if context is not None:
            context.copy_to(record)

        return True
|
|
|
|
|
|
|
|
|
|
|
|
class PreserveLoggingContext(object):
    """Context manager which swaps in another logging context for the duration
    of a ``with`` block and reinstates the previously active one afterwards.

    On entry, the supplied context (the sentinel if none was supplied) is made
    current and the context that was active is remembered; on exit the
    remembered context is made current again. Used to restore the context
    after a function using @defer.inlineCallbacks is resumed by a callback
    from the reactor.
    """

    __slots__ = ["current_context", "new_context", "has_parent"]

    def __init__(self, new_context: Optional[LoggingContextOrSentinel] = None) -> None:
        # Fall back to the sentinel when the caller did not supply a context.
        chosen = (
            LoggingContext.sentinel if new_context is None else new_context
        )  # type: LoggingContextOrSentinel
        self.new_context = chosen

    def __enter__(self) -> None:
        """Activate the new context, remembering the one it replaces"""
        replaced = LoggingContext.set_current_context(self.new_context)
        self.current_context = replaced

        # The sentinel is falsy, so there is nothing more to record for it.
        if not replaced:
            return

        self.has_parent = replaced.previous_context is not None
        if not replaced.alive:
            logger.debug("Entering dead context: %s", replaced)

    def __exit__(self, type, value, traceback) -> None:
        """Reinstate the context that was active when the block was entered"""
        found = LoggingContext.set_current_context(self.current_context)

        # Whatever was active just now should have been the context we
        # installed on entry; complain if something else replaced it.
        if found != self.new_context and found is LoggingContext.sentinel:
            logger.warning("Expected logging context %s was lost", self.new_context)
        elif found != self.new_context:
            logger.warning(
                "Expected logging context %s but found %s",
                self.new_context,
                found,
            )

        restored = self.current_context
        if restored is not LoggingContext.sentinel and not restored.alive:
            logger.debug("Restoring dead context: %s", restored)
|
2015-05-08 12:53:34 -06:00
|
|
|
|
2015-05-08 09:32:18 -06:00
|
|
|
|
2020-03-07 10:57:26 -07:00
|
|
|
def nested_logging_context(
    suffix: str, parent_context: Optional[LoggingContext] = None
) -> LoggingContext:
    """Build a logging context which is a child of another one.

    The child's 'request' is the parent context's request followed by ``-``
    and the given suffix.

    CPU/db usage stats will be added to the parent context's on exit.

    Normal usage looks like:

        with nested_logging_context(suffix):
            # ... do stuff

    Args:
        suffix (str): suffix to add to the parent context's 'request'.
        parent_context (LoggingContext|None): parent context. Will use the
            current context if None.

    Returns:
        LoggingContext: new logging context.
    """
    # An explicitly supplied parent wins; otherwise adopt whatever context is
    # active right now.
    base = (
        LoggingContext.current_context() if parent_context is None else parent_context
    )  # type: LoggingContextOrSentinel

    child_request = str(base.request) + "-" + suffix
    return LoggingContext(parent_context=base, request=child_request)
|
|
|
|
|
|
|
|
|
2017-03-17 14:56:54 -06:00
|
|
|
def preserve_fn(f):
    """Function decorator which wraps the function with run_in_background

    Args:
        f (callable): the function to wrap.

    Returns:
        callable: a function which, when called, passes `f` and the call's
        arguments to `run_in_background` and returns its result. The wrapper
        carries over the name, docstring and other metadata of `f`.
    """
    # Imported here rather than at module level so that this block does not
    # depend on a change to the file's import section.
    import functools

    # functools.wraps keeps __name__/__doc__/__wrapped__ pointing at `f`, so
    # logs, tracebacks and introspection show the real function, not "g".
    @functools.wraps(f)
    def g(*args, **kwargs):
        return run_in_background(f, *args, **kwargs)

    return g
|
|
|
|
|
|
|
|
|
|
|
|
def run_in_background(f, *args, **kwargs):
    """Call a function without waiting on its result, keeping the caller's
    logcontext intact.

    The current context is restored after `f` returns, and the context that
    was active on return is put back once the deferred returned by `f`
    completes.

    Useful for wrapping functions that return a deferred or coroutine, which you don't
    yield or await on (for instance because you want to pass it to
    defer.gatherResults()).

    If f returns a Coroutine object, it will be wrapped into a Deferred (which will have
    the side effect of executing the coroutine).

    Note that if you completely discard the result, you should make sure that
    `f` doesn't raise any deferred exceptions, otherwise a scary-looking
    CRITICAL error about an unhandled error will be logged without much
    indication about where it came from.
    """
    calling_context = LoggingContext.current_context()
    try:
        result = f(*args, **kwargs)
    except:  # noqa: E722
        # Callers of this function don't expect synchronous exceptions, so
        # hand the failure back as an already-failed Deferred instead.
        return defer.fail()

    if isinstance(result, types.CoroutineType):
        result = defer.ensureDeferred(result)

    # Nothing to fix up if we did not get a Deferred back, or if it has
    # already fired (and is not paused): in the latter case the function
    # should have maintained the logcontext itself.
    if not isinstance(result, defer.Deferred) or (
        result.called and not result.paused
    ):
        return result

    # `f` may have reset the context before handing back its pending
    # deferred, so put the caller's context back now.
    context_on_return = LoggingContext.set_current_context(calling_context)

    # When the deferred eventually completes, the caller's context would be
    # restored -- but nothing is waiting on it, so that context would leak
    # into the reactor or some unsuspecting function. Reset the context at
    # that point instead.
    #
    # (If this feels asymmetric: we are effectively forking a new thread of
    # execution. We are probably inside a ``with LoggingContext()`` block,
    # which is meant to have a single entry and exit point; spawning off
    # another deferred effectively adds a new exit point.)
    result.addBoth(_set_context_cb, context_on_return)
    return result
|
2016-02-04 03:22:44 -07:00
|
|
|
|
|
|
|
|
2017-03-30 06:22:24 -06:00
|
|
|
def make_deferred_yieldable(deferred):
    """Make a deferred (or coroutine) follow the Synapse logcontext rules.

    If the deferred has completed (or is not actually a Deferred), essentially
    does nothing (just returns another completed deferred with the
    result/failure).

    If the deferred has not yet completed, resets the logcontext before
    returning a deferred. Then, when the deferred completes, restores the
    current logcontext before running callbacks/errbacks.

    (This is more-or-less the opposite operation to run_in_background.)
    """
    if inspect.isawaitable(deferred):
        # Turn a coroutine into a deferred: this runs it, which tells us
        # whether it finishes immediately. If it does, there is no need to
        # touch the log contexts at all and we can return straight away.
        deferred = defer.ensureDeferred(deferred)

    # Only a Deferred that is still pending (not yet called, or paused) needs
    # the logcontext dance; anything else can run callbacks immediately.
    still_pending = isinstance(deferred, defer.Deferred) and (
        not deferred.called or deferred.paused
    )
    if not still_pending:
        return deferred

    # A yield on this deferred might block, so drop to the sentinel context
    # now and arrange for the caller's context to come back on completion.
    callers_context = LoggingContext.set_current_context(LoggingContext.sentinel)
    deferred.addBoth(_set_context_cb, callers_context)
    return deferred
|
|
|
|
|
|
|
|
|
2020-03-07 10:57:26 -07:00
|
|
|
ResultT = TypeVar("ResultT")
|
|
|
|
|
|
|
|
|
|
|
|
def _set_context_cb(result: ResultT, context: LoggingContext) -> ResultT:
    """Callback which makes `context` the current logging context.

    Intended to be attached to a Deferred (e.g. via `addBoth`) with the
    context to restore passed as the extra argument.

    Args:
        result: whatever the previous step in the callback chain produced;
            it is not inspected.
        context: the logging context to make current.

    Returns:
        `result`, unchanged, so that the rest of the callback chain sees the
        same value.
    """
    LoggingContext.set_current_context(context)
    return result
|
2017-03-30 06:22:24 -06:00
|
|
|
|
|
|
|
|
2018-10-23 06:12:32 -06:00
|
|
|
def defer_to_thread(reactor, f, *args, **kwargs):
    """Run `f` in a thread from the reactor's default threadpool.

    This is a convenience wrapper around `defer_to_threadpool` which uses
    `reactor.getThreadPool()` as the pool.

    A new logcontext is created for `f`, as a child of the current logcontext
    (so its CPU usage metrics will get attributed to the current logcontext).
    `f` should preserve the logcontext it is given.

    The result deferred follows the Synapse logcontext rules: you should
    `yield` on it.

    Args:
        reactor (twisted.internet.base.ReactorBase): The reactor in whose main
            thread the Deferred will be invoked, and whose threadpool we
            should use for the function.

            Normally this will be hs.get_reactor().

        f (callable): The function to call.

        args: positional arguments to pass to f.

        kwargs: keyword arguments to pass to f.

    Returns:
        Deferred: A Deferred which fires a callback with the result of `f`, or
            an errback if `f` throws an exception.
    """
    default_pool = reactor.getThreadPool()
    return defer_to_threadpool(reactor, default_pool, f, *args, **kwargs)
|
2016-02-04 03:22:44 -07:00
|
|
|
|
2018-10-23 06:12:32 -06:00
|
|
|
|
|
|
|
def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
    """Logcontext-aware wrapper for twisted.internet.threads.deferToThreadPool.

    Runs `f(*args, **kwargs)` on a thread taken from `threadpool` and returns
    the outcome as a Deferred.

    `f` runs inside a new logcontext whose parent is the logcontext that was
    current when this function was called (so its CPU usage metrics will get
    attributed to that logcontext). `f` should preserve the logcontext it is
    given.

    The result deferred follows the Synapse logcontext rules: you should
    `yield` on it.

    Args:
        reactor (twisted.internet.base.ReactorBase): The reactor in whose main
            thread the Deferred will be invoked. Normally this will be
            hs.get_reactor().

        threadpool (twisted.python.threadpool.ThreadPool): The threadpool to
            use for running `f`. Normally this will be
            hs.get_reactor().getThreadPool().

        f (callable): The function to call.

        args: positional arguments to pass to f.

        kwargs: keyword arguments to pass to f.

    Returns:
        Deferred: A Deferred which fires a callback with the result of `f`, or
            an errback if `f` throws an exception.
    """
    # Capture the caller's context here: by the time the closure below runs we
    # are on a worker thread.
    parent = LoggingContext.current_context()

    def run_in_child_context():
        with LoggingContext(parent_context=parent):
            return f(*args, **kwargs)

    pending = threads.deferToThreadPool(reactor, threadpool, run_in_child_context)
    return make_deferred_yieldable(pending)
|