Improve opentracing support for `ResponseCache` (#11607)
This adds some opentracing annotations to ResponseCache, to make it easier to see what's going on; in particular, it adds a link back to the initial trace which is actually doing the work of generating the response.
This commit is contained in:
parent
60fa4935b5
commit
c3e38b88f2
|
@ -0,0 +1 @@
|
||||||
|
Improve opentracing support for requests which use a `ResponseCache`.
|
|
@ -172,7 +172,7 @@ class RoomCreationHandler:
|
||||||
user_id = requester.user.to_string()
|
user_id = requester.user.to_string()
|
||||||
|
|
||||||
# Check if this room is already being upgraded by another person
|
# Check if this room is already being upgraded by another person
|
||||||
for key in self._upgrade_response_cache.pending_result_cache:
|
for key in self._upgrade_response_cache.keys():
|
||||||
if key[0] == old_room_id and key[1] != user_id:
|
if key[0] == old_room_id and key[1] != user_id:
|
||||||
# Two different people are trying to upgrade the same room.
|
# Two different people are trying to upgrade the same room.
|
||||||
# Send the second an error.
|
# Send the second an error.
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import abc
|
||||||
import collections
|
import collections
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -57,7 +58,26 @@ logger = logging.getLogger(__name__)
|
||||||
_T = TypeVar("_T")
|
_T = TypeVar("_T")
|
||||||
|
|
||||||
|
|
||||||
class ObservableDeferred(Generic[_T]):
|
class AbstractObservableDeferred(Generic[_T], metaclass=abc.ABCMeta):
|
||||||
|
"""Abstract base class defining the consumer interface of ObservableDeferred"""
|
||||||
|
|
||||||
|
__slots__ = ()
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def observe(self) -> "defer.Deferred[_T]":
|
||||||
|
"""Add a new observer for this ObservableDeferred
|
||||||
|
|
||||||
|
This returns a brand new deferred that is resolved when the underlying
|
||||||
|
deferred is resolved. Interacting with the returned deferred does not
|
||||||
|
effect the underlying deferred.
|
||||||
|
|
||||||
|
Note that the returned Deferred doesn't follow the Synapse logcontext rules -
|
||||||
|
you will probably want to `make_deferred_yieldable` it.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
class ObservableDeferred(Generic[_T], AbstractObservableDeferred[_T]):
|
||||||
"""Wraps a deferred object so that we can add observer deferreds. These
|
"""Wraps a deferred object so that we can add observer deferreds. These
|
||||||
observer deferreds do not affect the callback chain of the original
|
observer deferreds do not affect the callback chain of the original
|
||||||
deferred.
|
deferred.
|
||||||
|
|
|
@ -12,19 +12,37 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Awaitable, Callable, Dict, Generic, Optional, TypeVar
|
from typing import (
|
||||||
|
TYPE_CHECKING,
|
||||||
|
Any,
|
||||||
|
Awaitable,
|
||||||
|
Callable,
|
||||||
|
Dict,
|
||||||
|
Generic,
|
||||||
|
Iterable,
|
||||||
|
Optional,
|
||||||
|
TypeVar,
|
||||||
|
)
|
||||||
|
|
||||||
import attr
|
import attr
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
|
||||||
from synapse.logging.context import make_deferred_yieldable, run_in_background
|
from synapse.logging.context import make_deferred_yieldable, run_in_background
|
||||||
|
from synapse.logging.opentracing import (
|
||||||
|
active_span,
|
||||||
|
start_active_span,
|
||||||
|
start_active_span_follows_from,
|
||||||
|
)
|
||||||
from synapse.util import Clock
|
from synapse.util import Clock
|
||||||
from synapse.util.async_helpers import ObservableDeferred
|
from synapse.util.async_helpers import AbstractObservableDeferred, ObservableDeferred
|
||||||
from synapse.util.caches import register_cache
|
from synapse.util.caches import register_cache
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import opentracing
|
||||||
|
|
||||||
# the type of the key in the cache
|
# the type of the key in the cache
|
||||||
KV = TypeVar("KV")
|
KV = TypeVar("KV")
|
||||||
|
|
||||||
|
@ -54,6 +72,20 @@ class ResponseCacheContext(Generic[KV]):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@attr.s(auto_attribs=True)
|
||||||
|
class ResponseCacheEntry:
|
||||||
|
result: AbstractObservableDeferred
|
||||||
|
"""The (possibly incomplete) result of the operation.
|
||||||
|
|
||||||
|
Note that we continue to store an ObservableDeferred even after the operation
|
||||||
|
completes (rather than switching to an immediate value), since that makes it
|
||||||
|
easier to cache Failure results.
|
||||||
|
"""
|
||||||
|
|
||||||
|
opentracing_span_context: "Optional[opentracing.SpanContext]"
|
||||||
|
"""The opentracing span which generated/is generating the result"""
|
||||||
|
|
||||||
|
|
||||||
class ResponseCache(Generic[KV]):
|
class ResponseCache(Generic[KV]):
|
||||||
"""
|
"""
|
||||||
This caches a deferred response. Until the deferred completes it will be
|
This caches a deferred response. Until the deferred completes it will be
|
||||||
|
@ -63,10 +95,7 @@ class ResponseCache(Generic[KV]):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, clock: Clock, name: str, timeout_ms: float = 0):
|
def __init__(self, clock: Clock, name: str, timeout_ms: float = 0):
|
||||||
# This is poorly-named: it includes both complete and incomplete results.
|
self._result_cache: Dict[KV, ResponseCacheEntry] = {}
|
||||||
# We keep complete results rather than switching to absolute values because
|
|
||||||
# that makes it easier to cache Failure results.
|
|
||||||
self.pending_result_cache: Dict[KV, ObservableDeferred] = {}
|
|
||||||
|
|
||||||
self.clock = clock
|
self.clock = clock
|
||||||
self.timeout_sec = timeout_ms / 1000.0
|
self.timeout_sec = timeout_ms / 1000.0
|
||||||
|
@ -75,56 +104,63 @@ class ResponseCache(Generic[KV]):
|
||||||
self._metrics = register_cache("response_cache", name, self, resizable=False)
|
self._metrics = register_cache("response_cache", name, self, resizable=False)
|
||||||
|
|
||||||
def size(self) -> int:
|
def size(self) -> int:
|
||||||
return len(self.pending_result_cache)
|
return len(self._result_cache)
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return self.size()
|
return self.size()
|
||||||
|
|
||||||
def get(self, key: KV) -> Optional[defer.Deferred]:
|
def keys(self) -> Iterable[KV]:
|
||||||
|
"""Get the keys currently in the result cache
|
||||||
|
|
||||||
|
Returns both incomplete entries, and (if the timeout on this cache is non-zero),
|
||||||
|
complete entries which are still in the cache.
|
||||||
|
|
||||||
|
Note that the returned iterator is not safe in the face of concurrent execution:
|
||||||
|
behaviour is undefined if `wrap` is called during iteration.
|
||||||
|
"""
|
||||||
|
return self._result_cache.keys()
|
||||||
|
|
||||||
|
def _get(self, key: KV) -> Optional[ResponseCacheEntry]:
|
||||||
"""Look up the given key.
|
"""Look up the given key.
|
||||||
|
|
||||||
Returns a new Deferred (which also doesn't follow the synapse
|
|
||||||
logcontext rules). You will probably want to make_deferred_yieldable the result.
|
|
||||||
|
|
||||||
If there is no entry for the key, returns None.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
key: key to get/set in the cache
|
key: key to get in the cache
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
None if there is no entry for this key; otherwise a deferred which
|
The entry for this key, if any; else None.
|
||||||
resolves to the result.
|
|
||||||
"""
|
"""
|
||||||
result = self.pending_result_cache.get(key)
|
entry = self._result_cache.get(key)
|
||||||
if result is not None:
|
if entry is not None:
|
||||||
self._metrics.inc_hits()
|
self._metrics.inc_hits()
|
||||||
return result.observe()
|
return entry
|
||||||
else:
|
else:
|
||||||
self._metrics.inc_misses()
|
self._metrics.inc_misses()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _set(
|
def _set(
|
||||||
self, context: ResponseCacheContext[KV], deferred: "defer.Deferred[RV]"
|
self,
|
||||||
) -> "defer.Deferred[RV]":
|
context: ResponseCacheContext[KV],
|
||||||
|
deferred: "defer.Deferred[RV]",
|
||||||
|
opentracing_span_context: "Optional[opentracing.SpanContext]",
|
||||||
|
) -> ResponseCacheEntry:
|
||||||
"""Set the entry for the given key to the given deferred.
|
"""Set the entry for the given key to the given deferred.
|
||||||
|
|
||||||
*deferred* should run its callbacks in the sentinel logcontext (ie,
|
*deferred* should run its callbacks in the sentinel logcontext (ie,
|
||||||
you should wrap normal synapse deferreds with
|
you should wrap normal synapse deferreds with
|
||||||
synapse.logging.context.run_in_background).
|
synapse.logging.context.run_in_background).
|
||||||
|
|
||||||
Returns a new Deferred (which also doesn't follow the synapse logcontext rules).
|
|
||||||
You will probably want to make_deferred_yieldable the result.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
context: Information about the cache miss
|
context: Information about the cache miss
|
||||||
deferred: The deferred which resolves to the result.
|
deferred: The deferred which resolves to the result.
|
||||||
|
opentracing_span_context: An opentracing span wrapping the calculation
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A new deferred which resolves to the actual result.
|
The cache entry object.
|
||||||
"""
|
"""
|
||||||
result = ObservableDeferred(deferred, consumeErrors=True)
|
result = ObservableDeferred(deferred, consumeErrors=True)
|
||||||
key = context.cache_key
|
key = context.cache_key
|
||||||
self.pending_result_cache[key] = result
|
entry = ResponseCacheEntry(result, opentracing_span_context)
|
||||||
|
self._result_cache[key] = entry
|
||||||
|
|
||||||
def on_complete(r: RV) -> RV:
|
def on_complete(r: RV) -> RV:
|
||||||
# if this cache has a non-zero timeout, and the callback has not cleared
|
# if this cache has a non-zero timeout, and the callback has not cleared
|
||||||
|
@ -132,18 +168,18 @@ class ResponseCache(Generic[KV]):
|
||||||
# its removal later.
|
# its removal later.
|
||||||
if self.timeout_sec and context.should_cache:
|
if self.timeout_sec and context.should_cache:
|
||||||
self.clock.call_later(
|
self.clock.call_later(
|
||||||
self.timeout_sec, self.pending_result_cache.pop, key, None
|
self.timeout_sec, self._result_cache.pop, key, None
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# otherwise, remove the result immediately.
|
# otherwise, remove the result immediately.
|
||||||
self.pending_result_cache.pop(key, None)
|
self._result_cache.pop(key, None)
|
||||||
return r
|
return r
|
||||||
|
|
||||||
# make sure we do this *after* adding the entry to pending_result_cache,
|
# make sure we do this *after* adding the entry to result_cache,
|
||||||
# in case the result is already complete (in which case flipping the order would
|
# in case the result is already complete (in which case flipping the order would
|
||||||
# leave us with a stuck entry in the cache).
|
# leave us with a stuck entry in the cache).
|
||||||
result.addBoth(on_complete)
|
result.addBoth(on_complete)
|
||||||
return result.observe()
|
return entry
|
||||||
|
|
||||||
async def wrap(
|
async def wrap(
|
||||||
self,
|
self,
|
||||||
|
@ -189,20 +225,41 @@ class ResponseCache(Generic[KV]):
|
||||||
Returns:
|
Returns:
|
||||||
The result of the callback (from the cache, or otherwise)
|
The result of the callback (from the cache, or otherwise)
|
||||||
"""
|
"""
|
||||||
result = self.get(key)
|
entry = self._get(key)
|
||||||
if not result:
|
if not entry:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[%s]: no cached result for [%s], calculating new one", self._name, key
|
"[%s]: no cached result for [%s], calculating new one", self._name, key
|
||||||
)
|
)
|
||||||
context = ResponseCacheContext(cache_key=key)
|
context = ResponseCacheContext(cache_key=key)
|
||||||
if cache_context:
|
if cache_context:
|
||||||
kwargs["cache_context"] = context
|
kwargs["cache_context"] = context
|
||||||
d = run_in_background(callback, *args, **kwargs)
|
|
||||||
result = self._set(context, d)
|
span_context: Optional[opentracing.SpanContext] = None
|
||||||
elif not isinstance(result, defer.Deferred) or result.called:
|
|
||||||
|
async def cb() -> RV:
|
||||||
|
# NB it is important that we do not `await` before setting span_context!
|
||||||
|
nonlocal span_context
|
||||||
|
with start_active_span(f"ResponseCache[{self._name}].calculate"):
|
||||||
|
span = active_span()
|
||||||
|
if span:
|
||||||
|
span_context = span.context
|
||||||
|
return await callback(*args, **kwargs)
|
||||||
|
|
||||||
|
d = run_in_background(cb)
|
||||||
|
entry = self._set(context, d, span_context)
|
||||||
|
return await make_deferred_yieldable(entry.result.observe())
|
||||||
|
|
||||||
|
result = entry.result.observe()
|
||||||
|
if result.called:
|
||||||
logger.info("[%s]: using completed cached result for [%s]", self._name, key)
|
logger.info("[%s]: using completed cached result for [%s]", self._name, key)
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
"[%s]: using incomplete cached result for [%s]", self._name, key
|
"[%s]: using incomplete cached result for [%s]", self._name, key
|
||||||
)
|
)
|
||||||
|
|
||||||
|
span_context = entry.opentracing_span_context
|
||||||
|
with start_active_span_follows_from(
|
||||||
|
f"ResponseCache[{self._name}].wait",
|
||||||
|
contexts=(span_context,) if span_context else (),
|
||||||
|
):
|
||||||
return await make_deferred_yieldable(result)
|
return await make_deferred_yieldable(result)
|
||||||
|
|
|
@ -11,6 +11,9 @@
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
from unittest.mock import Mock
|
||||||
|
|
||||||
from parameterized import parameterized
|
from parameterized import parameterized
|
||||||
|
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
|
@ -60,10 +63,15 @@ class ResponseCacheTestCase(TestCase):
|
||||||
self.successResultOf(wrap_d),
|
self.successResultOf(wrap_d),
|
||||||
"initial wrap result should be the same",
|
"initial wrap result should be the same",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# a second call should return the result without a call to the wrapped function
|
||||||
|
unexpected = Mock(spec=())
|
||||||
|
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
|
||||||
|
unexpected.assert_not_called()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
expected_result,
|
expected_result,
|
||||||
self.successResultOf(cache.get(0)),
|
self.successResultOf(wrap2_d),
|
||||||
"cache should have the result",
|
"cache should still have the result",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_cache_miss(self):
|
def test_cache_miss(self):
|
||||||
|
@ -80,7 +88,7 @@ class ResponseCacheTestCase(TestCase):
|
||||||
self.successResultOf(wrap_d),
|
self.successResultOf(wrap_d),
|
||||||
"initial wrap result should be the same",
|
"initial wrap result should be the same",
|
||||||
)
|
)
|
||||||
self.assertIsNone(cache.get(0), "cache should not have the result now")
|
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
|
||||||
|
|
||||||
def test_cache_expire(self):
|
def test_cache_expire(self):
|
||||||
cache = self.with_cache("short_cache", ms=1000)
|
cache = self.with_cache("short_cache", ms=1000)
|
||||||
|
@ -92,16 +100,20 @@ class ResponseCacheTestCase(TestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(expected_result, self.successResultOf(wrap_d))
|
self.assertEqual(expected_result, self.successResultOf(wrap_d))
|
||||||
|
|
||||||
|
# a second call should return the result without a call to the wrapped function
|
||||||
|
unexpected = Mock(spec=())
|
||||||
|
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
|
||||||
|
unexpected.assert_not_called()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
expected_result,
|
expected_result,
|
||||||
self.successResultOf(cache.get(0)),
|
self.successResultOf(wrap2_d),
|
||||||
"cache should still have the result",
|
"cache should still have the result",
|
||||||
)
|
)
|
||||||
|
|
||||||
# cache eviction timer is handled
|
# cache eviction timer is handled
|
||||||
self.reactor.pump((2,))
|
self.reactor.pump((2,))
|
||||||
|
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
|
||||||
self.assertIsNone(cache.get(0), "cache should not have the result now")
|
|
||||||
|
|
||||||
def test_cache_wait_hit(self):
|
def test_cache_wait_hit(self):
|
||||||
cache = self.with_cache("neutral_cache")
|
cache = self.with_cache("neutral_cache")
|
||||||
|
@ -133,16 +145,21 @@ class ResponseCacheTestCase(TestCase):
|
||||||
self.reactor.pump((1, 1))
|
self.reactor.pump((1, 1))
|
||||||
|
|
||||||
self.assertEqual(expected_result, self.successResultOf(wrap_d))
|
self.assertEqual(expected_result, self.successResultOf(wrap_d))
|
||||||
|
|
||||||
|
# a second call should immediately return the result without a call to the
|
||||||
|
# wrapped function
|
||||||
|
unexpected = Mock(spec=())
|
||||||
|
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
|
||||||
|
unexpected.assert_not_called()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
expected_result,
|
expected_result,
|
||||||
self.successResultOf(cache.get(0)),
|
self.successResultOf(wrap2_d),
|
||||||
"cache should still have the result",
|
"cache should still have the result",
|
||||||
)
|
)
|
||||||
|
|
||||||
# (1 + 1 + 2) > 3.0, cache eviction timer is handled
|
# (1 + 1 + 2) > 3.0, cache eviction timer is handled
|
||||||
self.reactor.pump((2,))
|
self.reactor.pump((2,))
|
||||||
|
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
|
||||||
self.assertIsNone(cache.get(0), "cache should not have the result now")
|
|
||||||
|
|
||||||
@parameterized.expand([(True,), (False,)])
|
@parameterized.expand([(True,), (False,)])
|
||||||
def test_cache_context_nocache(self, should_cache: bool):
|
def test_cache_context_nocache(self, should_cache: bool):
|
||||||
|
@ -183,10 +200,16 @@ class ResponseCacheTestCase(TestCase):
|
||||||
self.assertEqual(expected_result, self.successResultOf(wrap2_d))
|
self.assertEqual(expected_result, self.successResultOf(wrap2_d))
|
||||||
|
|
||||||
if should_cache:
|
if should_cache:
|
||||||
|
unexpected = Mock(spec=())
|
||||||
|
wrap3_d = defer.ensureDeferred(cache.wrap(0, unexpected))
|
||||||
|
unexpected.assert_not_called()
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
expected_result,
|
expected_result,
|
||||||
self.successResultOf(cache.get(0)),
|
self.successResultOf(wrap3_d),
|
||||||
"cache should still have the result",
|
"cache should still have the result",
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.assertIsNone(cache.get(0), "cache should not have the result")
|
self.assertCountEqual(
|
||||||
|
[], cache.keys(), "cache should not have the result now"
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in New Issue