Improve opentracing support for `ResponseCache` (#11607)

This adds some opentracing annotations to ResponseCache, to make it easier to see what's going on; in particular, it adds a link back to the initial trace which is actually doing the work of generating the response.
This commit is contained in:
Richard van der Hoff 2021-12-20 18:12:08 +00:00 committed by GitHub
parent 60fa4935b5
commit c3e38b88f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 150 additions and 49 deletions

1
changelog.d/11607.misc Normal file
View File

@ -0,0 +1 @@
Improve opentracing support for requests which use a `ResponseCache`.

View File

@ -172,7 +172,7 @@ class RoomCreationHandler:
user_id = requester.user.to_string() user_id = requester.user.to_string()
# Check if this room is already being upgraded by another person # Check if this room is already being upgraded by another person
for key in self._upgrade_response_cache.pending_result_cache: for key in self._upgrade_response_cache.keys():
if key[0] == old_room_id and key[1] != user_id: if key[0] == old_room_id and key[1] != user_id:
# Two different people are trying to upgrade the same room. # Two different people are trying to upgrade the same room.
# Send the second an error. # Send the second an error.

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import abc
import collections import collections
import inspect import inspect
import itertools import itertools
@ -57,7 +58,26 @@ logger = logging.getLogger(__name__)
_T = TypeVar("_T") _T = TypeVar("_T")
class ObservableDeferred(Generic[_T]): class AbstractObservableDeferred(Generic[_T], metaclass=abc.ABCMeta):
"""Abstract base class defining the consumer interface of ObservableDeferred"""
__slots__ = ()
@abc.abstractmethod
def observe(self) -> "defer.Deferred[_T]":
"""Add a new observer for this ObservableDeferred
This returns a brand new deferred that is resolved when the underlying
deferred is resolved. Interacting with the returned deferred does not
effect the underlying deferred.
Note that the returned Deferred doesn't follow the Synapse logcontext rules -
you will probably want to `make_deferred_yieldable` it.
"""
...
class ObservableDeferred(Generic[_T], AbstractObservableDeferred[_T]):
"""Wraps a deferred object so that we can add observer deferreds. These """Wraps a deferred object so that we can add observer deferreds. These
observer deferreds do not affect the callback chain of the original observer deferreds do not affect the callback chain of the original
deferred. deferred.

View File

@ -12,19 +12,37 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
from typing import Any, Awaitable, Callable, Dict, Generic, Optional, TypeVar from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Callable,
Dict,
Generic,
Iterable,
Optional,
TypeVar,
)
import attr import attr
from twisted.internet import defer from twisted.internet import defer
from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.opentracing import (
active_span,
start_active_span,
start_active_span_follows_from,
)
from synapse.util import Clock from synapse.util import Clock
from synapse.util.async_helpers import ObservableDeferred from synapse.util.async_helpers import AbstractObservableDeferred, ObservableDeferred
from synapse.util.caches import register_cache from synapse.util.caches import register_cache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
if TYPE_CHECKING:
import opentracing
# the type of the key in the cache # the type of the key in the cache
KV = TypeVar("KV") KV = TypeVar("KV")
@ -54,6 +72,20 @@ class ResponseCacheContext(Generic[KV]):
""" """
@attr.s(auto_attribs=True)
class ResponseCacheEntry:
result: AbstractObservableDeferred
"""The (possibly incomplete) result of the operation.
Note that we continue to store an ObservableDeferred even after the operation
completes (rather than switching to an immediate value), since that makes it
easier to cache Failure results.
"""
opentracing_span_context: "Optional[opentracing.SpanContext]"
"""The opentracing span which generated/is generating the result"""
class ResponseCache(Generic[KV]): class ResponseCache(Generic[KV]):
""" """
This caches a deferred response. Until the deferred completes it will be This caches a deferred response. Until the deferred completes it will be
@ -63,10 +95,7 @@ class ResponseCache(Generic[KV]):
""" """
def __init__(self, clock: Clock, name: str, timeout_ms: float = 0): def __init__(self, clock: Clock, name: str, timeout_ms: float = 0):
# This is poorly-named: it includes both complete and incomplete results. self._result_cache: Dict[KV, ResponseCacheEntry] = {}
# We keep complete results rather than switching to absolute values because
# that makes it easier to cache Failure results.
self.pending_result_cache: Dict[KV, ObservableDeferred] = {}
self.clock = clock self.clock = clock
self.timeout_sec = timeout_ms / 1000.0 self.timeout_sec = timeout_ms / 1000.0
@ -75,56 +104,63 @@ class ResponseCache(Generic[KV]):
self._metrics = register_cache("response_cache", name, self, resizable=False) self._metrics = register_cache("response_cache", name, self, resizable=False)
def size(self) -> int: def size(self) -> int:
return len(self.pending_result_cache) return len(self._result_cache)
def __len__(self) -> int: def __len__(self) -> int:
return self.size() return self.size()
def get(self, key: KV) -> Optional[defer.Deferred]: def keys(self) -> Iterable[KV]:
"""Get the keys currently in the result cache
Returns both incomplete entries, and (if the timeout on this cache is non-zero),
complete entries which are still in the cache.
Note that the returned iterator is not safe in the face of concurrent execution:
behaviour is undefined if `wrap` is called during iteration.
"""
return self._result_cache.keys()
def _get(self, key: KV) -> Optional[ResponseCacheEntry]:
"""Look up the given key. """Look up the given key.
Returns a new Deferred (which also doesn't follow the synapse
logcontext rules). You will probably want to make_deferred_yieldable the result.
If there is no entry for the key, returns None.
Args: Args:
key: key to get/set in the cache key: key to get in the cache
Returns: Returns:
None if there is no entry for this key; otherwise a deferred which The entry for this key, if any; else None.
resolves to the result.
""" """
result = self.pending_result_cache.get(key) entry = self._result_cache.get(key)
if result is not None: if entry is not None:
self._metrics.inc_hits() self._metrics.inc_hits()
return result.observe() return entry
else: else:
self._metrics.inc_misses() self._metrics.inc_misses()
return None return None
def _set( def _set(
self, context: ResponseCacheContext[KV], deferred: "defer.Deferred[RV]" self,
) -> "defer.Deferred[RV]": context: ResponseCacheContext[KV],
deferred: "defer.Deferred[RV]",
opentracing_span_context: "Optional[opentracing.SpanContext]",
) -> ResponseCacheEntry:
"""Set the entry for the given key to the given deferred. """Set the entry for the given key to the given deferred.
*deferred* should run its callbacks in the sentinel logcontext (ie, *deferred* should run its callbacks in the sentinel logcontext (ie,
you should wrap normal synapse deferreds with you should wrap normal synapse deferreds with
synapse.logging.context.run_in_background). synapse.logging.context.run_in_background).
Returns a new Deferred (which also doesn't follow the synapse logcontext rules).
You will probably want to make_deferred_yieldable the result.
Args: Args:
context: Information about the cache miss context: Information about the cache miss
deferred: The deferred which resolves to the result. deferred: The deferred which resolves to the result.
opentracing_span_context: An opentracing span wrapping the calculation
Returns: Returns:
A new deferred which resolves to the actual result. The cache entry object.
""" """
result = ObservableDeferred(deferred, consumeErrors=True) result = ObservableDeferred(deferred, consumeErrors=True)
key = context.cache_key key = context.cache_key
self.pending_result_cache[key] = result entry = ResponseCacheEntry(result, opentracing_span_context)
self._result_cache[key] = entry
def on_complete(r: RV) -> RV: def on_complete(r: RV) -> RV:
# if this cache has a non-zero timeout, and the callback has not cleared # if this cache has a non-zero timeout, and the callback has not cleared
@ -132,18 +168,18 @@ class ResponseCache(Generic[KV]):
# its removal later. # its removal later.
if self.timeout_sec and context.should_cache: if self.timeout_sec and context.should_cache:
self.clock.call_later( self.clock.call_later(
self.timeout_sec, self.pending_result_cache.pop, key, None self.timeout_sec, self._result_cache.pop, key, None
) )
else: else:
# otherwise, remove the result immediately. # otherwise, remove the result immediately.
self.pending_result_cache.pop(key, None) self._result_cache.pop(key, None)
return r return r
# make sure we do this *after* adding the entry to pending_result_cache, # make sure we do this *after* adding the entry to result_cache,
# in case the result is already complete (in which case flipping the order would # in case the result is already complete (in which case flipping the order would
# leave us with a stuck entry in the cache). # leave us with a stuck entry in the cache).
result.addBoth(on_complete) result.addBoth(on_complete)
return result.observe() return entry
async def wrap( async def wrap(
self, self,
@ -189,20 +225,41 @@ class ResponseCache(Generic[KV]):
Returns: Returns:
The result of the callback (from the cache, or otherwise) The result of the callback (from the cache, or otherwise)
""" """
result = self.get(key) entry = self._get(key)
if not result: if not entry:
logger.debug( logger.debug(
"[%s]: no cached result for [%s], calculating new one", self._name, key "[%s]: no cached result for [%s], calculating new one", self._name, key
) )
context = ResponseCacheContext(cache_key=key) context = ResponseCacheContext(cache_key=key)
if cache_context: if cache_context:
kwargs["cache_context"] = context kwargs["cache_context"] = context
d = run_in_background(callback, *args, **kwargs)
result = self._set(context, d) span_context: Optional[opentracing.SpanContext] = None
elif not isinstance(result, defer.Deferred) or result.called:
async def cb() -> RV:
# NB it is important that we do not `await` before setting span_context!
nonlocal span_context
with start_active_span(f"ResponseCache[{self._name}].calculate"):
span = active_span()
if span:
span_context = span.context
return await callback(*args, **kwargs)
d = run_in_background(cb)
entry = self._set(context, d, span_context)
return await make_deferred_yieldable(entry.result.observe())
result = entry.result.observe()
if result.called:
logger.info("[%s]: using completed cached result for [%s]", self._name, key) logger.info("[%s]: using completed cached result for [%s]", self._name, key)
else: else:
logger.info( logger.info(
"[%s]: using incomplete cached result for [%s]", self._name, key "[%s]: using incomplete cached result for [%s]", self._name, key
) )
span_context = entry.opentracing_span_context
with start_active_span_follows_from(
f"ResponseCache[{self._name}].wait",
contexts=(span_context,) if span_context else (),
):
return await make_deferred_yieldable(result) return await make_deferred_yieldable(result)

View File

@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from unittest.mock import Mock
from parameterized import parameterized from parameterized import parameterized
from twisted.internet import defer from twisted.internet import defer
@ -60,10 +63,15 @@ class ResponseCacheTestCase(TestCase):
self.successResultOf(wrap_d), self.successResultOf(wrap_d),
"initial wrap result should be the same", "initial wrap result should be the same",
) )
# a second call should return the result without a call to the wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual( self.assertEqual(
expected_result, expected_result,
self.successResultOf(cache.get(0)), self.successResultOf(wrap2_d),
"cache should have the result", "cache should still have the result",
) )
def test_cache_miss(self): def test_cache_miss(self):
@ -80,7 +88,7 @@ class ResponseCacheTestCase(TestCase):
self.successResultOf(wrap_d), self.successResultOf(wrap_d),
"initial wrap result should be the same", "initial wrap result should be the same",
) )
self.assertIsNone(cache.get(0), "cache should not have the result now") self.assertCountEqual([], cache.keys(), "cache should not have the result now")
def test_cache_expire(self): def test_cache_expire(self):
cache = self.with_cache("short_cache", ms=1000) cache = self.with_cache("short_cache", ms=1000)
@ -92,16 +100,20 @@ class ResponseCacheTestCase(TestCase):
) )
self.assertEqual(expected_result, self.successResultOf(wrap_d)) self.assertEqual(expected_result, self.successResultOf(wrap_d))
# a second call should return the result without a call to the wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual( self.assertEqual(
expected_result, expected_result,
self.successResultOf(cache.get(0)), self.successResultOf(wrap2_d),
"cache should still have the result", "cache should still have the result",
) )
# cache eviction timer is handled # cache eviction timer is handled
self.reactor.pump((2,)) self.reactor.pump((2,))
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
self.assertIsNone(cache.get(0), "cache should not have the result now")
def test_cache_wait_hit(self): def test_cache_wait_hit(self):
cache = self.with_cache("neutral_cache") cache = self.with_cache("neutral_cache")
@ -133,16 +145,21 @@ class ResponseCacheTestCase(TestCase):
self.reactor.pump((1, 1)) self.reactor.pump((1, 1))
self.assertEqual(expected_result, self.successResultOf(wrap_d)) self.assertEqual(expected_result, self.successResultOf(wrap_d))
# a second call should immediately return the result without a call to the
# wrapped function
unexpected = Mock(spec=())
wrap2_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual( self.assertEqual(
expected_result, expected_result,
self.successResultOf(cache.get(0)), self.successResultOf(wrap2_d),
"cache should still have the result", "cache should still have the result",
) )
# (1 + 1 + 2) > 3.0, cache eviction timer is handled # (1 + 1 + 2) > 3.0, cache eviction timer is handled
self.reactor.pump((2,)) self.reactor.pump((2,))
self.assertCountEqual([], cache.keys(), "cache should not have the result now")
self.assertIsNone(cache.get(0), "cache should not have the result now")
@parameterized.expand([(True,), (False,)]) @parameterized.expand([(True,), (False,)])
def test_cache_context_nocache(self, should_cache: bool): def test_cache_context_nocache(self, should_cache: bool):
@ -183,10 +200,16 @@ class ResponseCacheTestCase(TestCase):
self.assertEqual(expected_result, self.successResultOf(wrap2_d)) self.assertEqual(expected_result, self.successResultOf(wrap2_d))
if should_cache: if should_cache:
unexpected = Mock(spec=())
wrap3_d = defer.ensureDeferred(cache.wrap(0, unexpected))
unexpected.assert_not_called()
self.assertEqual( self.assertEqual(
expected_result, expected_result,
self.successResultOf(cache.get(0)), self.successResultOf(wrap3_d),
"cache should still have the result", "cache should still have the result",
) )
else: else:
self.assertIsNone(cache.get(0), "cache should not have the result") self.assertCountEqual(
[], cache.keys(), "cache should not have the result now"
)