Merge pull request #2920 from matrix-org/erikj/retry_send_event

Make repl send_event idempotent and retry on timeouts
This commit is contained in:
Erik Johnston 2018-03-01 12:14:21 +00:00 committed by GitHub
commit 17445e6701
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 43 additions and 6 deletions

View File

@ -15,10 +15,15 @@
from twisted.internet import defer
from synapse.api.errors import SynapseError, MatrixCodeMessageException
from synapse.api.errors import (
SynapseError, MatrixCodeMessageException, CodeMessageException,
)
from synapse.events import FrozenEvent
from synapse.events.snapshot import EventContext
from synapse.http.servlet import RestServlet, parse_json_object_from_request
from synapse.util.async import sleep
from synapse.util.caches.response_cache import ResponseCache
from synapse.util.logcontext import make_deferred_yieldable, preserve_fn
from synapse.util.metrics import Measure
from synapse.types import Requester
@ -43,7 +48,9 @@ def send_event_to_master(client, host, port, requester, event, context,
ratelimit (bool)
extra_users (list(str)): Any extra users to notify about event
"""
uri = "http://%s:%s/_synapse/replication/send_event" % (host, port,)
uri = "http://%s:%s/_synapse/replication/send_event/%s" % (
host, port, event.event_id,
)
payload = {
"event": event.get_pdu_json(),
@ -56,7 +63,22 @@ def send_event_to_master(client, host, port, requester, event, context,
}
try:
result = yield client.post_json_get_json(uri, payload)
# We keep retrying the same request for timeouts. This is so that we
# have a good idea that the request has either succeeded or failed on
# the master, and so whether we should clean up or not.
while True:
try:
result = yield client.put_json(uri, payload)
break
except CodeMessageException as e:
if e.code != 504:
raise
logger.warn("send_event request timed out")
# If we timed out we probably don't need to worry about backing
# off too much, but let's just wait a little anyway.
yield sleep(1)
except MatrixCodeMessageException as e:
# We convert to SynapseError as we know that it was a SynapseError
# on the master process that we should send to the client. (And
@ -71,7 +93,7 @@ class ReplicationSendEventRestServlet(RestServlet):
The API looks like:
POST /_synapse/replication/send_event
POST /_synapse/replication/send_event/:event_id
{
"event": { .. serialized event .. },
@ -83,7 +105,7 @@ class ReplicationSendEventRestServlet(RestServlet):
"extra_users": [],
}
"""
PATTERNS = [re.compile("^/_synapse/replication/send_event$")]
PATTERNS = [re.compile("^/_synapse/replication/send_event/(?P<event_id>[^/]+)$")]
def __init__(self, hs):
super(ReplicationSendEventRestServlet, self).__init__()
@ -92,8 +114,23 @@ class ReplicationSendEventRestServlet(RestServlet):
self.store = hs.get_datastore()
self.clock = hs.get_clock()
# The responses are tiny, so we may as well cache them for a while
self.response_cache = ResponseCache(hs, timeout_ms=30 * 60 * 1000)
def on_PUT(self, request, event_id):
    """Serve a PUT for ``event_id``, reusing a cached response when one exists.

    The response cache is consulted first, keyed on ``event_id``. On a hit
    a warning is logged and the cached result is returned. On a miss the
    request is handed to ``_handle_request`` and whatever that returns is
    stored in the cache under ``event_id``.

    Args:
        request: the incoming request, passed through to ``_handle_request``.
        event_id: cache key for the response (presumably the ``event_id``
            captured from the request path -- confirm against PATTERNS).

    Returns:
        The cached or newly stored result, passed through
        ``make_deferred_yieldable``.
    """
    cached = self.response_cache.get(event_id)
    if cached:
        # Same event_id seen before: hand back the earlier result rather
        # than processing the request a second time.
        logger.warn("Returning cached response")
        return make_deferred_yieldable(cached)

    # First time we see this event_id: start handling the request and
    # remember the result under that key.
    pending = self._handle_request(request)
    stored = self.response_cache.set(event_id, pending)
    return make_deferred_yieldable(stored)
@preserve_fn
@defer.inlineCallbacks
def on_POST(self, request):
def _handle_request(self, request):
with Measure(self.clock, "repl_send_event_parse"):
content = parse_json_object_from_request(request)