Make `handle_new_client_event` throw `PartialStateConflictError` (#14665)
Then adapt the calling code to retry when needed, so that clients do not receive a 500 error. Signed-off-by: Mathieu Velten <mathieuv@matrix.org> Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
This commit is contained in:
parent
046320b9b6
commit
54c012c5a8
|
@ -0,0 +1 @@
|
|||
Change `handle_new_client_event` signature so that a 429 does not reach clients on `PartialStateConflictError`, and internally retry when needed instead.
|
|
@ -1343,7 +1343,16 @@ class FederationHandler:
|
|||
)
|
||||
|
||||
EventValidator().validate_builder(builder)
|
||||
event, context = await self.event_creation_handler.create_new_client_event(
|
||||
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in send_membership_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
(
|
||||
event,
|
||||
context,
|
||||
) = await self.event_creation_handler.create_new_client_event(
|
||||
builder=builder
|
||||
)
|
||||
|
||||
|
@ -1359,9 +1368,13 @@ class FederationHandler:
|
|||
|
||||
try:
|
||||
validate_event_for_room_version(event)
|
||||
await self._event_auth_handler.check_auth_rules_from_context(event)
|
||||
await self._event_auth_handler.check_auth_rules_from_context(
|
||||
event
|
||||
)
|
||||
except AuthError as e:
|
||||
logger.warning("Denying new third party invite %r because %s", event, e)
|
||||
logger.warning(
|
||||
"Denying new third party invite %r because %s", event, e
|
||||
)
|
||||
raise e
|
||||
|
||||
await self._check_signature(event, context)
|
||||
|
@ -1369,6 +1382,14 @@ class FederationHandler:
|
|||
# We retrieve the room member handler here so as not to cause a cyclic dependency
|
||||
member_handler = self.hs.get_room_member_handler()
|
||||
await member_handler.send_membership_event(None, event, context)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
else:
|
||||
destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
|
||||
|
||||
|
@ -1400,7 +1421,15 @@ class FederationHandler:
|
|||
room_version_obj, event_dict
|
||||
)
|
||||
|
||||
event, context = await self.event_creation_handler.create_new_client_event(
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in send_membership_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
(
|
||||
event,
|
||||
context,
|
||||
) = await self.event_creation_handler.create_new_client_event(
|
||||
builder=builder
|
||||
)
|
||||
event, context = await self.add_display_name_to_third_party_invite(
|
||||
|
@ -1417,12 +1446,22 @@ class FederationHandler:
|
|||
|
||||
# We need to tell the transaction queue to send this out, even
|
||||
# though the sender isn't a local user.
|
||||
event.internal_metadata.send_on_behalf_of = get_domain_from_id(event.sender)
|
||||
event.internal_metadata.send_on_behalf_of = get_domain_from_id(
|
||||
event.sender
|
||||
)
|
||||
|
||||
# We retrieve the room member handler here so as not to cause a cyclic dependency
|
||||
member_handler = self.hs.get_room_member_handler()
|
||||
await member_handler.send_membership_event(None, event, context)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
|
||||
async def add_display_name_to_third_party_invite(
|
||||
self,
|
||||
room_version_obj: RoomVersion,
|
||||
|
|
|
@ -37,7 +37,6 @@ from synapse.api.errors import (
|
|||
AuthError,
|
||||
Codes,
|
||||
ConsentNotGivenError,
|
||||
LimitExceededError,
|
||||
NotFoundError,
|
||||
ShadowBanError,
|
||||
SynapseError,
|
||||
|
@ -999,6 +998,11 @@ class EventCreationHandler:
|
|||
event.internal_metadata.stream_ordering,
|
||||
)
|
||||
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in handle_new_client_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
event, context = await self.create_event(
|
||||
requester,
|
||||
event_dict,
|
||||
|
@ -1054,6 +1058,14 @@ class EventCreationHandler:
|
|||
ignore_shadow_ban=ignore_shadow_ban,
|
||||
)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
|
||||
# we know it was persisted, so must have a stream ordering
|
||||
assert ev.internal_metadata.stream_ordering
|
||||
return ev, ev.internal_metadata.stream_ordering
|
||||
|
@ -1356,7 +1368,7 @@ class EventCreationHandler:
|
|||
|
||||
Raises:
|
||||
ShadowBanError if the requester has been shadow-banned.
|
||||
SynapseError(503) if attempting to persist a partial state event in
|
||||
PartialStateConflictError if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
extra_users = extra_users or []
|
||||
|
@ -1418,7 +1430,6 @@ class EventCreationHandler:
|
|||
# We now persist the event (and update the cache in parallel, since we
|
||||
# don't want to block on it).
|
||||
event, context = events_and_context[0]
|
||||
try:
|
||||
result, _ = await make_deferred_yieldable(
|
||||
gather_results(
|
||||
(
|
||||
|
@ -1431,21 +1442,11 @@ class EventCreationHandler:
|
|||
),
|
||||
run_in_background(
|
||||
self.cache_joined_hosts_for_events, events_and_context
|
||||
).addErrback(
|
||||
log_failure, "cache_joined_hosts_for_event failed"
|
||||
),
|
||||
).addErrback(log_failure, "cache_joined_hosts_for_event failed"),
|
||||
),
|
||||
consumeErrors=True,
|
||||
)
|
||||
).addErrback(unwrapFirstError)
|
||||
except PartialStateConflictError as e:
|
||||
# The event context needs to be recomputed.
|
||||
# Turn the error into a 429, as a hint to the client to try again.
|
||||
logger.info(
|
||||
"Room %s was un-partial stated while persisting client event.",
|
||||
event.room_id,
|
||||
)
|
||||
raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
|
||||
|
||||
return result
|
||||
|
||||
|
@ -2011,6 +2012,11 @@ class EventCreationHandler:
|
|||
members = await self.store.get_local_users_in_room(room_id)
|
||||
for user_id in members:
|
||||
requester = create_requester(user_id, authenticated_entity=self.server_name)
|
||||
try:
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in handle_new_client_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
event, context = await self.create_event(
|
||||
requester,
|
||||
|
@ -2032,6 +2038,14 @@ class EventCreationHandler:
|
|||
ratelimit=False,
|
||||
ignore_shadow_ban=True,
|
||||
)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
return True
|
||||
except AuthError:
|
||||
logger.info(
|
||||
|
|
|
@ -62,6 +62,7 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents
|
|||
from synapse.handlers.relations import BundledAggregations
|
||||
from synapse.module_api import NOT_SPAM
|
||||
from synapse.rest.admin._base import assert_user_is_admin
|
||||
from synapse.storage.databases.main.events import PartialStateConflictError
|
||||
from synapse.streams import EventSource
|
||||
from synapse.types import (
|
||||
JsonDict,
|
||||
|
@ -207,6 +208,11 @@ class RoomCreationHandler:
|
|||
|
||||
new_room_id = self._generate_room_id()
|
||||
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in _upgrade_room, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
# Check whether the user has the power level to carry out the upgrade.
|
||||
# `check_auth_rules_from_context` will check that they are in the room and have
|
||||
# the required power level to send the tombstone event.
|
||||
|
@ -227,7 +233,9 @@ class RoomCreationHandler:
|
|||
},
|
||||
)
|
||||
validate_event_for_room_version(tombstone_event)
|
||||
await self._event_auth_handler.check_auth_rules_from_context(tombstone_event)
|
||||
await self._event_auth_handler.check_auth_rules_from_context(
|
||||
tombstone_event
|
||||
)
|
||||
|
||||
# Upgrade the room
|
||||
#
|
||||
|
@ -247,6 +255,17 @@ class RoomCreationHandler:
|
|||
)
|
||||
|
||||
return ret
|
||||
except PartialStateConflictError as e:
|
||||
# Clean up the cache so we can retry properly
|
||||
self._upgrade_response_cache.unset((old_room_id, user_id))
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
|
||||
# This is to satisfy mypy and should never happen
|
||||
raise PartialStateConflictError()
|
||||
|
||||
async def _upgrade_room(
|
||||
self,
|
||||
|
|
|
@ -375,6 +375,8 @@ class RoomBatchHandler:
|
|||
# Events are sorted by (topological_ordering, stream_ordering)
|
||||
# where topological_ordering is just depth.
|
||||
for (event, context) in reversed(events_to_persist):
|
||||
# This call can't raise `PartialStateConflictError` since we forbid
|
||||
# use of the historical batch API during partial state
|
||||
await self.event_creation_handler.handle_new_client_event(
|
||||
await self.create_requester_for_user_id_from_app_service(
|
||||
event.sender, app_service_requester.app_service
|
||||
|
|
|
@ -34,6 +34,7 @@ from synapse.events.snapshot import EventContext
|
|||
from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN
|
||||
from synapse.logging import opentracing
|
||||
from synapse.module_api import NOT_SPAM
|
||||
from synapse.storage.databases.main.events import PartialStateConflictError
|
||||
from synapse.types import (
|
||||
JsonDict,
|
||||
Requester,
|
||||
|
@ -392,6 +393,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
|
|||
event_pos = await self.store.get_position_for_event(existing_event_id)
|
||||
return existing_event_id, event_pos.stream
|
||||
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in handle_new_client_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
event, context = await self.event_creation_handler.create_event(
|
||||
requester,
|
||||
{
|
||||
|
@ -418,12 +424,16 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
|
|||
StateFilter.from_types([(EventTypes.Member, None)])
|
||||
)
|
||||
|
||||
prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None)
|
||||
prev_member_event_id = prev_state_ids.get(
|
||||
(EventTypes.Member, user_id), None
|
||||
)
|
||||
|
||||
if event.membership == Membership.JOIN:
|
||||
newly_joined = True
|
||||
if prev_member_event_id:
|
||||
prev_member_event = await self.store.get_event(prev_member_event_id)
|
||||
prev_member_event = await self.store.get_event(
|
||||
prev_member_event_id
|
||||
)
|
||||
newly_joined = prev_member_event.membership != Membership.JOIN
|
||||
|
||||
# Only rate-limit if the user actually joined the room, otherwise we'll end
|
||||
|
@ -434,19 +444,31 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
|
|||
requester, key=room_id, update=False
|
||||
)
|
||||
with opentracing.start_active_span("handle_new_client_event"):
|
||||
result_event = await self.event_creation_handler.handle_new_client_event(
|
||||
result_event = (
|
||||
await self.event_creation_handler.handle_new_client_event(
|
||||
requester,
|
||||
events_and_context=[(event, context)],
|
||||
extra_users=[target],
|
||||
ratelimit=ratelimit,
|
||||
)
|
||||
)
|
||||
|
||||
if event.membership == Membership.LEAVE:
|
||||
if prev_member_event_id:
|
||||
prev_member_event = await self.store.get_event(prev_member_event_id)
|
||||
prev_member_event = await self.store.get_event(
|
||||
prev_member_event_id
|
||||
)
|
||||
if prev_member_event.membership == Membership.JOIN:
|
||||
await self._user_left_room(target, room_id)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
|
||||
# we know it was persisted, so should have a stream ordering
|
||||
assert result_event.internal_metadata.stream_ordering
|
||||
return result_event.event_id, result_event.internal_metadata.stream_ordering
|
||||
|
@ -1234,6 +1256,8 @@ class RoomMemberHandler(metaclass=abc.ABCMeta):
|
|||
ratelimit: Whether to rate limit this request.
|
||||
Raises:
|
||||
SynapseError if there was a problem changing the membership.
|
||||
PartialStateConflictError: if attempting to persist a partial state event in
|
||||
a room that has been un-partial stated.
|
||||
"""
|
||||
target_user = UserID.from_string(event.state_key)
|
||||
room_id = event.room_id
|
||||
|
@ -1863,6 +1887,11 @@ class RoomMemberMasterHandler(RoomMemberHandler):
|
|||
list(previous_membership_event.auth_event_ids()) + prev_event_ids
|
||||
)
|
||||
|
||||
# Try several times, it could fail with PartialStateConflictError
|
||||
# in handle_new_client_event, cf comment in except block.
|
||||
max_retries = 5
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
event, context = await self.event_creation_handler.create_event(
|
||||
requester,
|
||||
event_dict,
|
||||
|
@ -1873,11 +1902,22 @@ class RoomMemberMasterHandler(RoomMemberHandler):
|
|||
)
|
||||
event.internal_metadata.out_of_band_membership = True
|
||||
|
||||
result_event = await self.event_creation_handler.handle_new_client_event(
|
||||
result_event = (
|
||||
await self.event_creation_handler.handle_new_client_event(
|
||||
requester,
|
||||
events_and_context=[(event, context)],
|
||||
extra_users=[UserID.from_string(target_user)],
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
except PartialStateConflictError as e:
|
||||
# Persisting couldn't happen because the room got un-partial stated
|
||||
# in the meantime and context needs to be recomputed, so let's do so.
|
||||
if i == max_retries - 1:
|
||||
raise e
|
||||
pass
|
||||
|
||||
# we know it was persisted, so must have a stream ordering
|
||||
assert result_event.internal_metadata.stream_ordering
|
||||
|
||||
|
|
|
@ -167,12 +167,10 @@ class ResponseCache(Generic[KV]):
|
|||
# the should_cache bit, we leave it in the cache for now and schedule
|
||||
# its removal later.
|
||||
if self.timeout_sec and context.should_cache:
|
||||
self.clock.call_later(
|
||||
self.timeout_sec, self._result_cache.pop, key, None
|
||||
)
|
||||
self.clock.call_later(self.timeout_sec, self.unset, key)
|
||||
else:
|
||||
# otherwise, remove the result immediately.
|
||||
self._result_cache.pop(key, None)
|
||||
self.unset(key)
|
||||
return r
|
||||
|
||||
# make sure we do this *after* adding the entry to result_cache,
|
||||
|
@ -181,6 +179,14 @@ class ResponseCache(Generic[KV]):
|
|||
result.addBoth(on_complete)
|
||||
return entry
|
||||
|
||||
def unset(self, key: KV) -> None:
|
||||
"""Remove the cached value for this key from the cache, if any.
|
||||
|
||||
Args:
|
||||
key: key used to remove the cached value
|
||||
"""
|
||||
self._result_cache.pop(key, None)
|
||||
|
||||
async def wrap(
|
||||
self,
|
||||
key: KV,
|
||||
|
|
Loading…
Reference in New Issue