Properly update retry_last_ts when hitting the maximum retry interval (#16156)
* Properly update retry_last_ts when hitting the maximum retry interval This was broken in 1.87 when the maximum retry interval got changed from almost infinite to a week (and made configurable). fixes #16101 Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de> * Add changelog * Change fix + add test * Add comment --------- Signed-off-by: Nicolas Werner <nicolas.werner@hotmail.de> Co-authored-by: Mathieu Velten <mathieuv@matrix.org>
This commit is contained in:
parent
dffe095642
commit
19a1cda084
|
@ -0,0 +1 @@
|
||||||
|
Fix a bug introduced in 1.87 where synapse would send an excessive amount of federation requests to servers which have been offline for a long time. Contributed by Nico.
|
|
@ -242,6 +242,8 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
|
||||||
) -> None:
|
) -> None:
|
||||||
# Upsert retry time interval if retry_interval is zero (i.e. we're
|
# Upsert retry time interval if retry_interval is zero (i.e. we're
|
||||||
# resetting it) or greater than the existing retry interval.
|
# resetting it) or greater than the existing retry interval.
|
||||||
|
# We also upsert when the new retry interval is the same as the existing one,
|
||||||
|
# since it will be the case when `destination_max_retry_interval` is reached.
|
||||||
#
|
#
|
||||||
# WARNING: This is executed in autocommit, so we shouldn't add any more
|
# WARNING: This is executed in autocommit, so we shouldn't add any more
|
||||||
# SQL calls in here (without being very careful).
|
# SQL calls in here (without being very careful).
|
||||||
|
@ -257,7 +259,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore):
|
||||||
WHERE
|
WHERE
|
||||||
EXCLUDED.retry_interval = 0
|
EXCLUDED.retry_interval = 0
|
||||||
OR destinations.retry_interval IS NULL
|
OR destinations.retry_interval IS NULL
|
||||||
OR destinations.retry_interval < EXCLUDED.retry_interval
|
OR destinations.retry_interval <= EXCLUDED.retry_interval
|
||||||
"""
|
"""
|
||||||
|
|
||||||
txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
|
txn.execute(sql, (destination, failure_ts, retry_last_ts, retry_interval))
|
||||||
|
|
|
@ -108,3 +108,54 @@ class RetryLimiterTestCase(HomeserverTestCase):
|
||||||
|
|
||||||
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
||||||
self.assertIsNone(new_timings)
|
self.assertIsNone(new_timings)
|
||||||
|
|
||||||
|
def test_max_retry_interval(self) -> None:
|
||||||
|
"""Test that `destination_max_retry_interval` setting works as expected"""
|
||||||
|
store = self.hs.get_datastores().main
|
||||||
|
|
||||||
|
destination_max_retry_interval_ms = (
|
||||||
|
self.hs.config.federation.destination_max_retry_interval_ms
|
||||||
|
)
|
||||||
|
|
||||||
|
self.get_success(get_retry_limiter("test_dest", self.clock, store))
|
||||||
|
self.pump(1)
|
||||||
|
|
||||||
|
failure_ts = self.clock.time_msec()
|
||||||
|
|
||||||
|
# Simulate reaching destination_max_retry_interval
|
||||||
|
self.get_success(
|
||||||
|
store.set_destination_retry_timings(
|
||||||
|
"test_dest",
|
||||||
|
failure_ts=failure_ts,
|
||||||
|
retry_last_ts=failure_ts,
|
||||||
|
retry_interval=destination_max_retry_interval_ms,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check it fails
|
||||||
|
self.get_failure(
|
||||||
|
get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get past retry_interval and we can try again, and still throw an error to continue the backoff
|
||||||
|
self.reactor.advance(destination_max_retry_interval_ms / 1000 + 1)
|
||||||
|
limiter = self.get_success(get_retry_limiter("test_dest", self.clock, store))
|
||||||
|
self.pump(1)
|
||||||
|
try:
|
||||||
|
with limiter:
|
||||||
|
self.pump(1)
|
||||||
|
raise AssertionError("argh")
|
||||||
|
except AssertionError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.pump()
|
||||||
|
|
||||||
|
# retry_interval does not increase and stays at destination_max_retry_interval_ms
|
||||||
|
new_timings = self.get_success(store.get_destination_retry_timings("test_dest"))
|
||||||
|
assert new_timings is not None
|
||||||
|
self.assertEqual(new_timings.retry_interval, destination_max_retry_interval_ms)
|
||||||
|
|
||||||
|
# Check it fails
|
||||||
|
self.get_failure(
|
||||||
|
get_retry_limiter("test_dest", self.clock, store), NotRetryingDestination
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in New Issue