From 1c3f4d9ca53b3e3a5ef1849a8da68c6d8cf82938 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Sep 2018 03:09:13 +1000 Subject: [PATCH 01/38] buffer? --- synapse/http/matrixfederationclient.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c49dbacd93..cdbe27eb37 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -221,7 +221,8 @@ class MatrixFederationHttpClient(object): headers=Headers(headers_dict), data=data, agent=self.agent, - reactor=self.hs.get_reactor() + reactor=self.hs.get_reactor(), + unbuffered=True ) request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) response = yield make_deferred_yieldable( From 63755fa4c25f120941aba24d6192815313d12ea7 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Sep 2018 03:21:47 +1000 Subject: [PATCH 02/38] we do that higher up --- synapse/http/matrixfederationclient.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index cdbe27eb37..d1a6e87706 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -91,6 +91,7 @@ class MatrixFederationHttpClient(object): self.server_name = hs.hostname reactor = hs.get_reactor() pool = HTTPConnectionPool(reactor) + pool.retryAutomatically = False pool.maxPersistentPerHost = 5 pool.cachedConnectionTimeout = 2 * 60 self.agent = Agent.usingEndpointFactory( From 7c33ab76da91a747fd442f65c9bfe187a81864f7 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Sep 2018 03:45:34 +1000 Subject: [PATCH 03/38] redact better --- synapse/http/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 58ef8d3ce4..a3f9e4f67c 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -38,12 +38,12 @@ def cancelled_to_request_timed_out_error(value, timeout): return value -ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') +ACCESS_TOKEN_RE = re.compile(r'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') def redact_uri(uri): """Strips access tokens from the uri replaces with """ return ACCESS_TOKEN_RE.sub( - br'\1\3', + r'\1\3', uri ) From f1a72646636c08e5e0f6dc6f583d0bffebf94c85 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 13 Sep 2018 11:51:12 -0600 Subject: [PATCH 04/38] Fix minor typo in exception --- synapse/replication/tcp/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/replication/tcp/streams.py b/synapse/replication/tcp/streams.py index 55fe701c5c..c1e626be3f 100644 --- a/synapse/replication/tcp/streams.py +++ b/synapse/replication/tcp/streams.py @@ -196,7 +196,7 @@ class Stream(object): ) if len(rows) >= MAX_EVENTS_BEHIND: - raise Exception("stream %s has fallen behined" % (self.NAME)) + raise Exception("stream %s has fallen behind" % (self.NAME)) else: rows = yield self.update_function( from_token, current_token, From 8f08d848f53bc79cdd47f8d4025313268a16b227 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Sep 2018 03:53:56 +1000 Subject: [PATCH 05/38] fix --- synapse/http/site.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index f0828c6542..18f2d37e00 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -85,7 +85,7 @@ class SynapseRequest(Request): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): - return redact_uri(self.uri) + return redact_uri(self.uri.decode('ascii')) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] From c971aa7b9d5b4d3900404a285f34dd5004a76f37 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 14 Sep 2018 03:57:02 +1000 Subject: [PATCH 06/38] fix --- synapse/http/site.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/http/site.py b/synapse/http/site.py index 18f2d37e00..e1e53e8ae5 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -85,7 +85,10 @@ class SynapseRequest(Request): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): - return redact_uri(self.uri.decode('ascii')) + uri = self.uri + if isinstance(uri, bytes): + uri = self.uri.decode('ascii') + return redact_uri(uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] From 984db8bb0858757884c0f742bdaa27ae044d2c85 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 13 Sep 2018 12:06:04 -0600 Subject: [PATCH 07/38] Create 3860.misc --- changelog.d/3860.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3860.misc diff --git a/changelog.d/3860.misc b/changelog.d/3860.misc new file mode 100644 index 0000000000..364239d3e3 --- /dev/null +++ b/changelog.d/3860.misc @@ -0,0 +1 @@ +Fix typo in replication stream exception. From 0a81038ea0e308d2944b7deae43060e4982b0abe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 14:39:59 +0100 Subject: [PATCH 08/38] Add in flight real time metrics for Measure blocks --- synapse/metrics/__init__.py | 110 +++++++++++++++++++++++++++++++++++- synapse/util/metrics.py | 22 ++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 550f8443f7..98333b2048 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -18,8 +18,11 @@ import gc import logging import os import platform +import threading import time +import six + import attr from prometheus_client import Counter, Gauge, Histogram from prometheus_client.core import REGISTRY, GaugeMetricFamily @@ -68,7 +71,7 @@ class LaterGauge(object): return if isinstance(calls, dict): - for k, v in calls.items(): + for k, v in six.iteritems(calls): g.add_metric(k, v) else: g.add_metric([], calls) @@ -87,6 +90,111 @@ class LaterGauge(object): all_gauges[self.name] = self +class InFlightGauge(object): + """Tracks number of things (e.g. requests, Measure blocks, etc) in flight + at any given time. + + Each InFlightGauge will create a metric called `_total` that counts + the number of in flight blocks, as well as a metrics for each item in the + given `sub_metrics` as `_` which will get updated by the + callbacks. + + Args: + name (str) + desc (str) + labels (list[str]) + sub_metrics (list[str]): A list of sub metrics that the callbacks + will update. + """ + + # TODO: Expand this to + + def __init__(self, name, desc, labels, sub_metrics): + self.name = name + self.desc = desc + self.labels = labels + self.sub_metrics = sub_metrics + + # Create a class which have the sub_metrics values as attributes, which + # default to 0 on initialization. Used to pass to registered callbacks. + self._metrics_class = attr.make_class( + "_MetricsEntry", + attrs={x: attr.ib(0) for x in sub_metrics}, + slots=True, + ) + + # Counts number of in flight blocks for a given set of label values + self._registrations = {} + + # Protects access to _registrations + self._lock = threading.Lock() + + self._register_with_collector() + + def register(self, key, callback): + """Registers that we've entered a new block with labels `key`. + + `callback` gets called each time the metrics are collected. The same + value must also be given to `unregister`. + + `callback` gets called with an object that has an attribute per + sub_metric, which should be updated with the necessary values. Note that + the metrics object is shared between all callbacks registered with the + same key. + + Note that `callback` may be called on a separate thread. + """ + with self._lock: + self._registrations.setdefault(key, set()).add(callback) + + def unregister(self, key, callback): + """Registers that we've exited a block with labels `key`. + """ + + with self._lock: + self._registrations.setdefault(key, set()).discard(callback) + + def collect(self): + """Called by prometheus client when it reads metrics. + + Note: may be called by a separate thread. + """ + in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels) + + metrics_by_key = {} + + # We copy so that we don't mutate the list while iterating + with self._lock: + keys = list(self._registrations) + + for key in keys: + with self._lock: + callbacks = set(self._registrations[key]) + + in_flight.add_metric(key, len(callbacks)) + + metrics = self._metrics_class() + metrics_by_key[key] = metrics + for callback in callbacks: + callback(metrics) + + yield in_flight + + for name in self.sub_metrics: + gauge = GaugeMetricFamily("_".join([self.name, name]), "", labels=self.labels) + for key, metrics in six.iteritems(metrics_by_key): + gauge.add_metric(key, getattr(metrics, name)) + yield gauge + + def _register_with_collector(self): + if self.name in all_gauges.keys(): + logger.warning("%s already registered, reregistering" % (self.name,)) + REGISTRY.unregister(all_gauges.pop(self.name)) + + REGISTRY.register(self) + all_gauges[self.name] = self + + # # Detailed CPU metrics # diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 97f1267380..4b4ac5f6c7 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -20,6 +20,7 @@ from prometheus_client import Counter from twisted.internet import defer +from synapse.metrics import InFlightGauge from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) @@ -45,6 +46,13 @@ block_db_txn_duration = Counter( block_db_sched_duration = Counter( "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) +# Tracks the number of blocks currently active +in_flight = InFlightGauge( + "synapse_util_metrics_block_in_flight", "", + labels=["block_name"], + sub_metrics=["real_time_max", "real_time_sum"], +) + def measure_func(name): def wrapper(func): @@ -82,10 +90,14 @@ class Measure(object): self.start_usage = self.start_context.get_resource_usage() + in_flight.register((self.name,), self._update_in_flight) + def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: return + in_flight.unregister((self.name,), self._update_in_flight) + duration = self.clock.time() - self.start block_counter.labels(self.name).inc() @@ -120,3 +132,13 @@ class Measure(object): if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) + + def _update_in_flight(self, metrics): + """Gets called when processing in flight metrics + """ + duration = self.clock.time() - self.start + + metrics.real_time_max = max(metrics.real_time_max, duration) + metrics.real_time_sum += duration + + # TODO: Add other in flight metrics. From f6e82dcddb49a6e6118d85e54655f5adaf1eda4e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 15:08:22 +0100 Subject: [PATCH 09/38] Tests --- tests/test_metrics.py | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 tests/test_metrics.py diff --git a/tests/test_metrics.py b/tests/test_metrics.py new file mode 100644 index 0000000000..17897711a1 --- /dev/null +++ b/tests/test_metrics.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from synapse.metrics import InFlightGauge + +from tests import unittest + + +class TestMauLimit(unittest.TestCase): + def test_basic(self): + gauge = InFlightGauge( + "test1", "", + labels=["test_label"], + sub_metrics=["foo", "bar"], + ) + + def handle1(metrics): + metrics.foo += 2 + metrics.bar = max(metrics.bar, 5) + + def handle2(metrics): + metrics.foo += 3 + metrics.bar = max(metrics.bar, 7) + + gauge.register(("key1",), handle1) + + self.assert_dict({ + "test1_total": {("key1",): 1}, + "test1_foo": {("key1",): 2}, + "test1_bar": {("key1",): 5}, + }, self.get_metrics_from_gauge(gauge)) + + gauge.unregister(("key1",), handle1) + + self.assert_dict({ + "test1_total": {("key1",): 0}, + "test1_foo": {("key1",): 0}, + "test1_bar": {("key1",): 0}, + }, self.get_metrics_from_gauge(gauge)) + + gauge.register(("key1",), handle1) + gauge.register(("key2",), handle2) + + self.assert_dict({ + "test1_total": {("key1",): 1, ("key2",): 1}, + "test1_foo": {("key1",): 2, ("key2",): 3}, + "test1_bar": {("key1",): 5, ("key2",): 7}, + }, self.get_metrics_from_gauge(gauge)) + + gauge.unregister(("key2",), handle2) + gauge.register(("key1",), handle2) + + self.assert_dict({ + "test1_total": {("key1",): 2, ("key2",): 0}, + "test1_foo": {("key1",): 5, ("key2",): 0}, + "test1_bar": {("key1",): 7, ("key2",): 0}, + }, self.get_metrics_from_gauge(gauge)) + + def get_metrics_from_gauge(self, gauge): + results = {} + + for r in gauge.collect(): + results[r.name] = { + tuple(labels[x] for x in gauge.labels): value + for _, labels, value in r.samples + } + + return results From 941ac0f0856729087bd28d9243964c35147fcb7b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 14:41:16 +0100 Subject: [PATCH 10/38] Newsfile --- changelog.d/3871.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3871.misc diff --git a/changelog.d/3871.misc b/changelog.d/3871.misc new file mode 100644 index 0000000000..dd9510ceb6 --- /dev/null +++ b/changelog.d/3871.misc @@ -0,0 +1 @@ +Add in flight real time metrics for Measure blocks From 9e2f9a7b57a8c4d2238684a9ccd6145948229e2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 15:11:26 +0100 Subject: [PATCH 11/38] Measure outbound requests --- synapse/http/matrixfederationclient.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c49dbacd93..c3542b9353 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -43,6 +43,7 @@ from synapse.api.errors import ( from synapse.http.endpoint import matrix_federation_endpoint from synapse.util import logcontext from synapse.util.logcontext import make_deferred_yieldable +from synapse.util.metrics import Measure logger = logging.getLogger(__name__) outbound_logger = logging.getLogger("synapse.http.outbound") @@ -224,9 +225,11 @@ class MatrixFederationHttpClient(object): reactor=self.hs.get_reactor() ) request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) - response = yield make_deferred_yieldable( - request_deferred, - ) + + with Measure(self.clock, "outbound_request"): + response = yield make_deferred_yieldable( + request_deferred, + ) log_result = "%d %s" % (response.code, response.phrase,) break From d0f6c1ce2189693db40a385f9ff19bdbbc5c87cf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 15:12:36 +0100 Subject: [PATCH 12/38] Remove spurious comment --- synapse/metrics/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 98333b2048..59900aa5d1 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -107,8 +107,6 @@ class InFlightGauge(object): will update. """ - # TODO: Expand this to - def __init__(self, name, desc, labels, sub_metrics): self.name = name self.desc = desc From 90f8e606e2f6869dac20bc4481b9e8c62d32c534 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 15 Sep 2018 00:23:55 +1000 Subject: [PATCH 13/38] changelog --- changelog.d/3872.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3872.misc diff --git a/changelog.d/3872.misc b/changelog.d/3872.misc new file mode 100644 index 0000000000..b450c506d8 --- /dev/null +++ b/changelog.d/3872.misc @@ -0,0 +1 @@ +Disable buffering and automatic retrying in treq requests to prevent timeouts. \ No newline at end of file From bc9af88a2d971cd75aad419c60e13b0778640b28 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Sat, 15 Sep 2018 00:26:00 +1000 Subject: [PATCH 14/38] fix --- synapse/http/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/http/client.py b/synapse/http/client.py index d60f87b04e..ec339a92ad 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -93,7 +93,7 @@ class SimpleHttpClient(object): outgoing_requests_counter.labels(method).inc() # log request but strip `access_token` (AS requests for example include this) - logger.info("Sending request %s %s", method, redact_uri(uri.encode('ascii'))) + logger.info("Sending request %s %s", method, redact_uri(uri)) try: request_deferred = treq.request( @@ -108,14 +108,14 @@ class SimpleHttpClient(object): incoming_responses_counter.labels(method, response.code).inc() logger.info( "Received response to %s %s: %s", - method, redact_uri(uri.encode('ascii')), response.code + method, redact_uri(uri), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, redact_uri(uri.encode('ascii')), type(e).__name__, e.args[0] + method, redact_uri(uri), type(e).__name__, e.args[0] ) raise From 024be6cf1891fc20baac2356246595dc7f8ba5c5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 14 Sep 2018 18:12:52 +0100 Subject: [PATCH 15/38] don't filter membership events based on history visibility (#3874) don't filter membership events based on history visibility as we will already have filtered the messages in the timeline, and state events are always visible. and because @erikjohnston said so. --- changelog.d/3874.bugfix | 0 synapse/handlers/pagination.py | 9 +-------- 2 files changed, 1 insertion(+), 8 deletions(-) create mode 100644 changelog.d/3874.bugfix diff --git a/changelog.d/3874.bugfix b/changelog.d/3874.bugfix new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 5170d093e3..a155b6e938 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -269,14 +269,7 @@ class PaginationHandler(object): if state_ids: state = yield self.store.get_events(list(state_ids.values())) - - if state: - state = yield filter_events_for_client( - self.store, - user_id, - state.values(), - is_peeking=(member_event_id is None), - ) + state = state.values() time_now = self.clock.time_msec() From fcfe7a850dada2e65982f2bc805d8bc409f07512 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 19:23:07 +0100 Subject: [PATCH 16/38] Add an awful secondary timeout to fix wedged requests This is an attempt to mitigate #3842 by adding yet-another-timeout --- synapse/http/matrixfederationclient.py | 11 ++++++ synapse/util/async_helpers.py | 51 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index da16b5dd8c..13b19f7626 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -42,6 +42,7 @@ from synapse.api.errors import ( ) from synapse.http.endpoint import matrix_federation_endpoint from synapse.util import logcontext +from synapse.util.async_helpers import timeout_no_seriously from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure @@ -228,6 +229,16 @@ class MatrixFederationHttpClient(object): ) request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) + # Sometimes the timeout above doesn't work, so lets hack yet + # another layer of timeouts in in the vain hope that at some + # point the world made sense and this really really really + # should work. + request_deferred = timeout_no_seriously( + request_deferred, + timeout=_sec_timeout * 2, + reactor=self.hs.get_reactor(), + ) + with Measure(self.clock, "outbound_request"): response = yield make_deferred_yieldable( request_deferred, diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 9b3f2f4b96..083e4f4128 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -438,3 +438,54 @@ def _cancelled_to_timed_out_error(value, timeout): value.trap(CancelledError) raise DeferredTimeoutError(timeout, "Deferred") return value + + +def timeout_no_seriously(deferred, timeout, reactor): + """The in build twisted deferred addTimeout (and the method above) + completely fail to time things out under some unknown circumstances. + + Lets try a different way of timing things out and maybe that will make + things work?! + + TODO: Kill this with fire. + """ + + new_d = defer.Deferred() + + timed_out = [False] + + def time_it_out(): + timed_out[0] = True + deferred.cancel() + + if not new_d.called: + new_d.errback(DeferredTimeoutError(timeout, "Deferred")) + + delayed_call = reactor.callLater(timeout, time_it_out) + + def convert_cancelled(value): + if timed_out[0]: + return _cancelled_to_timed_out_error(value, timeout) + return value + + deferred.addBoth(convert_cancelled) + + def cancel_timeout(result): + # stop the pending call to cancel the deferred if it's been fired + if delayed_call.active(): + delayed_call.cancel() + return result + + deferred.addBoth(cancel_timeout) + + def success_cb(val): + if not new_d.called: + new_d.callback(val) + + def failure_cb(val): + if not new_d.called: + new_d.errback(val) + + deferred.addCallbacks(success_cb, failure_cb) + + return new_d From 335b23a078b416b3ea97d1bef73da6ca5abd8d4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 19:25:58 +0100 Subject: [PATCH 17/38] Newsfile --- changelog.d/3875.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3875.bugfix diff --git a/changelog.d/3875.bugfix b/changelog.d/3875.bugfix new file mode 100644 index 0000000000..2d2147dd4b --- /dev/null +++ b/changelog.d/3875.bugfix @@ -0,0 +1 @@ +Mitigate outbound federation randomly becoming wedged From 24efb2a70dfd8aa100507612836601991e11affc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Sat, 15 Sep 2018 11:38:39 +0100 Subject: [PATCH 18/38] Fix timeout function Turns out deferred.cancel sometimes throws, so we do that last to ensure that we always do resolve the new deferred. --- synapse/util/async_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 083e4f4128..40c2946129 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -456,11 +456,12 @@ def timeout_no_seriously(deferred, timeout, reactor): def time_it_out(): timed_out[0] = True - deferred.cancel() if not new_d.called: new_d.errback(DeferredTimeoutError(timeout, "Deferred")) + deferred.cancel() + delayed_call = reactor.callLater(timeout, time_it_out) def convert_cancelled(value): From c8642720c9baf16ac434e2cb1b88e0805f2eb00f Mon Sep 17 00:00:00 2001 From: Vincent Breitmoser Date: Sat, 15 Sep 2018 22:03:27 +0200 Subject: [PATCH 19/38] mention libjemalloc in readme (#3877) --- README.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.rst b/README.rst index 2471619706..cfcf8b5219 100644 --- a/README.rst +++ b/README.rst @@ -963,5 +963,13 @@ variable. The default is 0.5, which can be decreased to reduce RAM usage in memory constrained enviroments, or increased if performance starts to degrade. +Using `libjemalloc `_ can also yield a significant +improvement in overall amount, and especially in terms of giving back RAM +to the OS. To use it, the library must simply be put in the LD_PRELOAD +environment variable when launching Synapse. On Debian, this can be done +by installing the ``libjemalloc1`` package and adding this line to +``/etc/default/matrix-synaspse``:: + + LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.1 .. _`key_management`: https://matrix.org/docs/spec/server_server/unstable.html#retrieving-server-keys From 9d13ff4da8483e4fdd66dd16e8ad4ac299f10e43 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sat, 15 Sep 2018 21:04:10 +0100 Subject: [PATCH 20/38] missing changelog --- changelog.d/3877.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3877.misc diff --git a/changelog.d/3877.misc b/changelog.d/3877.misc new file mode 100644 index 0000000000..a80fec4bd8 --- /dev/null +++ b/changelog.d/3877.misc @@ -0,0 +1 @@ +mention jemalloc in the README From d42d79e3c3cc229fa6bb268a97edde4ac81e2df5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sat, 15 Sep 2018 22:27:41 +0100 Subject: [PATCH 21/38] don't ratelimit autojoins --- synapse/handlers/register.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 1e53f2c635..da914c46ff 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -534,4 +534,5 @@ class RegistrationHandler(BaseHandler): room_id=room_id, remote_room_hosts=remote_room_hosts, action="join", + ratelimit=False, ) From c71b93f2a4bec7d12ce905388336e897a5227b35 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sat, 15 Sep 2018 22:28:28 +0100 Subject: [PATCH 22/38] changelog --- changelog.d/3879.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3879.bugfix diff --git a/changelog.d/3879.bugfix b/changelog.d/3879.bugfix new file mode 100644 index 0000000000..82cb2a8ebc --- /dev/null +++ b/changelog.d/3879.bugfix @@ -0,0 +1 @@ +Don't ratelimit autojoins From 9c749a6b612d538a0d8374152ff358508f733734 Mon Sep 17 00:00:00 2001 From: Simon Dwyer Date: Sun, 16 Sep 2018 10:22:27 +1000 Subject: [PATCH 23/38] Added 'MAX_UPLOAD_SIZE' variable and set default to "10M" --- docker/conf/homeserver.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml index 6bc25bb45f..cfe88788f2 100644 --- a/docker/conf/homeserver.yaml +++ b/docker/conf/homeserver.yaml @@ -85,7 +85,7 @@ federation_rc_concurrent: 3 media_store_path: "/data/media" uploads_path: "/data/uploads" -max_upload_size: "10M" +max_upload_size: "{{ SYNAPSE_MAX_UPLOAD_SIZE or "10M" }}" max_image_pixels: "32M" dynamic_thumbnails: false From f472abd792c5f3d969bb16c9ad7ee278b87ee761 Mon Sep 17 00:00:00 2001 From: Simon Dwyer Date: Sun, 16 Sep 2018 10:28:29 +1000 Subject: [PATCH 24/38] Added description for "SYNAPSE_MAX_UPLOAD_SIZE" variable. --- docker/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/README.md b/docker/README.md index 038c78f7c0..ecdf8a8b17 100644 --- a/docker/README.md +++ b/docker/README.md @@ -88,6 +88,7 @@ variables are available for configuration: * ``SYNAPSE_TURN_URIS``, set this variable to the coma-separated list of TURN uris to enable TURN for this homeserver. * ``SYNAPSE_TURN_SECRET``, set this to the TURN shared secret if required. +* ``SYNAPSE_MAX_UPLOAD_SIZE``, the max upload size [default `10M`]. Shared secrets, that will be initialized to random values if not set: From da864a92c9eb8335fdd5602bcd4afe21c98cca39 Mon Sep 17 00:00:00 2001 From: Simon Dwyer Date: Sun, 16 Sep 2018 12:59:23 +1000 Subject: [PATCH 25/38] Added description for "SYNAPSE_MAX_UPLOAD_SIZE" variable. --- docker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index ecdf8a8b17..3c00d1e948 100644 --- a/docker/README.md +++ b/docker/README.md @@ -88,7 +88,7 @@ variables are available for configuration: * ``SYNAPSE_TURN_URIS``, set this variable to the coma-separated list of TURN uris to enable TURN for this homeserver. * ``SYNAPSE_TURN_SECRET``, set this to the TURN shared secret if required. -* ``SYNAPSE_MAX_UPLOAD_SIZE``, the max upload size [default `10M`]. +* ``SYNAPSE_MAX_UPLOAD_SIZE``, set this variable to change the max upload size [default `10M`]. Shared secrets, that will be initialized to random values if not set: From 0e46ff690473538477037d5dbd2200f6863fc68e Mon Sep 17 00:00:00 2001 From: Simon Dwyer Date: Sun, 16 Sep 2018 13:33:33 +1000 Subject: [PATCH 26/38] Adding the ability to change MAX_UPLOAD_SIZE for the docker container variables. Signed-off-by: Simon Dwyer --- changelog.d/3883.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3883.feature diff --git a/changelog.d/3883.feature b/changelog.d/3883.feature new file mode 100644 index 0000000000..c11e5c5309 --- /dev/null +++ b/changelog.d/3883.feature @@ -0,0 +1 @@ +Adding the ability to change MAX_UPLOAD_SIZE for the docker container variables. \ No newline at end of file From e7b3b4d8c22952d4f090da6623fefb6c1caf0070 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 11:42:42 +0100 Subject: [PATCH 27/38] Remove nuke-room-from-db.sh script The problem with this script is that it is largely untested, entirely unmaintained, and running it is likely to make your synapse blow up in exciting ways. For example, it leaves a bunch of tables with dead values in it, like event_to_state_groups. Having it here sends a message that it is a supported part of synapse, which is absolutely not the case. --- scripts-dev/nuke-room-from-db.sh | 57 -------------------------------- 1 file changed, 57 deletions(-) delete mode 100755 scripts-dev/nuke-room-from-db.sh diff --git a/scripts-dev/nuke-room-from-db.sh b/scripts-dev/nuke-room-from-db.sh deleted file mode 100755 index c62928afdb..0000000000 --- a/scripts-dev/nuke-room-from-db.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -## CAUTION: -## This script will remove (hopefully) all trace of the given room ID from -## your homeserver.db - -## Do not run it lightly. - -set -e - -if [ "$1" == "-h" ] || [ "$1" == "" ]; then - echo "Call with ROOM_ID as first option and then pipe it into the database. So for instance you might run" - echo " nuke-room-from-db.sh | sqlite3 homeserver.db" - echo "or" - echo " nuke-room-from-db.sh | psql --dbname=synapse" - exit -fi - -ROOMID="$1" - -cat < Date: Mon, 17 Sep 2018 11:51:38 +0100 Subject: [PATCH 28/38] changelog --- changelog.d/3888.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3888.misc diff --git a/changelog.d/3888.misc b/changelog.d/3888.misc new file mode 100644 index 0000000000..a10ede547e --- /dev/null +++ b/changelog.d/3888.misc @@ -0,0 +1 @@ +Remove unmaintained "nuke-room-from-db.sh" script From 85a43f4167b5e775f7c3afe96d71137818561272 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 13:19:00 +0100 Subject: [PATCH 29/38] Return a 404 when deleting unknown room alias As per https://github.com/matrix-org/matrix-doc/issues/1675 Fixes https://github.com/matrix-org/synapse/issues/2782 --- synapse/handlers/directory.py | 19 ++++++++++++++++--- synapse/storage/directory.py | 1 - 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index ef866da1b6..c745e6740b 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -20,7 +20,14 @@ import string from twisted.internet import defer from synapse.api.constants import EventTypes -from synapse.api.errors import AuthError, CodeMessageException, Codes, SynapseError +from synapse.api.errors import ( + AuthError, + CodeMessageException, + Codes, + NotFoundError, + StoreError, + SynapseError, +) from synapse.types import RoomAlias, UserID, get_domain_from_id from ._base import BaseHandler @@ -109,7 +116,13 @@ class DirectoryHandler(BaseHandler): def delete_association(self, requester, user_id, room_alias): # association deletion for human users - can_delete = yield self._user_can_delete_alias(room_alias, user_id) + try: + can_delete = yield self._user_can_delete_alias(room_alias, user_id) + except StoreError as e: + if e.code == 404: + raise NotFoundError("Unknown room alias") + raise + if not can_delete: raise AuthError( 403, "You don't have permission to delete the alias.", @@ -320,7 +333,7 @@ class DirectoryHandler(BaseHandler): def _user_can_delete_alias(self, alias, user_id): creator = yield self.store.get_room_alias_creator(alias.to_string()) - if creator and creator == user_id: + if creator == user_id: defer.returnValue(True) is_admin = yield self.auth.is_server_admin(UserID.from_string(user_id)) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 808194236a..cfb687cb53 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -75,7 +75,6 @@ class DirectoryWorkerStore(SQLBaseStore): }, retcol="creator", desc="get_room_alias_creator", - allow_none=True ) @cached(max_entries=5000) From c1ae6b1bce61fecda91aa6ab8a51d152418cf79e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 13:21:08 +0100 Subject: [PATCH 30/38] changelog --- changelog.d/3889.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3889.bugfix diff --git a/changelog.d/3889.bugfix b/changelog.d/3889.bugfix new file mode 100644 index 0000000000..e976425987 --- /dev/null +++ b/changelog.d/3889.bugfix @@ -0,0 +1 @@ +Fix 500 error when deleting unknown room alias From c9c50284d71669ad2496c2b0af30c392428a67e5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 17 Sep 2018 15:58:47 +0100 Subject: [PATCH 31/38] README: run python_dependencies with -m ... to stop things which try to import `types` getting `synapse.types` instead --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index cfcf8b5219..0147429aed 100644 --- a/README.rst +++ b/README.rst @@ -908,7 +908,7 @@ to install using pip and a virtualenv:: virtualenv -p python2.7 env source env/bin/activate - python synapse/python_dependencies.py | xargs pip install + python -m synapse.python_dependencies | xargs pip install pip install lxml mock This will run a process of downloading and installing all the needed From f00a9d2636211e6762d8c8c627ebee3f6cea05c9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 16:15:42 +0100 Subject: [PATCH 32/38] Fix some b'abcd' noise in logs and metrics Python 3 compatibility: make sure that we decode some byte sequences before we use them to create log lines and metrics labels. --- synapse/http/matrixfederationclient.py | 5 ++++- synapse/http/site.py | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 13b19f7626..083484a687 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -244,7 +244,10 @@ class MatrixFederationHttpClient(object): request_deferred, ) - log_result = "%d %s" % (response.code, response.phrase,) + log_result = "%d %s" % ( + response.code, + response.phrase.decode('ascii', errors='replace'), + ) break except Exception as e: if not retry_on_dns_fail and isinstance(e, DNSLookupError): diff --git a/synapse/http/site.py b/synapse/http/site.py index e1e53e8ae5..2191de9836 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -82,7 +82,7 @@ class SynapseRequest(Request): ) def get_request_id(self): - return "%s-%i" % (self.method, self.request_seq) + return "%s-%i" % (self.method.decode('ascii'), self.request_seq) def get_redacted_uri(self): uri = self.uri @@ -119,7 +119,7 @@ class SynapseRequest(Request): # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics - requests_counter.labels(self.method, + requests_counter.labels(self.method.decode('ascii'), self.request_metrics.name).inc() @contextlib.contextmanager @@ -280,9 +280,9 @@ class SynapseRequest(Request): int(usage.db_txn_count), self.sentLength, code, - self.method, + self.method.decode('ascii'), self.get_redacted_uri(), - self.clientproto, + self.clientproto.decode('ascii', errors='replace'), user_agent, usage.evt_db_fetch_count, ) From 0cb7afff35dacb2b43d695536336a403b14d4b62 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 16:17:25 +0100 Subject: [PATCH 33/38] changelog --- changelog.d/3892.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3892.bugfix diff --git a/changelog.d/3892.bugfix b/changelog.d/3892.bugfix new file mode 100644 index 0000000000..8b30afab04 --- /dev/null +++ b/changelog.d/3892.bugfix @@ -0,0 +1 @@ +Fix some b'abcd' noise in logs and metrics From f75b9961c6c782d2ca4586782aa5cd2c0ae9a5b2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 16:52:02 +0100 Subject: [PATCH 34/38] Reinstate missing null check --- synapse/handlers/directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index c745e6740b..18741c5fac 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -333,7 +333,7 @@ class DirectoryHandler(BaseHandler): def _user_can_delete_alias(self, alias, user_id): creator = yield self.store.get_room_alias_creator(alias.to_string()) - if creator == user_id: + if creator is not None and creator == user_id: defer.returnValue(True) is_admin = yield self.auth.is_server_admin(UserID.from_string(user_id)) From ac80cb08fe247812b46c70843b2d5c9764cca619 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 17:16:50 +0100 Subject: [PATCH 35/38] Fix more b'abcd' noise in metrics --- synapse/http/request_metrics.py | 22 +++++++++++----------- synapse/http/site.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 72c2654678..fedb4e6b18 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -162,7 +162,7 @@ class RequestMetrics(object): with _in_flight_requests_lock: _in_flight_requests.add(self) - def stop(self, time_sec, request): + def stop(self, time_sec, response_code, sent_bytes): with _in_flight_requests_lock: _in_flight_requests.discard(self) @@ -179,35 +179,35 @@ class RequestMetrics(object): ) return - response_code = str(request.code) + response_code = str(response_code) - outgoing_responses_counter.labels(request.method, response_code).inc() + outgoing_responses_counter.labels(self.method, response_code).inc() - response_count.labels(request.method, self.name, tag).inc() + response_count.labels(self.method, self.name, tag).inc() - response_timer.labels(request.method, self.name, tag, response_code).observe( + response_timer.labels(self.method, self.name, tag, response_code).observe( time_sec - self.start ) resource_usage = context.get_resource_usage() - response_ru_utime.labels(request.method, self.name, tag).inc( + response_ru_utime.labels(self.method, self.name, tag).inc( resource_usage.ru_utime, ) - response_ru_stime.labels(request.method, self.name, tag).inc( + response_ru_stime.labels(self.method, self.name, tag).inc( resource_usage.ru_stime, ) - response_db_txn_count.labels(request.method, self.name, tag).inc( + response_db_txn_count.labels(self.method, self.name, tag).inc( resource_usage.db_txn_count ) - response_db_txn_duration.labels(request.method, self.name, tag).inc( + response_db_txn_duration.labels(self.method, self.name, tag).inc( resource_usage.db_txn_duration_sec ) - response_db_sched_duration.labels(request.method, self.name, tag).inc( + response_db_sched_duration.labels(self.method, self.name, tag).inc( resource_usage.db_sched_duration_sec ) - response_size.labels(request.method, self.name, tag).inc(request.sentLength) + response_size.labels(self.method, self.name, tag).inc(sent_bytes) # We always call this at the end to ensure that we update the metrics # regardless of whether a call to /metrics while the request was in diff --git a/synapse/http/site.py b/synapse/http/site.py index 2191de9836..9579e8cd0d 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -288,7 +288,7 @@ class SynapseRequest(Request): ) try: - self.request_metrics.stop(self.finish_time, self) + self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warn("Failed to stop metrics: %r", e) From 06f2dbbb5dbe7cf9efe66376f47008e86a180f3b Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 17:18:26 +0100 Subject: [PATCH 36/38] changelog --- changelog.d/3895.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3895.bugfix diff --git a/changelog.d/3895.bugfix b/changelog.d/3895.bugfix new file mode 100644 index 0000000000..8b30afab04 --- /dev/null +++ b/changelog.d/3895.bugfix @@ -0,0 +1 @@ +Fix some b'abcd' noise in logs and metrics From 4ecdf73fc7a9e1b8357c6f74897b33b32243dd3f Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 17 Sep 2018 22:05:23 -0500 Subject: [PATCH 37/38] Fix typo in README, synaspse -> synapse Signed-off-by: Aaron Raimist --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 0147429aed..9c0f9c09c8 100644 --- a/README.rst +++ b/README.rst @@ -968,7 +968,7 @@ improvement in overall amount, and especially in terms of giving back RAM to the OS. To use it, the library must simply be put in the LD_PRELOAD environment variable when launching Synapse. On Debian, this can be done by installing the ``libjemalloc1`` package and adding this line to -``/etc/default/matrix-synaspse``:: +``/etc/default/matrix-synapse``:: LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libjemalloc.so.1 From 505abb38f04b8b18adb0b5e5a150238f76b3e4fe Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 17 Sep 2018 22:07:19 -0500 Subject: [PATCH 38/38] Add changelog Signed-off-by: Aaron Raimist --- changelog.d/3897.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3897.misc diff --git a/changelog.d/3897.misc b/changelog.d/3897.misc new file mode 100644 index 0000000000..87e7ac796e --- /dev/null +++ b/changelog.d/3897.misc @@ -0,0 +1 @@ +Fix typo in README, synaspse -> synapse \ No newline at end of file