From c9326140dc9f8ea849356b5a8397e468636df8d4 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Fri, 14 Apr 2023 15:46:04 -0500 Subject: [PATCH] Refactor `SimpleHttpClient` to pull out reusable methods (#15427) Pulls out some methods to `BaseHttpClient` to eventually be reused in other contexts. --- changelog.d/15427.misc | 1 + synapse/http/client.py | 132 ++++++++++++++++++++++++----------------- 2 files changed, 77 insertions(+), 56 deletions(-) create mode 100644 changelog.d/15427.misc diff --git a/changelog.d/15427.misc b/changelog.d/15427.misc new file mode 100644 index 0000000000..ef873e3b2b --- /dev/null +++ b/changelog.d/15427.misc @@ -0,0 +1 @@ +Refactor `SimpleHttpClient` to pull out a base class. diff --git a/synapse/http/client.py b/synapse/http/client.py index b5cf8123ce..91fe474f36 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -312,35 +312,27 @@ class BlacklistingAgentWrapper(Agent): ) -class SimpleHttpClient: +class BaseHttpClient: """ A simple, no-frills HTTP client with methods that wrap up common ways of - using HTTP in Matrix + using HTTP in Matrix. Does not come with a default Agent, subclasses will need to + define their own. + + Args: + hs: The HomeServer instance to pass in + treq_args: Extra keyword arguments to be given to treq.request. """ + agent: IAgent + def __init__( self, hs: "HomeServer", treq_args: Optional[Dict[str, Any]] = None, - ip_whitelist: Optional[IPSet] = None, - ip_blacklist: Optional[IPSet] = None, - use_proxy: bool = False, ): - """ - Args: - hs - treq_args: Extra keyword arguments to be given to treq.request. - ip_blacklist: The IP addresses that are blacklisted that - we may not request. - ip_whitelist: The whitelisted IP addresses, that we can - request if it were otherwise caught in a blacklist. - use_proxy: Whether proxy settings should be discovered and used - from conventional environment variables. - """ self.hs = hs + self.reactor = hs.get_reactor() - self._ip_whitelist = ip_whitelist - self._ip_blacklist = ip_blacklist self._extra_treq_args = treq_args or {} self.clock = hs.get_clock() @@ -356,44 +348,6 @@ class SimpleHttpClient: # reactor. self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor())) - if self._ip_blacklist: - # If we have an IP blacklist, we need to use a DNS resolver which - # filters out blacklisted IP addresses, to prevent DNS rebinding. - self.reactor: ISynapseReactor = BlacklistingReactorWrapper( - hs.get_reactor(), self._ip_whitelist, self._ip_blacklist - ) - else: - self.reactor = hs.get_reactor() - - # the pusher makes lots of concurrent SSL connections to sygnal, and - # tends to do so in batches, so we need to allow the pool to keep - # lots of idle connections around. - pool = HTTPConnectionPool(self.reactor) - # XXX: The justification for using the cache factor here is that larger instances - # will need both more cache and more connections. - # Still, this should probably be a separate dial - pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) - pool.cachedConnectionTimeout = 2 * 60 - - self.agent: IAgent = ProxyAgent( - self.reactor, - hs.get_reactor(), - connectTimeout=15, - contextFactory=self.hs.get_http_client_context_factory(), - pool=pool, - use_proxy=use_proxy, - ) - - if self._ip_blacklist: - # If we have an IP blacklist, we then install the blacklisting Agent - # which prevents direct access to IP addresses, that are not caught - # by the DNS resolution. - self.agent = BlacklistingAgentWrapper( - self.agent, - ip_blacklist=self._ip_blacklist, - ip_whitelist=self._ip_whitelist, - ) - async def request( self, method: str, @@ -799,6 +753,72 @@ class SimpleHttpClient: ) +class SimpleHttpClient(BaseHttpClient): + """ + An HTTP client capable of crossing a proxy and respecting a block/allow list. + + This also configures a larger / longer lasting HTTP connection pool. + + Args: + hs: The HomeServer instance to pass in + treq_args: Extra keyword arguments to be given to treq.request. + ip_blacklist: The IP addresses that are blacklisted that + we may not request. + ip_whitelist: The whitelisted IP addresses, that we can + request if it were otherwise caught in a blacklist. + use_proxy: Whether proxy settings should be discovered and used + from conventional environment variables. + """ + + def __init__( + self, + hs: "HomeServer", + treq_args: Optional[Dict[str, Any]] = None, + ip_whitelist: Optional[IPSet] = None, + ip_blacklist: Optional[IPSet] = None, + use_proxy: bool = False, + ): + super().__init__(hs, treq_args=treq_args) + self._ip_whitelist = ip_whitelist + self._ip_blacklist = ip_blacklist + + if self._ip_blacklist: + # If we have an IP blacklist, we need to use a DNS resolver which + # filters out blacklisted IP addresses, to prevent DNS rebinding. + self.reactor: ISynapseReactor = BlacklistingReactorWrapper( + self.reactor, self._ip_whitelist, self._ip_blacklist + ) + + # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to + # do so in batches, so we need to allow the pool to keep lots of idle + # connections around. + pool = HTTPConnectionPool(self.reactor) + # XXX: The justification for using the cache factor here is that larger + # instances will need both more cache and more connections. + # Still, this should probably be a separate dial + pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) + pool.cachedConnectionTimeout = 2 * 60 + + self.agent: IAgent = ProxyAgent( + self.reactor, + hs.get_reactor(), + connectTimeout=15, + contextFactory=self.hs.get_http_client_context_factory(), + pool=pool, + use_proxy=use_proxy, + ) + + if self._ip_blacklist: + # If we have an IP blacklist, we then install the blacklisting Agent + # which prevents direct access to IP addresses, that are not caught + # by the DNS resolution. + self.agent = BlacklistingAgentWrapper( + self.agent, + ip_blacklist=self._ip_blacklist, + ip_whitelist=self._ip_whitelist, + ) + + def _timeout_to_request_timed_out_error(f: Failure) -> Failure: if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): # The TCP connection has its own timeout (set by the 'connectTimeout' param