From ddb240293a3d7e0a903f322088e937d7e4f3de68 Mon Sep 17 00:00:00 2001 From: Tim Leung Date: Fri, 26 Feb 2021 17:37:57 +0000 Subject: [PATCH] Add support for no_proxy and case insensitive env variables (#9372) ### Changes proposed in this PR - Add support for the `no_proxy` and `NO_PROXY` environment variables - Internally rely on urllib's [`proxy_bypass_environment`](https://github.com/python/cpython/blob/bdb941be423bde8b02a5695ccf51c303d6204bed/Lib/urllib/request.py#L2519) - Extract env variables using urllib's `getproxies`/[`getproxies_environment`](https://github.com/python/cpython/blob/bdb941be423bde8b02a5695ccf51c303d6204bed/Lib/urllib/request.py#L2488) which supports lowercase + uppercase, preferring lowercase, except for `HTTP_PROXY` in a CGI environment This does contain behaviour changes for consumers so making sure these are called out: - `no_proxy`/`NO_PROXY` is now respected - lowercase `https_proxy` is now allowed and taken over `HTTPS_PROXY` Related to #9306 which also uses `ProxyAgent` Signed-off-by: Timothy Leung tim95@hotmail.co.uk --- changelog.d/9372.feature | 1 + synapse/http/client.py | 10 +- synapse/http/proxyagent.py | 37 ++++- synapse/rest/media/v1/preview_url_resource.py | 3 +- synapse/server.py | 10 +- tests/http/test_proxyagent.py | 131 +++++++++++------- 6 files changed, 121 insertions(+), 71 deletions(-) create mode 100644 changelog.d/9372.feature diff --git a/changelog.d/9372.feature b/changelog.d/9372.feature new file mode 100644 index 0000000000..3cb01004c9 --- /dev/null +++ b/changelog.d/9372.feature @@ -0,0 +1 @@ +The `no_proxy` and `NO_PROXY` environment variables are now respected in proxied HTTP clients with the lowercase form taking precedence if both are present. Additionally, the lowercase `https_proxy` environment variable is now respected in proxied HTTP clients on top of existing support for the uppercase `HTTPS_PROXY` form and takes precedence if both are present. Contributed by Timothy Leung. diff --git a/synapse/http/client.py b/synapse/http/client.py index e54d9bd213..a910548f1e 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -289,8 +289,7 @@ class SimpleHttpClient: treq_args: Dict[str, Any] = {}, ip_whitelist: Optional[IPSet] = None, ip_blacklist: Optional[IPSet] = None, - http_proxy: Optional[bytes] = None, - https_proxy: Optional[bytes] = None, + use_proxy: bool = False, ): """ Args: @@ -300,8 +299,8 @@ class SimpleHttpClient: we may not request. ip_whitelist: The whitelisted IP addresses, that we can request if it were otherwise caught in a blacklist. - http_proxy: proxy server to use for http connections. host[:port] - https_proxy: proxy server to use for https connections. host[:port] + use_proxy: Whether proxy settings should be discovered and used + from conventional environment variables. """ self.hs = hs @@ -345,8 +344,7 @@ class SimpleHttpClient: connectTimeout=15, contextFactory=self.hs.get_http_client_context_factory(), pool=pool, - http_proxy=http_proxy, - https_proxy=https_proxy, + use_proxy=use_proxy, ) if self._ip_blacklist: diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index b730d2c634..3d553ae236 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -14,6 +14,7 @@ # limitations under the License. import logging import re +from urllib.request import getproxies_environment, proxy_bypass_environment from zope.interface import implementer @@ -58,6 +59,9 @@ class ProxyAgent(_AgentBase): pool (HTTPConnectionPool|None): connection pool to be used. If None, a non-persistent pool instance will be created. + + use_proxy (bool): Whether proxy settings should be discovered and used + from conventional environment variables. """ def __init__( @@ -68,8 +72,7 @@ class ProxyAgent(_AgentBase): connectTimeout=None, bindAddress=None, pool=None, - http_proxy=None, - https_proxy=None, + use_proxy=False, ): _AgentBase.__init__(self, reactor, pool) @@ -84,6 +87,15 @@ class ProxyAgent(_AgentBase): if bindAddress is not None: self._endpoint_kwargs["bindAddress"] = bindAddress + http_proxy = None + https_proxy = None + no_proxy = None + if use_proxy: + proxies = getproxies_environment() + http_proxy = proxies["http"].encode() if "http" in proxies else None + https_proxy = proxies["https"].encode() if "https" in proxies else None + no_proxy = proxies["no"] if "no" in proxies else None + self.http_proxy_endpoint = _http_proxy_endpoint( http_proxy, self.proxy_reactor, **self._endpoint_kwargs ) @@ -92,6 +104,8 @@ class ProxyAgent(_AgentBase): https_proxy, self.proxy_reactor, **self._endpoint_kwargs ) + self.no_proxy = no_proxy + self._policy_for_https = contextFactory self._reactor = reactor @@ -139,13 +153,28 @@ class ProxyAgent(_AgentBase): pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port) request_path = parsed_uri.originForm - if parsed_uri.scheme == b"http" and self.http_proxy_endpoint: + should_skip_proxy = False + if self.no_proxy is not None: + should_skip_proxy = proxy_bypass_environment( + parsed_uri.host.decode(), + proxies={"no": self.no_proxy}, + ) + + if ( + parsed_uri.scheme == b"http" + and self.http_proxy_endpoint + and not should_skip_proxy + ): # Cache *all* connections under the same key, since we are only # connecting to a single destination, the proxy: pool_key = ("http-proxy", self.http_proxy_endpoint) endpoint = self.http_proxy_endpoint request_path = uri - elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: + elif ( + parsed_uri.scheme == b"https" + and self.https_proxy_endpoint + and not should_skip_proxy + ): endpoint = HTTPConnectProxyEndpoint( self.proxy_reactor, self.https_proxy_endpoint, diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6104ef4e46..89dc6b1c98 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -149,8 +149,7 @@ class PreviewUrlResource(DirectServeJsonResource): treq_args={"browser_like_redirects": True}, ip_whitelist=hs.config.url_preview_ip_range_whitelist, ip_blacklist=hs.config.url_preview_ip_range_blacklist, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), + use_proxy=True, ) self.media_repo = media_repo self.primary_base_path = media_repo.primary_base_path diff --git a/synapse/server.py b/synapse/server.py index 4b9ec7f0ae..1d4370e0ba 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -24,7 +24,6 @@ import abc import functools import logging -import os from typing import ( TYPE_CHECKING, Any, @@ -370,11 +369,7 @@ class HomeServer(metaclass=abc.ABCMeta): """ An HTTP client that uses configured HTTP(S) proxies. """ - return SimpleHttpClient( - self, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), - ) + return SimpleHttpClient(self, use_proxy=True) @cache_in_self def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: @@ -386,8 +381,7 @@ class HomeServer(metaclass=abc.ABCMeta): self, ip_whitelist=self.config.ip_range_whitelist, ip_blacklist=self.config.ip_range_blacklist, - http_proxy=os.getenvb(b"http_proxy"), - https_proxy=os.getenvb(b"HTTPS_PROXY"), + use_proxy=True, ) @cache_in_self diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index 9a56e1c14a..505ffcd300 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import os +from unittest.mock import patch import treq from netaddr import IPSet @@ -100,62 +102,36 @@ class MatrixFederationAgentTests(TestCase): return http_protocol - def test_http_request(self): - agent = ProxyAgent(self.reactor) + def _test_request_direct_connection(self, agent, scheme, hostname, path): + """Runs a test case for a direct connection not going through a proxy. - self.reactor.lookups["test.com"] = "1.2.3.4" - d = agent.request(b"GET", b"http://test.com") + Args: + agent (ProxyAgent): the proxy agent being tested + + scheme (bytes): expected to be either "http" or "https" + + hostname (bytes): the hostname to connect to in the test + + path (bytes): the path to connect to in the test + """ + is_https = scheme == b"https" + + self.reactor.lookups[hostname.decode()] = "1.2.3.4" + d = agent.request(b"GET", scheme + b"://" + hostname + b"/" + path) # there should be a pending TCP connection clients = self.reactor.tcpClients self.assertEqual(len(clients), 1) (host, port, client_factory, _timeout, _bindAddress) = clients[0] self.assertEqual(host, "1.2.3.4") - self.assertEqual(port, 80) - - # make a test server, and wire up the client - http_server = self._make_connection( - client_factory, _get_test_protocol_factory() - ) - - # the FakeTransport is async, so we need to pump the reactor - self.reactor.advance(0) - - # now there should be a pending request - self.assertEqual(len(http_server.requests), 1) - - request = http_server.requests[0] - self.assertEqual(request.method, b"GET") - self.assertEqual(request.path, b"/") - self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) - request.write(b"result") - request.finish() - - self.reactor.advance(0) - - resp = self.successResultOf(d) - body = self.successResultOf(treq.content(resp)) - self.assertEqual(body, b"result") - - def test_https_request(self): - agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy()) - - self.reactor.lookups["test.com"] = "1.2.3.4" - d = agent.request(b"GET", b"https://test.com/abc") - - # there should be a pending TCP connection - clients = self.reactor.tcpClients - self.assertEqual(len(clients), 1) - (host, port, client_factory, _timeout, _bindAddress) = clients[0] - self.assertEqual(host, "1.2.3.4") - self.assertEqual(port, 443) + self.assertEqual(port, 443 if is_https else 80) # make a test server, and wire up the client http_server = self._make_connection( client_factory, _get_test_protocol_factory(), - ssl=True, - expected_sni=b"test.com", + ssl=is_https, + expected_sni=hostname if is_https else None, ) # the FakeTransport is async, so we need to pump the reactor @@ -166,8 +142,8 @@ class MatrixFederationAgentTests(TestCase): request = http_server.requests[0] self.assertEqual(request.method, b"GET") - self.assertEqual(request.path, b"/abc") - self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) + self.assertEqual(request.path, b"/" + path) + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [hostname]) request.write(b"result") request.finish() @@ -177,8 +153,58 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + def test_http_request(self): + agent = ProxyAgent(self.reactor) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + + def test_https_request(self): + agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy()) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") + + def test_http_request_use_proxy_empty_environment(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"}) + def test_http_request_via_uppercase_no_proxy(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + + @patch.dict( + os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"} + ) + def test_http_request_via_no_proxy(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + + @patch.dict( + os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"} + ) + def test_https_request_via_no_proxy(self): + agent = ProxyAgent( + self.reactor, + contextFactory=get_test_https_policy(), + use_proxy=True, + ) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") + + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"}) + def test_http_request_via_no_proxy_star(self): + agent = ProxyAgent(self.reactor, use_proxy=True) + self._test_request_direct_connection(agent, b"http", b"test.com", b"") + + @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"}) + def test_https_request_via_no_proxy_star(self): + agent = ProxyAgent( + self.reactor, + contextFactory=get_test_https_policy(), + use_proxy=True, + ) + self._test_request_direct_connection(agent, b"https", b"test.com", b"abc") + + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"}) def test_http_request_via_proxy(self): - agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888") + agent = ProxyAgent(self.reactor, use_proxy=True) self.reactor.lookups["proxy.com"] = "1.2.3.5" d = agent.request(b"GET", b"http://test.com") @@ -214,11 +240,12 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + @patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"}) def test_https_request_via_proxy(self): agent = ProxyAgent( self.reactor, contextFactory=get_test_https_policy(), - https_proxy=b"proxy.com", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5" @@ -294,6 +321,7 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") + @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) def test_http_request_via_proxy_with_blacklist(self): # The blacklist includes the configured proxy IP. agent = ProxyAgent( @@ -301,7 +329,7 @@ class MatrixFederationAgentTests(TestCase): self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) ), self.reactor, - http_proxy=b"proxy.com:8888", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5" @@ -338,7 +366,8 @@ class MatrixFederationAgentTests(TestCase): body = self.successResultOf(treq.content(resp)) self.assertEqual(body, b"result") - def test_https_request_via_proxy_with_blacklist(self): + @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"}) + def test_https_request_via_uppercase_proxy_with_blacklist(self): # The blacklist includes the configured proxy IP. agent = ProxyAgent( BlacklistingReactorWrapper( @@ -346,7 +375,7 @@ class MatrixFederationAgentTests(TestCase): ), self.reactor, contextFactory=get_test_https_policy(), - https_proxy=b"proxy.com", + use_proxy=True, ) self.reactor.lookups["proxy.com"] = "1.2.3.5"