Add support for no_proxy and case insensitive env variables (#9372)

### Changes proposed in this PR

- Add support for the `no_proxy` and `NO_PROXY` environment variables
  - Internally rely on urllib's [`proxy_bypass_environment`](https://github.com/python/cpython/blob/bdb941be42/Lib/urllib/request.py#L2519)
- Extract env variables using urllib's `getproxies`/[`getproxies_environment`](https://github.com/python/cpython/blob/bdb941be42/Lib/urllib/request.py#L2488), which supports lowercase + uppercase, preferring lowercase, except for `HTTP_PROXY` in a CGI environment

This does contain behaviour changes for consumers so making sure these are called out:
- `no_proxy`/`NO_PROXY` is now respected
- lowercase `https_proxy` is now respected and takes precedence over `HTTPS_PROXY`

Related to #9306 which also uses `ProxyAgent`

Signed-off-by: Timothy Leung tim95@hotmail.co.uk
This commit is contained in:
Tim Leung 2021-02-26 17:37:57 +00:00 committed by GitHub
parent 15090de850
commit ddb240293a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 121 additions and 71 deletions

1
changelog.d/9372.feature Normal file
View File

@ -0,0 +1 @@
The `no_proxy` and `NO_PROXY` environment variables are now respected in proxied HTTP clients with the lowercase form taking precedence if both are present. Additionally, the lowercase `https_proxy` environment variable is now respected in proxied HTTP clients on top of existing support for the uppercase `HTTPS_PROXY` form and takes precedence if both are present. Contributed by Timothy Leung.

View File

@ -289,8 +289,7 @@ class SimpleHttpClient:
treq_args: Dict[str, Any] = {}, treq_args: Dict[str, Any] = {},
ip_whitelist: Optional[IPSet] = None, ip_whitelist: Optional[IPSet] = None,
ip_blacklist: Optional[IPSet] = None, ip_blacklist: Optional[IPSet] = None,
http_proxy: Optional[bytes] = None, use_proxy: bool = False,
https_proxy: Optional[bytes] = None,
): ):
""" """
Args: Args:
@ -300,8 +299,8 @@ class SimpleHttpClient:
we may not request. we may not request.
ip_whitelist: The whitelisted IP addresses, that we can ip_whitelist: The whitelisted IP addresses, that we can
request if it were otherwise caught in a blacklist. request if it were otherwise caught in a blacklist.
http_proxy: proxy server to use for http connections. host[:port] use_proxy: Whether proxy settings should be discovered and used
https_proxy: proxy server to use for https connections. host[:port] from conventional environment variables.
""" """
self.hs = hs self.hs = hs
@ -345,8 +344,7 @@ class SimpleHttpClient:
connectTimeout=15, connectTimeout=15,
contextFactory=self.hs.get_http_client_context_factory(), contextFactory=self.hs.get_http_client_context_factory(),
pool=pool, pool=pool,
http_proxy=http_proxy, use_proxy=use_proxy,
https_proxy=https_proxy,
) )
if self._ip_blacklist: if self._ip_blacklist:

View File

@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
import logging import logging
import re import re
from urllib.request import getproxies_environment, proxy_bypass_environment
from zope.interface import implementer from zope.interface import implementer
@ -58,6 +59,9 @@ class ProxyAgent(_AgentBase):
pool (HTTPConnectionPool|None): connection pool to be used. If None, a pool (HTTPConnectionPool|None): connection pool to be used. If None, a
non-persistent pool instance will be created. non-persistent pool instance will be created.
use_proxy (bool): Whether proxy settings should be discovered and used
from conventional environment variables.
""" """
def __init__( def __init__(
@ -68,8 +72,7 @@ class ProxyAgent(_AgentBase):
connectTimeout=None, connectTimeout=None,
bindAddress=None, bindAddress=None,
pool=None, pool=None,
http_proxy=None, use_proxy=False,
https_proxy=None,
): ):
_AgentBase.__init__(self, reactor, pool) _AgentBase.__init__(self, reactor, pool)
@ -84,6 +87,15 @@ class ProxyAgent(_AgentBase):
if bindAddress is not None: if bindAddress is not None:
self._endpoint_kwargs["bindAddress"] = bindAddress self._endpoint_kwargs["bindAddress"] = bindAddress
http_proxy = None
https_proxy = None
no_proxy = None
if use_proxy:
proxies = getproxies_environment()
http_proxy = proxies["http"].encode() if "http" in proxies else None
https_proxy = proxies["https"].encode() if "https" in proxies else None
no_proxy = proxies["no"] if "no" in proxies else None
self.http_proxy_endpoint = _http_proxy_endpoint( self.http_proxy_endpoint = _http_proxy_endpoint(
http_proxy, self.proxy_reactor, **self._endpoint_kwargs http_proxy, self.proxy_reactor, **self._endpoint_kwargs
) )
@ -92,6 +104,8 @@ class ProxyAgent(_AgentBase):
https_proxy, self.proxy_reactor, **self._endpoint_kwargs https_proxy, self.proxy_reactor, **self._endpoint_kwargs
) )
self.no_proxy = no_proxy
self._policy_for_https = contextFactory self._policy_for_https = contextFactory
self._reactor = reactor self._reactor = reactor
@ -139,13 +153,28 @@ class ProxyAgent(_AgentBase):
pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port) pool_key = (parsed_uri.scheme, parsed_uri.host, parsed_uri.port)
request_path = parsed_uri.originForm request_path = parsed_uri.originForm
if parsed_uri.scheme == b"http" and self.http_proxy_endpoint: should_skip_proxy = False
if self.no_proxy is not None:
should_skip_proxy = proxy_bypass_environment(
parsed_uri.host.decode(),
proxies={"no": self.no_proxy},
)
if (
parsed_uri.scheme == b"http"
and self.http_proxy_endpoint
and not should_skip_proxy
):
# Cache *all* connections under the same key, since we are only # Cache *all* connections under the same key, since we are only
# connecting to a single destination, the proxy: # connecting to a single destination, the proxy:
pool_key = ("http-proxy", self.http_proxy_endpoint) pool_key = ("http-proxy", self.http_proxy_endpoint)
endpoint = self.http_proxy_endpoint endpoint = self.http_proxy_endpoint
request_path = uri request_path = uri
elif parsed_uri.scheme == b"https" and self.https_proxy_endpoint: elif (
parsed_uri.scheme == b"https"
and self.https_proxy_endpoint
and not should_skip_proxy
):
endpoint = HTTPConnectProxyEndpoint( endpoint = HTTPConnectProxyEndpoint(
self.proxy_reactor, self.proxy_reactor,
self.https_proxy_endpoint, self.https_proxy_endpoint,

View File

@ -149,8 +149,7 @@ class PreviewUrlResource(DirectServeJsonResource):
treq_args={"browser_like_redirects": True}, treq_args={"browser_like_redirects": True},
ip_whitelist=hs.config.url_preview_ip_range_whitelist, ip_whitelist=hs.config.url_preview_ip_range_whitelist,
ip_blacklist=hs.config.url_preview_ip_range_blacklist, ip_blacklist=hs.config.url_preview_ip_range_blacklist,
http_proxy=os.getenvb(b"http_proxy"), use_proxy=True,
https_proxy=os.getenvb(b"HTTPS_PROXY"),
) )
self.media_repo = media_repo self.media_repo = media_repo
self.primary_base_path = media_repo.primary_base_path self.primary_base_path = media_repo.primary_base_path

View File

@ -24,7 +24,6 @@
import abc import abc
import functools import functools
import logging import logging
import os
from typing import ( from typing import (
TYPE_CHECKING, TYPE_CHECKING,
Any, Any,
@ -370,11 +369,7 @@ class HomeServer(metaclass=abc.ABCMeta):
""" """
An HTTP client that uses configured HTTP(S) proxies. An HTTP client that uses configured HTTP(S) proxies.
""" """
return SimpleHttpClient( return SimpleHttpClient(self, use_proxy=True)
self,
http_proxy=os.getenvb(b"http_proxy"),
https_proxy=os.getenvb(b"HTTPS_PROXY"),
)
@cache_in_self @cache_in_self
def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient: def get_proxied_blacklisted_http_client(self) -> SimpleHttpClient:
@ -386,8 +381,7 @@ class HomeServer(metaclass=abc.ABCMeta):
self, self,
ip_whitelist=self.config.ip_range_whitelist, ip_whitelist=self.config.ip_range_whitelist,
ip_blacklist=self.config.ip_range_blacklist, ip_blacklist=self.config.ip_range_blacklist,
http_proxy=os.getenvb(b"http_proxy"), use_proxy=True,
https_proxy=os.getenvb(b"HTTPS_PROXY"),
) )
@cache_in_self @cache_in_self

View File

@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging import logging
import os
from unittest.mock import patch
import treq import treq
from netaddr import IPSet from netaddr import IPSet
@ -100,62 +102,36 @@ class MatrixFederationAgentTests(TestCase):
return http_protocol return http_protocol
def test_http_request(self): def _test_request_direct_connection(self, agent, scheme, hostname, path):
agent = ProxyAgent(self.reactor) """Runs a test case for a direct connection not going through a proxy.
self.reactor.lookups["test.com"] = "1.2.3.4" Args:
d = agent.request(b"GET", b"http://test.com") agent (ProxyAgent): the proxy agent being tested
scheme (bytes): expected to be either "http" or "https"
hostname (bytes): the hostname to connect to in the test
path (bytes): the path to connect to in the test
"""
is_https = scheme == b"https"
self.reactor.lookups[hostname.decode()] = "1.2.3.4"
d = agent.request(b"GET", scheme + b"://" + hostname + b"/" + path)
# there should be a pending TCP connection # there should be a pending TCP connection
clients = self.reactor.tcpClients clients = self.reactor.tcpClients
self.assertEqual(len(clients), 1) self.assertEqual(len(clients), 1)
(host, port, client_factory, _timeout, _bindAddress) = clients[0] (host, port, client_factory, _timeout, _bindAddress) = clients[0]
self.assertEqual(host, "1.2.3.4") self.assertEqual(host, "1.2.3.4")
self.assertEqual(port, 80) self.assertEqual(port, 443 if is_https else 80)
# make a test server, and wire up the client
http_server = self._make_connection(
client_factory, _get_test_protocol_factory()
)
# the FakeTransport is async, so we need to pump the reactor
self.reactor.advance(0)
# now there should be a pending request
self.assertEqual(len(http_server.requests), 1)
request = http_server.requests[0]
self.assertEqual(request.method, b"GET")
self.assertEqual(request.path, b"/")
self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"])
request.write(b"result")
request.finish()
self.reactor.advance(0)
resp = self.successResultOf(d)
body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result")
def test_https_request(self):
agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy())
self.reactor.lookups["test.com"] = "1.2.3.4"
d = agent.request(b"GET", b"https://test.com/abc")
# there should be a pending TCP connection
clients = self.reactor.tcpClients
self.assertEqual(len(clients), 1)
(host, port, client_factory, _timeout, _bindAddress) = clients[0]
self.assertEqual(host, "1.2.3.4")
self.assertEqual(port, 443)
# make a test server, and wire up the client # make a test server, and wire up the client
http_server = self._make_connection( http_server = self._make_connection(
client_factory, client_factory,
_get_test_protocol_factory(), _get_test_protocol_factory(),
ssl=True, ssl=is_https,
expected_sni=b"test.com", expected_sni=hostname if is_https else None,
) )
# the FakeTransport is async, so we need to pump the reactor # the FakeTransport is async, so we need to pump the reactor
@ -166,8 +142,8 @@ class MatrixFederationAgentTests(TestCase):
request = http_server.requests[0] request = http_server.requests[0]
self.assertEqual(request.method, b"GET") self.assertEqual(request.method, b"GET")
self.assertEqual(request.path, b"/abc") self.assertEqual(request.path, b"/" + path)
self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"test.com"]) self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [hostname])
request.write(b"result") request.write(b"result")
request.finish() request.finish()
@ -177,8 +153,58 @@ class MatrixFederationAgentTests(TestCase):
body = self.successResultOf(treq.content(resp)) body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result") self.assertEqual(body, b"result")
def test_http_request(self):
agent = ProxyAgent(self.reactor)
self._test_request_direct_connection(agent, b"http", b"test.com", b"")
def test_https_request(self):
agent = ProxyAgent(self.reactor, contextFactory=get_test_https_policy())
self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
def test_http_request_use_proxy_empty_environment(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_direct_connection(agent, b"http", b"test.com", b"")
@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "NO_PROXY": "test.com"})
def test_http_request_via_uppercase_no_proxy(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_direct_connection(agent, b"http", b"test.com", b"")
@patch.dict(
os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "test.com,unused.com"}
)
def test_http_request_via_no_proxy(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_direct_connection(agent, b"http", b"test.com", b"")
@patch.dict(
os.environ, {"https_proxy": "proxy.com", "no_proxy": "test.com,unused.com"}
)
def test_https_request_via_no_proxy(self):
agent = ProxyAgent(
self.reactor,
contextFactory=get_test_https_policy(),
use_proxy=True,
)
self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "*"})
def test_http_request_via_no_proxy_star(self):
agent = ProxyAgent(self.reactor, use_proxy=True)
self._test_request_direct_connection(agent, b"http", b"test.com", b"")
@patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "*"})
def test_https_request_via_no_proxy_star(self):
agent = ProxyAgent(
self.reactor,
contextFactory=get_test_https_policy(),
use_proxy=True,
)
self._test_request_direct_connection(agent, b"https", b"test.com", b"abc")
@patch.dict(os.environ, {"http_proxy": "proxy.com:8888", "no_proxy": "unused.com"})
def test_http_request_via_proxy(self): def test_http_request_via_proxy(self):
agent = ProxyAgent(self.reactor, http_proxy=b"proxy.com:8888") agent = ProxyAgent(self.reactor, use_proxy=True)
self.reactor.lookups["proxy.com"] = "1.2.3.5" self.reactor.lookups["proxy.com"] = "1.2.3.5"
d = agent.request(b"GET", b"http://test.com") d = agent.request(b"GET", b"http://test.com")
@ -214,11 +240,12 @@ class MatrixFederationAgentTests(TestCase):
body = self.successResultOf(treq.content(resp)) body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result") self.assertEqual(body, b"result")
@patch.dict(os.environ, {"https_proxy": "proxy.com", "no_proxy": "unused.com"})
def test_https_request_via_proxy(self): def test_https_request_via_proxy(self):
agent = ProxyAgent( agent = ProxyAgent(
self.reactor, self.reactor,
contextFactory=get_test_https_policy(), contextFactory=get_test_https_policy(),
https_proxy=b"proxy.com", use_proxy=True,
) )
self.reactor.lookups["proxy.com"] = "1.2.3.5" self.reactor.lookups["proxy.com"] = "1.2.3.5"
@ -294,6 +321,7 @@ class MatrixFederationAgentTests(TestCase):
body = self.successResultOf(treq.content(resp)) body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result") self.assertEqual(body, b"result")
@patch.dict(os.environ, {"http_proxy": "proxy.com:8888"})
def test_http_request_via_proxy_with_blacklist(self): def test_http_request_via_proxy_with_blacklist(self):
# The blacklist includes the configured proxy IP. # The blacklist includes the configured proxy IP.
agent = ProxyAgent( agent = ProxyAgent(
@ -301,7 +329,7 @@ class MatrixFederationAgentTests(TestCase):
self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"]) self.reactor, ip_whitelist=None, ip_blacklist=IPSet(["1.0.0.0/8"])
), ),
self.reactor, self.reactor,
http_proxy=b"proxy.com:8888", use_proxy=True,
) )
self.reactor.lookups["proxy.com"] = "1.2.3.5" self.reactor.lookups["proxy.com"] = "1.2.3.5"
@ -338,7 +366,8 @@ class MatrixFederationAgentTests(TestCase):
body = self.successResultOf(treq.content(resp)) body = self.successResultOf(treq.content(resp))
self.assertEqual(body, b"result") self.assertEqual(body, b"result")
def test_https_request_via_proxy_with_blacklist(self): @patch.dict(os.environ, {"HTTPS_PROXY": "proxy.com"})
def test_https_request_via_uppercase_proxy_with_blacklist(self):
# The blacklist includes the configured proxy IP. # The blacklist includes the configured proxy IP.
agent = ProxyAgent( agent = ProxyAgent(
BlacklistingReactorWrapper( BlacklistingReactorWrapper(
@ -346,7 +375,7 @@ class MatrixFederationAgentTests(TestCase):
), ),
self.reactor, self.reactor,
contextFactory=get_test_https_policy(), contextFactory=get_test_https_policy(),
https_proxy=b"proxy.com", use_proxy=True,
) )
self.reactor.lookups["proxy.com"] = "1.2.3.5" self.reactor.lookups["proxy.com"] = "1.2.3.5"