add helpful headers
This commit is contained in:
parent
c776c805e7
commit
28c7b60271
|
@ -16,4 +16,11 @@ servers without worrying about implementing anything special clientside.
|
|||
## Use
|
||||
|
||||
To start the load balancer server, navigate to `./proxy-skeleton` and run `python3 -m app`. The systemd service
|
||||
`loadbalancer.service` is provided as a service example.
|
||||
`loadbalancer.service` is provided as a service example.
|
||||
|
||||
## Special Headers
|
||||
|
||||
The load balancer accepts special headers to control its behavior.
|
||||
|
||||
- `Smartproxy-Bypass`: don't use any SmartProxy endpoints.
|
||||
- `Smartproxy-Disable-BV3HI`: don't filter out SmartProxy endpoints that fail the BV3HI check (the 503 connect error).
|
|
@ -46,10 +46,13 @@ def entry_point() -> None:
|
|||
# NOTE: Pass plugins via *args if you define custom flags.
|
||||
# Currently plugins passed via **kwargs are not discovered for
|
||||
# custom flags by proxy.py
|
||||
#
|
||||
# See https://github.com/abhinavsingh/proxy.py/issues/871
|
||||
plugins=[
|
||||
'app.plugins.ProxyLoadBalancer',
|
||||
],
|
||||
disable_headers=[
|
||||
b'smartproxy-bypass',
|
||||
b'smartproxy-disable-bv3hi'
|
||||
]
|
||||
) as _:
|
||||
proxy.sleep_loop()
|
||||
|
|
|
@ -13,6 +13,8 @@ from .pid import zombie_slayer
|
|||
from .redis_cycle import add_backend_cycler
|
||||
from .smartproxy import transform_smartproxy
|
||||
|
||||
DEBUG_MODE = False
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||
|
@ -51,6 +53,7 @@ def validate_proxies():
|
|||
|
||||
our_online_backends = {}
|
||||
smartproxy_online_backends = {}
|
||||
smartproxy_broken_proxies = {}
|
||||
ip_addresses = set()
|
||||
|
||||
def check_proxy(pxy):
|
||||
|
@ -59,50 +62,66 @@ def validate_proxies():
|
|||
if pxy in SMARTPROXY_POOL:
|
||||
smartproxy = True
|
||||
r = requests.get(IP_CHECKER, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers)
|
||||
|
||||
# TODO: remove when fixed
|
||||
for d in SMARTPROXY_BV3HI_FIX:
|
||||
r2 = requests.get(d, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers)
|
||||
if r2.status_code != 200:
|
||||
logger.info(f'PROXY BV3HI TEST failed - {pxy} - got code {r2.status_code}')
|
||||
return
|
||||
else:
|
||||
r = requests.get(IP_CHECKER, proxies={'http': pxy, 'https': pxy}, timeout=15, headers=headers)
|
||||
|
||||
if r.status_code != 200:
|
||||
logger.info(f'PROXY TEST failed - {pxy} - got code {r.status_code}')
|
||||
return
|
||||
|
||||
ip = r.text
|
||||
if ip not in ip_addresses:
|
||||
proxy_dict = our_online_backends if not smartproxy else smartproxy_online_backends
|
||||
ip_addresses.add(ip)
|
||||
proxy_dict[pxy] = ip
|
||||
else:
|
||||
s = ' Smartproxy ' if smartproxy else ' '
|
||||
logger.info(f'Duplicate{s}IP: {ip}')
|
||||
except Exception as e:
|
||||
logger.info(f'PROXY TEST failed - {pxy} - {e}') # ': {e.__class__.__name__}')
|
||||
# traceback.print_exc()
|
||||
return
|
||||
|
||||
ip = r.text
|
||||
if ip not in ip_addresses:
|
||||
proxy_dict = our_online_backends if not smartproxy else smartproxy_online_backends
|
||||
ip_addresses.add(ip)
|
||||
proxy_dict[pxy] = ip
|
||||
else:
|
||||
s = ' Smartproxy ' if smartproxy else ' '
|
||||
logger.warning(f'Duplicate{s}IP: {ip}')
|
||||
return
|
||||
|
||||
# TODO: remove when fixed
|
||||
try:
|
||||
if smartproxy:
|
||||
for d in SMARTPROXY_BV3HI_FIX:
|
||||
r2 = requests.get(d, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers)
|
||||
if r2.status_code != 200:
|
||||
smartproxy_broken_proxies[pxy] = r.text
|
||||
logger.info(f'PROXY BV3HI TEST failed - {pxy} - got code {r2.status_code}')
|
||||
except Exception as e:
|
||||
smartproxy_broken_proxies[pxy] = r.text
|
||||
logger.info(f'PROXY BV3HI TEST failed - {pxy} - {e}')
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_PROXY_CHECKERS) as executor:
|
||||
executor.map(check_proxy, set(PROXY_POOL) | set(SMARTPROXY_POOL))
|
||||
|
||||
our_valid_proxies = list(our_online_backends.keys())
|
||||
smartproxy_valid_proxies = list(smartproxy_online_backends.keys())
|
||||
|
||||
# Remove the broken SmartProxy proxies from the working ones.
|
||||
sp_all = list(smartproxy_online_backends.keys())
|
||||
smartproxy_broken_proxies = list(smartproxy_broken_proxies.keys())
|
||||
smartproxy_valid_proxies = list(set(sp_all) - set(smartproxy_broken_proxies))
|
||||
|
||||
all_valid_proxies = list(set(our_valid_proxies) | set(smartproxy_valid_proxies))
|
||||
all_valid_proxies_with_broken_smartproxy = list(set(all_valid_proxies) | set(sp_all))
|
||||
|
||||
if not started:
|
||||
random.shuffle(all_valid_proxies)
|
||||
random.shuffle(our_valid_proxies)
|
||||
started = True
|
||||
add_backend_cycler('all_proxy_backends', all_valid_proxies)
|
||||
add_backend_cycler('our_proxy_backends', our_valid_proxies)
|
||||
|
||||
if logger.level == logging.DEBUG:
|
||||
logger.debug(f'Our Backends Online ({len(our_valid_proxies)}): {our_online_backends}')
|
||||
logger.debug(f'Smartproxy Backends Online ({len(smartproxy_valid_proxies)}): {smartproxy_valid_proxies}')
|
||||
add_backend_cycler('all_valid_proxies', all_valid_proxies)
|
||||
add_backend_cycler('our_valid_proxies', our_valid_proxies)
|
||||
add_backend_cycler('all_valid_proxies_with_broken_smartproxy', all_valid_proxies_with_broken_smartproxy)
|
||||
|
||||
if DEBUG_MODE:
|
||||
logger.info(f'Our Backends Online ({len(our_valid_proxies)}): {all_valid_proxies}')
|
||||
logger.info(f'Smartproxy Backends Online ({len(smartproxy_valid_proxies)}): {smartproxy_valid_proxies}')
|
||||
logger.info(f'Smartproxy Broken Backends ({len(smartproxy_broken_proxies)}): {smartproxy_broken_proxies}')
|
||||
else:
|
||||
logger.info(f'Our Backends Online: {len(our_valid_proxies)}, Smartproxy Backends Online: {len(smartproxy_valid_proxies)}, Total: {len(our_valid_proxies) + len(smartproxy_valid_proxies)}')
|
||||
logger.info(f'Our Backends Online: {len(our_valid_proxies)}, Smartproxy Backends Online: {len(smartproxy_valid_proxies)}, Smartproxy Broken Backends: {len(smartproxy_broken_proxies)}, Total Online: {len(our_valid_proxies) + len(smartproxy_valid_proxies)}')
|
||||
|
||||
redis.set('balancer_online', 1)
|
||||
time.sleep(10)
|
||||
|
|
|
@ -57,6 +57,12 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
|
|||
See :class:`~proxy.core.connection.pool.UpstreamConnectionPool` which is a work
|
||||
in progress for SSL cache handling.
|
||||
"""
|
||||
# Select the proxy to use.
|
||||
self._endpoint = self._select_proxy(request.host.decode(), request.has_header(b'smartproxy-bypass'), request.has_header(b'smartproxy-disable-bv3hi'))
|
||||
|
||||
request.del_header(b'smartproxy-bypass')
|
||||
request.del_header(b'smartproxy-disable-bv3hi')
|
||||
|
||||
# We don't want to send private IP requests to remote proxies
|
||||
try:
|
||||
if ipaddress.ip_address(text_(request.host)).is_private:
|
||||
|
@ -64,9 +70,6 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
|
|||
except ValueError:
|
||||
pass
|
||||
|
||||
# Select the proxy to use.
|
||||
self._endpoint = self._select_proxy(request.host.decode(), request.has_header(b'smartproxy-bypass'))
|
||||
|
||||
# If chosen proxy is the local instance, bypass upstream proxies
|
||||
assert self._endpoint.port and self._endpoint.hostname
|
||||
if self._endpoint.port == self.flags.port and \
|
||||
|
@ -153,7 +156,13 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
|
|||
self._endpoint.password,
|
||||
),
|
||||
)
|
||||
self.upstream.queue(memoryview(request.build(for_proxy=True)))
|
||||
self.upstream.queue(memoryview(request.build(
|
||||
for_proxy=True,
|
||||
disable_headers=[
|
||||
b'smartproxy-bypass',
|
||||
b'smartproxy-disable-bv3hi'
|
||||
]
|
||||
)))
|
||||
return request
|
||||
|
||||
def handle_client_data(self, raw: memoryview) -> Optional[memoryview]:
|
||||
|
@ -202,16 +211,25 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
|
|||
log_attrs[attr] = value.decode('utf-8')
|
||||
logger.info(access_log_format.format_map(log_attrs))
|
||||
|
||||
def _select_proxy(self, request_host: str = None, smartproxy_bypass: bool = True) -> Url:
|
||||
def _select_proxy(self, request_host: str = None, smartproxy_bypass: bool = False, disable_smartproxy_bv3hi: bool = False) -> Url:
|
||||
online = int(self.redis.get('balancer_online'))
|
||||
if not online:
|
||||
logger.error('Server is not online!')
|
||||
return Url()
|
||||
|
||||
if request_host in BYPASS_SMARTPROXY_DOMAINS or smartproxy_bypass:
|
||||
valid_backends = redis_cycle('our_proxy_backends')
|
||||
if disable_smartproxy_bv3hi and smartproxy_bypass:
|
||||
# Prevent undefined behavior.
|
||||
logger.error('Duplicate options headers detected. Rejecting request.')
|
||||
return Url()
|
||||
|
||||
if not disable_smartproxy_bv3hi:
|
||||
# The normal route.
|
||||
if request_host in BYPASS_SMARTPROXY_DOMAINS or smartproxy_bypass:
|
||||
valid_backends = redis_cycle('our_valid_proxies')
|
||||
else:
|
||||
valid_backends = redis_cycle('all_valid_proxies')
|
||||
else:
|
||||
valid_backends = redis_cycle('all_proxy_backends')
|
||||
valid_backends = redis_cycle('all_valid_proxies_with_broken_smartproxy')
|
||||
|
||||
if not len(valid_backends):
|
||||
logger.error('No valid backends!')
|
||||
|
@ -238,7 +256,7 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin):
|
|||
# start_time = time.time()
|
||||
# while not len(backends) and time.time() - start_time < 30: # wait a max of 30 seconds.
|
||||
# time.sleep(1) # wait for 1 second before checking again
|
||||
# backends = redis_cycle('all_proxy_backends')
|
||||
# backends = redis_cycle('all_valid_proxies')
|
||||
# if not len(backends):
|
||||
# logger.error('No available proxy after 30 seconds.')
|
||||
# return Url()
|
||||
|
|
Loading…
Reference in New Issue