From 28c7b602714c8b01234f143afdf4f85c3761e232 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Wed, 20 Mar 2024 19:06:06 -0600 Subject: [PATCH] add helpful headers --- README.md | 9 ++- proxy-skeleton/app/app.py | 5 +- proxy-skeleton/app/background.py | 67 +++++++++++++-------- proxy-skeleton/app/plugins/load_balancer.py | 36 ++++++++--- 4 files changed, 82 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index a047145..2fb4b5b 100644 --- a/README.md +++ b/README.md @@ -16,4 +16,11 @@ servers without worrying about implementing anything special clientside. ## Use To start the load balancer server, navigate to `./proxy-skeleton` and run `python3 -m app`. The systemd service -`loadbalancer.service` is provided as a service example. \ No newline at end of file +`loadbalancer.service` is provided as a service example. + +## Special Headers + +The load balancer accepts special headers to control its behavior. + +- `Smartproxy-Bypass`: don't use any SmartProxy endpoints. +- `Smartproxy-Disable-BV3HI`: don't filter SmartProxy endpoints by the 503 connect error. \ No newline at end of file diff --git a/proxy-skeleton/app/app.py b/proxy-skeleton/app/app.py index e5ce389..e3cb72b 100644 --- a/proxy-skeleton/app/app.py +++ b/proxy-skeleton/app/app.py @@ -46,10 +46,13 @@ def entry_point() -> None: # NOTE: Pass plugins via *args if you define custom flags. # Currently plugins passed via **kwargs are not discovered for # custom flags by proxy.py - # # See https://github.com/abhinavsingh/proxy.py/issues/871 plugins=[ 'app.plugins.ProxyLoadBalancer', ], + disable_headers=[ + b'smartproxy-bypass', + b'smartproxy-disable-bv3hi' + ] ) as _: proxy.sleep_loop() diff --git a/proxy-skeleton/app/background.py b/proxy-skeleton/app/background.py index f363733..fc98aed 100644 --- a/proxy-skeleton/app/background.py +++ b/proxy-skeleton/app/background.py @@ -13,6 +13,8 @@ from .pid import zombie_slayer from .redis_cycle import add_backend_cycler from .smartproxy import transform_smartproxy +DEBUG_MODE = False + headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', @@ -51,6 +53,7 @@ def validate_proxies(): our_online_backends = {} smartproxy_online_backends = {} + smartproxy_broken_proxies = {} ip_addresses = set() def check_proxy(pxy): @@ -59,50 +62,66 @@ def validate_proxies(): if pxy in SMARTPROXY_POOL: smartproxy = True r = requests.get(IP_CHECKER, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers) - - # TODO: remove when fixed - for d in SMARTPROXY_BV3HI_FIX: - r2 = requests.get(d, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers) - if r2.status_code != 200: - logger.info(f'PROXY BV3HI TEST failed - {pxy} - got code {r2.status_code}') - return else: r = requests.get(IP_CHECKER, proxies={'http': pxy, 'https': pxy}, timeout=15, headers=headers) if r.status_code != 200: logger.info(f'PROXY TEST failed - {pxy} - got code {r.status_code}') return - - ip = r.text - if ip not in ip_addresses: - proxy_dict = our_online_backends if not smartproxy else smartproxy_online_backends - ip_addresses.add(ip) - proxy_dict[pxy] = ip - else: - s = ' Smartproxy ' if smartproxy else ' ' - logger.info(f'Duplicate{s}IP: {ip}') except Exception as e: logger.info(f'PROXY TEST failed - {pxy} - {e}') # ': {e.__class__.__name__}') - # traceback.print_exc() + return + + ip = r.text + if ip not in ip_addresses: + proxy_dict = our_online_backends if not smartproxy else smartproxy_online_backends + ip_addresses.add(ip) + proxy_dict[pxy] = ip + else: + s = ' Smartproxy ' if smartproxy else ' ' + logger.warning(f'Duplicate{s}IP: {ip}') + return + + # TODO: remove when fixed + try: + if smartproxy: + for d in SMARTPROXY_BV3HI_FIX: + r2 = requests.get(d, proxies={'http': transform_smartproxy(pxy), 'https': transform_smartproxy(pxy)}, timeout=15, headers=headers) + if r2.status_code != 200: + smartproxy_broken_proxies[pxy] = r.text + logger.info(f'PROXY BV3HI TEST failed - {pxy} - got code {r2.status_code}') + except Exception as e: + smartproxy_broken_proxies[pxy] = r.text + logger.info(f'PROXY BV3HI TEST failed - {pxy} - {e}') with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_PROXY_CHECKERS) as executor: executor.map(check_proxy, set(PROXY_POOL) | set(SMARTPROXY_POOL)) our_valid_proxies = list(our_online_backends.keys()) - smartproxy_valid_proxies = list(smartproxy_online_backends.keys()) + + # Remove the broken SmartProxy proxies from the working ones. + sp_all = list(smartproxy_online_backends.keys()) + smartproxy_broken_proxies = list(smartproxy_broken_proxies.keys()) + smartproxy_valid_proxies = list(set(sp_all) - set(smartproxy_broken_proxies)) + all_valid_proxies = list(set(our_valid_proxies) | set(smartproxy_valid_proxies)) + all_valid_proxies_with_broken_smartproxy = list(set(all_valid_proxies) | set(sp_all)) + if not started: random.shuffle(all_valid_proxies) random.shuffle(our_valid_proxies) started = True - add_backend_cycler('all_proxy_backends', all_valid_proxies) - add_backend_cycler('our_proxy_backends', our_valid_proxies) - if logger.level == logging.DEBUG: - logger.debug(f'Our Backends Online ({len(our_valid_proxies)}): {our_online_backends}') - logger.debug(f'Smartproxy Backends Online ({len(smartproxy_valid_proxies)}): {smartproxy_valid_proxies}') + add_backend_cycler('all_valid_proxies', all_valid_proxies) + add_backend_cycler('our_valid_proxies', our_valid_proxies) + add_backend_cycler('all_valid_proxies_with_broken_smartproxy', all_valid_proxies_with_broken_smartproxy) + + if DEBUG_MODE: + logger.info(f'Our Backends Online ({len(our_valid_proxies)}): {all_valid_proxies}') + logger.info(f'Smartproxy Backends Online ({len(smartproxy_valid_proxies)}): {smartproxy_valid_proxies}') + logger.info(f'Smartproxy Broken Backends ({len(smartproxy_broken_proxies)}): {smartproxy_broken_proxies}') else: - logger.info(f'Our Backends Online: {len(our_valid_proxies)}, Smartproxy Backends Online: {len(smartproxy_valid_proxies)}, Total: {len(our_valid_proxies) + len(smartproxy_valid_proxies)}') + logger.info(f'Our Backends Online: {len(our_valid_proxies)}, Smartproxy Backends Online: {len(smartproxy_valid_proxies)}, Smartproxy Broken Backends: {len(smartproxy_broken_proxies)}, Total Online: {len(our_valid_proxies) + len(smartproxy_valid_proxies)}') redis.set('balancer_online', 1) time.sleep(10) diff --git a/proxy-skeleton/app/plugins/load_balancer.py b/proxy-skeleton/app/plugins/load_balancer.py index 34fedca..18095c9 100644 --- a/proxy-skeleton/app/plugins/load_balancer.py +++ b/proxy-skeleton/app/plugins/load_balancer.py @@ -57,6 +57,12 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): See :class:`~proxy.core.connection.pool.UpstreamConnectionPool` which is a work in progress for SSL cache handling. """ + # Select the proxy to use. + self._endpoint = self._select_proxy(request.host.decode(), request.has_header(b'smartproxy-bypass'), request.has_header(b'smartproxy-disable-bv3hi')) + + request.del_header(b'smartproxy-bypass') + request.del_header(b'smartproxy-disable-bv3hi') + # We don't want to send private IP requests to remote proxies try: if ipaddress.ip_address(text_(request.host)).is_private: @@ -64,9 +70,6 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): except ValueError: pass - # Select the proxy to use. - self._endpoint = self._select_proxy(request.host.decode(), request.has_header(b'smartproxy-bypass')) - # If chosen proxy is the local instance, bypass upstream proxies assert self._endpoint.port and self._endpoint.hostname if self._endpoint.port == self.flags.port and \ @@ -153,7 +156,13 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): self._endpoint.password, ), ) - self.upstream.queue(memoryview(request.build(for_proxy=True))) + self.upstream.queue(memoryview(request.build( + for_proxy=True, + disable_headers=[ + b'smartproxy-bypass', + b'smartproxy-disable-bv3hi' + ] + ))) return request def handle_client_data(self, raw: memoryview) -> Optional[memoryview]: @@ -202,16 +211,25 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): log_attrs[attr] = value.decode('utf-8') logger.info(access_log_format.format_map(log_attrs)) - def _select_proxy(self, request_host: str = None, smartproxy_bypass: bool = True) -> Url: + def _select_proxy(self, request_host: str = None, smartproxy_bypass: bool = False, disable_smartproxy_bv3hi: bool = False) -> Url: online = int(self.redis.get('balancer_online')) if not online: logger.error('Server is not online!') return Url() - if request_host in BYPASS_SMARTPROXY_DOMAINS or smartproxy_bypass: - valid_backends = redis_cycle('our_proxy_backends') + if disable_smartproxy_bv3hi and smartproxy_bypass: + # Prevent undefined behavior. + logger.error('Duplicate options headers detected. Rejecting request.') + return Url() + + if not disable_smartproxy_bv3hi: + # The normal route. + if request_host in BYPASS_SMARTPROXY_DOMAINS or smartproxy_bypass: + valid_backends = redis_cycle('our_valid_proxies') + else: + valid_backends = redis_cycle('all_valid_proxies') else: - valid_backends = redis_cycle('all_proxy_backends') + valid_backends = redis_cycle('all_valid_proxies_with_broken_smartproxy') if not len(valid_backends): logger.error('No valid backends!') @@ -238,7 +256,7 @@ class ProxyLoadBalancer(TcpUpstreamConnectionHandler, HttpProxyBasePlugin): # start_time = time.time() # while not len(backends) and time.time() - start_time < 30: # wait a max of 30 seconds. # time.sleep(1) # wait for 1 second before checking again - # backends = redis_cycle('all_proxy_backends') + # backends = redis_cycle('all_valid_proxies') # if not len(backends): # logger.error('No available proxy after 30 seconds.') # return Url()