diff --git a/README.md b/README.md index 863c54d..3ce2a9f 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ Memory usage sits at around 25M under load. 3. Edit the config. 4. Start the loadbalancer with `./proxy-loadbalancer --config [path to your config.yml]` -The load balancer supports using [curl-impersonate](https://github.com/lwthiker/curl-impersonate) to masquerade as the -Chrome browser when performing proxy checks. This is experimental. +The load balancer has experimental support for using [curl-impersonate](https://github.com/lwthiker/curl-impersonate) to masquerade as the Chrome browser when +performing proxy checks. 1. Download `*.x86_64-linux-gnu.tar.gz ` from https://github.com/lwthiker/curl-impersonate/releases 2. Set `proxy_check_impersonate_chrome: true` @@ -35,15 +35,19 @@ An example systemd service `loadbalancer.service` is provided. The server displays health, stats, info at `/json`. +Use `--log-third-party-test-failures` along with `--debug` when you want extra info on the third-party proxy tests. This +can get very noisy if you have lots of third-party proxies, so it's hidden behind an extra flag. + ``` === Proxy Load Balancer === -Usage of ./proxy-loadbalancer: +Usage of ./proxy-loadbalancer: --config [string] - Path to the config file - -d, --debug - Enable debug mode - --v Print version and exit - -h, --help Print this help message + Path to the config file + -d, --debug + Enable debug mode + -l, --log-third-party-test-failures + Log third-party test debug info + -v Print version and exit ``` ## Special Headers diff --git a/config.example.yml b/config.example.yml index 0f64d30..6c20b45 100644 --- a/config.example.yml +++ b/config.example.yml @@ -13,6 +13,10 @@ ip_checker_url: https://api.ipify.org # Connection timeout for the proxies in seconds. proxy_connect_timeout: 60 +# Maximum number of attempts for a proxy connection (must be greater than 0). +# NOTE(review): retries currently reuse the proxy already chosen for the request - confirm whether a new proxy should be picked per retry. 
+proxy_connect_retries: 3 + # Use `curl-impersonate` to pretend to be Chrome when testing proxies. proxy_check_impersonate_chrome: false proxy_check_impersonate_chrome_binary: ./curl_chrome116 diff --git a/src/config/config.go b/src/config/config.go index 3c80dfd..bebe282 100644 --- a/src/config/config.go +++ b/src/config/config.go @@ -27,6 +27,7 @@ type Config struct { ProxyCheckInterval int ProxyCheckImpersonateChrome bool ProxyCheckImpersonateChromeBinary string + ProxyConnectRetries int } func SetConfig(configFile string) (*Config, error) { @@ -49,6 +50,7 @@ func SetConfig(configFile string) (*Config, error) { viper.SetDefault("proxy_check_interval", 60) viper.SetDefault("proxy_check_impersonate_chrome", false) viper.SetDefault("proxy_check_impersonate_chrome_binary", nil) + viper.SetDefault("proxy_connect_retries", 3) err := viper.ReadInConfig() if err != nil { @@ -69,6 +71,7 @@ func SetConfig(configFile string) (*Config, error) { ProxyCheckInterval: viper.GetInt("proxy_check_interval"), ProxyCheckImpersonateChrome: viper.GetBool("proxy_check_impersonate_chrome"), ProxyCheckImpersonateChromeBinary: viper.GetString("proxy_check_impersonate_chrome_binary"), + ProxyConnectRetries: viper.GetInt("proxy_connect_retries"), } if len(config.ProxyPoolOurs) == 0 && len(config.ProxyPoolThirdparty) == 0 { @@ -106,6 +109,10 @@ func SetConfig(configFile string) (*Config, error) { } } + if config.ProxyConnectRetries <= 0 { + return nil, errors.New("proxy_connect_retries must be greater than 0") + } + cfg = config return config, nil } diff --git a/src/config/flags.go b/src/config/flags.go new file mode 100644 index 0000000..81e0707 --- /dev/null +++ b/src/config/flags.go @@ -0,0 +1,27 @@ +package config + +import "flag" + +var CliArgs *CliConfig + +type CliConfig struct { + ConfigFile string + Debug bool + Help bool + Version bool + LogThirdPartyTest bool +} + +func ParseArgs() { + if CliArgs != nil { + panic("already defined") + } + CliArgs = &CliConfig{} + 
flag.StringVar(&CliArgs.ConfigFile, "config", "", "Path to the config file") + flag.BoolVar(&CliArgs.Debug, "d", false, "Enable debug mode") + flag.BoolVar(&CliArgs.Debug, "debug", false, "Enable debug mode") + flag.BoolVar(&CliArgs.LogThirdPartyTest, "l", false, "Log third-party test debug info") + flag.BoolVar(&CliArgs.LogThirdPartyTest, "log-third-party-test-failures", false, "Log third-party test debug info") + flag.BoolVar(&CliArgs.Version, "v", false, "Print version and exit") + flag.Parse() +} diff --git a/src/proxy-loadbalancer.go b/src/proxy-loadbalancer.go index c9a1e15..ee4dae9 100644 --- a/src/proxy-loadbalancer.go +++ b/src/proxy-loadbalancer.go @@ -13,26 +13,17 @@ import ( "runtime/debug" ) -type cliConfig struct { - configFile string - initialCrawl bool - debug bool - disableElasticSync bool - help bool - version bool -} - var Version = "development" var VersionDate = "not set" func main() { fmt.Println("=== Proxy Load Balancer ===") - cliArgs := parseArgs() - if cliArgs.help { + config.ParseArgs() + if config.CliArgs.Help { flag.Usage() os.Exit(0) } - if cliArgs.version { + if config.CliArgs.Version { buildInfo, ok := debug.ReadBuildInfo() if ok { @@ -51,7 +42,7 @@ func main() { os.Exit(0) } - if cliArgs.debug { + if config.CliArgs.Debug { logging.InitLogger(logrus.DebugLevel) } else { logging.InitLogger(logrus.InfoLevel) @@ -59,7 +50,7 @@ func main() { log := logging.GetLogger() log.Debugln("Initializing...") - if cliArgs.configFile == "" { + if config.CliArgs.ConfigFile == "" { exePath, err := os.Executable() if err != nil { panic(err) @@ -70,20 +61,23 @@ func main() { if _, err := os.Stat(filepath.Join(exeDir, "config.yaml")); err == nil { log.Fatalln("Both config.yml and config.yaml exist in the executable directory. 
Please specify one with the --config flag.") } - cliArgs.configFile = filepath.Join(exeDir, "config.yml") + config.CliArgs.ConfigFile = filepath.Join(exeDir, "config.yml") } else if _, err := os.Stat(filepath.Join(exeDir, "config.yaml")); err == nil { - cliArgs.configFile = filepath.Join(exeDir, "config.yaml") + config.CliArgs.ConfigFile = filepath.Join(exeDir, "config.yaml") } else { log.Fatalln("No config file found in the executable directory. Please provide one with the --config flag.") } } - configData, err := config.SetConfig(cliArgs.configFile) + configData, err := config.SetConfig(config.CliArgs.ConfigFile) if err != nil { log.Fatalf(`Failed to load config: %s`, err) } log.Debugf(`Proxy check interval: %d sec`, config.GetConfig().ProxyCheckInterval) - log.Debugf(`Using curl-impersonate binary: %s`, config.GetConfig().ProxyCheckImpersonateChromeBinary) + + if config.GetConfig().ProxyCheckImpersonateChrome { + log.Debugf(`Using curl-impersonate binary: %s`, config.GetConfig().ProxyCheckImpersonateChromeBinary) + } proxyCluster := proxy.NewForwardProxyCluster() go func() { @@ -97,13 +91,3 @@ func main() { select {} } - -func parseArgs() cliConfig { - var cliArgs cliConfig - flag.StringVar(&cliArgs.configFile, "config", "", "Path to the config file") - flag.BoolVar(&cliArgs.debug, "d", false, "Enable debug mode") - flag.BoolVar(&cliArgs.debug, "debug", false, "Enable debug mode") - flag.BoolVar(&cliArgs.version, "v", false, "Print version and exit") - flag.Parse() - return cliArgs -} diff --git a/src/proxy/handleConnect.go b/src/proxy/handleConnect.go index ad89810..6e5e94c 100644 --- a/src/proxy/handleConnect.go +++ b/src/proxy/handleConnect.go @@ -97,7 +97,6 @@ func (p *ForwardProxyCluster) proxyHttpConnect(w http.ResponseWriter, req *http. 
remoteAddr, _, _ := net.SplitHostPort(req.RemoteAddr) _, proxyUser, proxyPass, proxyHost, parsedProxyUrl, err := p.validateRequestAndGetProxy(w, req) if err != nil { - // Error has already been handled, just log and return. if proxyHost == "" { proxyHost = "none" } @@ -105,7 +104,6 @@ func (p *ForwardProxyCluster) proxyHttpConnect(w http.ResponseWriter, req *http. return } - // Variables for later var returnCode *int returnCode = new(int) *returnCode = -1 @@ -135,18 +133,25 @@ func (p *ForwardProxyCluster) proxyHttpConnect(w http.ResponseWriter, req *http. copyHeader(proxyReq.Header, req.Header) proxyReq.Header.Set("X-Forwarded-For", req.RemoteAddr) - resp, err := client.Do(proxyReq) - if err != nil { - *errorMsg = fmt.Sprintf(`Failed to execute %s request to "%s": %s`, req.Method, req.URL.String(), err) - http.Error(w, "failed to execute request to downstream", http.StatusServiceUnavailable) - return + for i := 0; i < config.GetConfig().ProxyConnectRetries; i++ { // Attempt the request up to ProxyConnectRetries times. + resp, err := client.Do(proxyReq) + if err != nil { + *errorMsg = fmt.Sprintf(`Failed to execute %s request to "%s" - attempt %d/%d - %s`, req.Method, req.URL.String(), i+1, config.GetConfig().ProxyConnectRetries, err) + if i < config.GetConfig().ProxyConnectRetries-1 { + continue + } else { + http.Error(w, "failed to execute request to downstream", http.StatusServiceUnavailable) + return + } + } else { + defer resp.Body.Close() + *returnCode = resp.StatusCode + copyHeader(w.Header(), resp.Header) + w.WriteHeader(resp.StatusCode) + io.Copy(w, resp.Body) + break + } } - defer resp.Body.Close() - *returnCode = resp.StatusCode - - copyHeader(w.Header(), resp.Header) - w.WriteHeader(resp.StatusCode) - io.Copy(w, resp.Body) } func (p *ForwardProxyCluster) proxyHttpsConnect(w http.ResponseWriter, req *http.Request) { @@ -155,7 +160,6 @@ func (p *ForwardProxyCluster) proxyHttpsConnect(w http.ResponseWriter, req *http targetHost, _, _ := net.SplitHostPort(req.Host) _, proxyUser, proxyPass, 
proxyHost, _, err := p.validateRequestAndGetProxy(w, req) if err != nil { - // Error has already been handled, just log and return. if proxyHost == "" { proxyHost = "none" } @@ -170,12 +174,21 @@ func (p *ForwardProxyCluster) proxyHttpsConnect(w http.ResponseWriter, req *http *errorMsg = "" defer logProxyRequest(remoteAddr, proxyHost, targetHost, returnCode, "CONNECT", requestStartTime, errorMsg) - // Start a connection to the downstream proxy server. - proxyConn, err := net.DialTimeout("tcp", proxyHost, config.GetConfig().ProxyConnectTimeout) - if err != nil { - *errorMsg = fmt.Sprintf(`Failed to dial proxy %s - %s`, proxyHost, err) - http.Error(w, "failed to make request to downstream", http.StatusServiceUnavailable) - return + var proxyConn net.Conn + for i := 0; i < config.GetConfig().ProxyConnectRetries; i++ { + // Start a connection to the downstream proxy server. + proxyConn, err = net.DialTimeout("tcp", proxyHost, config.GetConfig().ProxyConnectTimeout) + if err != nil { + *errorMsg = fmt.Sprintf(`Failed to dial proxy %s - attempt %d/%d - %s`, proxyHost, i+1, config.GetConfig().ProxyConnectRetries, err) + if i < config.GetConfig().ProxyConnectRetries-1 { + continue + } else { + http.Error(w, "failed to make request to downstream", http.StatusServiceUnavailable) + return + } + } else { + break + } } // Proxy authentication diff --git a/src/proxy/threads.go b/src/proxy/threads.go index 6ec2362..90b7e11 100644 --- a/src/proxy/threads.go +++ b/src/proxy/threads.go @@ -10,6 +10,8 @@ import ( "time" ) +// TODO: fix 503 errors returned during proxy checking process + func (p *ForwardProxyCluster) ValidateProxiesThread() { log.Infoln("Doing initial backend check, please wait...") started := false @@ -58,10 +60,13 @@ func (p *ForwardProxyCluster) ValidateProxiesThread() { // Test the proxy. 
ipAddr, testErr := sendRequestThroughProxy(pxy, config.GetConfig().IpCheckerURL) if testErr != nil { - log.Debugf("Validate - %s failed: %s", proxyHost, testErr) if isThirdparty(pxy) { + if config.CliArgs.LogThirdPartyTest { + log.Debugf("Validate - %s failed: %s", proxyHost, testErr) + } newThirdpartyOfflineProxies = append(newThirdpartyOfflineProxies, pxy) } else { + log.Debugf("Validate - %s failed: %s", proxyHost, testErr) newOurOfflineProxies = append(newOurOfflineProxies, pxy) } return @@ -85,7 +90,9 @@ func (p *ForwardProxyCluster) ValidateProxiesThread() { if bv3hiErr != nil { okToAdd = false newThirdpartyBrokenProxies = append(newThirdpartyBrokenProxies, pxy) - log.Debugf(`%s failed third-party test for URL "%s" -- %s`, proxyHost, d, bv3hiErr) + if config.CliArgs.LogThirdPartyTest { + log.Debugf(`%s failed third-party test for URL "%s" -- %s`, proxyHost, d, bv3hiErr) + } break } }