This commit is contained in:
ultrafunkamsterdam 2021-04-27 20:19:51 +02:00
parent e4e373543f
commit d99809c8c6
3 changed files with 118 additions and 168 deletions

View File

@ -15,15 +15,17 @@ from setuptools import setup
import os import os
import re import re
with open(os.path.join(os.path.abspath( with open(
os.path.dirname(__file__)), os.path.join(
'undetected_chromedriver', os.path.abspath(os.path.dirname(__file__)),
'__init__.py'), "undetected_chromedriver",
mode='r', "__init__.py",
encoding='latin1') as fp: ),
mode="r",
encoding="latin1",
) as fp:
try: try:
version = re.findall(r"^__version__ = '([^']+)'\r?$", version = re.findall(r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0]
fp.read(), re.M)[0]
except Exception: except Exception:
raise RuntimeError("unable to determine version") raise RuntimeError("unable to determine version")

View File

@ -31,7 +31,7 @@ from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions from selenium.webdriver import ChromeOptions as _ChromeOptions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
__version__ = "2.2.6" __version__ = '2.2.6'
TARGET_VERSION = 0 TARGET_VERSION = 0

View File

@ -38,7 +38,6 @@ import logging
import os import os
import random import random
import re import re
import shutil
import string import string
import subprocess import subprocess
import sys import sys
@ -47,11 +46,12 @@ import time
import zipfile import zipfile
from distutils.version import LooseVersion from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve from urllib.request import urlopen, urlretrieve
from selenium.webdriver.chrome.options import Options as _ChromeOptions
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service import selenium.webdriver.common.service
import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.webdriver
from selenium.webdriver.chrome.options import Options as _ChromeOptions
__all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable") __all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
@ -61,41 +61,6 @@ logger = logging.getLogger("uc")
logger.setLevel(logging.getLogger().getEffectiveLevel()) logger.setLevel(logging.getLogger().getEffectiveLevel())
#
# def get_driver(user_data_dir=None, keep_profile=False, verbose=True, headless=False):
# """
#
# Args:
# executable_path:
# profile_path:
# keep_profile:
# verbose:
# headless:
#
# Returns:
#
# """
# log_level = 0
#
# opts = ChromeOptions()
# if user_data_dir:
# opts.add_argument('--user-data-dir=%s' % user_data_dir)
#
# if headless:
# opts.headless = True
#
# if verbose:
# logging.basicConfig(level=10)
# logger.setLevel(10)
# service_log_path = 'chrome.verbose.log'
#
# else:
# service_log_path = None
#
# return Chrome(options=opts, log_level=log_level, service_log_path=service_log_path, keep_profile=keep_profile)
def find_chrome_executable(): def find_chrome_executable():
""" """
returns the full path to the chrome _browser binary returns the full path to the chrome _browser binary
@ -131,14 +96,14 @@ def find_chrome_executable():
class Chrome(object): class Chrome(object):
__doc__ = ( __doc__ = (
"""\ """\
-------------------------------------------------------------------------- --------------------------------------------------------------------------
NOTE: NOTE:
Chrome has everything included to work out of the box. Chrome has everything included to work out of the box.
it does not `need` customizations. it does not `need` customizations.
any customizations MAY lead to trigger bot migitation systems. any customizations MAY lead to trigger bot migitation systems.
-------------------------------------------------------------------------- --------------------------------------------------------------------------
""" """
+ selenium.webdriver.remote.webdriver.WebDriver.__doc__ + selenium.webdriver.remote.webdriver.WebDriver.__doc__
) )
@ -152,44 +117,31 @@ class Chrome(object):
service_args=None, service_args=None,
desired_capabilities=None, desired_capabilities=None,
service_log_path=None, service_log_path=None,
chrome_options=None,
keep_alive=True, keep_alive=True,
keep_profile=None, keep_user_data_dir=False,
debug_addr=None,
log_level=0, log_level=0,
factor=1,
delay=2,
emulate_touch=False, emulate_touch=False,
): ):
p = Patcher.auto(executable_path=executable_path) p = Patcher.auto(executable_path=executable_path)
# p.auto(False) # p.auto(False)
self._patcher = p self._patcher = p
self.factor = factor
self.delay = delay
self.port = port self.port = port
self.process = None self.process = None
self.browser_args = None self.browser_args = None
self._rcount = 0 self._rcount = 0
self._rdiff = 10 self._rdiff = 10
self.keep_profile = keep_profile self.keep_user_data_dir = keep_user_data_dir
try: debug_port = selenium.webdriver.common.service.utils.free_port()
dbg = debug_addr.split(":") debug_host = "127.0.0.1"
debug_host, debug_port = str(dbg[0]), int(dbg[1])
except AttributeError:
debug_port = selenium.webdriver.common.service.utils.free_port()
debug_host = "127.0.0.1"
if not debug_addr:
debug_addr = f"{debug_host}:{debug_port}"
if not options: if not options:
options = selenium.webdriver.chrome.webdriver.Options() options = selenium.webdriver.chrome.webdriver.Options()
if not options.debugger_address: if not options.debugger_address:
options.debugger_address = debug_addr options.debugger_address = "%s:%d" % (debug_host, debug_port)
if not options.binary_location: if not options.binary_location:
options.binary_location = find_chrome_executable() options.binary_location = find_chrome_executable()
@ -197,24 +149,32 @@ class Chrome(object):
if not desired_capabilities: if not desired_capabilities:
desired_capabilities = options.to_capabilities() desired_capabilities = options.to_capabilities()
user_data_dir = None user_data_dir = None
for arg in options.arguments: for arg in options.arguments:
if 'user-data-dir' in arg: if "user-data-dir" in arg:
m = re.search('(?:--)?user-data-dir(?:[ =])?(.*)', arg) m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
try: try:
user_data_dir = m[1] user_data_dir = m[1]
logger.debug('user-data-dir found in user argument %s => %s' % (arg, m[1])) logger.debug(
"user-data-dir found in user argument %s => %s" % (arg, m[1])
)
self.keep_user_data_dir = True
break break
except IndexError: except IndexError:
logger.debug('no user data dir could be extracted from supplied argument %s ' % arg) logger.debug(
"no user data dir could be extracted from supplied argument %s "
% arg
)
else: else:
user_data_dir = os.path.normpath(tempfile.mkdtemp()) user_data_dir = os.path.normpath(tempfile.mkdtemp())
arg = '--user-data-dir=%s' % user_data_dir self.keep_user_data_dir = False
arg = "--user-data-dir=%s" % user_data_dir
options.add_argument(arg) options.add_argument(arg)
logger.debug('created a temporary folder in which the user-data (profile) will be stored during this\n' logger.debug(
'session, and added it to chrome startup arguments: %s' % arg) "created a temporary folder in which the user-data (profile) will be stored during this\n"
"session, and added it to chrome startup arguments: %s" % arg
)
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
self.options = options self.options = options
@ -229,7 +189,8 @@ class Chrome(object):
options.binary_location, options.binary_location,
"--remote-debugging-host=%s" % debug_host, "--remote-debugging-host=%s" % debug_host,
"--remote-debugging-port=%s" % debug_port, "--remote-debugging-port=%s" % debug_port,
"--log-level=%d" % log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0], "--log-level=%d" % log_level
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
*extra_args, *extra_args,
] ]
@ -248,7 +209,6 @@ class Chrome(object):
service_args=service_args, service_args=service_args,
desired_capabilities=desired_capabilities, desired_capabilities=desired_capabilities,
service_log_path=service_log_path, service_log_path=service_log_path,
chrome_options=chrome_options,
keep_alive=keep_alive, keep_alive=keep_alive,
) )
@ -331,71 +291,71 @@ class Chrome(object):
capabilities = self.options.to_capabilities() capabilities = self.options.to_capabilities()
self.webdriver.start_session(capabilities, browser_profile) self.webdriver.start_session(capabilities, browser_profile)
def get_in(self, url: str, delay=2, factor=1): # def get_in(self, url: str, delay=2, factor=1):
""" # """
:param url: str # :param url: str
:param delay: int # :param delay: int
:param factor: disconnect <factor> seconds after .get() # :param factor: disconnect <factor> seconds after .get()
too low will disconnect before get() fired. # too low will disconnect before get() fired.
#
================================================= # =================================================
#
In case you are being detected by some sophisticated # In case you are being detected by some sophisticated
algorithm, and you are the kind that hates losing, # algorithm, and you are the kind that hates losing,
this might be your friend. # this might be your friend.
#
this currently works for hCaptcha based systems # this currently works for hCaptcha based systems
(this includes CloudFlare!), and also passes many # (this includes CloudFlare!), and also passes many
custom setups (eg: ticketmaster.com), # custom setups (eg: ticketmaster.com),
#
#
Once you are past the first challenge, a cookie is saved # Once you are past the first challenge, a cookie is saved
which (in my tests) also worked for other sites, and lasted # which (in my tests) also worked for other sites, and lasted
my entire session! However, to play safe, i'd recommend to just # my entire session! However, to play safe, i'd recommend to just
call it once for every new site/domain you navigate to. # call it once for every new site/domain you navigate to.
#
NOTE: mileage may vary! # NOTE: mileage may vary!
bad behaviour can still be detected, and this program does not # bad behaviour can still be detected, and this program does not
magically "fix" a flagged ip. # magically "fix" a flagged ip.
#
please don't spam issues on github! first look if the issue # please don't spam issues on github! first look if the issue
is not already reported. # is not already reported.
""" # """
try: # try:
self.get(url) # self.get(url)
finally: # finally:
self.service.stop() # self.service.stop()
# threading.Timer(factor or self.factor, self.close).start() # # threading.Timer(factor or self.factor, self.close).start()
time.sleep(delay or self.delay) # time.sleep(delay or self.delay)
self.service.start() # self.service.start()
self.start_session() # self.start_session()
#
def quit(self): # def quit(self):
logger.debug("closing webdriver") # logger.debug("closing webdriver")
try: # try:
self.webdriver.quit() # self.webdriver.quit()
except Exception: # noqa # except Exception: # noqa
pass # pass
try: # try:
logger.debug("killing browser") # logger.debug("killing browser")
self.browser.kill() # self.browser.kill()
self.browser.wait(1) # self.browser.wait(1)
except TimeoutError as e: # except TimeoutError as e:
logger.debug(e, exc_info=True) # logger.debug(e, exc_info=True)
except Exception: # noqa # except Exception: # noqa
pass # pass
if not self.keep_profile or self.keep_profile is False: # if not self.keep_user_data_dir or self.keep_user_data_dir is False:
for _ in range(3): # for _ in range(3):
try: # try:
logger.debug("removing profile : %s" % self.user_data_dir) # logger.debug("removing profile : %s" % self.user_data_dir)
shutil.rmtree(self.user_data_dir, ignore_errors=False) # shutil.rmtree(self.user_data_dir, ignore_errors=False)
except FileNotFoundError: # except FileNotFoundError:
pass # pass
except PermissionError: # except PermissionError:
logger.debug("permission error. files are still in use/locked. retying...") # logger.debug("permission error. files are still in use/locked. retying...")
else: # else:
break # break
time.sleep(1) # time.sleep(1)
def __del__(self): def __del__(self):
self.quit() self.quit()
@ -414,9 +374,7 @@ class Chrome(object):
return hash(self.options.debugger_address) return hash(self.options.debugger_address)
class Patcher(object): class Patcher(object):
url_repo = "https://chromedriver.storage.googleapis.com" url_repo = "https://chromedriver.storage.googleapis.com"
zip_name = "chromedriver_%s.zip" zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s" exe_name = "chromedriver%s"
@ -442,8 +400,6 @@ class Patcher(object):
d = "~/.undetected_chromedriver" d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d)) data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0): def __init__(self, executable_path=None, force=False, version_main: int = 0):
""" """
@ -465,16 +421,15 @@ class Patcher(object):
if not executable_path[-4:] == ".exe": if not executable_path[-4:] == ".exe":
executable_path += ".exe" executable_path += ".exe"
self.zip_path = os.path.join( self.zip_path = os.path.join(self.data_path, self.zip_name)
self.data_path, self.zip_name)
self.executable_path = os.path.abspath(os.path.join('.', executable_path)) self.executable_path = os.path.abspath(os.path.join(".", executable_path))
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
@classmethod @classmethod
def auto(cls, executable_path='./chromedriver', force=False): def auto(cls, executable_path="./chromedriver", force=False):
""" """
Args: Args:
@ -527,7 +482,7 @@ class Patcher(object):
def parse_exe_version(self): def parse_exe_version(self):
with io.open(self.executable_path, "rb") as f: with io.open(self.executable_path, "rb") as f:
for line in iter(lambda: f.readline(), b""): for line in iter(lambda: f.readline(), b""):
match = re.search(br"platform_handle\x00content\x00([0-9\.]*)", line) match = re.search(br"platform_handle\x00content\x00([0-9.]*)", line)
if match: if match:
return LooseVersion(match[1].decode()) return LooseVersion(match[1].decode())
@ -554,10 +509,7 @@ class Patcher(object):
except (FileNotFoundError, OSError): except (FileNotFoundError, OSError):
pass pass
os.makedirs( os.makedirs(self.data_path, mode=0o755, exist_ok=True)
self.data_path,
mode=0o755,
exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf: with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, os.path.dirname(self.executable_path)) zf.extract(self.exe_name, os.path.dirname(self.executable_path))
@ -571,8 +523,8 @@ class Patcher(object):
def force_kill_instances(exe_name): def force_kill_instances(exe_name):
""" """
kills running instances. kills running instances.
:param: executable name to kill, may be a path as well
:param self:
:return: True on success else False :return: True on success else False
""" """
exe_name = os.path.basename(exe_name) exe_name = os.path.basename(exe_name)
@ -603,7 +555,6 @@ class Patcher(object):
else: else:
return True return True
def patch_exe(self): def patch_exe(self):
""" """
Patches the ChromeDriver binary Patches the ChromeDriver binary
@ -626,12 +577,9 @@ class Patcher(object):
# class ChromeOptions(selenium.webdriver.chrome.webdriver.Options): # class ChromeOptions(selenium.webdriver.chrome.webdriver.Options):
class ChromeOptions(_ChromeOptions): class ChromeOptions(_ChromeOptions):
def add_extension_file_crx(self, extension=None): def add_extension_file_crx(self, extension=None):
if extension: if extension:
extension_to_add = os.path.abspath(os.path.expanduser(extension)) extension_to_add = os.path.abspath(os.path.expanduser(extension))
logger.debug('extension_to_add: %s' % extension_to_add) logger.debug("extension_to_add: %s" % extension_to_add)
return super().add_extension(r'%s' % extension)
return super().add_extension(r"%s" % extension)