tidy up
This commit is contained in:
parent
e4e373543f
commit
d99809c8c6
18
setup.py
18
setup.py
|
@ -15,15 +15,17 @@ from setuptools import setup
|
|||
import os
|
||||
import re
|
||||
|
||||
with open(os.path.join(os.path.abspath(
|
||||
os.path.dirname(__file__)),
|
||||
'undetected_chromedriver',
|
||||
'__init__.py'),
|
||||
mode='r',
|
||||
encoding='latin1') as fp:
|
||||
with open(
|
||||
os.path.join(
|
||||
os.path.abspath(os.path.dirname(__file__)),
|
||||
"undetected_chromedriver",
|
||||
"__init__.py",
|
||||
),
|
||||
mode="r",
|
||||
encoding="latin1",
|
||||
) as fp:
|
||||
try:
|
||||
version = re.findall(r"^__version__ = '([^']+)'\r?$",
|
||||
fp.read(), re.M)[0]
|
||||
version = re.findall(r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0]
|
||||
except Exception:
|
||||
raise RuntimeError("unable to determine version")
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ from selenium.webdriver import Chrome as _Chrome
|
|||
from selenium.webdriver import ChromeOptions as _ChromeOptions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
__version__ = "2.2.6"
|
||||
__version__ = '2.2.6'
|
||||
|
||||
TARGET_VERSION = 0
|
||||
|
||||
|
|
|
@ -38,7 +38,6 @@ import logging
|
|||
import os
|
||||
import random
|
||||
import re
|
||||
import shutil
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -47,11 +46,12 @@ import time
|
|||
import zipfile
|
||||
from distutils.version import LooseVersion
|
||||
from urllib.request import urlopen, urlretrieve
|
||||
from selenium.webdriver.chrome.options import Options as _ChromeOptions
|
||||
|
||||
import selenium.webdriver.chrome.service
|
||||
import selenium.webdriver.chrome.webdriver
|
||||
import selenium.webdriver.common.service
|
||||
import selenium.webdriver.remote.webdriver
|
||||
from selenium.webdriver.chrome.options import Options as _ChromeOptions
|
||||
|
||||
__all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
|
||||
|
||||
|
@ -61,41 +61,6 @@ logger = logging.getLogger("uc")
|
|||
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
||||
|
||||
|
||||
#
|
||||
# def get_driver(user_data_dir=None, keep_profile=False, verbose=True, headless=False):
|
||||
# """
|
||||
#
|
||||
# Args:
|
||||
# executable_path:
|
||||
# profile_path:
|
||||
# keep_profile:
|
||||
# verbose:
|
||||
# headless:
|
||||
#
|
||||
# Returns:
|
||||
#
|
||||
# """
|
||||
# log_level = 0
|
||||
#
|
||||
# opts = ChromeOptions()
|
||||
# if user_data_dir:
|
||||
# opts.add_argument('--user-data-dir=%s' % user_data_dir)
|
||||
#
|
||||
# if headless:
|
||||
# opts.headless = True
|
||||
#
|
||||
# if verbose:
|
||||
# logging.basicConfig(level=10)
|
||||
# logger.setLevel(10)
|
||||
# service_log_path = 'chrome.verbose.log'
|
||||
#
|
||||
# else:
|
||||
# service_log_path = None
|
||||
#
|
||||
# return Chrome(options=opts, log_level=log_level, service_log_path=service_log_path, keep_profile=keep_profile)
|
||||
|
||||
|
||||
|
||||
def find_chrome_executable():
|
||||
"""
|
||||
returns the full path to the chrome _browser binary
|
||||
|
@ -131,14 +96,14 @@ def find_chrome_executable():
|
|||
class Chrome(object):
|
||||
__doc__ = (
|
||||
"""\
|
||||
--------------------------------------------------------------------------
|
||||
NOTE:
|
||||
Chrome has everything included to work out of the box.
|
||||
it does not `need` customizations.
|
||||
any customizations MAY lead to trigger bot migitation systems.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
"""
|
||||
--------------------------------------------------------------------------
|
||||
NOTE:
|
||||
Chrome has everything included to work out of the box.
|
||||
it does not `need` customizations.
|
||||
any customizations MAY lead to trigger bot migitation systems.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
"""
|
||||
+ selenium.webdriver.remote.webdriver.WebDriver.__doc__
|
||||
)
|
||||
|
||||
|
@ -152,44 +117,31 @@ class Chrome(object):
|
|||
service_args=None,
|
||||
desired_capabilities=None,
|
||||
service_log_path=None,
|
||||
chrome_options=None,
|
||||
keep_alive=True,
|
||||
keep_profile=None,
|
||||
debug_addr=None,
|
||||
keep_user_data_dir=False,
|
||||
log_level=0,
|
||||
factor=1,
|
||||
delay=2,
|
||||
emulate_touch=False,
|
||||
):
|
||||
):
|
||||
|
||||
p = Patcher.auto(executable_path=executable_path)
|
||||
# p.auto(False)
|
||||
|
||||
self._patcher = p
|
||||
self.factor = factor
|
||||
self.delay = delay
|
||||
self.port = port
|
||||
self.process = None
|
||||
self.browser_args = None
|
||||
self._rcount = 0
|
||||
self._rdiff = 10
|
||||
self.keep_profile = keep_profile
|
||||
self.keep_user_data_dir = keep_user_data_dir
|
||||
|
||||
try:
|
||||
dbg = debug_addr.split(":")
|
||||
debug_host, debug_port = str(dbg[0]), int(dbg[1])
|
||||
except AttributeError:
|
||||
debug_port = selenium.webdriver.common.service.utils.free_port()
|
||||
debug_host = "127.0.0.1"
|
||||
|
||||
if not debug_addr:
|
||||
debug_addr = f"{debug_host}:{debug_port}"
|
||||
debug_port = selenium.webdriver.common.service.utils.free_port()
|
||||
debug_host = "127.0.0.1"
|
||||
|
||||
if not options:
|
||||
options = selenium.webdriver.chrome.webdriver.Options()
|
||||
|
||||
if not options.debugger_address:
|
||||
options.debugger_address = debug_addr
|
||||
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
||||
|
||||
if not options.binary_location:
|
||||
options.binary_location = find_chrome_executable()
|
||||
|
@ -197,24 +149,32 @@ class Chrome(object):
|
|||
if not desired_capabilities:
|
||||
desired_capabilities = options.to_capabilities()
|
||||
|
||||
|
||||
user_data_dir = None
|
||||
|
||||
for arg in options.arguments:
|
||||
if 'user-data-dir' in arg:
|
||||
m = re.search('(?:--)?user-data-dir(?:[ =])?(.*)', arg)
|
||||
if "user-data-dir" in arg:
|
||||
m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
|
||||
try:
|
||||
user_data_dir = m[1]
|
||||
logger.debug('user-data-dir found in user argument %s => %s' % (arg, m[1]))
|
||||
logger.debug(
|
||||
"user-data-dir found in user argument %s => %s" % (arg, m[1])
|
||||
)
|
||||
self.keep_user_data_dir = True
|
||||
break
|
||||
except IndexError:
|
||||
logger.debug('no user data dir could be extracted from supplied argument %s ' % arg)
|
||||
logger.debug(
|
||||
"no user data dir could be extracted from supplied argument %s "
|
||||
% arg
|
||||
)
|
||||
else:
|
||||
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
||||
arg = '--user-data-dir=%s' % user_data_dir
|
||||
self.keep_user_data_dir = False
|
||||
arg = "--user-data-dir=%s" % user_data_dir
|
||||
options.add_argument(arg)
|
||||
logger.debug('created a temporary folder in which the user-data (profile) will be stored during this\n'
|
||||
'session, and added it to chrome startup arguments: %s' % arg)
|
||||
logger.debug(
|
||||
"created a temporary folder in which the user-data (profile) will be stored during this\n"
|
||||
"session, and added it to chrome startup arguments: %s" % arg
|
||||
)
|
||||
self.user_data_dir = user_data_dir
|
||||
|
||||
self.options = options
|
||||
|
@ -229,7 +189,8 @@ class Chrome(object):
|
|||
options.binary_location,
|
||||
"--remote-debugging-host=%s" % debug_host,
|
||||
"--remote-debugging-port=%s" % debug_port,
|
||||
"--log-level=%d" % log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
|
||||
"--log-level=%d" % log_level
|
||||
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
|
||||
*extra_args,
|
||||
]
|
||||
|
||||
|
@ -248,7 +209,6 @@ class Chrome(object):
|
|||
service_args=service_args,
|
||||
desired_capabilities=desired_capabilities,
|
||||
service_log_path=service_log_path,
|
||||
chrome_options=chrome_options,
|
||||
keep_alive=keep_alive,
|
||||
)
|
||||
|
||||
|
@ -331,71 +291,71 @@ class Chrome(object):
|
|||
capabilities = self.options.to_capabilities()
|
||||
self.webdriver.start_session(capabilities, browser_profile)
|
||||
|
||||
def get_in(self, url: str, delay=2, factor=1):
|
||||
"""
|
||||
:param url: str
|
||||
:param delay: int
|
||||
:param factor: disconnect <factor> seconds after .get()
|
||||
too low will disconnect before get() fired.
|
||||
|
||||
=================================================
|
||||
|
||||
In case you are being detected by some sophisticated
|
||||
algorithm, and you are the kind that hates losing,
|
||||
this might be your friend.
|
||||
|
||||
this currently works for hCaptcha based systems
|
||||
(this includes CloudFlare!), and also passes many
|
||||
custom setups (eg: ticketmaster.com),
|
||||
|
||||
|
||||
Once you are past the first challenge, a cookie is saved
|
||||
which (in my tests) also worked for other sites, and lasted
|
||||
my entire session! However, to play safe, i'd recommend to just
|
||||
call it once for every new site/domain you navigate to.
|
||||
|
||||
NOTE: mileage may vary!
|
||||
bad behaviour can still be detected, and this program does not
|
||||
magically "fix" a flagged ip.
|
||||
|
||||
please don't spam issues on github! first look if the issue
|
||||
is not already reported.
|
||||
"""
|
||||
try:
|
||||
self.get(url)
|
||||
finally:
|
||||
self.service.stop()
|
||||
# threading.Timer(factor or self.factor, self.close).start()
|
||||
time.sleep(delay or self.delay)
|
||||
self.service.start()
|
||||
self.start_session()
|
||||
|
||||
def quit(self):
|
||||
logger.debug("closing webdriver")
|
||||
try:
|
||||
self.webdriver.quit()
|
||||
except Exception: # noqa
|
||||
pass
|
||||
try:
|
||||
logger.debug("killing browser")
|
||||
self.browser.kill()
|
||||
self.browser.wait(1)
|
||||
except TimeoutError as e:
|
||||
logger.debug(e, exc_info=True)
|
||||
except Exception: # noqa
|
||||
pass
|
||||
if not self.keep_profile or self.keep_profile is False:
|
||||
for _ in range(3):
|
||||
try:
|
||||
logger.debug("removing profile : %s" % self.user_data_dir)
|
||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except PermissionError:
|
||||
logger.debug("permission error. files are still in use/locked. retying...")
|
||||
else:
|
||||
break
|
||||
time.sleep(1)
|
||||
# def get_in(self, url: str, delay=2, factor=1):
|
||||
# """
|
||||
# :param url: str
|
||||
# :param delay: int
|
||||
# :param factor: disconnect <factor> seconds after .get()
|
||||
# too low will disconnect before get() fired.
|
||||
#
|
||||
# =================================================
|
||||
#
|
||||
# In case you are being detected by some sophisticated
|
||||
# algorithm, and you are the kind that hates losing,
|
||||
# this might be your friend.
|
||||
#
|
||||
# this currently works for hCaptcha based systems
|
||||
# (this includes CloudFlare!), and also passes many
|
||||
# custom setups (eg: ticketmaster.com),
|
||||
#
|
||||
#
|
||||
# Once you are past the first challenge, a cookie is saved
|
||||
# which (in my tests) also worked for other sites, and lasted
|
||||
# my entire session! However, to play safe, i'd recommend to just
|
||||
# call it once for every new site/domain you navigate to.
|
||||
#
|
||||
# NOTE: mileage may vary!
|
||||
# bad behaviour can still be detected, and this program does not
|
||||
# magically "fix" a flagged ip.
|
||||
#
|
||||
# please don't spam issues on github! first look if the issue
|
||||
# is not already reported.
|
||||
# """
|
||||
# try:
|
||||
# self.get(url)
|
||||
# finally:
|
||||
# self.service.stop()
|
||||
# # threading.Timer(factor or self.factor, self.close).start()
|
||||
# time.sleep(delay or self.delay)
|
||||
# self.service.start()
|
||||
# self.start_session()
|
||||
#
|
||||
# def quit(self):
|
||||
# logger.debug("closing webdriver")
|
||||
# try:
|
||||
# self.webdriver.quit()
|
||||
# except Exception: # noqa
|
||||
# pass
|
||||
# try:
|
||||
# logger.debug("killing browser")
|
||||
# self.browser.kill()
|
||||
# self.browser.wait(1)
|
||||
# except TimeoutError as e:
|
||||
# logger.debug(e, exc_info=True)
|
||||
# except Exception: # noqa
|
||||
# pass
|
||||
# if not self.keep_user_data_dir or self.keep_user_data_dir is False:
|
||||
# for _ in range(3):
|
||||
# try:
|
||||
# logger.debug("removing profile : %s" % self.user_data_dir)
|
||||
# shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||
# except FileNotFoundError:
|
||||
# pass
|
||||
# except PermissionError:
|
||||
# logger.debug("permission error. files are still in use/locked. retying...")
|
||||
# else:
|
||||
# break
|
||||
# time.sleep(1)
|
||||
|
||||
def __del__(self):
|
||||
self.quit()
|
||||
|
@ -414,9 +374,7 @@ class Chrome(object):
|
|||
return hash(self.options.debugger_address)
|
||||
|
||||
|
||||
|
||||
class Patcher(object):
|
||||
|
||||
url_repo = "https://chromedriver.storage.googleapis.com"
|
||||
zip_name = "chromedriver_%s.zip"
|
||||
exe_name = "chromedriver%s"
|
||||
|
@ -442,8 +400,6 @@ class Patcher(object):
|
|||
d = "~/.undetected_chromedriver"
|
||||
data_path = os.path.abspath(os.path.expanduser(d))
|
||||
|
||||
|
||||
|
||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
||||
"""
|
||||
|
||||
|
@ -465,16 +421,15 @@ class Patcher(object):
|
|||
if not executable_path[-4:] == ".exe":
|
||||
executable_path += ".exe"
|
||||
|
||||
self.zip_path = os.path.join(
|
||||
self.data_path, self.zip_name)
|
||||
self.zip_path = os.path.join(self.data_path, self.zip_name)
|
||||
|
||||
self.executable_path = os.path.abspath(os.path.join('.', executable_path))
|
||||
self.executable_path = os.path.abspath(os.path.join(".", executable_path))
|
||||
|
||||
self.version_main = version_main
|
||||
self.version_full = None
|
||||
|
||||
@classmethod
|
||||
def auto(cls, executable_path='./chromedriver', force=False):
|
||||
def auto(cls, executable_path="./chromedriver", force=False):
|
||||
"""
|
||||
|
||||
Args:
|
||||
|
@ -527,7 +482,7 @@ class Patcher(object):
|
|||
def parse_exe_version(self):
|
||||
with io.open(self.executable_path, "rb") as f:
|
||||
for line in iter(lambda: f.readline(), b""):
|
||||
match = re.search(br"platform_handle\x00content\x00([0-9\.]*)", line)
|
||||
match = re.search(br"platform_handle\x00content\x00([0-9.]*)", line)
|
||||
if match:
|
||||
return LooseVersion(match[1].decode())
|
||||
|
||||
|
@ -554,10 +509,7 @@ class Patcher(object):
|
|||
except (FileNotFoundError, OSError):
|
||||
pass
|
||||
|
||||
os.makedirs(
|
||||
self.data_path,
|
||||
mode=0o755,
|
||||
exist_ok=True)
|
||||
os.makedirs(self.data_path, mode=0o755, exist_ok=True)
|
||||
|
||||
with zipfile.ZipFile(fp, mode="r") as zf:
|
||||
zf.extract(self.exe_name, os.path.dirname(self.executable_path))
|
||||
|
@ -571,8 +523,8 @@ class Patcher(object):
|
|||
def force_kill_instances(exe_name):
|
||||
"""
|
||||
kills running instances.
|
||||
:param: executable name to kill, may be a path as well
|
||||
|
||||
:param self:
|
||||
:return: True on success else False
|
||||
"""
|
||||
exe_name = os.path.basename(exe_name)
|
||||
|
@ -603,7 +555,6 @@ class Patcher(object):
|
|||
else:
|
||||
return True
|
||||
|
||||
|
||||
def patch_exe(self):
|
||||
"""
|
||||
Patches the ChromeDriver binary
|
||||
|
@ -626,12 +577,9 @@ class Patcher(object):
|
|||
|
||||
# class ChromeOptions(selenium.webdriver.chrome.webdriver.Options):
|
||||
class ChromeOptions(_ChromeOptions):
|
||||
|
||||
def add_extension_file_crx(self, extension=None):
|
||||
|
||||
if extension:
|
||||
extension_to_add = os.path.abspath(os.path.expanduser(extension))
|
||||
logger.debug('extension_to_add: %s' % extension_to_add)
|
||||
|
||||
return super().add_extension(r'%s' % extension)
|
||||
logger.debug("extension_to_add: %s" % extension_to_add)
|
||||
|
||||
return super().add_extension(r"%s" % extension)
|
||||
|
|
Loading…
Reference in New Issue