tidy up
This commit is contained in:
parent
e4e373543f
commit
d99809c8c6
18
setup.py
18
setup.py
|
@ -15,15 +15,17 @@ from setuptools import setup
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
with open(os.path.join(os.path.abspath(
|
with open(
|
||||||
os.path.dirname(__file__)),
|
os.path.join(
|
||||||
'undetected_chromedriver',
|
os.path.abspath(os.path.dirname(__file__)),
|
||||||
'__init__.py'),
|
"undetected_chromedriver",
|
||||||
mode='r',
|
"__init__.py",
|
||||||
encoding='latin1') as fp:
|
),
|
||||||
|
mode="r",
|
||||||
|
encoding="latin1",
|
||||||
|
) as fp:
|
||||||
try:
|
try:
|
||||||
version = re.findall(r"^__version__ = '([^']+)'\r?$",
|
version = re.findall(r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0]
|
||||||
fp.read(), re.M)[0]
|
|
||||||
except Exception:
|
except Exception:
|
||||||
raise RuntimeError("unable to determine version")
|
raise RuntimeError("unable to determine version")
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ from selenium.webdriver import Chrome as _Chrome
|
||||||
from selenium.webdriver import ChromeOptions as _ChromeOptions
|
from selenium.webdriver import ChromeOptions as _ChromeOptions
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
__version__ = "2.2.6"
|
__version__ = '2.2.6'
|
||||||
|
|
||||||
TARGET_VERSION = 0
|
TARGET_VERSION = 0
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,6 @@ import logging
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import shutil
|
|
||||||
import string
|
import string
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
@ -47,11 +46,12 @@ import time
|
||||||
import zipfile
|
import zipfile
|
||||||
from distutils.version import LooseVersion
|
from distutils.version import LooseVersion
|
||||||
from urllib.request import urlopen, urlretrieve
|
from urllib.request import urlopen, urlretrieve
|
||||||
from selenium.webdriver.chrome.options import Options as _ChromeOptions
|
|
||||||
import selenium.webdriver.chrome.service
|
import selenium.webdriver.chrome.service
|
||||||
import selenium.webdriver.chrome.webdriver
|
import selenium.webdriver.chrome.webdriver
|
||||||
import selenium.webdriver.common.service
|
import selenium.webdriver.common.service
|
||||||
import selenium.webdriver.remote.webdriver
|
import selenium.webdriver.remote.webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options as _ChromeOptions
|
||||||
|
|
||||||
__all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
|
__all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
|
||||||
|
|
||||||
|
@ -61,41 +61,6 @@ logger = logging.getLogger("uc")
|
||||||
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# def get_driver(user_data_dir=None, keep_profile=False, verbose=True, headless=False):
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# Args:
|
|
||||||
# executable_path:
|
|
||||||
# profile_path:
|
|
||||||
# keep_profile:
|
|
||||||
# verbose:
|
|
||||||
# headless:
|
|
||||||
#
|
|
||||||
# Returns:
|
|
||||||
#
|
|
||||||
# """
|
|
||||||
# log_level = 0
|
|
||||||
#
|
|
||||||
# opts = ChromeOptions()
|
|
||||||
# if user_data_dir:
|
|
||||||
# opts.add_argument('--user-data-dir=%s' % user_data_dir)
|
|
||||||
#
|
|
||||||
# if headless:
|
|
||||||
# opts.headless = True
|
|
||||||
#
|
|
||||||
# if verbose:
|
|
||||||
# logging.basicConfig(level=10)
|
|
||||||
# logger.setLevel(10)
|
|
||||||
# service_log_path = 'chrome.verbose.log'
|
|
||||||
#
|
|
||||||
# else:
|
|
||||||
# service_log_path = None
|
|
||||||
#
|
|
||||||
# return Chrome(options=opts, log_level=log_level, service_log_path=service_log_path, keep_profile=keep_profile)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_chrome_executable():
|
def find_chrome_executable():
|
||||||
"""
|
"""
|
||||||
returns the full path to the chrome _browser binary
|
returns the full path to the chrome _browser binary
|
||||||
|
@ -131,14 +96,14 @@ def find_chrome_executable():
|
||||||
class Chrome(object):
|
class Chrome(object):
|
||||||
__doc__ = (
|
__doc__ = (
|
||||||
"""\
|
"""\
|
||||||
--------------------------------------------------------------------------
|
--------------------------------------------------------------------------
|
||||||
NOTE:
|
NOTE:
|
||||||
Chrome has everything included to work out of the box.
|
Chrome has everything included to work out of the box.
|
||||||
it does not `need` customizations.
|
it does not `need` customizations.
|
||||||
any customizations MAY lead to trigger bot migitation systems.
|
any customizations MAY lead to trigger bot migitation systems.
|
||||||
|
|
||||||
--------------------------------------------------------------------------
|
--------------------------------------------------------------------------
|
||||||
"""
|
"""
|
||||||
+ selenium.webdriver.remote.webdriver.WebDriver.__doc__
|
+ selenium.webdriver.remote.webdriver.WebDriver.__doc__
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -152,44 +117,31 @@ class Chrome(object):
|
||||||
service_args=None,
|
service_args=None,
|
||||||
desired_capabilities=None,
|
desired_capabilities=None,
|
||||||
service_log_path=None,
|
service_log_path=None,
|
||||||
chrome_options=None,
|
|
||||||
keep_alive=True,
|
keep_alive=True,
|
||||||
keep_profile=None,
|
keep_user_data_dir=False,
|
||||||
debug_addr=None,
|
|
||||||
log_level=0,
|
log_level=0,
|
||||||
factor=1,
|
|
||||||
delay=2,
|
|
||||||
emulate_touch=False,
|
emulate_touch=False,
|
||||||
):
|
):
|
||||||
|
|
||||||
p = Patcher.auto(executable_path=executable_path)
|
p = Patcher.auto(executable_path=executable_path)
|
||||||
# p.auto(False)
|
# p.auto(False)
|
||||||
|
|
||||||
self._patcher = p
|
self._patcher = p
|
||||||
self.factor = factor
|
|
||||||
self.delay = delay
|
|
||||||
self.port = port
|
self.port = port
|
||||||
self.process = None
|
self.process = None
|
||||||
self.browser_args = None
|
self.browser_args = None
|
||||||
self._rcount = 0
|
self._rcount = 0
|
||||||
self._rdiff = 10
|
self._rdiff = 10
|
||||||
self.keep_profile = keep_profile
|
self.keep_user_data_dir = keep_user_data_dir
|
||||||
|
|
||||||
try:
|
debug_port = selenium.webdriver.common.service.utils.free_port()
|
||||||
dbg = debug_addr.split(":")
|
debug_host = "127.0.0.1"
|
||||||
debug_host, debug_port = str(dbg[0]), int(dbg[1])
|
|
||||||
except AttributeError:
|
|
||||||
debug_port = selenium.webdriver.common.service.utils.free_port()
|
|
||||||
debug_host = "127.0.0.1"
|
|
||||||
|
|
||||||
if not debug_addr:
|
|
||||||
debug_addr = f"{debug_host}:{debug_port}"
|
|
||||||
|
|
||||||
if not options:
|
if not options:
|
||||||
options = selenium.webdriver.chrome.webdriver.Options()
|
options = selenium.webdriver.chrome.webdriver.Options()
|
||||||
|
|
||||||
if not options.debugger_address:
|
if not options.debugger_address:
|
||||||
options.debugger_address = debug_addr
|
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
||||||
|
|
||||||
if not options.binary_location:
|
if not options.binary_location:
|
||||||
options.binary_location = find_chrome_executable()
|
options.binary_location = find_chrome_executable()
|
||||||
|
@ -197,24 +149,32 @@ class Chrome(object):
|
||||||
if not desired_capabilities:
|
if not desired_capabilities:
|
||||||
desired_capabilities = options.to_capabilities()
|
desired_capabilities = options.to_capabilities()
|
||||||
|
|
||||||
|
|
||||||
user_data_dir = None
|
user_data_dir = None
|
||||||
|
|
||||||
for arg in options.arguments:
|
for arg in options.arguments:
|
||||||
if 'user-data-dir' in arg:
|
if "user-data-dir" in arg:
|
||||||
m = re.search('(?:--)?user-data-dir(?:[ =])?(.*)', arg)
|
m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
|
||||||
try:
|
try:
|
||||||
user_data_dir = m[1]
|
user_data_dir = m[1]
|
||||||
logger.debug('user-data-dir found in user argument %s => %s' % (arg, m[1]))
|
logger.debug(
|
||||||
|
"user-data-dir found in user argument %s => %s" % (arg, m[1])
|
||||||
|
)
|
||||||
|
self.keep_user_data_dir = True
|
||||||
break
|
break
|
||||||
except IndexError:
|
except IndexError:
|
||||||
logger.debug('no user data dir could be extracted from supplied argument %s ' % arg)
|
logger.debug(
|
||||||
|
"no user data dir could be extracted from supplied argument %s "
|
||||||
|
% arg
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
||||||
arg = '--user-data-dir=%s' % user_data_dir
|
self.keep_user_data_dir = False
|
||||||
|
arg = "--user-data-dir=%s" % user_data_dir
|
||||||
options.add_argument(arg)
|
options.add_argument(arg)
|
||||||
logger.debug('created a temporary folder in which the user-data (profile) will be stored during this\n'
|
logger.debug(
|
||||||
'session, and added it to chrome startup arguments: %s' % arg)
|
"created a temporary folder in which the user-data (profile) will be stored during this\n"
|
||||||
|
"session, and added it to chrome startup arguments: %s" % arg
|
||||||
|
)
|
||||||
self.user_data_dir = user_data_dir
|
self.user_data_dir = user_data_dir
|
||||||
|
|
||||||
self.options = options
|
self.options = options
|
||||||
|
@ -229,7 +189,8 @@ class Chrome(object):
|
||||||
options.binary_location,
|
options.binary_location,
|
||||||
"--remote-debugging-host=%s" % debug_host,
|
"--remote-debugging-host=%s" % debug_host,
|
||||||
"--remote-debugging-port=%s" % debug_port,
|
"--remote-debugging-port=%s" % debug_port,
|
||||||
"--log-level=%d" % log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
|
"--log-level=%d" % log_level
|
||||||
|
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
|
||||||
*extra_args,
|
*extra_args,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -248,7 +209,6 @@ class Chrome(object):
|
||||||
service_args=service_args,
|
service_args=service_args,
|
||||||
desired_capabilities=desired_capabilities,
|
desired_capabilities=desired_capabilities,
|
||||||
service_log_path=service_log_path,
|
service_log_path=service_log_path,
|
||||||
chrome_options=chrome_options,
|
|
||||||
keep_alive=keep_alive,
|
keep_alive=keep_alive,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -331,71 +291,71 @@ class Chrome(object):
|
||||||
capabilities = self.options.to_capabilities()
|
capabilities = self.options.to_capabilities()
|
||||||
self.webdriver.start_session(capabilities, browser_profile)
|
self.webdriver.start_session(capabilities, browser_profile)
|
||||||
|
|
||||||
def get_in(self, url: str, delay=2, factor=1):
|
# def get_in(self, url: str, delay=2, factor=1):
|
||||||
"""
|
# """
|
||||||
:param url: str
|
# :param url: str
|
||||||
:param delay: int
|
# :param delay: int
|
||||||
:param factor: disconnect <factor> seconds after .get()
|
# :param factor: disconnect <factor> seconds after .get()
|
||||||
too low will disconnect before get() fired.
|
# too low will disconnect before get() fired.
|
||||||
|
#
|
||||||
=================================================
|
# =================================================
|
||||||
|
#
|
||||||
In case you are being detected by some sophisticated
|
# In case you are being detected by some sophisticated
|
||||||
algorithm, and you are the kind that hates losing,
|
# algorithm, and you are the kind that hates losing,
|
||||||
this might be your friend.
|
# this might be your friend.
|
||||||
|
#
|
||||||
this currently works for hCaptcha based systems
|
# this currently works for hCaptcha based systems
|
||||||
(this includes CloudFlare!), and also passes many
|
# (this includes CloudFlare!), and also passes many
|
||||||
custom setups (eg: ticketmaster.com),
|
# custom setups (eg: ticketmaster.com),
|
||||||
|
#
|
||||||
|
#
|
||||||
Once you are past the first challenge, a cookie is saved
|
# Once you are past the first challenge, a cookie is saved
|
||||||
which (in my tests) also worked for other sites, and lasted
|
# which (in my tests) also worked for other sites, and lasted
|
||||||
my entire session! However, to play safe, i'd recommend to just
|
# my entire session! However, to play safe, i'd recommend to just
|
||||||
call it once for every new site/domain you navigate to.
|
# call it once for every new site/domain you navigate to.
|
||||||
|
#
|
||||||
NOTE: mileage may vary!
|
# NOTE: mileage may vary!
|
||||||
bad behaviour can still be detected, and this program does not
|
# bad behaviour can still be detected, and this program does not
|
||||||
magically "fix" a flagged ip.
|
# magically "fix" a flagged ip.
|
||||||
|
#
|
||||||
please don't spam issues on github! first look if the issue
|
# please don't spam issues on github! first look if the issue
|
||||||
is not already reported.
|
# is not already reported.
|
||||||
"""
|
# """
|
||||||
try:
|
# try:
|
||||||
self.get(url)
|
# self.get(url)
|
||||||
finally:
|
# finally:
|
||||||
self.service.stop()
|
# self.service.stop()
|
||||||
# threading.Timer(factor or self.factor, self.close).start()
|
# # threading.Timer(factor or self.factor, self.close).start()
|
||||||
time.sleep(delay or self.delay)
|
# time.sleep(delay or self.delay)
|
||||||
self.service.start()
|
# self.service.start()
|
||||||
self.start_session()
|
# self.start_session()
|
||||||
|
#
|
||||||
def quit(self):
|
# def quit(self):
|
||||||
logger.debug("closing webdriver")
|
# logger.debug("closing webdriver")
|
||||||
try:
|
# try:
|
||||||
self.webdriver.quit()
|
# self.webdriver.quit()
|
||||||
except Exception: # noqa
|
# except Exception: # noqa
|
||||||
pass
|
# pass
|
||||||
try:
|
# try:
|
||||||
logger.debug("killing browser")
|
# logger.debug("killing browser")
|
||||||
self.browser.kill()
|
# self.browser.kill()
|
||||||
self.browser.wait(1)
|
# self.browser.wait(1)
|
||||||
except TimeoutError as e:
|
# except TimeoutError as e:
|
||||||
logger.debug(e, exc_info=True)
|
# logger.debug(e, exc_info=True)
|
||||||
except Exception: # noqa
|
# except Exception: # noqa
|
||||||
pass
|
# pass
|
||||||
if not self.keep_profile or self.keep_profile is False:
|
# if not self.keep_user_data_dir or self.keep_user_data_dir is False:
|
||||||
for _ in range(3):
|
# for _ in range(3):
|
||||||
try:
|
# try:
|
||||||
logger.debug("removing profile : %s" % self.user_data_dir)
|
# logger.debug("removing profile : %s" % self.user_data_dir)
|
||||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
# shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||||
except FileNotFoundError:
|
# except FileNotFoundError:
|
||||||
pass
|
# pass
|
||||||
except PermissionError:
|
# except PermissionError:
|
||||||
logger.debug("permission error. files are still in use/locked. retying...")
|
# logger.debug("permission error. files are still in use/locked. retying...")
|
||||||
else:
|
# else:
|
||||||
break
|
# break
|
||||||
time.sleep(1)
|
# time.sleep(1)
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
self.quit()
|
self.quit()
|
||||||
|
@ -414,9 +374,7 @@ class Chrome(object):
|
||||||
return hash(self.options.debugger_address)
|
return hash(self.options.debugger_address)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Patcher(object):
|
class Patcher(object):
|
||||||
|
|
||||||
url_repo = "https://chromedriver.storage.googleapis.com"
|
url_repo = "https://chromedriver.storage.googleapis.com"
|
||||||
zip_name = "chromedriver_%s.zip"
|
zip_name = "chromedriver_%s.zip"
|
||||||
exe_name = "chromedriver%s"
|
exe_name = "chromedriver%s"
|
||||||
|
@ -442,8 +400,6 @@ class Patcher(object):
|
||||||
d = "~/.undetected_chromedriver"
|
d = "~/.undetected_chromedriver"
|
||||||
data_path = os.path.abspath(os.path.expanduser(d))
|
data_path = os.path.abspath(os.path.expanduser(d))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
def __init__(self, executable_path=None, force=False, version_main: int = 0):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -465,16 +421,15 @@ class Patcher(object):
|
||||||
if not executable_path[-4:] == ".exe":
|
if not executable_path[-4:] == ".exe":
|
||||||
executable_path += ".exe"
|
executable_path += ".exe"
|
||||||
|
|
||||||
self.zip_path = os.path.join(
|
self.zip_path = os.path.join(self.data_path, self.zip_name)
|
||||||
self.data_path, self.zip_name)
|
|
||||||
|
|
||||||
self.executable_path = os.path.abspath(os.path.join('.', executable_path))
|
self.executable_path = os.path.abspath(os.path.join(".", executable_path))
|
||||||
|
|
||||||
self.version_main = version_main
|
self.version_main = version_main
|
||||||
self.version_full = None
|
self.version_full = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def auto(cls, executable_path='./chromedriver', force=False):
|
def auto(cls, executable_path="./chromedriver", force=False):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -527,7 +482,7 @@ class Patcher(object):
|
||||||
def parse_exe_version(self):
|
def parse_exe_version(self):
|
||||||
with io.open(self.executable_path, "rb") as f:
|
with io.open(self.executable_path, "rb") as f:
|
||||||
for line in iter(lambda: f.readline(), b""):
|
for line in iter(lambda: f.readline(), b""):
|
||||||
match = re.search(br"platform_handle\x00content\x00([0-9\.]*)", line)
|
match = re.search(br"platform_handle\x00content\x00([0-9.]*)", line)
|
||||||
if match:
|
if match:
|
||||||
return LooseVersion(match[1].decode())
|
return LooseVersion(match[1].decode())
|
||||||
|
|
||||||
|
@ -554,10 +509,7 @@ class Patcher(object):
|
||||||
except (FileNotFoundError, OSError):
|
except (FileNotFoundError, OSError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
os.makedirs(
|
os.makedirs(self.data_path, mode=0o755, exist_ok=True)
|
||||||
self.data_path,
|
|
||||||
mode=0o755,
|
|
||||||
exist_ok=True)
|
|
||||||
|
|
||||||
with zipfile.ZipFile(fp, mode="r") as zf:
|
with zipfile.ZipFile(fp, mode="r") as zf:
|
||||||
zf.extract(self.exe_name, os.path.dirname(self.executable_path))
|
zf.extract(self.exe_name, os.path.dirname(self.executable_path))
|
||||||
|
@ -571,8 +523,8 @@ class Patcher(object):
|
||||||
def force_kill_instances(exe_name):
|
def force_kill_instances(exe_name):
|
||||||
"""
|
"""
|
||||||
kills running instances.
|
kills running instances.
|
||||||
|
:param: executable name to kill, may be a path as well
|
||||||
|
|
||||||
:param self:
|
|
||||||
:return: True on success else False
|
:return: True on success else False
|
||||||
"""
|
"""
|
||||||
exe_name = os.path.basename(exe_name)
|
exe_name = os.path.basename(exe_name)
|
||||||
|
@ -603,7 +555,6 @@ class Patcher(object):
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def patch_exe(self):
|
def patch_exe(self):
|
||||||
"""
|
"""
|
||||||
Patches the ChromeDriver binary
|
Patches the ChromeDriver binary
|
||||||
|
@ -626,12 +577,9 @@ class Patcher(object):
|
||||||
|
|
||||||
# class ChromeOptions(selenium.webdriver.chrome.webdriver.Options):
|
# class ChromeOptions(selenium.webdriver.chrome.webdriver.Options):
|
||||||
class ChromeOptions(_ChromeOptions):
|
class ChromeOptions(_ChromeOptions):
|
||||||
|
|
||||||
def add_extension_file_crx(self, extension=None):
|
def add_extension_file_crx(self, extension=None):
|
||||||
|
|
||||||
if extension:
|
if extension:
|
||||||
extension_to_add = os.path.abspath(os.path.expanduser(extension))
|
extension_to_add = os.path.abspath(os.path.expanduser(extension))
|
||||||
logger.debug('extension_to_add: %s' % extension_to_add)
|
logger.debug("extension_to_add: %s" % extension_to_add)
|
||||||
|
|
||||||
return super().add_extension(r'%s' % extension)
|
|
||||||
|
|
||||||
|
return super().add_extension(r"%s" % extension)
|
||||||
|
|
Loading…
Reference in New Issue