fixed a number of bugs

- specifying custom profile
- specifying custom binary path
- downloading, patching and storing now (if not explicitly specified)
    happen in a writable folder, instead of the current working dir.

Committer: UltrafunkAmsterdam <UltrafunkAmsterdam@github>
This commit is contained in:
ultrafunkamsterdam 2021-04-27 20:05:34 +02:00
parent 5a1ce796b4
commit e4e373543f
5 changed files with 289 additions and 154 deletions

View File

@ -12,13 +12,29 @@ Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y
BY ULTRAFUNKAMSTERDAM (https://github.com/ultrafunkamsterdam)""" BY ULTRAFUNKAMSTERDAM (https://github.com/ultrafunkamsterdam)"""
from setuptools import setup from setuptools import setup
import os
import re
with open(os.path.join(os.path.abspath(
os.path.dirname(__file__)),
'undetected_chromedriver',
'__init__.py'),
mode='r',
encoding='latin1') as fp:
try:
version = re.findall(r"^__version__ = '([^']+)'\r?$",
fp.read(), re.M)[0]
except Exception:
raise RuntimeError("unable to determine version")
setup( setup(
name="undetected-chromedriver", name="undetected-chromedriver",
version="2.2.1", version=version,
packages=["undetected_chromedriver"], packages=["undetected_chromedriver"],
install_requires=["selenium",], install_requires=[
"selenium",
],
url="https://github.com/ultrafunkamsterdam/undetected-chromedriver", url="https://github.com/ultrafunkamsterdam/undetected-chromedriver",
license="GPL-3.0", license="GPL-3.0",
author="UltrafunkAmsterdam", author="UltrafunkAmsterdam",
@ -37,4 +53,3 @@ setup(
"Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.7",
], ],
) )

View File

@ -1,36 +0,0 @@
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import time # noqa
def test_undetected_chromedriver():
import undetected_chromedriver.v2 as uc
driver = uc.Chrome()
with driver:
driver.get("https://coinfaucet.eu")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("coinfaucet.eu.png")
with driver:
driver.get("https://cia.gov")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("cia.gov.png")
with driver:
driver.get("https://lhcdn.botprotect.io")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("notprotect.io.png")
with driver:
driver.get("https://www.datadome.co")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("datadome.co.png")
test_undetected_chromedriver()

View File

@ -31,7 +31,7 @@ from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions from selenium.webdriver import ChromeOptions as _ChromeOptions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
__version__ = "2.2.6"
TARGET_VERSION = 0 TARGET_VERSION = 0

View File

@ -0,0 +1,55 @@
import logging
import os
import sys
import time # noqa
from ..v2 import *
logging.basicConfig(level=10)
logger = logging.getLogger('TEST')
logger.setLevel(20)
def test_quick():
import undetected_chromedriver.v2 as uc
print('uc module: ', uc)
# options = selenium.webdriver.ChromeOptions()
options = uc.ChromeOptions()
options.add_argument('--user-data-dir=c:\\temp')
options.binary_location = uc.find_chrome_executable()
driver = uc.Chrome(executable_path='./chromedriver.exe', options=options,
service_log_path='c:\\temp\\service.log.txt')
while True:
sys.stdin.read()
def test_undetected_chromedriver():
import undetected_chromedriver.v2 as uc
driver = uc.Chrome()
with driver:
driver.get("https://coinfaucet.eu")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("coinfaucet.eu.png")
with driver:
driver.get("https://cia.gov")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("cia.gov.png")
with driver:
driver.get("https://lhcdn.botprotect.io")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("notprotect.io.png")
with driver:
driver.get("https://www.datadome.co")
time.sleep(4) # sleep only used for timing of screenshot
driver.save_screenshot("datadome.co.png")
# test_quick()
# #test_undetected_chromedriver()

View File

@ -31,7 +31,6 @@ whats new:
""" """
from __future__ import annotations from __future__ import annotations
import io import io
@ -44,14 +43,11 @@ import string
import subprocess import subprocess
import sys import sys
import tempfile import tempfile
import threading
import time import time
import zipfile import zipfile
import atexit
import contextlib
from distutils.version import LooseVersion from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve from urllib.request import urlopen, urlretrieve
from selenium.webdriver.chrome.options import Options as _ChromeOptions
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service import selenium.webdriver.common.service
@ -62,6 +58,42 @@ __all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux")) IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux"))
logger = logging.getLogger("uc") logger = logging.getLogger("uc")
logger.setLevel(logging.getLogger().getEffectiveLevel())
#
# def get_driver(user_data_dir=None, keep_profile=False, verbose=True, headless=False):
# """
#
# Args:
# executable_path:
# profile_path:
# keep_profile:
# verbose:
# headless:
#
# Returns:
#
# """
# log_level = 0
#
# opts = ChromeOptions()
# if user_data_dir:
# opts.add_argument('--user-data-dir=%s' % user_data_dir)
#
# if headless:
# opts.headless = True
#
# if verbose:
# logging.basicConfig(level=10)
# logger.setLevel(10)
# service_log_path = 'chrome.verbose.log'
#
# else:
# service_log_path = None
#
# return Chrome(options=opts, log_level=log_level, service_log_path=service_log_path, keep_profile=keep_profile)
def find_chrome_executable(): def find_chrome_executable():
@ -97,17 +129,16 @@ def find_chrome_executable():
class Chrome(object): class Chrome(object):
__doc__ = ( __doc__ = (
"""\ """\
-------------------------------------------------------------------------- --------------------------------------------------------------------------
NOTE: NOTE:
Chrome has everything included to work out of the box. Chrome has everything included to work out of the box.
it does not `need` customizations. it does not `need` customizations.
any customizations MAY lead to trigger bot migitation systems. any customizations MAY lead to trigger bot migitation systems.
-------------------------------------------------------------------------- --------------------------------------------------------------------------
""" """
+ selenium.webdriver.remote.webdriver.WebDriver.__doc__ + selenium.webdriver.remote.webdriver.WebDriver.__doc__
) )
@ -123,15 +154,16 @@ class Chrome(object):
service_log_path=None, service_log_path=None,
chrome_options=None, chrome_options=None,
keep_alive=True, keep_alive=True,
keep_profile=None,
debug_addr=None, debug_addr=None,
user_data_dir=None, log_level=0,
factor=1, factor=1,
delay=2, delay=2,
emulate_touch=False, emulate_touch=False,
): ):
p = Patcher(target_path=executable_path) p = Patcher.auto(executable_path=executable_path)
p.auto(False) # p.auto(False)
self._patcher = p self._patcher = p
self.factor = factor self.factor = factor
@ -141,6 +173,7 @@ class Chrome(object):
self.browser_args = None self.browser_args = None
self._rcount = 0 self._rcount = 0
self._rdiff = 10 self._rdiff = 10
self.keep_profile = keep_profile
try: try:
dbg = debug_addr.split(":") dbg = debug_addr.split(":")
@ -152,9 +185,6 @@ class Chrome(object):
if not debug_addr: if not debug_addr:
debug_addr = f"{debug_host}:{debug_port}" debug_addr = f"{debug_host}:{debug_port}"
if not user_data_dir:
user_data_dir = os.path.normpath(tempfile.mkdtemp())
if not options: if not options:
options = selenium.webdriver.chrome.webdriver.Options() options = selenium.webdriver.chrome.webdriver.Options()
@ -167,9 +197,28 @@ class Chrome(object):
if not desired_capabilities: if not desired_capabilities:
desired_capabilities = options.to_capabilities() desired_capabilities = options.to_capabilities()
self.options = options
user_data_dir = None
for arg in options.arguments:
if 'user-data-dir' in arg:
m = re.search('(?:--)?user-data-dir(?:[ =])?(.*)', arg)
try:
user_data_dir = m[1]
logger.debug('user-data-dir found in user argument %s => %s' % (arg, m[1]))
break
except IndexError:
logger.debug('no user data dir could be extracted from supplied argument %s ' % arg)
else:
user_data_dir = os.path.normpath(tempfile.mkdtemp())
arg = '--user-data-dir=%s' % user_data_dir
options.add_argument(arg)
logger.debug('created a temporary folder in which the user-data (profile) will be stored during this\n'
'session, and added it to chrome startup arguments: %s' % arg)
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
self.options = options
extra_args = options.arguments extra_args = options.arguments
if options.headless: if options.headless:
@ -178,10 +227,9 @@ class Chrome(object):
self.browser_args = [ self.browser_args = [
options.binary_location, options.binary_location,
"--user-data-dir=%s" % user_data_dir,
"--remote-debugging-host=%s" % debug_host, "--remote-debugging-host=%s" % debug_host,
"--remote-debugging-port=%s" % debug_port, "--remote-debugging-port=%s" % debug_port,
"--log-level=%d" % divmod(logging.getLogger().getEffectiveLevel(), 10)[0], "--log-level=%d" % log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0],
*extra_args, *extra_args,
] ]
@ -194,7 +242,7 @@ class Chrome(object):
) )
self.webdriver = selenium.webdriver.chrome.webdriver.WebDriver( self.webdriver = selenium.webdriver.chrome.webdriver.WebDriver(
executable_path=p.target_path, # executable_path=p.executable_path,
port=port, port=port,
options=options, options=options,
service_args=service_args, service_args=service_args,
@ -336,13 +384,18 @@ class Chrome(object):
logger.debug(e, exc_info=True) logger.debug(e, exc_info=True)
except Exception: # noqa except Exception: # noqa
pass pass
try: if not self.keep_profile or self.keep_profile is False:
logger.debug("removing profile : %s" % self.user_data_dir) for _ in range(3):
shutil.rmtree(self.user_data_dir, ignore_errors=False) try:
except PermissionError: logger.debug("removing profile : %s" % self.user_data_dir)
logger.debug("permission error. files are still in use/locked. retying...") shutil.rmtree(self.user_data_dir, ignore_errors=False)
time.sleep(1) except FileNotFoundError:
self.quit() pass
except PermissionError:
logger.debug("permission error. files are still in use/locked. retying...")
else:
break
time.sleep(1)
def __del__(self): def __del__(self):
self.quit() self.quit()
@ -361,50 +414,102 @@ class Chrome(object):
return hash(self.options.debugger_address) return hash(self.options.debugger_address)
class Patcher(object):
url_repo = "https://chromedriver.storage.googleapis.com"
def __init__( class Patcher(object):
self, target_path="./chromedriver", force=False, version_main: int = 0
): url_repo = "https://chromedriver.storage.googleapis.com"
if not IS_POSIX: zip_name = "chromedriver_%s.zip"
if not target_path[-4:] == ".exe": exe_name = "chromedriver%s"
target_path += ".exe"
platform = sys.platform
if platform.endswith("win32"):
zip_name %= "win32"
exe_name %= ".exe"
if platform.endswith("linux"):
zip_name %= "linux64"
exe_name %= ""
if platform.endswith("darwin"):
zip_name %= "mac64"
exe_name %= ""
if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver"
elif platform.startswith("linux"):
d = "~/.local/share/undetected_chromedriver"
elif platform.endswith("darwin"):
d = "~/Library/Application Support/undetected_chromedriver"
else:
d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0):
"""
Args:
executable_path: None = automatic
a full file path to the chromedriver executable
force: False
terminate processes which are holding lock
version_main: 0 = auto
specify main chrome version (rounded, ex: 82)
"""
self.force = force self.force = force
z, e = self.get_package_name()
if not target_path:
target_path = e
self.exename = e if not executable_path:
self.target_path = target_path executable_path = os.path.join(self.data_path, self.exe_name)
self.zipname = z
if not IS_POSIX:
if not executable_path[-4:] == ".exe":
executable_path += ".exe"
self.zip_path = os.path.join(
self.data_path, self.zip_name)
self.executable_path = os.path.abspath(os.path.join('.', executable_path))
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
def auto(self, force=False): @classmethod
def auto(cls, executable_path='./chromedriver', force=False):
"""
Args:
force:
Returns:
"""
i = cls(executable_path, force=force)
try: try:
os.unlink(self.target_path) os.unlink(i.executable_path)
except PermissionError: except PermissionError:
if i.force:
if force or self.force: cls.force_kill_instances(i.executable_path)
self.force_kill_instances() return i.auto(force=False)
return self.auto() try:
if i.is_binary_patched():
if self.verify_patch(): # assumes already running AND patched
# assumes already running AND patched return True
return True except PermissionError:
return False pass
# return False
except FileNotFoundError: except FileNotFoundError:
pass pass
release = self.fetch_release_number() release = i.fetch_release_number()
self.version_main = release.version[0] i.version_main = release.version[0]
self.version_full = release i.version_full = release
self.fetch_package() i.unzip_package(i.fetch_package())
self.unzip_package() i.patch()
return i
def patch(self):
self.patch_exe() self.patch_exe()
return self.verify_patch() return self.is_binary_patched()
def fetch_release_number(self): def fetch_release_number(self):
""" """
@ -420,7 +525,7 @@ class Patcher(object):
return LooseVersion(urlopen(self.url_repo + path).read().decode()) return LooseVersion(urlopen(self.url_repo + path).read().decode())
def parse_exe_version(self): def parse_exe_version(self):
with io.open(self.target_path, "rb") as f: with io.open(self.executable_path, "rb") as f:
for line in iter(lambda: f.readline(), b""): for line in iter(lambda: f.readline(), b""):
match = re.search(br"platform_handle\x00content\x00([0-9\.]*)", line) match = re.search(br"platform_handle\x00content\x00([0-9\.]*)", line)
if match: if match:
@ -432,61 +537,49 @@ class Patcher(object):
:return: path to downloaded file :return: path to downloaded file
""" """
u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zipname) u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name)
logger.debug("downloading from %s" % u) logger.debug("downloading from %s" % u)
zp, *_ = urlretrieve(u, filename=self.zipname) # return urlretrieve(u, filename=self.data_path)[0]
return zp return urlretrieve(u)[0]
def unzip_package(self): def unzip_package(self, fp):
""" """
Does what it says Does what it says
:return: path to unpacked executable :return: path to unpacked executable
""" """
logger.debug("unzipping %s" % self.zipname) logger.debug("unzipping %s" % fp)
try: try:
os.makedirs(os.path.dirname(self.target_path), mode=0o755) os.unlink(self.zip_path)
except OSError: except (FileNotFoundError, OSError):
pass pass
with zipfile.ZipFile(self.zipname, mode="r") as zf:
zf.extract(self.exename) os.makedirs(
os.rename(self.exename, self.target_path) self.data_path,
os.remove(self.zipname) mode=0o755,
os.chmod(self.target_path, 0o755) exist_ok=True)
return self.target_path
with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, os.path.dirname(self.executable_path))
# os.rename(self.zip_path, self.executable_path)
os.remove(fp)
os.chmod(self.executable_path, 0o755)
return self.executable_path
@staticmethod @staticmethod
def get_package_name(): def force_kill_instances(exe_name):
"""
returns a tuple of (zipname, exename) depending on platform.
:return: (zipname, exename)
"""
zipname = "chromedriver_%s.zip"
exe = "chromedriver%s"
platform = sys.platform
if platform.endswith("win32"):
zipname %= "win32"
exe %= ".exe"
if platform.endswith("linux"):
zipname %= "linux64"
exe %= ""
if platform.endswith("darwin"):
zipname %= "mac64"
exe %= ""
return zipname, exe
def force_kill_instances(self):
""" """
kills running instances. kills running instances.
:param self: :param self:
:return: True on success else False :return: True on success else False
""" """
exe_name = os.path.basename(exe_name)
if IS_POSIX: if IS_POSIX:
r = os.system("kill -f -9 $(pidof %s)" % self.exename) r = os.system("kill -f -9 $(pidof %s)" % exe_name)
else: else:
r = os.system("taskkill /f /im %s" % self.exename) r = os.system("taskkill /f /im %s" % exe_name)
return not r return not r
@staticmethod @staticmethod
@ -497,19 +590,19 @@ class Patcher(object):
cdc[3] = "_" cdc[3] = "_"
return "".join(cdc).encode() return "".join(cdc).encode()
def verify_patch(self): def is_binary_patched(self, executable_path=None):
"""simple check if executable is patched. """simple check if executable is patched.
:return: False if not patched, else True :return: False if not patched, else True
""" """
try: executable_path = executable_path or self.executable_path
with io.open(self.target_path, "rb") as fh: with io.open(executable_path, "rb") as fh:
for line in iter(lambda: fh.readline(), b""): for line in iter(lambda: fh.readline(), b""):
if b"cdc_" in line: if b"cdc_" in line:
return False return False
return True else:
except FileNotFoundError: return True
return False
def patch_exe(self): def patch_exe(self):
""" """
@ -517,12 +610,11 @@ class Patcher(object):
:return: False on failure, binary name on success :return: False on failure, binary name on success
""" """
logger.info("patching driver executable %s" % self.executable_path)
logger.info("patching driver executable %s" % self.target_path)
linect = 0 linect = 0
replacement = self.gen_random_cdc() replacement = self.gen_random_cdc()
with io.open(self.target_path, "r+b") as fh: with io.open(self.executable_path, "r+b") as fh:
for line in iter(lambda: fh.readline(), b""): for line in iter(lambda: fh.readline(), b""):
if b"cdc_" in line: if b"cdc_" in line:
fh.seek(-len(line), 1) fh.seek(-len(line), 1)
@ -532,5 +624,14 @@ class Patcher(object):
return linect return linect
class ChromeOptions(selenium.webdriver.chrome.webdriver.Options): # class ChromeOptions(selenium.webdriver.chrome.webdriver.Options):
pass class ChromeOptions(_ChromeOptions):
def add_extension_file_crx(self, extension=None):
if extension:
extension_to_add = os.path.abspath(os.path.expanduser(extension))
logger.debug('extension_to_add: %s' % extension_to_add)
return super().add_extension(r'%s' % extension)