From 487969811851be6bcf6e3c55c8fc0d471940c6c3 Mon Sep 17 00:00:00 2001 From: sebdelsol Date: Wed, 16 Mar 2022 20:32:26 +0100 Subject: [PATCH] - fix unlinking driver at exit - speed up exit process - fix creation of driver in multithreaded scenario - experimental_option now supports "nested" (dotted) key strings (e.g. options.add_experimental_option("prefs", {"profile.default_content_setting_values.images": 2})) Author: sebdelsol Author: UltrafunkAmsterdam --- setup.py | 9 ++--- undetected_chromedriver/__init__.py | 12 +++--- undetected_chromedriver/options.py | 34 +++++++++++++++++ undetected_chromedriver/patcher.py | 54 +++++++++++++++++---------- undetected_chromedriver/webelement.py | 2 - 5 files changed, 80 insertions(+), 31 deletions(-) diff --git a/setup.py b/setup.py index 70e1bc5..bf688f1 100644 --- a/setup.py +++ b/setup.py @@ -29,9 +29,10 @@ with codecs.open( except Exception: raise RuntimeError("unable to determine version") -description = ('Selenium.webdriver.Chrome replacement with compatiblity for Brave, and other Chromium based browsers.', -'Not triggered by CloudFlare/Imperva/hCaptcha and such.', -'NOTE: results may vary due to many factors. No guarantees are given, except for ongoing efforts in understanding detection algorithms.' +description = ( + "Selenium.webdriver.Chrome replacement with compatiblity for Brave, and other Chromium based browsers.", + "Not triggered by CloudFlare/Imperva/hCaptcha and such.", + "NOTE: results may vary due to many factors. 
No guarantees are given, except for ongoing efforts in understanding detection algorithms.", ) setup( @@ -48,9 +49,7 @@ setup( author="UltrafunkAmsterdam", author_email="info@blackhat-security.nl", description=description, - long_description=open(os.path.join(dirname, "README.md"), encoding="utf-8").read(), - long_description_content_type="text/markdown", classifiers=[ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 5a50601..7f872de 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -19,7 +19,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) """ -__version__ = "3.1.5r2" +__version__ = "3.1.5r3" import json @@ -414,6 +414,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if advanced_elements: from .webelement import WebElement + self._web_element_cls = WebElement if options.headless: @@ -555,6 +556,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ if not hasattr(self, "cdp"): from .cdp import CDP + cdp = CDP(self.options) cdp.tab_new(url) @@ -628,7 +630,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def __del__(self): try: - self.service.process.kill() + super().quit() + # self.service.process.kill() except: # noqa pass self.quit() @@ -670,8 +673,8 @@ def find_chrome_executable(): if "darwin" in sys.platform: candidates.update( [ - "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", - "/Applications/Chromium.app/Contents/MacOS/Chromium" + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", ] ) else: @@ -682,7 +685,6 @@ def find_chrome_executable(): "Google/Chrome/Application", "Google/Chrome Beta/Application", "Google/Chrome Canary/Application", - ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: diff --git 
a/undetected_chromedriver/options.py b/undetected_chromedriver/options.py index d6f8b8f..f262319 100644 --- a/undetected_chromedriver/options.py +++ b/undetected_chromedriver/options.py @@ -2,7 +2,9 @@ # this module is part of undetected_chromedriver +import json import os + from selenium.webdriver.chromium.options import ChromiumOptions as _ChromiumOptions @@ -29,6 +31,38 @@ class ChromeOptions(_ChromiumOptions): apath = os.path.abspath(path) self._user_data_dir = os.path.normpath(apath) + @staticmethod + def _undot_key(key, value): + """turn a (dotted key, value) into a proper nested dict""" + if "." in key: + key, rest = key.split(".", 1) + value = ChromeOptions._undot_key(rest, value) + return {key: value} + + def handle_prefs(self, user_data_dir): + prefs = self.experimental_options.get("prefs") + if prefs: + + user_data_dir = user_data_dir or self._user_data_dir + default_path = os.path.join(user_data_dir, "Default") + os.makedirs(default_path, exist_ok=True) + + # undot prefs dict keys + undot_prefs = {} + for key, value in prefs.items(): + undot_prefs.update(self._undot_key(key, value)) + + prefs_file = os.path.join(default_path, "Preferences") + if os.path.exists(prefs_file): + with open(prefs_file, encoding="latin1", mode="r") as f: + undot_prefs.update(json.load(f)) + + with open(prefs_file, encoding="latin1", mode="w") as f: + json.dump(undot_prefs, f) + + # remove the experimental_options to avoid an error + del self._experimental_options["prefs"] + @classmethod def from_options(cls, options): o = cls() diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index 4bc13e7..c7818c7 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -8,6 +8,7 @@ import random import re import string import sys +import time import zipfile from distutils.version import LooseVersion from urllib.request import urlopen, urlretrieve @@ -61,6 +62,9 @@ class Patcher(object): self.executable_path = None prefix 
= secrets.token_hex(8) + if not os.path.exists(self.data_path): + os.makedirs(self.data_path, exist_ok=True) + if not executable_path: self.executable_path = os.path.join( self.data_path, "_".join([prefix, self.exe_name]) @@ -71,7 +75,7 @@ class Patcher(object): if not executable_path[-4:] == ".exe": executable_path += ".exe" - self.zip_path = os.path.join(self.data_path, self.zip_name) + self.zip_path = os.path.join(self.data_path, prefix) if not executable_path: self.executable_path = os.path.abspath( @@ -146,7 +150,7 @@ class Patcher(object): def parse_exe_version(self): with io.open(self.executable_path, "rb") as f: for line in iter(lambda: f.readline(), b""): - match = re.search(br"platform_handle\x00content\x00([0-9.]*)", line) + match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line) if match: return LooseVersion(match[1].decode()) @@ -173,14 +177,12 @@ class Patcher(object): except (FileNotFoundError, OSError): pass - os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True) + os.makedirs(self.zip_path, mode=0o755, exist_ok=True) with zipfile.ZipFile(fp, mode="r") as zf: - zf.extract(self.exe_name, os.path.dirname(self.zip_path)) - os.rename( - os.path.join(self.data_path, self.exe_name), - self.executable_path - ) + zf.extract(self.exe_name, self.zip_path) + os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path) os.remove(fp) + os.rmdir(self.zip_path) os.chmod(self.executable_path, 0o755) return self.executable_path @@ -246,15 +248,29 @@ class Patcher(object): ) def __del__(self): - try: - if not self._custom_exe_path: - # we will not delete custom exe paths. - # but this also voids support. - # downloading and patching makes sure you never use the same $cdc values, see patch_exe() - # after all, this program has a focus on detectability... 
- os.unlink(self.executable_path) - # except (OSError, RuntimeError, PermissionError): - # pass - except: - raise + if self._custom_exe_path: + # if the driver binary is specified by user + # we assume it is important enough to not delete it + return + else: + timeout = 3 # stop trying after this many seconds + t = time.monotonic() + while True: + now = time.monotonic() + if now - t > timeout: + # we don't want to wait until the end of time + logger.debug( + "could not unlink %s in time (%d seconds)" + % (self.executable_path, timeout) + ) + break + try: + os.unlink(self.executable_path) + logger.debug("successfully unlinked %s" % self.executable_path) + break + except (OSError, RuntimeError, PermissionError): + time.sleep(0.1) + continue + except FileNotFoundError: + break diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py index 4c2affc..5b0abe4 100644 --- a/undetected_chromedriver/webelement.py +++ b/undetected_chromedriver/webelement.py @@ -14,8 +14,6 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement): """ - - @property def attrs(self): if not hasattr(self, "_attrs"):