Merge pull request #543 from ultrafunkamsterdam/3.1.5

3.1.5
2022-03-14 00:40:55 +01:00 · 2022-03-14 00:40:55 +01:00 · fdd8e3c705
parent b13d94e08a 5c0d2e4cb8
commit fdd8e3c705
4 changed files with 202 additions and 24 deletions
--- a/undetected_chromedriver/init.py
+++ b/undetected_chromedriver/init.py
@ -18,7 +18,9 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)

 """

-__version__ = "3.1.2"
+
+__version__ = "3.1.5r2"
+

 import json
 import logging
@ -99,13 +101,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):

    def __init__(
        self,
+        options=None,
        user_data_dir=None,
+        driver_executable_path=None,
        browser_executable_path=None,
        port=0,
-        options=None,
        enable_cdp_events=False,
        service_args=None,
        desired_capabilities=None,
+        advanced_elements=False,
        service_log_path=None,
        keep_alive=True,
        log_level=0,
@ -125,10 +129,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        Parameters
        ----------

+        options: ChromeOptions, optional, default: None - automatic useful defaults
+            this takes an instance of ChromeOptions, mainly to customize browser behavior.
+            anything other than the default, for example extensions or startup options,
+            is not supported in case of failure, and will probably lower your undetectability.
+
+
        user_data_dir: str , optional, default: None (creates temp profile)
            if user_data_dir is a path to a valid chrome profile directory, use it,
            and turn off automatic removal mechanism at exit.

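+        driver_executable_path: str, optional, default: None(=downloads and patches new binary)
+            path to the chromedriver executable to use; when left as None, a new binary is downloaded and patched.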
+
        browser_executable_path: str, optional, default: None - use find_chrome_executable
            Path to the browser executable.
            If not specified, make sure the executable's folder is in $PATH
@ -136,11 +148,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        port: int, optional, default: 0
            port you would like the service to run, if left as 0, a free port will be found.

-        options: ChromeOptions, optional, default: None - automatic useful defaults
-            this takes an instance of ChromeOptions, mainly to customize browser behavior.
-            anything other dan the default, for example extensions or startup options
-            are not supported in case of failure, and can probably lowers your undetectability.
-
        enable_cdp_events: bool, default: False
            :: currently for chrome only
            this enables the handling of wire messages
@ -149,12 +156,26 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                driver.add_cdp_listener("Network.dataReceived", yourcallback)
                # yourcallback is a callable which accepts exactly 1 dict as parameter

+
        service_args: list of str, optional, default: None
            arguments to pass to the driver service

        desired_capabilities: dict, optional, default: None - auto from config
            Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".

+        advanced_elements:  bool, optional, default: False
+            makes it easier to recognize elements like you know them from html/browser inspection, especially when working
+            in an interactive environment
+
+            default webelement repr:
+            <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
+
+            advanced webelement repr:
+            WebElement <a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">
+
+            note: when retrieving large amounts of elements (example: find_elements_by_tag("*")) and printing them, it takes a little more time.
+
+
        service_log_path: str, optional, default: None
             path to log information from the driver.

@ -205,12 +226,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        """
        self.debug = debug
        patcher = Patcher(
-            executable_path=None,
+            executable_path=driver_executable_path,
            force=patcher_force_close,
            version_main=version_main,
        )
        patcher.auto()
-
+        self.patcher = patcher
        if not options:
            options = ChromeOptions()

@ -357,7 +378,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            desired_capabilities = options.to_capabilities()

        if not use_subprocess:
-            self.browser_pid = start_detached(options.binary_location, *options.arguments)
+            self.browser_pid = start_detached(
+                options.binary_location, *options.arguments
+            )
        else:
            browser = subprocess.Popen(
                [options.binary_location, *options.arguments],
@ -368,8 +391,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            )
            self.browser_pid = browser.pid

-
-
        super(Chrome, self).__init__(
            executable_path=patcher.executable_path,
            port=port,
@ -391,6 +412,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            reactor.start()
            self.reactor = reactor

+        if advanced_elements:
+            from .webelement import WebElement
+            self._web_element_cls = WebElement
+
        if options.headless:
            self._configure_headless()

@ -530,9 +555,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        """
        if not hasattr(self, "cdp"):
            from .cdp import CDP
-
-            self.cdp = CDP(self.options)
-        self.cdp.tab_new(url)
+            cdp = CDP(self.options)
+            cdp.tab_new(url)

    def reconnect(self, timeout=0.1):
        try:
@ -560,7 +584,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):

    def quit(self):
        logger.debug("closing webdriver")
-        if hasattr(self, 'service') and getattr(self.service, 'process', None):
+        if hasattr(self, "service") and getattr(self.service, "process", None):
            self.service.process.kill()
        try:
            if self.reactor and isinstance(self.reactor, Reactor):
@ -598,10 +622,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                    break
                time.sleep(0.1)

+        # dereference patcher, so patcher can start cleaning up as well.
+        # this must come last, otherwise it will throw 'in use' errors
+        self.patcher = None
+
    def __del__(self):
        try:
            self.service.process.kill()
-        except:
+        except:  # noqa
            pass
        self.quit()

@ -631,11 +659,20 @@ def find_chrome_executable():
    candidates = set()
    if IS_POSIX:
        for item in os.environ.get("PATH").split(os.pathsep):
-            for subitem in ("google-chrome", "chromium", "chromium-browser", "chrome"):
+            for subitem in (
+                "google-chrome",
+                "chromium",
+                "chromium-browser",
+                "chrome",
+                "google-chrome-stable",
+            ):
                candidates.add(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates.update(
-                ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
+                [
+                  "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+                  "/Applications/Chromium.app/Contents/MacOS/Chromium"
+                ]
            )
    else:
        for item in map(
@ -645,6 +682,7 @@ def find_chrome_executable():
                "Google/Chrome/Application",
                "Google/Chrome Beta/Application",
                "Google/Chrome Canary/Application",
+                   
            ):
                candidates.add(os.sep.join((item, subitem, "chrome.exe")))
    for candidate in candidates:
--- a/undetected_chromedriver/patcher.py
+++ b/undetected_chromedriver/patcher.py
@ -11,6 +11,8 @@ import sys
 import zipfile
 from distutils.version import LooseVersion
 from urllib.request import urlopen, urlretrieve
+import secrets
+

 logger = logging.getLogger(__name__)

@ -57,9 +59,12 @@ class Patcher(object):

        self.force = force
        self.executable_path = None
+        prefix = secrets.token_hex(8)

        if not executable_path:
-            self.executable_path = os.path.join(self.data_path, self.exe_name)
+            self.executable_path = os.path.join(
+                self.data_path, "_".join([prefix, self.exe_name])
+            )

        if not IS_POSIX:
            if executable_path:
@ -119,7 +124,6 @@ class Patcher(object):
        self.version_main = release.version[0]
        self.version_full = release
        self.unzip_package(self.fetch_package())
-        # i.patch()
        return self.patch()

    def patch(self):
@ -169,10 +173,13 @@ class Patcher(object):
        except (FileNotFoundError, OSError):
            pass

-        os.makedirs(self.data_path, mode=0o755, exist_ok=True)
-
+        os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True)
        with zipfile.ZipFile(fp, mode="r") as zf:
-            zf.extract(self.exe_name, os.path.dirname(self.executable_path))
+            zf.extract(self.exe_name, os.path.dirname(self.zip_path))
+        os.rename(
+            os.path.join(self.data_path, self.exe_name),
+            self.executable_path
+        )
        os.remove(fp)
        os.chmod(self.executable_path, 0o755)
        return self.executable_path
@ -237,3 +244,17 @@ class Patcher(object):
            self.__class__.__name__,
            self.executable_path,
        )
+
+    def __del__(self):
+        """
+        Remove the driver binary of this patcher when the instance is garbage collected.
+
+        Custom (user supplied) executables are never deleted.
+        A failing delete is logged at debug level and otherwise ignored: nobody can
+        handle an exception that escapes __del__, it is only printed as a warning.
+        """
+        # a partially initialized instance may not have these attributes yet,
+        # in that case there is nothing we are allowed to clean up.
+        if getattr(self, "_custom_exe_path", True):
+            # we will not delete custom exe paths.
+            # but this also voids support.
+            # downloading and patching makes sure you never use the same $cdc values, see patch_exe()
+            # after all, this program has a focus on detectability...
+            return
+        path = getattr(self, "executable_path", None)
+        if not path:
+            return
+        try:
+            os.unlink(path)
+        except OSError as e:
+            # OSError also covers FileNotFoundError (already gone)
+            # and PermissionError (binary still in use)
+            logger.debug("could not unlink %s: %s", path, e)
--- a/undetected_chromedriver/tests/quick_test_cf.cmd
+++ b/undetected_chromedriver/tests/quick_test_cf.cmd
@ -0,0 +1,80 @@
+@echo off
+:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+::
+::   QUICK TEST FOR UNDETECTED-CHROMEDRIVER TO CHECK IF CLOUDFLARE IAUAM CAN BE PASSED
+::
+::   To make it as clean as possible without interfering packages or plugins:
+::     - this creates a new python virtual environment
+::     - installs undetected chromedriver
+::     - executes a test
+::     - cleans up the virtual environment
+::
+::   this is for Windows only currently
+::
+:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
+
+
+set uc_test_dir=%temp%\ucvenv
+
+set curdir=%CD%
+set prog=
+
+
+:: ===================
+:: entry point: looks for conda first and for a python interpreter second,
+:: and jumps to :next for the one that :hasprog reports. exits with code 1 otherwise.
+:main
+
+:: conda is preferred: "conda activate" is called before continuing at :next
+call :hasprog "conda"
+if [%prog%]==[conda]  (
+    echo "conda is found, activating..."
+    call %prog% activate
+    goto :next
+    exit
+)
+
+:: otherwise look for a python interpreter
+call :hasprog "python"
+if [%prog%]==[python] (
+    echo "python is found"
+    goto :next
+    exit
+)
+
+:: neither of the two was reported by :hasprog
+echo "no python interpreter or conda could be found. exiting"
+exit 1
+
+
+
+:: ===================
+:: sets %prog% to the given program name when "<program> --help" succeeds,
+:: and leaves %prog% untouched otherwise.
+:: note: "if ERRORLEVEL n" is true for every exit code equal to or greater than n,
+:: so "if ERRORLEVEL 0" would also match a program that could not be started.
+:: success is therefore tested as "not ERRORLEVEL 1".
+:hasprog
+call %~1 --help  >nul 2>&1
+if not ERRORLEVEL 1 (
+    set prog=%~1
+)
+exit /B
+
+
+
+:: ===================
+:: creates a virtual environment in %uc_test_dir%, installs undetected-chromedriver
+:: into it, runs the test and removes the directory again.
+:next
+
+mkdir %uc_test_dir%
+echo "created temp directory for the virtual environment: %uc_test_dir%"
+
+python -m venv %uc_test_dir%
+
+:: from here on the interpreter of the new virtual environment is used
+set pythonv=%uc_test_dir%\scripts\python
+%pythonv% -m pip install -U undetected-chromedriver
+:: the test opens https://nowsecure.nl, waits up to 15 seconds for the element
+:: matching ".hystericalbg" to become visible and then prints the WORKING banner
+%pythonv% -c  "exec(\"import time,logging,undetected_chromedriver as uc,selenium.webdriver.support.expected_conditions as ec,selenium.webdriver.support.wait as wwait;logging.basicConfig(level=10);dr=uc.Chrome();dr.get('https://nowsecure.nl');wwait.WebDriverWait(dr,15).until(ec.visibility_of_element_located(('css selector','.hystericalbg')));print('====================WORKING=============');time.sleep(3)\")"
+
+
+:: only needed when :main went through the conda branch
+if [%prog%]==[conda] (
+    echo "deactivating conda env"
+    %prog% deactivate
+)
+
+:: go back to the directory the script was started from and remove the virtual environment
+cd %curdir%
+rd /S /Q %uc_test_dir%
+echo "cleaning up temp directory for the virtual environment: %uc_test_dir%"
+
+
+
+
--- a/undetected_chromedriver/webelement.py
+++ b/undetected_chromedriver/webelement.py
@ -0,0 +1,39 @@
+import selenium.webdriver.remote.webelement
+
+
+class WebElement(selenium.webdriver.remote.webelement.WebElement):
+    """
+    Custom WebElement class which makes it easier to view elements when
+    working in an interactive environment.
+
+    standard webelement repr:
+    <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
+
+    using this WebElement class:
+    WebElement <a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">
+
+    """
+
+    @property
+    def attrs(self):
+        """
+        dict with the html attributes of the element (name -> value).
+
+        the attributes are fetched from the browser by a script on first access
+        and cached on the instance, so later changes in the page are not reflected.
+        """
+        if not hasattr(self, "_attrs"):
+            # `index` is declared with var, so the loop counter stays local to the
+            # injected script instead of becoming a global on the window of the page.
+            self._attrs = self._parent.execute_script(
+                """
+                var items = {};
+                for (var index = 0; index < arguments[0].attributes.length; ++index)
+                {
+                 items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
+                };
+                return items;
+                """,
+                self,
+            )
+        return self._attrs
+
+    def __repr__(self):
+        """return the class name followed by the element written as its html opening tag"""
+        strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
+        if strattrs:
+            strattrs = " " + strattrs
+        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"