diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index e453432..5a50601 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -18,7 +18,9 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) """ -__version__ = "3.1.2" + +__version__ = "3.1.5r2" + import json import logging @@ -99,13 +101,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def __init__( self, + options=None, user_data_dir=None, + driver_executable_path=None, browser_executable_path=None, port=0, - options=None, enable_cdp_events=False, service_args=None, desired_capabilities=None, + advanced_elements=False, service_log_path=None, keep_alive=True, log_level=0, @@ -125,10 +129,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): Parameters ---------- + options: ChromeOptions, optional, default: None - automatic useful defaults + this takes an instance of ChromeOptions, mainly to customize browser behavior. + anything other than the default, for example extensions or startup options + are not supported in case of failure, and will probably lower your undetectability. + + user_data_dir: str , optional, default: None (creates temp profile) if user_data_dir is a path to a valid chrome profile directory, use it, and turn off automatic removal mechanism at exit. + driver_executable_path: str, optional, default: None(=downloads and patches new binary) + browser_executable_path: str, optional, default: None - use find_chrome_executable Path to the browser executable. If not specified, make sure the executable's folder is in $PATH @@ -136,11 +148,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): port: int, optional, default: 0 port you would like the service to run, if left as 0, a free port will be found. - options: ChromeOptions, optional, default: None - automatic useful defaults - this takes an instance of ChromeOptions, mainly to customize browser behavior. 
- anything other dan the default, for example extensions or startup options - are not supported in case of failure, and can probably lowers your undetectability. - enable_cdp_events: bool, default: False :: currently for chrome only this enables the handling of wire messages @@ -149,12 +156,26 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): driver.add_cdp_listener("Network.dataReceived", yourcallback) # yourcallback is an callable which accepts exactly 1 dict as parameter + service_args: list of str, optional, default: None arguments to pass to the driver service desired_capabilities: dict, optional, default: None - auto from config Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref". + advanced_elements: bool, optional, default: False + makes it easier to recognize elements like you know them from html/browser inspection, especially when working + in an interactive environment + + default webelement repr: + + + advanced webelement repr + )> + + note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time. + + service_log_path: str, optional, default: None path to log information from the driver. 
@@ -205,12 +226,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ self.debug = debug patcher = Patcher( - executable_path=None, + executable_path=driver_executable_path, force=patcher_force_close, version_main=version_main, ) patcher.auto() - + self.patcher = patcher if not options: options = ChromeOptions() @@ -357,7 +378,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): desired_capabilities = options.to_capabilities() if not use_subprocess: - self.browser_pid = start_detached(options.binary_location, *options.arguments) + self.browser_pid = start_detached( + options.binary_location, *options.arguments + ) else: browser = subprocess.Popen( [options.binary_location, *options.arguments], @@ -368,8 +391,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ) self.browser_pid = browser.pid - - super(Chrome, self).__init__( executable_path=patcher.executable_path, port=port, @@ -391,6 +412,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): reactor.start() self.reactor = reactor + if advanced_elements: + from .webelement import WebElement + self._web_element_cls = WebElement + if options.headless: self._configure_headless() @@ -530,9 +555,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ if not hasattr(self, "cdp"): from .cdp import CDP - - self.cdp = CDP(self.options) - self.cdp.tab_new(url) + cdp = CDP(self.options) + cdp.tab_new(url) def reconnect(self, timeout=0.1): try: @@ -560,7 +584,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def quit(self): logger.debug("closing webdriver") - if hasattr(self, 'service') and getattr(self.service, 'process', None): + if hasattr(self, "service") and getattr(self.service, "process", None): self.service.process.kill() try: if self.reactor and isinstance(self.reactor, Reactor): @@ -598,10 +622,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): break time.sleep(0.1) + # dereference patcher, so patcher can start 
cleaning up as well. + # this must come last, otherwise it will throw 'in use' errors + self.patcher = None + def __del__(self): try: self.service.process.kill() - except: + except: # noqa pass self.quit() @@ -631,11 +659,20 @@ def find_chrome_executable(): candidates = set() if IS_POSIX: for item in os.environ.get("PATH").split(os.pathsep): - for subitem in ("google-chrome", "chromium", "chromium-browser", "chrome"): + for subitem in ( + "google-chrome", + "chromium", + "chromium-browser", + "chrome", + "google-chrome-stable", + ): candidates.add(os.sep.join((item, subitem))) if "darwin" in sys.platform: candidates.update( - ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"] + [ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium" + ] ) else: for item in map( @@ -645,6 +682,7 @@ def find_chrome_executable(): "Google/Chrome/Application", "Google/Chrome Beta/Application", "Google/Chrome Canary/Application", + ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index d53be59..4bc13e7 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -11,6 +11,8 @@ import sys import zipfile from distutils.version import LooseVersion from urllib.request import urlopen, urlretrieve +import secrets + logger = logging.getLogger(__name__) @@ -57,9 +59,12 @@ class Patcher(object): self.force = force self.executable_path = None + prefix = secrets.token_hex(8) if not executable_path: - self.executable_path = os.path.join(self.data_path, self.exe_name) + self.executable_path = os.path.join( + self.data_path, "_".join([prefix, self.exe_name]) + ) if not IS_POSIX: if executable_path: @@ -119,7 +124,6 @@ class Patcher(object): self.version_main = release.version[0] self.version_full = release self.unzip_package(self.fetch_package()) - # i.patch() return 
self.patch() def patch(self): @@ -169,10 +173,13 @@ class Patcher(object): except (FileNotFoundError, OSError): pass - os.makedirs(self.data_path, mode=0o755, exist_ok=True) - + os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True) with zipfile.ZipFile(fp, mode="r") as zf: - zf.extract(self.exe_name, os.path.dirname(self.executable_path)) + zf.extract(self.exe_name, os.path.dirname(self.zip_path)) + os.rename( + os.path.join(self.data_path, self.exe_name), + self.executable_path + ) os.remove(fp) os.chmod(self.executable_path, 0o755) return self.executable_path @@ -237,3 +244,17 @@ class Patcher(object): self.__class__.__name__, self.executable_path, ) + + def __del__(self): + try: + if not self._custom_exe_path: + # we will not delete custom exe paths. + # but this also voids support. + # downloading and patching makes sure you never use the same $cdc values, see patch_exe() + # after all, this program has a focus on detectability... + os.unlink(self.executable_path) + + # except (OSError, RuntimeError, PermissionError): + # pass + except: + raise diff --git a/undetected_chromedriver/tests/quick_test_cf.cmd b/undetected_chromedriver/tests/quick_test_cf.cmd new file mode 100644 index 0000000..00a4810 --- /dev/null +++ b/undetected_chromedriver/tests/quick_test_cf.cmd @@ -0,0 +1,80 @@ +@echo off +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:: +:: QUICK TEST FOR UNDETECTED-CHROMEDRIVER TO CHECK IF CLOUDFLARE IAUAM CAN BE PASSED +:: +:: To make it as clean as possible without interfering packages or plugins: +:: - this creates a new python virtual environment +:: - installs undetected chromedriver +:: - executes a test +:: - cleans up the virtual environment +:: +:: this is for Windows only currently +:: +::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + + +set uc_test_dir=%temp%\ucvenv + +set curdir=%CD% +set prog= + + +:: =================== +:main + +call 
:hasprog "conda" +if [%prog%]==[conda] ( + echo "conda is found, activating..." + call %prog% activate + goto :next + exit +) + +call :hasprog "python" +if [%prog%]==[python] ( + echo "python is found" + goto :next + exit +) + +echo "no python interpreter or conda could be found. exiting" +exit 1 + + + +:: =================== +:hasprog +call %~1 --help >nul 2>&1 +if ERRORLEVEL 0 ( + set prog=%~1 +) +exit /B + + + +:: =================== +:next + +mkdir %uc_test_dir% +echo "created temp directory for the virtual environment: %uc_test_dir%" + +python -m venv %uc_test_dir% + +set pythonv=%uc_test_dir%\scripts\python +%pythonv% -m pip install -U undetected-chromedriver +%pythonv% -c "exec(\"import time,logging,undetected_chromedriver as uc,selenium.webdriver.support.expected_conditions as ec,selenium.webdriver.support.wait as wwait;logging.basicConfig(level=10);dr=uc.Chrome();dr.get('https://nowsecure.nl');wwait.WebDriverWait(dr,15).until(ec.visibility_of_element_located(('css selector','.hystericalbg')));print('====================WORKING=============');time.sleep(3)\")" + + +if [%prog%]==[conda] ( + echo "deactivating conda env" + %prog% deactivate +) + +cd %curdir% +rd /S /Q %uc_test_dir% +echo "cleaning up temp directory for the virtual environment: %uc_test_dir%" + + + + diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py new file mode 100644 index 0000000..4c2affc --- /dev/null +++ b/undetected_chromedriver/webelement.py @@ -0,0 +1,39 @@ +import selenium.webdriver.remote.webelement + + +class WebElement(selenium.webdriver.remote.webelement.WebElement): + """ + Custom WebElement class which makes it easier to view elements when + working in an interactive environment. 
+ + standard webelement repr: + + + using this WebElement class: + )> + + """ + + + + @property + def attrs(self): + if not hasattr(self, "_attrs"): + self._attrs = self._parent.execute_script( + """ + var items = {}; + for (index = 0; index < arguments[0].attributes.length; ++index) + { + items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value + }; + return items; + """, + self, + ) + return self._attrs + + def __repr__(self): + strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()]) + if strattrs: + strattrs = " " + strattrs + return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"