From 2710213a7e89f12b8d21a4b0d18a828cb4148ae9 Mon Sep 17 00:00:00 2001 From: UltrafunkAmsterdam Date: Sun, 13 Mar 2022 23:05:22 +0100 Subject: [PATCH 1/4] Patcher: changed the way the patcher works (for those using multiple sessions/processes). when not specifying an executable_path (the default, and recommended!), the filename gets randomized to <random-hex-prefix>_chromedriver[.exe]. this should fix the issue for multiprocessing (although Chrome/driver itself has restrictions in this as well, see it using processhacker). As I said before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio. find_chrome_executable: added google-chrome-stable to the list, as some distros use this name. advanced_webelements: bool, optional, default: False makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment default webelement repr: <selenium.webdriver.remote.webelement.WebElement (session="...", element="...")> advanced webelement repr: <WebElement(<tag attr="value" ...>)> note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the reprs to be fetched Chrome() parameters driver_executable_path=None ( = executable_path ) if you really need to specify your own chromedriver binary. (don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork) browser_executable_path=None ( = browser binary path ) to specify your browser in case you use exotic locations instead of the more default install folders advanced_elements=False if set to True, webelements get a nicer repr. this is very convenient when working interactively (like ipython for example). 
)> instead of --- undetected_chromedriver/__init__.py | 25 +++++++++++------ undetected_chromedriver/patcher.py | 32 ++++++++++++++++++--- undetected_chromedriver/webelement.py | 40 +++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 undetected_chromedriver/webelement.py diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index e453432..fff8159 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -99,10 +99,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def __init__( self, + options=None, user_data_dir=None, + driver_executable_path=None, browser_executable_path=None, port=0, - options=None, enable_cdp_events=False, service_args=None, desired_capabilities=None, @@ -125,10 +126,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): Parameters ---------- + options: ChromeOptions, optional, default: None - automatic useful defaults + this takes an instance of ChromeOptions, mainly to customize browser behavior. + anything other dan the default, for example extensions or startup options + are not supported in case of failure, and can probably lowers your undetectability. + + user_data_dir: str , optional, default: None (creates temp profile) if user_data_dir is a path to a valid chrome profile directory, use it, and turn off automatic removal mechanism at exit. + driver_executable_path: str, optional, default: None(=downloads and patches new binary) + browser_executable_path: str, optional, default: None - use find_chrome_executable Path to the browser executable. If not specified, make sure the executable's folder is in $PATH @@ -136,11 +145,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): port: int, optional, default: 0 port you would like the service to run, if left as 0, a free port will be found. 
- options: ChromeOptions, optional, default: None - automatic useful defaults - this takes an instance of ChromeOptions, mainly to customize browser behavior. - anything other dan the default, for example extensions or startup options - are not supported in case of failure, and can probably lowers your undetectability. - enable_cdp_events: bool, default: False :: currently for chrome only this enables the handling of wire messages @@ -205,12 +209,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ self.debug = debug patcher = Patcher( - executable_path=None, + executable_path=driver_executable_path, force=patcher_force_close, version_main=version_main, ) patcher.auto() - + self.patcher = patcher if not options: options = ChromeOptions() @@ -598,6 +602,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): break time.sleep(0.1) + # dereference patcher, so patcher can start cleaning up as well. + # this must come last, otherwise it will throw 'in use' errors + self.patcher = None + + def __del__(self): try: self.service.process.kill() diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index d53be59..d11b5f2 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -11,6 +11,8 @@ import sys import zipfile from distutils.version import LooseVersion from urllib.request import urlopen, urlretrieve +import secrets + logger = logging.getLogger(__name__) @@ -57,16 +59,19 @@ class Patcher(object): self.force = force self.executable_path = None + prefix = secrets.token_hex(8) if not executable_path: - self.executable_path = os.path.join(self.data_path, self.exe_name) + + self.executable_path = os.path.join(self.data_path, "_".join([prefix, self.exe_name])) if not IS_POSIX: if executable_path: if not executable_path[-4:] == ".exe": executable_path += ".exe" - self.zip_path = os.path.join(self.data_path, self.zip_name) + + # self.zip_path = os.path.join(self.data_path, 
self.zip_name) if not executable_path: self.executable_path = os.path.abspath( @@ -78,6 +83,9 @@ class Patcher(object): if executable_path: self._custom_exe_path = True self.executable_path = executable_path + self.data_path = os.path.dirname(executable_path) + + self.zip_path = os.path.join(os.path.dirname(self.executable_path), self.exe_name) self.version_main = version_main self.version_full = None @@ -169,10 +177,11 @@ class Patcher(object): except (FileNotFoundError, OSError): pass - os.makedirs(self.data_path, mode=0o755, exist_ok=True) + os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True) with zipfile.ZipFile(fp, mode="r") as zf: - zf.extract(self.exe_name, os.path.dirname(self.executable_path)) + zf.extract(self.exe_name, os.path.dirname(self.zip_path)) + os.rename(self.zip_path, self.executable_path) os.remove(fp) os.chmod(self.executable_path, 0o755) return self.executable_path @@ -237,3 +246,18 @@ class Patcher(object): self.__class__.__name__, self.executable_path, ) + + def __del__(self): + print('patcher__del__ called') + try: + if not self._custom_exe_path: + # we will not delete custom exe paths. + # but this also voids support. + # downloading and patching makes sure you never use the same $cdc values, see patch_exe() + # after all, this program has a focus on detectability... + os.unlink(self.executable_path) + + # except (OSError, RuntimeError, PermissionError): + # pass + except: + raise diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py new file mode 100644 index 0000000..d1bcbee --- /dev/null +++ b/undetected_chromedriver/webelement.py @@ -0,0 +1,40 @@ +import selenium.webdriver.remote.webelement + + +class WebElement(selenium.webdriver.remote.webelement.WebElement): + """ + Custom WebElement class which makes it easier to view elements when + working in an interactive environment. 
+ + standard webelement repr: + + + using this WebElement class: + )> + + """ + _attrs = {} + + @property + def attrs(self): + if not hasattr(self, "_attrs"): + self._attrs = self._parent.execute_script( + """ + var items = {}; + for (index = 0; index < arguments[0].attributes.length; ++index) + { + items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value + }; + return items; + """, + self, + ) + return self._attrs + + def __repr__(self): + strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()]) + if strattrs: + strattrs = " " + strattrs + return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>" + + From 087fa8d732936f7da507f1e9e86c7ff912370abf Mon Sep 17 00:00:00 2001 From: UltrafunkAmsterdam Date: Sun, 13 Mar 2022 23:05:22 +0100 Subject: [PATCH 2/4] Patcher: changed the way how patcher works (for those using multiple sessions/processes). when not specifying a executable_path (the default, and recommended!), the filename gets randomized to _chromedriver[.exe]. this should fix the issue for multiprocessing (although Chrome/driver itself has restrictions in this as well, see it using processhacker). As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.) find_chrome_executable: added google-chrome-stable to the list, as some distro's have this name. advanced_webelements: bool, optional, default: False makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment default webelement repr: advanced webelement repr )> note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch Chrome() parameters driver_executable_path=None ( = executable_path ) if you really need to specify your own chromedriver binary. 
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork) browser_executable_path=None ( = browser binary path ) to specify your browser in case you use exotic locations instead of the more default install folders advanced_elements=False if set to True, webelements get a nicer REPR showing. this is very convenient when working interactively (like ipython for example). )> instead of --- undetected_chromedriver/__init__.py | 25 +++++++++++------ undetected_chromedriver/patcher.py | 34 ++++++++++++++++++++--- undetected_chromedriver/webelement.py | 40 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 12 deletions(-) create mode 100644 undetected_chromedriver/webelement.py diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index e453432..fff8159 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -99,10 +99,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def __init__( self, + options=None, user_data_dir=None, + driver_executable_path=None, browser_executable_path=None, port=0, - options=None, enable_cdp_events=False, service_args=None, desired_capabilities=None, @@ -125,10 +126,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): Parameters ---------- + options: ChromeOptions, optional, default: None - automatic useful defaults + this takes an instance of ChromeOptions, mainly to customize browser behavior. + anything other dan the default, for example extensions or startup options + are not supported in case of failure, and can probably lowers your undetectability. + + user_data_dir: str , optional, default: None (creates temp profile) if user_data_dir is a path to a valid chrome profile directory, use it, and turn off automatic removal mechanism at exit. 
+ driver_executable_path: str, optional, default: None(=downloads and patches new binary) + browser_executable_path: str, optional, default: None - use find_chrome_executable Path to the browser executable. If not specified, make sure the executable's folder is in $PATH @@ -136,11 +145,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): port: int, optional, default: 0 port you would like the service to run, if left as 0, a free port will be found. - options: ChromeOptions, optional, default: None - automatic useful defaults - this takes an instance of ChromeOptions, mainly to customize browser behavior. - anything other dan the default, for example extensions or startup options - are not supported in case of failure, and can probably lowers your undetectability. - enable_cdp_events: bool, default: False :: currently for chrome only this enables the handling of wire messages @@ -205,12 +209,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ self.debug = debug patcher = Patcher( - executable_path=None, + executable_path=driver_executable_path, force=patcher_force_close, version_main=version_main, ) patcher.auto() - + self.patcher = patcher if not options: options = ChromeOptions() @@ -598,6 +602,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): break time.sleep(0.1) + # dereference patcher, so patcher can start cleaning up as well. 
+ # this must come last, otherwise it will throw 'in use' errors + self.patcher = None + + def __del__(self): try: self.service.process.kill() diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index d53be59..e36d5e7 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -11,6 +11,8 @@ import sys import zipfile from distutils.version import LooseVersion from urllib.request import urlopen, urlretrieve +import secrets + logger = logging.getLogger(__name__) @@ -57,16 +59,20 @@ class Patcher(object): self.force = force self.executable_path = None + prefix = secrets.token_hex(8) if not executable_path: - self.executable_path = os.path.join(self.data_path, self.exe_name) + + self.executable_path = os.path.join( + self.data_path, "_".join([prefix, self.exe_name]) + ) if not IS_POSIX: if executable_path: if not executable_path[-4:] == ".exe": executable_path += ".exe" - self.zip_path = os.path.join(self.data_path, self.zip_name) + # self.zip_path = os.path.join(self.data_path, self.zip_name) if not executable_path: self.executable_path = os.path.abspath( @@ -78,6 +84,11 @@ class Patcher(object): if executable_path: self._custom_exe_path = True self.executable_path = executable_path + self.data_path = os.path.dirname(executable_path) + + self.zip_path = os.path.join( + os.path.dirname(self.executable_path), self.exe_name + ) self.version_main = version_main self.version_full = None @@ -169,10 +180,11 @@ class Patcher(object): except (FileNotFoundError, OSError): pass - os.makedirs(self.data_path, mode=0o755, exist_ok=True) + os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True) with zipfile.ZipFile(fp, mode="r") as zf: - zf.extract(self.exe_name, os.path.dirname(self.executable_path)) + zf.extract(self.exe_name, os.path.dirname(self.zip_path)) + os.rename(self.zip_path, self.executable_path) os.remove(fp) os.chmod(self.executable_path, 0o755) return self.executable_path @@ -237,3 +249,17 
@@ class Patcher(object): self.__class__.__name__, self.executable_path, ) + + def __del__(self): + try: + if not self._custom_exe_path: + # we will not delete custom exe paths. + # but this also voids support. + # downloading and patching makes sure you never use the same $cdc values, see patch_exe() + # after all, this program has a focus on detectability... + os.unlink(self.executable_path) + + # except (OSError, RuntimeError, PermissionError): + # pass + except: + raise diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py new file mode 100644 index 0000000..d1bcbee --- /dev/null +++ b/undetected_chromedriver/webelement.py @@ -0,0 +1,40 @@ +import selenium.webdriver.remote.webelement + + +class WebElement(selenium.webdriver.remote.webelement.WebElement): + """ + Custom WebElement class which makes it easier to view elements when + working in an interactive environment. + + standard webelement repr: + + + using this WebElement class: + )> + + """ + _attrs = {} + + @property + def attrs(self): + if not hasattr(self, "_attrs"): + self._attrs = self._parent.execute_script( + """ + var items = {}; + for (index = 0; index < arguments[0].attributes.length; ++index) + { + items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value + }; + return items; + """, + self, + ) + return self._attrs + + def __repr__(self): + strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()]) + if strattrs: + strattrs = " " + strattrs + return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>" + + From fa007b1742f1527191b2cdf03c0077845e5b96eb Mon Sep 17 00:00:00 2001 From: UltrafunkAmsterdam Date: Mon, 14 Mar 2022 00:22:13 +0100 Subject: [PATCH 3/4] added quic test cloudflare script for windows --- undetected_chromedriver/webelement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py index c944db8..4c2affc 100644 --- 
a/undetected_chromedriver/webelement.py +++ b/undetected_chromedriver/webelement.py @@ -36,4 +36,4 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement): strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()]) if strattrs: strattrs = " " + strattrs - return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>" + return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>" From 5c0d2e4cb8d404eb40361b9629643027a40cae60 Mon Sep 17 00:00:00 2001 From: Leon Date: Mon, 14 Mar 2022 00:37:12 +0100 Subject: [PATCH 4/4] Update __init__.py --- undetected_chromedriver/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 39617e5..5a50601 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -669,7 +669,10 @@ def find_chrome_executable(): candidates.add(os.sep.join((item, subitem))) if "darwin" in sys.platform: candidates.update( - ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"] + [ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium" + ] ) else: for item in map( @@ -679,6 +682,7 @@ def find_chrome_executable(): "Google/Chrome/Application", "Google/Chrome Beta/Application", "Google/Chrome Canary/Application", + ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: