diff --git a/README.md b/README.md index d777816..3897988 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ - # undetected_chromedriver # https://github.com/ultrafunkamsterdam/undetected-chromedriver @@ -14,17 +13,15 @@ Automatically downloads the driver binary and patches it. ### 3.2.0 ### -* added an example containing some typical webdriver code, answers to commonly asked questions, -pitfalls + showcasing some tricks to ditch the need for multithreading. +* added an example containing some typical webdriver code, answers to commonly asked questions, pitfalls + showcasing some tricks to ditch + the need for multithreading. -### [>>>> example code here <<<<](https://github.com/ultrafunkamsterdam/undetected-chromedriver/blob/master/example/example.py) +### [>>>> example code here <<<<](https://github.com/ultrafunkamsterdam/undetected-chromedriver/blob/master/example/example.py) -* added WebElement.click_safe() method, which you can try in case you get detected - after clicking a link. This is not guaranteed t o work. +* added WebElement.click_safe() method, which you can try in case you get detected after clicking a link. This is not guaranteed t o work. * added WebElement.children(self, tag=None, recursive=False) - to easily get/find child nodes. - example: + to easily get/find child nodes. example: ``` body = driver.find_element('tag name', 'body') @@ -32,101 +29,96 @@ pitfalls + showcasing some tricks to ditch the need for multithreading. images = body.children()[6].children('img', True) srcs = list(map(lambda _:_.attrs.get('src'), images)) ``` - -* added example.py where i can point people at - when asking silly questions + +* added example.py where i can point people at when asking silly questions (no, its actually quite cool, everyone should see it) * added support for lambda platform * added support for x86_32 * added support for systems reporting as linux2 * some refactoring - ### 3.1.6 ### + ### still passing strong ### -- use_subprocess now defaults to True. too many people don't understand multiprocessing and __name__ == '__main__, and after testing, it seems not to make a difference anymore in chrome 104+ +- use_subprocess now defaults to True. too many people don't understand multiprocessing and __name__ == '__main__, and after testing, it + seems not to make a difference anymore in chrome 104+ - added no_sandbox, which defaults to True, and this without the annoying "you are using unsecure command line ..." bar. -- update [Docker image](https://hub.docker.com/r/ultrafunk/undetected-chromedriver). - you can now vnc or rdp into your container to see the actual browser window -[![demo](https://i.imgur.com/51Ang6R.gif)](https://i.imgur.com/W7vriN9.mp4) +- update [Docker image](https://hub.docker.com/r/ultrafunk/undetected-chromedriver). you can now vnc or rdp into your container to see the + actual browser window + [![demo](https://i.imgur.com/51Ang6R.gif)](https://i.imgur.com/W7vriN9.mp4) - of course, "regular" mode works as well -[![demo](https://i.imgur.com/2qSNyuK.gif)](https://i.imgur.com/2qSNyuK.mp4) - + [![demo](https://i.imgur.com/2qSNyuK.gif)](https://i.imgur.com/2qSNyuK.mp4) ### 3.1.0 ### - **this version `might` break your code, test before update!** +**this version `might` break your code, test before update!** - - **added new anti-detection logic!** - - - v2 has become the main module, so no need for references to v2 anymore. this mean you can now simply use: - ```python - import undetected_chromedriver as uc - driver = uc.Chrome() - driver.get('https://nowsecure.nl') - ``` - for backwards compatibility, v2 is not removed, but aliassed to the main module. - - - Fixed "welcome screen" nagging on non-windows OS-es. - For those nagfetishists who ❤ welcome screens and feeding google with even more data, use Chrome(suppress_welcome=False). +- **added new anti-detection logic!** - - replaced `executable_path` in constructor in favor of `browser_executable_path` - which should not be used unless you are the edge case (yep, you are) who can't add your custom chrome installation folder to your PATH environment variable, or have an army of different browsers/versions and automatic lookup returns the wrong browser +- v2 has become the main module, so no need for references to v2 anymore. this mean you can now simply use: + ```python + import undetected_chromedriver as uc + driver = uc.Chrome() + driver.get('https://nowsecure.nl') + ``` + for backwards compatibility, v2 is not removed, but aliassed to the main module. - - "v1" (?) moved to _compat for now. - - - fixed dependency versions - - - ChromeOptions custom handling removed, so it is compatible with `webdriver.chromium.options.ChromiumOptions`. +- Fixed "welcome screen" nagging on non-windows OS-es. For those nagfetishists who ❤ welcome screens and feeding google with even more data, + use Chrome(suppress_welcome=False). - - removed Chrome.get() fu and restored back to "almost" original: - - no `with` statements needed anymore, although it will still - work for the sake of backward-compatibility. - - no sleeps, stop-start-sessions, delays, or async cdp black magic! - - this will solve a lot of other "issues" as well. +- replaced `executable_path` in constructor in favor of `browser_executable_path` + which should not be used unless you are the edge case (yep, you are) who can't add your custom chrome installation folder to your PATH + environment variable, or have an army of different browsers/versions and automatic lookup returns the wrong browser - - test success to date: 100% - - - just to mention it another time, since some people have hard time reading: - **headless is still WIP. Raising issues is needless** - - +- "v1" (?) moved to _compat for now. +- fixed dependency versions +- ChromeOptions custom handling removed, so it is compatible with `webdriver.chromium.options.ChromiumOptions`. +- removed Chrome.get() fu and restored back to "almost" original: + - no `with` statements needed anymore, although it will still work for the sake of backward-compatibility. + - no sleeps, stop-start-sessions, delays, or async cdp black magic! + - this will solve a lot of other "issues" as well. + +- test success to date: 100% + +- just to mention it another time, since some people have hard time reading: + **headless is still WIP. Raising issues is needless** # 3.0.4 changes # - - change process creation behavior to be fully detached - - changed .get(url) method to always use the contextmanager - - changed .get(url) method to use cdp under the hood. - ... the `with` statement is not necessary anymore .. - - - todo: work towards asyncification and selenium 4 - - #### words of wisdom: #### - Whenever you encounter the daunted - - ```from session not created: This version of ChromeDriver only supports Chrome version 96 # or what ever version``` - - the solution is simple: +- change process creation behavior to be fully detached +- changed .get(url) method to always use the contextmanager +- changed .get(url) method to use cdp under the hood. + + ... the `with` statement is not necessary anymore .. + +- todo: work towards asyncification and selenium 4 + +#### words of wisdom: #### + +Whenever you encounter the daunted + +```from session not created: This version of ChromeDriver only supports Chrome version 96 # or what ever version``` + +the solution is simple: + ```python - import undetected_chromedriver.v2 as uc - driver = uc.Chrome(version_main=95) + import undetected_chromedriver as uc + driver = uc.Chrome( version_main = 95 ) ``` - - **July 2021: Currently busy implementing selenium 4 for undetected-chromedriver** **newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255** - ## Installation ## + ``` pip install undetected-chromedriver ``` @@ -138,42 +130,46 @@ To prevent unnecessary hair-pulling and issue-raising, please mind the **[import
### easy ### -Literally, this is all you have to do. -Settings are included and your browser executable is found automagically. -This is also the snippet i recommend using in case you experience an issue. -```python -import undetected_chromedriver.v2 as uc -driver = uc.Chrome() -driver.get('https://nowsecure.nl') # my own test test site with max anti-bot protection -``` - -### more advanced way, including setting profie folder ### -Literally, this is all you have to do. -If a specified folder does not exist, a NEW profile is created. -Data dirs which are specified like this will not be autoremoved on exit. +Literally, this is all you have to do. Settings are included and your browser executable is found automagically. This is also the snippet i +recommend using in case you experience an issue. ```python import undetected_chromedriver as uc + + +driver = uc.Chrome() +driver.get( 'https://nowsecure.nl' ) # my own test test site with max anti-bot protection +``` + +### more advanced way, including setting profie folder ### + +Literally, this is all you have to do. If a specified folder does not exist, a NEW profile is created. Data dirs which are specified like +this will not be autoremoved on exit. + +```python +import undetected_chromedriver as uc + + options = uc.ChromeOptions() # setting profile options.user_data_dir = "c:\\temp\\profile" # use specific (older) version -driver = uc.Chrome(options=options, version_main=94) # version_main allows to specify your chrome version instead of following chrome global version +driver = uc.Chrome( + options = options , version_main = 94 + ) # version_main allows to specify your chrome version instead of following chrome global version -driver.get('https://nowsecure.nl') # my own test test site with max anti-bot protection +driver.get( 'https://nowsecure.nl' ) # my own test test site with max anti-bot protection ``` - ### expert mode, including Devtool/Wire events ### -Literally, this is all you have to do. -You can now listen and subscribe to the low level devtools-protocol. -I just recently found out that is also on planning for future release of the official chromedriver. -However i implemented my own for now. Since i needed it myself for investigation. +Literally, this is all you have to do. You can now listen and subscribe to the low level devtools-protocol. I just recently found out that +is also on planning for future release of the official chromedriver. However i implemented my own for now. Since i needed it myself for +investigation. ```python @@ -1127,89 +1123,98 @@ driver.get('https://nowsecure.nl') # hopefullly you get the idea. ``` - - - -

#### the easy way (v1 old stuff) #### + ```python import undetected_chromedriver as uc + + driver = uc.Chrome() -driver.get('https://distilnetworks.com') +driver.get( 'https://distilnetworks.com' ) ``` - - - #### target specific chrome version (v1 old stuff) #### + ```python import undetected_chromedriver as uc + + uc.TARGET_VERSION = 85 driver = uc.Chrome() ``` - #### monkeypatch mode (v1 old stuff) #### + Needs to be done before importing from selenium package ```python import undetected_chromedriver as uc + + uc.install() from selenium.webdriver import Chrome + + driver = Chrome() -driver.get('https://distilnetworks.com') +driver.get( 'https://distilnetworks.com' ) ``` - #### the customized way (v1 old stuff) #### + ```python import undetected_chromedriver as uc -#specify chromedriver version to download and patch -uc.TARGET_VERSION = 78 + +# specify chromedriver version to download and patch +uc.TARGET_VERSION = 78 # or specify your own chromedriver binary (why you would need this, i don't know) uc.install( - executable_path='c:/users/user1/chromedriver.exe', -) + executable_path = 'c:/users/user1/chromedriver.exe' , + ) opts = uc.ChromeOptions() -opts.add_argument(f'--proxy-server=socks5://127.0.0.1:9050') -driver = uc.Chrome(options=opts) -driver.get('https://distilnetworks.com') +opts.add_argument( f'--proxy-server=socks5://127.0.0.1:9050' ) +driver = uc.Chrome( options = opts ) +driver.get( 'https://distilnetworks.com' ) ``` - #### datadome.co example (v1 old stuff) #### -These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product. -Make sure you use a "clean" ip for this one. + +These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product. Make sure you use a "clean"ip +for this one. + ```python # # STANDARD selenium Chromedriver # from selenium import webdriver -chrome = webdriver.Chrome() -chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/') -chrome.save_screenshot('datadome_regular_webdriver.png') -True # it caused my ip to be flagged, unfortunately +chrome = webdriver.Chrome() +chrome.get( 'https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/' ) +chrome.save_screenshot( 'datadome_regular_webdriver.png' ) +True # it caused my ip to be flagged, unfortunately + # # UNDETECTED chromedriver (headless,even) # import undetected_chromedriver as uc + + options = uc.ChromeOptions() -options.headless=True -options.add_argument('--headless') -chrome = uc.Chrome(options=options) -chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/') -chrome.save_screenshot('datadome_undetected_webddriver.png') +options.headless = True +options.add_argument( '--headless' ) +chrome = uc.Chrome( options = options ) +chrome.get( 'https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/' ) +chrome.save_screenshot( 'datadome_undetected_webddriver.png' ) ``` + **Check both saved screenhots [here](https://imgur.com/a/fEmqadP)** diff --git a/example/example.py b/example/example.py index 596c7e2..622975a 100644 --- a/example/example.py +++ b/example/example.py @@ -9,7 +9,6 @@ import undetected_chromedriver as uc def main(args=None): - TAKE_IT_EASY = True if args: @@ -28,7 +27,9 @@ def main(args=None): driver.get("https://www.google.com") # accept the terms - driver.find_elements(By.XPATH, '//*[contains(text(), "Reject all")]')[-1].click() # ;) + driver.find_elements(By.XPATH, '//*[contains(text(), "Reject all")]')[ + -1 + ].click() # ;) inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') diff --git a/setup.py b/setup.py index 4e7f0dd..12b1d87 100644 --- a/setup.py +++ b/setup.py @@ -11,10 +11,11 @@ Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y BY ULTRAFUNKAMSTERDAM (https://github.com/ultrafunkamsterdam)""" -from setuptools import setup +import codecs import os import re -import codecs + +from setuptools import setup dirname = os.path.abspath(os.path.dirname(__file__)) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 3487ef0..7e321c4 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.2.1" +__version__ = "3.4" import json import logging @@ -600,37 +600,38 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): self.get = get_wrapped - def _get_cdc_props(self): - return self.execute_script( - """ - let objectToInspect = window, - result = []; - while(objectToInspect !== null) - { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); - objectToInspect = Object.getPrototypeOf(objectToInspect); } - return result.filter(i => i.match(/.+_.+_(Array|Promise|Symbol)/ig)) - """ - ) - - def _hook_remove_cdc_props(self): - self.execute_cdp_cmd( - "Page.addScriptToEvaluateOnNewDocument", - { - "source": """ - let objectToInspect = window, - result = []; - while(objectToInspect !== null) - { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); - objectToInspect = Object.getPrototypeOf(objectToInspect); } - result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig) - &&delete window[p]&&console.log('removed',p)) - """ - }, - ) + # def _get_cdc_props(self): + # return self.execute_script( + # """ + # let objectToInspect = window, + # result = []; + # while(objectToInspect !== null) + # { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); + # objectToInspect = Object.getPrototypeOf(objectToInspect); } + # + # return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)) + # """ + # ) + # + # def _hook_remove_cdc_props(self): + # self.execute_cdp_cmd( + # "Page.addScriptToEvaluateOnNewDocument", + # { + # "source": """ + # let objectToInspect = window, + # result = []; + # while(objectToInspect !== null) + # { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); + # objectToInspect = Object.getPrototypeOf(objectToInspect); } + # result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig) + # &&delete window[p]&&console.log('removed',p)) + # """ + # }, + # ) def get(self, url): - if self._get_cdc_props(): - self._hook_remove_cdc_props() + # if self._get_cdc_props(): + # self._hook_remove_cdc_props() return super().get(url) def add_cdp_listener(self, event_name, callback): diff --git a/undetected_chromedriver/_compat.py b/undetected_chromedriver/_compat.py deleted file mode 100644 index 6b2f28a..0000000 --- a/undetected_chromedriver/_compat.py +++ /dev/null @@ -1,262 +0,0 @@ -#!/usr/bin/env python3 -# this module is part of undetected_chromedriver - - -""" - - 888 888 d8b - 888 888 Y8P - 888 888 - .d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888 -d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P" -888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888 -Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888 - "Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888 - -by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) - -""" - -from distutils.version import LooseVersion -import io -import logging -import os -import random -import re -import string -import sys -from urllib.request import urlopen -from urllib.request import urlretrieve -import zipfile - -from selenium.webdriver import Chrome as _Chrome -from selenium.webdriver import ChromeOptions as _ChromeOptions - - -TARGET_VERSION = 0 -logger = logging.getLogger("uc") - - -class Chrome: - def __new__(cls, *args, emulate_touch=False, **kwargs): - - if not ChromeDriverManager.installed: - ChromeDriverManager(*args, **kwargs).install() - if not ChromeDriverManager.selenium_patched: - ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver() - if not kwargs.get("executable_path"): - kwargs["executable_path"] = "./{}".format( - ChromeDriverManager(*args, **kwargs).executable_path - ) - if not kwargs.get("options"): - kwargs["options"] = ChromeOptions() - instance = object.__new__(_Chrome) - instance.__init__(*args, **kwargs) - - instance._orig_get = instance.get - - def _get_wrapped(*args, **kwargs): - if instance.execute_script("return navigator.webdriver"): - instance.execute_cdp_cmd( - "Page.addScriptToEvaluateOnNewDocument", - { - "source": """ - - Object.defineProperty(window, 'navigator', { - value: new Proxy(navigator, { - has: (target, key) => (key === 'webdriver' ? false : key in target), - get: (target, key) => - key === 'webdriver' - ? undefined - : typeof target[key] === 'function' - ? target[key].bind(target) - : target[key] - }) - }); - - - """ - }, - ) - return instance._orig_get(*args, **kwargs) - - instance.get = _get_wrapped - instance.get = _get_wrapped - instance.get = _get_wrapped - - original_user_agent_string = instance.execute_script( - "return navigator.userAgent" - ) - instance.execute_cdp_cmd( - "Network.setUserAgentOverride", - { - "userAgent": original_user_agent_string.replace("Headless", ""), - }, - ) - if emulate_touch: - instance.execute_cdp_cmd( - "Page.addScriptToEvaluateOnNewDocument", - { - "source": """ - Object.defineProperty(navigator, 'maxTouchPoints', { - get: () => 1 - })""" - }, - ) - logger.info(f"starting undetected_chromedriver.Chrome({args}, {kwargs})") - return instance - - -class ChromeOptions: - def __new__(cls, *args, **kwargs): - if not ChromeDriverManager.installed: - ChromeDriverManager(*args, **kwargs).install() - if not ChromeDriverManager.selenium_patched: - ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver() - - instance = object.__new__(_ChromeOptions) - instance.__init__() - instance.add_argument("start-maximized") - instance.add_experimental_option("excludeSwitches", ["enable-automation"]) - instance.add_argument("--disable-blink-features=AutomationControlled") - return instance - - -class ChromeDriverManager(object): - installed = False - selenium_patched = False - target_version = None - - DL_BASE = "https://chromedriver.storage.googleapis.com/" - - def __init__(self, executable_path=None, target_version=None, *args, **kwargs): - - _platform = sys.platform - - if TARGET_VERSION: - # use global if set - self.target_version = TARGET_VERSION - - if target_version: - # use explicitly passed target - self.target_version = target_version # user override - - if not self.target_version: - # none of the above (default) and just get current version - self.target_version = self.get_release_version_number().version[ - 0 - ] # only major version int - - self._base = base_ = "chromedriver{}" - - exe_name = self._base - if _platform in ("win32",): - exe_name = base_.format(".exe") - if _platform in ("linux",): - _platform += "64" - exe_name = exe_name.format("") - if _platform in ("darwin",): - _platform = "mac64" - exe_name = exe_name.format("") - self.platform = _platform - self.executable_path = executable_path or exe_name - self._exe_name = exe_name - - def patch_selenium_webdriver(self_): - """ - Patches selenium package Chrome, ChromeOptions classes for current session - - :return: - """ - import selenium.webdriver.chrome.service - import selenium.webdriver - - selenium.webdriver.Chrome = Chrome - selenium.webdriver.ChromeOptions = ChromeOptions - logger.info("Selenium patched. Safe to import Chrome / ChromeOptions") - self_.__class__.selenium_patched = True - - def install(self, patch_selenium=True): - """ - Initialize the patch - - This will: - download chromedriver if not present - patch the downloaded chromedriver - patch selenium package if is True (default) - - :param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session) - :return: - """ - if not os.path.exists(self.executable_path): - self.fetch_chromedriver() - if not self.__class__.installed: - if self.patch_binary(): - self.__class__.installed = True - - if patch_selenium: - self.patch_selenium_webdriver() - - def get_release_version_number(self): - """ - Gets the latest major version available, or the latest major version of self.target_version if set explicitly. - - :return: version string - """ - path = ( - "LATEST_RELEASE" - if not self.target_version - else f"LATEST_RELEASE_{self.target_version}" - ) - return LooseVersion(urlopen(self.__class__.DL_BASE + path).read().decode()) - - def fetch_chromedriver(self): - """ - Downloads ChromeDriver from source and unpacks the executable - - :return: on success, name of the unpacked executable - """ - base_ = self._base - zip_name = base_.format(".zip") - ver = self.get_release_version_number().vstring - if os.path.exists(self.executable_path): - return self.executable_path - urlretrieve( - f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip", - filename=zip_name, - ) - with zipfile.ZipFile(zip_name) as zf: - zf.extract(self._exe_name) - os.remove(zip_name) - if sys.platform != "win32": - os.chmod(self._exe_name, 0o755) - return self._exe_name - - @staticmethod - def random_cdc(): - cdc = random.choices(string.ascii_lowercase, k=26) - cdc[-6:-4] = map(str.upper, cdc[-6:-4]) - cdc[2] = cdc[0] - cdc[3] = "_" - return "".join(cdc).encode() - - def patch_binary(self): - """ - Patches the ChromeDriver binary - - :return: False on failure, binary name on success - """ - linect = 0 - replacement = self.random_cdc() - with io.open(self.executable_path, "r+b") as fh: - for line in iter(lambda: fh.readline(), b""): - if b"cdc_" in line: - fh.seek(-len(line), 1) - newline = re.sub(b"cdc_.{22}", replacement, line) - fh.write(newline) - linect += 1 - return linect - - -def install(executable_path=None, target_version=None, *args, **kwargs): - ChromeDriverManager(executable_path, target_version, *args, **kwargs).install() diff --git a/undetected_chromedriver/dprocess.py b/undetected_chromedriver/dprocess.py index fa6be9b..e6187fa 100644 --- a/undetected_chromedriver/dprocess.py +++ b/undetected_chromedriver/dprocess.py @@ -44,7 +44,6 @@ def start_detached(executable, *args): def _start_detached(executable, *args, writer: multiprocessing.Pipe = None): - # configure launch kwargs = {} if platform.system() == "Windows": diff --git a/undetected_chromedriver/options.py b/undetected_chromedriver/options.py index f5c9057..8078ae9 100644 --- a/undetected_chromedriver/options.py +++ b/undetected_chromedriver/options.py @@ -56,7 +56,6 @@ class ChromeOptions(_ChromiumOptions): def handle_prefs(self, user_data_dir): prefs = self.experimental_options.get("prefs") if prefs: - user_data_dir = user_data_dir or self._user_data_dir default_path = os.path.join(user_data_dir, "Default") os.makedirs(default_path, exist_ok=True) diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index c20ead8..aa5a6b7 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -7,7 +7,6 @@ import logging import os import random import re -import secrets import string import sys import time @@ -41,7 +40,7 @@ class Patcher(object): d = "~/appdata/roaming/undetected_chromedriver" elif "LAMBDA_TASK_ROOT" in os.environ: d = "/tmp/undetected_chromedriver" - elif platform.startswith(("linux","linux2")): + elif platform.startswith(("linux", "linux2")): d = "~/.local/share/undetected_chromedriver" elif platform.endswith("darwin"): d = "~/Library/Application Support/undetected_chromedriver" @@ -62,12 +61,13 @@ class Patcher(object): """ self.force = force - self.executable_path = None - prefix = secrets.token_hex(8) + + prefix = "undetected" if not os.path.exists(self.data_path): os.makedirs(self.data_path, exist_ok=True) + self.executable_path = executable_path if not executable_path: self.executable_path = os.path.join( self.data_path, "_".join([prefix, self.exe_name]) @@ -80,10 +80,10 @@ class Patcher(object): self.zip_path = os.path.join(self.data_path, prefix) - if not executable_path: - self.executable_path = os.path.abspath( - os.path.join(".", self.executable_path) - ) + # if not executable_path: + # self.executable_path = os.path.abspath( + # os.path.join(".", self.executable_path) + # ) self._custom_exe_path = False @@ -94,8 +94,9 @@ class Patcher(object): self.version_full = None def auto(self, executable_path=None, force=False, version_main=None): - """""" + if executable_path: + self.executable_path = executable_path self._custom_exe_path = True @@ -206,43 +207,59 @@ class Patcher(object): @staticmethod def gen_random_cdc(): - cdc = random.choices(string.ascii_lowercase, k=26) - cdc[-6:-4] = map(str.upper, cdc[-6:-4]) - cdc[2] = cdc[0] - cdc[3] = "_" + # make cdc_variables without underscores + cdc = random.choices(string.ascii_letters, k=27) + + # cdc[-6:-4] = map(str.upper, cdc[-6:-4]) + # cdc[2] = cdc[0] + # cdc[3] = "_" return "".join(cdc).encode() def is_binary_patched(self, executable_path=None): - """simple check if executable is patched. - - :return: False if not patched, else True - """ executable_path = executable_path or self.executable_path with io.open(executable_path, "rb") as fh: - for line in iter(lambda: fh.readline(), b""): - if b"cdc_" in line: - return False - else: - return True + return fh.read().find(b"undetected chromedriver") != -1 def patch_exe(self): - """ - Patches the ChromeDriver binary - - :return: False on failure, binary name on success - """ + start = time.perf_counter() logger.info("patching driver executable %s" % self.executable_path) - - linect = 0 - replacement = self.gen_random_cdc() with io.open(self.executable_path, "r+b") as fh: - for line in iter(lambda: fh.readline(), b""): - if b"cdc_" in line: - fh.seek(-len(line), 1) - newline = re.sub(b"cdc_.{22}", replacement, line) - fh.write(newline) - linect += 1 - return linect + content = fh.read() + match_injected_codeblock = re.search(rb"{window.*;}", content) + if match_injected_codeblock: + target_bytes = match_injected_codeblock[0] + new_target_bytes = ( + b'{console.log("undetected chromedriver 1337!")}'.ljust( + len(target_bytes), b" " + ) + ) + new_content = content.replace(target_bytes, new_target_bytes) + if new_content == content: + logger.warning( + "something went wrong patching the driver binary. could not find injection code block" + ) + else: + logger.debug( + "found block:\n%s\nreplacing with:\n%s" + % (target_bytes, new_target_bytes) + ) + fh.seek(0) + fh.write(new_content) + + # we just keep the cdc variables as they can't be injected anyways so no harm + # keeping for reference + # fh.seek(0) + # for line in iter( lambda: fh.readline() , b"" ): + # if b'cdc_' in line: + # fh.seek( -len( line ) , 1 ) + # new_line = re.sub( b"cdc_.{22}_" , self.gen_random_cdc() , line ) + # logger.debug( 'replaced %s\n\twith:%s' % (line , new_line) ) + # fh.write( new_line ) + else: + logger.info("%s seems already patched ?!?!" % self.executable_path) + logger.debug( + "patching took us {:.2f} seconds".format(time.perf_counter() - start) + ) def __repr__(self): return "{0:s}({1:s})".format( @@ -251,7 +268,6 @@ class Patcher(object): ) def __del__(self): - if self._custom_exe_path: # if the driver binary is specified by user # we assume it is important enough to not delete it diff --git a/undetected_chromedriver/reactor.py b/undetected_chromedriver/reactor.py index 191fa0e..d52e312 100644 --- a/undetected_chromedriver/reactor.py +++ b/undetected_chromedriver/reactor.py @@ -6,6 +6,7 @@ import json import logging import threading + logger = logging.getLogger(__name__) @@ -63,9 +64,7 @@ class Reactor(threading.Thread): break async def listen(self): - while self.running: - await self._wait_service_started() await asyncio.sleep(1) @@ -74,9 +73,7 @@ class Reactor(threading.Thread): log_entries = self.driver.get_log("performance") for entry in log_entries: - try: - obj_serialized: str = entry.get("message") obj = json.loads(obj_serialized) message = obj.get("message") diff --git a/undetected_chromedriver/tests/cloudflare_iauam_bypass_check.cmd b/undetected_chromedriver/tests/cloudflare_iauam_bypass_check.cmd deleted file mode 100644 index 77ddbd7..0000000 --- a/undetected_chromedriver/tests/cloudflare_iauam_bypass_check.cmd +++ /dev/null @@ -1,80 +0,0 @@ -@echo off -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: -:: QUICK TEST FOR UNDETECTED-CHROMEDRIVER TO CHECK IF CLOUDFLARE IAUAM CAN BE PASSED -:: -:: To make it as clean as possible without interfering packages or plugins: -:: - this creates a new python virtual environment -:: - installs undetected chromedriver -:: - executes a test -:: - cleans up the virtual environment -:: -:: this is for Windows only currently -:: -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - - -set uc_test_dir=%temp%\ucvenv - -set curdir=%CD% -set prog= - - -:: =================== -:main - -call :hasprog "conda" -if [%prog%]==[conda] ( - echo "conda is found, activating..." - call %prog% activate - goto :next - exit -) - -call :hasprog "python" -if [%prog%]==[python] ( - echo "python is found" - goto :next - exit -) - -echo "no python interpreter or conda could be found. exiting" -exit 1 - - - -:: =================== -:hasprog -call %~1 --help >nul 2>&1 -if ERRORLEVEL 0 ( - set prog=%~1 -) -exit /B - - - -:: =================== -:next - -mkdir %uc_test_dir% -echo "created temp directory for the virtual environment: %uc_test_dir%" - -python -m venv %uc_test_dir% - -set pythonv=%uc_test_dir%\scripts\python -%pythonv% -m pip install -U undetected-chromedriver -%pythonv% -c "exec(\"import time,logging,undetected_chromedriver as uc,selenium.webdriver.support.expected_conditions as ec,selenium.webdriver.support.wait as wwait;logging.basicConfig(level=10);dr=uc.Chrome();dr.get('https://nowsecure.nl');wwait.WebDriverWait(dr,15).until(ec.visibility_of_element_located(('css selector','.hystericalbg')));print('====================WORKING=============');time.sleep(3)\")" - - -if [%prog%]==[conda] ( - echo "deactivating conda env" - %prog% deactivate -) - -cd %curdir% -rd /S /Q %uc_test_dir% -echo "cleaning up temp directory for the virtual environment: %uc_test_dir%" - - - - diff --git a/undetected_chromedriver/tests/quick_test_cf.cmd b/undetected_chromedriver/tests/quick_test_cf.cmd deleted file mode 100644 index 00a4810..0000000 --- a/undetected_chromedriver/tests/quick_test_cf.cmd +++ /dev/null @@ -1,80 +0,0 @@ -@echo off -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: -:: QUICK TEST FOR UNDETECTED-CHROMEDRIVER TO CHECK IF CLOUDFLARE IAUAM CAN BE PASSED -:: -:: To make it as clean as possible without interfering packages or plugins: -:: - this creates a new python virtual environment -:: - installs undetected chromedriver -:: - executes a test -:: - cleans up the virtual environment -:: -:: this is for Windows only currently -:: -::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - - -set uc_test_dir=%temp%\ucvenv - -set curdir=%CD% -set prog= - - -:: =================== -:main - -call :hasprog "conda" -if [%prog%]==[conda] ( - echo "conda is found, activating..." - call %prog% activate - goto :next - exit -) - -call :hasprog "python" -if [%prog%]==[python] ( - echo "python is found" - goto :next - exit -) - -echo "no python interpreter or conda could be found. exiting" -exit 1 - - - -:: =================== -:hasprog -call %~1 --help >nul 2>&1 -if ERRORLEVEL 0 ( - set prog=%~1 -) -exit /B - - - -:: =================== -:next - -mkdir %uc_test_dir% -echo "created temp directory for the virtual environment: %uc_test_dir%" - -python -m venv %uc_test_dir% - -set pythonv=%uc_test_dir%\scripts\python -%pythonv% -m pip install -U undetected-chromedriver -%pythonv% -c "exec(\"import time,logging,undetected_chromedriver as uc,selenium.webdriver.support.expected_conditions as ec,selenium.webdriver.support.wait as wwait;logging.basicConfig(level=10);dr=uc.Chrome();dr.get('https://nowsecure.nl');wwait.WebDriverWait(dr,15).until(ec.visibility_of_element_located(('css selector','.hystericalbg')));print('====================WORKING=============');time.sleep(3)\")" - - -if [%prog%]==[conda] ( - echo "deactivating conda env" - %prog% deactivate -) - -cd %curdir% -rd /S /Q %uc_test_dir% -echo "cleaning up temp directory for the virtual environment: %uc_test_dir%" - - - - diff --git a/undetected_chromedriver/tests/v2/test_uc.py b/undetected_chromedriver/tests/v2/test_uc.py deleted file mode 100644 index 245ea15..0000000 --- a/undetected_chromedriver/tests/v2/test_uc.py +++ /dev/null @@ -1,38 +0,0 @@ -from _pytest.fixtures import FixtureRequest -import pytest - -import undetected_chromedriver as uc - - -FAILED_SCREENSHOT_NAME = "failed.png" - - -@pytest.fixture -def head_uc(request: FixtureRequest): - request.instance.driver = uc.Chrome() - - def teardown(): - request.instance.driver.save_screenshot(FAILED_SCREENSHOT_NAME) - request.instance.driver.quit() - - request.addfinalizer(teardown) - - return request.instance.driver - - -@pytest.fixture -def headless_uc(request: FixtureRequest): - options = uc.ChromeOptions() - options.headless = True - request.instance.driver = uc.Chrome(options=options) - - def teardown(): - request.instance.driver.save_screenshot(FAILED_SCREENSHOT_NAME) - request.instance.driver.quit() - - request.addfinalizer(teardown) - - return request.instance.driver - - -pytest.main() diff --git a/undetected_chromedriver/v2.py b/undetected_chromedriver/v2.py deleted file mode 100644 index 8e0870e..0000000 --- a/undetected_chromedriver/v2.py +++ /dev/null @@ -1,4 +0,0 @@ -# for backward compatibility -import sys - -sys.modules[__name__] = sys.modules[__package__] diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py index f4225f9..03d6878 100644 --- a/undetected_chromedriver/webelement.py +++ b/undetected_chromedriver/webelement.py @@ -1,6 +1,7 @@ +from typing import List + from selenium.webdriver.common.by import By import selenium.webdriver.remote.webelement -from typing import List class WebElement(selenium.webdriver.remote.webelement.WebElement):