diff --git a/README.md b/README.md index 0ccb324..8fae863 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,28 @@ Automatically downloads the driver binary and patches it. * Python 3.6++** +### 3.0.4 changes #### + - changed process creation behavior to be fully detached + - changed .get(url) method to always use the contextmanager + - changed .get(url) method to use cdp under the hood. + + ... so the `with` statement is no longer necessary ... + + - todo: work towards asyncification and selenium 4 + + #### words of wisdom: #### + Whenever you encounter the dreaded + + ```from session not created: This version of ChromeDriver only supports Chrome version 96 # or whatever version``` + + the solution is simple: pass the major version of your installed Chrome: + ```python + import undetected_chromedriver.v2 as uc + driver = uc.Chrome(version_main=95) + ``` + + + **July 2021: Currently busy implementing selenium 4 for undetected-chromedriver** **newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255** @@ -34,8 +56,7 @@ This is also the snippet i recommend using in case you experience an issue. 
```python import undetected_chromedriver.v2 as uc driver = uc.Chrome() -with driver: - driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode" +driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode" ``` ### The Version 2 more advanced way, including setting profie folder ### @@ -56,10 +77,9 @@ options.add_argument('--user-data-dir=c:\\temp\\profile2') # just some options passing in to skip annoying popups options.add_argument('--no-first-run --no-service-autorun --password-store=basic') -driver = uc.Chrome(options=options) +driver = uc.Chrome(options=options, version_main=94) # version_main lets you specify your installed Chrome major version instead of following the global (latest) Chrome version -with driver: - driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode" +driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode" ``` @@ -76,7 +96,7 @@ However i implemented my own for now. Since i needed it myself for investigation import undetected_chromedriver.v2 as uc from pprint import pformat -driver = uc.Chrome(enable_cdp_event=True) +driver = uc.Chrome(enable_cdp_events=True) def mylousyprintfunction(eventdata): print(pformat(eventdata)) @@ -102,8 +122,7 @@ driver.add_cdp_listener('Network.dataReceived', mylousyprintfunction) # now all these events will be printed in my console -with driver: - driver.get('https://nowsecure.nl') +driver.get('https://nowsecure.nl') {'method': 'Network.requestWillBeSent', diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 4bae8fa..f26c077 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -34,7 +34,7 @@ from . 
import options ChromeOptionsV2 = v2.ChromeOptions logger = logging.getLogger(__name__) -__version__ = "3.0.3" +__version__ = "3.0.4" TARGET_VERSION = 0 diff --git a/undetected_chromedriver/dprocess.py b/undetected_chromedriver/dprocess.py new file mode 100644 index 0000000..d93ea89 --- /dev/null +++ b/undetected_chromedriver/dprocess.py @@ -0,0 +1,74 @@ +import multiprocessing +import os +import platform +import sys +from subprocess import PIPE +from subprocess import Popen +import atexit +import traceback +import logging +import signal + +CREATE_NEW_PROCESS_GROUP = 0x00000200 +DETACHED_PROCESS = 0x00000008 + +REGISTERED = [] + + +def start_detached(executable, *args): + """ + Starts a fully independent subprocess (with no parent) + :param executable: executable + :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...] + :return: pid of the grandchild process + :raises EOFError: if the helper process exits before reporting a pid + """ + + # create pipe + reader, writer = multiprocessing.Pipe(False) + + # do not keep reference + multiprocessing.Process(target=_start_detached, args=(executable, *args), kwargs={'writer': writer}, + daemon=True).start() + # the helper holds its own write end; drop ours so recv() raises EOFError instead of blocking forever if the helper dies + writer.close() + # receive pid from pipe + pid = reader.recv() + reader.close() + REGISTERED.append(pid) + + return pid + + +def _start_detached(executable, *args, writer: multiprocessing.Pipe = None): + """Runs in the helper process: launch the executable in its own session/process group, send its pid through writer, then exit.""" + # configure launch + kwargs = {} + if platform.system() == 'Windows': + kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP) + elif sys.version_info < (3, 2): + # assume posix + kwargs.update(preexec_fn=os.setsid) + else: # Python 3.2+ and Unix + kwargs.update(start_new_session=True) + + # run + p = Popen([executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **kwargs) + + # send pid to pipe + writer.send(p.pid) + sys.exit() + + +def _cleanup(): + """Best-effort SIGTERM for every pid recorded in REGISTERED; failures are logged at debug level and never raised.""" + for pid in REGISTERED: + try: + logging.getLogger(__name__).debug('cleaning up pid %d ' % pid) + os.kill(pid, signal.SIGTERM) + except Exception: # noqa + logging.getLogger(__name__).debug('could not terminate pid %d', pid, exc_info=True) + + 
+atexit.register(_cleanup) + diff --git a/undetected_chromedriver/v2.py b/undetected_chromedriver/v2.py index f5bc525..da94a65 100644 --- a/undetected_chromedriver/v2.py +++ b/undetected_chromedriver/v2.py @@ -3,24 +3,28 @@ from __future__ import annotations +import asyncio import json import logging import os import re import shutil -import subprocess import sys import tempfile import time +import inspect +import requests import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver import selenium.webdriver.common.service import selenium.webdriver.remote.webdriver +import websockets from .cdp import CDP from .options import ChromeOptions -from .patcher import IS_POSIX, Patcher +from .patcher import IS_POSIX +from .patcher import Patcher from .reactor import Reactor __all__ = ( @@ -35,6 +39,8 @@ __all__ = ( logger = logging.getLogger("uc") logger.setLevel(logging.getLogger().getEffectiveLevel()) +from .dprocess import start_detached + class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): """ @@ -77,20 +83,20 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): session_id = None def __init__( - self, - executable_path=None, - port=0, - options=None, - enable_cdp_events=False, - service_args=None, - desired_capabilities=None, - service_log_path=None, - keep_alive=False, - log_level=0, - headless=False, - delay=5, - version_main=None, - patcher_force_close=False, + self, + executable_path=None, + port=0, + options=None, + enable_cdp_events=False, + service_args=None, + desired_capabilities=None, + service_log_path=None, + keep_alive=False, + log_level=0, + headless=False, + delay=5, + version_main=None, + patcher_force_close=False, ): """ Creates a new instance of the chrome driver. 
@@ -167,7 +173,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): try: if hasattr(options, "_session") and options._session is not None: - # prevent reuse of options, # as it just appends arguments, not replace them # you'll get conflicts starting chrome @@ -272,9 +277,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): # fix exit_type flag to prevent tab-restore nag try: with open( - os.path.join(user_data_dir, "Default/Preferences"), - encoding="latin1", - mode="r+", + os.path.join(user_data_dir, "Default/Preferences"), + encoding="latin1", + mode="r+", ) as fs: config = json.load(fs) if config["profile"]["exit_type"] is not None: @@ -291,14 +296,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if not desired_capabilities: desired_capabilities = options.to_capabilities() + self.browser_pid = start_detached(options.binary_location, *options.arguments) - self.browser = subprocess.Popen( - [options.binary_location, *options.arguments], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=IS_POSIX, - ) + # self.browser = subprocess.Popen( + # [options.binary_location, *options.arguments], + # stdin=subprocess.PIPE, + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE, + # close_fds=IS_POSIX, + # ) super(Chrome, self).__init__( executable_path=patcher.executable_path, @@ -523,11 +529,27 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): def __dir__(self): return object.__dir__(self) + def get(self, url): + """Navigate the first target of type 'page' to url by sending Page.navigate over its websocket; returns the first message received back. + :raises RuntimeError: when the debugger endpoint lists no target of type 'page' + """ + tabs = requests.get('http://{0}:{1}/json'.format(*self.options.debugger_address.split(':'))).json() + tab = next((t for t in tabs if t['type'] == 'page'), None) + if tab is None: + raise RuntimeError('no page target found at %s' % self.options.debugger_address) + + async def _get(): + async with websockets.connect(tab['webSocketDebuggerUrl']) as ws: + await ws.send(json.dumps({"method": "Page.navigate", "params": {"url": url}, "id": 1})) + return await ws.recv() + with self: + return asyncio.get_event_loop().run_until_complete(_get()) + def 
add_cdp_listener(self, event_name, callback): if ( - self.reactor - and self.reactor is not None - and isinstance(self.reactor, Reactor) + self.reactor + and self.reactor is not None + and isinstance(self.reactor, Reactor) ): self.reactor.add_event_handler(event_name, callback) return self.reactor.handlers @@ -577,7 +599,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): capabilities = self.options.to_capabilities() super(Chrome, self).start_session(capabilities, browser_profile) - def quit(self): logger.debug("closing webdriver") self.service.process.kill() @@ -588,8 +609,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): pass try: logger.debug("killing browser") - self.browser.terminate() - self.browser.wait(1) + os.kill(self.browser_pid, 15)  # 15 == SIGTERM; os.kill() requires the signal argument, without it this line only raised TypeError + # self.browser.terminate() + # self.browser.wait(1) except TimeoutError as e: logger.debug(e, exc_info=True) @@ -597,9 +619,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): pass if ( - hasattr(self, "keep_user_data_dir") - and hasattr(self, "user_data_dir") - and not self.keep_user_data_dir + hasattr(self, "keep_user_data_dir") + and hasattr(self, "user_data_dir") + and not self.keep_user_data_dir ): for _ in range(5): try: @@ -625,6 +647,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): self.quit() def __enter__(self): + try: + curframe = inspect.currentframe() + callframe = inspect.getouterframes(curframe, 2) + caller = callframe[1][3] + logging.getLogger(__name__).debug('__enter__ caller: %s' % caller) + if caller == 'get': + return self  # same value as every other path, so `with driver as d` never binds None + except (AttributeError, ValueError, KeyError, OSError) as e: + logging.getLogger(__name__).debug(e) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -637,7 +668,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): return hash(self.options.debugger_address) - def find_chrome_executable(): """ Finds the chrome, chrome beta, chrome canary, chromium executable @@ -659,12 +689,12 @@ def 
find_chrome_executable(): ) else: for item in map( - os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA") + os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA") ): for subitem in ( - "Google/Chrome/Application", - "Google/Chrome Beta/Application", - "Google/Chrome Canary/Application", + "Google/Chrome/Application", + "Google/Chrome Beta/Application", + "Google/Chrome Canary/Application", ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: