Merge pull request #357 from ultrafunkamsterdam/3.0.4

3.0.4
This commit is contained in:
Leon 2021-11-16 18:47:17 +01:00 committed by GitHub
commit e7a2908e4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 171 additions and 50 deletions

View File

@ -11,6 +11,28 @@ Automatically downloads the driver binary and patches it.
* Python 3.6++** * Python 3.6++**
### 3.0.4 changes ####
- change process creation behavior to be fully detached
- changed .get(url) method to always use the contextmanager
- changed .get(url) method to use cdp under the hood.
... the `with` statement is not necessary anymore ..
- todo: work towards asyncification and selenium 4
#### words of wisdom: ####
Whenever you encounter the dreaded
```from session not created: This version of ChromeDriver only supports Chrome version 96 # or whatever version```
the solution is simple:
```python
import undetected_chromedriver.v2 as uc
driver = uc.Chrome(version_main=95)
```
**July 2021: Currently busy implementing selenium 4 for undetected-chromedriver** **July 2021: Currently busy implementing selenium 4 for undetected-chromedriver**
**newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255** **newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255**
@ -34,8 +56,7 @@ This is also the snippet i recommend using in case you experience an issue.
```python ```python
import undetected_chromedriver.v2 as uc import undetected_chromedriver.v2 as uc
driver = uc.Chrome() driver = uc.Chrome()
with driver: driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode"
driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode"
``` ```
### The Version 2 more advanced way, including setting profile folder ### ### The Version 2 more advanced way, including setting profile folder ###
@ -56,10 +77,9 @@ options.add_argument('--user-data-dir=c:\\temp\\profile2')
# just some options passing in to skip annoying popups # just some options passing in to skip annoying popups
options.add_argument('--no-first-run --no-service-autorun --password-store=basic') options.add_argument('--no-first-run --no-service-autorun --password-store=basic')
driver = uc.Chrome(options=options) driver = uc.Chrome(options=options, version_main=94) # version_main allows to specify your chrome version instead of following chrome global version
with driver: driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode"
driver.get('https://nowsecure.nl') # known url using cloudflare's "under attack mode"
``` ```
@ -76,7 +96,7 @@ However i implemented my own for now. Since i needed it myself for investigation
import undetected_chromedriver.v2 as uc import undetected_chromedriver.v2 as uc
from pprint import pformat from pprint import pformat
driver = uc.Chrome(enable_cdp_event=True) driver = uc.Chrome(enable_cdp_events=True)
def mylousyprintfunction(eventdata): def mylousyprintfunction(eventdata):
print(pformat(eventdata)) print(pformat(eventdata))
@ -102,8 +122,7 @@ driver.add_cdp_listener('Network.dataReceived', mylousyprintfunction)
# now all these events will be printed in my console # now all these events will be printed in my console
with driver: driver.get('https://nowsecure.nl')
driver.get('https://nowsecure.nl')
{'method': 'Network.requestWillBeSent', {'method': 'Network.requestWillBeSent',

View File

@ -34,7 +34,7 @@ from . import options
ChromeOptionsV2 = v2.ChromeOptions ChromeOptionsV2 = v2.ChromeOptions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
__version__ = "3.0.3" __version__ = "3.0.4"
TARGET_VERSION = 0 TARGET_VERSION = 0

View File

@ -0,0 +1,72 @@
import multiprocessing
import os
import platform
import sys
from subprocess import PIPE
from subprocess import Popen
import atexit
import traceback
import logging
import signal
# Process-creation flags; _start_detached ORs them together and passes the
# result to Popen as `creationflags` when platform.system() is 'Windows'.
CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008
# Pids returned by start_detached(); _cleanup() sends SIGTERM to each of them.
REGISTERED = []
def start_detached(executable, *args):
    """
    Starts a fully independent subprocess (with no parent).

    The executable is launched from a short-lived helper process
    (see _start_detached), which reports the pid of the launched program
    back over a pipe and then exits. The pid is also recorded in REGISTERED.

    :param executable: executable
    :param args: arguments to the executable, eg: ['--param1_key=param1_val', '-vvv' ...]
    :return: pid of the grandchild process
    :raises EOFError: may be raised if the helper exits without reporting a pid
    """
    # one-way pipe: the helper process writes, this process reads
    reader, writer = multiprocessing.Pipe(False)
    try:
        try:
            # do not keep a reference to the helper process
            multiprocessing.Process(
                target=_start_detached,
                args=(executable, *args),
                kwargs={'writer': writer},
                daemon=True,
            ).start()
        finally:
            # The helper has its own handle to the write end once it is started.
            # Closing ours before reading lets recv() see end-of-file instead of
            # blocking forever when the helper dies before sending a pid.
            writer.close()

        # receive pid from pipe
        pid = reader.recv()
    finally:
        reader.close()

    REGISTERED.append(pid)
    return pid
def _start_detached(executable, *args, writer=None):
    """
    Entry point of the helper process: launch `executable` and report its pid.

    Always ends by raising SystemExit, so it is meant to run as the target of
    a separate process (as start_detached does), not to be called inline.

    :param executable: executable
    :param args: arguments to the executable
    :param writer: write end of a multiprocessing pipe; the pid of the launched
                   process is sent on it. When None, no pid is reported.
    """
    # configure launch
    kwargs = {}
    if platform.system() == 'Windows':
        kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP)
    else:
        # assume posix; the star-unpacking below already requires Python 3.5+,
        # so start_new_session is always available
        kwargs.update(start_new_session=True)

    # run
    p = Popen([executable, *args], stdin=PIPE, stdout=PIPE, stderr=PIPE, **kwargs)

    # send pid to pipe
    if writer is not None:
        writer.send(p.pid)
        writer.close()

    # sys.exit() rather than exit(): the latter is a helper injected by the
    # `site` module and is not guaranteed to exist (python -S, frozen apps)
    sys.exit()
def _cleanup():
    """
    Send SIGTERM to every pid recorded in REGISTERED (best effort).

    Failures for individual pids are logged at debug level and otherwise
    ignored, so one bad pid does not stop the remaining ones being signalled.
    """
    log = logging.getLogger(__name__)
    for pid in REGISTERED:
        try:
            log.debug('cleaning up pid %d ', pid)
            os.kill(pid, signal.SIGTERM)
        except Exception as e:  # best effort, e.g. the process is already gone
            log.debug('could not terminate pid %r: %s', pid, e)


# run the cleanup when the interpreter exits
atexit.register(_cleanup)

View File

@ -3,24 +3,28 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import json import json
import logging import logging
import os import os
import re import re
import shutil import shutil
import subprocess
import sys import sys
import tempfile import tempfile
import time import time
import inspect
import requests
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service import selenium.webdriver.common.service
import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.webdriver
import websockets
from .cdp import CDP from .cdp import CDP
from .options import ChromeOptions from .options import ChromeOptions
from .patcher import IS_POSIX, Patcher from .patcher import IS_POSIX
from .patcher import Patcher
from .reactor import Reactor from .reactor import Reactor
__all__ = ( __all__ = (
@ -35,6 +39,8 @@ __all__ = (
logger = logging.getLogger("uc") logger = logging.getLogger("uc")
logger.setLevel(logging.getLogger().getEffectiveLevel()) logger.setLevel(logging.getLogger().getEffectiveLevel())
from .dprocess import start_detached
class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
""" """
@ -77,20 +83,20 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
session_id = None session_id = None
def __init__( def __init__(
self, self,
executable_path=None, executable_path=None,
port=0, port=0,
options=None, options=None,
enable_cdp_events=False, enable_cdp_events=False,
service_args=None, service_args=None,
desired_capabilities=None, desired_capabilities=None,
service_log_path=None, service_log_path=None,
keep_alive=False, keep_alive=False,
log_level=0, log_level=0,
headless=False, headless=False,
delay=5, delay=5,
version_main=None, version_main=None,
patcher_force_close=False, patcher_force_close=False,
): ):
""" """
Creates a new instance of the chrome driver. Creates a new instance of the chrome driver.
@ -167,7 +173,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
try: try:
if hasattr(options, "_session") and options._session is not None: if hasattr(options, "_session") and options._session is not None:
# prevent reuse of options, # prevent reuse of options,
# as it just appends arguments, not replace them # as it just appends arguments, not replace them
# you'll get conflicts starting chrome # you'll get conflicts starting chrome
@ -272,9 +277,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# fix exit_type flag to prevent tab-restore nag # fix exit_type flag to prevent tab-restore nag
try: try:
with open( with open(
os.path.join(user_data_dir, "Default/Preferences"), os.path.join(user_data_dir, "Default/Preferences"),
encoding="latin1", encoding="latin1",
mode="r+", mode="r+",
) as fs: ) as fs:
config = json.load(fs) config = json.load(fs)
if config["profile"]["exit_type"] is not None: if config["profile"]["exit_type"] is not None:
@ -291,14 +296,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not desired_capabilities: if not desired_capabilities:
desired_capabilities = options.to_capabilities() desired_capabilities = options.to_capabilities()
self.browser_pid = start_detached(options.binary_location, *options.arguments)
self.browser = subprocess.Popen( # self.browser = subprocess.Popen(
[options.binary_location, *options.arguments], # [options.binary_location, *options.arguments],
stdin=subprocess.PIPE, # stdin=subprocess.PIPE,
stdout=subprocess.PIPE, # stdout=subprocess.PIPE,
stderr=subprocess.PIPE, # stderr=subprocess.PIPE,
close_fds=IS_POSIX, # close_fds=IS_POSIX,
) # )
super(Chrome, self).__init__( super(Chrome, self).__init__(
executable_path=patcher.executable_path, executable_path=patcher.executable_path,
@ -523,11 +529,27 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def __dir__(self): def __dir__(self):
return object.__dir__(self) return object.__dir__(self)
def get(self, url):
    """
    Navigate to `url` by sending `Page.navigate` over the devtools websocket.

    The list of open targets is fetched from the debugger's HTTP endpoint
    (`/json`), and the command is sent to the first target of type 'page'.
    The navigation runs inside `with self:`.

    :param url: url to load
    :return: the first message received on the websocket after sending the command
    :raises RuntimeError: if the debugger reports no target of type 'page'
    """
    tabs = requests.get('http://{0}:{1}/json'.format(*self.options.debugger_address.split(':'))).json()

    # pick the first real page; do not silently fall back to some other target type
    page = next((tab for tab in tabs if tab['type'] == 'page'), None)
    if page is None:
        raise RuntimeError('no target of type "page" found on %s' % self.options.debugger_address)

    async def _get():
        wsurl = page['webSocketDebuggerUrl']
        async with websockets.connect(wsurl) as ws:
            await ws.send(json.dumps({"method": "Page.navigate", "params": {"url": url}, "id": 1}))
            return await ws.recv()

    # NOTE: the `with` statement must stay directly inside this method;
    # __enter__ checks that the calling function is named 'get'.
    with self:
        # A private loop works from any thread and avoids get_event_loop(),
        # which is deprecated when no loop is running.
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(_get())
        finally:
            loop.close()
def add_cdp_listener(self, event_name, callback): def add_cdp_listener(self, event_name, callback):
if ( if (
self.reactor self.reactor
and self.reactor is not None and self.reactor is not None
and isinstance(self.reactor, Reactor) and isinstance(self.reactor, Reactor)
): ):
self.reactor.add_event_handler(event_name, callback) self.reactor.add_event_handler(event_name, callback)
return self.reactor.handlers return self.reactor.handlers
@ -577,7 +599,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
capabilities = self.options.to_capabilities() capabilities = self.options.to_capabilities()
super(Chrome, self).start_session(capabilities, browser_profile) super(Chrome, self).start_session(capabilities, browser_profile)
def quit(self): def quit(self):
logger.debug("closing webdriver") logger.debug("closing webdriver")
self.service.process.kill() self.service.process.kill()
@ -588,8 +609,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
pass pass
try: try:
logger.debug("killing browser") logger.debug("killing browser")
self.browser.terminate() os.kill(self.browser_pid)
self.browser.wait(1) # self.browser.terminate()
# self.browser.wait(1)
except TimeoutError as e: except TimeoutError as e:
logger.debug(e, exc_info=True) logger.debug(e, exc_info=True)
@ -597,9 +619,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
pass pass
if ( if (
hasattr(self, "keep_user_data_dir") hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir") and hasattr(self, "user_data_dir")
and not self.keep_user_data_dir and not self.keep_user_data_dir
): ):
for _ in range(5): for _ in range(5):
try: try:
@ -625,6 +647,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.quit() self.quit()
def __enter__(self):
    """
    Enter the context manager.

    :return: None when the `with` statement lives in a function named 'get'
             (as the `with self:` inside get() does), otherwise self
    """
    log = logging.getLogger(__name__)
    try:
        # Name of the function that contains the `with` statement. Reading it
        # from the direct parent frame avoids building FrameInfo records
        # (including source context) for the entire call stack.
        caller = inspect.currentframe().f_back.f_code.co_name
        log.debug('__enter__ caller: %s', caller)
        if caller == 'get':
            return None
    except (AttributeError, ValueError, KeyError, OSError) as e:
        # AttributeError also covers interpreters without frame support
        # (currentframe() returns None) and a missing parent frame
        log.debug(e)
    return self
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
@ -637,7 +668,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
return hash(self.options.debugger_address) return hash(self.options.debugger_address)
def find_chrome_executable(): def find_chrome_executable():
""" """
Finds the chrome, chrome beta, chrome canary, chromium executable Finds the chrome, chrome beta, chrome canary, chromium executable
@ -659,12 +689,12 @@ def find_chrome_executable():
) )
else: else:
for item in map( for item in map(
os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA") os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
): ):
for subitem in ( for subitem in (
"Google/Chrome/Application", "Google/Chrome/Application",
"Google/Chrome Beta/Application", "Google/Chrome Beta/Application",
"Google/Chrome Canary/Application", "Google/Chrome Canary/Application",
): ):
candidates.add(os.sep.join((item, subitem, "chrome.exe"))) candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates: for candidate in candidates: