Changed the way the patcher works (for those using multiple sessions/processes).

    When no executable_path is specified (the default, and recommended!), the filename
    gets randomized to <somehex>_chromedriver[.exe]. This should fix the issue with multiprocessing
    (although Chrome/chromedriver itself has restrictions here as well; you can see this using Process Hacker).
    As I have said before, webdriver is a purely I/O-based operation which only sends and pulls data, so multiprocessing/threading isn't going to help much. You'd be better off using asyncio.

find_chrome_executable:
    added google-chrome-stable to the list, as some distros use this name.

advanced_elements:  bool, optional, default: False
        makes elements easier to recognize, showing them the way you know them from HTML/browser inspection, especially when working in an interactive environment

        default webelement repr:
        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>

        advanced webelement repr
        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

    note: when retrieving a large number of elements (example: find_elements_by_tag("*")) and **printing** them, it takes a little more time to fetch all the reprs

Chrome() parameters

    driver_executable_path=None
     ( = executable_path )
    Use this only if you really need to specify your own chromedriver binary.

    (Don't open issues when you are not using the default. The per-session download happens for a reason. Remember, this is a detection-focused fork.)

    browser_executable_path=None
        ( = browser binary path )
    to specify your browser binary in case it is installed in an unusual location rather than one of the default install folders

    advanced_elements=False
        if set to True, webelements get a nicer repr. This is very convenient when working
        interactively (in IPython, for example).

        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

        instead of

        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
This commit is contained in:
UltrafunkAmsterdam 2022-03-13 23:42:41 +01:00
commit b876db7e9a
3 changed files with 41 additions and 23 deletions

View File

@ -18,7 +18,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
__version__ = "3.1.2"
__version__ = "3.1.5r1"
import json
import logging
@ -107,6 +107,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
enable_cdp_events=False,
service_args=None,
desired_capabilities=None,
advanced_elements=False,
service_log_path=None,
keep_alive=True,
log_level=0,
@ -153,12 +154,26 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
driver.add_cdp_listener("Network.dataReceived", yourcallback)
# yourcallback is an callable which accepts exactly 1 dict as parameter
service_args: list of str, optional, default: None
arguments to pass to the driver service
desired_capabilities: dict, optional, default: None - auto from config
Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working
in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
service_log_path: str, optional, default: None
path to log information from the driver.
@ -361,7 +376,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
desired_capabilities = options.to_capabilities()
if not use_subprocess:
self.browser_pid = start_detached(options.binary_location, *options.arguments)
self.browser_pid = start_detached(
options.binary_location, *options.arguments
)
else:
browser = subprocess.Popen(
[options.binary_location, *options.arguments],
@ -372,8 +389,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
)
self.browser_pid = browser.pid
super(Chrome, self).__init__(
executable_path=patcher.executable_path,
port=port,
@ -395,6 +410,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
reactor.start()
self.reactor = reactor
if advanced_elements:
from .webelement import WebElement
self._web_element_cls = WebElement
if options.headless:
self._configure_headless()
@ -534,9 +554,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"""
if not hasattr(self, "cdp"):
from .cdp import CDP
self.cdp = CDP(self.options)
self.cdp.tab_new(url)
cdp = CDP(self.options)
cdp.tab_new(url)
def reconnect(self, timeout=0.1):
try:
@ -564,7 +583,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def quit(self):
logger.debug("closing webdriver")
if hasattr(self, 'service') and getattr(self.service, 'process', None):
if hasattr(self, "service") and getattr(self.service, "process", None):
self.service.process.kill()
try:
if self.reactor and isinstance(self.reactor, Reactor):
@ -606,11 +625,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# this must come last, otherwise it will throw 'in use' errors
self.patcher = None
def __del__(self):
try:
self.service.process.kill()
except:
except: # noqa
pass
self.quit()
@ -640,7 +658,13 @@ def find_chrome_executable():
candidates = set()
if IS_POSIX:
for item in os.environ.get("PATH").split(os.pathsep):
for subitem in ("google-chrome", "chromium", "chromium-browser", "chrome"):
for subitem in (
"google-chrome",
"chromium",
"chromium-browser",
"chrome",
"google-chrome-stable",
):
candidates.add(os.sep.join((item, subitem)))
if "darwin" in sys.platform:
candidates.update(

View File

@ -62,7 +62,6 @@ class Patcher(object):
prefix = secrets.token_hex(8)
if not executable_path:
self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name])
)
@ -72,7 +71,7 @@ class Patcher(object):
if not executable_path[-4:] == ".exe":
executable_path += ".exe"
# self.zip_path = os.path.join(self.data_path, self.zip_name)
self.zip_path = os.path.join(self.data_path, self.zip_name)
if not executable_path:
self.executable_path = os.path.abspath(
@ -84,11 +83,6 @@ class Patcher(object):
if executable_path:
self._custom_exe_path = True
self.executable_path = executable_path
self.data_path = os.path.dirname(executable_path)
self.zip_path = os.path.join(
os.path.dirname(self.executable_path), self.exe_name
)
self.version_main = version_main
self.version_full = None
@ -130,7 +124,6 @@ class Patcher(object):
self.version_main = release.version[0]
self.version_full = release
self.unzip_package(self.fetch_package())
# i.patch()
return self.patch()
def patch(self):
@ -181,10 +174,12 @@ class Patcher(object):
pass
os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, os.path.dirname(self.zip_path))
os.rename(self.zip_path, self.executable_path)
os.rename(
os.path.join(self.data_path, self.exe_name),
self.executable_path
)
os.remove(fp)
os.chmod(self.executable_path, 0o755)
return self.executable_path

View File

@ -13,6 +13,7 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
"""
_attrs = {}
@property
@ -36,5 +37,3 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs:
strattrs = " " + strattrs
return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>"