Changed how the patcher works (for those using multiple sessions/processes).

    When no executable_path is specified (the default, and recommended!), the filename
    gets randomized to <somehex>_chromedriver[.exe]. This should fix the issue for multiprocessing
    (although Chrome/driver itself has restrictions here as well; you can see this using Process Hacker).
    As I said before, webdriver is a purely I/O-based operation which only sends and pulls data, so multiprocessing/threading isn't going to help much. You'd be better off using asyncio.

find_chrome_executable:
    Added google-chrome-stable to the list, as some distros use this name.

advanced_elements: bool, optional, default: False
        Makes it easier to recognize elements as you know them from HTML/browser inspection, especially when working in an interactive environment.

        default webelement repr:
        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>

        advanced webelement repr:
        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

    Note: when retrieving a large number of elements (for example, find_elements_by_tag("*")) and **printing** them, it takes a little more time to fetch all the reprs.

Chrome() parameters

    driver_executable_path=None
     ( = executable_path )
    Use this only if you really need to specify your own chromedriver binary.

    (Don't log issues when you are not using the default. The download per session happens for a reason. Remember, this is a detection-focused fork.)

    browser_executable_path=None
        ( = browser binary path )
    Specifies your browser binary in case it lives in an exotic location rather than one of the default install folders.

    advanced_elements=False
        If set to True, webelements get a nicer repr. This is very convenient when working
        interactively (in IPython, for example).

        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

        instead of

        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
This commit is contained in:
UltrafunkAmsterdam 2022-03-13 23:42:41 +01:00
commit a6cf33b0e2
3 changed files with 41 additions and 23 deletions

View File

@ -18,7 +18,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
""" """
__version__ = "3.1.2" __version__ = "3.1.5"
import json import json
import logging import logging
@ -107,6 +107,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
enable_cdp_events=False, enable_cdp_events=False,
service_args=None, service_args=None,
desired_capabilities=None, desired_capabilities=None,
advanced_elements=False,
service_log_path=None, service_log_path=None,
keep_alive=True, keep_alive=True,
log_level=0, log_level=0,
@ -153,12 +154,26 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
driver.add_cdp_listener("Network.dataReceived", yourcallback) driver.add_cdp_listener("Network.dataReceived", yourcallback)
# yourcallback is an callable which accepts exactly 1 dict as parameter # yourcallback is an callable which accepts exactly 1 dict as parameter
service_args: list of str, optional, default: None service_args: list of str, optional, default: None
arguments to pass to the driver service arguments to pass to the driver service
desired_capabilities: dict, optional, default: None - auto from config desired_capabilities: dict, optional, default: None - auto from config
Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref". Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working
in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
service_log_path: str, optional, default: None service_log_path: str, optional, default: None
path to log information from the driver. path to log information from the driver.
@ -361,7 +376,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
desired_capabilities = options.to_capabilities() desired_capabilities = options.to_capabilities()
if not use_subprocess: if not use_subprocess:
self.browser_pid = start_detached(options.binary_location, *options.arguments) self.browser_pid = start_detached(
options.binary_location, *options.arguments
)
else: else:
browser = subprocess.Popen( browser = subprocess.Popen(
[options.binary_location, *options.arguments], [options.binary_location, *options.arguments],
@ -372,8 +389,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
) )
self.browser_pid = browser.pid self.browser_pid = browser.pid
super(Chrome, self).__init__( super(Chrome, self).__init__(
executable_path=patcher.executable_path, executable_path=patcher.executable_path,
port=port, port=port,
@ -395,6 +410,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
reactor.start() reactor.start()
self.reactor = reactor self.reactor = reactor
if advanced_elements:
from .webelement import WebElement
self._web_element_cls = WebElement
if options.headless: if options.headless:
self._configure_headless() self._configure_headless()
@ -534,9 +554,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
""" """
if not hasattr(self, "cdp"): if not hasattr(self, "cdp"):
from .cdp import CDP from .cdp import CDP
cdp = CDP(self.options)
self.cdp = CDP(self.options) cdp.tab_new(url)
self.cdp.tab_new(url)
def reconnect(self, timeout=0.1): def reconnect(self, timeout=0.1):
try: try:
@ -564,7 +583,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def quit(self): def quit(self):
logger.debug("closing webdriver") logger.debug("closing webdriver")
if hasattr(self, 'service') and getattr(self.service, 'process', None): if hasattr(self, "service") and getattr(self.service, "process", None):
self.service.process.kill() self.service.process.kill()
try: try:
if self.reactor and isinstance(self.reactor, Reactor): if self.reactor and isinstance(self.reactor, Reactor):
@ -606,11 +625,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# this must come last, otherwise it will throw 'in use' errors # this must come last, otherwise it will throw 'in use' errors
self.patcher = None self.patcher = None
def __del__(self): def __del__(self):
try: try:
self.service.process.kill() self.service.process.kill()
except: except: # noqa
pass pass
self.quit() self.quit()
@ -640,7 +658,13 @@ def find_chrome_executable():
candidates = set() candidates = set()
if IS_POSIX: if IS_POSIX:
for item in os.environ.get("PATH").split(os.pathsep): for item in os.environ.get("PATH").split(os.pathsep):
for subitem in ("google-chrome", "chromium", "chromium-browser", "chrome"): for subitem in (
"google-chrome",
"chromium",
"chromium-browser",
"chrome",
"google-chrome-stable",
):
candidates.add(os.sep.join((item, subitem))) candidates.add(os.sep.join((item, subitem)))
if "darwin" in sys.platform: if "darwin" in sys.platform:
candidates.update( candidates.update(

View File

@ -62,7 +62,6 @@ class Patcher(object):
prefix = secrets.token_hex(8) prefix = secrets.token_hex(8)
if not executable_path: if not executable_path:
self.executable_path = os.path.join( self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name]) self.data_path, "_".join([prefix, self.exe_name])
) )
@ -72,7 +71,7 @@ class Patcher(object):
if not executable_path[-4:] == ".exe": if not executable_path[-4:] == ".exe":
executable_path += ".exe" executable_path += ".exe"
# self.zip_path = os.path.join(self.data_path, self.zip_name) self.zip_path = os.path.join(self.data_path, self.zip_name)
if not executable_path: if not executable_path:
self.executable_path = os.path.abspath( self.executable_path = os.path.abspath(
@ -84,11 +83,6 @@ class Patcher(object):
if executable_path: if executable_path:
self._custom_exe_path = True self._custom_exe_path = True
self.executable_path = executable_path self.executable_path = executable_path
self.data_path = os.path.dirname(executable_path)
self.zip_path = os.path.join(
os.path.dirname(self.executable_path), self.exe_name
)
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
@ -130,7 +124,6 @@ class Patcher(object):
self.version_main = release.version[0] self.version_main = release.version[0]
self.version_full = release self.version_full = release
self.unzip_package(self.fetch_package()) self.unzip_package(self.fetch_package())
# i.patch()
return self.patch() return self.patch()
def patch(self): def patch(self):
@ -181,10 +174,12 @@ class Patcher(object):
pass pass
os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True) os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf: with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, os.path.dirname(self.zip_path)) zf.extract(self.exe_name, os.path.dirname(self.zip_path))
os.rename(self.zip_path, self.executable_path) os.rename(
os.path.join(self.data_path, self.exe_name),
self.executable_path
)
os.remove(fp) os.remove(fp)
os.chmod(self.executable_path, 0o755) os.chmod(self.executable_path, 0o755)
return self.executable_path return self.executable_path

View File

@ -13,6 +13,7 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)> <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
""" """
_attrs = {} _attrs = {}
@property @property
@ -36,5 +37,3 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs: if strattrs:
strattrs = " " + strattrs strattrs = " " + strattrs
return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>" return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>"