changed the way the patcher works (for those using multiple sessions/processes).

    when not specifying an executable_path (the default, and recommended!), the filename
    gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
    (although Chrome/driver itself has restrictions in this as well; you can see this using processhacker).
    As I said before, webdriver is a purely io-based operation which only sends and pulls data, so multiprocessing/threading isn't going to help much. You'd better use asyncio.

find_chrome_executable:
    added google-chrome-stable to the list, as some distros use this name.

 advanced_webelements:  bool, optional, default: False
        makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment

        default webelement repr:
        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>

        advanced webelement repr
        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

    note: when you retrieve a large number of elements ( example: find_elements_by_tag("*") ) and **print** them, it takes a little more time to fetch all the reprs

Chrome() parameters

    driver_executable_path=None
     ( = executable_path )
    if you really need to specify your own chromedriver binary.

    (don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)

    browser_executable_path=None
        ( = browser binary path )
    to specify your browser in case you use an exotic location instead of one of the default install folders

    advanced_elements=False
        if set to True, webelements get a nicer repr. this is very convenient when working
        interactively (in ipython, for example).

        <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>

        instead of

        <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
This commit is contained in:
UltrafunkAmsterdam 2022-03-13 23:05:22 +01:00
parent a4cc4a8b72
commit 087fa8d732
3 changed files with 87 additions and 12 deletions

View File

@ -99,10 +99,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def __init__(
self,
options=None,
user_data_dir=None,
driver_executable_path=None,
browser_executable_path=None,
port=0,
options=None,
enable_cdp_events=False,
service_args=None,
desired_capabilities=None,
@ -125,10 +126,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
Parameters
----------
options: ChromeOptions, optional, default: None - automatic useful defaults
this takes an instance of ChromeOptions, mainly to customize browser behavior.
anything other dan the default, for example extensions or startup options
are not supported in case of failure, and can probably lowers your undetectability.
user_data_dir: str , optional, default: None (creates temp profile)
if user_data_dir is a path to a valid chrome profile directory, use it,
and turn off automatic removal mechanism at exit.
driver_executable_path: str, optional, default: None(=downloads and patches new binary)
browser_executable_path: str, optional, default: None - use find_chrome_executable
Path to the browser executable.
If not specified, make sure the executable's folder is in $PATH
@ -136,11 +145,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
port: int, optional, default: 0
port you would like the service to run, if left as 0, a free port will be found.
options: ChromeOptions, optional, default: None - automatic useful defaults
this takes an instance of ChromeOptions, mainly to customize browser behavior.
anything other dan the default, for example extensions or startup options
are not supported in case of failure, and can probably lowers your undetectability.
enable_cdp_events: bool, default: False
:: currently for chrome only
this enables the handling of wire messages
@ -205,12 +209,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"""
self.debug = debug
patcher = Patcher(
executable_path=None,
executable_path=driver_executable_path,
force=patcher_force_close,
version_main=version_main,
)
patcher.auto()
self.patcher = patcher
if not options:
options = ChromeOptions()
@ -598,6 +602,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
break
time.sleep(0.1)
# dereference patcher, so patcher can start cleaning up as well.
# this must come last, otherwise it will throw 'in use' errors
self.patcher = None
def __del__(self):
try:
self.service.process.kill()

View File

@ -11,6 +11,8 @@ import sys
import zipfile
from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve
import secrets
logger = logging.getLogger(__name__)
@ -57,16 +59,20 @@ class Patcher(object):
self.force = force
self.executable_path = None
prefix = secrets.token_hex(8)
if not executable_path:
self.executable_path = os.path.join(self.data_path, self.exe_name)
self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name])
)
if not IS_POSIX:
if executable_path:
if not executable_path[-4:] == ".exe":
executable_path += ".exe"
self.zip_path = os.path.join(self.data_path, self.zip_name)
# self.zip_path = os.path.join(self.data_path, self.zip_name)
if not executable_path:
self.executable_path = os.path.abspath(
@ -78,6 +84,11 @@ class Patcher(object):
if executable_path:
self._custom_exe_path = True
self.executable_path = executable_path
self.data_path = os.path.dirname(executable_path)
self.zip_path = os.path.join(
os.path.dirname(self.executable_path), self.exe_name
)
self.version_main = version_main
self.version_full = None
@ -169,10 +180,11 @@ class Patcher(object):
except (FileNotFoundError, OSError):
pass
os.makedirs(self.data_path, mode=0o755, exist_ok=True)
os.makedirs(os.path.dirname(self.zip_path), mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, os.path.dirname(self.executable_path))
zf.extract(self.exe_name, os.path.dirname(self.zip_path))
os.rename(self.zip_path, self.executable_path)
os.remove(fp)
os.chmod(self.executable_path, 0o755)
return self.executable_path
@ -237,3 +249,17 @@ class Patcher(object):
self.__class__.__name__,
self.executable_path,
)
def __del__(self):
    """
    Delete the driver binary at ``self.executable_path`` when this object
    is finalized, unless the path was supplied by the user.

    Any error raised by ``os.unlink`` is left to propagate unchanged.
    """
    if self._custom_exe_path:
        # custom exe paths are never deleted (using one also voids support).
        return
    # downloading and patching makes sure you never use the same $cdc values,
    # see patch_exe(). after all, this program has a focus on detectability,
    # so the per-session binary is removed again here.
    os.unlink(self.executable_path)

View File

@ -0,0 +1,40 @@
import selenium.webdriver.remote.webelement
class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    Custom WebElement class which makes it easier to view elements when
    working in an interactive environment.

    standard webelement repr:
    <selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>

    using this WebElement class:
    <WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
    """

    # None means "attributes not fetched yet". A class-level dict combined
    # with a hasattr() check can never trigger the fetch (the class attribute
    # always exists) and would also be shared between all instances.
    _attrs = None

    @property
    def attrs(self):
        """
        Mapping of attribute name -> attribute value for this element.

        The mapping is fetched with a single execute_script call on first
        access and cached on the instance afterwards.
        """
        if self._attrs is None:
            # `index` is declared with `var` so the loop counter does not
            # leak into the page as an implicit global.
            self._attrs = self._parent.execute_script(
                """
                var items = {};
                for (var index = 0; index < arguments[0].attributes.length; ++index)
                {
                    items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
                };
                return items;
                """,
                self,
            )
        return self._attrs

    def __repr__(self):
        """Return an html-like representation: tag name plus its attributes."""
        strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
        if strattrs:
            # separate the attributes from the tag name
            strattrs = " " + strattrs
        return f"<{self.__class__.__name__}(<{self.tag_name}{strattrs}>)>"