Added more compatibility for multithreading / multiprocessing using Chrome(user_multi_procs=True) => ensure you have at least 1 undetected_chromedriver binary in the roaming appdata/undetected_chromedriver folder

This commit is contained in:
unknown 2023-05-09 22:08:53 +02:00
parent bf7dcf8b57
commit e55104be8d
3 changed files with 102 additions and 17 deletions

0
quicktest.py Normal file
View File

View File

@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
from __future__ import annotations from __future__ import annotations
__version__ = "3.4.6" __version__ = "3.4.7"
import json import json
import logging import logging
@ -123,6 +123,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
use_subprocess=True, use_subprocess=True,
debug=False, debug=False,
no_sandbox=True, no_sandbox=True,
user_multi_procs: bool = False,
**kw, **kw,
): ):
""" """
@ -234,6 +235,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag. when running as root without using --no-sandbox flag.
user_multi_procs:
set to True when you are using multithreading/multiprocessing.
ensures that not all processes try to modify a binary which is in use by another process.
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
this requirement can be met by running this program "normally" once and then closing/killing it.
""" """
finalize(self, self._ensure_close, self) finalize(self, self._ensure_close, self)
@ -242,8 +251,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
executable_path=driver_executable_path, executable_path=driver_executable_path,
force=patcher_force_close, force=patcher_force_close,
version_main=version_main, version_main=version_main,
user_multi_procs=user_multi_procs,
) )
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
self.patcher.auto() self.patcher.auto()
# self.patcher = patcher # self.patcher = patcher
if not options: if not options:
options = ChromeOptions() options = ChromeOptions()

View File

@ -5,15 +5,17 @@ from distutils.version import LooseVersion
import io import io
import logging import logging
import os import os
import pathlib
import random import random
import re import re
import shutil
import string import string
import sys import sys
import time import time
from urllib.request import urlopen from urllib.request import urlopen
from urllib.request import urlretrieve from urllib.request import urlretrieve
import zipfile import zipfile
from multiprocessing import Lock
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object): class Patcher(object):
lock = Lock()
url_repo = "https://chromedriver.storage.googleapis.com" url_repo = "https://chromedriver.storage.googleapis.com"
zip_name = "chromedriver_%s.zip" zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s" exe_name = "chromedriver%s"
@ -48,7 +51,13 @@ class Patcher(object):
d = "~/.undetected_chromedriver" d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d)) data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0): def __init__(
self,
executable_path=None,
force=False,
version_main: int = 0,
user_multi_procs=False,
):
""" """
Args: Args:
executable_path: None = automatic executable_path: None = automatic
@ -61,6 +70,7 @@ class Patcher(object):
self.force = force self.force = force
self._custom_exe_path = False self._custom_exe_path = False
prefix = "undetected" prefix = "undetected"
self.user_multi_procs = user_multi_procs
if not os.path.exists(self.data_path): if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True) os.makedirs(self.data_path, exist_ok=True)
@ -78,17 +88,41 @@ class Patcher(object):
self.zip_path = os.path.join(self.data_path, prefix) self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path: if not executable_path:
self.executable_path = os.path.abspath( if not self.user_multi_procs:
os.path.join(".", self.executable_path) self.executable_path = os.path.abspath(
) os.path.join(".", self.executable_path)
)
if executable_path: if executable_path:
self._custom_exe_path = True self._custom_exe_path = True
self.executable_path = executable_path self.executable_path = executable_path
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
def auto(self, executable_path=None, force=False, version_main=None): def auto(self, executable_path=None, force=False, version_main=None, _=None):
"""
Args:
executable_path:
force:
version_main:
Returns:
"""
# if self.user_multi_procs and \
# self.user_multi_procs != -1:
# # -1 being a skip value used later in this block
#
p = pathlib.Path(self.data_path)
with Lock():
files = list(p.rglob("*chromedriver*?"))
for file in files:
if self.is_binary_patched(file):
self.executable_path = str(file)
return True
if executable_path: if executable_path:
self.executable_path = executable_path self.executable_path = executable_path
self._custom_exe_path = True self._custom_exe_path = True
@ -127,6 +161,49 @@ class Patcher(object):
self.unzip_package(self.fetch_package()) self.unzip_package(self.fetch_package())
return self.patch() return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool:
    """
    Naive test to check whether a chromedriver binary is currently in use.

    The file is opened in append+binary mode and then seeked and read.
    A failure at any of these steps is treated as a sign that the file
    is held by something else.

    Args:
        path: a string or PathLike object pointing to the binary to check.
              If not specified, this object's executable_path is used.

    Returns:
        True when the file could not be opened, or when seeking/reading
        it failed; False when every probe succeeded.

    Raises:
        OSError: if the file does not exist.
    """
    if not path:
        path = self.executable_path
    p = pathlib.Path(path)

    if not p.exists():
        raise OSError("file does not exist: %s" % p)
    try:
        with open(p, mode="a+b") as fs:
            try:
                fs.seek(0, 0)
                # since some systems apparently allow seeking on a busy
                # file, we conduct a second test by reading from it
                fs.readline()
            except PermissionError:
                return True
            return False
    except OSError:
        # The file exists but could not even be opened for appending.
        # Previously this path fell through and returned None, which
        # callers would read as "not in use"; report it as in use instead.
        return True
def cleanup_unused_files(self):
    """
    List the entries directly inside data_path whose name contains
    "undetected" and print that list.

    Nothing is removed here; the matches are only printed.
    """
    data_dir = pathlib.Path(self.data_path)
    matches = [*data_dir.glob("*undetected*")]
    print(matches)
def patch(self): def patch(self):
self.patch_exe() self.patch_exe()
return self.is_binary_patched() return self.is_binary_patched()
@ -255,21 +332,17 @@ class Patcher(object):
else: else:
timeout = 3 # stop trying after this many seconds timeout = 3 # stop trying after this many seconds
t = time.monotonic() t = time.monotonic()
while True: now = lambda: time.monotonic()
now = time.monotonic() while now() - t > timeout:
if now - t > timeout: # we don't want to wait until the end of time
# we don't want to wait until the end of time
logger.debug(
"could not unlink %s in time (%d seconds)"
% (self.executable_path, timeout)
)
break
try: try:
if self.user_multi_procs:
break
os.unlink(self.executable_path) os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path) logger.debug("successfully unlinked %s" % self.executable_path)
break break
except (OSError, RuntimeError, PermissionError): except (OSError, RuntimeError, PermissionError):
time.sleep(0.1) time.sleep(0.01)
continue continue
except FileNotFoundError: except FileNotFoundError:
break break