diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml
new file mode 100644
index 0000000..c50660e
--- /dev/null
+++ b/.github/workflows/workflow.yml
@@ -0,0 +1,51 @@
+
+
+# CI smoke test: installs the package on every supported Python version,
+# runs example/test_workflow.py against a freshly installed Chrome and
+# uploads the screenshots / PDF report it produces.
+name: Python package
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Setup Chrome
+      uses: browser-actions/setup-chrome@v1.2.0
+      with:
+        chrome-version: stable
+    - name: set chrome in path
+      run: |
+        echo "/opt/hostedtoolcache/chromium/stable/x64" >> $GITHUB_PATH
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install package
+      # requirements.txt (if present) only lists dependencies; the package
+      # itself must always be installed or the example cannot import it.
+      run: |
+        python -m pip install --upgrade pip
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install -U .
+    - name: run example
+      run: |
+        python example/test_workflow.py
+    - name: Upload a Build Artifact
+      # always(): the screenshots matter most when the example step failed
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        # Artifact name; must be unique per matrix job with upload-artifact v4
+        name: screenshots-${{ matrix.python-version }}
+        # A file, directory or wildcard pattern that describes what to upload
+        path: /home/runner/work/_temp/*p*
+
+
+
+
+
diff --git a/README.md b/README.md
index d405c9c..61b17de 100644
--- a/README.md
+++ b/README.md
@@ -7,9 +7,22 @@ Optimized Selenium Chromedriver patch which does not trigger anti-bot services l
Automatically downloads the driver binary and patches it.
* Tested until current chrome beta versions
-* Works also on Brave Browser and many other Chromium based browsers, some tweaking
+* Also works on Brave Browser and many other Chromium-based browsers, but you need to know what you're doing and it needs some tweaking.
* Python 3.6++**
+
+## Installation ##
+
+```
+pip install undetected-chromedriver
+```
+or, if you're feeling adventurous, install directly from GitHub:
+
+```
+pip install git+https://www.github.com/ultrafunkamsterdam/undetected-chromedriver@master # replace @master with @branchname for other branches
+```
+
+
- - -
## Message for all ##
I will be putting limits on the issue tracker. It has beeen abused too long.
@@ -17,6 +30,33 @@ any good news?
Yes, i've opened [Undetected-Discussions](https://github.com/ultrafunkamsterdam/undetected-chromedriver/discussions) which i think will help us better in the long run.
- - -
+What this is not
+---
+**THIS PACKAGE DOES NOT, and I repeat DOES NOT, hide your IP address, so when running from a datacenter (even a smaller one), chances are high you will not pass! Also, if your IP reputation at home is low, you won't pass!**
+
+Try running the following code from home, and from a datacenter:
+```python
+import undetected_chromedriver as uc
+driver = uc.Chrome(headless=True,use_subprocess=False)
+driver.get('https://nowsecure.nl')
+driver.save_screenshot('nowsecure.png')
+```
+
+
+
+
+
+
+
+
+
+## 3.5.0 ##
+- selenium 4.10 caused some issues. 3.5.0 is compatible and has selenium 4.9 or above pinned. I can't support <4.9 any longer.
+- Removed some kwargs from constructor: service_args, service_creationflags, service_log_path.
+- Added the find_elements_recursive generator function, which is more of a convenience function, as lots of websites seem to serve different content from different frames, making it hard
+  to use find_elements.
+
+
## 3.4.5 ##
- What a week. Had the recent advancedments in Automation-Detection algorithms pwned (so i thought) with 3.4.0, but apparently, for some OS-es this caused an error when interacting with elements. Had to revert back using a different method, fix bugs, and now eventually was still able to stick to the initial idea (+ fixing bugs)
- Update to chrome 110 caused another surprise, this time for HEADLESS users.
@@ -143,11 +183,7 @@ the solution is simple:
**newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255**
-## Installation ##
-```
-pip install undetected-chromedriver
-```
## Usage ##
diff --git a/example/example.py b/example/example.py
index 622975a..48fde5e 100644
--- a/example/example.py
+++ b/example/example.py
@@ -1,10 +1,13 @@
import time
+import logging
+logging.basicConfig(level=10)
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.remote.webdriver import By
import selenium.webdriver.support.expected_conditions as EC # noqa
from selenium.webdriver.support.wait import WebDriverWait
+
import undetected_chromedriver as uc
@@ -164,7 +167,8 @@ def main(args=None):
print("lets go to UC project page")
driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")
- input("press a key if you have RTFM")
+
+ sleep(2)
driver.quit()
diff --git a/example/test_workflow.py b/example/test_workflow.py
new file mode 100644
index 0000000..e4d3914
--- /dev/null
+++ b/example/test_workflow.py
@@ -0,0 +1,121 @@
+# coding: utf-8
+
+import time
+import logging
+import os
+from selenium.webdriver.support.wait import WebDriverWait
+import selenium.webdriver.support.expected_conditions as EC
+from selenium.common.exceptions import TimeoutException
+import undetected_chromedriver as uc
+from pathlib import Path
+
+
+# level 10 == logging.DEBUG: log everything, the output is the CI run's only trace
+logging.basicConfig(level=10)
+logger = logging.getLogger('test')
+
+def main():
+    """
+    Smoke test executed by the GitHub Actions workflow.
+
+    Starts a headless undetected Chrome, visits a handful of pages and
+    stores screenshots and a PDF report under /home/runner/work/_temp.
+    The driver is always quit, even when one of the steps raises.
+    """
+    import base64
+
+    ####
+    # this block is a dirty helper, since several chrome versions
+    # exist on the action runner devices and we need to ensure
+    # the one which is installed by the task is taken.
+    ####
+
+    for k, v in os.environ.items():
+        logger.info("%s = %s" % (k, v))
+    logger.info('==== END ENV ==== ')
+    tmp = Path('/tmp').resolve()
+
+    # None is uc.Chrome's default for this argument; without this
+    # initialisation a missing 'chrome-*' directory caused a NameError below.
+    browser_executable_path = None
+
+    for item in tmp.rglob('**'):
+        logger.info('found %s ' % item)
+
+        if item.is_dir() and 'chrome-' in item.name:
+            logger.info('adding %s to PATH' % str(item))
+            logger.info('current PATH: %s' % str(os.environ.get('PATH')))
+            path_list = os.environ['PATH'].split(os.pathsep)
+            path_list.insert(0, str(item))
+            os.environ['PATH'] = os.pathsep.join(path_list)
+            logger.info('new PATH %s:' % str(os.environ.get('PATH')))
+            browser_executable_path = str(item / 'chrome')
+            break
+
+    if browser_executable_path is None:
+        logger.warning('no chrome-* directory found below %s' % tmp)
+
+    ####
+    # test really starts here
+    ####
+
+    driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path)
+    try:
+        # set the root logger to DEBUG (10) again once the driver exists
+        logging.getLogger().setLevel(10)
+
+        driver.get('chrome://version')
+        driver.save_screenshot('/home/runner/work/_temp/versioninfo.png')
+
+        driver.get('chrome://settings/help')
+        driver.save_screenshot('/home/runner/work/_temp/helpinfo.png')
+
+        driver.get('https://www.google.com')
+        driver.save_screenshot('/home/runner/work/_temp/google.com.png')
+
+        driver.get('https://bot.incolumitas.com/#botChallenge')
+
+        # Page.printToPDF returns the document base64 encoded under 'data'
+        pdfdata = driver.execute_cdp_cmd('Page.printToPDF', {})
+        if pdfdata and 'data' in pdfdata:
+            buffer = base64.b64decode(pdfdata['data'])
+            with open('/home/runner/work/_temp/report.pdf', 'w+b') as f:
+                f.write(buffer)
+
+        driver.get('https://www.nowsecure.nl')
+
+        logger.info('current url %s' % driver.current_url)
+
+        # first wait for a title containing 'moment'; timing out here
+        # is not treated as an error
+        try:
+            WebDriverWait(driver, 15).until(EC.title_contains('moment'))
+        except TimeoutException:
+            pass
+
+        logger.info('current page source:\n%s' % driver.page_source)
+
+        logger.info('current url %s' % driver.current_url)
+
+        try:
+            WebDriverWait(driver, 15).until(EC.title_contains('nowSecure'))
+            logger.info('PASSED CLOUDFLARE!')
+
+        except TimeoutException:
+            logger.info('timeout')
+            print(driver.current_url)
+
+        logger.info('current page source:\n%s\n' % driver.page_source)
+
+        driver.save_screenshot('/home/runner/work/_temp/nowsecure.png')
+    finally:
+        # never leave a browser / chromedriver process behind
+        driver.quit()
+
+
+
+
+
+
+
+
+# run the smoke test when this file is executed as a script
+if __name__ == "__main__":
+ main()
diff --git a/setup.py b/setup.py
index 12b1d87..aaba3a7 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ setup(
version=version,
packages=["undetected_chromedriver"],
install_requires=[
- "selenium>=4.0.0",
+ "selenium>=4.9.0",
"requests",
"websockets",
],
diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py
index ca28b43..2af11bc 100644
--- a/undetected_chromedriver/__init__.py
+++ b/undetected_chromedriver/__init__.py
@@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
from __future__ import annotations
-__version__ = "3.4.6"
+__version__ = "3.5.0"
import json
import logging
@@ -33,7 +33,7 @@ from weakref import finalize
import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver
from selenium.webdriver.common.by import By
-import selenium.webdriver.common.service
+import selenium.webdriver.chromium.service
import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver
@@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path=None,
port=0,
enable_cdp_events=False,
- service_args=None,
- service_creationflags=None,
+ # service_args=None,
+ # service_creationflags=None,
desired_capabilities=None,
advanced_elements=False,
- service_log_path=None,
+ # service_log_path=None,
keep_alive=True,
log_level=0,
headless=False,
@@ -123,6 +123,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
use_subprocess=True,
debug=False,
no_sandbox=True,
+ user_multi_procs: bool = False,
**kw,
):
"""
@@ -234,6 +235,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag.
+
+ user_multi_procs:
+ set to true when you are using multithreads/multiprocessing
+ ensures not all processes are trying to modify a binary which is in use by another.
+ for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
+ this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
+
+
"""
finalize(self, self._ensure_close, self)
@@ -242,8 +251,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
executable_path=driver_executable_path,
force=patcher_force_close,
version_main=version_main,
+ user_multi_procs=user_multi_procs,
)
+ # self.patcher.auto(user_multiprocess = user_multi_num_procs)
self.patcher.auto()
+
# self.patcher = patcher
if not options:
options = ChromeOptions()
@@ -371,9 +383,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.arguments.extend(["--no-sandbox", "--test-type"])
if headless or options.headless:
- if self.patcher.version_main < 108:
- options.add_argument("--headless=chrome")
- elif self.patcher.version_main >= 108:
+ #workaround until a better checking is found
+ try:
+ if self.patcher.version_main < 108:
+ options.add_argument("--headless=chrome")
+ elif self.patcher.version_main >= 108:
+ options.add_argument("--headless=new")
+ except:
+ logger.warning("could not detect version_main."
+ "therefore, we are assuming it is chrome 108 or higher")
options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080")
@@ -427,28 +445,13 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
)
self.browser_pid = browser.pid
- if keep_alive:
- service_args.append('--keep_alive') if service_args else service_args = ['--keep_alive']
-
- if service_creationflags:
- service = selenium.webdriver.common.service.Service(
- self.patcher.executable_path, port, service_args, service_log_path
- )
- for attr_name in ("creationflags", "creation_flags"):
- if hasattr(service, attr_name):
- setattr(service, attr_name, service_creationflags)
- break
- else:
- service = selenium.webdriver.chrome.service.Service(
- self.patcher.executable_path
- )
+ service = selenium.webdriver.chromium.service.ChromiumService(
+ self.patcher.executable_path
+ )
super(Chrome, self).__init__(
- port=port,
- options=options,
- service_args=service_args,
- service_log_path=service_log_path,
- service=service, # needed or the service will be re-created
+ service=service,
+ options=options
)
self.reactor = None
@@ -704,10 +707,45 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not capabilities:
capabilities = self.options.to_capabilities()
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
- capabilities, browser_profile
+ capabilities
)
# super(Chrome, self).start_session(capabilities, browser_profile)
+    def find_elements_recursive(self, by, value):
+        """
+        Yield matching elements from the main document and then from
+        every iframe found in the main document.
+
+        This is a generator on purpose: if a list of elements were
+        returned, they would be stale on arrival. While an element is
+        being yielded, the driver is still switched to the frame the
+        element lives in, so it can be used directly.
+
+        Note that only iframes of the top-level document are visited;
+        iframes nested inside other iframes are not searched.
+
+        Args:
+            by: By
+            value: str
+        Returns: Generator[webelement.WebElement]
+        """
+        def _matches_in(frame=None):
+            if frame:
+                self.switch_to.frame(frame)
+            else:
+                # no frame given: make sure we are on the main content
+                self.switch_to.default_content()
+            yield from self.find_elements(by, value)
+            # switch back to main content, otherwise we will get StaleElementReferenceException
+            self.switch_to.default_content()
+
+        # main document first
+        yield from _matches_in()
+
+        # then every iframe of the main document, one at a time
+        for frame in self.find_elements('css selector', 'iframe'):
+            yield from _matches_in(frame)
+
+
def quit(self):
try:
self.service.process.kill()
@@ -723,7 +761,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
os.kill(self.browser_pid, 15)
logger.debug("gracefully closed browser")
except Exception as e: # noqa
- logger.debug(e, exc_info=True)
+ pass
if (
hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir")
@@ -842,5 +880,7 @@ def find_chrome_executable():
):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates:
+ logger.debug('checking if %s exists and is executable' % candidate)
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
+ logger.debug('found! using %s' % candidate)
return os.path.normpath(candidate)
diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py
index 24da802..d083dc3 100644
--- a/undetected_chromedriver/patcher.py
+++ b/undetected_chromedriver/patcher.py
@@ -5,15 +5,17 @@ from distutils.version import LooseVersion
import io
import logging
import os
+import pathlib
import random
import re
+import shutil
import string
import sys
import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
-
+from multiprocessing import Lock
logger = logging.getLogger(__name__)
@@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object):
+ lock = Lock()
url_repo = "https://chromedriver.storage.googleapis.com"
zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s"
@@ -48,7 +51,13 @@ class Patcher(object):
d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d))
- def __init__(self, executable_path=None, force=False, version_main: int = 0):
+ def __init__(
+ self,
+ executable_path=None,
+ force=False,
+ version_main: int = 0,
+ user_multi_procs=False,
+ ):
"""
Args:
executable_path: None = automatic
@@ -61,6 +70,7 @@ class Patcher(object):
self.force = force
self._custom_exe_path = False
prefix = "undetected"
+ self.user_multi_procs = user_multi_procs
if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True)
@@ -78,17 +88,41 @@ class Patcher(object):
self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path:
- self.executable_path = os.path.abspath(
- os.path.join(".", self.executable_path)
- )
+ if not self.user_multi_procs:
+ self.executable_path = os.path.abspath(
+ os.path.join(".", self.executable_path)
+ )
if executable_path:
self._custom_exe_path = True
self.executable_path = executable_path
+
self.version_main = version_main
self.version_full = None
- def auto(self, executable_path=None, force=False, version_main=None):
+ def auto(self, executable_path=None, force=False, version_main=None, _=None):
+ """
+
+ Args:
+ executable_path:
+ force:
+ version_main:
+
+ Returns:
+
+ """
+ # if self.user_multi_procs and \
+ # self.user_multi_procs != -1:
+ # # -1 being a skip value used later in this block
+ #
+ p = pathlib.Path(self.data_path)
+ with Lock():
+ files = list(p.rglob("*chromedriver*?"))
+ for file in files:
+ if self.is_binary_patched(file):
+ self.executable_path = str(file)
+ return True
+
if executable_path:
self.executable_path = executable_path
self._custom_exe_path = True
@@ -127,6 +161,49 @@ class Patcher(object):
self.unzip_package(self.fetch_package())
return self.patch()
+    def driver_binary_in_use(self, path: str = None) -> bool:
+        """
+        Naive test to check if a found chromedriver binary is
+        currently in use.
+
+        The file is opened in append mode (nothing is written), then a
+        seek and a read are attempted. Any OSError along the way is taken
+        as a sign that another process is holding the file.
+
+        Args:
+            path: a string or PathLike object to the binary to check.
+                  if not specified, this object's executable_path is checked.
+
+        Returns:
+            True when the file could not be opened or accessed,
+            False when it could.
+
+        Raises:
+            OSError: when the file does not exist.
+        """
+        if not path:
+            path = self.executable_path
+        p = pathlib.Path(path)
+
+        if not p.exists():
+            raise OSError("file does not exist: %s" % p)
+        try:
+            with open(p, mode="a+b") as fs:
+                # some systems apparently allow seeking on a file which
+                # is in use, so a read is attempted as well
+                fs.seek(0, 0)
+                fs.readline()
+        except OSError as e:
+            # PermissionError is an OSError too; previously a failing open()
+            # fell through and returned None, i.e. "not in use"
+            logger.debug("could not access %s (%s), assuming it is in use" % (p, e))
+            return True
+        return False
+
+
+    def cleanup_unused_files(self):
+        """
+        Print the entries directly inside data_path whose name
+        contains "undetected".
+
+        NOTE: despite its name, nothing is removed here; the matches
+        are only collected and printed.
+        """
+        data_dir = pathlib.Path(self.data_path)
+        print([entry for entry in data_dir.glob("*undetected*")])
+
def patch(self):
self.patch_exe()
return self.is_binary_patched()
@@ -255,21 +332,17 @@ class Patcher(object):
else:
timeout = 3 # stop trying after this many seconds
t = time.monotonic()
- while True:
- now = time.monotonic()
- if now - t > timeout:
- # we don't want to wait until the end of time
- logger.debug(
- "could not unlink %s in time (%d seconds)"
- % (self.executable_path, timeout)
- )
- break
+ now = lambda: time.monotonic()
+ while now() - t > timeout:
+ # we don't want to wait until the end of time
try:
+ if self.user_multi_procs:
+ break
os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path)
break
except (OSError, RuntimeError, PermissionError):
- time.sleep(0.1)
+ time.sleep(0.01)
continue
except FileNotFoundError:
break