From e55104be8d1f9c513f713da7f6a28836fb174f4a Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 May 2023 22:08:53 +0200 Subject: [PATCH 01/59] added more compatibility for multi thtreads / processing using Chrome(user_multi_procs=True) => ensure you have at least 1 undetected_chromedriver in the roaming appdata/undetected_chromedriver --- quicktest.py | 0 undetected_chromedriver/__init__.py | 14 +++- undetected_chromedriver/patcher.py | 105 +++++++++++++++++++++++----- 3 files changed, 102 insertions(+), 17 deletions(-) create mode 100644 quicktest.py diff --git a/quicktest.py b/quicktest.py new file mode 100644 index 0000000..e69de29 diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 6c73ab2..8a95cd9 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.4.6" +__version__ = "3.4.7" import json import logging @@ -123,6 +123,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): use_subprocess=True, debug=False, no_sandbox=True, + user_multi_procs: bool = False, **kw, ): """ @@ -234,6 +235,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar this option has a default of True since many people seem to run this as root (....) , and chrome does not start when running as root without using --no-sandbox flag. + + user_multi_procs: + set to true when you are using multithreads/multiprocessing + ensures not all processes are trying to modify a binary which is in use by another. + for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER. + this requirement can be done by just running this program "normal" and close/kill it. + + """ finalize(self, self._ensure_close, self) @@ -242,8 +251,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): executable_path=driver_executable_path, force=patcher_force_close, version_main=version_main, + user_multi_procs=user_multi_procs, ) + # self.patcher.auto(user_multiprocess = user_multi_num_procs) self.patcher.auto() + # self.patcher = patcher if not options: options = ChromeOptions() diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index 24da802..d083dc3 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -5,15 +5,17 @@ from distutils.version import LooseVersion import io import logging import os +import pathlib import random import re +import shutil import string import sys import time from urllib.request import urlopen from urllib.request import urlretrieve import zipfile - +from multiprocessing import Lock logger = logging.getLogger(__name__) @@ -21,6 +23,7 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2")) class Patcher(object): + lock = Lock() url_repo = "https://chromedriver.storage.googleapis.com" zip_name = "chromedriver_%s.zip" exe_name = "chromedriver%s" @@ -48,7 +51,13 @@ class Patcher(object): d = "~/.undetected_chromedriver" data_path = os.path.abspath(os.path.expanduser(d)) - def __init__(self, executable_path=None, force=False, version_main: int = 0): + def __init__( + self, + executable_path=None, + force=False, + version_main: int = 0, + user_multi_procs=False, + ): """ Args: executable_path: None = automatic @@ -61,6 +70,7 @@ class Patcher(object): self.force = force self._custom_exe_path = False prefix = "undetected" + self.user_multi_procs = user_multi_procs if not os.path.exists(self.data_path): os.makedirs(self.data_path, exist_ok=True) @@ -78,17 +88,41 @@ class Patcher(object): self.zip_path = os.path.join(self.data_path, prefix) if not executable_path: - self.executable_path = os.path.abspath( - os.path.join(".", self.executable_path) - ) + if not self.user_multi_procs: + self.executable_path = os.path.abspath( + os.path.join(".", self.executable_path) + ) if executable_path: self._custom_exe_path = True self.executable_path = executable_path + self.version_main = version_main self.version_full = None - def auto(self, executable_path=None, force=False, version_main=None): + def auto(self, executable_path=None, force=False, version_main=None, _=None): + """ + + Args: + executable_path: + force: + version_main: + + Returns: + + """ + # if self.user_multi_procs and \ + # self.user_multi_procs != -1: + # # -1 being a skip value used later in this block + # + p = pathlib.Path(self.data_path) + with Lock(): + files = list(p.rglob("*chromedriver*?")) + for file in files: + if self.is_binary_patched(file): + self.executable_path = str(file) + return True + if executable_path: self.executable_path = executable_path self._custom_exe_path = True @@ -127,6 +161,49 @@ class Patcher(object): self.unzip_package(self.fetch_package()) return self.patch() + def driver_binary_in_use(self, path: str = None) -> bool: + """ + naive test to check if a found chromedriver binary is + currently in use + + Args: + path: a string or PathLike object to the binary to check. + if not specified, we check use this object's executable_path + """ + if not path: + path = self.executable_path + p = pathlib.Path(path) + + if not p.exists(): + raise OSError("file does not exist: %s" % p) + try: + with open(p, mode="a+b") as fs: + exc = [] + try: + + fs.seek(0, 0) + except PermissionError as e: + exc.append(e) # since some systems apprently allow seeking + # we conduct another test + try: + fs.readline() + except PermissionError as e: + exc.append(e) + + if exc: + + return True + return False + # ok safe to assume this is in use + except Exception as e: + # logger.exception("whoops ", e) + pass + + def cleanup_unused_files(self): + p = pathlib.Path(self.data_path) + items = list(p.glob("*undetected*")) + print(items) + def patch(self): self.patch_exe() return self.is_binary_patched() @@ -255,21 +332,17 @@ class Patcher(object): else: timeout = 3 # stop trying after this many seconds t = time.monotonic() - while True: - now = time.monotonic() - if now - t > timeout: - # we don't want to wait until the end of time - logger.debug( - "could not unlink %s in time (%d seconds)" - % (self.executable_path, timeout) - ) - break + now = lambda: time.monotonic() + while now() - t > timeout: + # we don't want to wait until the end of time try: + if self.user_multi_procs: + break os.unlink(self.executable_path) logger.debug("successfully unlinked %s" % self.executable_path) break except (OSError, RuntimeError, PermissionError): - time.sleep(0.1) + time.sleep(0.01) continue except FileNotFoundError: break From 6eab5577f15eb6e6f88cdaeb9389e9722b3a27a2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 May 2023 22:18:22 +0200 Subject: [PATCH 02/59] 3.4.7 - added 'more' compatibility for multi thtreads / processing using Chrome(user_multi_procs=True) => ensure you have at least 1 undetected_chromedriver binary in the roaming appdata/undetected_chromedriver before using --- undetected_chromedriver/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 8a95cd9..cc66e29 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -240,7 +240,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): set to true when you are using multithreads/multiprocessing ensures not all processes are trying to modify a binary which is in use by another. for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER. - this requirement can be done by just running this program "normal" and close/kill it. + this requirement can be easily satisfied, by just running this program "normal" and close/kill it. """ From d29b3e300fe75aa878e0313bce37a1816d6bd4c0 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 May 2023 22:26:07 +0200 Subject: [PATCH 03/59] 3.4.7 --- quicktest.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 quicktest.py diff --git a/quicktest.py b/quicktest.py deleted file mode 100644 index e69de29..0000000 From c2270d4c4617f3285b93accef89df5f6bb6de16d Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:41:50 +0200 Subject: [PATCH 04/59] Create main.yml --- .github/workflows/main.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..8ef8d25 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,30 @@ +name: Undetected Chromedriver + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install -U . ; fi + - name: run example + run: | + python example/example.py + From aa9ba6fbb23ef0cce4bf3bd527623fb6b3107d96 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:43:29 +0200 Subject: [PATCH 05/59] Update main.yml --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8ef8d25..1097de9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 From 0532d8cd78aa3c85435ec664939164b20be58868 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:48:52 +0200 Subject: [PATCH 06/59] Update main.yml --- .github/workflows/main.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1097de9..f1e6a12 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,11 +10,13 @@ jobs: build: runs-on: ubuntu-latest strategy: - fail-fast: false + fail-fast: true matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: + - name: Setup Chrome + uses: browser-actions/setup-chrome@v1.2.0 - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 From 841d13d4e99a749c45bd0c3fa53ca955d300684c Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:54:37 +0200 Subject: [PATCH 07/59] Delete main.yml --- .github/workflows/main.yml | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index f1e6a12..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Undetected Chromedriver - -on: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: true - matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] - - steps: - - name: Setup Chrome - uses: browser-actions/setup-chrome@v1.2.0 - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install package - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install -U . ; fi - - name: run example - run: | - python example/example.py - From bdcef14e80b744146a52ec32212960288cc38a37 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:56:42 +0200 Subject: [PATCH 08/59] Create workflow.yml --- .github/workflows/workflow.yml | 35 ++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/workflow.yml diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml new file mode 100644 index 0000000..2edfdef --- /dev/null +++ b/.github/workflows/workflow.yml @@ -0,0 +1,35 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10","3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Setup Chrome + uses: browser-actions/setup-chrome@v1.2.0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install -U . ; fi + - name: run example + run: | + python example/example.py From 2abe29782f508188e6b4bd3174dd53508de6c14a Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 21:59:18 +0200 Subject: [PATCH 09/59] Update workflow.yml --- .github/workflows/workflow.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 2edfdef..6d9830a 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -21,7 +21,11 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Chrome + # You may pin to the exact commit or the version. + # uses: browser-actions/setup-chrome@c485fa3bab6be59dce18dbc18ef6ab7cbc8ff5f1 uses: browser-actions/setup-chrome@v1.2.0 + # The Google Chrome/Chromium version to install and use. + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: From 6e372935bbd6d7fd9fdd6f7ce85668369f34b9a2 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:07:45 +0200 Subject: [PATCH 10/59] Update workflow.yml --- .github/workflows/workflow.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 6d9830a..cd97619 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -1,5 +1,4 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + name: Python package From a24541ab27a2d7cab87920f6ce16bb21f73af7f4 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:21:19 +0200 Subject: [PATCH 11/59] Update workflow.yml --- .github/workflows/workflow.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index cd97619..736fdf0 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -20,11 +20,10 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Chrome - # You may pin to the exact commit or the version. - # uses: browser-actions/setup-chrome@c485fa3bab6be59dce18dbc18ef6ab7cbc8ff5f1 uses: browser-actions/setup-chrome@v1.2.0 - # The Google Chrome/Chromium version to install and use. - + - name: set chrome in path + run: | + echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: From fad2b621c917c092d9961b686ae8b2297accde8a Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:26:00 +0200 Subject: [PATCH 12/59] Update example.py --- example/example.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/example/example.py b/example/example.py index 622975a..48fde5e 100644 --- a/example/example.py +++ b/example/example.py @@ -1,10 +1,13 @@ import time +import logging +logging.basicConfig(level=10) from selenium.common.exceptions import WebDriverException from selenium.webdriver.remote.webdriver import By import selenium.webdriver.support.expected_conditions as EC # noqa from selenium.webdriver.support.wait import WebDriverWait + import undetected_chromedriver as uc @@ -164,7 +167,8 @@ def main(args=None): print("lets go to UC project page") driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") - input("press a key if you have RTFM") + + sleep(2) driver.quit() From 522588179911319c22a70a40825a720467926142 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:34:33 +0200 Subject: [PATCH 13/59] Update __init__.py --- undetected_chromedriver/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index cc66e29..1f9d437 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -733,7 +733,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): os.kill(self.browser_pid, 15) logger.debug("gracefully closed browser") except Exception as e: # noqa - logger.debug(e, exc_info=True) + pass if ( hasattr(self, "keep_user_data_dir") and hasattr(self, "user_data_dir") From 1a1ba7350b0da2ca5e6f9ec5e90bc4e88779dddf Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:42:43 +0200 Subject: [PATCH 14/59] Update __init__.py --- undetected_chromedriver/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 1f9d437..5bae19e 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -852,5 +852,7 @@ def find_chrome_executable(): ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: + logger.debug('checking if %s exists and is executable' % condidate) if os.path.exists(candidate) and os.access(candidate, os.X_OK): + logger.debug('found! using %s' % candidate) return os.path.normpath(candidate) From 874cf2852b4750f7e9d20085a585784a4c1c373f Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 22:44:05 +0200 Subject: [PATCH 15/59] Update __init__.py --- undetected_chromedriver/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 5bae19e..5934f26 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -852,7 +852,7 @@ def find_chrome_executable(): ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: - logger.debug('checking if %s exists and is executable' % condidate) + logger.debug('checking if %s exists and is executable' % candidate) if os.path.exists(candidate) and os.access(candidate, os.X_OK): logger.debug('found! using %s' % candidate) return os.path.normpath(candidate) From 46b000ae2123059f37fdb70bbe390b9b8a62df5b Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:04:09 +0200 Subject: [PATCH 16/59] Update __init__.py --- undetected_chromedriver/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 5934f26..0ddd8d0 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -383,10 +383,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): options.arguments.extend(["--no-sandbox", "--test-type"]) if headless or options.headless: - if self.patcher.version_main < 108: - options.add_argument("--headless=chrome") - elif self.patcher.version_main >= 108: - options.add_argument("--headless=new") + #workaround until a better checking is found + options.add_argument("--headless=new") + #if self.patcher.version_main < 108: + # options.add_argument("--headless=chrome") + #elif self.patcher.version_main >= 108: + options.add_argument("--window-size=1920,1080") options.add_argument("--start-maximized") From c2042fb7588d1b6dec19a7717d68144f3ccc33c2 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:07:37 +0200 Subject: [PATCH 17/59] Create test_workflow.py --- example/test_workflow.py | 184 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 example/test_workflow.py diff --git a/example/test_workflow.py b/example/test_workflow.py new file mode 100644 index 0000000..712ab07 --- /dev/null +++ b/example/test_workflow.py @@ -0,0 +1,184 @@ +import time +import logging +logging.basicConfig(level=10) + +from selenium.common.exceptions import WebDriverException +from selenium.webdriver.remote.webdriver import By +import selenium.webdriver.support.expected_conditions as EC # noqa +from selenium.webdriver.support.wait import WebDriverWait + + +import undetected_chromedriver as uc + + +def main(args=None): + TAKE_IT_EASY = True + + if args: + TAKE_IT_EASY = ( + args.no_sleeps + ) # so the demo is 'follow-able' instead of some flashes and boom => done. set it how you like + + if TAKE_IT_EASY: + sleep = time.sleep + else: + sleep = lambda n: print( + "we could be sleeping %d seconds here, but we don't" % n + ) + + driver = uc.Chrome(headless=True) + driver.get("https://www.google.com") + + # accept the terms + driver.find_elements(By.XPATH, '//*[contains(text(), "Reject all")]')[ + -1 + ].click() # ;) + + inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') + + inp_search.send_keys( + "site:stackoverflow.com undetected chromedriver\n" + ) # \n as equivalent of ENTER key + + results_container = WebDriverWait(driver, timeout=3).until( + EC.presence_of_element_located((By.ID, "rso")) + ) + + driver.execute_script( + """ + let container = document.querySelector('#rso'); + let el = document.createElement('div'); + el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em'; + el.textContent = "Excluded from support...!"; + container.insertAdjacentElement('afterBegin', el); + setTimeout(() => { + el.textContent = "<<< OH , CHECK YOUR CONSOLE! >>>"}, 2500) + + """ + ) + + sleep(2) # never use this. this is for demonstration purposes only + + for item in results_container.children("a", recursive=True): + print(item) + + # switching default WebElement for uc.WebElement and do it again + driver._web_element_cls = uc.UCWebElement + + print("switched to use uc.WebElement. which is more descriptive") + results_container = driver.find_element(By.ID, "rso") + + # gets only direct children of results_container + # children is a method unique for undetected chromedriver. it is + # incompatible when you use regular chromedriver + for item in results_container.children(): + print(item.tag_name) + for grandchild in item.children(recursive=True): + print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text) + + print("lets go to image search") + inp_search = driver.find_element(By.XPATH, '//input[@name="q"]') + inp_search.clear() + inp_search.send_keys("hot girls\n") # \n as equivalent of ENTER + + body = driver.find_element(By.TAG_NAME, "body") + body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe() + + # you can't reuse the body from above, because we are on another page right now + # so the body above is not attached anymore + image_search_body = WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + + # gets all images and prints the src + print("getting image sources data, hold on...") + + for item in image_search_body.children("img", recursive=True): + print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n") + + USELESS_SITES = [ + "https://www.trumpdonald.org", + "https://www.isitchristmas.com", + "https://isnickelbacktheworstbandever.tumblr.com", + "https://www.isthatcherdeadyet.co.uk", + "https://whitehouse.gov", + "https://www.nsa.gov", + "https://kimjongillookingatthings.tumblr.com", + "https://instantrimshot.com", + "https://www.nyan.cat", + "https://twitter.com", + ] + + print("opening 9 additinal windows and control them") + sleep(1) # never use this. this is for demonstration purposes only + for _ in range(9): + driver.window_new() + + print("now we got 10 windows") + sleep(1) + print("using the new windows to open 9 other useless sites") + sleep(1) # never use this. this is for demonstration purposes only + + for idx in range(1, 10): + # skip the first handle which is our original window + print("opening ", USELESS_SITES[idx]) + driver.switch_to.window(driver.window_handles[idx]) + + # because of geographical location, (corporate) firewalls and 1001 + # other reasons why a connection could be dropped we will use a try/except clause here. + try: + driver.get(USELESS_SITES[idx]) + except WebDriverException as e: + print( + ( + "webdriver exception. this is not an issue in chromedriver, but rather " + "an issue specific to your current connection. message:", + e.args, + ) + ) + continue + + for handle in driver.window_handles[1:]: + driver.switch_to.window(handle) + print("look. %s is working" % driver.current_url) + sleep(1) # never use this. it is here only so you can follow along + + print( + "close windows (including the initial one!), but keep the last new opened window" + ) + sleep(4) # never use this. wait until nowsecure passed the bot checks + + for handle in driver.window_handles[:-1]: + driver.switch_to.window(handle) + print("look. %s is closing" % driver.current_url) + sleep(1) + driver.close() + + # attach to the last open window + driver.switch_to.window(driver.window_handles[0]) + print("now we only got ", driver.current_url, "left") + + sleep(1) + + driver.get("https://www.nowsecure.nl") + + sleep(5) + + print("lets go to UC project page") + driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") + + + sleep(2) + + driver.quit() + + +if __name__ == "__main__": + import argparse + p = argparse.ArgumentParser() + p.add_argument("--no-sleeps", "-ns", action="store_false") + a = p.parse_args() + try: + main(a) + except: + logging.getLogger(__name__).debug('woops', exc_info=True) From ddc61cfa5ae97f85f2a93450081c46ca5cb873dd Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:08:18 +0200 Subject: [PATCH 18/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 736fdf0..9480667 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -34,4 +34,4 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install -U . ; fi - name: run example run: | - python example/example.py + python example/test_workflow.py From 1477e31625f3c504bc46a7c7753c5b6be47ae958 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:17:53 +0200 Subject: [PATCH 19/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 9480667..7a6d68a 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -21,6 +21,8 @@ jobs: - uses: actions/checkout@v3 - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 + with: + chrome-version: 114 - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From a2808543a996ef923a98ee9a3959e1f767d73f45 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:20:08 +0200 Subject: [PATCH 20/59] Update test_workflow.py --- example/test_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 712ab07..4574a78 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -26,7 +26,7 @@ def main(args=None): "we could be sleeping %d seconds here, but we don't" % n ) - driver = uc.Chrome(headless=True) + driver = uc.Chrome(headless=True, version_main=113) driver.get("https://www.google.com") # accept the terms From 594908a612656f2f6254c13e58b54e292863f2f3 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:20:31 +0200 Subject: [PATCH 21/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 7a6d68a..cfc18e0 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 with: - chrome-version: 114 + chrome-version: 113 - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From 8cc697e81a9fb0d072ca0dd1301f33c921d4f777 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:24:45 +0200 Subject: [PATCH 22/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index cfc18e0..5550f58 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 with: - chrome-version: 113 + chrome-version: 113.0.5672.63 - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From b3d7e66da70598661fa8f094b3674f99bef61000 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:25:40 +0200 Subject: [PATCH 23/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 5550f58..4c7d165 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 with: - chrome-version: 113.0.5672.63 + chrome-version: "113.0.5672.63" - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From 2713a38ddcfa414a7bea4ba557c43844e5982436 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:27:41 +0200 Subject: [PATCH 24/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 4c7d165..9480667 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -21,8 +21,6 @@ jobs: - uses: actions/checkout@v3 - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 - with: - chrome-version: "113.0.5672.63" - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From 1af7b3da0c0fe3aa890a131accd698b99c158e06 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:39:15 +0200 Subject: [PATCH 25/59] Update test_workflow.py --- example/test_workflow.py | 178 ++------------------------------------- 1 file changed, 8 insertions(+), 170 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 4574a78..cc1a947 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -4,181 +4,19 @@ logging.basicConfig(level=10) from selenium.common.exceptions import WebDriverException from selenium.webdriver.remote.webdriver import By -import selenium.webdriver.support.expected_conditions as EC # noqa +import selenium.webdriver.support.expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait - import undetected_chromedriver as uc - -def main(args=None): - TAKE_IT_EASY = True - - if args: - TAKE_IT_EASY = ( - args.no_sleeps - ) # so the demo is 'follow-able' instead of some flashes and boom => done. set it how you like - - if TAKE_IT_EASY: - sleep = time.sleep - else: - sleep = lambda n: print( - "we could be sleeping %d seconds here, but we don't" % n - ) - - driver = uc.Chrome(headless=True, version_main=113) - driver.get("https://www.google.com") - - # accept the terms - driver.find_elements(By.XPATH, '//*[contains(text(), "Reject all")]')[ - -1 - ].click() # ;) - - inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') - - inp_search.send_keys( - "site:stackoverflow.com undetected chromedriver\n" - ) # \n as equivalent of ENTER key - - results_container = WebDriverWait(driver, timeout=3).until( - EC.presence_of_element_located((By.ID, "rso")) - ) - - driver.execute_script( - """ - let container = document.querySelector('#rso'); - let el = document.createElement('div'); - el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em'; - el.textContent = "Excluded from support...!"; - container.insertAdjacentElement('afterBegin', el); - setTimeout(() => { - el.textContent = "<<< OH , CHECK YOUR CONSOLE! >>>"}, 2500) - - """ - ) - - sleep(2) # never use this. this is for demonstration purposes only - - for item in results_container.children("a", recursive=True): - print(item) - - # switching default WebElement for uc.WebElement and do it again - driver._web_element_cls = uc.UCWebElement - - print("switched to use uc.WebElement. which is more descriptive") - results_container = driver.find_element(By.ID, "rso") - - # gets only direct children of results_container - # children is a method unique for undetected chromedriver. it is - # incompatible when you use regular chromedriver - for item in results_container.children(): - print(item.tag_name) - for grandchild in item.children(recursive=True): - print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text) - - print("lets go to image search") - inp_search = driver.find_element(By.XPATH, '//input[@name="q"]') - inp_search.clear() - inp_search.send_keys("hot girls\n") # \n as equivalent of ENTER - - body = driver.find_element(By.TAG_NAME, "body") - body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe() - - # you can't reuse the body from above, because we are on another page right now - # so the body above is not attached anymore - image_search_body = WebDriverWait(driver, 5).until( - EC.presence_of_element_located((By.TAG_NAME, "body")) - ) - - # gets all images and prints the src - print("getting image sources data, hold on...") - - for item in image_search_body.children("img", recursive=True): - print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n") - - USELESS_SITES = [ - "https://www.trumpdonald.org", - "https://www.isitchristmas.com", - "https://isnickelbacktheworstbandever.tumblr.com", - "https://www.isthatcherdeadyet.co.uk", - "https://whitehouse.gov", - "https://www.nsa.gov", - "https://kimjongillookingatthings.tumblr.com", - "https://instantrimshot.com", - "https://www.nyan.cat", - "https://twitter.com", - ] - - print("opening 9 additinal windows and control them") - sleep(1) # never use this. this is for demonstration purposes only - for _ in range(9): - driver.window_new() - - print("now we got 10 windows") - sleep(1) - print("using the new windows to open 9 other useless sites") - sleep(1) # never use this. this is for demonstration purposes only - - for idx in range(1, 10): - # skip the first handle which is our original window - print("opening ", USELESS_SITES[idx]) - driver.switch_to.window(driver.window_handles[idx]) - - # because of geographical location, (corporate) firewalls and 1001 - # other reasons why a connection could be dropped we will use a try/except clause here. - try: - driver.get(USELESS_SITES[idx]) - except WebDriverException as e: - print( - ( - "webdriver exception. this is not an issue in chromedriver, but rather " - "an issue specific to your current connection. message:", - e.args, - ) - ) - continue - - for handle in driver.window_handles[1:]: - driver.switch_to.window(handle) - print("look. %s is working" % driver.current_url) - sleep(1) # never use this. it is here only so you can follow along - - print( - "close windows (including the initial one!), but keep the last new opened window" - ) - sleep(4) # never use this. wait until nowsecure passed the bot checks - - for handle in driver.window_handles[:-1]: - driver.switch_to.window(handle) - print("look. %s is closing" % driver.current_url) - sleep(1) - driver.close() - - # attach to the last open window - driver.switch_to.window(driver.window_handles[0]) - print("now we only got ", driver.current_url, "left") - - sleep(1) - - driver.get("https://www.nowsecure.nl") - - sleep(5) - - print("lets go to UC project page") - driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") - - - sleep(2) - +def main() + driver = uc.Chrome(browser_executable_path="/opt/hostedtoolcache/chromium/latest/x64/chrome", headless=True) + driver.get("https://nowsecure.nl") + WebDriverWait(driver, 15).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) driver.quit() if __name__ == "__main__": - import argparse - p = argparse.ArgumentParser() - p.add_argument("--no-sleeps", "-ns", action="store_false") - a = p.parse_args() - try: - main(a) - except: - logging.getLogger(__name__).debug('woops', exc_info=True) + + main(a) + From feef830190bbbf50eaf5dfd25d3308210e14c179 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:40:15 +0200 Subject: [PATCH 26/59] Update test_workflow.py --- example/test_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index cc1a947..3036da2 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -9,7 +9,7 @@ from selenium.webdriver.support.wait import WebDriverWait import undetected_chromedriver as uc -def main() +def main(): driver = uc.Chrome(browser_executable_path="/opt/hostedtoolcache/chromium/latest/x64/chrome", headless=True) driver.get("https://nowsecure.nl") WebDriverWait(driver, 15).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) @@ -18,5 +18,5 @@ def main() if __name__ == "__main__": - main(a) + main() From b23d189209293072ef450d59debdee81afa17aa5 Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:51:11 +0200 Subject: [PATCH 27/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 9480667..1c4ce06 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -21,6 +21,8 @@ jobs: - uses: actions/checkout@v3 - name: Setup Chrome uses: browser-actions/setup-chrome@v1.2.0 + with: + chrome-version: stable - name: set chrome in path run: | echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH From cbb1cd268dacc2248d4aa702af43c30f0cbd1f0c Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:55:06 +0200 Subject: [PATCH 28/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 1c4ce06..cafc843 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,7 +25,7 @@ jobs: chrome-version: stable - name: set chrome in path run: | - echo "/opt/hostedtoolcache/chromium/latest/x64/chrome" >> $GITHUB_PATH + echo "/opt/hostedtoolcache/chromium/stable/x64" >> $GITHUB_PATH - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: From 5c78b7cca30a0274440fb75a5f9d51caf77bb68b Mon Sep 17 00:00:00 2001 From: Leon Date: Fri, 2 Jun 2023 23:56:53 +0200 Subject: [PATCH 29/59] Update test_workflow.py --- example/test_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 3036da2..345fb22 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -10,7 +10,7 @@ from selenium.webdriver.support.wait import WebDriverWait import undetected_chromedriver as uc def main(): - driver = uc.Chrome(browser_executable_path="/opt/hostedtoolcache/chromium/latest/x64/chrome", headless=True) + driver = uc.Chrome(headless=True) driver.get("https://nowsecure.nl") WebDriverWait(driver, 15).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) driver.quit() From 4e81361ced3754daae17a10c0d47325c0702bc97 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:06:25 +0200 Subject: [PATCH 30/59] Update test_workflow.py --- example/test_workflow.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 345fb22..b1705c4 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -1,14 +1,25 @@ import time import logging +import os logging.basicConfig(level=10) -from selenium.common.exceptions import WebDriverException from selenium.webdriver.remote.webdriver import By import selenium.webdriver.support.expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait - +from pathlib import Path import undetected_chromedriver as uc +# due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally +tmp = Path('/tmp') +for item in tmp.glob('chrome*'): + if item.is_dir(): + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + + + + def main(): driver = uc.Chrome(headless=True) driver.get("https://nowsecure.nl") From 4db5ce60c5e522efc338cfc570978f52d7e19dc7 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:10:28 +0200 Subject: [PATCH 31/59] Update test_workflow.py --- example/test_workflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/example/test_workflow.py b/example/test_workflow.py index b1705c4..b2d597d 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -12,6 +12,8 @@ import undetected_chromedriver as uc # due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally tmp = Path('/tmp') for item in tmp.glob('chrome*'): + print(item) + time.sleep(1) if item.is_dir(): path_list = os.environ['PATH'].split(os.pathsep) path_list.insert(0, str(item)) From bd81310c268f49eda99899d41a58cf6099fca49b Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:15:57 +0200 Subject: [PATCH 32/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index cafc843..0a91970 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,7 +25,7 @@ jobs: chrome-version: stable - name: set chrome in path run: | - echo "/opt/hostedtoolcache/chromium/stable/x64" >> $GITHUB_PATH + echo ${{ steps.setup-chrome.outputs.chrome-path}} >> $GITHUB_PATH - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: From 0ecf670b5732932c6636d17a74142229a3797163 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:21:24 +0200 Subject: [PATCH 33/59] Update test_workflow.py --- example/test_workflow.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index b2d597d..95d5821 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -1,35 +1,36 @@ -import time import logging import os -logging.basicConfig(level=10) -from selenium.webdriver.remote.webdriver import By +logging.basicConfig(level=10) +logger = logging.getLogger(__name__) + import selenium.webdriver.support.expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait from pathlib import Path import undetected_chromedriver as uc -# due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally -tmp = Path('/tmp') -for item in tmp.glob('chrome*'): - print(item) - time.sleep(1) - if item.is_dir(): - path_list = os.environ['PATH'].split(os.pathsep) - path_list.insert(0, str(item)) - os.environ['PATH'] = os.pathsep.join(path_list) - - - def main(): + + # due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally + + for k,v in os.environ.items(): + logger.info("%s = %s" % (k,v)) + tmp = Path('/tmp') + + for item in tmp.glob('chrome*'): + print(item) + if item.is_dir(): + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + time.sleep(5) driver = uc.Chrome(headless=True) driver.get("https://nowsecure.nl") - WebDriverWait(driver, 15).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) + WebDriverWait(driver, 15).until( + EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) driver.quit() if __name__ == "__main__": - main() - From 591fe54cc161f241047a2ba137765ca70a2fe5ae Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:23:28 +0200 Subject: [PATCH 34/59] Update test_workflow.py --- example/test_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 95d5821..7ec17a8 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -1,6 +1,6 @@ import logging import os - +import time logging.basicConfig(level=10) logger = logging.getLogger(__name__) From bebcdc21b56b3226c25ab543ef7082a6e1998759 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:34:07 +0200 Subject: [PATCH 35/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 0a91970..cafc843 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,7 +25,7 @@ jobs: chrome-version: stable - name: set chrome in path run: | - echo ${{ steps.setup-chrome.outputs.chrome-path}} >> $GITHUB_PATH + echo "/opt/hostedtoolcache/chromium/stable/x64" >> $GITHUB_PATH - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: From e10ac0382b6c3801f6140de8d12ab6d4f9067646 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 00:40:33 +0200 Subject: [PATCH 36/59] Update test_workflow.py --- example/test_workflow.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 7ec17a8..ac9a438 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -16,14 +16,18 @@ def main(): for k,v in os.environ.items(): logger.info("%s = %s" % (k,v)) - tmp = Path('/tmp') + tmp = Path('/tmp').resolve() - for item in tmp.glob('chrome*'): + for item in tmp.rglob('**'): + print(item) + if item.is_dir(): - path_list = os.environ['PATH'].split(os.pathsep) - path_list.insert(0, str(item)) - os.environ['PATH'] = os.pathsep.join(path_list) + if 'chrome-' in item.name: + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + break time.sleep(5) driver = uc.Chrome(headless=True) driver.get("https://nowsecure.nl") From c53876a83ad77a7aba9d94b91a17c02d68202fd7 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:31:47 +0200 Subject: [PATCH 37/59] Update test_workflow.py --- example/test_workflow.py | 72 +++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index ac9a438..5af26c2 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -1,20 +1,17 @@ -import logging -import os +# coding: utf-8 + import time -logging.basicConfig(level=10) -logger = logging.getLogger(__name__) - -import selenium.webdriver.support.expected_conditions as EC +import logging from selenium.webdriver.support.wait import WebDriverWait -from pathlib import Path -import undetected_chromedriver as uc +import selenium.webdriver.support.expected_conditions as EC +from selenium.common.exceptions import TimeoutException +import undetected_chromedriver as uc + +logging.basicConfig(level=10) +logger = logging.getLogger('test') -def main(): - - # due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally - - for k,v in os.environ.items(): +for k,v in os.environ.items(): logger.info("%s = %s" % (k,v)) tmp = Path('/tmp').resolve() @@ -28,13 +25,48 @@ def main(): path_list.insert(0, str(item)) os.environ['PATH'] = os.pathsep.join(path_list) break + +driver = uc.Chrome(headless=True) +driver.get('https://www.nowsecure.nl') + +print(driver.current_url) + +try: + WebDriverWait(driver,10).until(EC.visibility_of_element_located(("css selector", "body"))) +except TimeoutException: + pass +print(driver.current_url) +try: + WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) +except TimeoutError: + logging.getLogger().setLevel(20) + print(driver.current_url) + logger.info('trying to save a screenshot via imgur') +# driver.reconnect() + driver.save_screenshot('c:/tmp/screenshot.jpg') + driver.get('https://imgur.com/upload') + driver.find_element('css selector', 'input').send_keys('c:/tmp/screenshot.jpg') + + time.sleep(2) + logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) time.sleep(5) - driver = uc.Chrome(headless=True) - driver.get("https://nowsecure.nl") - WebDriverWait(driver, 15).until( - EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) - driver.quit() +driver.quit() -if __name__ == "__main__": - main() + + +# def main(): + +# # due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally + + +# time.sleep(5) +# driver = uc.Chrome(headless=True) +# driver.get("https://nowsecure.nl") +# WebDriverWait(driver, 15).until( +# EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) +# driver.quit() + + +# if __name__ == "__main__": +# main() From 073c94f6a94f960eb3e2dc01cca0edfe8da31a51 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:32:04 +0200 Subject: [PATCH 38/59] Update test_workflow.py --- example/test_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 5af26c2..d7af464 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -43,9 +43,9 @@ except TimeoutError: print(driver.current_url) logger.info('trying to save a screenshot via imgur') # driver.reconnect() - driver.save_screenshot('c:/tmp/screenshot.jpg') + driver.save_screenshot('/tmp/screenshot.jpg') driver.get('https://imgur.com/upload') - driver.find_element('css selector', 'input').send_keys('c:/tmp/screenshot.jpg') + driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.jpg') time.sleep(2) logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) From 3c3fda228add4acacf7b70a94769339a1eaa4eb4 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:34:28 +0200 Subject: [PATCH 39/59] Update test_workflow.py --- example/test_workflow.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index d7af464..818b4fe 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -2,29 +2,33 @@ import time import logging +import os from selenium.webdriver.support.wait import WebDriverWait import selenium.webdriver.support.expected_conditions as EC from selenium.common.exceptions import TimeoutException import undetected_chromedriver as uc +from pathlib import Path + logging.basicConfig(level=10) logger = logging.getLogger('test') for k,v in os.environ.items(): - logger.info("%s = %s" % (k,v)) - tmp = Path('/tmp').resolve() + logger.info("%s = %s" % (k,v)) + +tmp = Path('/tmp').resolve() - for item in tmp.rglob('**'): +for item in tmp.rglob('**'): - print(item) + print(item) - if item.is_dir(): - if 'chrome-' in item.name: - path_list = os.environ['PATH'].split(os.pathsep) - path_list.insert(0, str(item)) - os.environ['PATH'] = os.pathsep.join(path_list) - break + if item.is_dir(): + if 'chrome-' in item.name: + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + break driver = uc.Chrome(headless=True) driver.get('https://www.nowsecure.nl') From 41633403241b5cc27cd5f7732054cc83eec2128e Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:40:27 +0200 Subject: [PATCH 40/59] Update test_workflow.py --- example/test_workflow.py | 90 +++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 818b4fe..63abe22 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -13,64 +13,50 @@ from pathlib import Path logging.basicConfig(level=10) logger = logging.getLogger('test') - -for k,v in os.environ.items(): - logger.info("%s = %s" % (k,v)) - -tmp = Path('/tmp').resolve() - -for item in tmp.rglob('**'): +def main(): + for k,v in os.environ.items(): + logger.info("%s = %s" % (k,v)) + logger.info('==== END ENV ==== ') + tmp = Path('/tmp').resolve() + for item in tmp.rglob('**'): + logger.info('found %s ' % item) - print(item) - - if item.is_dir(): - if 'chrome-' in item.name: - path_list = os.environ['PATH'].split(os.pathsep) - path_list.insert(0, str(item)) - os.environ['PATH'] = os.pathsep.join(path_list) - break + if item.is_dir(): + if 'chrome-' in item.name: + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + break -driver = uc.Chrome(headless=True) -driver.get('https://www.nowsecure.nl') + driver = uc.Chrome(headless=True) + driver.get('https://www.nowsecure.nl') -print(driver.current_url) - -try: - WebDriverWait(driver,10).until(EC.visibility_of_element_located(("css selector", "body"))) -except TimeoutException: - pass -print(driver.current_url) -try: - WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) -except TimeoutError: - logging.getLogger().setLevel(20) print(driver.current_url) - logger.info('trying to save a screenshot via imgur') -# driver.reconnect() - driver.save_screenshot('/tmp/screenshot.jpg') - driver.get('https://imgur.com/upload') - driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.jpg') - - time.sleep(2) - logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) - time.sleep(5) -driver.quit() + + try: + WebDriverWait(driver,10).until(EC.visibility_of_element_located(("css selector", "body"))) + except TimeoutException: + pass + print(driver.current_url) + try: + WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) + except TimeoutError: + logging.getLogger().setLevel(20) + print(driver.current_url) + logger.info('trying to save a screenshot via imgur') + # driver.reconnect() + driver.save_screenshot('/tmp/screenshot.jpg') + driver.get('https://imgur.com/upload') + driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.jpg') + + time.sleep(2) + logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) + time.sleep(5) + driver.quit() - -# def main(): - -# # due to the randomneess of the chrome install path on the runner when running action, i have to find it manufally - - -# time.sleep(5) -# driver = uc.Chrome(headless=True) -# driver.get("https://nowsecure.nl") -# WebDriverWait(driver, 15).until( -# EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) -# driver.quit() -# if __name__ == "__main__": -# main() +if __name__ == "__main__": + main() From 64527812684a128aa7eb5e79ad732dbf5df13b00 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:43:50 +0200 Subject: [PATCH 41/59] Update test_workflow.py --- example/test_workflow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 63abe22..e6ed0c5 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -10,7 +10,7 @@ import undetected_chromedriver as uc from pathlib import Path -logging.basicConfig(level=10) +logging.basicConfig(level=20) logger = logging.getLogger('test') def main(): @@ -42,6 +42,7 @@ def main(): WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) except TimeoutError: logging.getLogger().setLevel(20) + driver.reconnect() print(driver.current_url) logger.info('trying to save a screenshot via imgur') # driver.reconnect() From dc56f5b33b7edb8745fd5f75a7421ad79218f831 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:47:52 +0200 Subject: [PATCH 42/59] Update test_workflow.py --- example/test_workflow.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index e6ed0c5..aa61055 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -10,7 +10,7 @@ import undetected_chromedriver as uc from pathlib import Path -logging.basicConfig(level=20) +logging.basicConfig(level=10) logger = logging.getLogger('test') def main(): @@ -28,7 +28,11 @@ def main(): os.environ['PATH'] = os.pathsep.join(path_list) break + + driver = uc.Chrome(headless=True) + logging.getLogger().setLevel(20) + driver.get('https://www.nowsecure.nl') print(driver.current_url) From 04f45c08234504e687f054e731e34918a46854e7 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:53:42 +0200 Subject: [PATCH 43/59] Update test_workflow.py --- example/test_workflow.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/example/test_workflow.py b/example/test_workflow.py index aa61055..4012d6f 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -23,9 +23,12 @@ def main(): if item.is_dir(): if 'chrome-' in item.name: + logger.info('adding %s to PATH' % str(item)) + logger.info('current PATH:' % str(os.environ.get('PATH'))) path_list = os.environ['PATH'].split(os.pathsep) path_list.insert(0, str(item)) os.environ['PATH'] = os.pathsep.join(path_list) + logger.info('new PATH:' % str(os.environ.get('PATH'))) break From c2180df88f336a28654abcf96bf012dfc54e458a Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 01:54:59 +0200 Subject: [PATCH 44/59] Update test_workflow.py --- example/test_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 4012d6f..3c523dd 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -24,11 +24,11 @@ def main(): if item.is_dir(): if 'chrome-' in item.name: logger.info('adding %s to PATH' % str(item)) - logger.info('current PATH:' % str(os.environ.get('PATH'))) + logger.info('current PATH: %s' % str(os.environ.get('PATH'))) path_list = os.environ['PATH'].split(os.pathsep) path_list.insert(0, str(item)) os.environ['PATH'] = os.pathsep.join(path_list) - logger.info('new PATH:' % str(os.environ.get('PATH'))) + logger.info('new PATH %s:' % str(os.environ.get('PATH'))) break From 7ebd4033becdf1f616849d251d99b3562be9ecd1 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:01:52 +0200 Subject: [PATCH 45/59] Update test_workflow.py --- example/test_workflow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 3c523dd..c2c9136 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -23,17 +23,19 @@ def main(): if item.is_dir(): if 'chrome-' in item.name: + logger.info('adding %s to PATH' % str(item)) logger.info('current PATH: %s' % str(os.environ.get('PATH'))) path_list = os.environ['PATH'].split(os.pathsep) path_list.insert(0, str(item)) os.environ['PATH'] = os.pathsep.join(path_list) logger.info('new PATH %s:' % str(os.environ.get('PATH'))) + browser_executable_path = str(item / 'chrome') break - driver = uc.Chrome(headless=True) + driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path) logging.getLogger().setLevel(20) driver.get('https://www.nowsecure.nl') From 618e47de3599e836959e70fa4219797a6d369e18 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:06:49 +0200 Subject: [PATCH 46/59] Update test_workflow.py --- example/test_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index c2c9136..e1b5ca7 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -49,7 +49,7 @@ def main(): print(driver.current_url) try: WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) - except TimeoutError: + except TimeoutException: logging.getLogger().setLevel(20) driver.reconnect() print(driver.current_url) From dc7dd12f05e5a0461c4c5f4ab0a8778579bb499b Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:10:26 +0200 Subject: [PATCH 47/59] Update test_workflow.py --- example/test_workflow.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index e1b5ca7..9e0d187 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -55,11 +55,12 @@ def main(): print(driver.current_url) logger.info('trying to save a screenshot via imgur') # driver.reconnect() - driver.save_screenshot('/tmp/screenshot.jpg') + driver.save_screenshot('/tmp/screenshot.png') driver.get('https://imgur.com/upload') + driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.jpg') - - time.sleep(2) + + time.sleep(5) logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) time.sleep(5) driver.quit() From f2d1273dc4d7c7f313f762c4d83492b954828c53 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:30:50 +0200 Subject: [PATCH 48/59] Update test_workflow.py --- example/test_workflow.py | 62 ++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 9e0d187..4407851 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -13,11 +13,23 @@ from pathlib import Path logging.basicConfig(level=10) logger = logging.getLogger('test') + + + def main(): + + #### + # this block is a dirty helper since + # in the action runner devices serveral chrome versions exists + # and i need to ensure it takes the one which is installed + # by the task. + #### + for k,v in os.environ.items(): logger.info("%s = %s" % (k,v)) logger.info('==== END ENV ==== ') tmp = Path('/tmp').resolve() + for item in tmp.rglob('**'): logger.info('found %s ' % item) @@ -33,37 +45,51 @@ def main(): browser_executable_path = str(item / 'chrome') break + #### + # test really starts here + #3## - driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path) logging.getLogger().setLevel(20) - driver.get('https://www.nowsecure.nl') - - print(driver.current_url) - + + logger.info('current url %s' % driver.current_url) + try: - WebDriverWait(driver,10).until(EC.visibility_of_element_located(("css selector", "body"))) + WebDriverWait(driver,10).until(EC.title_contains('moment')) except TimeoutException: pass - print(driver.current_url) + + + logger.info('current url %s' % driver.current_url) + try: - WebDriverWait(driver,10).until(EC.text_to_be_present_in_element(("css selector", "main h1"), "OH YEAH, you passed!")) + WebDriverWait(driver,10).until(EC.title_contains('nowSecure')) except TimeoutException: logging.getLogger().setLevel(20) - driver.reconnect() print(driver.current_url) - logger.info('trying to save a screenshot via imgur') + else: + logger.info( "PASSED CLOUDFLARE! ") + + + + logger.info('trying to save a screenshot via imgur') # driver.reconnect() - driver.save_screenshot('/tmp/screenshot.png') - driver.get('https://imgur.com/upload') - - driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.jpg') - - time.sleep(5) - logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) - time.sleep(5) + driver.save_screenshot('/tmp/screenshot.png') + + driver.get('https://imgur.com/upload') + + driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.png') + + time.sleep(1) + logger.info('current url %s' % driver.current_url) + time.sleep(1) + logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) + time.sleep(5) driver.quit() + + + From 0c197a6b9b961f3fa899ff406540a9225a1b8b57 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:39:06 +0200 Subject: [PATCH 49/59] Update test_workflow.py --- example/test_workflow.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index 4407851..dbfa23a 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -50,29 +50,32 @@ def main(): #3## driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path) - logging.getLogger().setLevel(20) + logging.getLogger().setLevel(10) driver.get('https://www.nowsecure.nl') logger.info('current url %s' % driver.current_url) try: - WebDriverWait(driver,10).until(EC.title_contains('moment')) + WebDriverWait(driver,15).until(EC.title_contains('moment')) except TimeoutException: pass + logger.info('current page source:\n%s' % driver.page_source) logger.info('current url %s' % driver.current_url) try: - WebDriverWait(driver,10).until(EC.title_contains('nowSecure')) - except TimeoutException: - logging.getLogger().setLevel(20) + WebDriverWait(driver,15).until(EC.title_contains('nowSecure')) + logger.info('PASSED CLOUDFLARE!') + + except TimeoutException: + logger.info('timeout') print(driver.current_url) - else: - logger.info( "PASSED CLOUDFLARE! ") + + logger.info('current page source:\n%s' % driver.page_source) - + logging.getLogger().setLevel(20) logger.info('trying to save a screenshot via imgur') # driver.reconnect() driver.save_screenshot('/tmp/screenshot.png') From 250ee98bb7098caeb1ffd57af68aadcb764ab0b3 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 02:51:05 +0200 Subject: [PATCH 50/59] Update test_workflow.py --- example/test_workflow.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index dbfa23a..e69c8ac 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -71,24 +71,23 @@ def main(): except TimeoutException: logger.info('timeout') print(driver.current_url) - - logger.info('current page source:\n%s' % driver.page_source) - - logging.getLogger().setLevel(20) + logger.info('current page source:\n%s\n' % driver.page_source) + logger.info('trying to save a screenshot via imgur') - # driver.reconnect() - driver.save_screenshot('/tmp/screenshot.png') + + driver.save_screenshot('/home/runner/work/_temp/screenshot.png') driver.get('https://imgur.com/upload') - driver.find_element('css selector', 'input').send_keys('/tmp/screenshot.png') + driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/screenshot.png') time.sleep(1) logger.info('current url %s' % driver.current_url) time.sleep(1) - logger.info('A SCREENSHOT IS SAVED ON %s' % driver.current_url) + logger.info(f'A SCREENSHOT IS SAVED ON {driver.current_url} <<< if this ends onlywith /upload than it failed. after all we are running from a datacenter no human being would ever surf the internet from ') time.sleep(5) + driver.quit() From b59d9fa47ea6905b7d63ef548ce829d23936eb66 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:07:09 +0200 Subject: [PATCH 51/59] Update test_workflow.py --- example/test_workflow.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/example/test_workflow.py b/example/test_workflow.py index e69c8ac..eb2bb79 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -49,8 +49,15 @@ def main(): # test really starts here #3## + driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path) logging.getLogger().setLevel(10) + + driver.get('chrome://version') + + driver.save_screenshot('/home/runner/work/_temp/versioninfo.png') + + driver.get('https://www.nowsecure.nl') logger.info('current url %s' % driver.current_url) From b6618390f84e89b4dc9016d652b29f602a8fe579 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:08:47 +0200 Subject: [PATCH 52/59] Update workflow.yml --- .github/workflows/workflow.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index cafc843..c658f17 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -37,3 +37,15 @@ jobs: - name: run example run: | python example/test_workflow.py + - name: Upload a Build Artifact + uses: actions/upload-artifact@v3.1.2 + with: + # Artifact name + name: screenshots + # A file, directory or wildcard pattern that describes what to upload + path: /home/runner/work/_temp/*.png + + + + + From 6d352fbfa6494495022757e6bf08b2e87683ea58 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:46:07 +0200 Subject: [PATCH 53/59] Update test_workflow.py --- example/test_workflow.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index eb2bb79..a42fd71 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -13,9 +13,6 @@ from pathlib import Path logging.basicConfig(level=10) logger = logging.getLogger('test') - - - def main(): #### @@ -57,6 +54,22 @@ def main(): driver.save_screenshot('/home/runner/work/_temp/versioninfo.png') + driver.get('chrome://settings/help') + driver.save_screenshot('/home/runner/work/_temp/helpinfo.png') + + driver.get('https://www.google.com') + driver.save_screenshot('/home/runner/work/_temp/google.com.png') + + driver.get('https://bot.incolumitas.com/#botChallenge') + + pdfdata = driver.execute_cdp_cmd('Page.printToPDF', {}) + if pdfdata: + if 'data' in pdfdata: + data = pdfdata['data'] + import base64 + buffer = base64.b64decode(data) + with open('/home/runner/work/_temp/report.pdf', 'w+b') as f: + f.write(buffer) driver.get('https://www.nowsecure.nl') @@ -81,19 +94,19 @@ def main(): logger.info('current page source:\n%s\n' % driver.page_source) - logger.info('trying to save a screenshot via imgur') + #logger.info('trying to save a screenshot via imgur') - driver.save_screenshot('/home/runner/work/_temp/screenshot.png') + driver.save_screenshot('/home/runner/work/_temp/nowsecure.png') - driver.get('https://imgur.com/upload') + #driver.get('https://imgur.com/upload') - driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/screenshot.png') + driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/nowsecure.png') - time.sleep(1) - logger.info('current url %s' % driver.current_url) - time.sleep(1) - logger.info(f'A SCREENSHOT IS SAVED ON {driver.current_url} <<< if this ends onlywith /upload than it failed. after all we are running from a datacenter no human being would ever surf the internet from ') - time.sleep(5) + #time.sleep(1) + #logger.info('current url %s' % driver.current_url) + #time.sleep(1) + #logger.info(f'A SCREENSHOT IS SAVED ON {driver.current_url} <<< if this ends onlywith /upload than it failed. after all we are running from a datacenter no human being would ever surf the internet from ') + #time.sleep(5) driver.quit() From 38ac0c1529f9a9e8fe3233454843871afe952f9e Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:48:29 +0200 Subject: [PATCH 54/59] Update test_workflow.py --- example/test_workflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/test_workflow.py b/example/test_workflow.py index a42fd71..e4d3914 100644 --- a/example/test_workflow.py +++ b/example/test_workflow.py @@ -95,12 +95,12 @@ def main(): logger.info('current page source:\n%s\n' % driver.page_source) #logger.info('trying to save a screenshot via imgur') - + driver.save_screenshot('/home/runner/work/_temp/nowsecure.png') #driver.get('https://imgur.com/upload') - driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/nowsecure.png') + #driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/nowsecure.png') #time.sleep(1) #logger.info('current url %s' % driver.current_url) From 3611632182209ab0c512c70fcfc2f5cf678b3851 Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:50:07 +0200 Subject: [PATCH 55/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index c658f17..2f8a23c 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -43,7 +43,7 @@ jobs: # Artifact name name: screenshots # A file, directory or wildcard pattern that describes what to upload - path: /home/runner/work/_temp/*.png + path: /home/runner/work/_temp/*.[pngdf] From ebd3508a03fc8c0ede8294cdd232e61ba84c4c5c Mon Sep 17 00:00:00 2001 From: Leon Date: Sat, 3 Jun 2023 03:52:04 +0200 Subject: [PATCH 56/59] Update workflow.yml --- .github/workflows/workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 2f8a23c..c50660e 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -43,7 +43,7 @@ jobs: # Artifact name name: screenshots # A file, directory or wildcard pattern that describes what to upload - path: /home/runner/work/_temp/*.[pngdf] + path: /home/runner/work/_temp/*p* From 2b035b4ea1d88224abd570b187f16094663462a3 Mon Sep 17 00:00:00 2001 From: UltrafunkAmsterdam Date: Mon, 12 Jun 2023 11:30:57 +0200 Subject: [PATCH 57/59] compatible with selenium 4.10. also removed service_args, service_creationflags, service_log_path from constructor. added find_elements_recursive helper function --- setup.py | 2 +- undetected_chromedriver/__init__.py | 82 +++++++++++++++++++---------- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/setup.py b/setup.py index 12b1d87..aaba3a7 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ setup( version=version, packages=["undetected_chromedriver"], install_requires=[ - "selenium>=4.0.0", + "selenium>=4.9.0", "requests", "websockets", ], diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 0ddd8d0..2139c58 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.4.7" +__version__ = "3.5.0" import json import logging @@ -33,7 +33,7 @@ from weakref import finalize import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver from selenium.webdriver.common.by import By -import selenium.webdriver.common.service +import selenium.webdriver.chromium.service import selenium.webdriver.remote.command import selenium.webdriver.remote.webdriver @@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): browser_executable_path=None, port=0, enable_cdp_events=False, - service_args=None, - service_creationflags=None, + # service_args=None, + # service_creationflags=None, desired_capabilities=None, advanced_elements=False, - service_log_path=None, + # service_log_path=None, keep_alive=True, log_level=0, headless=False, @@ -384,11 +384,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if headless or options.headless: #workaround until a better checking is found - options.add_argument("--headless=new") - #if self.patcher.version_main < 108: - # options.add_argument("--headless=chrome") - #elif self.patcher.version_main >= 108: - + try: + if self.patcher.version_main < 108: + options.add_argument("--headless=chrome") + elif self.patcher.version_main >= 108: + options.add_argument("--headless=new") + except: + logger.warning("could not detect version_main." + "therefore, we are assuming it is chrome 108 or higher") + options.add_argument("--headless=new") options.add_argument("--window-size=1920,1080") options.add_argument("--start-maximized") @@ -441,26 +445,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ) self.browser_pid = browser.pid - if service_creationflags: - service = selenium.webdriver.common.service.Service( - self.patcher.executable_path, port, service_args, service_log_path - ) - for attr_name in ("creationflags", "creation_flags"): - if hasattr(service, attr_name): - setattr(service, attr_name, service_creationflags) - break - else: - service = None + + service = selenium.webdriver.chromium.service.ChromiumService( + self.patcher.executable_path + ) super(Chrome, self).__init__( - executable_path=self.patcher.executable_path, - port=port, + service=service, options=options, - service_args=service_args, - desired_capabilities=desired_capabilities, - service_log_path=service_log_path, keep_alive=keep_alive, - service=service, # needed or the service will be re-created ) self.reactor = None @@ -716,10 +709,45 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if not capabilities: capabilities = self.options.to_capabilities() super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( - capabilities, browser_profile + capabilities ) # super(Chrome, self).start_session(capabilities, browser_profile) + def find_elements_recursive(self, by, value): + """ + find elements in all frames + this is a generator function, which is needed + since if it would return a list of elements, they + will be stale on arrival. + using generator, when the element is returned we are in the correct frame + to use it directly + Args: + by: By + value: str + Returns: Generator[webelement.WebElement] + """ + def search_frame(f=None): + if not f: + # ensure we are on main content frame + self.switch_to.default_content() + else: + self.switch_to.frame(f) + for elem in self.find_elements(by, value): + yield elem + # switch back to main content, otherwise we will get StaleElementReferenceException + self.switch_to.default_content() + + # search root frame + for elem in search_frame(): + yield elem + # get iframes + frames = self.find_elements('css selector', 'iframe') + + # search per frame + for f in frames: + for elem in search_frame(f): + yield elem + def quit(self): try: self.service.process.kill() From bc1445aac720eda300ffba70e26bbf95eef4b6d1 Mon Sep 17 00:00:00 2001 From: Leon Date: Mon, 12 Jun 2023 12:22:17 +0200 Subject: [PATCH 58/59] 3.5.0 --- README.md | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d405c9c..2c05acd 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,21 @@ Optimized Selenium Chromedriver patch which does not trigger anti-bot services l Automatically downloads the driver binary and patches it. * Tested until current chrome beta versions -* Works also on Brave Browser and many other Chromium based browsers, some tweaking +* Works also on Brave Browser and many other Chromium based browsers, but you need to know what you're doing and needs some tweaking. * Python 3.6++** +## Installation ## + +``` +pip install undetected-chromedriver +``` +or , if you're feeling adventurous, install directly via github + +``` +pip install git+https://www.github.com/ultrafunkamsterdam/undetected-chromedriver@master # replace @master with @branchname for other branches +``` + + - - - ## Message for all ## I will be putting limits on the issue tracker. It has beeen abused too long. @@ -17,6 +29,13 @@ any good news? Yes, i've opened [Undetected-Discussions](https://github.com/ultrafunkamsterdam/undetected-chromedriver/discussions) which i think will help us better in the long run. - - - +## 3.5.0 ## +- selenium 4.10 caused some issues. 3.5.0 is compatible and has selenium 4.9 or above pinned. I can't support <4.9 any longer. +- Removed some kwargs from constructor: service_args, service_creationflags, service_log_path. +- added find_elements_recursive generator function. which is more of a convenience funtion as lots of websites seem to serve different content from different frames, making it hard + to use find_elements + + ## 3.4.5 ## - What a week. Had the recent advancedments in Automation-Detection algorithms pwned (so i thought) with 3.4.0, but apparently, for some OS-es this caused an error when interacting with elements. Had to revert back using a different method, fix bugs, and now eventually was still able to stick to the initial idea (+ fixing bugs) - Update to chrome 110 caused another surprise, this time for HEADLESS users. @@ -143,11 +162,7 @@ the solution is simple: **newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255** -## Installation ## -``` -pip install undetected-chromedriver -``` ## Usage ## From 1c704a71cf4f29181a59ecf19ddff32f1b4fbfc0 Mon Sep 17 00:00:00 2001 From: Leon Date: Mon, 12 Jun 2023 13:46:59 +0200 Subject: [PATCH 59/59] update README to show importance of IP --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 2c05acd..61b17de 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Automatically downloads the driver binary and patches it. * Works also on Brave Browser and many other Chromium based browsers, but you need to know what you're doing and needs some tweaking. * Python 3.6++** + ## Installation ## ``` @@ -29,6 +30,26 @@ any good news? Yes, i've opened [Undetected-Discussions](https://github.com/ultrafunkamsterdam/undetected-chromedriver/discussions) which i think will help us better in the long run. - - - +What this is not +--- +**THIS PACKAGE DOES NOT, and i repeat DOES NOT hide your IP address, so when running from a datacenter (even smaller ones), chances are large you will not pass! Also, if your ip reputation at home is low, you won't pass!** + +Running following code from home , and from a datacenter. +```python +import undetected_chromedriver as uc +driver = uc.Chrome(headless=True,use_subprocess=False) +driver.get('https://nowsecure.nl') +driver.save_screenshot('nowsecure.png') +``` +
+ + +
+ + + + + ## 3.5.0 ## - selenium 4.10 caused some issues. 3.5.0 is compatible and has selenium 4.9 or above pinned. I can't support <4.9 any longer. - Removed some kwargs from constructor: service_args, service_creationflags, service_log_path.