diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml new file mode 100644 index 0000000..c50660e --- /dev/null +++ b/.github/workflows/workflow.yml @@ -0,0 +1,51 @@ + + +name: Python package + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10","3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Setup Chrome + uses: browser-actions/setup-chrome@v1.2.0 + with: + chrome-version: stable + - name: set chrome in path + run: | + echo "/opt/hostedtoolcache/chromium/stable/x64" >> $GITHUB_PATH + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; else pip install -U . ; fi + - name: run example + run: | + python example/test_workflow.py + - name: Upload a Build Artifact + uses: actions/upload-artifact@v3.1.2 + with: + # Artifact name + name: screenshots + # A file, directory or wildcard pattern that describes what to upload + path: /home/runner/work/_temp/*p* + + + + + diff --git a/README.md b/README.md index d405c9c..61b17de 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,22 @@ Optimized Selenium Chromedriver patch which does not trigger anti-bot services l Automatically downloads the driver binary and patches it. * Tested until current chrome beta versions -* Works also on Brave Browser and many other Chromium based browsers, some tweaking +* Works also on Brave Browser and many other Chromium based browsers, but you need to know what you're doing and needs some tweaking. 
* Python 3.6++** + +## Installation ## + +``` +pip install undetected-chromedriver +``` +or, if you're feeling adventurous, install directly from GitHub + +``` +pip install git+https://www.github.com/ultrafunkamsterdam/undetected-chromedriver@master # replace @master with @branchname for other branches +``` + + - - - ## Message for all ## I will be putting limits on the issue tracker. It has beeen abused too long. @@ -17,6 +30,33 @@ any good news? Yes, i've opened [Undetected-Discussions](https://github.com/ultrafunkamsterdam/undetected-chromedriver/discussions) which i think will help us better in the long run. - - - +What this is not +--- +**THIS PACKAGE DOES NOT, and I repeat DOES NOT, hide your IP address, so when running from a datacenter (even smaller ones), chances are high you will not pass! Also, if your IP reputation at home is low, you won't pass!** + +Running the following code from home, and from a datacenter. +```python +import undetected_chromedriver as uc +driver = uc.Chrome(headless=True,use_subprocess=False) +driver.get('https://nowsecure.nl') +driver.save_screenshot('nowsecure.png') +``` +
+ + +
+ + + + + +## 3.5.0 ## +- selenium 4.10 caused some issues. 3.5.0 is compatible and has selenium 4.9 or above pinned. I can't support <4.9 any longer. +- Removed some kwargs from constructor: service_args, service_creationflags, service_log_path. +- Added find_elements_recursive generator function, which is more of a convenience function, as lots of websites seem to serve different content from different frames, making it hard + to use find_elements + + ## 3.4.5 ## - What a week. Had the recent advancedments in Automation-Detection algorithms pwned (so i thought) with 3.4.0, but apparently, for some OS-es this caused an error when interacting with elements. Had to revert back using a different method, fix bugs, and now eventually was still able to stick to the initial idea (+ fixing bugs) - Update to chrome 110 caused another surprise, this time for HEADLESS users. @@ -143,11 +183,7 @@ the solution is simple: **newsflash: https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/255** -## Installation ## -``` -pip install undetected-chromedriver -``` ## Usage ## diff --git a/example/example.py b/example/example.py index 622975a..48fde5e 100644 --- a/example/example.py +++ b/example/example.py @@ -1,10 +1,13 @@ import time +import logging +logging.basicConfig(level=10) from selenium.common.exceptions import WebDriverException from selenium.webdriver.remote.webdriver import By import selenium.webdriver.support.expected_conditions as EC # noqa from selenium.webdriver.support.wait import WebDriverWait + import undetected_chromedriver as uc @@ -164,7 +167,8 @@ def main(args=None): print("lets go to UC project page") driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") - input("press a key if you have RTFM") + + sleep(2) driver.quit() diff --git a/example/test_workflow.py b/example/test_workflow.py new file mode 100644 index 0000000..e4d3914 --- /dev/null +++ b/example/test_workflow.py @@ -0,0 +1,121 @@ +# coding: utf-8 + +import time 
+import logging +import os +from selenium.webdriver.support.wait import WebDriverWait +import selenium.webdriver.support.expected_conditions as EC +from selenium.common.exceptions import TimeoutException +import undetected_chromedriver as uc +from pathlib import Path + + +logging.basicConfig(level=10) +logger = logging.getLogger('test') + +def main(): + + #### + # this block is a dirty helper since + # in the action runner devices serveral chrome versions exists + # and i need to ensure it takes the one which is installed + # by the task. + #### + + for k,v in os.environ.items(): + logger.info("%s = %s" % (k,v)) + logger.info('==== END ENV ==== ') + tmp = Path('/tmp').resolve() + + for item in tmp.rglob('**'): + logger.info('found %s ' % item) + + if item.is_dir(): + if 'chrome-' in item.name: + + logger.info('adding %s to PATH' % str(item)) + logger.info('current PATH: %s' % str(os.environ.get('PATH'))) + path_list = os.environ['PATH'].split(os.pathsep) + path_list.insert(0, str(item)) + os.environ['PATH'] = os.pathsep.join(path_list) + logger.info('new PATH %s:' % str(os.environ.get('PATH'))) + browser_executable_path = str(item / 'chrome') + break + + #### + # test really starts here + #3## + + + driver = uc.Chrome(headless=True, browser_executable_path=browser_executable_path) + logging.getLogger().setLevel(10) + + driver.get('chrome://version') + + driver.save_screenshot('/home/runner/work/_temp/versioninfo.png') + + driver.get('chrome://settings/help') + driver.save_screenshot('/home/runner/work/_temp/helpinfo.png') + + driver.get('https://www.google.com') + driver.save_screenshot('/home/runner/work/_temp/google.com.png') + + driver.get('https://bot.incolumitas.com/#botChallenge') + + pdfdata = driver.execute_cdp_cmd('Page.printToPDF', {}) + if pdfdata: + if 'data' in pdfdata: + data = pdfdata['data'] + import base64 + buffer = base64.b64decode(data) + with open('/home/runner/work/_temp/report.pdf', 'w+b') as f: + f.write(buffer) + + 
driver.get('https://www.nowsecure.nl') + + logger.info('current url %s' % driver.current_url) + + try: + WebDriverWait(driver,15).until(EC.title_contains('moment')) + except TimeoutException: + pass + + logger.info('current page source:\n%s' % driver.page_source) + + logger.info('current url %s' % driver.current_url) + + try: + WebDriverWait(driver,15).until(EC.title_contains('nowSecure')) + logger.info('PASSED CLOUDFLARE!') + + except TimeoutException: + logger.info('timeout') + print(driver.current_url) + + logger.info('current page source:\n%s\n' % driver.page_source) + + #logger.info('trying to save a screenshot via imgur') + + driver.save_screenshot('/home/runner/work/_temp/nowsecure.png') + + #driver.get('https://imgur.com/upload') + + #driver.find_element('css selector', 'input').send_keys('/home/runner/work/_temp/nowsecure.png') + + #time.sleep(1) + #logger.info('current url %s' % driver.current_url) + #time.sleep(1) + #logger.info(f'A SCREENSHOT IS SAVED ON {driver.current_url} <<< if this ends onlywith /upload than it failed. 
after all we are running from a datacenter no human being would ever surf the internet from ') + #time.sleep(5) + + driver.quit() + + + + + + + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py index 12b1d87..aaba3a7 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ setup( version=version, packages=["undetected_chromedriver"], install_requires=[ - "selenium>=4.0.0", + "selenium>=4.9.0", "requests", "websockets", ], diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 08146f6..06359a6 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -17,7 +17,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) from __future__ import annotations -__version__ = "3.4.7" +__version__ = "3.5.0" import json import logging @@ -33,7 +33,7 @@ from weakref import finalize import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver from selenium.webdriver.common.by import By -import selenium.webdriver.common.service +import selenium.webdriver.chromium.service import selenium.webdriver.remote.command import selenium.webdriver.remote.webdriver @@ -109,11 +109,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): browser_executable_path=None, port=0, enable_cdp_events=False, - service_args=None, - service_creationflags=None, + # service_args=None, + # service_creationflags=None, desired_capabilities=None, advanced_elements=False, - service_log_path=None, + # service_log_path=None, keep_alive=True, log_level=0, headless=False, @@ -383,9 +383,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): options.arguments.extend(["--no-sandbox", "--test-type"]) if headless or options.headless: - if self.patcher.version_main < 108: - options.add_argument("--headless=chrome") - elif self.patcher.version_main >= 108: + #workaround until a better checking is found + try: + if self.patcher.version_main < 108: + 
options.add_argument("--headless=chrome") + elif self.patcher.version_main >= 108: + options.add_argument("--headless=new") + except: + logger.warning("could not detect version_main." + "therefore, we are assuming it is chrome 108 or higher") options.add_argument("--headless=new") options.add_argument("--window-size=1920,1080") @@ -439,26 +445,15 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ) self.browser_pid = browser.pid - if service_creationflags: - service = selenium.webdriver.common.service.Service( - self.patcher.executable_path, port, service_args, service_log_path - ) - for attr_name in ("creationflags", "creation_flags"): - if hasattr(service, attr_name): - setattr(service, attr_name, service_creationflags) - break - else: - service = None + + service = selenium.webdriver.chromium.service.ChromiumService( + self.patcher.executable_path + ) super(Chrome, self).__init__( - executable_path=self.patcher.executable_path, - port=port, + service=service, options=options, - service_args=service_args, - desired_capabilities=desired_capabilities, - service_log_path=service_log_path, keep_alive=keep_alive, - service=service, # needed or the service will be re-created ) self.reactor = None @@ -714,10 +709,45 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): if not capabilities: capabilities = self.options.to_capabilities() super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( - capabilities, browser_profile + capabilities ) # super(Chrome, self).start_session(capabilities, browser_profile) + def find_elements_recursive(self, by, value): + """ + find elements in all frames + this is a generator function, which is needed + since if it would return a list of elements, they + will be stale on arrival. 
+ using generator, when the element is returned we are in the correct frame + to use it directly + Args: + by: By + value: str + Returns: Generator[webelement.WebElement] + """ + def search_frame(f=None): + if not f: + # ensure we are on main content frame + self.switch_to.default_content() + else: + self.switch_to.frame(f) + for elem in self.find_elements(by, value): + yield elem + # switch back to main content, otherwise we will get StaleElementReferenceException + self.switch_to.default_content() + + # search root frame + for elem in search_frame(): + yield elem + # get iframes + frames = self.find_elements('css selector', 'iframe') + + # search per frame + for f in frames: + for elem in search_frame(f): + yield elem + def quit(self): try: self.service.process.kill() @@ -733,7 +763,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): os.kill(self.browser_pid, 15) logger.debug("gracefully closed browser") except Exception as e: # noqa - logger.debug(e, exc_info=True) + pass if ( hasattr(self, "keep_user_data_dir") and hasattr(self, "user_data_dir") @@ -852,5 +882,7 @@ def find_chrome_executable(): ): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: + logger.debug('checking if %s exists and is executable' % candidate) if os.path.exists(candidate) and os.access(candidate, os.X_OK): + logger.debug('found! using %s' % candidate) return os.path.normpath(candidate)