> added WebElement.click_safe() method, in case you get detected after clicking a link
> added WebElement.children(self, tag=None, recursive=False) to easily get/find child nodes
> added example.py where I can point people at when asking silly questions (no, it's actually quite cool, everyone should see it)
> some refactoring
2022-11-28 23:40:41 +01:00 · 2022-11-28 23:40:41 +01:00 · 25bca31f35
parent 33d2a72848
commit 25bca31f35
3 changed files with 212 additions and 29 deletions
--- a/undetected_chromedriver/init.py
+++ b/undetected_chromedriver/init.py
@ -2,6 +2,11 @@
 from __future__ import annotations
 import subprocess
 from typing import List
 from typing import Optional
 from selenium.webdriver.common.by import By
 """
@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
 """
-
+__version__ = "3.2.0"
 __version__ = "3.1.7"
 import inspect
 import json
@ -30,15 +33,12 @@ import re
 import shutil
 import sys
 import tempfile
 import threading
 import time
 import selenium.webdriver.chrome.service
 import selenium.webdriver.chrome.webdriver
 import selenium.webdriver.common.service
 import selenium.webdriver.remote.webdriver
 from selenium.webdriver.chrome.service import Service
 import selenium.webdriver.remote.command
 from .cdp import CDP
@ -46,6 +46,8 @@ from .dprocess import start_detached
 from .options import ChromeOptions
 from .patcher import IS_POSIX, Patcher
 from .reactor import Reactor
 from .webelement import WebElement, UCWebElement
 __all__ = (
    "Chrome",
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        # see if a custom user profile is specified in options
        for arg in options.arguments:
            if "lang" in arg:
                m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
                try:
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                    )
        if not user_data_dir:
            # backward compatibility
            # check if an old uc.ChromeOptions is used, and extract the user data dir
@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            self.browser_pid = browser.pid
        if service_creationflags:
-            service = Service(
+            service = selenium.webdriver.common.service.Service(
                patcher.executable_path, port, service_args, service_log_path
            )
            for attr_name in ("creationflags", "creation_flags"):
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            self.reactor = reactor
        if advanced_elements:
-            from .webelement import WebElement
+            self._web_element_cls = UCWebElement
-
+        else:
            self._web_element_cls = WebElement
        if options.headless:
            self._configure_headless()
    def __getattribute__(self, item):
        if not super().__getattribute__("debug"):
            return super().__getattribute__(item)
        else:
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            return original
    def _configure_headless(self):
        orig_get = self.get
        logger.info("setting properties for headless")
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        # super(Chrome, self).start_session(capabilities, browser_profile)
    def quit(self):
-        logger.debug("closing webdriver")
+        try:
        if hasattr(self, "service") and getattr(self.service, "process", None):
            self.service.process.kill()
            logger.debug("webdriver process ended")
        except (AttributeError, RuntimeError, OSError):
            pass
        try:
            if self.reactor and isinstance(self.reactor, Reactor):
                logger.debug("shutting down reactor")
            self.reactor.event.set()
-        except Exception:  # noqa
+            logger.debug("shutting down reactor")
        except AttributeError:
            pass
        try:
            logger.debug("killing browser")
            os.kill(self.browser_pid, 15)
-
+            logger.debug("gracefully closed browser")
-        except TimeoutError as e:
+        except Exception as e:  # noqa
            logger.debug(e, exc_info=True)
        except Exception:  # noqa
            pass
        if (
            hasattr(self, "keep_user_data_dir")
            and hasattr(self, "user_data_dir")
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        ):
            for _ in range(5):
                try:
                    shutil.rmtree(self.user_data_dir, ignore_errors=False)
                except FileNotFoundError:
                    pass
--- a/undetected_chromedriver/example/example.py
+++ b/undetected_chromedriver/example/example.py
@ -0,0 +1,142 @@
 import time
 from selenium.webdriver.remote.webdriver import By
 import selenium.webdriver.support.expected_conditions as EC  # noqa
 from selenium.webdriver.support.wait import WebDriverWait
 import undetected_chromedriver as uc
 # demo walkthrough: run a google search, list child elements with
 # `children()`, then open, drive and close a handful of extra windows.
 driver = uc.Chrome()
 driver.get("https://www.google.com")
 # accept the terms by clicking the last element whose text contains "Accept all"
 # NOTE(review): the [-1] index raises IndexError when no such element is
 # found (e.g. when no consent dialog is shown) - confirm this is acceptable
 driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click()
 inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
 inp_search.send_keys(
    "site:stackoverflow.com undetected chromedriver\n"
 )  # \n as equivalent of ENTER key
 # wait (timeout=3) until the element with id "rso" is present
 results_container = WebDriverWait(driver, timeout=3).until(
    EC.presence_of_element_located((By.ID, "rso"))
 )
 # insert a red banner <div> as the first child of #rso
 driver.execute_script(
    """
    let container = document.querySelector('#rso');
    let el = document.createElement('div');
    el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
    el.textContent = "these are excluded from offical support ;)";
    container.insertAdjacentElement('afterBegin', el);
 """
 )
 time.sleep(2)
 # print every <a> element found anywhere below results_container
 for item in results_container.children("a", recursive=True):
    print(item)
 # from here on let the driver create uc.UCWebElement objects instead of the
 # default WebElement, then look the container up again
 driver._web_element_cls = uc.UCWebElement
 print("switched to use uc.WebElement. which is more descriptive")
 results_container = driver.find_element(By.ID, "rso")
 # children() without arguments gets only the direct children of
 # results_container. children is a method unique to undetected chromedriver;
 # it is not available when you use regular chromedriver
 for item in results_container.children():
    print(item.tag_name)
    # recursive=True walks everything below this direct child
    for grandchild in item.children(recursive=True):
        print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)
 print("lets go to image search")
 inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
 inp_search.clear()
 inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER
 body = driver.find_element(By.TAG_NAME, "body")
 # click the first link whose text contains "Images" using click_safe()
 body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()
 # you can't reuse the body from above, because we are on another page right now
 # so the body above is not attached anymore
 image_search_body = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.TAG_NAME, "body"))
 )
 # print the src attribute of every <img> below the body, falling back to data-src
 print("getting image data, hold on...")
 for item in image_search_body.children("img", recursive=True):
    print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")
 # sites to load in the extra windows; index 0 is never used by the loop below
 USELESS_SITES = [
    "https://www.trumpdonald.org",
    "https://www.isitchristmas.com",
    "https://isnickelbacktheworstbandever.tumblr.com",
    "https://www.isthatcherdeadyet.co.uk",
    "https://whitehouse.gov",
    "https://www.nsa.gov",
    "https://kimjongillookingatthings.tumblr.com",
    "https://instantrimshot.com",
    "https://www.nyan.cat",
    "https://twitter.com",
 ]
 print("opening 9 additinal windows and control them")
 time.sleep(1)  # never use this. this is for demonstration purposes only
 for _ in range(9):
    driver.window_new()
 print("now we got 10 windows")
 time.sleep(1)
 print("using the new windows to open 9 other useless sites")
 time.sleep(1)  # never use this. this is for demonstration purposes only
 for idx in range(1, 10):
    # idx starts at 1: skip the first handle which is our original window
    # (and, with it, the first entry of USELESS_SITES)
    print("opening ", USELESS_SITES[idx])
    driver.switch_to.window(driver.window_handles[idx])
    driver.get(USELESS_SITES[idx])
 # visit every window except the original one and report its url
 for handle in driver.window_handles[1:]:
    driver.switch_to.window(handle)
    print("look. %s is working" % driver.current_url)
    time.sleep(1)  # never use this. it is here only so you can follow along
 print("close windows (including the initial one!), but keep the last new opened window")
 time.sleep(4)  # never use this. pause so you can follow along before the windows close
 # close every window except the last handle in the list
 for handle in driver.window_handles[:-1]:
    driver.switch_to.window(handle)
    print("look. %s is closing" % driver.current_url)
    time.sleep(1)
    driver.close()
 # attach to the only window that is still open
 driver.switch_to.window(driver.window_handles[0])
 print("now we only got ", driver.current_url, "left")
 time.sleep(1)
 driver.get("https://www.nowsecure.nl")
 time.sleep(5)  # never use this. presumably gives nowsecure time to pass the bot checks
 print("lets go to UC project page")
 driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")
 # block until the user presses ENTER, then shut the driver down
 input("press a key if you have RTFM")
 driver.quit()
--- a/undetected_chromedriver/webelement.py
+++ b/undetected_chromedriver/webelement.py
@ -1,7 +1,28 @@
 from selenium.webdriver.common.by import By
 import selenium.webdriver.remote.webelement
 class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    selenium WebElement extended with the `click_safe` and `children` helpers.
    """

    def click_safe(self):
        """
        clicks the element, then calls `reconnect(0.1)` on the owning driver.

        intended for cases where you get detected after clicking a link.
        NOTE(review): 0.1 is presumably a short delay handled by the
        driver's `reconnect` - confirm against its implementation.
        """
        super().click()
        self._parent.reconnect(0.1)

    def children(
        self, tag=None, recursive=False
    ) -> list[selenium.webdriver.remote.webelement.WebElement]:
        """
        returns child elements of current element

        :param tag: str,  if supplied, returns <tag> nodes only
        :param recursive: bool, if True, returns all descendants (collected
                by `_recursive_children`, which hands back a set) instead of
                only the direct children
        """
        if recursive:
            # decide this first, so no script is built just to be thrown away
            return _recursive_children(self, tag)
        if not tag:
            return self._parent.execute_script(
                "return [... arguments[0].children]", self
            )
        # the tag travels as a script argument rather than being pasted into
        # the javascript source, so a quote inside <tag> cannot break the script
        return self._parent.execute_script(
            "const wanted = arguments[1];"
            " return [... arguments[0].children]"
            ".filter( node => node.tagName === wanted)",
            self,
            tag.upper(),
        )
 class UCWebElement(WebElement):
    """
    Custom WebElement class which makes it easier to view elements when
    working in an interactive environment.
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    def __init__(self, parent, id_):
        """
        :param parent: passed through unchanged to the base class constructor
        :param id_: passed through unchanged to the base class constructor
        """
        super().__init__(parent, id_)
        # starts out empty; the `attrs` property stores the element's
        # attributes here the first time it is read
        self._attrs = None
    @property
    def attrs(self):
-        if not hasattr(self, "_attrs"):
+        if not self._attrs:
            self._attrs = self._parent.execute_script(
                """
                var items = {}; 
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
        if strattrs:
            strattrs = " " + strattrs
        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
 def _recursive_children(element, tag: str = None, _results=None):
    """
    returns all children of <element> recursively

    :param element: `WebElement` object.
            find children below this <element>
    :param tag: str = None.
            if provided, return only <tag> elements. example: 'a', or 'img'.
            matching ignores case, so 'IMG' and 'img' give the same result
    :param _results: do not use!
            set which collects the matches; a fresh set is created when None
    :return: set holding every matching descendant of <element>
    """
    results = set() if _results is None else _results
    wanted = tag.lower() if tag else None
    # walk the tree with an explicit stack instead of recursive calls, so a
    # deeply nested tree can not run into the interpreter's recursion limit
    pending = [element]
    while pending:
        node = pending.pop()
        for child in node.children():
            if wanted is None or child.tag_name.lower() == wanted:
                results.add(child)
            # descend into every child, also those which did not match
            pending.append(child)
    return results