> added WebElement.click_safe() method, in case you get detected after clicking a link
> added WebElement.children(self, tag=None, recursive=False) to easily get/find child nodes
> added example.py, which I can point people to when they ask silly questions (no, it's actually quite cool, everyone should see it)
> some refactoring
2022-11-28 23:40:41 +01:00 · 2022-11-28 23:40:41 +01:00 · 25bca31f35
parent 33d2a72848
commit 25bca31f35
3 changed files with 212 additions and 29 deletions
--- a/undetected_chromedriver/__init__.py
+++ b/undetected_chromedriver/__init__.py
@ -2,6 +2,11 @@
 from __future__ import annotations

 import subprocess
+from typing import List
+from typing import Optional
+
+from selenium.webdriver.common.by import By
+

 """

@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)

 """

-
-__version__ = "3.1.7"
-
+__version__ = "3.2.0"

 import inspect
 import json
@ -30,15 +33,12 @@ import re
 import shutil
 import sys
 import tempfile
-import threading
 import time

 import selenium.webdriver.chrome.service
 import selenium.webdriver.chrome.webdriver
 import selenium.webdriver.common.service
 import selenium.webdriver.remote.webdriver
-
-from selenium.webdriver.chrome.service import Service
 import selenium.webdriver.remote.command

 from .cdp import CDP
@ -46,6 +46,8 @@ from .dprocess import start_detached
 from .options import ChromeOptions
 from .patcher import IS_POSIX, Patcher
 from .reactor import Reactor
+from .webelement import WebElement, UCWebElement
+

 __all__ = (
    "Chrome",
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):

        # see if a custom user profile is specified in options
        for arg in options.arguments:
-
            if "lang" in arg:
                m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
                try:
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                    )

        if not user_data_dir:
-
            # backward compatiblity
            # check if an old uc.ChromeOptions is used, and extract the user data dir

@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            self.browser_pid = browser.pid

        if service_creationflags:
-            service = Service(
+            service = selenium.webdriver.common.service.Service(
                patcher.executable_path, port, service_args, service_log_path
            )
            for attr_name in ("creationflags", "creation_flags"):
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            self.reactor = reactor

        if advanced_elements:
-            from .webelement import WebElement
-
+            self._web_element_cls = UCWebElement
+        else:
            self._web_element_cls = WebElement

        if options.headless:
            self._configure_headless()

    def __getattribute__(self, item):
-
        if not super().__getattribute__("debug"):
            return super().__getattribute__(item)
        else:
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
            return original

    def _configure_headless(self):
-
        orig_get = self.get
        logger.info("setting properties for headless")

@ -641,7 +639,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                "source": """
                    let objectToInspect = window,
                        result = [];
-                    while(objectToInspect !== null) 
+                    while(objectToInspect !== null)
                    { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
                      objectToInspect = Object.getPrototypeOf(objectToInspect); }
                    result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig)
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        # super(Chrome, self).start_session(capabilities, browser_profile)

    def quit(self):
-        logger.debug("closing webdriver")
-        if hasattr(self, "service") and getattr(self.service, "process", None):
+        try:
            self.service.process.kill()
-        try:
-            if self.reactor and isinstance(self.reactor, Reactor):
-                logger.debug("shutting down reactor")
-                self.reactor.event.set()
-        except Exception:  # noqa
+            logger.debug("webdriver process ended")
+        except (AttributeError, RuntimeError, OSError):
+            pass
+        try:
+            self.reactor.event.set()
+            logger.debug("shutting down reactor")
+        except AttributeError:
            pass
        try:
-            logger.debug("killing browser")
            os.kill(self.browser_pid, 15)
-
-        except TimeoutError as e:
+            logger.debug("gracefully closed browser")
+        except Exception as e:  # noqa
            logger.debug(e, exc_info=True)
-        except Exception:  # noqa
-            pass
-
        if (
            hasattr(self, "keep_user_data_dir")
            and hasattr(self, "user_data_dir")
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
        ):
            for _ in range(5):
                try:
-
                    shutil.rmtree(self.user_data_dir, ignore_errors=False)
                except FileNotFoundError:
                    pass
--- a/undetected_chromedriver/example/example.py
+++ b/undetected_chromedriver/example/example.py
@ -0,0 +1,142 @@
+# Demo script for undetected_chromedriver: runs a Google search, walks the
+# result elements with children(), switches the element class to
+# UCWebElement, then opens, visits and closes several browser windows.
+import time
+
+from selenium.webdriver.remote.webdriver import By
+import selenium.webdriver.support.expected_conditions as EC  # noqa
+from selenium.webdriver.support.wait import WebDriverWait
+
+import undetected_chromedriver as uc
+
+
+driver = uc.Chrome()
+driver.get("https://www.google.com")
+
+# accept the terms: clicks the last element whose text contains "Accept all"
+driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click()
+
+inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
+
+inp_search.send_keys(
+    "site:stackoverflow.com undetected chromedriver\n"
+)  # \n as equivalent of ENTER key
+
+# wait (up to 3 seconds) until the element with id "rso" is present
+results_container = WebDriverWait(driver, timeout=3).until(
+    EC.presence_of_element_located((By.ID, "rso"))
+)
+
+# insert a red banner <div> as the first child of the #rso container
+driver.execute_script(
+    """
+    let container = document.querySelector('#rso');
+    let el = document.createElement('div');
+    el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
+    el.textContent = "these are excluded from offical support ;)";
+    container.insertAdjacentElement('afterBegin', el);
+    
+"""
+)
+
+time.sleep(2)
+
+# print every <a> element found anywhere below the results container
+for item in results_container.children("a", recursive=True):
+    print(item)
+
+# switch the default WebElement class to uc.UCWebElement and do it again
+driver._web_element_cls = uc.UCWebElement
+
+print("switched to use uc.WebElement. which is more descriptive")
+results_container = driver.find_element(By.ID, "rso")
+
+# children() without arguments gets only the direct children of
+# results_container. children() is a method unique to undetected
+# chromedriver; it is not available when you use regular chromedriver
+for item in results_container.children():
+    print(item.tag_name)
+    for grandchild in item.children(recursive=True):
+        print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)
+
+
+print("lets go to image search")
+inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
+inp_search.clear()
+inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER
+
+body = driver.find_element(By.TAG_NAME, "body")
+# inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
+# inp_search.send_keys("hot nude girls")  # \n as equivalent of ENTER
+body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()
+
+# the body from above cannot be reused: we are on another page now,
+# so that element is no longer attached to the document
+image_search_body = WebDriverWait(driver, 5).until(
+    EC.presence_of_element_located((By.TAG_NAME, "body"))
+)
+
+# gets all images and prints the src (or, if missing, the data-src) attribute
+print("getting image data, hold on...")
+
+for item in image_search_body.children("img", recursive=True):
+
+    print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")
+
+
+USELESS_SITES = [
+    "https://www.trumpdonald.org",
+    "https://www.isitchristmas.com",
+    "https://isnickelbacktheworstbandever.tumblr.com",
+    "https://www.isthatcherdeadyet.co.uk",
+    "https://whitehouse.gov",
+    "https://www.nsa.gov",
+    "https://kimjongillookingatthings.tumblr.com",
+    "https://instantrimshot.com",
+    "https://www.nyan.cat",
+    "https://twitter.com",
+]
+
+print("opening 9 additinal windows and control them")
+time.sleep(1)  # never use this. this is for demonstration purposes only
+for _ in range(9):
+    driver.window_new()
+
+print("now we got 10 windows")
+time.sleep(1)
+print("using the new windows to open 9 other useless sites")
+time.sleep(1)  # never use this. this is for demonstration purposes only
+
+for idx in range(1, 10):
+    # skip the first handle, which is our original window. the same index is
+    # used for the site list, so USELESS_SITES[0] is never opened
+    print("opening ", USELESS_SITES[idx])
+    driver.switch_to.window(driver.window_handles[idx])
+    driver.get(USELESS_SITES[idx])
+
+
+# visit every new window once and show that it loaded
+for handle in driver.window_handles[1:]:
+    driver.switch_to.window(handle)
+    print("look. %s is working" % driver.current_url)
+    time.sleep(1)  # never use this. it is here only so you can follow along
+
+
+print("close windows (including the initial one!), but keep the last new opened window")
+# never use this. NOTE(review): this comment used to say "wait until nowsecure
+# passed the bot checks", but nowsecure is only opened further below - confirm
+time.sleep(4)
+
+for handle in driver.window_handles[:-1]:
+    driver.switch_to.window(handle)
+    print("look. %s is closing" % driver.current_url)
+    time.sleep(1)
+    driver.close()
+
+
+# attach to the only window left, which is now the first handle
+driver.switch_to.window(driver.window_handles[0])
+print("now we only got ", driver.current_url, "left")
+
+time.sleep(1)
+
+driver.get("https://www.nowsecure.nl")
+
+time.sleep(5)
+
+print("lets go to UC project page")
+
+driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")
+
+# keep the browser open until the user presses ENTER, then shut down
+input("press a key if you have RTFM")
+driver.quit()
--- a/undetected_chromedriver/webelement.py
+++ b/undetected_chromedriver/webelement.py
@ -1,7 +1,28 @@
+from selenium.webdriver.common.by import By
 import selenium.webdriver.remote.webelement


 class WebElement(selenium.webdriver.remote.webelement.WebElement):
+    """
+    selenium WebElement extended with click_safe() and children()
+    """
+
+    def click_safe(self):
+        """
+        clicks the element, then calls `reconnect(0.1)` on the driver this
+        element belongs to.
+        meant for cases where you get detected after clicking a link
+        """
+        super().click()
+        self._parent.reconnect(0.1)
+
+    def children(
+        self, tag=None, recursive=False
+    ) -> "list[selenium.webdriver.remote.webelement.WebElement]":
+        """
+        returns child elements of current element
+
+        :param tag: str,  if supplied, returns <tag> nodes only
+        :param recursive: bool, if True, returns all descendants instead of
+                the direct children only (collected by `_recursive_children`)
+        """
+        # the return annotation is quoted on purpose: an unquoted `list[...]`
+        # is evaluated when the method is defined and raises TypeError on
+        # python versions older than 3.9
+        if recursive:
+            return _recursive_children(self, tag)
+        if not tag:
+            return self._parent.execute_script(
+                "return [... arguments[0].children]", self
+            )
+        # pass the tag as a script argument instead of pasting it into the
+        # javascript source, so quotes inside `tag` cannot break the script
+        return self._parent.execute_script(
+            "const wanted = arguments[1];"
+            " return [... arguments[0].children]"
+            ".filter( node => node.tagName === wanted)",
+            self,
+            tag.upper(),
+        )
+
+
+class UCWebElement(WebElement):
    """
    Custom WebElement class which makes it easier to view elements when
    working in an interactive environment.
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):

    """

+    def __init__(self, parent, id_):
+        super().__init__(parent, id_)
+        self._attrs = None
+
    @property
    def attrs(self):
-        if not hasattr(self, "_attrs"):
+        if not self._attrs:
            self._attrs = self._parent.execute_script(
                """
                var items = {}; 
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
        if strattrs:
            strattrs = " " + strattrs
        return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
+
+
+def _recursive_children(element, tag: str = None, _results=None):
+    """
+    returns all children of <element> recursively
+
+    :param element: `WebElement` object.
+            find children below this <element>
+
+    :param tag: str = None.
+            if provided, return only <tag> elements. example: 'a', or 'img'.
+            matching ignores case, like the non-recursive `children(tag)`
+    :param _results: do not use! accumulator shared by the recursive calls
+    :return: set of the collected elements (a set, so no particular order)
+    """
+    # compare against None: `_results or set()` would throw away an
+    # accumulator that is still empty
+    results = set() if _results is None else _results
+    wanted = tag.lower() if tag else None
+    for child in element.children():
+        if wanted is None or child.tag_name.lower() == wanted:
+            results.add(child)
+        # the nested call adds its findings to the shared set in place
+        _recursive_children(child, tag, results)
+    return results