diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 336e253..66214e1 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -2,6 +2,11 @@ from __future__ import annotations import subprocess +from typing import List +from typing import Optional + +from selenium.webdriver.common.by import By + """ @@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) """ - -__version__ = "3.1.7" - +__version__ = "3.2.0" import inspect import json @@ -30,15 +33,12 @@ import re import shutil import sys import tempfile -import threading import time import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver import selenium.webdriver.common.service import selenium.webdriver.remote.webdriver - -from selenium.webdriver.chrome.service import Service import selenium.webdriver.remote.command from .cdp import CDP @@ -46,6 +46,8 @@ from .dprocess import start_detached from .options import ChromeOptions from .patcher import IS_POSIX, Patcher from .reactor import Reactor +from .webelement import WebElement, UCWebElement + __all__ = ( "Chrome", @@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): # see if a custom user profile is specified in options for arg in options.arguments: - if "lang" in arg: m = re.search("(?:--)?lang(?:[ =])?(.*)", arg) try: @@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ) if not user_data_dir: - # backward compatiblity # check if an old uc.ChromeOptions is used, and extract the user data dir @@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): self.browser_pid = browser.pid if service_creationflags: - service = Service( + service = selenium.webdriver.common.service.Service( patcher.executable_path, port, service_args, service_log_path ) for attr_name in ("creationflags", "creation_flags"): @@ -449,15 +449,14 @@ class 
Chrome(selenium.webdriver.chrome.webdriver.WebDriver): self.reactor = reactor if advanced_elements: - from .webelement import WebElement - + self._web_element_cls = UCWebElement + else: self._web_element_cls = WebElement if options.headless: self._configure_headless() def __getattribute__(self, item): - if not super().__getattribute__("debug"): return super().__getattribute__(item) else: @@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): return original def _configure_headless(self): - orig_get = self.get logger.info("setting properties for headless") @@ -641,7 +639,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): "source": """ let objectToInspect = window, result = []; - while(objectToInspect !== null) + while(objectToInspect !== null) { result = result.concat(Object.getOwnPropertyNames(objectToInspect)); objectToInspect = Object.getPrototypeOf(objectToInspect); } result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig) @@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): # super(Chrome, self).start_session(capabilities, browser_profile) def quit(self): - logger.debug("closing webdriver") - if hasattr(self, "service") and getattr(self.service, "process", None): + try: self.service.process.kill() - try: - if self.reactor and isinstance(self.reactor, Reactor): - logger.debug("shutting down reactor") - self.reactor.event.set() - except Exception: # noqa + logger.debug("webdriver process ended") + except (AttributeError, RuntimeError, OSError): + pass + try: + self.reactor.event.set() + logger.debug("shutting down reactor") + except AttributeError: pass try: - logger.debug("killing browser") os.kill(self.browser_pid, 15) - - except TimeoutError as e: + logger.debug("gracefully closed browser") + except Exception as e: # noqa logger.debug(e, exc_info=True) - except Exception: # noqa - pass - if ( hasattr(self, "keep_user_data_dir") and hasattr(self, "user_data_dir") @@ -743,7 +738,6 @@ 
class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): ): for _ in range(5): try: - shutil.rmtree(self.user_data_dir, ignore_errors=False) except FileNotFoundError: pass diff --git a/undetected_chromedriver/example/example.py b/undetected_chromedriver/example/example.py new file mode 100644 index 0000000..afc2880 --- /dev/null +++ b/undetected_chromedriver/example/example.py @@ -0,0 +1,142 @@ +import time + +from selenium.webdriver.remote.webdriver import By +import selenium.webdriver.support.expected_conditions as EC # noqa +from selenium.webdriver.support.wait import WebDriverWait + +import undetected_chromedriver as uc + + +driver = uc.Chrome() +driver.get("https://www.google.com") + +# accept the terms +driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click() + +inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') + +inp_search.send_keys( + "site:stackoverflow.com undetected chromedriver\n" +) # \n as equivalent of ENTER key + +results_container = WebDriverWait(driver, timeout=3).until( + EC.presence_of_element_located((By.ID, "rso")) +) + +driver.execute_script( + """ + let container = document.querySelector('#rso'); + let el = document.createElement('div'); + el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em'; + el.textContent = "these are excluded from offical support ;)"; + container.insertAdjacentElement('afterBegin', el); + +""" +) + +time.sleep(2) + +for item in results_container.children("a", recursive=True): + print(item) + +# switching default WebElement for uc.WebElement and do it again +driver._web_element_cls = uc.UCWebElement + +print("switched to use uc.WebElement. which is more descriptive") +results_container = driver.find_element(By.ID, "rso") + +# gets only direct children of results_container +# children is a method unique for undetected chromedriver. 
it is +# incompatible when you use regular chromedriver +for item in results_container.children(): + print(item.tag_name) + for grandchild in item.children(recursive=True): + print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text) + + +print("lets go to image search") +inp_search = driver.find_element(By.XPATH, '//input[@name="q"]') +inp_search.clear() +inp_search.send_keys("hot girls\n") # \n as equivalent of ENTER + +body = driver.find_element(By.TAG_NAME, "body") +# inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') +# inp_search.send_keys("hot nude girls") # \n as equivalent of ENTER +body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe() + +# you can't reuse the body from above, because we are on another page right now +# so the body above is not attached anymore +image_search_body = WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) +) + +# gets all images and prints the src +print("getting image data, hold on...") + +for item in image_search_body.children("img", recursive=True): + + print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n") + + +USELESS_SITES = [ + "https://www.trumpdonald.org", + "https://www.isitchristmas.com", + "https://isnickelbacktheworstbandever.tumblr.com", + "https://www.isthatcherdeadyet.co.uk", + "https://whitehouse.gov", + "https://www.nsa.gov", + "https://kimjongillookingatthings.tumblr.com", + "https://instantrimshot.com", + "https://www.nyan.cat", + "https://twitter.com", +] + +print("opening 9 additinal windows and control them") +time.sleep(1) # never use this. this is for demonstration purposes only +for _ in range(9): + driver.window_new() + +print("now we got 10 windows") +time.sleep(1) +print("using the new windows to open 9 other useless sites") +time.sleep(1) # never use this. 
this is for demonstration purposes only + +for idx in range(1, 10): + # skip the first handle which is our original window + print("opening ", USELESS_SITES[idx]) + driver.switch_to.window(driver.window_handles[idx]) + driver.get(USELESS_SITES[idx]) + + +for handle in driver.window_handles[1:]: + driver.switch_to.window(handle) + print("look. %s is working" % driver.current_url) + time.sleep(1) # never use this. it is here only so you can follow along + + +print("close windows (including the initial one!), but keep the last new opened window") +time.sleep(4) # never use this. wait until nowsecure passed the bot checks + +for handle in driver.window_handles[:-1]: + driver.switch_to.window(handle) + print("look. %s is closing" % driver.current_url) + time.sleep(1) + driver.close() + + +# attach to the last open window +driver.switch_to.window(driver.window_handles[0]) +print("now we only got ", driver.current_url, "left") + +time.sleep(1) + +driver.get("https://www.nowsecure.nl") + +time.sleep(5) + +print("lets go to UC project page") + +driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") + +input("press a key if you have RTFM") +driver.quit() diff --git a/undetected_chromedriver/webelement.py b/undetected_chromedriver/webelement.py index 5b0abe4..26050fb 100644 --- a/undetected_chromedriver/webelement.py +++ b/undetected_chromedriver/webelement.py @@ -1,7 +1,28 @@ +from selenium.webdriver.common.by import By import selenium.webdriver.remote.webelement class WebElement(selenium.webdriver.remote.webelement.WebElement): + def click_safe(self): + super().click() + self._parent.reconnect(0.1) + + def children( + self, tag=None, recursive=False + ) -> list[selenium.webdriver.remote.webelement.WebElement]: + """ + returns child elements of the current element (direct children only, unless recursive is true) + :param tag: str, if supplied, returns <tag> nodes only + """ + script = "return [... 
arguments[0].children]" + if tag: + script += ".filter( node => node.tagName === '%s')" % tag.upper() + if recursive: + return list(_recursive_children(self, tag)) + return self._parent.execute_script(script, self) + + +class UCWebElement(WebElement): """ Custom WebElement class which makes it easier to view elements when working in an interactive environment. @@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement): """ + def __init__(self, parent, id_): + super().__init__(parent, id_) + self._attrs = None + @property def attrs(self): - if not hasattr(self, "_attrs"): + if self._attrs is None: self._attrs = self._parent.execute_script( """ var items = {}; @@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement): if strattrs: strattrs = " " + strattrs return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>" + + +def _recursive_children(element, tag: str = None, _results=None): + """ + returns all descendants of <element> recursively, collected in a set + + :param element: `WebElement` object. + find children below this <element> + + :param tag: str = None. + if provided, return only <tag> elements (compared case-insensitively). example: 'a', or 'img' + :param _results: do not use! internal accumulator shared by the recursive calls + """ + results = set() if _results is None else _results + for child in element.children(): + if tag: + if child.tag_name.lower() == tag.lower(): + results.add(child) + else: + results.add(child) + _recursive_children(child, tag, results) + return results