> added WebElement.click_safe() method, for cases where you get detected
  after clicking a link

> added WebElement.children(self, tag=None, recursive=False)
  to easily get/find child nodes

> added example.py, which I can point people to
  when they ask silly questions
  (no, it's actually quite cool, everyone should see it)

> some refactoring
This commit is contained in:
UltrafunkAmsterdam 2022-11-28 23:40:41 +01:00
parent 33d2a72848
commit 25bca31f35
3 changed files with 212 additions and 29 deletions

View File

@ -2,6 +2,11 @@
from __future__ import annotations from __future__ import annotations
import subprocess import subprocess
from typing import List
from typing import Optional
from selenium.webdriver.common.by import By
""" """
@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
""" """
__version__ = "3.2.0"
__version__ = "3.1.7"
import inspect import inspect
import json import json
@ -30,15 +33,12 @@ import re
import shutil import shutil
import sys import sys
import tempfile import tempfile
import threading
import time import time
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service import selenium.webdriver.common.service
import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.webdriver
from selenium.webdriver.chrome.service import Service
import selenium.webdriver.remote.command import selenium.webdriver.remote.command
from .cdp import CDP from .cdp import CDP
@ -46,6 +46,8 @@ from .dprocess import start_detached
from .options import ChromeOptions from .options import ChromeOptions
from .patcher import IS_POSIX, Patcher from .patcher import IS_POSIX, Patcher
from .reactor import Reactor from .reactor import Reactor
from .webelement import WebElement, UCWebElement
__all__ = ( __all__ = (
"Chrome", "Chrome",
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# see if a custom user profile is specified in options # see if a custom user profile is specified in options
for arg in options.arguments: for arg in options.arguments:
if "lang" in arg: if "lang" in arg:
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg) m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
try: try:
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
) )
if not user_data_dir: if not user_data_dir:
# backward compatiblity # backward compatiblity
# check if an old uc.ChromeOptions is used, and extract the user data dir # check if an old uc.ChromeOptions is used, and extract the user data dir
@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.browser_pid = browser.pid self.browser_pid = browser.pid
if service_creationflags: if service_creationflags:
service = Service( service = selenium.webdriver.common.service.Service(
patcher.executable_path, port, service_args, service_log_path patcher.executable_path, port, service_args, service_log_path
) )
for attr_name in ("creationflags", "creation_flags"): for attr_name in ("creationflags", "creation_flags"):
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.reactor = reactor self.reactor = reactor
if advanced_elements: if advanced_elements:
from .webelement import WebElement self._web_element_cls = UCWebElement
else:
self._web_element_cls = WebElement self._web_element_cls = WebElement
if options.headless: if options.headless:
self._configure_headless() self._configure_headless()
def __getattribute__(self, item): def __getattribute__(self, item):
if not super().__getattribute__("debug"): if not super().__getattribute__("debug"):
return super().__getattribute__(item) return super().__getattribute__(item)
else: else:
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
return original return original
def _configure_headless(self): def _configure_headless(self):
orig_get = self.get orig_get = self.get
logger.info("setting properties for headless") logger.info("setting properties for headless")
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# super(Chrome, self).start_session(capabilities, browser_profile) # super(Chrome, self).start_session(capabilities, browser_profile)
def quit(self): def quit(self):
logger.debug("closing webdriver") try:
if hasattr(self, "service") and getattr(self.service, "process", None):
self.service.process.kill() self.service.process.kill()
logger.debug("webdriver process ended")
except (AttributeError, RuntimeError, OSError):
pass
try: try:
if self.reactor and isinstance(self.reactor, Reactor):
logger.debug("shutting down reactor")
self.reactor.event.set() self.reactor.event.set()
except Exception: # noqa logger.debug("shutting down reactor")
except AttributeError:
pass pass
try: try:
logger.debug("killing browser")
os.kill(self.browser_pid, 15) os.kill(self.browser_pid, 15)
logger.debug("gracefully closed browser")
except TimeoutError as e: except Exception as e: # noqa
logger.debug(e, exc_info=True) logger.debug(e, exc_info=True)
except Exception: # noqa
pass
if ( if (
hasattr(self, "keep_user_data_dir") hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir") and hasattr(self, "user_data_dir")
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
): ):
for _ in range(5): for _ in range(5):
try: try:
shutil.rmtree(self.user_data_dir, ignore_errors=False) shutil.rmtree(self.user_data_dir, ignore_errors=False)
except FileNotFoundError: except FileNotFoundError:
pass pass

View File

@ -0,0 +1,142 @@
import time
from selenium.webdriver.remote.webdriver import By
import selenium.webdriver.support.expected_conditions as EC # noqa
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc
# Demonstration script: drives a real Chrome instance through a few pages to
# show the undetected_chromedriver additions (children(), click_safe(),
# UCWebElement and multi-window control).
driver = uc.Chrome()

# everything below runs inside try/finally so the browser is always closed,
# even when one of the steps raises (missing element, timeout, ...)
try:
    driver.get("https://www.google.com")

    # accept the terms. the consent dialog is not always shown, so only click
    # when a matching button was actually found
    consent_buttons = driver.find_elements(
        By.XPATH, '//*[contains(text(), "Accept all")]'
    )
    if consent_buttons:
        consent_buttons[-1].click()

    inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')

    inp_search.send_keys(
        "site:stackoverflow.com undetected chromedriver\n"
    )  # \n as equivalent of ENTER key

    results_container = WebDriverWait(driver, timeout=3).until(
        EC.presence_of_element_located((By.ID, "rso"))
    )

    # inject a banner at the top of the results container
    driver.execute_script(
        """
        let container = document.querySelector('#rso');
        let el = document.createElement('div');
        el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
        el.textContent = "these are excluded from offical support ;)";
        container.insertAdjacentElement('afterBegin', el);
        """
    )

    time.sleep(2)

    # all <a> elements below the results container, at any depth
    for item in results_container.children("a", recursive=True):
        print(item)

    # switch the default element class to uc.UCWebElement and do it again
    driver._web_element_cls = uc.UCWebElement

    print("switched to use uc.WebElement. which is more descriptive")
    results_container = driver.find_element(By.ID, "rso")

    # gets only direct children of results_container
    # children is a method unique for undetected chromedriver. it is
    # incompatible when you use regular chromedriver
    for item in results_container.children():
        print(item.tag_name)
        for grandchild in item.children(recursive=True):
            print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)

    print("lets go to image search")
    inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
    inp_search.clear()
    inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

    body = driver.find_element(By.TAG_NAME, "body")
    body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

    # you can't reuse the body from above, because we are on another page right now
    # so the body above is not attached anymore
    image_search_body = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # gets all images and prints the src
    print("getting image data, hold on...")

    for item in image_search_body.children("img", recursive=True):
        print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")

    USELESS_SITES = [
        "https://www.trumpdonald.org",
        "https://www.isitchristmas.com",
        "https://isnickelbacktheworstbandever.tumblr.com",
        "https://www.isthatcherdeadyet.co.uk",
        "https://whitehouse.gov",
        "https://www.nsa.gov",
        "https://kimjongillookingatthings.tumblr.com",
        "https://instantrimshot.com",
        "https://www.nyan.cat",
        "https://twitter.com",
    ]

    print("opening 9 additinal windows and control them")
    time.sleep(1)  # never use this. this is for demonstration purposes only
    for _ in range(9):
        driver.window_new()

    print("now we got 10 windows")
    time.sleep(1)
    print("using the new windows to open 9 other useless sites")
    time.sleep(1)  # never use this. this is for demonstration purposes only
    for idx in range(1, 10):
        # index 0 is skipped: handle 0 is our original window, so the new
        # windows (and the sites loaded into them) start at index 1
        print("opening ", USELESS_SITES[idx])
        driver.switch_to.window(driver.window_handles[idx])
        driver.get(USELESS_SITES[idx])

    for handle in driver.window_handles[1:]:
        driver.switch_to.window(handle)
        print("look. %s is working" % driver.current_url)
        time.sleep(1)  # never use this. it is here only so you can follow along

    print("close windows (including the initial one!), but keep the last new opened window")
    time.sleep(4)  # never use this. it is here only so you can follow along

    for handle in driver.window_handles[:-1]:
        driver.switch_to.window(handle)
        print("look. %s is closing" % driver.current_url)
        time.sleep(1)
        driver.close()

    # attach to the only window that is still open
    driver.switch_to.window(driver.window_handles[0])
    print("now we only got ", driver.current_url, "left")

    time.sleep(1)

    driver.get("https://www.nowsecure.nl")

    time.sleep(5)  # never use this. wait until nowsecure passed the bot checks

    print("lets go to UC project page")
    driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")

    input("press a key if you have RTFM")
finally:
    driver.quit()

View File

@ -1,7 +1,28 @@
from selenium.webdriver.common.by import By
import selenium.webdriver.remote.webelement import selenium.webdriver.remote.webelement
class WebElement(selenium.webdriver.remote.webelement.WebElement): class WebElement(selenium.webdriver.remote.webelement.WebElement):
def click_safe(self):
    """
    clicks this element and then reconnects the driver

    meant for situations where you get detected after clicking a link.
    performs the regular click first, then calls `reconnect(0.1)` on the
    driver this element belongs to.
    """
    super().click()
    driver = self._parent
    driver.reconnect(0.1)
def children(
    self, tag=None, recursive=False
) -> "list[selenium.webdriver.remote.webelement.WebElement]":
    """
    returns child elements of current element

    :param tag: str, if supplied, returns <tag> nodes only
    :param recursive: bool, if True, descendants at every depth are
                      collected through `_recursive_children` instead of
                      direct children only.
                      NOTE: that helper accumulates into a set, so the
                      recursive result is a set and carries no ordering.
    :return: the direct children as returned by the driver, or the
             collection produced by `_recursive_children`
    """
    # the return annotation is quoted on purpose: this module does not use
    # `from __future__ import annotations`, and subscripting the builtin
    # `list` at definition time fails on python < 3.9
    if recursive:
        return _recursive_children(self, tag)

    script = "return [... arguments[0].children]"
    if not tag:
        return self._parent.execute_script(script, self)

    # the tag is handed over as a script argument rather than formatted into
    # the javascript source, so quotes in `tag` cannot break the script
    script += ".filter( node => node.tagName === arguments[1])"
    return self._parent.execute_script(script, self, tag.upper())
class UCWebElement(WebElement):
""" """
Custom WebElement class which makes it easier to view elements when Custom WebElement class which makes it easier to view elements when
working in an interactive environment. working in an interactive environment.
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
""" """
def __init__(self, parent, id_):
    """
    creates the element and prepares the attribute cache

    :param parent: forwarded unchanged to the base class constructor
    :param id_: forwarded unchanged to the base class constructor
    """
    super().__init__(parent, id_)
    # starts out empty; the `attrs` property fills it on first access
    self._attrs = None
@property @property
def attrs(self): def attrs(self):
if not hasattr(self, "_attrs"): if not self._attrs:
self._attrs = self._parent.execute_script( self._attrs = self._parent.execute_script(
""" """
var items = {}; var items = {};
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs: if strattrs:
strattrs = " " + strattrs strattrs = " " + strattrs
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>" return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
def _recursive_children(element, tag: str = None, _results=None):
"""
returns all children of <element> recursively
:param element: `WebElement` object.
find children below this <element>
:param tag: str = None.
if provided, return only <tag> elements. example: 'a', or 'img'
:param _results: do not use!
"""
results = _results or set()
for element in element.children():
if tag:
if element.tag_name == tag:
results.add(element)
else:
results.add(element)
results |= _recursive_children(element, tag, results)
return results