> added WebElement.click_safe() method, in case you get detected

after clicking a link

> added WebElement.children(self, tag=None, recursive=False)
  to easily get/find child nodes

> added example.py, which I can point people to
  when they ask silly questions
  (no, it's actually quite cool, everyone should see it)

> some refactoring
This commit is contained in:
UltrafunkAmsterdam 2022-11-28 23:40:41 +01:00
parent 33d2a72848
commit 25bca31f35
3 changed files with 212 additions and 29 deletions

View File

@ -2,6 +2,11 @@
from __future__ import annotations
import subprocess
from typing import List
from typing import Optional
from selenium.webdriver.common.by import By
"""
@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
__version__ = "3.1.7"
__version__ = "3.2.0"
import inspect
import json
@ -30,15 +33,12 @@ import re
import shutil
import sys
import tempfile
import threading
import time
import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service
import selenium.webdriver.remote.webdriver
from selenium.webdriver.chrome.service import Service
import selenium.webdriver.remote.command
from .cdp import CDP
@ -46,6 +46,8 @@ from .dprocess import start_detached
from .options import ChromeOptions
from .patcher import IS_POSIX, Patcher
from .reactor import Reactor
from .webelement import WebElement, UCWebElement
__all__ = (
"Chrome",
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# see if a custom user profile is specified in options
for arg in options.arguments:
if "lang" in arg:
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
try:
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
)
if not user_data_dir:
# backward compatibility
# check if an old uc.ChromeOptions is used, and extract the user data dir
@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.browser_pid = browser.pid
if service_creationflags:
service = Service(
service = selenium.webdriver.common.service.Service(
patcher.executable_path, port, service_args, service_log_path
)
for attr_name in ("creationflags", "creation_flags"):
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.reactor = reactor
if advanced_elements:
from .webelement import WebElement
self._web_element_cls = UCWebElement
else:
self._web_element_cls = WebElement
if options.headless:
self._configure_headless()
def __getattribute__(self, item):
if not super().__getattribute__("debug"):
return super().__getattribute__(item)
else:
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
return original
def _configure_headless(self):
orig_get = self.get
logger.info("setting properties for headless")
@ -641,7 +639,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"source": """
let objectToInspect = window,
result = [];
while(objectToInspect !== null)
while(objectToInspect !== null)
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect));
objectToInspect = Object.getPrototypeOf(objectToInspect); }
result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig)
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
# super(Chrome, self).start_session(capabilities, browser_profile)
def quit(self):
logger.debug("closing webdriver")
if hasattr(self, "service") and getattr(self.service, "process", None):
try:
self.service.process.kill()
try:
if self.reactor and isinstance(self.reactor, Reactor):
logger.debug("shutting down reactor")
self.reactor.event.set()
except Exception: # noqa
logger.debug("webdriver process ended")
except (AttributeError, RuntimeError, OSError):
pass
try:
self.reactor.event.set()
logger.debug("shutting down reactor")
except AttributeError:
pass
try:
logger.debug("killing browser")
os.kill(self.browser_pid, 15)
except TimeoutError as e:
logger.debug("gracefully closed browser")
except Exception as e: # noqa
logger.debug(e, exc_info=True)
except Exception: # noqa
pass
if (
hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir")
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
):
for _ in range(5):
try:
shutil.rmtree(self.user_data_dir, ignore_errors=False)
except FileNotFoundError:
pass

View File

@ -0,0 +1,142 @@
import time
from selenium.webdriver.remote.webdriver import By
import selenium.webdriver.support.expected_conditions as EC # noqa
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc
# Demo script: drives a Chrome instance through a Google search, shows the
# `children()` / `click_safe()` helpers on elements, and then juggles several
# browser windows. All `time.sleep` calls are there only so a human can follow
# along on screen; do not copy them into real automation code.

driver = uc.Chrome()
driver.get("https://www.google.com")

# accept the terms. The consent dialog is not shown to every visitor, so only
# click when a matching element was actually found (find_elements returns an
# empty list otherwise, and indexing it would raise IndexError).
consent_buttons = driver.find_elements(
    By.XPATH, '//*[contains(text(), "Accept all")]'
)
if consent_buttons:
    consent_buttons[-1].click()

inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
inp_search.send_keys(
    "site:stackoverflow.com undetected chromedriver\n"
)  # \n as equivalent of ENTER key

results_container = WebDriverWait(driver, timeout=3).until(
    EC.presence_of_element_located((By.ID, "rso"))
)

# inject a banner <div> at the top of the results container
driver.execute_script(
    """
    let container = document.querySelector('#rso');
    let el = document.createElement('div');
    el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
    el.textContent = "these are excluded from offical support ;)";
    container.insertAdjacentElement('afterBegin', el);
    """
)
time.sleep(2)

# every <a> element below the results container, at any depth
for item in results_container.children("a", recursive=True):
    print(item)

# switching default WebElement for uc.WebElement and do it again
driver._web_element_cls = uc.UCWebElement
print("switched to use uc.WebElement. which is more descriptive")

# look the container up again so it is created with the class set above
results_container = driver.find_element(By.ID, "rso")

# gets only direct children of results_container
# children is a method unique for undetected chromedriver. it is
# incompatible when you use regular chromedriver
for item in results_container.children():
    print(item.tag_name)
    for grandchild in item.children(recursive=True):
        print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)

print("lets go to image search")
inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
inp_search.clear()
inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

body = driver.find_element(By.TAG_NAME, "body")
# the rest of the demo needs the "Images" link, so a missing link is allowed
# to fail loudly here
body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

# you can't reuse the body from above, because we are on another page right now
# so the body above is not attached anymore
image_search_body = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.TAG_NAME, "body"))
)

# gets all images and prints the src (falling back to data-src)
print("getting image data, hold on...")
for item in image_search_body.children("img", recursive=True):
    print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")

USELESS_SITES = [
    "https://www.trumpdonald.org",
    "https://www.isitchristmas.com",
    "https://isnickelbacktheworstbandever.tumblr.com",
    "https://www.isthatcherdeadyet.co.uk",
    "https://whitehouse.gov",
    "https://www.nsa.gov",
    "https://kimjongillookingatthings.tumblr.com",
    "https://instantrimshot.com",
    "https://www.nyan.cat",
    "https://twitter.com",
]

print("opening 9 additinal windows and control them")
time.sleep(1)  # never use this. this is for demonstration purposes only
for _ in range(9):
    driver.window_new()

print("now we got 10 windows")
time.sleep(1)
print("using the new windows to open 9 other useless sites")
time.sleep(1)  # never use this. this is for demonstration purposes only

for idx in range(1, 10):
    # skip the first handle which is our original window
    print("opening ", USELESS_SITES[idx])
    driver.switch_to.window(driver.window_handles[idx])
    driver.get(USELESS_SITES[idx])

for handle in driver.window_handles[1:]:
    driver.switch_to.window(handle)
    print("look. %s is working" % driver.current_url)
    time.sleep(1)  # never use this. it is here only so you can follow along

print("close windows (including the initial one!), but keep the last new opened window")
time.sleep(4)  # never use this. demonstration pause before closing windows

for handle in driver.window_handles[:-1]:
    driver.switch_to.window(handle)
    print("look. %s is closing" % driver.current_url)
    time.sleep(1)
    driver.close()

# attach to the last open window; it is the only handle left, hence index 0
driver.switch_to.window(driver.window_handles[0])
print("now we only got ", driver.current_url, "left")
time.sleep(1)

driver.get("https://www.nowsecure.nl")
time.sleep(5)  # never use this. wait until nowsecure passed the bot checks

print("lets go to UC project page")
driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")
input("press a key if you have RTFM")
driver.quit()

View File

@ -1,7 +1,28 @@
from selenium.webdriver.common.by import By
import selenium.webdriver.remote.webelement
class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    selenium WebElement extended with a couple of convenience helpers.
    """

    def click_safe(self):
        """
        clicks the element and afterwards calls ``reconnect(0.1)`` on the
        driver which owns this element.

        NOTE(review): meant for clicks after which the driver would otherwise
        get detected; the exact behaviour of ``reconnect`` is defined on the
        driver class, not here.
        """
        super().click()
        self._parent.reconnect(0.1)

    def children(
        self, tag=None, recursive=False
    ) -> "list[selenium.webdriver.remote.webelement.WebElement]":
        """
        returns child elements of the current element

        :param tag: str, if supplied, returns <tag> nodes only
        :param recursive: bool, when True the children of children (and so
                          on) are collected as well, instead of only the
                          direct child nodes
        :return: list of elements. for ``recursive=True`` the order of the
                 list is not defined, since the elements are gathered in a set
        """
        # The return annotation is a string on purpose: a bare ``list[...]``
        # is evaluated when the class body runs and fails on Python < 3.9.
        if recursive:
            # the helper hands back a set; convert it so both code paths
            # return the list promised by the signature
            return list(_recursive_children(self, tag))
        script = "return [... arguments[0].children]"
        if tag:
            # DOM tagName is upper case for HTML elements, hence .upper()
            script += ".filter( node => node.tagName === '%s')" % tag.upper()
        return self._parent.execute_script(script, self)
class UCWebElement(WebElement):
"""
Custom WebElement class which makes it easier to view elements when
working in an interactive environment.
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
"""
def __init__(self, parent, id_):
super().__init__(parent, id_)
self._attrs = None
@property
def attrs(self):
if not hasattr(self, "_attrs"):
if not self._attrs:
self._attrs = self._parent.execute_script(
"""
var items = {};
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs:
strattrs = " " + strattrs
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
def _recursive_children(element, tag: str = None, _results=None):
    """
    returns all children of <element> recursively

    :param element: `WebElement` object.
                    find children below this <element>
    :param tag: str = None.
                if provided, return only <tag> elements. example: 'a', or 'img'
                the comparison ignores case, so 'A' and 'a' behave the same
    :param _results: do not use!
                     (optional set which is filled in place and returned)
    :return: set containing the matching descendant elements
    """
    # `is None` instead of `or`: an empty set passed in by the caller must be
    # reused, not silently replaced by a fresh one.
    results = set() if _results is None else _results
    wanted = tag.lower() if tag else None

    # Explicit stack instead of recursion, so very deep element trees cannot
    # run into the interpreter's recursion limit.
    pending = [element]
    while pending:
        node = pending.pop()
        for child in node.children():
            # tag_name is only read when a filter is requested
            if wanted is None or child.tag_name.lower() == wanted:
                results.add(child)
            # descend into every child, also those that did not match
            pending.append(child)
    return results