> added WebElement.click_safe() method, in case you get detected
after clicking a link > added WebElement.children(self, tag=None, recursive=False) to easily get/find child nodes > added example.py, which I can point people to when they ask silly questions (no, it's actually quite cool, everyone should see it) > some refactoring
This commit is contained in:
parent
33d2a72848
commit
25bca31f35
|
@ -2,6 +2,11 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import List
|
||||
from typing import Optional
|
||||
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
|
||||
"""
|
||||
|
||||
|
@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
|||
|
||||
"""
|
||||
|
||||
|
||||
__version__ = "3.1.7"
|
||||
|
||||
__version__ = "3.2.0"
|
||||
|
||||
import inspect
|
||||
import json
|
||||
|
@ -30,15 +33,12 @@ import re
|
|||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
|
||||
import selenium.webdriver.chrome.service
|
||||
import selenium.webdriver.chrome.webdriver
|
||||
import selenium.webdriver.common.service
|
||||
import selenium.webdriver.remote.webdriver
|
||||
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
import selenium.webdriver.remote.command
|
||||
|
||||
from .cdp import CDP
|
||||
|
@ -46,6 +46,8 @@ from .dprocess import start_detached
|
|||
from .options import ChromeOptions
|
||||
from .patcher import IS_POSIX, Patcher
|
||||
from .reactor import Reactor
|
||||
from .webelement import WebElement, UCWebElement
|
||||
|
||||
|
||||
__all__ = (
|
||||
"Chrome",
|
||||
|
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
|
||||
# see if a custom user profile is specified in options
|
||||
for arg in options.arguments:
|
||||
|
||||
if "lang" in arg:
|
||||
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
||||
try:
|
||||
|
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
)
|
||||
|
||||
if not user_data_dir:
|
||||
|
||||
# backward compatibility
|
||||
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
||||
|
||||
|
@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
self.browser_pid = browser.pid
|
||||
|
||||
if service_creationflags:
|
||||
service = Service(
|
||||
service = selenium.webdriver.common.service.Service(
|
||||
patcher.executable_path, port, service_args, service_log_path
|
||||
)
|
||||
for attr_name in ("creationflags", "creation_flags"):
|
||||
|
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
self.reactor = reactor
|
||||
|
||||
if advanced_elements:
|
||||
from .webelement import WebElement
|
||||
|
||||
self._web_element_cls = UCWebElement
|
||||
else:
|
||||
self._web_element_cls = WebElement
|
||||
|
||||
if options.headless:
|
||||
self._configure_headless()
|
||||
|
||||
def __getattribute__(self, item):
|
||||
|
||||
if not super().__getattribute__("debug"):
|
||||
return super().__getattribute__(item)
|
||||
else:
|
||||
|
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
return original
|
||||
|
||||
def _configure_headless(self):
|
||||
|
||||
orig_get = self.get
|
||||
logger.info("setting properties for headless")
|
||||
|
||||
|
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||
|
||||
def quit(self):
|
||||
logger.debug("closing webdriver")
|
||||
if hasattr(self, "service") and getattr(self.service, "process", None):
|
||||
try:
|
||||
self.service.process.kill()
|
||||
logger.debug("webdriver process ended")
|
||||
except (AttributeError, RuntimeError, OSError):
|
||||
pass
|
||||
try:
|
||||
if self.reactor and isinstance(self.reactor, Reactor):
|
||||
logger.debug("shutting down reactor")
|
||||
self.reactor.event.set()
|
||||
except Exception: # noqa
|
||||
logger.debug("shutting down reactor")
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
logger.debug("killing browser")
|
||||
os.kill(self.browser_pid, 15)
|
||||
|
||||
except TimeoutError as e:
|
||||
logger.debug("gracefully closed browser")
|
||||
except Exception as e: # noqa
|
||||
logger.debug(e, exc_info=True)
|
||||
except Exception: # noqa
|
||||
pass
|
||||
|
||||
if (
|
||||
hasattr(self, "keep_user_data_dir")
|
||||
and hasattr(self, "user_data_dir")
|
||||
|
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
|||
):
|
||||
for _ in range(5):
|
||||
try:
|
||||
|
||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
import time
|
||||
|
||||
from selenium.webdriver.remote.webdriver import By
|
||||
import selenium.webdriver.support.expected_conditions as EC # noqa
|
||||
from selenium.webdriver.support.wait import WebDriverWait
|
||||
|
||||
import undetected_chromedriver as uc
|
||||
|
||||
|
||||
# Demonstration script: drives a Chrome instance through a Google search,
# shows the WebElement.children() / click_safe() helpers, and then opens,
# visits and closes a number of extra windows.

driver = uc.Chrome()
driver.get("https://www.google.com")

# accept the terms
# find_elements() returns an empty list when nothing matches, so only click
# when the button is actually present; indexing an empty list with [-1]
# would raise IndexError and abort the whole demo.
accept_buttons = driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')
if accept_buttons:
    accept_buttons[-1].click()

inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')

inp_search.send_keys(
    "site:stackoverflow.com undetected chromedriver\n"
)  # \n as equivalent of ENTER key

results_container = WebDriverWait(driver, timeout=3).until(
    EC.presence_of_element_located((By.ID, "rso"))
)

# inject a banner element at the top of the results container
driver.execute_script(
    """
    let container = document.querySelector('#rso');
    let el = document.createElement('div');
    el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
    el.textContent = "these are excluded from offical support ;)";
    container.insertAdjacentElement('afterBegin', el);

    """
)

time.sleep(2)

# all <a> descendants of the results container
for item in results_container.children("a", recursive=True):
    print(item)

# switching default WebElement for uc.WebElement and do it again
driver._web_element_cls = uc.UCWebElement

print("switched to use uc.WebElement. which is more descriptive")
results_container = driver.find_element(By.ID, "rso")

# gets only direct children of results_container
# children is a method unique for undetected chromedriver. it is
# incompatible when you use regular chromedriver
for item in results_container.children():
    print(item.tag_name)
    for grandchild in item.children(recursive=True):
        print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)


print("lets go to image search")
inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
inp_search.clear()
inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

body = driver.find_element(By.TAG_NAME, "body")
body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

# you can't reuse the body from above, because we are on another page right now
# so the body above is not attached anymore
image_search_body = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.TAG_NAME, "body"))
)

# gets all images and prints the src
print("getting image data, hold on...")

for item in image_search_body.children("img", recursive=True):
    # fall back to the "data-src" attribute when "src" is missing
    print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")


USELESS_SITES = [
    "https://www.trumpdonald.org",
    "https://www.isitchristmas.com",
    "https://isnickelbacktheworstbandever.tumblr.com",
    "https://www.isthatcherdeadyet.co.uk",
    "https://whitehouse.gov",
    "https://www.nsa.gov",
    "https://kimjongillookingatthings.tumblr.com",
    "https://instantrimshot.com",
    "https://www.nyan.cat",
    "https://twitter.com",
]

print("opening 9 additinal windows and control them")
time.sleep(1)  # never use this. this is for demonstration purposes only
for _ in range(9):
    driver.window_new()

print("now we got 10 windows")
time.sleep(1)
print("using the new windows to open 9 other useless sites")
time.sleep(1)  # never use this. this is for demonstration purposes only

for idx in range(1, 10):
    # skip the first handle which is our original window
    print("opening ", USELESS_SITES[idx])
    driver.switch_to.window(driver.window_handles[idx])
    driver.get(USELESS_SITES[idx])


for handle in driver.window_handles[1:]:
    driver.switch_to.window(handle)
    print("look. %s is working" % driver.current_url)
    time.sleep(1)  # never use this. it is here only so you can follow along


print("close windows (including the initial one!), but keep the last new opened window")
time.sleep(4)  # never use this. wait until nowsecure passed the bot checks

for handle in driver.window_handles[:-1]:
    driver.switch_to.window(handle)
    print("look. %s is closing" % driver.current_url)
    time.sleep(1)
    driver.close()


# attach to the last open window
driver.switch_to.window(driver.window_handles[0])
print("now we only got ", driver.current_url, "left")

time.sleep(1)

driver.get("https://www.nowsecure.nl")

time.sleep(5)

print("lets go to UC project page")

driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")

input("press a key if you have RTFM")
driver.quit()
|
|
@ -1,7 +1,28 @@
|
|||
from selenium.webdriver.common.by import By
|
||||
import selenium.webdriver.remote.webelement
|
||||
|
||||
|
||||
class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    Selenium WebElement extended with a click helper and a child-node lookup.
    """

    def click_safe(self):
        """
        clicks the element, then calls ``reconnect(0.1)`` on the owning driver

        NOTE(review): ``reconnect`` is not defined in this file; presumably it
        is provided by the undetected-chromedriver ``Chrome`` class - confirm.
        """
        super().click()
        self._parent.reconnect(0.1)

    # The annotation is quoted so it is not evaluated when the class body
    # runs: subscripting the builtin ``list`` raises TypeError before
    # Python 3.9.
    def children(
        self, tag=None, recursive=False
    ) -> "list[selenium.webdriver.remote.webelement.WebElement]":
        """
        returns child elements of current element

        :param tag: str, if supplied, returns <tag> nodes only
        :param recursive: bool, if True, returns all descendants (children,
            grandchildren, ...) instead of direct children only.
            the order of the recursive result is not guaranteed
        :return: list of matching elements
        """
        if recursive:
            # the recursive helper collects into a set; convert it so both
            # code paths hand back a list, as the annotation promises
            return list(_recursive_children(self, tag))
        script = "return [... arguments[0].children]"
        if tag:
            script += ".filter( node => node.tagName === '%s')" % tag.upper()
        return self._parent.execute_script(script, self)
|
||||
|
||||
|
||||
class UCWebElement(WebElement):
|
||||
"""
|
||||
Custom WebElement class which makes it easier to view elements when
|
||||
working in an interactive environment.
|
||||
|
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, parent, id_):
|
||||
super().__init__(parent, id_)
|
||||
self._attrs = None
|
||||
|
||||
@property
|
||||
def attrs(self):
|
||||
if not hasattr(self, "_attrs"):
|
||||
if not self._attrs:
|
||||
self._attrs = self._parent.execute_script(
|
||||
"""
|
||||
var items = {};
|
||||
|
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
|||
if strattrs:
|
||||
strattrs = " " + strattrs
|
||||
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
||||
|
||||
|
||||
def _recursive_children(element, tag: str = None, _results=None):
|
||||
"""
|
||||
returns all children of <element> recursively
|
||||
|
||||
:param element: `WebElement` object.
|
||||
find children below this <element>
|
||||
|
||||
:param tag: str = None.
|
||||
if provided, return only <tag> elements. example: 'a', or 'img'
|
||||
:param _results: do not use!
|
||||
"""
|
||||
results = _results or set()
|
||||
for element in element.children():
|
||||
if tag:
|
||||
if element.tag_name == tag:
|
||||
results.add(element)
|
||||
else:
|
||||
results.add(element)
|
||||
results |= _recursive_children(element, tag, results)
|
||||
return results
|
||||
|
|
Loading…
Reference in New Issue