> added WebElement.click_safe() method, for use in case you get detected
after clicking a link > added WebElement.children(self, tag=None, recursive=False) to easily get/find child nodes > added example.py, which I can point people to when they ask silly questions (no, it's actually quite cool, everyone should see it) > some refactoring
This commit is contained in:
parent
33d2a72848
commit
25bca31f35
|
@ -2,6 +2,11 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from typing import List
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -18,9 +23,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
__version__ = "3.2.0"
|
||||||
__version__ = "3.1.7"
|
|
||||||
|
|
||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
import json
|
import json
|
||||||
|
@ -30,15 +33,12 @@ import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import selenium.webdriver.chrome.service
|
import selenium.webdriver.chrome.service
|
||||||
import selenium.webdriver.chrome.webdriver
|
import selenium.webdriver.chrome.webdriver
|
||||||
import selenium.webdriver.common.service
|
import selenium.webdriver.common.service
|
||||||
import selenium.webdriver.remote.webdriver
|
import selenium.webdriver.remote.webdriver
|
||||||
|
|
||||||
from selenium.webdriver.chrome.service import Service
|
|
||||||
import selenium.webdriver.remote.command
|
import selenium.webdriver.remote.command
|
||||||
|
|
||||||
from .cdp import CDP
|
from .cdp import CDP
|
||||||
|
@ -46,6 +46,8 @@ from .dprocess import start_detached
|
||||||
from .options import ChromeOptions
|
from .options import ChromeOptions
|
||||||
from .patcher import IS_POSIX, Patcher
|
from .patcher import IS_POSIX, Patcher
|
||||||
from .reactor import Reactor
|
from .reactor import Reactor
|
||||||
|
from .webelement import WebElement, UCWebElement
|
||||||
|
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
"Chrome",
|
"Chrome",
|
||||||
|
@ -282,7 +284,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
|
|
||||||
# see if a custom user profile is specified in options
|
# see if a custom user profile is specified in options
|
||||||
for arg in options.arguments:
|
for arg in options.arguments:
|
||||||
|
|
||||||
if "lang" in arg:
|
if "lang" in arg:
|
||||||
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
||||||
try:
|
try:
|
||||||
|
@ -307,7 +308,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
)
|
)
|
||||||
|
|
||||||
if not user_data_dir:
|
if not user_data_dir:
|
||||||
|
|
||||||
# backward compatibility
|
# backward compatibility
|
||||||
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
||||||
|
|
||||||
|
@ -416,7 +416,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
self.browser_pid = browser.pid
|
self.browser_pid = browser.pid
|
||||||
|
|
||||||
if service_creationflags:
|
if service_creationflags:
|
||||||
service = Service(
|
service = selenium.webdriver.common.service.Service(
|
||||||
patcher.executable_path, port, service_args, service_log_path
|
patcher.executable_path, port, service_args, service_log_path
|
||||||
)
|
)
|
||||||
for attr_name in ("creationflags", "creation_flags"):
|
for attr_name in ("creationflags", "creation_flags"):
|
||||||
|
@ -449,15 +449,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
self.reactor = reactor
|
self.reactor = reactor
|
||||||
|
|
||||||
if advanced_elements:
|
if advanced_elements:
|
||||||
from .webelement import WebElement
|
self._web_element_cls = UCWebElement
|
||||||
|
else:
|
||||||
self._web_element_cls = WebElement
|
self._web_element_cls = WebElement
|
||||||
|
|
||||||
if options.headless:
|
if options.headless:
|
||||||
self._configure_headless()
|
self._configure_headless()
|
||||||
|
|
||||||
def __getattribute__(self, item):
|
def __getattribute__(self, item):
|
||||||
|
|
||||||
if not super().__getattribute__("debug"):
|
if not super().__getattribute__("debug"):
|
||||||
return super().__getattribute__(item)
|
return super().__getattribute__(item)
|
||||||
else:
|
else:
|
||||||
|
@ -477,7 +476,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
return original
|
return original
|
||||||
|
|
||||||
def _configure_headless(self):
|
def _configure_headless(self):
|
||||||
|
|
||||||
orig_get = self.get
|
orig_get = self.get
|
||||||
logger.info("setting properties for headless")
|
logger.info("setting properties for headless")
|
||||||
|
|
||||||
|
@ -641,7 +639,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
"source": """
|
"source": """
|
||||||
let objectToInspect = window,
|
let objectToInspect = window,
|
||||||
result = [];
|
result = [];
|
||||||
while(objectToInspect !== null)
|
while(objectToInspect !== null)
|
||||||
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
||||||
objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
||||||
result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig)
|
result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig)
|
||||||
|
@ -718,24 +716,21 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
# super(Chrome, self).start_session(capabilities, browser_profile)
|
# super(Chrome, self).start_session(capabilities, browser_profile)
|
||||||
|
|
||||||
def quit(self):
|
def quit(self):
|
||||||
logger.debug("closing webdriver")
|
try:
|
||||||
if hasattr(self, "service") and getattr(self.service, "process", None):
|
|
||||||
self.service.process.kill()
|
self.service.process.kill()
|
||||||
try:
|
logger.debug("webdriver process ended")
|
||||||
if self.reactor and isinstance(self.reactor, Reactor):
|
except (AttributeError, RuntimeError, OSError):
|
||||||
logger.debug("shutting down reactor")
|
pass
|
||||||
self.reactor.event.set()
|
try:
|
||||||
except Exception: # noqa
|
self.reactor.event.set()
|
||||||
|
logger.debug("shutting down reactor")
|
||||||
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
logger.debug("killing browser")
|
|
||||||
os.kill(self.browser_pid, 15)
|
os.kill(self.browser_pid, 15)
|
||||||
|
logger.debug("gracefully closed browser")
|
||||||
except TimeoutError as e:
|
except Exception as e: # noqa
|
||||||
logger.debug(e, exc_info=True)
|
logger.debug(e, exc_info=True)
|
||||||
except Exception: # noqa
|
|
||||||
pass
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
hasattr(self, "keep_user_data_dir")
|
hasattr(self, "keep_user_data_dir")
|
||||||
and hasattr(self, "user_data_dir")
|
and hasattr(self, "user_data_dir")
|
||||||
|
@ -743,7 +738,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
||||||
):
|
):
|
||||||
for _ in range(5):
|
for _ in range(5):
|
||||||
try:
|
try:
|
||||||
|
|
||||||
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
shutil.rmtree(self.user_data_dir, ignore_errors=False)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -0,0 +1,142 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
from selenium.webdriver.remote.webdriver import By
|
||||||
|
import selenium.webdriver.support.expected_conditions as EC # noqa
|
||||||
|
from selenium.webdriver.support.wait import WebDriverWait
|
||||||
|
|
||||||
|
import undetected_chromedriver as uc
|
||||||
|
|
||||||
|
|
||||||
|
driver = uc.Chrome()

# everything after creating the driver runs inside try/finally, so the
# browser is shut down even when one of the steps below raises
try:
    driver.get("https://www.google.com")

    # accept the terms
    # the consent button may not be present at all, in which case
    # find_elements returns an empty list and there is nothing to click
    accept_buttons = driver.find_elements(
        By.XPATH, '//*[contains(text(), "Accept all")]'
    )
    if accept_buttons:
        accept_buttons[-1].click()

    inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')

    inp_search.send_keys(
        "site:stackoverflow.com undetected chromedriver\n"
    )  # \n as equivalent of ENTER key

    results_container = WebDriverWait(driver, timeout=3).until(
        EC.presence_of_element_located((By.ID, "rso"))
    )

    # inject a banner as first child of the results container
    driver.execute_script(
        """
        let container = document.querySelector('#rso');
        let el = document.createElement('div');
        el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:2em;font-size:1.5em';
        el.textContent = "these are excluded from offical support ;)";
        container.insertAdjacentElement('afterBegin', el);

        """
    )

    time.sleep(2)

    # all <a> nodes below the results container, at any depth
    for item in results_container.children("a", recursive=True):
        print(item)

    # switching default WebElement for uc.WebElement and do it again
    driver._web_element_cls = uc.UCWebElement

    print("switched to use uc.WebElement. which is more descriptive")
    results_container = driver.find_element(By.ID, "rso")

    # gets only direct children of results_container
    # children is a method unique for undetected chromedriver. it is
    # incompatible when you use regular chromedriver
    for item in results_container.children():
        print(item.tag_name)
        for grandchild in item.children(recursive=True):
            print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)

    print("lets go to image search")
    inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
    inp_search.clear()
    inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

    body = driver.find_element(By.TAG_NAME, "body")
    body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

    # you can't reuse the body from above, because we are on another page right now
    # so the body above is not attached anymore
    image_search_body = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # gets all images and prints the src
    print("getting image data, hold on...")

    for item in image_search_body.children("img", recursive=True):
        # fall back to data-src when the image has no src attribute
        print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")

    USELESS_SITES = [
        "https://www.trumpdonald.org",
        "https://www.isitchristmas.com",
        "https://isnickelbacktheworstbandever.tumblr.com",
        "https://www.isthatcherdeadyet.co.uk",
        "https://whitehouse.gov",
        "https://www.nsa.gov",
        "https://kimjongillookingatthings.tumblr.com",
        "https://instantrimshot.com",
        "https://www.nyan.cat",
        "https://twitter.com",
    ]

    print("opening 9 additinal windows and control them")
    time.sleep(1)  # never use this. this is for demonstration purposes only
    for _ in range(9):
        driver.window_new()

    print("now we got 10 windows")
    time.sleep(1)
    print("using the new windows to open 9 other useless sites")
    time.sleep(1)  # never use this. this is for demonstration purposes only

    for idx in range(1, 10):
        # skip the first handle which is our original window
        print("opening ", USELESS_SITES[idx])
        driver.switch_to.window(driver.window_handles[idx])
        driver.get(USELESS_SITES[idx])

    for handle in driver.window_handles[1:]:
        driver.switch_to.window(handle)
        print("look. %s is working" % driver.current_url)
        time.sleep(1)  # never use this. it is here only so you can follow along

    print("close windows (including the initial one!), but keep the last new opened window")
    time.sleep(4)  # never use this. this is for demonstration purposes only

    for handle in driver.window_handles[:-1]:
        driver.switch_to.window(handle)
        print("look. %s is closing" % driver.current_url)
        time.sleep(1)
        driver.close()

    # attach to the last open window
    # (it is the only handle left, so it sits at index 0)
    driver.switch_to.window(driver.window_handles[0])
    print("now we only got ", driver.current_url, "left")

    time.sleep(1)

    driver.get("https://www.nowsecure.nl")

    # never use this. wait until nowsecure passed the bot checks
    time.sleep(5)

    print("lets go to UC project page")

    driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")

    input("press a key if you have RTFM")
finally:
    driver.quit()
|
|
@ -1,7 +1,28 @@
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
import selenium.webdriver.remote.webelement
|
import selenium.webdriver.remote.webelement
|
||||||
|
|
||||||
|
|
||||||
class WebElement(selenium.webdriver.remote.webelement.WebElement):
    """
    selenium WebElement extended with two helpers:
    click_safe() and children()
    """

    def click_safe(self):
        """
        clicks the element and afterwards calls `reconnect(0.1)` on
        `self._parent` (presumably the driver which owns this element).

        meant for the case where you get detected after clicking a link
        """
        super().click()
        self._parent.reconnect(0.1)

    def children(
        self, tag=None, recursive=False
    ) -> list[selenium.webdriver.remote.webelement.WebElement]:
        """
        returns direct child elements of current element

        :param tag: str, if supplied, returns <tag> nodes only
        :param recursive: bool, if True, returns the child elements of
            every level below the current element instead of the direct
            children only. the order of that result is not defined
        :return: list of elements
        """
        if recursive:
            # _recursive_children collects into a set, convert it so both
            # code paths hand back the list promised by the signature
            return list(_recursive_children(self, tag))

        if not tag:
            return self._parent.execute_script(
                "return [... arguments[0].children]", self
            )

        # the tag name travels as a script argument instead of being pasted
        # into the javascript source, so quotes in it can not break the script.
        # it is copied into a const first, so the filter callback does not
        # have to reach for `arguments` itself
        script = """
            const wanted = arguments[1];
            return [... arguments[0].children].filter( node => node.tagName === wanted)
        """
        return self._parent.execute_script(script, self, tag.upper())
|
||||||
|
|
||||||
|
|
||||||
|
class UCWebElement(WebElement):
|
||||||
"""
|
"""
|
||||||
Custom WebElement class which makes it easier to view elements when
|
Custom WebElement class which makes it easier to view elements when
|
||||||
working in an interactive environment.
|
working in an interactive environment.
|
||||||
|
@ -14,9 +35,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self, parent, id_):
|
||||||
|
super().__init__(parent, id_)
|
||||||
|
self._attrs = None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def attrs(self):
|
def attrs(self):
|
||||||
if not hasattr(self, "_attrs"):
|
if not self._attrs:
|
||||||
self._attrs = self._parent.execute_script(
|
self._attrs = self._parent.execute_script(
|
||||||
"""
|
"""
|
||||||
var items = {};
|
var items = {};
|
||||||
|
@ -35,3 +60,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
||||||
if strattrs:
|
if strattrs:
|
||||||
strattrs = " " + strattrs
|
strattrs = " " + strattrs
|
||||||
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
||||||
|
|
||||||
|
|
||||||
|
def _recursive_children(element, tag: str = None, _results=None):
|
||||||
|
"""
|
||||||
|
returns all children of <element> recursively
|
||||||
|
|
||||||
|
:param element: `WebElement` object.
|
||||||
|
find children below this <element>
|
||||||
|
|
||||||
|
:param tag: str = None.
|
||||||
|
if provided, return only <tag> elements. example: 'a', or 'img'
|
||||||
|
:param _results: do not use!
|
||||||
|
"""
|
||||||
|
results = _results or set()
|
||||||
|
for element in element.children():
|
||||||
|
if tag:
|
||||||
|
if element.tag_name == tag:
|
||||||
|
results.add(element)
|
||||||
|
else:
|
||||||
|
results.add(element)
|
||||||
|
results |= _recursive_children(element, tag, results)
|
||||||
|
return results
|
||||||
|
|
Loading…
Reference in New Issue