more refactoring; fix bug where the browser stays open when the script exits

This commit is contained in:
UltrafunkAmsterdam 2022-12-26 01:48:01 +01:00
parent 5df8e00a5a
commit 07abe814a6
5 changed files with 201 additions and 164 deletions

176
example/example.py Normal file
View File

@ -0,0 +1,176 @@
import time
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.remote.webdriver import By
import selenium.webdriver.support.expected_conditions as EC # noqa
from selenium.webdriver.support.wait import WebDriverWait
import undetected_chromedriver as uc
def main(args=None):
    """
    Run the interactive undetected_chromedriver demo from start to finish.

    One ``uc.Chrome`` instance is created and driven through a Google search,
    a DOM injection, the ``children()`` helper on result elements, an image
    search, and a multi-window walk over ``USELESS_SITES``. The driver is
    always quit when the function exits, also when a step raises.

    :param args: optional object exposing a ``no_sleeps`` attribute (for
        example the ``argparse`` namespace built by the ``__main__`` block).
        The meaning is inverted with respect to the name: a truthy
        ``no_sleeps`` keeps the real pauses, a falsy value replaces every
        pause with a printed message. When ``args`` is falsy the pauses
        are kept.
    """
    # so the demo is 'follow-able' instead of some flashes and boom => done.
    take_it_easy = True
    if args:
        take_it_easy = args.no_sleeps

    if take_it_easy:
        sleep = time.sleep
    else:

        def sleep(n):
            print("we could be sleeping %d seconds here, but we don't" % n)

    driver = uc.Chrome()
    try:
        driver.get("https://www.google.com")

        # accept the terms. the consent dialog is not always there, so only
        # click when a matching element was actually found (indexing an empty
        # list would abort the whole demo with an IndexError).
        consent_buttons = driver.find_elements(
            By.XPATH, '//*[contains(text(), "Accept all")]'
        )
        if consent_buttons:
            consent_buttons[-1].click()

        inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
        inp_search.send_keys(
            "site:stackoverflow.com undetected chromedriver\n"
        )  # \n as equivalent of ENTER key

        results_container = WebDriverWait(driver, timeout=3).until(
            EC.presence_of_element_located((By.ID, "rso"))
        )

        # inject a banner at the top of the results container; its text is
        # swapped after 2.5 seconds by the setTimeout callback
        driver.execute_script(
            """
            let container = document.querySelector('#rso');
            let el = document.createElement('div');
            el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em';
            el.textContent = "Excluded from support...!";
            container.insertAdjacentElement('afterBegin', el);
            setTimeout(() => {
                el.textContent = "<<< OH , CHECK YOUR CONSOLE! >>>"}, 2500)
            """
        )

        sleep(2)  # never use this. this is for demonstration purposes only

        for item in results_container.children("a", recursive=True):
            print(item)

        # switching default WebElement for uc.WebElement and do it again
        driver._web_element_cls = uc.UCWebElement

        print("switched to use uc.WebElement. which is more descriptive")
        results_container = driver.find_element(By.ID, "rso")

        # gets only direct children of results_container
        # children is a method unique for undetected chromedriver. it is
        # incompatible when you use regular chromedriver
        for item in results_container.children():
            print(item.tag_name)
            for grandchild in item.children(recursive=True):
                print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)

        print("lets go to image search")
        inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
        inp_search.clear()
        inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

        body = driver.find_element(By.TAG_NAME, "body")
        body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

        # you can't reuse the body from above, because we are on another page right now
        # so the body above is not attached anymore
        image_search_body = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        # gets all images and prints the src
        print("getting image sources data, hold on...")

        for item in image_search_body.children("img", recursive=True):
            print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")

        USELESS_SITES = [
            "https://www.trumpdonald.org",
            "https://www.isitchristmas.com",
            "https://isnickelbacktheworstbandever.tumblr.com",
            "https://www.isthatcherdeadyet.co.uk",
            "https://whitehouse.gov",
            "https://www.nsa.gov",
            "https://kimjongillookingatthings.tumblr.com",
            "https://instantrimshot.com",
            "https://www.nyan.cat",
            "https://twitter.com",
        ]

        print("opening 9 additinal windows and control them")
        sleep(1)  # never use this. this is for demonstration purposes only
        for _ in range(9):
            driver.window_new()

        print("now we got 10 windows")
        sleep(1)
        print("using the new windows to open 9 other useless sites")
        sleep(1)  # never use this. this is for demonstration purposes only

        for idx in range(1, 10):
            # skip the first handle which is our original window
            print("opening ", USELESS_SITES[idx])
            driver.switch_to.window(driver.window_handles[idx])

            # because of geographical location, (corporate) firewalls and 1001
            # other reasons why a connection could be dropped we will use a try/except clause here.
            try:
                driver.get(USELESS_SITES[idx])
            except WebDriverException as e:
                # message and details are passed as separate print arguments,
                # so the text is shown as-is instead of as a tuple repr
                print(
                    "webdriver exception. this is not an issue in chromedriver, but rather "
                    "an issue specific to your current connection. message:",
                    e.args,
                )

        for handle in driver.window_handles[1:]:
            driver.switch_to.window(handle)
            print("look. %s is working" % driver.current_url)
            sleep(1)  # never use this. it is here only so you can follow along

        print(
            "close windows (including the initial one!), but keep the last new opened window"
        )
        sleep(4)  # never use this. it is here only so you can follow along

        for handle in driver.window_handles[:-1]:
            driver.switch_to.window(handle)
            print("look. %s is closing" % driver.current_url)
            sleep(1)
            driver.close()

        # attach to the last open window; it is the only handle left
        driver.switch_to.window(driver.window_handles[0])
        print("now we only got ", driver.current_url, "left")

        sleep(1)

        driver.get("https://www.nowsecure.nl")

        sleep(5)  # never use this. wait until nowsecure passed the bot checks

        print("lets go to UC project page")
        driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")

        input("press a key if you have RTFM")
    finally:
        # always shut the browser down, also when one of the steps above raised
        driver.quit()
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    # store_false: the parsed `no_sleeps` value is True by default and becomes
    # False only when --no-sleeps / -ns is given on the command line
    parser.add_argument("--no-sleeps", "-ns", action="store_false")
    main(parser.parse_args())

View File

@ -61,4 +61,4 @@ setup(
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
],
)
)

View File

@ -16,6 +16,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
from __future__ import annotations
__version__ = "3.2.0"
import json
@ -23,25 +24,28 @@ import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
import subprocess
from selenium.webdriver.common.by import By
from weakref import finalize
import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver
from selenium.webdriver.common.by import By
import selenium.webdriver.common.service
import selenium.webdriver.remote.webdriver
import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver
from .cdp import CDP
from .dprocess import start_detached
from .options import ChromeOptions
from .patcher import IS_POSIX, Patcher
from .patcher import IS_POSIX
from .patcher import Patcher
from .reactor import Reactor
from .webelement import WebElement, UCWebElement
from .webelement import UCWebElement
from .webelement import WebElement
__all__ = (
"Chrome",
@ -231,6 +235,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag.
"""
finalize(self, self._ensure_close, self)
self.debug = debug
patcher = Patcher(
executable_path=driver_executable_path,
@ -770,6 +776,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
pass
self.quit()
@classmethod
def _ensure_close(cls, self):
    """
    Kill the driver service process of ``self`` if there is one.

    Logs "ensuring close" and then calls ``self.service.process.kill()``,
    but only when ``self`` has a ``service``, that service has a ``process``
    and that process has a ``kill`` attribute. Otherwise nothing happens.

    :param self: the instance to clean up, passed explicitly by the caller
    """
    # needs to be a classmethod so finalize can find the reference
    logger.info("ensuring close")

    if not hasattr(self, "service"):
        return
    service = self.service

    if not hasattr(service, "process"):
        return
    process = service.process

    if not hasattr(process, "kill"):
        return
    process.kill()
def find_chrome_executable():
"""

View File

@ -1,156 +0,0 @@
import time
import sys
import undetected_chromedriver as uc
import selenium.webdriver.support.expected_conditions as EC # noqa
from selenium.webdriver.remote.webdriver import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common.exceptions import WebDriverException
# Flat demo script: drives one uc.Chrome instance through a google search,
# a DOM injection, the children() helper, an image search and a walk over
# several windows. Everything runs at module level, top to bottom.
TAKE_IT_EASY = True  # so the demo is 'follow-able' instead of some flashes and boom => done. set it how you like

# pick the pause implementation once: real sleeps, or a stand-in that only prints
if TAKE_IT_EASY:
    sleep = time.sleep
else:
    sleep = lambda *a,**kw: print("we could be sleeping here, but we don't")

driver = uc.Chrome()
driver.get("https://www.google.com")

# accept the terms
# NOTE(review): [-1] assumes at least one element matched; an empty result
# would presumably raise IndexError here - confirm
driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click()

inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]')
inp_search.send_keys(
    "site:stackoverflow.com undetected chromedriver\n"
)  # \n as equivalent of ENTER key

# wait up to 3 seconds for the element with id "rso" to be present
results_container = WebDriverWait(driver, timeout=3).until(
    EC.presence_of_element_located((By.ID, "rso"))
)

# insert a styled banner div as the first element inside #rso
driver.execute_script(
    """
    let container = document.querySelector('#rso');
    let el = document.createElement('div');
    el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em';
    el.textContent = "Excluded from support...!";
    container.insertAdjacentElement('afterBegin', el);
    """
)

sleep(2)  # never use this. this is for demonstration purposes only

# print every "a" element found below the results container
for item in results_container.children("a", recursive=True):
    print(item)

# switching default WebElement for uc.WebElement and do it again
driver._web_element_cls = uc.UCWebElement

print("switched to use uc.WebElement. which is more descriptive")
results_container = driver.find_element(By.ID, "rso")

# gets only direct children of results_container
# children is a method unique for undetected chromedriver. it is
# incompatible when you use regular chromedriver
for item in results_container.children():
    print(item.tag_name)
    for grandchild in item.children(recursive=True):
        print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text)

print("lets go to image search")
inp_search = driver.find_element(By.XPATH, '//input[@name="q"]')
inp_search.clear()
inp_search.send_keys("hot girls\n")  # \n as equivalent of ENTER

body = driver.find_element(By.TAG_NAME, "body")
body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe()

# you can't reuse the body from above, because we are on another page right now
# so the body above is not attached anymore
image_search_body = WebDriverWait(driver, 5).until(
    EC.presence_of_element_located((By.TAG_NAME, "body"))
)

# gets all images and prints the src
print("getting image sources data, hold on...")

for item in image_search_body.children("img", recursive=True):
    # falls back to the "data-src" attribute when "src" is missing
    print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n")

# ten entries; index 0 is never opened by the loop further down
USELESS_SITES = [
    "https://www.trumpdonald.org",
    "https://www.isitchristmas.com",
    "https://isnickelbacktheworstbandever.tumblr.com",
    "https://www.isthatcherdeadyet.co.uk",
    "https://whitehouse.gov",
    "https://www.nsa.gov",
    "https://kimjongillookingatthings.tumblr.com",
    "https://instantrimshot.com",
    "https://www.nyan.cat",
    "https://twitter.com",
]

print("opening 9 additinal windows and control them")
sleep(1)  # never use this. this is for demonstration purposes only
for _ in range(9):
    driver.window_new()

print("now we got 10 windows")
sleep(1)
print("using the new windows to open 9 other useless sites")
sleep(1)  # never use this. this is for demonstration purposes only

for idx in range(1, 10):
    # skip the first handle which is our original window
    print("opening ", USELESS_SITES[idx])
    driver.switch_to.window(driver.window_handles[idx])

    # because of geographical location, (corporate) firewalls and 1001
    # other reasons why a connection could be dropped we will use a try/except clause here.
    try:
        driver.get(USELESS_SITES[idx])
    except WebDriverException as e:
        # the doubled parentheses make this print a single tuple
        print((
            'webdriver exception. this is not an issue in chromedriver, but rather '
            'an issue specific to your current connection. message:', e.args))
        continue

# visit every window except the first one and report its url
for handle in driver.window_handles[1:]:
    driver.switch_to.window(handle)
    print("look. %s is working" % driver.current_url)
    sleep(1)  # never use this. it is here only so you can follow along

print("close windows (including the initial one!), but keep the last new opened window")
sleep(4)  # never use this. wait until nowsecure passed the bot checks

# close every window except the last handle in the list
for handle in driver.window_handles[:-1]:
    driver.switch_to.window(handle)
    print("look. %s is closing" % driver.current_url)
    sleep(1)
    driver.close()

# attach to the last open window
driver.switch_to.window(driver.window_handles[0])
print("now we only got ", driver.current_url, "left")

sleep(1)

driver.get("https://www.nowsecure.nl")

sleep(5)

print("lets go to UC project page")
driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver")

# blocks until the user confirms on stdin, then shuts the driver down
input("press a key if you have RTFM")
driver.quit()

View File

@ -39,7 +39,7 @@ class Patcher(object):
if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver"
elif 'LAMBDA_TASK_ROOT' in os.environ:
elif "LAMBDA_TASK_ROOT" in os.environ:
d = "/tmp/undetected_chromedriver"
elif platform.startswith("linux"):
d = "~/.local/share/undetected_chromedriver"