From 07abe814a6bdcdf12afca50fa4e652202cd29d01 Mon Sep 17 00:00:00 2001 From: UltrafunkAmsterdam Date: Mon, 26 Dec 2022 01:48:01 +0100 Subject: [PATCH] more refactoring; fix bug that browser stays opened when script exits --- example/example.py | 176 +++++++++++++++++++++ setup.py | 2 +- undetected_chromedriver/__init__.py | 29 +++- undetected_chromedriver/example/example.py | 156 ------------------ undetected_chromedriver/patcher.py | 2 +- 5 files changed, 201 insertions(+), 164 deletions(-) create mode 100644 example/example.py delete mode 100644 undetected_chromedriver/example/example.py diff --git a/example/example.py b/example/example.py new file mode 100644 index 0000000..1ea1c6d --- /dev/null +++ b/example/example.py @@ -0,0 +1,176 @@ +import time + +from selenium.common.exceptions import WebDriverException +from selenium.webdriver.remote.webdriver import By +import selenium.webdriver.support.expected_conditions as EC # noqa +from selenium.webdriver.support.wait import WebDriverWait + +import undetected_chromedriver as uc + + +def main(args=None): + + TAKE_IT_EASY = True + + if args: + TAKE_IT_EASY = ( + args.no_sleeps + ) # so the demo is 'follow-able' instead of some flashes and boom => done. set it how you like + + if TAKE_IT_EASY: + sleep = time.sleep + else: + sleep = lambda n: print( + "we could be sleeping %d seconds here, but we don't" % n + ) + + driver = uc.Chrome() + driver.get("https://www.google.com") + + # accept the terms + driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click() + + inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') + + inp_search.send_keys( + "site:stackoverflow.com undetected chromedriver\n" + ) # \n as equivalent of ENTER key + + results_container = WebDriverWait(driver, timeout=3).until( + EC.presence_of_element_located((By.ID, "rso")) + ) + + driver.execute_script( + """ + let container = document.querySelector('#rso'); + let el = document.createElement('div'); + el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em'; + el.textContent = "Excluded from support...!"; + container.insertAdjacentElement('afterBegin', el); + setTimeout(() => { + el.textContent = "<<< OH , CHECK YOUR CONSOLE! >>>"}, 2500) + + """ + ) + + sleep(2) # never use this. this is for demonstration purposes only + + for item in results_container.children("a", recursive=True): + print(item) + + # switching default WebElement for uc.WebElement and do it again + driver._web_element_cls = uc.UCWebElement + + print("switched to use uc.WebElement. which is more descriptive") + results_container = driver.find_element(By.ID, "rso") + + # gets only direct children of results_container + # children is a method unique for undetected chromedriver. it is + # incompatible when you use regular chromedriver + for item in results_container.children(): + print(item.tag_name) + for grandchild in item.children(recursive=True): + print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text) + + print("lets go to image search") + inp_search = driver.find_element(By.XPATH, '//input[@name="q"]') + inp_search.clear() + inp_search.send_keys("hot girls\n") # \n as equivalent of ENTER + + body = driver.find_element(By.TAG_NAME, "body") + body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe() + + # you can't reuse the body from above, because we are on another page right now + # so the body above is not attached anymore + image_search_body = WebDriverWait(driver, 5).until( + EC.presence_of_element_located((By.TAG_NAME, "body")) + ) + + # gets all images and prints the src + print("getting image sources data, hold on...") + + for item in image_search_body.children("img", recursive=True): + print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n") + + USELESS_SITES = [ + "https://www.trumpdonald.org", + "https://www.isitchristmas.com", + "https://isnickelbacktheworstbandever.tumblr.com", + "https://www.isthatcherdeadyet.co.uk", + "https://whitehouse.gov", + "https://www.nsa.gov", + "https://kimjongillookingatthings.tumblr.com", + "https://instantrimshot.com", + "https://www.nyan.cat", + "https://twitter.com", + ] + + print("opening 9 additinal windows and control them") + sleep(1) # never use this. this is for demonstration purposes only + for _ in range(9): + driver.window_new() + + print("now we got 10 windows") + sleep(1) + print("using the new windows to open 9 other useless sites") + sleep(1) # never use this. this is for demonstration purposes only + + for idx in range(1, 10): + # skip the first handle which is our original window + print("opening ", USELESS_SITES[idx]) + driver.switch_to.window(driver.window_handles[idx]) + + # because of geographical location, (corporate) firewalls and 1001 + # other reasons why a connection could be dropped we will use a try/except clause here. + try: + driver.get(USELESS_SITES[idx]) + except WebDriverException as e: + print( + ( + "webdriver exception. this is not an issue in chromedriver, but rather " + "an issue specific to your current connection. message:", + e.args, + ) + ) + continue + + for handle in driver.window_handles[1:]: + driver.switch_to.window(handle) + print("look. %s is working" % driver.current_url) + sleep(1) # never use this. it is here only so you can follow along + + print( + "close windows (including the initial one!), but keep the last new opened window" + ) + sleep(4) # never use this. wait until nowsecure passed the bot checks + + for handle in driver.window_handles[:-1]: + driver.switch_to.window(handle) + print("look. %s is closing" % driver.current_url) + sleep(1) + driver.close() + + # attach to the last open window + driver.switch_to.window(driver.window_handles[0]) + print("now we only got ", driver.current_url, "left") + + sleep(1) + + driver.get("https://www.nowsecure.nl") + + sleep(5) + + print("lets go to UC project page") + driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") + + input("press a key if you have RTFM") + driver.quit() + + +if __name__ == "__main__": + import argparse + + p = argparse.ArgumentParser() + p.add_argument("--no-sleeps", "-ns", action="store_false") + a = p.parse_args() + main(a) diff --git a/setup.py b/setup.py index 5ffc718..4e7f0dd 100644 --- a/setup.py +++ b/setup.py @@ -61,4 +61,4 @@ setup( "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ], -) \ No newline at end of file +) diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py index 698f334..45ed305 100644 --- a/undetected_chromedriver/__init__.py +++ b/undetected_chromedriver/__init__.py @@ -16,6 +16,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) """ from __future__ import annotations + __version__ = "3.2.0" import json @@ -23,25 +24,28 @@ import logging import os import re import shutil +import subprocess import sys import tempfile import time -import subprocess - -from selenium.webdriver.common.by import By +from weakref import finalize import selenium.webdriver.chrome.service import selenium.webdriver.chrome.webdriver +from selenium.webdriver.common.by import By import selenium.webdriver.common.service -import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.command +import selenium.webdriver.remote.webdriver from .cdp import CDP from .dprocess import start_detached from .options import ChromeOptions -from .patcher import IS_POSIX, Patcher +from .patcher import IS_POSIX +from .patcher import Patcher from .reactor import Reactor -from .webelement import WebElement, UCWebElement +from .webelement import UCWebElement +from .webelement import WebElement + __all__ = ( "Chrome", @@ -231,6 +235,8 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): this option has a default of True since many people seem to run this as root (....) , and chrome does not start when running as root without using --no-sandbox flag. """ + + finalize(self, self._ensure_close, self) self.debug = debug patcher = Patcher( executable_path=driver_executable_path, @@ -770,6 +776,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): pass self.quit() + @classmethod + def _ensure_close(cls, self): + # needs to be a classmethod so finalize can find the reference + logger.info("ensuring close") + if ( + hasattr(self, "service") + and hasattr(self.service, "process") + and hasattr(self.service.process, "kill") + ): + self.service.process.kill() + def find_chrome_executable(): """ diff --git a/undetected_chromedriver/example/example.py b/undetected_chromedriver/example/example.py deleted file mode 100644 index cdfea2b..0000000 --- a/undetected_chromedriver/example/example.py +++ /dev/null @@ -1,156 +0,0 @@ -import time -import sys -import undetected_chromedriver as uc -import selenium.webdriver.support.expected_conditions as EC # noqa - -from selenium.webdriver.remote.webdriver import By -from selenium.webdriver.support.wait import WebDriverWait -from selenium.common.exceptions import WebDriverException - - -TAKE_IT_EASY = True # so the demo is 'follow-able' instead of some flashes and boom => done. set it how you like - -if TAKE_IT_EASY: - sleep = time.sleep -else: - sleep = lambda *a,**kw: print("we could be sleeping here, but we don't") - -driver = uc.Chrome() -driver.get("https://www.google.com") - -# accept the terms -driver.find_elements(By.XPATH, '//*[contains(text(), "Accept all")]')[-1].click() - -inp_search = driver.find_element(By.XPATH, '//input[@title="Search"]') - -inp_search.send_keys( - "site:stackoverflow.com undetected chromedriver\n" -) # \n as equivalent of ENTER key - -results_container = WebDriverWait(driver, timeout=3).until( - EC.presence_of_element_located((By.ID, "rso")) -) - -driver.execute_script( - """ - let container = document.querySelector('#rso'); - let el = document.createElement('div'); - el.style = 'width:500px;display:block;background:red;color:white;z-index:999;transition:all 2s ease;padding:1em;font-size:1.5em'; - el.textContent = "Excluded from support...!"; - container.insertAdjacentElement('afterBegin', el); - -""" -) - -sleep(2) # never use this. this is for demonstration purposes only - -for item in results_container.children("a", recursive=True): - print(item) - -# switching default WebElement for uc.WebElement and do it again -driver._web_element_cls = uc.UCWebElement - -print("switched to use uc.WebElement. which is more descriptive") -results_container = driver.find_element(By.ID, "rso") - -# gets only direct children of results_container -# children is a method unique for undetected chromedriver. it is -# incompatible when you use regular chromedriver -for item in results_container.children(): - print(item.tag_name) - for grandchild in item.children(recursive=True): - print("\t\t", grandchild.tag_name, "\n\t\t\t", grandchild.text) - - -print("lets go to image search") -inp_search = driver.find_element(By.XPATH, '//input[@name="q"]') -inp_search.clear() -inp_search.send_keys("hot girls\n") # \n as equivalent of ENTER - -body = driver.find_element(By.TAG_NAME, "body") -body.find_elements(By.XPATH, '//a[contains(text(), "Images")]')[0].click_safe() - -# you can't reuse the body from above, because we are on another page right now -# so the body above is not attached anymore -image_search_body = WebDriverWait(driver, 5).until( - EC.presence_of_element_located((By.TAG_NAME, "body")) -) - -# gets all images and prints the src -print("getting image sources data, hold on...") - -for item in image_search_body.children("img", recursive=True): - - print(item.attrs.get("src", item.attrs.get("data-src")), "\n\n") - - -USELESS_SITES = [ - "https://www.trumpdonald.org", - "https://www.isitchristmas.com", - "https://isnickelbacktheworstbandever.tumblr.com", - "https://www.isthatcherdeadyet.co.uk", - "https://whitehouse.gov", - "https://www.nsa.gov", - "https://kimjongillookingatthings.tumblr.com", - "https://instantrimshot.com", - "https://www.nyan.cat", - "https://twitter.com", -] - -print("opening 9 additinal windows and control them") -sleep(1) # never use this. this is for demonstration purposes only -for _ in range(9): - driver.window_new() - -print("now we got 10 windows") -sleep(1) -print("using the new windows to open 9 other useless sites") -sleep(1) # never use this. this is for demonstration purposes only - -for idx in range(1, 10): - # skip the first handle which is our original window - print("opening ", USELESS_SITES[idx]) - driver.switch_to.window(driver.window_handles[idx]) - - # because of geographical location, (corporate) firewalls and 1001 - # other reasons why a connection could be dropped we will use a try/except clause here. - try: - driver.get(USELESS_SITES[idx]) - except WebDriverException as e: - print(( - 'webdriver exception. this is not an issue in chromedriver, but rather ' - 'an issue specific to your current connection. message:', e.args)) - continue - -for handle in driver.window_handles[1:]: - driver.switch_to.window(handle) - print("look. %s is working" % driver.current_url) - sleep(1) # never use this. it is here only so you can follow along - - -print("close windows (including the initial one!), but keep the last new opened window") -sleep(4) # never use this. wait until nowsecure passed the bot checks - -for handle in driver.window_handles[:-1]: - driver.switch_to.window(handle) - print("look. %s is closing" % driver.current_url) - sleep(1) - driver.close() - - -# attach to the last open window -driver.switch_to.window(driver.window_handles[0]) -print("now we only got ", driver.current_url, "left") - -sleep(1) - -driver.get("https://www.nowsecure.nl") - -sleep(5) - -print("lets go to UC project page") - -driver.get("https://www.github.com/ultrafunkamsterdam/undetected-chromedriver") - -input("press a key if you have RTFM") -driver.quit() diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py index 84ef9e9..a313a18 100644 --- a/undetected_chromedriver/patcher.py +++ b/undetected_chromedriver/patcher.py @@ -39,7 +39,7 @@ class Patcher(object): if platform.endswith("win32"): d = "~/appdata/roaming/undetected_chromedriver" - elif 'LAMBDA_TASK_ROOT' in os.environ: + elif "LAMBDA_TASK_ROOT" in os.environ: d = "/tmp/undetected_chromedriver" elif platform.startswith("linux"): d = "~/.local/share/undetected_chromedriver"