2021-12-15 21:53:41 -07:00
|
|
|
#!/usr/bin/env python3
|
2021-12-23 10:23:25 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
888 888 d8b
|
|
|
|
888 888 Y8P
|
|
|
|
888 888
|
|
|
|
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
|
|
|
|
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
|
|
|
|
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
|
|
|
|
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
|
|
|
|
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
|
|
|
|
|
|
|
|
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
|
|
|
|
|
|
|
"""
|
2022-11-29 10:26:11 -07:00
|
|
|
from __future__ import annotations
|
2021-12-15 21:53:41 -07:00
|
|
|
|
2022-12-25 17:48:01 -07:00
|
|
|
|
3.5.3
Sorry for not getting to this earlier: my PC had a complete meltdown, with the M.2 and GPU both dead.
picking up a new one this afternoon.
thanks to @jdholtz :
This PR adds support for downloading Chromedriver versions 115+. This is necessary due to the Chromium team's change to Chromedriver's release process (see here).
If the version_main is 114 or older, the Chromedriver will still be downloaded using LATEST_RELEASE_{version}. If the version_main is specified and is 115+, the /latest-versions-per-milestone-with-downloads.json from the new JSON endpoint is used and the version is selected from the corresponding milestone. Last, if the version_main is not specified, the /last-known-good-versions-with-downloads.json endpoint is used to fetch the latest stable version.
In contrast with #1427, this PR uses the new JSON endpoints instead of reverting back to old versions if the LATEST_RELEASE endpoint isn't found (causing version discrepancy errors).
I also added compatibility for installing x86 and arm64 for Mac separately since the platform names changed for the new endpoints. However, I have only tested on Linux and Windows so it would be great if someone could test on Mac (x86 and ARM). Update: it has been tested on Linux, Windows, and Mac with success. The Chromedriver doesn't work on ARM devices when downloading the ARM chromedriver, but it seems to work fine with the x64 version (possibly with Rosetta installed).
This also allows for users to download the Dev and Beta versions (currently 117 and 118) if they specify it using version_main.
2023-08-25 03:57:31 -06:00
|
|
|
__version__ = "3.5.3"
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import os
|
2023-08-09 11:42:11 -06:00
|
|
|
import pathlib
|
2021-12-15 21:53:41 -07:00
|
|
|
import re
|
|
|
|
import shutil
|
2022-12-25 17:48:01 -07:00
|
|
|
import subprocess
|
2021-12-15 21:53:41 -07:00
|
|
|
import sys
|
|
|
|
import tempfile
|
2022-04-04 05:20:25 -06:00
|
|
|
import time
|
2022-12-25 17:48:01 -07:00
|
|
|
from weakref import finalize
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
import selenium.webdriver.chrome.service
|
|
|
|
import selenium.webdriver.chrome.webdriver
|
2022-12-25 17:48:01 -07:00
|
|
|
from selenium.webdriver.common.by import By
|
2023-06-12 03:30:57 -06:00
|
|
|
import selenium.webdriver.chromium.service
|
2022-07-17 03:18:24 -06:00
|
|
|
import selenium.webdriver.remote.command
|
2022-12-25 17:48:01 -07:00
|
|
|
import selenium.webdriver.remote.webdriver
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
from .cdp import CDP
|
2022-04-04 05:20:25 -06:00
|
|
|
from .dprocess import start_detached
|
2021-12-15 21:53:41 -07:00
|
|
|
from .options import ChromeOptions
|
2022-12-25 17:48:01 -07:00
|
|
|
from .patcher import IS_POSIX
|
|
|
|
from .patcher import Patcher
|
2021-12-15 21:53:41 -07:00
|
|
|
from .reactor import Reactor
|
2022-12-25 17:48:01 -07:00
|
|
|
from .webelement import UCWebElement
|
|
|
|
from .webelement import WebElement
|
|
|
|
|
2022-11-28 15:40:41 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
__all__ = (
|
2023-02-07 17:27:50 -07:00
|
|
|
"Chrome",
|
|
|
|
"ChromeOptions",
|
|
|
|
"Patcher",
|
|
|
|
"Reactor",
|
|
|
|
"CDP",
|
|
|
|
"find_chrome_executable",
|
|
|
|
)
|
2021-12-15 21:53:41 -07:00
|
|
|
|
2023-02-07 17:27:50 -07:00
|
|
|
# Module-level logger, registered under the fixed name "uc".
logger = logging.getLogger("uc")
|
|
|
|
# Copy the root logger's effective level onto this logger; this runs once,
# when the module is imported.
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
|
2023-02-07 17:27:50 -07:00
|
|
|
class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
Controls the ChromeDriver and allows you to drive the browser.
|
|
|
|
|
|
|
|
The webdriver file will be downloaded by this module automatically,
|
|
|
|
you do not need to specify this. however, you may if you wish.
|
|
|
|
|
|
|
|
Attributes
|
|
|
|
----------
|
|
|
|
|
|
|
|
Methods
|
|
|
|
-------
|
|
|
|
|
|
|
|
reconnect()
|
|
|
|
|
|
|
|
this can be useful in case of heavy detection methods
|
|
|
|
-stops the chromedriver service which runs in the background
|
|
|
|
-starts the chromedriver service which runs in the background
|
|
|
|
-recreate session
|
|
|
|
|
|
|
|
|
|
|
|
start_session(capabilities=None, browser_profile=None)
|
|
|
|
|
|
|
|
differentiates from the regular method in that it does not
|
|
|
|
require a capabilities argument. The capabilities are automatically
|
|
|
|
recreated from the options at creation time.
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
NOTE:
|
|
|
|
Chrome has everything included to work out of the box.
|
|
|
|
it does not `need` customizations.
|
|
|
|
any customizations MAY trigger bot mitigation systems.
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
"""
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
_instances = set()
|
|
|
|
session_id = None
|
2021-12-22 07:07:27 -07:00
|
|
|
debug = False
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
def __init__(
|
2023-02-07 17:27:50 -07:00
|
|
|
self,
|
|
|
|
options=None,
|
|
|
|
user_data_dir=None,
|
|
|
|
driver_executable_path=None,
|
|
|
|
browser_executable_path=None,
|
|
|
|
port=0,
|
|
|
|
enable_cdp_events=False,
|
2023-06-12 03:30:57 -06:00
|
|
|
# service_args=None,
|
|
|
|
# service_creationflags=None,
|
2023-02-07 17:27:50 -07:00
|
|
|
desired_capabilities=None,
|
|
|
|
advanced_elements=False,
|
2023-06-12 03:30:57 -06:00
|
|
|
# service_log_path=None,
|
2023-02-07 17:27:50 -07:00
|
|
|
keep_alive=True,
|
|
|
|
log_level=0,
|
|
|
|
headless=False,
|
|
|
|
version_main=None,
|
|
|
|
patcher_force_close=False,
|
|
|
|
suppress_welcome=True,
|
|
|
|
use_subprocess=True,
|
|
|
|
debug=False,
|
|
|
|
no_sandbox=True,
|
2023-05-09 14:08:53 -06:00
|
|
|
user_multi_procs: bool = False,
|
2023-02-07 17:27:50 -07:00
|
|
|
**kw,
|
|
|
|
):
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
Creates a new instance of the chrome driver.
|
|
|
|
|
|
|
|
Starts the service and then creates new instance of chrome driver.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
2021-12-22 07:07:27 -07:00
|
|
|
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
options: ChromeOptions, optional, default: None - automatic useful defaults
|
|
|
|
this takes an instance of ChromeOptions, mainly to customize browser behavior.
|
|
|
|
anything other than the default, for example extensions or startup options
|
|
|
|
are not supported in case of failure, and will probably lower your undetectability.
|
|
|
|
|
|
|
|
|
2021-12-21 09:31:04 -07:00
|
|
|
user_data_dir: str , optional, default: None (creates temp profile)
|
|
|
|
if user_data_dir is a path to a valid chrome profile directory, use it,
|
|
|
|
and turn off automatic removal mechanism at exit.
|
2021-12-22 07:07:27 -07:00
|
|
|
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
driver_executable_path: str, optional, default: None(=downloads and patches new binary)
|
|
|
|
|
2021-12-21 09:42:09 -07:00
|
|
|
browser_executable_path: str, optional, default: None - use find_chrome_executable
|
2021-12-22 07:07:27 -07:00
|
|
|
Path to the browser executable.
|
2021-12-21 09:42:09 -07:00
|
|
|
If not specified, make sure the executable's folder is in $PATH
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
port: int, optional, default: 0
|
2022-11-29 03:16:45 -07:00
|
|
|
port to be used by the chromedriver executable, this is NOT the debugger port.
|
|
|
|
leave it at 0 unless you know what you are doing.
|
|
|
|
the default value of 0 automatically picks an available port.
|
2022-11-29 10:26:11 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
enable_cdp_events: bool, default: False
|
|
|
|
:: currently for chrome only
|
|
|
|
this enables the handling of wire messages
|
|
|
|
when enabled, you can subscribe to CDP events by using:
|
|
|
|
|
|
|
|
driver.add_cdp_listener("Network.dataReceived", yourcallback)
|
|
|
|
# yourcallback is a callable which accepts exactly 1 dict as parameter
|
|
|
|
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
service_args: list of str, optional, default: None
|
|
|
|
arguments to pass to the driver service
|
|
|
|
|
|
|
|
desired_capabilities: dict, optional, default: None - auto from config
|
|
|
|
Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
|
|
|
|
|
3.1.5r2
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:49:02 -06:00
|
|
|
advanced_elements: bool, optional, default: False
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
makes it easier to recognize elements like you know them from html/browser inspection, especially when working
|
|
|
|
in an interactive environment
|
|
|
|
|
|
|
|
default webelement repr:
|
|
|
|
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
|
|
|
|
|
|
|
|
advanced webelement repr
|
|
|
|
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
|
|
|
|
|
|
|
|
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
|
|
|
|
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
service_log_path: str, optional, default: None
|
|
|
|
path to log information from the driver.
|
|
|
|
|
|
|
|
keep_alive: bool, optional, default: True
|
|
|
|
Whether to configure ChromeRemoteConnection to use HTTP keep-alive.
|
|
|
|
|
|
|
|
log_level: int, optional, default: adapts to python global log level
|
|
|
|
|
|
|
|
headless: bool, optional, default: False
|
|
|
|
can also be specified in the options instance.
|
|
|
|
Specify whether you want to use the browser in headless mode.
|
|
|
|
warning: this lowers undetectability and is not fully supported.
|
|
|
|
|
|
|
|
version_main: int, optional, default: None (=auto)
|
|
|
|
if you, for god knows whatever reason, use
|
|
|
|
an older version of Chrome. You can specify its full rounded version number
|
|
|
|
here. Example: 87 for all versions of 87
|
|
|
|
|
|
|
|
patcher_force_close: bool, optional, default: False
|
|
|
|
instructs the patcher to do whatever it can to access the chromedriver binary
|
|
|
|
if the file is locked, it will force shutdown all instances.
|
|
|
|
setting it is not recommended, unless you know the implications and think
|
|
|
|
you might need it.
|
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
suppress_welcome: bool, optional , default: True
|
|
|
|
a "welcome" alert might show up on *nix-like systems asking whether you want to set
|
|
|
|
chrome as your default browser, and if you want to send even more data to google.
|
|
|
|
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
|
|
|
|
Note: if you don't handle the nag screen in time, the browser loses its connection and throws an Exception.
|
|
|
|
|
2022-10-15 08:18:13 -06:00
|
|
|
use_subprocess: bool, optional , default: True,
|
2021-12-24 07:31:51 -07:00
|
|
|
|
|
|
|
False makes sure Chrome will get its own process (so not a subprocess of chromedriver.exe or python).
|
|
|
|
This fixes a LOT of issues, like multithreaded runs, but most importantly, shutting down correctly after
|
|
|
|
program exits or using .quit()
|
2022-10-15 08:18:13 -06:00
|
|
|
you should be knowing what you're doing, and know how python works.
|
2021-12-24 07:31:51 -07:00
|
|
|
|
|
|
|
unfortunately, there is always an edge case in which one would like to write a single script with the only contents being:
|
|
|
|
--start script--
|
|
|
|
import undetected_chromedriver as uc
|
|
|
|
d = uc.Chrome()
|
|
|
|
d.get('https://somesite/')
|
|
|
|
---end script --
|
|
|
|
|
|
|
|
and will be greeted with an error, since the program exits before chrome has a chance to launch.
|
|
|
|
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
|
|
|
|
! setting it to True comes with NO support when being detected. !
|
2022-11-20 14:05:04 -07:00
|
|
|
|
|
|
|
no_sandbox: bool, optional, default=True
|
2022-10-15 08:18:13 -06:00
|
|
|
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
|
|
|
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
|
|
|
when running as root without using --no-sandbox flag.
|
2023-05-09 14:08:53 -06:00
|
|
|
|
|
|
|
user_multi_procs:
|
|
|
|
set to true when you are using multithreads/multiprocessing
|
|
|
|
ensures not all processes are trying to modify a binary which is in use by another.
|
|
|
|
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
|
2023-05-09 14:18:22 -06:00
|
|
|
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
|
2023-05-09 14:08:53 -06:00
|
|
|
|
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
"""
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
finalize(self, self._ensure_close, self)
|
2021-12-22 07:07:27 -07:00
|
|
|
self.debug = debug
|
2023-02-05 07:36:31 -07:00
|
|
|
self.patcher = Patcher(
|
2023-02-07 17:27:50 -07:00
|
|
|
executable_path=driver_executable_path,
|
|
|
|
force=patcher_force_close,
|
|
|
|
version_main=version_main,
|
2023-05-09 14:08:53 -06:00
|
|
|
user_multi_procs=user_multi_procs,
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
2023-05-09 14:08:53 -06:00
|
|
|
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
2023-02-05 07:36:31 -07:00
|
|
|
self.patcher.auto()
|
2023-05-09 14:08:53 -06:00
|
|
|
|
2023-02-05 07:36:31 -07:00
|
|
|
# self.patcher = patcher
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options:
|
|
|
|
options = ChromeOptions()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
if hasattr(options, "_session") and options._session is not None:
|
2021-12-15 21:53:41 -07:00
|
|
|
# prevent reuse of options,
|
|
|
|
# as it just appends arguments, not replace them
|
|
|
|
# you'll get conflicts starting chrome
|
2023-02-07 17:27:50 -07:00
|
|
|
raise RuntimeError("you cannot reuse the ChromeOptions object")
|
2021-12-15 21:53:41 -07:00
|
|
|
except AttributeError:
|
|
|
|
pass
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
options._session = self
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options.debugger_address:
|
2022-08-30 05:46:41 -06:00
|
|
|
debug_port = (
|
|
|
|
port
|
|
|
|
if port != 0
|
|
|
|
else selenium.webdriver.common.service.utils.free_port()
|
|
|
|
)
|
2022-06-29 04:07:25 -06:00
|
|
|
debug_host = "127.0.0.1"
|
2023-02-07 17:27:50 -07:00
|
|
|
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
2022-06-29 04:07:25 -06:00
|
|
|
else:
|
2023-02-07 17:27:50 -07:00
|
|
|
debug_host, debug_port = options.debugger_address.split(":")
|
|
|
|
debug_port = int(debug_port)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if enable_cdp_events:
|
2022-08-30 05:46:41 -06:00
|
|
|
options.set_capability(
|
2023-02-07 17:27:50 -07:00
|
|
|
"goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
|
|
|
|
)
|
|
|
|
|
|
|
|
options.add_argument("--remote-debugging-host=%s" % debug_host)
|
|
|
|
options.add_argument("--remote-debugging-port=%s" % debug_port)
|
|
|
|
|
2022-03-16 15:47:48 -06:00
|
|
|
if user_data_dir:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.add_argument("--user-data-dir=%s" % user_data_dir)
|
|
|
|
|
|
|
|
language, keep_user_data_dir = None, bool(user_data_dir)
|
|
|
|
|
2021-12-21 09:31:04 -07:00
|
|
|
# see if a custom user profile is specified in options
|
2021-12-15 21:53:41 -07:00
|
|
|
for arg in options.arguments:
|
2023-02-08 09:48:52 -07:00
|
|
|
|
|
|
|
if any([_ in arg for _ in ("--headless", "headless")]):
|
|
|
|
options.arguments.remove(arg)
|
|
|
|
options.headless = True
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if "lang" in arg:
|
2023-02-07 17:27:50 -07:00
|
|
|
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
language = m[1]
|
2021-12-15 21:53:41 -07:00
|
|
|
except IndexError:
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("will set the language to en-US,en;q=0.9")
|
2021-12-15 21:53:41 -07:00
|
|
|
language = "en-US,en;q=0.9"
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if "user-data-dir" in arg:
|
2023-02-07 17:27:50 -07:00
|
|
|
m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
user_data_dir = m[1]
|
2022-08-30 05:46:41 -06:00
|
|
|
logger.debug(
|
2023-02-07 17:27:50 -07:00
|
|
|
"user-data-dir found in user argument %s => %s" % (arg, m[1])
|
|
|
|
)
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = True
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
except IndexError:
|
|
|
|
logger.debug(
|
2022-08-30 05:46:41 -06:00
|
|
|
"no user data dir could be extracted from supplied argument %s "
|
|
|
|
% arg
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not user_data_dir:
|
2021-12-22 07:07:27 -07:00
|
|
|
# backward compatibility
|
|
|
|
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
if hasattr(options, "user_data_dir") and getattr(
|
|
|
|
options, "user_data_dir", None
|
|
|
|
):
|
2021-12-22 07:07:27 -07:00
|
|
|
import warnings
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
warnings.warn(
|
|
|
|
"using ChromeOptions.user_data_dir might stop working in future versions."
|
|
|
|
"use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder"
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
options.add_argument("--user-data-dir=%s" % options.user_data_dir)
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = True
|
2022-08-30 05:46:41 -06:00
|
|
|
logger.debug(
|
|
|
|
"user_data_dir property found in options object: %s" % user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
else:
|
2023-02-07 17:27:50 -07:00
|
|
|
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = False
|
|
|
|
arg = "--user-data-dir=%s" % user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
options.add_argument(arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
logger.debug(
|
|
|
|
"created a temporary folder in which the user-data (profile) will be stored during this\n"
|
|
|
|
"session, and added it to chrome startup arguments: %s" % arg
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not language:
|
|
|
|
try:
|
|
|
|
import locale
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
language = locale.getdefaultlocale()[0].replace("_", "-")
|
2021-12-15 21:53:41 -07:00
|
|
|
except Exception:
|
|
|
|
pass
|
|
|
|
if not language:
|
|
|
|
language = "en-US"
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
options.add_argument("--lang=%s" % language)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options.binary_location:
|
2022-08-30 05:46:41 -06:00
|
|
|
options.binary_location = (
|
2023-02-07 17:27:50 -07:00
|
|
|
browser_executable_path or find_chrome_executable()
|
2022-08-30 05:46:41 -06:00
|
|
|
)
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2023-08-09 11:42:11 -06:00
|
|
|
if not options.binary_location or not \
|
|
|
|
pathlib.Path(options.binary_location).exists():
|
|
|
|
raise FileNotFoundError(
|
|
|
|
"\n---------------------\n"
|
|
|
|
"Could not determine browser executable."
|
|
|
|
"\n---------------------\n"
|
|
|
|
"Make sure your browser is installed in the default location (path).\n"
|
|
|
|
"If you are sure about the browser executable, you can specify it using\n"
|
|
|
|
"the `browser_executable_path='{}` parameter.\n\n"
|
|
|
|
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
|
|
|
|
)
|
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
self._delay = 3
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.user_data_dir = user_data_dir
|
|
|
|
self.keep_user_data_dir = keep_user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
if suppress_welcome:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
|
2022-10-15 08:18:13 -06:00
|
|
|
if no_sandbox:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.arguments.extend(["--no-sandbox", "--test-type"])
|
2023-02-08 09:48:52 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if headless or options.headless:
|
2023-06-02 15:04:09 -06:00
|
|
|
#workaround until a better checking is found
|
2023-06-12 03:30:57 -06:00
|
|
|
try:
|
|
|
|
if self.patcher.version_main < 108:
|
|
|
|
options.add_argument("--headless=chrome")
|
|
|
|
elif self.patcher.version_main >= 108:
|
|
|
|
options.add_argument("--headless=new")
|
|
|
|
except:
|
|
|
|
logger.warning("could not detect version_main."
|
|
|
|
"therefore, we are assuming it is chrome 108 or higher")
|
|
|
|
options.add_argument("--headless=new")
|
2023-02-08 09:48:52 -07:00
|
|
|
|
|
|
|
options.add_argument("--window-size=1920,1080")
|
|
|
|
options.add_argument("--start-maximized")
|
|
|
|
options.add_argument("--no-sandbox")
|
|
|
|
# fixes "could not connect to chrome" error when running
|
|
|
|
# on linux using privileged user like root (which i don't recommend)
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
options.add_argument(
|
2022-08-30 05:46:41 -06:00
|
|
|
"--log-level=%d" % log_level
|
2023-02-07 17:27:50 -07:00
|
|
|
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
|
|
|
|
)
|
|
|
|
|
|
|
|
if hasattr(options, "handle_prefs"):
|
|
|
|
options.handle_prefs(user_data_dir)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
# fix exit_type flag to prevent tab-restore nag
|
|
|
|
try:
|
|
|
|
with open(
|
2023-02-07 17:27:50 -07:00
|
|
|
os.path.join(user_data_dir, "Default/Preferences"),
|
|
|
|
encoding="latin1",
|
|
|
|
mode="r+",
|
|
|
|
) as fs:
|
|
|
|
config = json.load(fs)
|
|
|
|
if config["profile"]["exit_type"] is not None:
|
2021-12-15 21:53:41 -07:00
|
|
|
# fixing the restore-tabs-nag
|
2023-02-07 17:27:50 -07:00
|
|
|
config["profile"]["exit_type"] = None
|
|
|
|
fs.seek(0, 0)
|
|
|
|
json.dump(config, fs)
|
2022-04-04 05:22:28 -06:00
|
|
|
fs.truncate() # the file might be shorter
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("fixed exit_type flag")
|
2021-12-15 21:53:41 -07:00
|
|
|
except Exception as e:
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("did not find a bad exit_type flag ")
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.options = options
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not desired_capabilities:
|
|
|
|
desired_capabilities = options.to_capabilities()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-24 07:31:51 -07:00
|
|
|
if not use_subprocess:
|
2022-08-30 05:46:41 -06:00
|
|
|
self.browser_pid = start_detached(
|
2023-02-07 17:27:50 -07:00
|
|
|
options.binary_location, *options.arguments
|
|
|
|
)
|
2021-12-24 07:31:51 -07:00
|
|
|
else:
|
2021-12-23 10:23:25 -07:00
|
|
|
browser = subprocess.Popen(
|
2023-02-07 17:27:50 -07:00
|
|
|
[options.binary_location, *options.arguments],
|
|
|
|
stdin=subprocess.PIPE,
|
|
|
|
stdout=subprocess.PIPE,
|
|
|
|
stderr=subprocess.PIPE,
|
|
|
|
close_fds=IS_POSIX,
|
|
|
|
)
|
2021-12-23 10:23:25 -07:00
|
|
|
self.browser_pid = browser.pid
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2023-06-12 03:30:57 -06:00
|
|
|
|
|
|
|
service = selenium.webdriver.chromium.service.ChromiumService(
|
|
|
|
self.patcher.executable_path
|
|
|
|
)
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
super(Chrome, self).__init__(
|
2023-06-12 03:30:57 -06:00
|
|
|
service=service,
|
2023-02-07 17:27:50 -07:00
|
|
|
options=options,
|
|
|
|
keep_alive=keep_alive,
|
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.reactor = None
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
if enable_cdp_events:
|
2021-12-15 21:53:41 -07:00
|
|
|
if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
|
2022-08-30 05:46:41 -06:00
|
|
|
logging.getLogger(
|
|
|
|
"selenium.webdriver.remote.remote_connection"
|
2023-02-07 17:27:50 -07:00
|
|
|
).setLevel(20)
|
|
|
|
reactor = Reactor(self)
|
2021-12-15 21:53:41 -07:00
|
|
|
reactor.start()
|
|
|
|
self.reactor = reactor
|
2023-02-07 17:27:50 -07:00
|
|
|
|
3.15
Changed how the patcher works (for those using multiple sessions/processes).
When no executable_path is specified (the default, and recommended!), the filename
is randomized to <somehex>_chromedriver[.exe]. This should fix the issue for multiprocessing
(although Chrome/chromedriver itself has restrictions here as well; you can see them using Process Hacker).
As I said before, webdriver is a purely I/O-based operation which only sends and receives data, so multiprocessing/threading isn't going to help much. You'd be better off using asyncio.
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
if advanced_elements:
|
2022-11-28 15:40:41 -07:00
|
|
|
self._web_element_cls = UCWebElement
|
|
|
|
else:
|
3.15
Changed how the patcher works (for those using multiple sessions/processes).
When no executable_path is specified (the default, and recommended!), the filename
is randomized to <somehex>_chromedriver[.exe]. This should fix the issue for multiprocessing
(although Chrome/chromedriver itself has restrictions here as well; you can see them using Process Hacker).
As I said before, webdriver is a purely I/O-based operation which only sends and receives data, so multiprocessing/threading isn't going to help much. You'd be better off using asyncio.
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
self._web_element_cls = WebElement
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if options.headless:
|
|
|
|
self._configure_headless()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def _configure_headless(self):
    """
    Wrap ``self.get`` so a headless session is patched before navigating.

    The original ``get`` is captured and replaced by a wrapper. On every
    call the wrapper evaluates ``navigator.webdriver``; while that is still
    truthy it

    * registers a script (``Page.addScriptToEvaluateOnNewDocument``) that
      replaces ``window.navigator`` with a Proxy reporting ``webdriver`` as
      ``false``,
    * overrides the user agent with the current one minus ``"Headless"``,
    * registers a second script that fakes ``window.chrome``, the
      notification permission query and ``Function.prototype.toString``.

    Afterwards the wrapper delegates to the original ``get`` and returns
    its result.
    """
    orig_get = self.get
    logger.info("setting properties for headless")

    def get_wrapped(*args, **kwargs):
        if self.execute_script("return navigator.webdriver"):
            logger.info("patch navigator.webdriver")
            # NOTE: the script used to open Object.defineProperty(...) twice,
            # which is a JavaScript syntax error, so the whole script was
            # rejected and the proxy was never installed.
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(window, "navigator", {
                            value: new Proxy(navigator, {
                                has: (target, key) => (key === "webdriver" ? false : key in target),
                                get: (target, key) =>
                                    key === "webdriver"
                                        ? false
                                        : typeof target[key] === "function"
                                        ? target[key].bind(target)
                                        : target[key],
                            }),
                        });
                    """
                },
            )

            logger.info("patch user-agent string")
            self.execute_cdp_cmd(
                "Network.setUserAgentOverride",
                {
                    "userAgent": self.execute_script(
                        "return navigator.userAgent"
                    ).replace("Headless", "")
                },
            )
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
                        Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});

                        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
                        window.chrome = {
                            app: {
                                isInstalled: false,
                                InstallState: {
                                    DISABLED: 'disabled',
                                    INSTALLED: 'installed',
                                    NOT_INSTALLED: 'not_installed'
                                },
                                RunningState: {
                                    CANNOT_RUN: 'cannot_run',
                                    READY_TO_RUN: 'ready_to_run',
                                    RUNNING: 'running'
                                }
                            },
                            runtime: {
                                OnInstalledReason: {
                                    CHROME_UPDATE: 'chrome_update',
                                    INSTALL: 'install',
                                    SHARED_MODULE_UPDATE: 'shared_module_update',
                                    UPDATE: 'update'
                                },
                                OnRestartRequiredReason: {
                                    APP_UPDATE: 'app_update',
                                    OS_UPDATE: 'os_update',
                                    PERIODIC: 'periodic'
                                },
                                PlatformArch: {
                                    ARM: 'arm',
                                    ARM64: 'arm64',
                                    MIPS: 'mips',
                                    MIPS64: 'mips64',
                                    X86_32: 'x86-32',
                                    X86_64: 'x86-64'
                                },
                                PlatformNaclArch: {
                                    ARM: 'arm',
                                    MIPS: 'mips',
                                    MIPS64: 'mips64',
                                    X86_32: 'x86-32',
                                    X86_64: 'x86-64'
                                },
                                PlatformOs: {
                                    ANDROID: 'android',
                                    CROS: 'cros',
                                    LINUX: 'linux',
                                    MAC: 'mac',
                                    OPENBSD: 'openbsd',
                                    WIN: 'win'
                                },
                                RequestUpdateCheckStatus: {
                                    NO_UPDATE: 'no_update',
                                    THROTTLED: 'throttled',
                                    UPDATE_AVAILABLE: 'update_available'
                                }
                            }
                        }

                        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
                        if (!window.Notification) {
                            window.Notification = {
                                permission: 'denied'
                            }
                        }

                        const originalQuery = window.navigator.permissions.query
                        window.navigator.permissions.__proto__.query = parameters =>
                            parameters.name === 'notifications'
                                ? Promise.resolve({ state: window.Notification.permission })
                                : originalQuery(parameters)

                        const oldCall = Function.prototype.call
                        function call() {
                            return oldCall.apply(this, arguments)
                        }
                        Function.prototype.call = call

                        const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
                        const oldToString = Function.prototype.toString

                        function functionToString() {
                            if (this === window.navigator.permissions.query) {
                                return 'function query() { [native code] }'
                            }
                            if (this === functionToString) {
                                return nativeToStringFunctionString
                            }
                            return oldCall.call(oldToString, this)
                        }
                        // eslint-disable-next-line
                        Function.prototype.toString = functionToString
                    """
                },
            )
        return orig_get(*args, **kwargs)

    self.get = get_wrapped
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2023-02-04 14:02:46 -07:00
|
|
|
# def _get_cdc_props(self):
|
|
|
|
# return self.execute_script(
|
|
|
|
# """
|
|
|
|
# let objectToInspect = window,
|
|
|
|
# result = [];
|
|
|
|
# while(objectToInspect !== null)
|
|
|
|
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
|
|
|
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
|
|
|
#
|
|
|
|
# return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
|
|
|
|
# """
|
|
|
|
# )
|
|
|
|
#
|
|
|
|
# def _hook_remove_cdc_props(self):
|
|
|
|
# self.execute_cdp_cmd(
|
|
|
|
# "Page.addScriptToEvaluateOnNewDocument",
|
|
|
|
# {
|
|
|
|
# "source": """
|
|
|
|
# let objectToInspect = window,
|
|
|
|
# result = [];
|
|
|
|
# while(objectToInspect !== null)
|
|
|
|
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
|
|
|
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
|
|
|
# result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
|
|
|
|
# &&delete window[p]&&console.log('removed',p))
|
|
|
|
# """
|
|
|
|
# },
|
|
|
|
# )
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def get(self, url):
    """
    Load ``url`` in the current browser session.

    Simply forwards to the parent driver's ``get`` and returns whatever it
    returns.
    """
    # The cdc-property cleanup hook is disabled for now:
    # if self._get_cdc_props():
    #     self._hook_remove_cdc_props()
    response = super().get(url)
    return response
|
|
|
|
|
|
|
|
def add_cdp_listener(self, event_name, callback):
    """
    Register ``callback`` for the CDP event ``event_name``.

    Returns the reactor's ``handlers`` after registering, or ``False`` when
    no ``Reactor`` instance is attached to this driver.
    """
    reactor = self.reactor
    # guard clause: nothing to attach the listener to
    if not (reactor and isinstance(reactor, Reactor)):
        return False
    reactor.add_event_handler(event_name, callback)
    return reactor.handlers
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def clear_cdp_listeners(self):
    """Remove every registered CDP event handler, if a ``Reactor`` is attached."""
    reactor = self.reactor
    if not (reactor and isinstance(reactor, Reactor)):
        return
    reactor.handlers.clear()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def window_new(self):
    """Send the WebDriver ``NEW_WINDOW`` command with ``type`` set to ``"window"``."""
    new_window_command = selenium.webdriver.remote.command.Command.NEW_WINDOW
    self.execute(new_window_command, {"type": "window"})
|
|
|
|
|
|
|
|
def tab_new(self, url: str):
    """
    this opens a url in a new tab.
    apparently, that passes all tests directly!

    Nothing happens when this driver already has a ``cdp`` attribute;
    otherwise a ``CDP`` helper is built from ``self.options`` and asked to
    open the tab.

    Parameters
    ----------
    url
        address to open in the new tab

    Returns
    -------
    None
    """
    if hasattr(self, "cdp"):
        return

    from .cdp import CDP

    helper = CDP(self.options)
    helper.tab_new(url)
|
|
|
|
|
|
|
|
def reconnect(self, timeout=0.1):
    """
    Restart the driver service and open a fresh session.

    The service is stopped, the call sleeps for ``timeout`` seconds, then
    the service is started again and ``start_session`` is invoked. Each of
    the three steps is best-effort: an exception is logged at debug level
    and the next step still runs.
    """

    def _best_effort(step):
        # run one step; log instead of propagating any failure
        try:
            step()
        except Exception as exc:
            logger.debug(exc)

    _best_effort(lambda: self.service.stop())
    time.sleep(timeout)
    _best_effort(lambda: self.service.start())
    _best_effort(lambda: self.start_session())
|
|
|
|
|
|
|
|
def start_session(self, capabilities=None, browser_profile=None):
    """
    Start a new WebDriver session.

    When ``capabilities`` is falsy, the capabilities are derived from
    ``self.options``. ``browser_profile`` is accepted but not used here.
    """
    caps = capabilities or self.options.to_capabilities()
    # The lookup starts *after* selenium's chrome WebDriver in the MRO
    # (instead of: super(Chrome, self).start_session(capabilities, browser_profile))
    parent = super(selenium.webdriver.chrome.webdriver.WebDriver, self)
    parent.start_session(caps)
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2023-06-12 03:30:57 -06:00
|
|
|
def find_elements_recursive(self, by, value):
    """
    Yield matching elements from the main document and from every iframe.

    This is deliberately a generator: a returned list would contain
    elements that are stale on arrival, whereas a yielded element is handed
    over while the driver is still switched to the frame it lives in, so it
    can be used directly.

    Args:
        by: By
        value: str

    Returns: Generator[webelement.WebElement]
    """

    def _scan(frame=None):
        if frame:
            self.switch_to.frame(frame)
        else:
            # no frame given: make sure we are on the main content
            self.switch_to.default_content()
        yield from self.find_elements(by, value)
        # go back to the main content, otherwise we will get
        # StaleElementReferenceException
        self.switch_to.default_content()

    # root document first
    yield from _scan()

    # then each iframe found in the root document
    for frame in self.find_elements('css selector', 'iframe'):
        yield from _scan(frame)
|
|
|
|
|
2023-02-07 17:27:50 -07:00
|
|
|
def quit(self):
    """
    Shut everything down: driver process, reactor, browser and temp profile.

    Every step is best-effort so that one failure does not prevent the
    remaining cleanup:

    1. kill the webdriver service process,
    2. signal the CDP reactor (if any) to stop,
    3. send signal 15 (SIGTERM) to the browser process,
    4. unless ``keep_user_data_dir`` is set, remove ``user_data_dir``,
       retrying up to 5 times with a short pause,
    5. drop the reference to the patcher so it can clean up as well.
    """
    try:
        self.service.process.kill()
        logger.debug("webdriver process ended")
    except (AttributeError, RuntimeError, OSError):
        pass
    try:
        self.reactor.event.set()
        logger.debug("shutting down reactor")
    except AttributeError:
        pass
    try:
        os.kill(self.browser_pid, 15)
        logger.debug("gracefully closed browser")
    except Exception as e:  # noqa
        pass
    if (
        hasattr(self, "keep_user_data_dir")
        and hasattr(self, "user_data_dir")
        and not self.keep_user_data_dir
    ):
        for _ in range(5):
            try:
                shutil.rmtree(self.user_data_dir, ignore_errors=False)
            except FileNotFoundError:
                # Either the profile is already gone (done, no point in
                # sleeping and retrying), or an entry vanished while the tree
                # was being walked (retry to remove the rest).
                if not os.path.exists(self.user_data_dir):
                    break
            except (RuntimeError, OSError, PermissionError) as e:
                logger.debug(
                    "When removing the temp profile, a %s occured: %s\nretrying..."
                    % (e.__class__.__name__, e)
                )
            else:
                logger.debug("successfully removed %s" % self.user_data_dir)
                break
            time.sleep(0.1)

    # dereference patcher, so patcher can start cleaning up as well.
    # this must come last, otherwise it will throw 'in use' errors
    self.patcher = None
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __getattribute__(self, item):
    """
    Attribute access with optional call tracing.

    While the instance's ``debug`` attribute is falsy this is a plain
    lookup. Otherwise bound methods are returned wrapped in a function
    that logs the method's qualified name and its arguments at debug level
    before calling it; all other attributes are returned unchanged.
    """
    lookup = super().__getattribute__
    if not lookup("debug"):
        return lookup(item)

    import inspect

    original = lookup(item)
    if not inspect.ismethod(original) or inspect.isclass(original):
        return original

    def newfunc(*args, **kwargs):
        logger.debug(
            "calling %s with args %s and kwargs %s\n"
            % (original.__qualname__, args, kwargs)
        )
        return original(*args, **kwargs)

    return newfunc
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __enter__(self):
    """Enter the ``with`` block; the driver itself is the context value."""
    driver = self
    return driver
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Leave the ``with`` block by cycling the driver service.

    The service is stopped, the call sleeps for ``self._delay`` seconds,
    then the service is started again and a new session is opened. The
    method returns ``None``, so an exception raised inside the block is
    not suppressed.
    """
    service = self.service
    service.stop()
    time.sleep(self._delay)
    service.start()
    self.start_session()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __hash__(self):
    """Hash the driver by the debugger address stored in its options."""
    address = self.options.debugger_address
    return hash(address)
|
|
|
|
|
|
|
|
def __dir__(self):
    """Return the attribute names as computed by ``object.__dir__``."""
    names = object.__dir__(self)
    return names
|
|
|
|
|
|
|
|
def __del__(self):
    """
    Finalizer: kill the driver process (ignoring any failure), then run
    the regular ``quit`` cleanup.
    """
    try:
        driver_process = self.service.process
        driver_process.kill()
    except:  # noqa
        pass
    self.quit()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2022-12-25 17:48:01 -07:00
|
|
|
@classmethod
def _ensure_close(cls, self):
    """
    Kill the driver service process of the instance ``self``, if it has one.

    The instance is passed in explicitly; nothing happens when it lacks a
    ``service``, the service lacks a ``process``, or the process lacks a
    ``kill`` attribute.
    """
    # needs to be a classmethod so finalize can find the reference
    logger.info("ensuring close")
    if not hasattr(self, "service"):
        return
    if not hasattr(self.service, "process"):
        return
    if not hasattr(self.service.process, "kill"):
        return
    self.service.process.kill()
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
def find_chrome_executable():
    """
    Finds the chrome, chrome beta, chrome canary, chromium executable

    On POSIX systems every ``PATH`` entry is combined with the known binary
    names (plus the standard application bundles on macOS); otherwise the
    Windows program-files style environment variables are combined with
    ``Google/Chrome/Application/chrome.exe``. Candidates are checked in the
    order they were collected, so the result is stable between runs.

    Returns
    -------
    executable_path : str
        the full file path to found executable, or None when no candidate
        exists and is executable

    """
    candidates = []
    if IS_POSIX:
        # PATH can be unset in stripped-down environments; treat it as empty
        for item in os.environ.get("PATH", "").split(os.pathsep):
            if not item:
                # an empty entry would otherwise produce "/google-chrome" etc.
                continue
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates.extend(
                [
                    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                    "/Applications/Chromium.app/Contents/MacOS/Chromium",
                ]
            )
    else:
        for item in map(
            os.environ.get,
            ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                ):
                    candidates.append(os.sep.join((item, subitem, "chrome.exe")))

    # dict.fromkeys removes duplicates while keeping the collection order
    for candidate in dict.fromkeys(candidates):
        logger.debug('checking if %s exists and is executable' % candidate)
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            logger.debug('found! using %s' % candidate)
            return os.path.normpath(candidate)
    return None
|