2021-12-15 21:53:41 -07:00
|
|
|
#!/usr/bin/env python3
|
2021-12-23 10:23:25 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
888 888 d8b
|
|
|
|
888 888 Y8P
|
|
|
|
888 888
|
|
|
|
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
|
|
|
|
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
|
|
|
|
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
|
|
|
|
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
|
|
|
|
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
|
|
|
|
|
|
|
|
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
|
|
|
|
|
|
|
"""
|
2022-11-29 10:26:11 -07:00
|
|
|
from __future__ import annotations
|
2021-12-15 21:53:41 -07:00
|
|
|
|
2022-12-25 17:48:01 -07:00
|
|
|
|
2023-05-09 14:08:53 -06:00
|
|
|
# Version string of this module.
__version__ = "3.4.7"
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import shutil
|
2022-12-25 17:48:01 -07:00
|
|
|
import subprocess
|
2021-12-15 21:53:41 -07:00
|
|
|
import sys
|
|
|
|
import tempfile
|
2022-04-04 05:20:25 -06:00
|
|
|
import time
|
2022-12-25 17:48:01 -07:00
|
|
|
from weakref import finalize
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
import selenium.webdriver.chrome.service
|
|
|
|
import selenium.webdriver.chrome.webdriver
|
2022-12-25 17:48:01 -07:00
|
|
|
from selenium.webdriver.common.by import By
|
2021-12-15 21:53:41 -07:00
|
|
|
import selenium.webdriver.common.service
|
2022-07-17 03:18:24 -06:00
|
|
|
import selenium.webdriver.remote.command
|
2022-12-25 17:48:01 -07:00
|
|
|
import selenium.webdriver.remote.webdriver
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
from .cdp import CDP
|
2022-04-04 05:20:25 -06:00
|
|
|
from .dprocess import start_detached
|
2021-12-15 21:53:41 -07:00
|
|
|
from .options import ChromeOptions
|
2022-12-25 17:48:01 -07:00
|
|
|
from .patcher import IS_POSIX
|
|
|
|
from .patcher import Patcher
|
2021-12-15 21:53:41 -07:00
|
|
|
from .reactor import Reactor
|
2022-12-25 17:48:01 -07:00
|
|
|
from .webelement import UCWebElement
|
|
|
|
from .webelement import WebElement
|
|
|
|
|
2022-11-28 15:40:41 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
# Public API of this module: the names bound by a star-import of it.
# NOTE(review): "find_chrome_executable" is called further down in this file but
# its definition is not visible in this part — presumably defined later; confirm.
__all__ = (
|
2023-02-07 17:27:50 -07:00
|
|
|
"Chrome",
|
|
|
|
"ChromeOptions",
|
|
|
|
"Patcher",
|
|
|
|
"Reactor",
|
|
|
|
"CDP",
|
|
|
|
"find_chrome_executable",
|
|
|
|
)
|
2021-12-15 21:53:41 -07:00
|
|
|
|
2023-02-07 17:27:50 -07:00
|
|
|
# Module logger, registered under the name "uc".
# Its level is copied once, when this module is executed, from the root
# logger's effective level at that moment.
logger = logging.getLogger("uc")
|
|
|
|
logger.setLevel(logging.getLogger().getEffectiveLevel())
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
|
2023-02-07 17:27:50 -07:00
|
|
|
class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
Controls the ChromeDriver and allows you to drive the browser.
|
|
|
|
|
|
|
|
The webdriver file will be downloaded by this module automatically,
|
|
|
|
you do not need to specify this. however, you may if you wish.
|
|
|
|
|
|
|
|
Attributes
|
|
|
|
----------
|
|
|
|
|
|
|
|
Methods
|
|
|
|
-------
|
|
|
|
|
|
|
|
reconnect()
|
|
|
|
|
|
|
|
this can be useful in case of heavy detection methods
|
|
|
|
-stops the chromedriver service which runs in the background
|
|
|
|
-starts the chromedriver service which runs in the background
|
|
|
|
-recreate session
|
|
|
|
|
|
|
|
|
|
|
|
start_session(capabilities=None, browser_profile=None)
|
|
|
|
|
|
|
|
differentiates from the regular method in that it does not
|
|
|
|
require a capabilities argument. The capabilities are automatically
|
|
|
|
recreated from the options at creation time.
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
NOTE:
|
|
|
|
Chrome has everything included to work out of the box.
|
|
|
|
it does not `need` customizations.
|
|
|
|
any customizations MAY trigger bot mitigation systems.
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------
|
|
|
|
"""
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
_instances = set()
|
|
|
|
session_id = None
|
2021-12-22 07:07:27 -07:00
|
|
|
debug = False
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
def __init__(
|
2023-02-07 17:27:50 -07:00
|
|
|
self,
|
|
|
|
options=None,
|
|
|
|
user_data_dir=None,
|
|
|
|
driver_executable_path=None,
|
|
|
|
browser_executable_path=None,
|
|
|
|
port=0,
|
|
|
|
enable_cdp_events=False,
|
|
|
|
service_args=None,
|
|
|
|
service_creationflags=None,
|
|
|
|
desired_capabilities=None,
|
|
|
|
advanced_elements=False,
|
|
|
|
service_log_path=None,
|
|
|
|
keep_alive=True,
|
|
|
|
log_level=0,
|
|
|
|
headless=False,
|
|
|
|
version_main=None,
|
|
|
|
patcher_force_close=False,
|
|
|
|
suppress_welcome=True,
|
|
|
|
use_subprocess=True,
|
|
|
|
debug=False,
|
|
|
|
no_sandbox=True,
|
2023-05-09 14:08:53 -06:00
|
|
|
user_multi_procs: bool = False,
|
2023-02-07 17:27:50 -07:00
|
|
|
**kw,
|
|
|
|
):
|
2021-12-15 21:53:41 -07:00
|
|
|
"""
|
|
|
|
Creates a new instance of the chrome driver.
|
|
|
|
|
|
|
|
Starts the service and then creates new instance of chrome driver.
|
|
|
|
|
|
|
|
Parameters
|
|
|
|
----------
|
2021-12-22 07:07:27 -07:00
|
|
|
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
options: ChromeOptions, optional, default: None - automatic useful defaults
|
|
|
|
this takes an instance of ChromeOptions, mainly to customize browser behavior.
|
|
|
|
anything other than the default, for example extensions or startup options
|
|
|
|
are not supported in case of failure, and will probably lower your undetectability.
|
|
|
|
|
|
|
|
|
2021-12-21 09:31:04 -07:00
|
|
|
user_data_dir: str , optional, default: None (creates temp profile)
|
|
|
|
if user_data_dir is a path to a valid chrome profile directory, use it,
|
|
|
|
and turn off automatic removal mechanism at exit.
|
2021-12-22 07:07:27 -07:00
|
|
|
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
driver_executable_path: str, optional, default: None(=downloads and patches new binary)
|
|
|
|
|
2021-12-21 09:42:09 -07:00
|
|
|
browser_executable_path: str, optional, default: None - use find_chrome_executable
|
2021-12-22 07:07:27 -07:00
|
|
|
Path to the browser executable.
|
2021-12-21 09:42:09 -07:00
|
|
|
If not specified, make sure the executable's folder is in $PATH
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
port: int, optional, default: 0
|
2022-11-29 03:16:45 -07:00
|
|
|
port to be used by the chromedriver executable, this is NOT the debugger port.
|
|
|
|
leave it at 0 unless you know what you are doing.
|
|
|
|
the default value of 0 automatically picks an available port.
|
2022-11-29 10:26:11 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
enable_cdp_events: bool, default: False
|
|
|
|
:: currently for chrome only
|
|
|
|
this enables the handling of wire messages
|
|
|
|
when enabled, you can subscribe to CDP events by using:
|
|
|
|
|
|
|
|
driver.add_cdp_listener("Network.dataReceived", yourcallback)
|
|
|
|
# yourcallback is a callable which accepts exactly 1 dict as parameter
|
|
|
|
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
service_args: list of str, optional, default: None
|
|
|
|
arguments to pass to the driver service
|
|
|
|
|
|
|
|
desired_capabilities: dict, optional, default: None - auto from config
|
|
|
|
Dictionary object with non-browser specific capabilities only, such as "item" or "loggingPref".
|
|
|
|
|
3.1.5r2
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:49:02 -06:00
|
|
|
advanced_elements: bool, optional, default: False
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
makes it easier to recognize elements like you know them from html/browser inspection, especially when working
|
|
|
|
in an interactive environment
|
|
|
|
|
|
|
|
default webelement repr:
|
|
|
|
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
|
|
|
|
|
|
|
|
advanced webelement repr
|
|
|
|
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
|
|
|
|
|
|
|
|
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and print them, it does take a little more time.
|
|
|
|
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
service_log_path: str, optional, default: None
|
|
|
|
path to log information from the driver.
|
|
|
|
|
|
|
|
keep_alive: bool, optional, default: True
|
|
|
|
Whether to configure ChromeRemoteConnection to use HTTP keep-alive.
|
|
|
|
|
|
|
|
log_level: int, optional, default: adapts to python global log level
|
|
|
|
|
|
|
|
headless: bool, optional, default: False
|
|
|
|
can also be specified in the options instance.
|
|
|
|
Specify whether you want to use the browser in headless mode.
|
|
|
|
warning: this lowers undetectability and not fully supported.
|
|
|
|
|
|
|
|
version_main: int, optional, default: None (=auto)
|
|
|
|
if you, for god knows whatever reason, use
|
|
|
|
an older version of Chrome. You can specify its full rounded version number
|
|
|
|
here. Example: 87 for all versions of 87
|
|
|
|
|
|
|
|
patcher_force_close: bool, optional, default: False
|
|
|
|
instructs the patcher to do whatever it can to access the chromedriver binary
|
|
|
|
if the file is locked, it will force shutdown all instances.
|
|
|
|
setting it is not recommended, unless you know the implications and think
|
|
|
|
you might need it.
|
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
suppress_welcome: bool, optional , default: True
|
|
|
|
a "welcome" alert might show up on *nix-like systems asking whether you want to set
|
|
|
|
chrome as your default browser, and if you want to send even more data to google.
|
|
|
|
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
|
|
|
|
Note: if you don't handle the nag screen in time, the browser loses its connection and throws an Exception.
|
|
|
|
|
2022-10-15 08:18:13 -06:00
|
|
|
use_subprocess: bool, optional , default: True,
|
2021-12-24 07:31:51 -07:00
|
|
|
|
|
|
|
False makes sure Chrome will get its own process (so no subprocess of chromedriver.exe or python).
|
|
|
|
This fixes a LOT of issues, like multithreaded runs, but most importantly, shutting down correctly after
|
|
|
|
program exits or using .quit()
|
2022-10-15 08:18:13 -06:00
|
|
|
you should be knowing what you're doing, and know how python works.
|
2021-12-24 07:31:51 -07:00
|
|
|
|
|
|
|
unfortunately, there is always an edge case in which one would like to write a single script with the only contents being:
|
|
|
|
--start script--
|
|
|
|
import undetected_chromedriver as uc
|
|
|
|
d = uc.Chrome()
|
|
|
|
d.get('https://somesite/')
|
|
|
|
---end script --
|
|
|
|
|
|
|
|
and will be greeted with an error, since the program exits before chrome has a chance to launch.
|
|
|
|
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of the time.
|
|
|
|
! setting it to True comes with NO support when being detected. !
|
2022-11-20 14:05:04 -07:00
|
|
|
|
|
|
|
no_sandbox: bool, optional, default=True
|
2022-10-15 08:18:13 -06:00
|
|
|
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
|
|
|
|
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
|
|
|
|
when running as root without using --no-sandbox flag.
|
2023-05-09 14:08:53 -06:00
|
|
|
|
|
|
|
user_multi_procs:
|
|
|
|
set to true when you are using multithreads/multiprocessing
|
|
|
|
ensures not all processes are trying to modify a binary which is in use by another.
|
|
|
|
for this to work, YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
|
2023-05-09 14:18:22 -06:00
|
|
|
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
|
2023-05-09 14:08:53 -06:00
|
|
|
|
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
"""
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
finalize(self, self._ensure_close, self)
|
2021-12-22 07:07:27 -07:00
|
|
|
self.debug = debug
|
2023-02-05 07:36:31 -07:00
|
|
|
self.patcher = Patcher(
|
2023-02-07 17:27:50 -07:00
|
|
|
executable_path=driver_executable_path,
|
|
|
|
force=patcher_force_close,
|
|
|
|
version_main=version_main,
|
2023-05-09 14:08:53 -06:00
|
|
|
user_multi_procs=user_multi_procs,
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
2023-05-09 14:08:53 -06:00
|
|
|
# self.patcher.auto(user_multiprocess = user_multi_num_procs)
|
2023-02-05 07:36:31 -07:00
|
|
|
self.patcher.auto()
|
2023-05-09 14:08:53 -06:00
|
|
|
|
2023-02-05 07:36:31 -07:00
|
|
|
# self.patcher = patcher
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options:
|
|
|
|
options = ChromeOptions()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
if hasattr(options, "_session") and options._session is not None:
|
2021-12-15 21:53:41 -07:00
|
|
|
# prevent reuse of options,
|
|
|
|
# as it just appends arguments, not replace them
|
|
|
|
# you'll get conflicts starting chrome
|
2023-02-07 17:27:50 -07:00
|
|
|
raise RuntimeError("you cannot reuse the ChromeOptions object")
|
2021-12-15 21:53:41 -07:00
|
|
|
except AttributeError:
|
|
|
|
pass
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
options._session = self
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options.debugger_address:
|
2022-08-30 05:46:41 -06:00
|
|
|
debug_port = (
|
|
|
|
port
|
|
|
|
if port != 0
|
|
|
|
else selenium.webdriver.common.service.utils.free_port()
|
|
|
|
)
|
2022-06-29 04:07:25 -06:00
|
|
|
debug_host = "127.0.0.1"
|
2023-02-07 17:27:50 -07:00
|
|
|
options.debugger_address = "%s:%d" % (debug_host, debug_port)
|
2022-06-29 04:07:25 -06:00
|
|
|
else:
|
2023-02-07 17:27:50 -07:00
|
|
|
debug_host, debug_port = options.debugger_address.split(":")
|
|
|
|
debug_port = int(debug_port)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if enable_cdp_events:
|
2022-08-30 05:46:41 -06:00
|
|
|
options.set_capability(
|
2023-02-07 17:27:50 -07:00
|
|
|
"goog:loggingPrefs", {"performance": "ALL", "browser": "ALL"}
|
|
|
|
)
|
|
|
|
|
|
|
|
options.add_argument("--remote-debugging-host=%s" % debug_host)
|
|
|
|
options.add_argument("--remote-debugging-port=%s" % debug_port)
|
|
|
|
|
2022-03-16 15:47:48 -06:00
|
|
|
if user_data_dir:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.add_argument("--user-data-dir=%s" % user_data_dir)
|
|
|
|
|
|
|
|
language, keep_user_data_dir = None, bool(user_data_dir)
|
|
|
|
|
2021-12-21 09:31:04 -07:00
|
|
|
# see if a custom user profile is specified in options
|
2021-12-15 21:53:41 -07:00
|
|
|
for arg in options.arguments:
|
2023-02-08 09:48:52 -07:00
|
|
|
|
|
|
|
if any([_ in arg for _ in ("--headless", "headless")]):
|
|
|
|
options.arguments.remove(arg)
|
|
|
|
options.headless = True
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if "lang" in arg:
|
2023-02-07 17:27:50 -07:00
|
|
|
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
language = m[1]
|
2021-12-15 21:53:41 -07:00
|
|
|
except IndexError:
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("will set the language to en-US,en;q=0.9")
|
2021-12-15 21:53:41 -07:00
|
|
|
language = "en-US,en;q=0.9"
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if "user-data-dir" in arg:
|
2023-02-07 17:27:50 -07:00
|
|
|
m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
try:
|
2023-02-07 17:27:50 -07:00
|
|
|
user_data_dir = m[1]
|
2022-08-30 05:46:41 -06:00
|
|
|
logger.debug(
|
2023-02-07 17:27:50 -07:00
|
|
|
"user-data-dir found in user argument %s => %s" % (arg, m[1])
|
|
|
|
)
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = True
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
except IndexError:
|
|
|
|
logger.debug(
|
2022-08-30 05:46:41 -06:00
|
|
|
"no user data dir could be extracted from supplied argument %s "
|
|
|
|
% arg
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not user_data_dir:
|
2021-12-22 07:07:27 -07:00
|
|
|
# backward compatibility
|
|
|
|
# check if an old uc.ChromeOptions is used, and extract the user data dir
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
if hasattr(options, "user_data_dir") and getattr(
|
|
|
|
options, "user_data_dir", None
|
|
|
|
):
|
2021-12-22 07:07:27 -07:00
|
|
|
import warnings
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
warnings.warn(
|
|
|
|
"using ChromeOptions.user_data_dir might stop working in future versions."
|
|
|
|
"use uc.Chrome(user_data_dir='/xyz/some/data') in case you need existing profile folder"
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
options.add_argument("--user-data-dir=%s" % options.user_data_dir)
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = True
|
2022-08-30 05:46:41 -06:00
|
|
|
logger.debug(
|
|
|
|
"user_data_dir property found in options object: %s" % user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
else:
|
2023-02-07 17:27:50 -07:00
|
|
|
user_data_dir = os.path.normpath(tempfile.mkdtemp())
|
2021-12-15 21:53:41 -07:00
|
|
|
keep_user_data_dir = False
|
|
|
|
arg = "--user-data-dir=%s" % user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
options.add_argument(arg)
|
2021-12-15 21:53:41 -07:00
|
|
|
logger.debug(
|
|
|
|
"created a temporary folder in which the user-data (profile) will be stored during this\n"
|
|
|
|
"session, and added it to chrome startup arguments: %s" % arg
|
2023-02-07 17:27:50 -07:00
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not language:
|
|
|
|
try:
|
|
|
|
import locale
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
language = locale.getdefaultlocale()[0].replace("_", "-")
|
2021-12-15 21:53:41 -07:00
|
|
|
except Exception:
|
|
|
|
pass
|
|
|
|
if not language:
|
|
|
|
language = "en-US"
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
options.add_argument("--lang=%s" % language)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not options.binary_location:
|
2022-08-30 05:46:41 -06:00
|
|
|
options.binary_location = (
|
2023-02-07 17:27:50 -07:00
|
|
|
browser_executable_path or find_chrome_executable()
|
2022-08-30 05:46:41 -06:00
|
|
|
)
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
self._delay = 3
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.user_data_dir = user_data_dir
|
|
|
|
self.keep_user_data_dir = keep_user_data_dir
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
if suppress_welcome:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
|
2022-10-15 08:18:13 -06:00
|
|
|
if no_sandbox:
|
2023-02-07 17:27:50 -07:00
|
|
|
options.arguments.extend(["--no-sandbox", "--test-type"])
|
2023-02-08 09:48:52 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if headless or options.headless:
|
2023-06-02 15:04:09 -06:00
|
|
|
#workaround until a better checking is found
|
|
|
|
options.add_argument("--headless=new")
|
|
|
|
#if self.patcher.version_main < 108:
|
|
|
|
# options.add_argument("--headless=chrome")
|
|
|
|
#elif self.patcher.version_main >= 108:
|
|
|
|
|
2023-02-08 09:48:52 -07:00
|
|
|
|
|
|
|
options.add_argument("--window-size=1920,1080")
|
|
|
|
options.add_argument("--start-maximized")
|
|
|
|
options.add_argument("--no-sandbox")
|
|
|
|
# fixes "could not connect to chrome" error when running
|
|
|
|
# on linux using privileged user like root (which i don't recommend)
|
2023-02-08 09:31:44 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
options.add_argument(
|
2022-08-30 05:46:41 -06:00
|
|
|
"--log-level=%d" % log_level
|
2023-02-07 17:27:50 -07:00
|
|
|
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
|
|
|
|
)
|
|
|
|
|
|
|
|
if hasattr(options, "handle_prefs"):
|
|
|
|
options.handle_prefs(user_data_dir)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
# fix exit_type flag to prevent tab-restore nag
|
|
|
|
try:
|
|
|
|
with open(
|
2023-02-07 17:27:50 -07:00
|
|
|
os.path.join(user_data_dir, "Default/Preferences"),
|
|
|
|
encoding="latin1",
|
|
|
|
mode="r+",
|
|
|
|
) as fs:
|
|
|
|
config = json.load(fs)
|
|
|
|
if config["profile"]["exit_type"] is not None:
|
2021-12-15 21:53:41 -07:00
|
|
|
# fixing the restore-tabs-nag
|
2023-02-07 17:27:50 -07:00
|
|
|
config["profile"]["exit_type"] = None
|
|
|
|
fs.seek(0, 0)
|
|
|
|
json.dump(config, fs)
|
2022-04-04 05:22:28 -06:00
|
|
|
fs.truncate() # the file might be shorter
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("fixed exit_type flag")
|
2021-12-15 21:53:41 -07:00
|
|
|
except Exception as e:
|
2023-02-07 17:27:50 -07:00
|
|
|
logger.debug("did not find a bad exit_type flag ")
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.options = options
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if not desired_capabilities:
|
|
|
|
desired_capabilities = options.to_capabilities()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-24 07:31:51 -07:00
|
|
|
if not use_subprocess:
|
2022-08-30 05:46:41 -06:00
|
|
|
self.browser_pid = start_detached(
|
2023-02-07 17:27:50 -07:00
|
|
|
options.binary_location, *options.arguments
|
|
|
|
)
|
2021-12-24 07:31:51 -07:00
|
|
|
else:
|
2021-12-23 10:23:25 -07:00
|
|
|
browser = subprocess.Popen(
|
2023-02-07 17:27:50 -07:00
|
|
|
[options.binary_location, *options.arguments],
|
|
|
|
stdin=subprocess.PIPE,
|
|
|
|
stdout=subprocess.PIPE,
|
|
|
|
stderr=subprocess.PIPE,
|
|
|
|
close_fds=IS_POSIX,
|
|
|
|
)
|
2021-12-23 10:23:25 -07:00
|
|
|
self.browser_pid = browser.pid
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2022-08-30 05:46:41 -06:00
|
|
|
if service_creationflags:
|
2022-11-28 15:40:41 -07:00
|
|
|
service = selenium.webdriver.common.service.Service(
|
2023-02-07 17:27:50 -07:00
|
|
|
self.patcher.executable_path, port, service_args, service_log_path
|
|
|
|
)
|
|
|
|
for attr_name in ("creationflags", "creation_flags"):
|
|
|
|
if hasattr(service, attr_name):
|
|
|
|
setattr(service, attr_name, service_creationflags)
|
2022-11-20 14:05:04 -07:00
|
|
|
break
|
2022-08-30 05:46:41 -06:00
|
|
|
else:
|
|
|
|
service = None
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
super(Chrome, self).__init__(
|
|
|
|
executable_path=self.patcher.executable_path,
|
|
|
|
port=port,
|
|
|
|
options=options,
|
|
|
|
service_args=service_args,
|
|
|
|
desired_capabilities=desired_capabilities,
|
|
|
|
service_log_path=service_log_path,
|
|
|
|
keep_alive=keep_alive,
|
|
|
|
service=service, # needed or the service will be re-created
|
|
|
|
)
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
self.reactor = None
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-22 07:07:27 -07:00
|
|
|
if enable_cdp_events:
|
2021-12-15 21:53:41 -07:00
|
|
|
if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
|
2022-08-30 05:46:41 -06:00
|
|
|
logging.getLogger(
|
|
|
|
"selenium.webdriver.remote.remote_connection"
|
2023-02-07 17:27:50 -07:00
|
|
|
).setLevel(20)
|
|
|
|
reactor = Reactor(self)
|
2021-12-15 21:53:41 -07:00
|
|
|
reactor.start()
|
|
|
|
self.reactor = reactor
|
2023-02-07 17:27:50 -07:00
|
|
|
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying an executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As I said before, webdriver is a purely IO-based operation which only sends and pulls data. Multiprocessing/threading isn't going to help much; you'd be better off using asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distros use this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
if advanced_elements:
|
2022-11-28 15:40:41 -07:00
|
|
|
self._web_element_cls = UCWebElement
|
|
|
|
else:
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
self._web_element_cls = WebElement
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
if options.headless:
|
|
|
|
self._configure_headless()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def _configure_headless(self):
    """
    Wrap ``self.get`` so headless sessions are patched before navigating.

    The wrapper checks ``navigator.webdriver`` in the current page on every
    call. When it is truthy, three CDP commands are issued before the
    original ``get`` runs:

    1. a new-document script replacing ``window.navigator`` with a Proxy
       that reports ``webdriver`` as ``false``,
    2. a user-agent override with the substring "Headless" removed,
    3. a new-document script adding ``window.chrome``, notification
       permission and ``Function.prototype.toString`` evasions.

    Returns
    -------
    None
        ``self.get`` is rebound on the instance as a side effect.
    """
    orig_get = self.get
    logger.info("setting properties for headless")

    # NOTE: the opening `Object.defineProperty(window, "navigator", {` used to
    # appear twice in a row, which made the whole script a JavaScript syntax
    # error so it never took effect. It must appear exactly once.
    webdriver_patch = """

        Object.defineProperty(window, "navigator", {
          value: new Proxy(navigator, {
            has: (target, key) => (key === "webdriver" ? false : key in target),
            get: (target, key) =>
              key === "webdriver"
                ? false
                : typeof target[key] === "function"
                ? target[key].bind(target)
                : target[key],
          }),
        });
    """

    evasions = """
        Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
        Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});

        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
        window.chrome = {
            app: {
                isInstalled: false,
                InstallState: {
                    DISABLED: 'disabled',
                    INSTALLED: 'installed',
                    NOT_INSTALLED: 'not_installed'
                },
                RunningState: {
                    CANNOT_RUN: 'cannot_run',
                    READY_TO_RUN: 'ready_to_run',
                    RUNNING: 'running'
                }
            },
            runtime: {
                OnInstalledReason: {
                    CHROME_UPDATE: 'chrome_update',
                    INSTALL: 'install',
                    SHARED_MODULE_UPDATE: 'shared_module_update',
                    UPDATE: 'update'
                },
                OnRestartRequiredReason: {
                    APP_UPDATE: 'app_update',
                    OS_UPDATE: 'os_update',
                    PERIODIC: 'periodic'
                },
                PlatformArch: {
                    ARM: 'arm',
                    ARM64: 'arm64',
                    MIPS: 'mips',
                    MIPS64: 'mips64',
                    X86_32: 'x86-32',
                    X86_64: 'x86-64'
                },
                PlatformNaclArch: {
                    ARM: 'arm',
                    MIPS: 'mips',
                    MIPS64: 'mips64',
                    X86_32: 'x86-32',
                    X86_64: 'x86-64'
                },
                PlatformOs: {
                    ANDROID: 'android',
                    CROS: 'cros',
                    LINUX: 'linux',
                    MAC: 'mac',
                    OPENBSD: 'openbsd',
                    WIN: 'win'
                },
                RequestUpdateCheckStatus: {
                    NO_UPDATE: 'no_update',
                    THROTTLED: 'throttled',
                    UPDATE_AVAILABLE: 'update_available'
                }
            }
        }

        // https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
        if (!window.Notification) {
            window.Notification = {
                permission: 'denied'
            }
        }

        const originalQuery = window.navigator.permissions.query
        window.navigator.permissions.__proto__.query = parameters =>
            parameters.name === 'notifications'
                ? Promise.resolve({ state: window.Notification.permission })
                : originalQuery(parameters)

        const oldCall = Function.prototype.call
        function call() {
            return oldCall.apply(this, arguments)
        }
        Function.prototype.call = call

        const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
        const oldToString = Function.prototype.toString

        function functionToString() {
            if (this === window.navigator.permissions.query) {
                return 'function query() { [native code] }'
            }
            if (this === functionToString) {
                return nativeToStringFunctionString
            }
            return oldCall.call(oldToString, this)
        }
        // eslint-disable-next-line
        Function.prototype.toString = functionToString
    """

    def get_wrapped(*args, **kwargs):
        # Only patch while the page still exposes a truthy navigator.webdriver.
        if self.execute_script("return navigator.webdriver"):
            logger.info("patch navigator.webdriver")
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {"source": webdriver_patch},
            )

            logger.info("patch user-agent string")
            self.execute_cdp_cmd(
                "Network.setUserAgentOverride",
                {
                    "userAgent": self.execute_script(
                        "return navigator.userAgent"
                    ).replace("Headless", "")
                },
            )
            self.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {"source": evasions},
            )
        return orig_get(*args, **kwargs)

    self.get = get_wrapped
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2023-02-04 14:02:46 -07:00
|
|
|
# def _get_cdc_props(self):
|
|
|
|
# return self.execute_script(
|
|
|
|
# """
|
|
|
|
# let objectToInspect = window,
|
|
|
|
# result = [];
|
|
|
|
# while(objectToInspect !== null)
|
|
|
|
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
|
|
|
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
|
|
|
#
|
|
|
|
# return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
|
|
|
|
# """
|
|
|
|
# )
|
|
|
|
#
|
|
|
|
# def _hook_remove_cdc_props(self):
|
|
|
|
# self.execute_cdp_cmd(
|
|
|
|
# "Page.addScriptToEvaluateOnNewDocument",
|
|
|
|
# {
|
|
|
|
# "source": """
|
|
|
|
# let objectToInspect = window,
|
|
|
|
# result = [];
|
|
|
|
# while(objectToInspect !== null)
|
|
|
|
# { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
|
|
|
|
# objectToInspect = Object.getPrototypeOf(objectToInspect); }
|
|
|
|
# result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
|
|
|
|
# &&delete window[p]&&console.log('removed',p))
|
|
|
|
# """
|
|
|
|
# },
|
|
|
|
# )
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def get(self, url):
    """
    Navigate to ``url`` by delegating to the parent class implementation.

    A hook that looked for and removed ``cdc`` properties before navigating
    used to run here; it is currently disabled, so this is a plain
    pass-through.
    """
    parent = super()
    return parent.get(url)
|
|
|
|
|
|
|
|
def add_cdp_listener(self, event_name, callback):
    """
    Register ``callback`` for the CDP event ``event_name`` on the reactor.

    Returns
    -------
    The reactor's ``handlers`` collection after registration, or ``False``
    when no ``Reactor`` instance is attached to this driver.
    """
    reactor = self.reactor
    # Guard clause: nothing to register on without a live Reactor.
    if not reactor or not isinstance(reactor, Reactor):
        return False
    reactor.add_event_handler(event_name, callback)
    return reactor.handlers
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def clear_cdp_listeners(self):
    """Remove every registered CDP event handler, if a Reactor is attached."""
    reactor = self.reactor
    if not reactor or not isinstance(reactor, Reactor):
        return
    reactor.handlers.clear()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def window_new(self):
    """Issue the WebDriver ``NEW_WINDOW`` command with type ``"window"``."""
    new_window = selenium.webdriver.remote.command.Command.NEW_WINDOW
    params = {"type": "window"}
    self.execute(new_window, params)
|
|
|
|
|
|
|
|
def tab_new(self, url: str):
    """
    Open ``url`` in a new tab through the CDP helper.

    An existing ``self.cdp`` object is reused when present; otherwise a
    ``CDP`` instance is created from ``self.options``. Previously the
    helper was only bound when ``self`` had no ``cdp`` attribute, so a
    driver that did have one failed with ``UnboundLocalError``.

    Parameters
    ----------
    url : str
        address to open in the new tab

    Returns
    -------
    None
    """
    cdp = getattr(self, "cdp", None)
    if cdp is None:
        from .cdp import CDP

        cdp = CDP(self.options)
    cdp.tab_new(url)
|
|
|
|
|
|
|
|
def reconnect(self, timeout=0.1):
    """
    Restart the driver service and open a fresh session.

    The service is stopped, the call sleeps for ``timeout`` seconds, then
    the service is started and ``start_session`` is invoked. Each step is
    best-effort: any exception is logged at debug level and the next step
    still runs.
    """

    def attempt(step):
        # Attribute lookups happen inside `step`, so they are guarded too.
        try:
            step()
        except Exception as exc:
            logger.debug(exc)

    attempt(lambda: self.service.stop())
    time.sleep(timeout)
    attempt(lambda: self.service.start())
    attempt(lambda: self.start_session())
|
|
|
|
|
|
|
|
def start_session(self, capabilities=None, browser_profile=None):
    """
    Start a WebDriver session, defaulting capabilities from ``self.options``.

    The call is dispatched with ``super(<selenium chrome WebDriver>, self)``,
    i.e. to the ``start_session`` that follows selenium's chrome ``WebDriver``
    in the method resolution order, rather than to the direct parent of this
    class.
    """
    caps = capabilities or self.options.to_capabilities()
    chrome_webdriver = selenium.webdriver.chrome.webdriver.WebDriver
    super(chrome_webdriver, self).start_session(caps, browser_profile)
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def quit(self):
    """
    Shut down the driver service, the reactor and the browser, then clean up.

    Every step is best-effort so that a partially initialised or already
    closed instance can still be quit:

    1. kill the webdriver service process,
    2. signal the CDP reactor (if any) to stop,
    3. send signal 15 (SIGTERM) to the browser process,
    4. unless ``keep_user_data_dir`` is set, remove the profile folder,
       retrying up to 5 times with a 0.1 s pause while it is still in use,
    5. drop the reference to the patcher.
    """
    try:
        self.service.process.kill()
        logger.debug("webdriver process ended")
    except (AttributeError, RuntimeError, OSError):
        pass
    try:
        self.reactor.event.set()
        logger.debug("shutting down reactor")
    except AttributeError:
        pass
    try:
        os.kill(self.browser_pid, 15)
        logger.debug("gracefully closed browser")
    except Exception as e:  # noqa
        # Still best-effort, but leave a trace instead of swallowing silently.
        logger.debug("could not terminate browser process: %s", e)
    if (
        hasattr(self, "keep_user_data_dir")
        and hasattr(self, "user_data_dir")
        and not self.keep_user_data_dir
    ):
        for _ in range(5):
            try:
                shutil.rmtree(self.user_data_dir, ignore_errors=False)
            except FileNotFoundError:
                # Already gone: retrying (and sleeping) cannot change that.
                break
            except (RuntimeError, OSError, PermissionError) as e:
                logger.debug(
                    "When removing the temp profile, a %s occured: %s\nretrying..."
                    % (e.__class__.__name__, e)
                )
            else:
                logger.debug("successfully removed %s" % self.user_data_dir)
                break
            time.sleep(0.1)

    # dereference patcher, so patcher can start cleaning up as well.
    # this must come last, otherwise it will throw 'in use' errors
    self.patcher = None
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __getattribute__(self, item):
    """
    Attribute access hook that logs method calls when ``debug`` is truthy.

    With ``debug`` falsy the attribute is returned unchanged. Otherwise bound
    methods are wrapped in a function that logs the qualified name, the
    positional and the keyword arguments at debug level before delegating.
    """
    lookup = super().__getattribute__
    if not lookup("debug"):
        return lookup(item)

    import inspect

    original = lookup(item)
    if not inspect.ismethod(original) or inspect.isclass(original):
        return original

    def newfunc(*args, **kwargs):
        logger.debug(
            "calling %s with args %s and kwargs %s\n"
            % (original.__qualname__, args, kwargs)
        )
        return original(*args, **kwargs)

    return newfunc
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __enter__(self):
    """Enter the ``with`` block; the driver itself is the managed object."""
    driver = self
    return driver
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Leave the ``with`` block by restarting the service and the session.

    The service is stopped, the call sleeps for ``self._delay`` seconds,
    then the service is started again and a new session is opened. Nothing
    is returned, so exceptions raised inside the block are not suppressed.
    """
    service = self.service
    service.stop()
    time.sleep(self._delay)
    service.start()
    self.start_session()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
|
|
|
def __hash__(self):
    """Hash the driver by the debugger address stored on its options."""
    address = self.options.debugger_address
    return hash(address)
|
|
|
|
|
|
|
|
def __dir__(self):
    """Return the attribute listing produced by ``object.__dir__``."""
    listing = object.__dir__(self)
    return listing
|
|
|
|
|
|
|
|
def __del__(self):
    """
    Finalizer: kill the service process (best-effort), then run ``quit``.

    The kill is attempted first and may fail for any ordinary reason (no
    service, no process, process already gone); ``quit`` is called
    regardless.
    """
    try:
        self.service.process.kill()
    except Exception:  # noqa
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed here.
        pass
    self.quit()
|
2023-02-07 17:27:50 -07:00
|
|
|
|
2022-12-25 17:48:01 -07:00
|
|
|
@classmethod
def _ensure_close(cls, self):
    """
    Kill the service process of ``self`` if it has one.

    Each attribute on the ``service.process.kill`` chain is checked with
    ``hasattr`` first, so an instance missing any of them is left alone.
    """
    # needs to be a classmethod so finalize can find the reference
    logger.info("ensuring close")
    if not hasattr(self, "service"):
        return
    service = self.service
    if not hasattr(service, "process"):
        return
    process = service.process
    if hasattr(process, "kill"):
        process.kill()
|
|
|
|
|
2021-12-15 21:53:41 -07:00
|
|
|
|
|
|
|
def find_chrome_executable():
    """
    Finds the chrome, chrome beta, chrome canary, chromium executable

    Candidates are checked in a fixed priority order: on POSIX every
    ``PATH`` directory in order, each combined with the known binary names,
    followed on macOS by the standard application bundles; otherwise the
    Windows install roots combined with the known Chrome channels. The
    candidates used to be kept in a ``set``, which made the winner arbitrary
    when several browsers were installed.

    Returns
    -------
    executable_path : str
        the full (normalised) file path to the first candidate that exists
        and is executable, or ``None`` when no candidate qualifies

    """
    candidates = []
    if IS_POSIX:
        # PATH may be unset; fall back to an empty string instead of
        # crashing on `None.split`.
        for item in os.environ.get("PATH", "").split(os.pathsep):
            for subitem in (
                "google-chrome",
                "chromium",
                "chromium-browser",
                "chrome",
                "google-chrome-stable",
            ):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates.extend(
                [
                    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
                    "/Applications/Chromium.app/Contents/MacOS/Chromium",
                ]
            )
    else:
        for item in map(
            os.environ.get,
            ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
        ):
            if item is not None:
                for subitem in (
                    "Google/Chrome/Application",
                    "Google/Chrome Beta/Application",
                    "Google/Chrome Canary/Application",
                ):
                    candidates.append(os.sep.join((item, subitem, "chrome.exe")))

    # dict.fromkeys drops duplicates while keeping first-seen (priority) order.
    for candidate in dict.fromkeys(candidates):
        logger.debug("checking if %s exists and is executable", candidate)
        if os.path.exists(candidate) and os.access(candidate, os.X_OK):
            logger.debug("found! using %s", candidate)
            return os.path.normpath(candidate)
    return None
|