2021-01-16 22:11:11 -07:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# this module is part of undetected_chromedriver
|
|
|
|
|
|
|
|
"""
|
|
|
|
V2 beta
|
|
|
|
|
|
|
|
what's new:
|
|
|
|
|
|
|
|
- currently this v2 module will be available as option.
|
|
|
|
to use it / test it, you need to alter your imports by appending .v2
|
|
|
|
|
|
|
|
- headless mode not (yet) supported in v2
|
|
|
|
|
|
|
|
example:
|
|
|
|
|
|
|
|
```python
|
|
|
|
import undetected_chromedriver.v2 as uc
|
|
|
|
driver = uc.Chrome()
|
|
|
|
driver.get('https://somewebsite.xyz')
|
|
|
|
|
|
|
|
# if site is protected by hCaptcha/Cloudflare
|
|
|
|
driver.get_in('https://cloudflareprotectedsite.xyz')
|
|
|
|
|
|
|
|
# if site is protected by hCaptcha/Cloudflare
|
|
|
|
# (different syntax, same function)
|
|
|
|
with driver:
|
|
|
|
driver.get('https://cloudflareprotectedsite.xyz')
|
|
|
|
```
|
|
|
|
|
|
|
|
tests/example in ../tests/test_undetected_chromedriver.py
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import io
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import random
|
|
|
|
import re
|
2021-04-29 04:54:49 -06:00
|
|
|
import shutil
|
2021-01-16 22:11:11 -07:00
|
|
|
import string
|
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
import tempfile
|
|
|
|
import time
|
|
|
|
import zipfile
|
|
|
|
from distutils.version import LooseVersion
|
|
|
|
from urllib.request import urlopen, urlretrieve
|
2021-04-27 12:19:51 -06:00
|
|
|
|
2021-01-16 22:11:11 -07:00
|
|
|
import selenium.webdriver.chrome.service
|
|
|
|
import selenium.webdriver.chrome.webdriver
|
|
|
|
import selenium.webdriver.common.service
|
|
|
|
import selenium.webdriver.remote.webdriver
|
2021-04-27 12:19:51 -06:00
|
|
|
from selenium.webdriver.chrome.options import Options as _ChromeOptions
|
2021-01-16 22:11:11 -07:00
|
|
|
|
2021-01-17 12:31:22 -07:00
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ("Chrome", "ChromeOptions", "Patcher", "find_chrome_executable")
|
2021-01-16 22:11:11 -07:00
|
|
|
|
|
|
|
# True when running on macOS, Cygwin or Linux; every other platform takes the
# Windows code paths in this module.
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux"))

# The logger's own level is intentionally left unset (NOTSET) so that its
# effective level always follows the root logger, including when the
# application configures logging *after* importing this module. Copying the
# root level once at import time would freeze it at whatever it was then.
logger = logging.getLogger("uc")
|
|
|
|
|
|
|
|
|
2021-01-16 22:11:11 -07:00
|
|
|
def find_chrome_executable():
    """
    Finds the chrome, chrome beta, chrome canary, chromium executable

    On POSIX platforms every directory listed in ``PATH`` is probed for
    ``google-chrome``, ``chromium`` and ``chromium-browser`` (plus the default
    application bundle on macOS). On other platforms the folders named by
    ``PROGRAMFILES``, ``PROGRAMFILES(X86)`` and ``LOCALAPPDATA`` are probed
    for the stable, beta and canary ``chrome.exe``.

    Returns
    -------
    executable_path : str or None
        the normalized full file path of the first candidate that is an
        executable file, or None when no candidate qualifies

    """
    # A list (not a set) keeps the probing order stable: PATH order first,
    # and within a directory google-chrome before the chromium variants.
    candidates = []
    if IS_POSIX:
        # PATH can be unset or contain empty entries; neither names a folder
        for item in os.environ.get("PATH", "").split(os.pathsep):
            if not item:
                continue
            for subitem in ("google-chrome", "chromium", "chromium-browser"):
                candidates.append(os.sep.join((item, subitem)))
        if "darwin" in sys.platform:
            candidates.append(
                "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
            )
    else:
        for item in map(
            os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA")
        ):
            if not item:
                # variable not defined on this machine
                continue
            for subitem in (
                "Google/Chrome/Application",
                "Google/Chrome Beta/Application",
                "Google/Chrome Canary/Application",
            ):
                candidates.append(os.sep.join((item, subitem, "chrome.exe")))

    for candidate in candidates:
        # isfile() rather than exists(): a directory with a matching name
        # is also "executable" but cannot be launched
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return os.path.normpath(candidate)
    return None
|
|
|
|
|
|
|
|
|
2021-03-19 22:28:42 -06:00
|
|
|
class Chrome(object):
    """
    Controls the ChromeDriver and allows you to drive the browser.

    The webdriver file will be downloaded by this module automatically,
    you do not need to specify this. however, you may if you wish.

    The browser itself is started as a separate process with remote debugging
    enabled and a selenium webdriver is attached to it. Any attribute that is
    not found on this object is looked up on that wrapped webdriver, so the
    regular selenium API (``get``, ``execute_script``, ``service``, ...) is
    available directly on instances of this class.

    Attributes
    ----------
    webdriver : selenium.webdriver.chrome.webdriver.WebDriver
        the wrapped selenium driver
    browser : subprocess.Popen
        the chrome process started by this instance
    options : ChromeOptions
        the options used to start the browser and to (re)create sessions
    user_data_dir : str
        profile folder used by the browser
    keep_user_data_dir : bool
        True when the profile folder was supplied by the caller; a temporary
        profile (False) is removed again by quit()

    Methods
    -------

    reconnect()

        this can be useful in case of heavy detection methods
        -stops the chromedriver service which runs in the background
        -starts the chromedriver service which runs in the background
        -recreate session

    start_session(capabilities=None, browser_profile=None)

        differentiates from the regular method in that it does not
        require a capabilities argument. The capabilities are automatically
        recreated from the options at creation time.

    --------------------------------------------------------------------------
        NOTE:
            Chrome has everything included to work out of the box.
            it does not `need` customizations.
            any customizations MAY lead to trigger bot mitigation systems.

    --------------------------------------------------------------------------
    """

    _instances = set()

    # scripts registered through Page.addScriptToEvaluateOnNewDocument
    _JS_TOUCH_POINTS = """
        Object.defineProperty(navigator, 'maxTouchPoints', {
            get: () => 1
        })"""

    _JS_HIDE_WEBDRIVER = """
        Object.defineProperty(window, 'navigator', {
            value: new Proxy(navigator, {
                has: (target, key) => (key === 'webdriver' ? false : key in target),
                get: (target, key) =>
                    key === 'webdriver'
                    ? undefined
                    : typeof target[key] === 'function'
                    ? target[key].bind(target)
                    : target[key]
            })
        });
    """

    _JS_NOTIFICATION_PERMISSION = """
        // fix Notification permission in headless mode
        Object.defineProperty(Notification, 'permission', { get: () => "default"});
    """

    def __init__(
        self,
        executable_path=None,
        port=0,
        options=None,
        service_args=None,
        desired_capabilities=None,
        service_log_path=None,
        keep_alive=True,
        log_level=0,
        headless=False,
        emulate_touch=False,
        delay=5,
    ):
        """
        Creates a new instance of the chrome driver.

        Downloads and patches the chromedriver binary, starts the browser
        process, then starts the driver service and attaches to the browser.

        Parameters
        ----------
        executable_path: str, optional, default: None - automatic
            full file path of the chromedriver executable. it is downloaded
            to (and patched at) this location.

        port: int, optional, default: 0
            port you would like the service to run, if left as 0, a free port will be found.

        options: ChromeOptions, optional, default: None - automatic useful defaults
            this takes an instance of ChromeOptions, mainly to customize browser behavior.
            anything other than the default, for example extensions or startup options,
            is not supported in case of failure, and will probably lower your undetectability.

        service_args: list of str, optional, default: None
            arguments to pass to the driver service

        desired_capabilities: dict, optional, default: None - auto from config
            Dictionary object with non-browser specific capabilities only, such as "proxy" or "loggingPref".

        service_log_path: str, optional, default: None
            path to log information from the driver.

        keep_alive: bool, optional, default: True
            Whether to configure ChromeRemoteConnection to use HTTP keep-alive.

        log_level: int, optional, default: adapts to python global log level
            value for chrome's --log-level switch. when left at 0 it is
            derived from the root logger's effective level (level // 10).

        headless: bool, optional, default: False
            can also be specified in the options instance.
            Specify whether you want to use the browser in headless mode.
            warning: this lowers undetectability and not fully supported.

        emulate_touch: bool, optional, default: False
            if set to True, patches window.maxTouchPoints to always return non-zero

        delay: int, optional, default: 5
            delay in seconds to wait before giving back control.
            this is used only when using the context manager
            (`with` statement) to bypass, for example CloudFlare.
            5 seconds is a foolproof value.

        """
        patcher = Patcher(executable_path=executable_path)
        # auto() is a classmethod which builds its own Patcher; pass the
        # resolved path along so a custom executable_path is the binary that
        # actually gets downloaded and patched.
        Patcher.auto(executable_path=patcher.executable_path)

        debug_port = selenium.webdriver.common.service.utils.free_port()
        debug_host = "127.0.0.1"

        if not options:
            options = selenium.webdriver.chrome.webdriver.Options()

        # see if a custom user profile is specified
        user_data_dir = None
        keep_user_data_dir = False
        for arg in options.arguments:
            if "user-data-dir" not in arg:
                continue
            m = re.search("(?:--)?user-data-dir(?:[ =])?(.*)", arg)
            if m and m[1]:
                user_data_dir = m[1]
                keep_user_data_dir = True
                logger.debug(
                    "user-data-dir found in user argument %s => %s", arg, m[1]
                )
                break
            logger.debug(
                "no user data dir could be extracted from supplied argument %s ",
                arg,
            )

        if user_data_dir is None:
            user_data_dir = os.path.normpath(tempfile.mkdtemp())
            keep_user_data_dir = False
            profile_arg = "--user-data-dir=%s" % user_data_dir
            options.add_argument(profile_arg)
            logger.debug(
                "created a temporary folder in which the user-data (profile) will be stored during this\n"
                "session, and added it to chrome startup arguments: %s",
                profile_arg,
            )

        if not options.debugger_address:
            options.debugger_address = "%s:%d" % (debug_host, debug_port)

        if not options.binary_location:
            options.binary_location = find_chrome_executable()

        self._delay = delay

        self.user_data_dir = user_data_dir
        self.keep_user_data_dir = keep_user_data_dir

        if headless or options.headless:
            options.headless = True
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")

        options.add_argument("--remote-debugging-host=%s" % debug_host)
        options.add_argument("--remote-debugging-port=%s" % debug_port)
        # parenthesized on purpose: `%` binds tighter than `or`, so without
        # the parentheses the fallback to the global log level is never used
        options.add_argument(
            "--log-level=%d"
            % (log_level or divmod(logging.getLogger().getEffectiveLevel(), 10)[0])
        )

        self.options = options

        self._fix_exit_type(user_data_dir)

        # NOTE(review): the three pipes are never read by this class; a very
        # chatty browser could fill them up - confirm this is acceptable.
        self.browser = subprocess.Popen(
            [options.binary_location, *options.arguments],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        if not desired_capabilities:
            desired_capabilities = options.to_capabilities()

        try:
            self.webdriver = selenium.webdriver.chrome.webdriver.WebDriver(
                executable_path=patcher.executable_path,
                port=port,
                options=options,
                service_args=service_args,
                desired_capabilities=desired_capabilities,
                service_log_path=service_log_path,
                keep_alive=keep_alive,
            )
        except Exception:
            # do not leave the browser process and temp profile behind
            self.quit()
            raise

        if options.headless:
            self._configure_headless(emulate_touch)

    @staticmethod
    def _fix_exit_type(user_data_dir):
        """Reset the profile's exit_type flag to prevent the tab-restore nag (best effort)."""
        try:
            import json

            with open(
                os.path.join(user_data_dir, "Default/Preferences"),
                encoding="latin1",
                mode="r+",
            ) as fs:
                config = json.load(fs)
                if config["profile"]["exit_type"] is not None:
                    # fixing the restore-tabs-nag
                    config["profile"]["exit_type"] = None
                    fs.seek(0, 0)
                    fs.write(json.dumps(config, indent=4))
                    # the new content may be shorter than the old one
                    fs.truncate()
                    logger.debug("fixed exit_type flag")
        except Exception:  # noqa - a fresh profile has no Preferences file yet
            logger.debug("did not find a bad exit_type flag ")

    def _add_startup_script(self, source):
        """Register javascript to be evaluated on every new document."""
        self.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument", {"source": source}
        )

    def _configure_headless(self, emulate_touch):
        """Wrap webdriver.get so the headless give-aways are patched before navigating."""
        if emulate_touch:
            self._add_startup_script(self._JS_TOUCH_POINTS)

        orig_get = self.webdriver.get

        logger.info("setting properties for headless")

        def get_wrapped(*args, **kwargs):
            # the patches are only (re)applied while navigator.webdriver is
            # still exposed to the page
            if self.execute_script("return navigator.webdriver"):
                self._add_startup_script(self._JS_HIDE_WEBDRIVER)

                logger.info("removing headless from user-agent string")

                self.execute_cdp_cmd(
                    "Network.setUserAgentOverride",
                    {
                        "userAgent": self.execute_script(
                            "return navigator.userAgent"
                        ).replace("Headless", "")
                    },
                )
                self._add_startup_script(self._JS_NOTIFICATION_PERMISSION)

                if emulate_touch:
                    self._add_startup_script(self._JS_TOUCH_POINTS)
            return orig_get(*args, **kwargs)

        self.webdriver.get = get_wrapped

    def __getattribute__(self, attr):
        """Look `attr` up on this object first, then on the wrapped webdriver."""
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:
            # fetch the wrapped driver without re-entering this method; going
            # through `self.webdriver` would recurse forever when the driver
            # has not been created (yet)
            try:
                wrapped = object.__getattribute__(self, "webdriver")
            except AttributeError:
                wrapped = None
            if wrapped is None:
                raise
            return getattr(wrapped, attr)

    def __dir__(self):
        names = object.__dir__(self)
        wrapped = getattr(self, "webdriver", None)
        if wrapped is not None:
            names = names + object.__dir__(wrapped)
        return names

    def reconnect(self):
        """Stop and start the chromedriver service, then recreate the session.

        Every step is best effort: failures are logged at debug level and the
        next step is still attempted.
        """
        try:
            self.service.stop()
        except Exception as e:
            logger.debug(e)

        try:
            self.service.start()
        except Exception as e:
            logger.debug(e)

        try:
            self.start_session()
        except Exception as e:
            logger.debug(e)

    def start_session(self, capabilities=None, browser_profile=None):
        """Start a webdriver session; capabilities default to those of self.options."""
        if not capabilities:
            capabilities = self.options.to_capabilities()
        self.webdriver.start_session(capabilities, browser_profile)

    def quit(self):
        """Close the webdriver, kill the browser and remove a temporary profile.

        Safe to call more than once and on partially constructed instances;
        errors from the driver and the browser process are swallowed.
        """
        logger.debug("closing webdriver")
        try:
            self.webdriver.quit()
        except Exception:  # noqa
            pass
        try:
            logger.debug("killing browser")
            self.browser.kill()
            self.browser.wait(1)
        except (subprocess.TimeoutExpired, TimeoutError) as e:
            # Popen.wait(timeout) raises subprocess.TimeoutExpired
            logger.debug(e, exc_info=True)
        except Exception:  # noqa
            pass

        # a profile supplied by the caller is never removed
        if getattr(self, "keep_user_data_dir", True):
            return

        for _ in range(3):
            try:
                logger.debug("removing profile : %s", self.user_data_dir)
                shutil.rmtree(self.user_data_dir, ignore_errors=False)
            except FileNotFoundError:
                # either the folder is already gone (nothing left to do), or
                # a file vanished while the browser was still shutting down
                if not os.path.exists(self.user_data_dir):
                    break
            except PermissionError:
                logger.debug(
                    "permission error. files are still in use/locked. retrying..."
                )
            else:
                break
            time.sleep(1)

    def __del__(self):
        # finalizers can run on half-built objects or during interpreter
        # shutdown; never let cleanup errors escape from here
        try:
            self.quit()
        except Exception:  # noqa
            pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # on leaving the `with` block the driver service is disconnected for
        # `delay` seconds and a new session is started afterwards
        self.service.stop()
        time.sleep(self._delay)
        self.service.start()
        self.start_session()

    def __hash__(self):
        return hash(self.options.debugger_address)

    def find_elements_by_text(self, text: str):
        """Yield every element whose text contains `text` (case-insensitive)."""
        needle = text.lower()
        for elem in self.find_elements_by_css_selector("*"):
            try:
                if needle in elem.text.lower():
                    yield elem
            except Exception as e:
                logger.debug("find_elements_by_text: %s", e)

    def find_element_by_text(self, text: str):
        """Return the first element whose text contains `text` (case-insensitive), or None."""
        needle = text.lower()
        for elem in self.find_elements_by_css_selector("*"):
            try:
                if needle in elem.text.lower():
                    return elem
            except Exception as e:
                logger.debug("find_element_by_text: %s", e)
        return None
|
|
|
|
|
2021-01-16 22:11:11 -07:00
|
|
|
|
|
|
|
class Patcher(object):
    """
    Downloads the ChromeDriver binary and patches it.

    The patch replaces every ``cdc_`` marker variable in the binary by a
    random name of the same length. The platform specific archive name,
    executable name and data folder are resolved once, at class creation.
    """

    url_repo = "https://chromedriver.storage.googleapis.com"
    zip_name = "chromedriver_%s.zip"
    exe_name = "chromedriver%s"

    platform = sys.platform
    if platform.endswith("win32"):
        zip_name %= "win32"
        exe_name %= ".exe"
    if platform.endswith("linux"):
        zip_name %= "linux64"
        exe_name %= ""
    if platform.endswith("darwin"):
        zip_name %= "mac64"
        exe_name %= ""

    if platform.endswith("win32"):
        d = "~/appdata/roaming/undetected_chromedriver"
    elif platform.startswith("linux"):
        d = "~/.local/share/undetected_chromedriver"
    elif platform.endswith("darwin"):
        d = "~/Library/Application Support/undetected_chromedriver"
    else:
        d = "~/.undetected_chromedriver"
    data_path = os.path.abspath(os.path.expanduser(d))

    def __init__(self, executable_path=None, force=False, version_main: int = 0):
        """

        Args:
            executable_path: None = automatic
                             a full file path to the chromedriver executable
            force: False
                    terminate processes which are holding lock
            version_main: 0 = auto
                specify main chrome version (rounded, ex: 82)
        """

        self.force = force

        if not executable_path:
            executable_path = os.path.join(self.data_path, self.exe_name)

        if not IS_POSIX and not executable_path.endswith(".exe"):
            executable_path += ".exe"

        self.zip_path = os.path.join(self.data_path, self.zip_name)

        self.executable_path = os.path.abspath(executable_path)

        self.version_main = version_main
        self.version_full = None

    @classmethod
    def auto(cls, executable_path=None, force=False):
        """
        Remove an existing binary, then download, unpack and patch a new one.

        Args:
            executable_path: None = automatic
                             a full file path to the chromedriver executable
            force: False
                    when the existing binary is locked, kill the processes
                    holding it and try again

        Returns:
            the Patcher instance that did the work, or True when the existing
            binary is locked (still in use) but already patched

        """
        i = cls(executable_path, force=force)
        try:
            os.unlink(i.executable_path)
        except PermissionError:
            if i.force:
                cls.force_kill_instances(i.executable_path)
                # retry once without force, for the SAME binary
                return cls.auto(i.executable_path, force=False)
            try:
                if i.is_binary_patched():
                    # assumes already running AND patched
                    return True
            except PermissionError:
                pass
        except FileNotFoundError:
            pass

        release = i.fetch_release_number()
        i.version_main = release.version[0]
        i.version_full = release
        i.unzip_package(i.fetch_package())
        i.patch()
        return i

    def patch(self):
        """Patch the binary and report whether it is free of markers afterwards."""
        self.patch_exe()
        return self.is_binary_patched()

    def fetch_release_number(self):
        """
        Gets the latest major version available, or the latest major version of self.version_main if set explicitly.
        :return: version string
        :rtype: LooseVersion
        """
        path = "/latest_release"
        if self.version_main:
            path += f"_{self.version_main}"
        path = path.upper()
        logger.debug("getting release number from %s", path)
        # close the http response instead of leaving it to the garbage collector
        with urlopen(self.url_repo + path) as response:
            return LooseVersion(response.read().decode().strip())

    def parse_exe_version(self):
        """
        Reads the version number embedded in the binary at self.executable_path

        :return: the version, or None when it is not found
        :rtype: LooseVersion
        """
        with io.open(self.executable_path, "rb") as f:
            for line in f:
                match = re.search(br"platform_handle\x00content\x00([0-9.]*)", line)
                if match:
                    return LooseVersion(match[1].decode())
        return None

    def fetch_package(self):
        """
        Downloads ChromeDriver from source

        :return: path to downloaded file
        """
        u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name)
        logger.debug("downloading from %s", u)
        # no target filename: urlretrieve stores the archive in a temp file
        return urlretrieve(u)[0]

    def unzip_package(self, fp):
        """
        Extracts the driver executable from archive `fp` to
        self.executable_path, deletes the archive and makes the executable
        runnable.

        :return: path to unpacked executable
        """
        logger.debug("unzipping %s", fp)

        try:
            os.unlink(self.zip_path)
        except OSError:
            pass

        os.makedirs(self.data_path, mode=0o755, exist_ok=True)

        target_dir = os.path.dirname(self.executable_path)
        with zipfile.ZipFile(fp, mode="r") as zf:
            extracted = zf.extract(self.exe_name, target_dir)

        # the archive member is always named exe_name; a custom
        # executable_path may use a different file name
        if os.path.normcase(os.path.abspath(extracted)) != os.path.normcase(
            self.executable_path
        ):
            os.replace(extracted, self.executable_path)

        os.remove(fp)
        os.chmod(self.executable_path, 0o755)
        return self.executable_path

    @staticmethod
    def force_kill_instances(exe_name):
        """
        kills running instances.
        :param: executable name to kill, may be a path as well

        :return: True on success else False
        """
        exe_name = os.path.basename(exe_name)
        # argument lists instead of a shell string: the name is never
        # interpreted by a shell
        if IS_POSIX:
            command = ["pkill", "-9", "-x", exe_name]
        else:
            command = ["taskkill", "/f", "/im", exe_name]
        try:
            return subprocess.call(command) == 0
        except OSError:
            # the kill utility itself is not available
            return False

    @staticmethod
    def gen_random_cdc():
        """
        Generates a random replacement for the 26 byte ``cdc_`` marker.

        Shape: 26 ascii letters where the third equals the first, the fourth
        is an underscore and the two at index -6 and -5 are upper case.

        :rtype: bytes
        """
        cdc = random.choices(string.ascii_lowercase, k=26)
        cdc[-6:-4] = map(str.upper, cdc[-6:-4])
        cdc[2] = cdc[0]
        cdc[3] = "_"
        return "".join(cdc).encode()

    def is_binary_patched(self, executable_path=None):
        """simple check if executable is patched.

        :param executable_path: file to check, defaults to self.executable_path
        :return: False if not patched, else True
        """
        executable_path = executable_path or self.executable_path
        with io.open(executable_path, "rb") as fh:
            return not any(b"cdc_" in line for line in fh)

    def patch_exe(self):
        """
        Patches the ChromeDriver binary

        Each ``cdc_`` marker followed by 22 more bytes is overwritten in
        place by one random name, so the file size does not change.

        :return: the number of lines which contained a marker
        """
        logger.info("patching driver executable %s", self.executable_path)

        linect = 0
        replacement = self.gen_random_cdc()
        with io.open(self.executable_path, "r+b") as fh:
            for line in iter(fh.readline, b""):
                if b"cdc_" in line:
                    # step back to the start of the line just read and
                    # overwrite it with the same-length patched version
                    fh.seek(-len(line), 1)
                    newline = re.sub(b"cdc_.{22}", replacement, line)
                    fh.write(newline)
                    linect += 1
            return linect

    def __repr__(self):
        return "{0:s}({1:s})".format(
            self.__class__.__name__,
            self.executable_path,
        )
|
|
|
|
|
2021-02-01 23:56:13 -07:00
|
|
|
|
2021-04-27 12:05:34 -06:00
|
|
|
class ChromeOptions(_ChromeOptions):
    """Chrome options with a helper for adding a packed (.crx) extension."""

    def add_extension_file_crx(self, extension=None):
        """
        Adds an extension file to the options.

        Parameters
        ----------
        extension: str
            path to the extension file; ``~`` is expanded and the path is
            made absolute before it is handed to add_extension()

        Returns
        -------
        whatever the inherited add_extension() returns
        """
        if not extension:
            # let the base class report the missing argument itself instead
            # of handing it the literal string "None"
            return super().add_extension(extension)

        extension_to_add = os.path.abspath(os.path.expanduser(extension))
        logger.debug("extension_to_add: %s", extension_to_add)
        return super().add_extension(extension_to_add)
|