undetected-chromedriver/undetected_chromedriver/__init__.py

261 lines
9.3 KiB
Python
Raw Normal View History

2019-12-22 05:48:39 -07:00
#!/usr/bin/env python3
"""
888 888 d8b
888 888 Y8P
888 888
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
2019-12-22 06:28:02 -07:00
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
2019-12-22 05:48:39 -07:00
"""
import io
import logging
import os
2020-10-12 19:51:35 -06:00
import re
2019-12-22 05:48:39 -07:00
import sys
import zipfile
import string
import random
2020-09-01 20:45:12 -06:00
from distutils.version import LooseVersion
2019-12-22 05:48:39 -07:00
from urllib.request import urlopen, urlretrieve
from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions
2019-12-22 06:45:37 -07:00
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
2.2.6 - hodl your breath (#161) * 2.2.2 * fixed a number of bugs - specifying custom profile - specifying custom binary path - downloading, patching and storing now (if not explicity specified) happens in a writable folder, instead of the current working dir. Committer: UltrafunkAmsterdam <UltrafunkAmsterdam@github> * tidy up * uncomment block * - support for specifying and reusing the user profile folder. if a user-data-dir is specified, that folder will NOT be deleted on exit. example: options.add_argument('--user-data-dir=c:\\temp') - uses a platform specific app data folder to store driver instead of the current workdir. - impoved headless mode. fixed detection by notification perms. - eliminates the "restore tabs" notification at startup - added methods find_elements_by_text and find_element_by_text - updated docs (partly) -known issues: - extensions not running. this is due to the inner workings of chromedriver. still working on this. - driver window is not always closing along with a program exit. - MacOS: startup nag notifications. might be solved by re(using) a profile directory. - known stuff: - some specific use cases, network conditions or behaviour can cause being detected. * Squashed commit of the following: commit 7ce8e7a236cbee770cb117145d4bf6dc245b936a Author: ultrafunkamsterdam <info@blackhat-security.nl> Date: Fri Apr 30 18:22:39 2021 +0200 readme change commit f214dcf33f26f8b35616d7b61cf6dee656596c3f Author: ultrafunkamsterdam <info@blackhat-security.nl> Date: Fri Apr 30 18:18:09 2021 +0200 - make sure options cannot be reused as it will cause double and conflicting arguments to chrome commit cf059a638cc9139f6fda5da23072488d06577071 Author: ultrafunkamsterdam <info@blackhat-security.nl> Date: Thu Apr 29 12:54:49 2021 +0200 - support for specifying and reusing the user profile folder. if a user-data-dir is specified, that folder will NOT be deleted on exit. 
example: options.add_argument('--user-data-dir=c:\\temp') - uses a platform specific app data folder to store driver instead of the current workdir. - impoved headless mode. fixed detection by notification perms. - eliminates the "restore tabs" notification at startup - added methods find_elements_by_text and find_element_by_text - updated docs (partly) -known issues: - extensions not running. this is due to the inner workings of chromedriver. still working on this. - driver window is not always closing along with a program exit. - MacOS: startup nag notifications. might be solved by re(using) a profile directory. - known stuff: - some specific use cases, network conditions or behaviour can cause being detected. commit b40d23c6495e89172ddb36ac1a9014bea1319d08 Author: ultrafunkamsterdam <info@blackhat-security.nl> Date: Tue Apr 27 20:41:18 2021 +0200 uncomment block commit d99809c8c61ea38efe9f97aa319170e5e34a8e5a Author: ultrafunkamsterdam <info@blackhat-security.nl> Date: Tue Apr 27 20:19:51 2021 +0200 tidy up * . * 2.2.7 Co-authored-by: ultrafunkamsterdam <info@blackhat-security.nl>
2021-05-01 14:49:59 -06:00
# Version string of this package.
__version__ = "2.2.7"
2020-09-01 20:45:12 -06:00
# Module-wide ChromeDriver version override. When truthy,
# ChromeDriverManager.__init__ uses it as the target version unless an explicit
# ``target_version`` argument is passed; when falsy (the default 0) the manager
# looks the version up from the download server instead.
TARGET_VERSION = 0
2019-12-22 05:48:39 -07:00
class Chrome:
    """Factory for a selenium ``Chrome`` driver with automation markers masked.

    ``__new__`` does not return an instance of this class: it builds and
    returns an instance of selenium's own ``Chrome`` (imported as ``_Chrome``),
    which is why ``__init__`` is invoked explicitly on the new object.
    """

    def __new__(cls, *args, emulate_touch=False, **kwargs):
        """Create, configure and return a selenium Chrome driver.

        :param args: positional arguments forwarded to ``ChromeDriverManager``
            and to selenium's ``Chrome.__init__``.
        :param emulate_touch: when true, register a script that makes
            ``navigator.maxTouchPoints`` report ``1`` on every new document.
        :param kwargs: keyword arguments forwarded the same way as ``args``.
            ``executable_path`` and ``options`` are filled in when missing
            or falsy.
        :return: the configured selenium ``Chrome`` instance.
        """
        needs_path = not kwargs.get("executable_path")
        if needs_path or not (
            ChromeDriverManager.installed and ChromeDriverManager.selenium_patched
        ):
            # Build the manager once and reuse it: constructing it may trigger
            # a network lookup of the current ChromeDriver version.
            manager = ChromeDriverManager(*args, **kwargs)
            if not ChromeDriverManager.installed:
                manager.install()
            if not ChromeDriverManager.selenium_patched:
                manager.patch_selenium_webdriver()
            if needs_path:
                kwargs["executable_path"] = "./{}".format(manager.executable_path)

        if not kwargs.get("options"):
            kwargs["options"] = ChromeOptions()

        # Bypass Chrome.__new__ (this method) and build the real selenium
        # driver; Python will not call __init__ for us because the returned
        # object is not an instance of ``cls``.
        instance = object.__new__(_Chrome)
        instance.__init__(*args, **kwargs)

        instance._orig_get = instance.get

        def _get_wrapped(*get_args, **get_kwargs):
            """Mask ``navigator.webdriver`` if it is exposed, then navigate."""
            if instance.execute_script("return navigator.webdriver"):
                instance.execute_cdp_cmd(
                    "Page.addScriptToEvaluateOnNewDocument",
                    {
                        "source": """
                            Object.defineProperty(window, 'navigator', {
                                value: new Proxy(navigator, {
                                    has: (target, key) => (key === 'webdriver' ? false : key in target),
                                    get: (target, key) =>
                                        key === 'webdriver'
                                        ? undefined
                                        : typeof target[key] === 'function'
                                        ? target[key].bind(target)
                                        : target[key]
                                })
                            });
                        """
                    },
                )
            return instance._orig_get(*get_args, **get_kwargs)

        instance.get = _get_wrapped

        # Re-announce the browser's own user agent with the "Headless" token
        # removed.
        original_user_agent_string = instance.execute_script(
            "return navigator.userAgent"
        )
        instance.execute_cdp_cmd(
            "Network.setUserAgentOverride",
            {
                "userAgent": original_user_agent_string.replace("Headless", ""),
            },
        )

        if emulate_touch:
            instance.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'maxTouchPoints', {
                            get: () => 1
                        })"""
                },
            )

        logger.info("starting undetected_chromedriver.Chrome(%s, %s)", args, kwargs)
        return instance
class ChromeOptions:
    """Factory for a selenium ``ChromeOptions`` object with preset switches.

    ``__new__`` returns an instance of selenium's own ``ChromeOptions``
    (imported as ``_ChromeOptions``), not of this class, so its ``__init__``
    is called explicitly.
    """

    def __new__(cls, *args, **kwargs):
        """Ensure the driver is installed/patched, then build the options.

        :param args: positional arguments forwarded to ``ChromeDriverManager``.
        :param kwargs: keyword arguments forwarded to ``ChromeDriverManager``.
        :return: a selenium ``ChromeOptions`` instance with the default
            arguments and experimental options already applied.
        """
        manager_cls = ChromeDriverManager
        if not manager_cls.installed:
            manager_cls(*args, **kwargs).install()
        if not manager_cls.selenium_patched:
            manager_cls(*args, **kwargs).patch_selenium_webdriver()

        options = object.__new__(_ChromeOptions)
        options.__init__()

        options.add_argument("start-maximized")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_argument("--disable-blink-features=AutomationControlled")
        return options
class ChromeDriverManager(object):
    """Download a ChromeDriver binary, patch it, and patch selenium to use it.

    State shared by all instances lives on the class: ``installed`` and
    ``selenium_patched`` record whether the binary patch and the selenium
    monkey-patch have already been applied in this process.
    """

    # Set to True once patch_binary() has reported at least one rewritten line.
    installed = False
    # Set to True once selenium.webdriver.Chrome / ChromeOptions were replaced.
    selenium_patched = False
    # Major ChromeDriver version to use; resolved per instance in __init__.
    target_version = None
    DL_BASE = "https://chromedriver.storage.googleapis.com/"

    def __init__(self, executable_path=None, target_version=None, *args, **kwargs):
        """Resolve the target version, platform tag and executable name.

        :param executable_path: path of the ChromeDriver binary; defaults to
            the platform's executable name (relative to the working directory).
        :param target_version: major version to use; takes precedence over the
            module-level ``TARGET_VERSION``. When neither is set, the latest
            release is looked up over the network.
        :param args: accepted and ignored, so driver arguments can be passed
            through unchanged.
        :param kwargs: accepted and ignored, for the same reason.
        """
        _platform = sys.platform

        if TARGET_VERSION:
            # use global if set
            self.target_version = TARGET_VERSION

        if target_version:
            # use explicitly passed target (user override)
            self.target_version = target_version

        if not self.target_version:
            # none of the above (default): query the current release and keep
            # only the major version int
            self.target_version = self.get_release_version_number().version[0]

        self._base = base_ = "chromedriver{}"

        # Always format the template so unknown platforms get "chromedriver"
        # rather than the raw "chromedriver{}" placeholder.
        exe_name = base_.format(".exe" if _platform == "win32" else "")
        if _platform == "linux":
            _platform += "64"
        elif _platform == "darwin":
            _platform = "mac64"

        self.platform = _platform
        self.executable_path = executable_path or exe_name
        self._exe_name = exe_name

    def patch_selenium_webdriver(self_):
        """Replace selenium's Chrome and ChromeOptions for the current session.

        Rebinds ``selenium.webdriver.Chrome`` and
        ``selenium.webdriver.ChromeOptions`` to this module's classes and
        records that on the class via ``selenium_patched``.

        :return: None
        """
        import selenium.webdriver

        selenium.webdriver.Chrome = Chrome
        selenium.webdriver.ChromeOptions = ChromeOptions
        logger.info("Selenium patched. Safe to import Chrome / ChromeOptions")
        self_.__class__.selenium_patched = True

    def install(self, patch_selenium=True):
        """Initialize the patch.

        This will:
            download chromedriver if ``executable_path`` does not exist
            patch the chromedriver binary (unless already marked installed)
            patch the selenium package if ``patch_selenium`` is True (default)

        :param patch_selenium: patch selenium webdriver classes for Chrome and
            ChromeOptions (for the current python session)
        :return: None
        """
        if not os.path.exists(self.executable_path):
            self.fetch_chromedriver()

        # ``installed`` is only set when patch_binary() returns a truthy count.
        if not self.__class__.installed and self.patch_binary():
            self.__class__.installed = True

        if patch_selenium:
            self.patch_selenium_webdriver()

    def get_release_version_number(self):
        """Query the download server for a release version.

        Requests ``LATEST_RELEASE`` when no target version is set, otherwise
        ``LATEST_RELEASE_<target_version>``.

        :return: the server's answer parsed as a ``LooseVersion``
        """
        path = (
            "LATEST_RELEASE"
            if not self.target_version
            else f"LATEST_RELEASE_{self.target_version}"
        )
        # Close the HTTP response deterministically and drop stray whitespace
        # so the version string can be embedded in a download URL.
        with urlopen(self.__class__.DL_BASE + path) as response:
            return LooseVersion(response.read().decode().strip())

    def fetch_chromedriver(self):
        """Download ChromeDriver and unpack the executable.

        Does nothing (and makes no network request) when ``executable_path``
        already exists.

        :return: ``executable_path`` if it already exists, otherwise the name
            of the freshly unpacked executable
        """
        if os.path.exists(self.executable_path):
            return self.executable_path

        base_ = self._base
        zip_name = base_.format(".zip")
        ver = self.get_release_version_number().vstring

        urlretrieve(
            f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip",
            filename=zip_name,
        )
        # NOTE(review): the binary is always extracted into the current working
        # directory under its default name, so a custom ``executable_path``
        # that does not exist yet is not populated by this method.
        try:
            with zipfile.ZipFile(zip_name) as zf:
                zf.extract(self._exe_name)
        finally:
            # Remove the archive even when extraction fails.
            os.remove(zip_name)

        if sys.platform != "win32":
            os.chmod(self._exe_name, 0o755)
        return self._exe_name

    @staticmethod
    def random_cdc():
        """Build a random 26-byte replacement token.

        The token consists of random lowercase ASCII letters, except that the
        third character repeats the first, the fourth is ``_``, and the two
        characters at positions -6 and -5 are upper-cased.

        :return: the token as ``bytes``
        """
        cdc = random.choices(string.ascii_lowercase, k=26)
        cdc[-6:-4] = map(str.upper, cdc[-6:-4])
        cdc[2] = cdc[0]
        cdc[3] = "_"
        return "".join(cdc).encode()

    def patch_binary(self):
        """Overwrite ``cdc_`` tokens in the ChromeDriver binary in place.

        Every line of the file containing ``cdc_`` is rewritten with each
        ``cdc_`` + 22-byte token replaced by one random token of equal length,
        so the file size and all offsets stay unchanged.
        (Presumably this token is what detection scripts look for - confirm.)

        :return: number of lines that contained ``cdc_`` and were rewritten;
            ``0`` (falsy) when nothing was found
        """
        pattern = re.compile(b"cdc_.{22}")
        replacement = self.random_cdc()
        linect = 0
        with io.open(self.executable_path, "r+b") as fh:
            for line in iter(fh.readline, b""):
                if b"cdc_" not in line:
                    continue
                # Step back to the start of the line just read and overwrite it.
                fh.seek(-len(line), 1)
                fh.write(pattern.sub(replacement, line))
                linect += 1
        return linect
2019-12-22 05:48:39 -07:00
def install(executable_path=None, target_version=None, *args, **kwargs):
    """Module-level shortcut: build a ChromeDriverManager and run its install().

    :param executable_path: forwarded to ``ChromeDriverManager``.
    :param target_version: forwarded to ``ChromeDriverManager``.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged.
    :return: None
    """
    manager = ChromeDriverManager(executable_path, target_version, *args, **kwargs)
    manager.install()