undetected-chromedriver/undetected_chromedriver/__init__.py

261 lines
9.3 KiB
Python
Raw Normal View History

2019-12-22 05:48:39 -07:00
#!/usr/bin/env python3
"""
888 888 d8b
888 888 Y8P
888 888
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
2019-12-22 06:28:02 -07:00
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
2019-12-22 05:48:39 -07:00
"""
import io
import logging
import os
2020-10-12 19:51:35 -06:00
import re
2019-12-22 05:48:39 -07:00
import sys
import zipfile
import string
import random
2020-09-01 20:45:12 -06:00
from distutils.version import LooseVersion
2019-12-22 05:48:39 -07:00
from urllib.request import urlopen, urlretrieve
from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions
2019-12-22 06:45:37 -07:00
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
2020-09-01 20:45:12 -06:00
# Module-wide Chrome major version override read by ChromeDriverManager.__init__.
# A falsy value (the default 0) means "not set": the manager then uses an
# explicitly passed ``target_version`` or looks up the latest release.
TARGET_VERSION = 0
2019-12-22 05:48:39 -07:00
class Chrome:
    """Factory for a patched ``selenium.webdriver.Chrome`` instance.

    Instantiating this class never yields a ``Chrome`` object of this type:
    ``__new__`` builds, initialises and returns an instance of selenium's own
    Chrome driver (imported here as ``_Chrome``) after making sure the
    chromedriver binary has been installed/patched and selenium has been
    monkey-patched by ``ChromeDriverManager``.
    """

    def __new__(cls, *args, emulate_touch=False, **kwargs):
        """Create and return a started selenium Chrome driver.

        :param args: positional arguments forwarded to ``ChromeDriverManager``
            and to selenium's ``Chrome.__init__``. ``ChromeDriverManager``
            treats the first positional argument as ``executable_path``.
        :param emulate_touch: when true, register a script that makes
            ``navigator.maxTouchPoints`` report ``1`` on every new document.
        :param kwargs: keyword arguments forwarded the same way as ``args``.
            ``executable_path`` and ``options`` are filled in when missing.
        :return: the initialised selenium Chrome driver instance.
        """
        # A positional argument already supplies executable_path; injecting
        # the keyword as well would make selenium's __init__ raise TypeError
        # ("multiple values for argument").
        inject_path = not args and not kwargs.get("executable_path")

        # Build the manager at most once: constructing it may perform a
        # network lookup of the current chromedriver release.
        if (
            not ChromeDriverManager.installed
            or not ChromeDriverManager.selenium_patched
            or inject_path
        ):
            manager = ChromeDriverManager(*args, **kwargs)
            if not ChromeDriverManager.installed:
                manager.install()
            if not ChromeDriverManager.selenium_patched:
                manager.patch_selenium_webdriver()
            if inject_path:
                kwargs["executable_path"] = "./{}".format(manager.executable_path)

        if not kwargs.get("options"):
            kwargs["options"] = ChromeOptions()

        # Bypass this class entirely: allocate selenium's driver and run its
        # initialiser by hand so callers receive a genuine selenium object.
        instance = object.__new__(_Chrome)
        instance.__init__(*args, **kwargs)

        instance._orig_get = instance.get

        def _get_wrapped(*get_args, **get_kwargs):
            # If the current page still exposes navigator.webdriver, register
            # a script that wraps window.navigator in a Proxy hiding the
            # 'webdriver' key on every new document, then navigate.
            if instance.execute_script("return navigator.webdriver"):
                instance.execute_cdp_cmd(
                    "Page.addScriptToEvaluateOnNewDocument",
                    {
                        "source": """
                            Object.defineProperty(window, 'navigator', {
                                value: new Proxy(navigator, {
                                    has: (target, key) => (key === 'webdriver' ? false : key in target),
                                    get: (target, key) =>
                                        key === 'webdriver'
                                            ? undefined
                                            : typeof target[key] === 'function'
                                                ? target[key].bind(target)
                                                : target[key]
                                })
                            });
                        """
                    },
                )
            return instance._orig_get(*get_args, **get_kwargs)

        instance.get = _get_wrapped

        # Drop the "Headless" marker from the user agent the browser reports.
        original_user_agent_string = instance.execute_script(
            "return navigator.userAgent"
        )
        instance.execute_cdp_cmd(
            "Network.setUserAgentOverride",
            {
                "userAgent": original_user_agent_string.replace("Headless", ""),
            },
        )

        if emulate_touch:
            instance.execute_cdp_cmd(
                "Page.addScriptToEvaluateOnNewDocument",
                {
                    "source": """
                        Object.defineProperty(navigator, 'maxTouchPoints', {
                            get: () => 1
                    })"""
                },
            )

        logger.info("starting undetected_chromedriver.Chrome(%s, %s)", args, kwargs)
        return instance
class ChromeOptions:
    """Factory for a pre-configured selenium ``ChromeOptions`` instance.

    ``__new__`` returns an object of selenium's own options class (imported
    here as ``_ChromeOptions``), not of this class.
    """

    def __new__(cls, *args, **kwargs):
        """Return selenium Chrome options with the automation flags adjusted.

        Ensures the chromedriver binary is installed and selenium is patched
        (forwarding ``args``/``kwargs`` to ``ChromeDriverManager``), then
        builds the options object and applies, in order: the
        ``start-maximized`` argument, the ``excludeSwitches`` experimental
        option and the ``--disable-blink-features=AutomationControlled``
        argument.
        """
        manager_cls = ChromeDriverManager
        if not manager_cls.installed:
            manager_cls(*args, **kwargs).install()
        if not manager_cls.selenium_patched:
            manager_cls(*args, **kwargs).patch_selenium_webdriver()

        # Allocate selenium's options class directly and initialise it
        # without arguments.
        options = object.__new__(_ChromeOptions)
        options.__init__()

        options.add_argument("start-maximized")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_argument("--disable-blink-features=AutomationControlled")
        return options
class ChromeDriverManager(object):
    """Downloads and patches a chromedriver binary and patches selenium.

    The ``installed`` and ``selenium_patched`` flags are stored on the class,
    so they are shared by every instance in the current Python session.
    """

    # Set to True by install() once patch_binary() reports a rewritten line.
    installed = False
    # Set to True by patch_selenium_webdriver().
    selenium_patched = False
    # Chrome major version to target; resolved per instance in __init__.
    target_version = None

    DL_BASE = "https://chromedriver.storage.googleapis.com/"

    def __init__(self, executable_path=None, target_version=None, *args, **kwargs):
        """Resolve the target version, platform tag and executable name.

        :param executable_path: path of the chromedriver binary; defaults to
            the platform's executable name (relative to the working directory).
        :param target_version: Chrome major version to use. Takes precedence
            over the module-level ``TARGET_VERSION``; when neither is set the
            latest release is looked up over the network.
        :param args: accepted and ignored (lets callers forward their own
            argument lists unchanged).
        :param kwargs: accepted and ignored, as for ``args``.
        """
        if TARGET_VERSION:
            # use global if set
            self.target_version = TARGET_VERSION

        if target_version:
            # use explicitly passed target (user override)
            self.target_version = target_version

        if not self.target_version:
            # none of the above (default): use the current release,
            # keeping only the major version int
            self.target_version = self.get_release_version_number().version[0]

        self._base = "chromedriver{}"
        self.platform, exe_name = self._resolve_platform(sys.platform)
        self.executable_path = executable_path or exe_name
        self._exe_name = exe_name

    @staticmethod
    def _resolve_platform(platform):
        """Map a ``sys.platform`` value to (download tag, executable name).

        Unrecognised platforms keep their name as the tag and get the plain
        ``chromedriver`` executable name, so the ``{}`` placeholder of the
        name template never leaks into a file name.
        """
        if platform == "win32":
            return platform, "chromedriver.exe"
        if platform == "linux":
            return "linux64", "chromedriver"
        if platform == "darwin":
            return "mac64", "chromedriver"
        return platform, "chromedriver"

    def patch_selenium_webdriver(self_):
        """
        Patches selenium package Chrome, ChromeOptions classes for current session

        :return: None
        """
        import selenium.webdriver.chrome.service
        import selenium.webdriver

        selenium.webdriver.Chrome = Chrome
        selenium.webdriver.ChromeOptions = ChromeOptions
        logger.info("Selenium patched. Safe to import Chrome / ChromeOptions")
        self_.__class__.selenium_patched = True

    def install(self, patch_selenium=True):
        """
        Initialize the patch

        This will:
         download chromedriver if not present
         patch the downloaded chromedriver
         patch selenium package if <patch_selenium> is True (default)

        :param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session)
        :return: None
        """
        if not os.path.exists(self.executable_path):
            self.fetch_chromedriver()
        if not self.__class__.installed:
            # Only flag the class as installed when at least one line of the
            # binary was actually rewritten.
            if self.patch_binary():
                self.__class__.installed = True
        if patch_selenium:
            self.patch_selenium_webdriver()

    def get_release_version_number(self):
        """
        Gets the latest major version available, or the latest major version of self.target_version if set explicitly.

        :return: ``LooseVersion`` parsed from the release endpoint's response
        """
        path = (
            "LATEST_RELEASE"
            if not self.target_version
            else f"LATEST_RELEASE_{self.target_version}"
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen(self.__class__.DL_BASE + path) as response:
            return LooseVersion(response.read().decode().strip())

    def fetch_chromedriver(self):
        """
        Downloads ChromeDriver from source and unpacks the executable

        The archive is downloaded into the working directory and always
        removed afterwards. The executable is placed at
        ``self.executable_path`` so that a custom path is honoured.

        :return: path of the executable (``self.executable_path``)
        """
        target = self.executable_path
        # Nothing to do (and no network round trip) when already present.
        if os.path.exists(target):
            return target

        base_ = self._base
        zip_name = base_.format(".zip")
        ver = self.get_release_version_number().vstring
        urlretrieve(
            f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip",
            filename=zip_name,
        )
        try:
            with zipfile.ZipFile(zip_name) as zf:
                # Extract next to the requested path; an empty dirname means
                # the current working directory.
                extracted = zf.extract(
                    self._exe_name, path=os.path.dirname(target) or None
                )
        finally:
            os.remove(zip_name)

        # The archive member is named after the platform executable; move it
        # when the caller asked for a different file name.
        if os.path.abspath(extracted) != os.path.abspath(target):
            os.replace(extracted, target)

        if sys.platform != "win32":
            os.chmod(target, 0o755)
        return target

    @staticmethod
    def random_cdc():
        """Return a random 26-byte replacement for the ``cdc_`` marker.

        The token is 26 random lowercase ASCII letters in which the letters at
        indexes 20-21 are upper-cased, index 2 repeats index 0 and index 3 is
        an underscore.

        :return: the token encoded as ``bytes``
        """
        cdc = random.choices(string.ascii_lowercase, k=26)
        cdc[-6:-4] = map(str.upper, cdc[-6:-4])
        cdc[2] = cdc[0]
        cdc[3] = "_"
        return "".join(cdc).encode()

    def patch_binary(self):
        """
        Patches the ChromeDriver binary

        Every line of ``self.executable_path`` containing ``b"cdc_"`` is
        rewritten in place, with each ``cdc_`` + 22 characters replaced by one
        random token of the same length.

        :return: number of lines containing the marker that were written back
            (0 when the marker was not found)
        """
        linect = 0
        replacement = self.random_cdc()
        with io.open(self.executable_path, "r+b") as fh:
            for line in iter(fh.readline, b""):
                if b"cdc_" in line:
                    # Step back to the start of the line just read and
                    # overwrite it; the replacement has the same length as the
                    # match, so the file size and later offsets are unchanged.
                    fh.seek(-len(line), 1)
                    newline = re.sub(rb"cdc_.{22}", replacement, line)
                    fh.write(newline)
                    linect += 1
        return linect
2019-12-22 05:48:39 -07:00
def install(executable_path=None, target_version=None, *args, **kwargs):
    """Module-level shortcut: build a ``ChromeDriverManager`` and install it.

    All arguments are handed to ``ChromeDriverManager`` unchanged, after which
    its ``install()`` method is invoked with its default settings.

    :param executable_path: forwarded to ``ChromeDriverManager``.
    :param target_version: forwarded to ``ChromeDriverManager``.
    :return: None
    """
    manager = ChromeDriverManager(executable_path, target_version, *args, **kwargs)
    manager.install()