undetected-chromedriver/undetected_chromedriver/__init__.py

220 lines
7.4 KiB
Python
Raw Normal View History

2019-12-22 05:48:39 -07:00
#!/usr/bin/env python3
"""
888 888 d8b
888 888 Y8P
888 888
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
2019-12-22 06:28:02 -07:00
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
2019-12-22 05:48:39 -07:00
"""
import io
import logging
import os
import sys
import zipfile
from urllib.request import urlopen, urlretrieve
from selenium.webdriver import Chrome as _Chrome
from selenium.webdriver import ChromeOptions as _ChromeOptions
2019-12-22 06:45:37 -07:00
logger = logging.getLogger(__name__)
2019-12-22 05:48:39 -07:00
# Base URL that the release-version lookup path and the chromedriver zip
# download path are appended to.
_DL_BASE = "https://chromedriver.storage.googleapis.com/"
2020-01-02 01:56:43 -07:00
# Major chromedriver version used when ChromeDriverManager is created without
# an explicit (truthy) target_version.
TARGET_VERSION = 80
2019-12-22 05:48:39 -07:00
# Module-level flag, initially 0; presumably meant to record that the
# chromedriver binary has been patched (patch_binary assigns 1 to this name).
__is_patched__ = 0
class Chrome:
    """
    Factory wrapper around selenium's Chrome webdriver.

    Instantiating this class does not produce a ``Chrome`` object of this type;
    ``__new__`` builds and returns an initialised selenium ``_Chrome`` instance
    after making sure the chromedriver has been installed and selenium has been
    patched.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create, initialise and return a selenium ``_Chrome`` instance.

        :param args: positional arguments, forwarded to ChromeDriverManager and
                     to the selenium Chrome constructor
        :param kwargs: keyword arguments, forwarded the same way
        :return: the initialised selenium Chrome webdriver instance
        """
        manager_cls = ChromeDriverManager
        if not manager_cls.installed:
            manager_cls(*args, **kwargs).install()
        if not manager_cls.selenium_patched:
            manager_cls(*args, **kwargs).patch_selenium_webdriver()

        # The returned object is not an instance of ``cls``, so Python will not
        # call __init__ automatically; it is invoked explicitly here.
        driver = object.__new__(_Chrome)
        driver.__init__(*args, **kwargs)

        # Script registered for every new document: swaps window.navigator for
        # a Proxy that reports the `webdriver` key as absent/undefined.
        hide_webdriver_source = """
                Object.defineProperty(window, 'navigator', {
                    value: new Proxy(navigator, {
                        has: (target, key) => (key === 'webdriver' ? false : key in target),
                        get: (target, key) =>
                            key === 'webdriver'
                            ? undefined
                            : typeof target[key] === 'function'
                            ? target[key].bind(target)
                            : target[key]
                    })
                })
            """
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": hide_webdriver_source},
        )

        # Re-announce the browser's own user agent with "Headless" removed.
        reported_agent = driver.execute_script("return navigator.userAgent")
        agent_override = {
            "userAgent": reported_agent.replace("Headless", ""),
            "platform": "Windows",
        }
        driver.execute_cdp_cmd("Network.setUserAgentOverride", agent_override)

        logger.warning(f"starting webdriver instance Chrome({args}, {kwargs})")
        return driver
class ChromeOptions:
    """
    Factory wrapper around selenium's ChromeOptions.

    ``__new__`` returns a selenium ``_ChromeOptions`` instance that already
    carries the "start-maximized" argument and the experimental options set
    below.
    """

    def __new__(cls, *args, **kwargs):
        """
        Create and return a pre-configured selenium ``_ChromeOptions`` instance.

        :param args: positional arguments, forwarded to ChromeDriverManager only
        :param kwargs: keyword arguments, forwarded to ChromeDriverManager only
        :return: the configured selenium ChromeOptions instance
        """
        manager_cls = ChromeDriverManager
        if not manager_cls.installed:
            manager_cls(*args, **kwargs).install()
        if not manager_cls.selenium_patched:
            manager_cls(*args, **kwargs).patch_selenium_webdriver()

        # Not an instance of ``cls``, so __init__ has to be called by hand; the
        # options object itself is always built without arguments.
        options = object.__new__(_ChromeOptions)
        options.__init__()

        options.add_argument("start-maximized")
        experimental_settings = (
            ("excludeSwitches", ["enable-automation"]),
            ("useAutomationExtension", False),
        )
        for option_name, option_value in experimental_settings:
            options.add_experimental_option(option_name, option_value)

        logger.debug(f"starting options instance ChromeOptions({args}, {kwargs})")
        return options
class ChromeDriverManager(object):
    """
    Downloads a chromedriver executable, patches it, and swaps selenium's
    Chrome / ChromeOptions classes for the wrappers defined in this module.
    """

    # Set to True by install() once a chromedriver executable is available.
    installed = False
    # Set to True by patch_selenium_webdriver() once selenium has been patched.
    selenium_patched = False

    def __init__(self, executable_path=None, target_version=None, *args, **kwargs):
        """
        :param executable_path: path of the chromedriver executable; defaults to
                                "chromedriver" ("chromedriver.exe" on win32)
                                relative to the current working directory
        :param target_version: major chromedriver version to fetch; falls back
                               to the module-level TARGET_VERSION when falsy
        :param args: accepted and ignored, so that callers can forward the
                     arguments they received themselves
        :param kwargs: accepted and ignored, see ``args``
        """
        platform_ = sys.platform
        self.target_version = target_version or TARGET_VERSION

        self._base = base_ = "chromedriver{}"
        # Start from the suffix-less name so an unrecognised platform never ends
        # up with the raw "chromedriver{}" template as its executable name.
        exe_name = base_.format("")
        if platform_ == "win32":
            exe_name = base_.format(".exe")
        elif platform_.startswith("linux"):
            platform_ = "linux64"
        elif platform_ == "darwin":
            platform_ = "mac64"

        # self.platform is the suffix used in the download archive name.
        self.platform = platform_
        self.executable_path = executable_path or exe_name

    def patch_selenium_webdriver(self_):
        """
        Patches selenium package Chrome, ChromeOptions classes for current session

        :return:
        """
        import selenium.webdriver.chrome.service
        import selenium.webdriver

        selenium.webdriver.Chrome = Chrome
        selenium.webdriver.ChromeOptions = ChromeOptions
        logger.warning(
            "Now it is safe to import Chrome and ChromeOptions from selenium"
        )
        self_.__class__.selenium_patched = True

    def install(self, patch_selenium=True):
        """
        Initialize the patch

        This will:
         download chromedriver if not present
         patch the downloaded chromedriver
         patch selenium package if <patch_selenium> is True (default)

        :param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session)
        :return:
        """
        if not os.path.exists(self.executable_path):
            self.fetch_chromedriver()
            self.patch_binary()
        # Record the installation even when the executable was already on disk;
        # otherwise every Chrome()/ChromeOptions() call would re-run install().
        self.__class__.installed = True

        if patch_selenium:
            self.patch_selenium_webdriver()

    def get_release_version_number(self):
        """
        Gets the latest major version available, or the latest major version of self.target_version if set explicitly.

        :return: version string
        """
        path = (
            f"LATEST_RELEASE_{self.target_version}"
            if self.target_version
            else "LATEST_RELEASE"
        )
        # Close the HTTP response deterministically and drop any surrounding
        # whitespace so the value can be embedded in a download URL.
        with urlopen(_DL_BASE + path) as response:
            return response.read().decode().strip()

    def fetch_chromedriver(self):
        """
        Downloads ChromeDriver from source and unpacks the executable

        Nothing is downloaded when ``self.executable_path`` already exists.

        :return: on success, path of the unpacked executable (``self.executable_path``)
        """
        # Check the file system first so that no network request is made when
        # there is nothing to download.
        if os.path.exists(self.executable_path):
            return self.executable_path

        base_ = self._base
        zip_name = base_.format(".zip")
        # Name of the executable inside the downloaded archive.
        member_name = base_.format(".exe" if self.platform == "win32" else "")
        archive_name = base_.format("_" + self.platform) + ".zip"
        target_dir = os.path.dirname(self.executable_path) or os.curdir

        ver = self.get_release_version_number()
        urlretrieve(f"{_DL_BASE}{ver}/{archive_name}", filename=zip_name)
        try:
            with zipfile.ZipFile(zip_name) as zf:
                zf.extract(member_name, path=target_dir)
        finally:
            # Never leave the temporary archive behind, even on a bad zip.
            os.remove(zip_name)

        extracted_path = os.path.join(target_dir, member_name)
        if os.path.abspath(extracted_path) != os.path.abspath(self.executable_path):
            # A custom executable_path may use a different file name.
            os.replace(extracted_path, self.executable_path)
        # ZipFile.extract does not restore permission bits, so make the driver
        # executable explicitly.
        os.chmod(self.executable_path, 0o755)
        return self.executable_path

    def patch_binary(self):
        """
        Patches the ChromeDriver binary

        The first line containing ``cdc_`` is overwritten in place. The number
        of bytes written always equals the length of the line being replaced,
        so the rest of the file keeps its offsets.

        :return: None if the class is already marked as installed,
                 False if no line containing ``cdc_`` was found,
                 True once a line has been rewritten
        """
        global __is_patched__

        if self.__class__.installed:
            return

        replacement = b" var key = '$azc_abcdefghijklmnopQRstuv_';"
        with io.open(self.executable_path, "r+b") as binary:
            for line in iter(binary.readline, b""):
                if b"cdc_" not in line:
                    continue
                newline = b"\n" if line.endswith(b"\n") else b""
                room = len(line) - len(newline)
                if len(replacement) <= room:
                    # Pad with spaces so no bytes of the old line survive.
                    patched = replacement.ljust(room) + newline
                else:
                    # Line too short for the full replacement: rename the
                    # marker in place instead of overwriting following bytes.
                    patched = line.replace(b"cdc_", b"azc_")
                # readline() left the cursor after the line; step back over it.
                binary.seek(-len(line), 1)
                binary.write(patched)
                __is_patched__ = 1
                break
            else:
                return False
        return True
def install(executable_path=None, target_version=None, *args, **kwargs):
    """
    Module-level shortcut: build a ChromeDriverManager from the given arguments
    and run its ``install()`` method with default settings.

    :param executable_path: forwarded to ChromeDriverManager
    :param target_version: forwarded to ChromeDriverManager
    :param args: extra positional arguments, forwarded to ChromeDriverManager
    :param kwargs: extra keyword arguments, forwarded to ChromeDriverManager
    :return:
    """
    manager = ChromeDriverManager(executable_path, target_version, *args, **kwargs)
    manager.install()