1.5.0
This commit is contained in:
parent
06217efc65
commit
4ce47e7f83
|
@ -1,129 +1,129 @@
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
*$py.class
|
*$py.class
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
*.so
|
*.so
|
||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
.Python
|
||||||
build/
|
build/
|
||||||
develop-eggs/
|
develop-eggs/
|
||||||
dist/
|
dist/
|
||||||
downloads/
|
downloads/
|
||||||
eggs/
|
eggs/
|
||||||
.eggs/
|
.eggs/
|
||||||
lib/
|
lib/
|
||||||
lib64/
|
lib64/
|
||||||
parts/
|
parts/
|
||||||
sdist/
|
sdist/
|
||||||
var/
|
var/
|
||||||
wheels/
|
wheels/
|
||||||
pip-wheel-metadata/
|
pip-wheel-metadata/
|
||||||
share/python-wheels/
|
share/python-wheels/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
.installed.cfg
|
.installed.cfg
|
||||||
*.egg
|
*.egg
|
||||||
MANIFEST
|
MANIFEST
|
||||||
|
|
||||||
# PyInstaller
|
# PyInstaller
|
||||||
# Usually these files are written by a python script from a template
|
# Usually these files are written by a python script from a template
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
*.manifest
|
*.manifest
|
||||||
*.spec
|
*.spec
|
||||||
|
|
||||||
# Installer logs
|
# Installer logs
|
||||||
pip-log.txt
|
pip-log.txt
|
||||||
pip-delete-this-directory.txt
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
# Unit test / coverage reports
|
# Unit test / coverage reports
|
||||||
htmlcov/
|
htmlcov/
|
||||||
.tox/
|
.tox/
|
||||||
.nox/
|
.nox/
|
||||||
.coverage
|
.coverage
|
||||||
.coverage.*
|
.coverage.*
|
||||||
.cache
|
.cache
|
||||||
nosetests.xml
|
nosetests.xml
|
||||||
coverage.xml
|
coverage.xml
|
||||||
*.cover
|
*.cover
|
||||||
*.py,cover
|
*.py,cover
|
||||||
.hypothesis/
|
.hypothesis/
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
|
|
||||||
# Translations
|
# Translations
|
||||||
*.mo
|
*.mo
|
||||||
*.pot
|
*.pot
|
||||||
|
|
||||||
# Django stuff:
|
# Django stuff:
|
||||||
*.log
|
*.log
|
||||||
local_settings.py
|
local_settings.py
|
||||||
db.sqlite3
|
db.sqlite3
|
||||||
db.sqlite3-journal
|
db.sqlite3-journal
|
||||||
|
|
||||||
# Flask stuff:
|
# Flask stuff:
|
||||||
instance/
|
instance/
|
||||||
.webassets-cache
|
.webassets-cache
|
||||||
|
|
||||||
# Scrapy stuff:
|
# Scrapy stuff:
|
||||||
.scrapy
|
.scrapy
|
||||||
|
|
||||||
# Sphinx documentation
|
# Sphinx documentation
|
||||||
docs/_build/
|
docs/_build/
|
||||||
|
|
||||||
# PyBuilder
|
# PyBuilder
|
||||||
target/
|
target/
|
||||||
|
|
||||||
# Jupyter Notebook
|
# Jupyter Notebook
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
|
|
||||||
# IPython
|
# IPython
|
||||||
profile_default/
|
profile_default/
|
||||||
ipython_config.py
|
ipython_config.py
|
||||||
|
|
||||||
# pyenv
|
# pyenv
|
||||||
.python-version
|
.python-version
|
||||||
|
|
||||||
# pipenv
|
# pipenv
|
||||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
# install all needed dependencies.
|
# install all needed dependencies.
|
||||||
#Pipfile.lock
|
#Pipfile.lock
|
||||||
|
|
||||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||||
__pypackages__/
|
__pypackages__/
|
||||||
|
|
||||||
# Celery stuff
|
# Celery stuff
|
||||||
celerybeat-schedule
|
celerybeat-schedule
|
||||||
celerybeat.pid
|
celerybeat.pid
|
||||||
|
|
||||||
# SageMath parsed files
|
# SageMath parsed files
|
||||||
*.sage.py
|
*.sage.py
|
||||||
|
|
||||||
# Environments
|
# Environments
|
||||||
.env
|
.env
|
||||||
.venv
|
.venv
|
||||||
env/
|
env/
|
||||||
venv/
|
venv/
|
||||||
ENV/
|
ENV/
|
||||||
env.bak/
|
env.bak/
|
||||||
venv.bak/
|
venv.bak/
|
||||||
|
|
||||||
# Spyder project settings
|
# Spyder project settings
|
||||||
.spyderproject
|
.spyderproject
|
||||||
.spyproject
|
.spyproject
|
||||||
|
|
||||||
# Rope project settings
|
# Rope project settings
|
||||||
.ropeproject
|
.ropeproject
|
||||||
|
|
||||||
# mkdocs documentation
|
# mkdocs documentation
|
||||||
/site
|
/site
|
||||||
|
|
||||||
# mypy
|
# mypy
|
||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
.dmypy.json
|
.dmypy.json
|
||||||
dmypy.json
|
dmypy.json
|
||||||
|
|
||||||
# Pyre type checker
|
# Pyre type checker
|
||||||
.pyre/
|
.pyre/
|
||||||
|
|
252
README.md
252
README.md
|
@ -1,126 +1,126 @@
|
||||||
# undetected_chromedriver
|
# undetected_chromedriver
|
||||||
|
|
||||||
https://github.com/ultrafunkamsterdam/undetected-chromedriver
|
https://github.com/ultrafunkamsterdam/undetected-chromedriver
|
||||||
|
|
||||||
Optimized Selenium Chromedriver patch which does not trigger anti-bot services like Distill Network / Imperva / DataDome / Botprotect.io
|
Optimized Selenium Chromedriver patch which does not trigger anti-bot services like Distill Network / Imperva / DataDome / Botprotect.io
|
||||||
Automatically downloads the driver binary and patches it.
|
Automatically downloads the driver binary and patches it.
|
||||||
|
|
||||||
* **Tested on version 75,76,77,78,79,80,81,83,84,85,86**
|
* **Tested on version 75,76,77,78,79,80,81,83,84,85,86**
|
||||||
|
|
||||||
* **patching also works on MS Edge (chromium-based) webdriver binary**
|
* **patching also works on MS Edge (chromium-based) webdriver binary**
|
||||||
|
|
||||||
|
|
||||||
## New ##
|
## New ##
|
||||||
|
|
||||||
By default, the console log function is disabled to prevent certain detections.
|
By default, the console log function is disabled to prevent certain detections.
|
||||||
Until a cleaner solution is found, use the following to manually enable it
|
Until a cleaner solution is found, use the following to manually enable it
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
driver = uc.Chrome(enable_console_log=True)
|
driver = uc.Chrome(enable_console_log=True)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installation ##
|
## Installation ##
|
||||||
```
|
```
|
||||||
pip install undetected-chromedriver
|
pip install undetected-chromedriver
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage ##
|
## Usage ##
|
||||||
|
|
||||||
To prevent unnecessary hair-pulling and issue-raising, please mind the **[important note at the end of this document](#important-note).**
|
To prevent unnecessary hair-pulling and issue-raising, please mind the **[important note at the end of this document](#important-note).**
|
||||||
|
|
||||||
<br>
|
<br>
|
||||||
|
|
||||||
#### the easy way (recommended) ####
|
#### the easy way (recommended) ####
|
||||||
```python
|
```python
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
driver = uc.Chrome()
|
driver = uc.Chrome()
|
||||||
driver.get('https://distilnetworks.com')
|
driver.get('https://distilnetworks.com')
|
||||||
|
|
||||||
# To target specific version
|
# To target specific version
|
||||||
|
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
uc.TARGET_VERSION = 85
|
uc.TARGET_VERSION = 85
|
||||||
driver = uc.Chrome()
|
driver = uc.Chrome()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
#### patches selenium module ####
|
#### patches selenium module ####
|
||||||
Needs to be done before importing from selenium package
|
Needs to be done before importing from selenium package
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
uc.install()
|
uc.install()
|
||||||
from selenium.webdriver import Chrome
|
from selenium.webdriver import Chrome
|
||||||
driver = Chrome()
|
driver = Chrome()
|
||||||
driver.get('https://distilnetworks.com')
|
driver.get('https://distilnetworks.com')
|
||||||
````
|
````
|
||||||
|
|
||||||
|
|
||||||
#### the customized way ####
|
#### the customized way ####
|
||||||
```python
|
```python
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
|
|
||||||
#specify chromedriver version to download and patch
|
#specify chromedriver version to download and patch
|
||||||
#this did not work correctly until 1.2.1
|
#this did not work correctly until 1.2.1
|
||||||
uc.TARGET_VERSION = 78
|
uc.TARGET_VERSION = 78
|
||||||
|
|
||||||
# or specify your own chromedriver binary to patch
|
# or specify your own chromedriver binary to patch
|
||||||
uc.install(
|
uc.install(
|
||||||
executable_path='c:/users/user1/chromedriver.exe',
|
executable_path='c:/users/user1/chromedriver.exe',
|
||||||
)
|
)
|
||||||
from selenium.webdriver import Chrome, ChromeOptions
|
from selenium.webdriver import Chrome, ChromeOptions
|
||||||
opts = ChromeOptions()
|
opts = ChromeOptions()
|
||||||
opts.add_argument(f'--proxy-server=socks5://127.0.0.1:9050')
|
opts.add_argument(f'--proxy-server=socks5://127.0.0.1:9050')
|
||||||
driver = Chrome(options=opts)
|
driver = Chrome(options=opts)
|
||||||
driver.get('https://distilnetworks.com')
|
driver.get('https://distilnetworks.com')
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### datadome.co ####
|
### datadome.co ####
|
||||||
These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product.
|
These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product.
|
||||||
Make sure you use a "clean" ip for this one.
|
Make sure you use a "clean" ip for this one.
|
||||||
```
|
```
|
||||||
# STANDARD chromedriver
|
# STANDARD chromedriver
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
chrome = webdriver.Chrome()
|
chrome = webdriver.Chrome()
|
||||||
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
|
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
|
||||||
chrome.save_screenshot('datadome_regular_webdriver.png')
|
chrome.save_screenshot('datadome_regular_webdriver.png')
|
||||||
True
|
True
|
||||||
# after this detection, you'll keep being nagged with puzzles, even if you use another machine from the same network (they use a very tight but effective regime, possibly a combination of fingerprinting and ip-flagging).
|
# after this detection, you'll keep being nagged with puzzles, even if you use another machine from the same network (they use a very tight but effective regime, possibly a combination of fingerprinting and ip-flagging).
|
||||||
|
|
||||||
|
|
||||||
# UNDETECTED chromedriver (headless,even)
|
# UNDETECTED chromedriver (headless,even)
|
||||||
|
|
||||||
import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
options = uc.ChromeOptions()
|
options = uc.ChromeOptions()
|
||||||
options.headless=True
|
options.headless=True
|
||||||
options.add_argument('--headless')
|
options.add_argument('--headless')
|
||||||
chrome = uc.Chrome(options=options)
|
chrome = uc.Chrome(options=options)
|
||||||
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
|
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
|
||||||
chrome.save_screenshot('datadome_undetected_webddriver.png')
|
chrome.save_screenshot('datadome_undetected_webddriver.png')
|
||||||
|
|
||||||
```
|
```
|
||||||
**Check both saved screenshots [here](https://imgur.com/a/fEmqadP)**
|
**Check both saved screenshots [here](https://imgur.com/a/fEmqadP)**
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## important note ##
|
## important note ##
|
||||||
|
|
||||||
The default blank page on start plays a BIG role in the anti-detection workings of the module. You will only become undetectable from the moment you use driver.get(url) to navigate to some url (and next and next and next). This automatically means that if you enter a url in the browser screen by hand right after launch, you are NOT protected! New Tabs: same story. If you really need multi-tabs, then open the tab with the blank page (hint: url is `data:,` including comma, and yes, driver accepts it) and do your thing as usual. If you follow these "rules" (actually it's the default behaviour), then you will have a great time for now.
|
The default blank page on start plays a BIG role in the anti-detection workings of the module. You will only become undetectable from the moment you use driver.get(url) to navigate to some url (and next and next and next). This automatically means that if you enter a url in the browser screen by hand right after launch, you are NOT protected! New Tabs: same story. If you really need multi-tabs, then open the tab with the blank page (hint: url is `data:,` including comma, and yes, driver accepts it) and do your thing as usual. If you follow these "rules" (actually it's the default behaviour), then you will have a great time for now.
|
||||||
|
|
||||||
TL;DR and for the visual-minded:
|
TL;DR and for the visual-minded:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
In [1]: import undetected_chromedriver as uc
|
In [1]: import undetected_chromedriver as uc
|
||||||
In [2]: driver = uc.Chrome()
|
In [2]: driver = uc.Chrome()
|
||||||
In [3]: driver.execute_script('return navigator.webdriver')
|
In [3]: driver.execute_script('return navigator.webdriver')
|
||||||
Out[3]: True # Detectable
|
Out[3]: True # Detectable
|
||||||
In [4]: driver.get('https://distilnetworks.com') # starts magic
|
In [4]: driver.get('https://distilnetworks.com') # starts magic
|
||||||
In [5]: driver.execute_script('return navigator.webdriver')
|
In [5]: driver.execute_script('return navigator.webdriver')
|
||||||
Out[5]: None # Undetectable!
|
Out[5]: None # Undetectable!
|
||||||
```
|
```
|
||||||
## end important note ##
|
## end important note ##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -16,7 +16,7 @@ from setuptools import setup
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="undetected-chromedriver",
|
name="undetected-chromedriver",
|
||||||
version="1.4.2",
|
version="1.5.0",
|
||||||
packages=["undetected_chromedriver"],
|
packages=["undetected_chromedriver"],
|
||||||
install_requires=["selenium",],
|
install_requires=["selenium",],
|
||||||
url="https://github.com/ultrafunkamsterdam/undetected_chromedriver",
|
url="https://github.com/ultrafunkamsterdam/undetected_chromedriver",
|
||||||
|
|
|
@ -19,6 +19,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import zipfile
|
import zipfile
|
||||||
from distutils.version import LooseVersion
|
from distutils.version import LooseVersion
|
||||||
|
@ -30,7 +31,6 @@ from selenium.webdriver import ChromeOptions as _ChromeOptions
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
__IS_PATCHED__ = 0
|
|
||||||
TARGET_VERSION = 0
|
TARGET_VERSION = 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,15 +49,15 @@ class Chrome:
|
||||||
kwargs["options"] = ChromeOptions()
|
kwargs["options"] = ChromeOptions()
|
||||||
instance = object.__new__(_Chrome)
|
instance = object.__new__(_Chrome)
|
||||||
instance.__init__(*args, **kwargs)
|
instance.__init__(*args, **kwargs)
|
||||||
|
|
||||||
instance._orig_get = instance.get
|
instance._orig_get = instance.get
|
||||||
|
|
||||||
def _get_wrapped(*args, **kwargs):
|
def _get_wrapped(*args, **kwargs):
|
||||||
if instance.execute_script("return navigator.webdriver"):
|
if instance.execute_script("return navigator.webdriver"):
|
||||||
instance.execute_cdp_cmd(
|
instance.execute_cdp_cmd(
|
||||||
|
|
||||||
"Page.addScriptToEvaluateOnNewDocument",
|
"Page.addScriptToEvaluateOnNewDocument",
|
||||||
{"source": """
|
{
|
||||||
|
"source": """
|
||||||
|
|
||||||
Object.defineProperty(window, 'navigator', {
|
Object.defineProperty(window, 'navigator', {
|
||||||
value: new Proxy(navigator, {
|
value: new Proxy(navigator, {
|
||||||
|
@ -70,16 +70,18 @@ class Chrome:
|
||||||
: target[key]
|
: target[key]
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
"""
|
||||||
(function () {
|
+ (
|
||||||
}) ();
|
"console.log = console.dir = console.error = function(){};"
|
||||||
|
if not enable_console_log
|
||||||
""" + ("console.log = console.dir = console.error = function(){};" if not enable_console_log else '' ) }
|
else ""
|
||||||
|
)
|
||||||
|
},
|
||||||
)
|
)
|
||||||
return instance._orig_get(*args, **kwargs)
|
return instance._orig_get(*args, **kwargs)
|
||||||
|
|
||||||
instance.get = _get_wrapped
|
instance.get = _get_wrapped
|
||||||
|
|
||||||
original_user_agent_string = instance.execute_script(
|
original_user_agent_string = instance.execute_script(
|
||||||
"return navigator.userAgent"
|
"return navigator.userAgent"
|
||||||
)
|
)
|
||||||
|
@ -102,9 +104,7 @@ class ChromeOptions:
|
||||||
instance.__init__()
|
instance.__init__()
|
||||||
instance.add_argument("start-maximized")
|
instance.add_argument("start-maximized")
|
||||||
instance.add_experimental_option("excludeSwitches", ["enable-automation"])
|
instance.add_experimental_option("excludeSwitches", ["enable-automation"])
|
||||||
instance.add_experimental_option("useAutomationExtension", False)
|
instance.add_argument("--disable-blink-features=AutomationControlled")
|
||||||
instance.add_argument("--disable-blink-features=AutomationControlled");
|
|
||||||
logger.info(f"starting undetected_chromedriver.ChromeOptions({args}, {kwargs})")
|
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
|
|
||||||
|
@ -120,12 +120,16 @@ class ChromeDriverManager(object):
|
||||||
|
|
||||||
_platform = sys.platform
|
_platform = sys.platform
|
||||||
|
|
||||||
if TARGET_VERSION: # user override using global
|
if TARGET_VERSION:
|
||||||
|
# use global if set
|
||||||
self.target_version = TARGET_VERSION
|
self.target_version = TARGET_VERSION
|
||||||
|
|
||||||
if target_version:
|
if target_version:
|
||||||
|
# use explicitly passed target
|
||||||
self.target_version = target_version # user override
|
self.target_version = target_version # user override
|
||||||
|
|
||||||
if not self.target_version:
|
if not self.target_version:
|
||||||
# if target_version still not set, fetch the current major release version
|
# none of the above (default) and just get current version
|
||||||
self.target_version = self.get_release_version_number().version[
|
self.target_version = self.get_release_version_number().version[
|
||||||
0
|
0
|
||||||
] # only major version int
|
] # only major version int
|
||||||
|
@ -173,8 +177,9 @@ class ChromeDriverManager(object):
|
||||||
"""
|
"""
|
||||||
if not os.path.exists(self.executable_path):
|
if not os.path.exists(self.executable_path):
|
||||||
self.fetch_chromedriver()
|
self.fetch_chromedriver()
|
||||||
self.patch_binary()
|
if not self.__class__.installed:
|
||||||
self.__class__.installed = True
|
if self.patch_binary():
|
||||||
|
self.__class__.installed = True
|
||||||
|
|
||||||
if patch_selenium:
|
if patch_selenium:
|
||||||
self.patch_selenium_webdriver()
|
self.patch_selenium_webdriver()
|
||||||
|
@ -220,20 +225,15 @@ class ChromeDriverManager(object):
|
||||||
|
|
||||||
:return: False on failure, binary name on success
|
:return: False on failure, binary name on success
|
||||||
"""
|
"""
|
||||||
if self.__class__.installed:
|
linect = 0
|
||||||
return
|
with io.open(self.executable_path, "r+b") as fh:
|
||||||
|
for line in iter(lambda: fh.readline(), b""):
|
||||||
with io.open(self.executable_path, "r+b") as binary:
|
|
||||||
for line in iter(lambda: binary.readline(), b""):
|
|
||||||
if b"cdc_" in line:
|
if b"cdc_" in line:
|
||||||
binary.seek(-len(line), 1)
|
fh.seek(-len(line), 1)
|
||||||
line = b" var key = '$azc_abcdefghijklmnopQRstuv_';\n"
|
newline = re.sub(b"cdc_.{22}", b"xxx_undetectedchromeDRiver", line)
|
||||||
binary.write(line)
|
fh.write(newline)
|
||||||
__IS_PATCHED__ = 1
|
linect += 1
|
||||||
break
|
return linect
|
||||||
else:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def install(executable_path=None, target_version=None, *args, **kwargs):
|
def install(executable_path=None, target_version=None, *args, **kwargs):
|
||||||
|
|
Loading…
Reference in New Issue