This commit is contained in:
ultrafunkamsterdam 2020-10-13 03:51:35 +02:00
parent 06217efc65
commit 4ce47e7f83
5 changed files with 962 additions and 962 deletions

258
.gitignore vendored
View File

@ -1,129 +1,129 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

1348
LICENSE

File diff suppressed because it is too large Load Diff

252
README.md
View File

@ -1,126 +1,126 @@
# undetected_chromedriver
https://github.com/ultrafunkamsterdam/undetected-chromedriver
Optimized Selenium Chromedriver patch which does not trigger anti-bot services like Distill Network / Imperva / DataDome / Botprotect.io
Automatically downloads the driver binary and patches it.
* **Tested on version 75,76,77,78,79,80,81,83,84,85,86**
* **patching also works on MS Edge (chromium-based) webdriver binary**
## New ##
By default, the console log function is disabled to prevent certain detections.
Until a cleaner solution is found, use the following to manually enable it
```python
import undetected_chromedriver as uc
driver = uc.Chrome(enable_console_log=True)
```
## Installation ##
```
pip install undetected-chromedriver
```
## Usage ##
To prevent unnecessary hair-pulling and issue-rasing, please mind the **[important note at the end of this document](#important-note) .**
<br>
#### the easy way (recommended) ####
```python
import undetected_chromedriver as uc
driver = uc.Chrome()
driver.get('https://distilnetworks.com')
# To target specific version
import undetected_chromedriver as uc
uc.TARGET_VERSION = 85
driver = uc.Chrome()
```
#### patches selenium module ####
Needs to be done before importing from selenium package
```python
import undetected_chromedriver as uc
uc.install()
from selenium.webdriver import Chrome
driver = Chrome()
driver.get('https://distilnetworks.com')
````
#### the customized way ####
```python
import undetected_chromedriver as uc
#specify chromedriver version to download and patch
#this did not work correctly until 1.2.1
uc.TARGET_VERSION = 78
# or specify your own chromedriver binary to patch
undetected_chromedriver.install(
executable_path='c:/users/user1/chromedriver.exe',
)
from selenium.webdriver import Chrome, ChromeOptions
opts = ChromeOptions()
opts.add_argument(f'--proxy-server=socks5://127.0.0.1:9050')
driver = Chrome(options=opts)
driver.get('https://distilnetworks.com')
```
### datadome.co ####
These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product.
Make sure you use a "clean" ip for this one.
```
# STANDARD chromedriver
from selenium import webdriver
chrome = webdriver.Chrome()
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
chrome.save_screenshot('datadome_regular_webdriver.png')
True
# after this detectioon, you'll keep being nagged with puzzles, even if you use another machine from the same same network (they use a very tight but effective regime, possibly combination of fingerprinting and ip-flagging).
# UNDETECTED chromedriver (headless,even)
import undetected_chromedriver as uc
options = uc.ChromeOptions()
options.headless=True
options.add_argument('--headless')
chrome = uc.Chrome(options=options)
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
chrome.save_screenshot('datadome_undetected_webddriver.png')
```
**Check both saved screenhots [here](https://imgur.com/a/fEmqadP)**
## important note ##
the default blank page on start plays a BIG role in the anti-detection workings of the module. You will only become undetectable from the moment you use driver.get(url) to navigate to some url (and next and next and next). This automatically means that if you enter a url in the browser screen by hand right after launch, you are NOT protected! New Tabs: same story. If you really need multi-tabs, then open the tab with the blank page (hint: url is `data:,` including comma, and yes, driver accepts it) and do your thing as usual. If you follow these "rules" (actually its default behaviour), then you will have a great time for now.
TL;DR and for the visual-minded:
```python
In [1]: import undetected_chromedriver as uc
In [2]: driver = uc.Chrome()
In [3]: driver.execute_script('return navigator.webdriver')
Out[3]: True # Detectable
In [4]: driver.get('https://distilnetworks.com') # starts magic
In [4]: driver.execute_script('return navigator.webdriver')
In [5]: None # Undetectable!
```
## end important note ##
# undetected_chromedriver
https://github.com/ultrafunkamsterdam/undetected-chromedriver
Optimized Selenium Chromedriver patch which does not trigger anti-bot services like Distill Network / Imperva / DataDome / Botprotect.io
Automatically downloads the driver binary and patches it.
* **Tested on version 75,76,77,78,79,80,81,83,84,85,86**
* **patching also works on MS Edge (chromium-based) webdriver binary**
## New ##
By default, the console log function is disabled to prevent certain detections.
Until a cleaner solution is found, use the following to manually enable it
```python
import undetected_chromedriver as uc
driver = uc.Chrome(enable_console_log=True)
```
## Installation ##
```
pip install undetected-chromedriver
```
## Usage ##
To prevent unnecessary hair-pulling and issue-rasing, please mind the **[important note at the end of this document](#important-note) .**
<br>
#### the easy way (recommended) ####
```python
import undetected_chromedriver as uc
driver = uc.Chrome()
driver.get('https://distilnetworks.com')
# To target specific version
import undetected_chromedriver as uc
uc.TARGET_VERSION = 85
driver = uc.Chrome()
```
#### patches selenium module ####
Needs to be done before importing from selenium package
```python
import undetected_chromedriver as uc
uc.install()
from selenium.webdriver import Chrome
driver = Chrome()
driver.get('https://distilnetworks.com')
````
#### the customized way ####
```python
import undetected_chromedriver as uc
#specify chromedriver version to download and patch
#this did not work correctly until 1.2.1
uc.TARGET_VERSION = 78
# or specify your own chromedriver binary to patch
undetected_chromedriver.install(
executable_path='c:/users/user1/chromedriver.exe',
)
from selenium.webdriver import Chrome, ChromeOptions
opts = ChromeOptions()
opts.add_argument(f'--proxy-server=socks5://127.0.0.1:9050')
driver = Chrome(options=opts)
driver.get('https://distilnetworks.com')
```
### datadome.co ####
These guys have actually a powerful product, and a link to this repo, which makes me wanna test their product.
Make sure you use a "clean" ip for this one.
```
# STANDARD chromedriver
from selenium import webdriver
chrome = webdriver.Chrome()
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
chrome.save_screenshot('datadome_regular_webdriver.png')
True
# after this detectioon, you'll keep being nagged with puzzles, even if you use another machine from the same same network (they use a very tight but effective regime, possibly combination of fingerprinting and ip-flagging).
# UNDETECTED chromedriver (headless,even)
import undetected_chromedriver as uc
options = uc.ChromeOptions()
options.headless=True
options.add_argument('--headless')
chrome = uc.Chrome(options=options)
chrome.get('https://datadome.co/customers-stories/toppreise-ends-web-scraping-and-content-theft-with-datadome/')
chrome.save_screenshot('datadome_undetected_webddriver.png')
```
**Check both saved screenhots [here](https://imgur.com/a/fEmqadP)**
## important note ##
the default blank page on start plays a BIG role in the anti-detection workings of the module. You will only become undetectable from the moment you use driver.get(url) to navigate to some url (and next and next and next). This automatically means that if you enter a url in the browser screen by hand right after launch, you are NOT protected! New Tabs: same story. If you really need multi-tabs, then open the tab with the blank page (hint: url is `data:,` including comma, and yes, driver accepts it) and do your thing as usual. If you follow these "rules" (actually its default behaviour), then you will have a great time for now.
TL;DR and for the visual-minded:
```python
In [1]: import undetected_chromedriver as uc
In [2]: driver = uc.Chrome()
In [3]: driver.execute_script('return navigator.webdriver')
Out[3]: True # Detectable
In [4]: driver.get('https://distilnetworks.com') # starts magic
In [4]: driver.execute_script('return navigator.webdriver')
In [5]: None # Undetectable!
```
## end important note ##

View File

@ -16,7 +16,7 @@ from setuptools import setup
setup(
name="undetected-chromedriver",
version="1.4.2",
version="1.5.0",
packages=["undetected_chromedriver"],
install_requires=["selenium",],
url="https://github.com/ultrafunkamsterdam/undetected_chromedriver",

View File

@ -19,6 +19,7 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
import io
import logging
import os
import re
import sys
import zipfile
from distutils.version import LooseVersion
@ -30,7 +31,6 @@ from selenium.webdriver import ChromeOptions as _ChromeOptions
logger = logging.getLogger(__name__)
__IS_PATCHED__ = 0
TARGET_VERSION = 0
@ -49,15 +49,15 @@ class Chrome:
kwargs["options"] = ChromeOptions()
instance = object.__new__(_Chrome)
instance.__init__(*args, **kwargs)
instance._orig_get = instance.get
def _get_wrapped(*args, **kwargs):
if instance.execute_script("return navigator.webdriver"):
instance.execute_cdp_cmd(
"Page.addScriptToEvaluateOnNewDocument",
{"source": """
{
"source": """
Object.defineProperty(window, 'navigator', {
value: new Proxy(navigator, {
@ -70,16 +70,18 @@ class Chrome:
: target[key]
})
});
(function () {
}) ();
""" + ("console.log = console.dir = console.error = function(){};" if not enable_console_log else '' ) }
"""
+ (
"console.log = console.dir = console.error = function(){};"
if not enable_console_log
else ""
)
},
)
return instance._orig_get(*args, **kwargs)
instance.get = _get_wrapped
original_user_agent_string = instance.execute_script(
"return navigator.userAgent"
)
@ -102,9 +104,7 @@ class ChromeOptions:
instance.__init__()
instance.add_argument("start-maximized")
instance.add_experimental_option("excludeSwitches", ["enable-automation"])
instance.add_experimental_option("useAutomationExtension", False)
instance.add_argument("--disable-blink-features=AutomationControlled");
logger.info(f"starting undetected_chromedriver.ChromeOptions({args}, {kwargs})")
instance.add_argument("--disable-blink-features=AutomationControlled")
return instance
@ -120,12 +120,16 @@ class ChromeDriverManager(object):
_platform = sys.platform
if TARGET_VERSION: # user override using global
if TARGET_VERSION:
# use global if set
self.target_version = TARGET_VERSION
if target_version:
# use explicitly passed target
self.target_version = target_version # user override
if not self.target_version:
# if target_version still not set, fetch the current major release version
# none of the above (default) and just get current version
self.target_version = self.get_release_version_number().version[
0
] # only major version int
@ -173,8 +177,9 @@ class ChromeDriverManager(object):
"""
if not os.path.exists(self.executable_path):
self.fetch_chromedriver()
self.patch_binary()
self.__class__.installed = True
if not self.__class__.installed:
if self.patch_binary():
self.__class__.installed = True
if patch_selenium:
self.patch_selenium_webdriver()
@ -220,20 +225,15 @@ class ChromeDriverManager(object):
:return: False on failure, binary name on success
"""
if self.__class__.installed:
return
with io.open(self.executable_path, "r+b") as binary:
for line in iter(lambda: binary.readline(), b""):
linect = 0
with io.open(self.executable_path, "r+b") as fh:
for line in iter(lambda: fh.readline(), b""):
if b"cdc_" in line:
binary.seek(-len(line), 1)
line = b" var key = '$azc_abcdefghijklmnopQRstuv_';\n"
binary.write(line)
__IS_PATCHED__ = 1
break
else:
return False
return True
fh.seek(-len(line), 1)
newline = re.sub(b"cdc_.{22}", b"xxx_undetectedchromeDRiver", line)
fh.write(newline)
linect += 1
return linect
def install(executable_path=None, target_version=None, *args, **kwargs):