undetected_chromedriver 3.5.0 -> 3.5.5: validate the browser binary before
start-up, tolerate Options objects without a `headless` attribute, and fetch
ChromeDriver 115+ from the chrome-for-testing endpoints.

NOTE(review): the line structure of this patch was restored from a
whitespace-collapsed copy. Indentation and blank context lines were
reconstructed from the hunk line counts -- confirm against the upstream
commit before applying. Two added lines differ from the collapsed copy: the
error message quote is balanced (`'{}'`) and the bare `except:` in
cleanup_unused_files is narrowed to `except OSError:`; the post-image hashes
on the `index` lines were left untouched and refer to the collapsed copy's
content.

diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py
index 06359a6..2f7b6f4 100644
--- a/undetected_chromedriver/__init__.py
+++ b/undetected_chromedriver/__init__.py
@@ -17,11 +17,12 @@ by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
 from __future__ import annotations
 
 
-__version__ = "3.5.0"
+__version__ = "3.5.5"
 
 import json
 import logging
 import os
+import pathlib
 import re
 import shutil
 import subprocess
@@ -372,6 +373,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
                 browser_executable_path or find_chrome_executable()
             )
 
+        if not options.binary_location or not \
+                pathlib.Path(options.binary_location).exists():
+            raise FileNotFoundError(
+                "\n---------------------\n"
+                "Could not determine browser executable."
+                "\n---------------------\n"
+                "Make sure your browser is installed in the default location (path).\n"
+                "If you are sure about the browser executable, you can specify it using\n"
+                "the `browser_executable_path='{}'` parameter.\n\n"
+                .format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
+            )
+
         self._delay = 3
 
         self.user_data_dir = user_data_dir
@@ -382,7 +395,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
         if no_sandbox:
             options.arguments.extend(["--no-sandbox", "--test-type"])
 
-        if headless or options.headless:
+        if headless or getattr(options, 'headless', None):
             #workaround until a better checking is found
             try:
                 if self.patcher.version_main < 108:
@@ -472,7 +485,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
         else:
             self._web_element_cls = WebElement
 
-        if options.headless:
+        if headless or getattr(options, 'headless', None):
             self._configure_headless()
 
     def _configure_headless(self):
@@ -877,8 +890,6 @@ def find_chrome_executable():
             if item is not None:
                 for subitem in (
                     "Google/Chrome/Application",
-                    "Google/Chrome Beta/Application",
-                    "Google/Chrome Canary/Application",
                 ):
                     candidates.add(os.sep.join((item, subitem, "chrome.exe")))
     for candidate in candidates:
diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py
index d083dc3..e72c022 100644
--- a/undetected_chromedriver/patcher.py
+++ b/undetected_chromedriver/patcher.py
@@ -3,9 +3,11 @@
 
 from distutils.version import LooseVersion
 import io
+import json
 import logging
 import os
 import pathlib
+import platform
 import random
 import re
 import shutil
@@ -24,21 +26,9 @@ IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
 
 class Patcher(object):
     lock = Lock()
-    url_repo = "https://chromedriver.storage.googleapis.com"
-    zip_name = "chromedriver_%s.zip"
     exe_name = "chromedriver%s"
 
     platform = sys.platform
-    if platform.endswith("win32"):
-        zip_name %= "win32"
-        exe_name %= ".exe"
-    if platform.endswith(("linux", "linux2")):
-        zip_name %= "linux64"
-        exe_name %= ""
-    if platform.endswith("darwin"):
-        zip_name %= "mac64"
-        exe_name %= ""
-
     if platform.endswith("win32"):
         d = "~/appdata/roaming/undetected_chromedriver"
     elif "LAMBDA_TASK_ROOT" in os.environ:
@@ -72,6 +62,10 @@ class Patcher(object):
         prefix = "undetected"
         self.user_multi_procs = user_multi_procs
 
+        self.is_old_chromedriver = version_main and version_main <= 114
+        # Needs to be called before self.exe_name is accessed
+        self._set_platform_name()
+
         if not os.path.exists(self.data_path):
             os.makedirs(self.data_path, exist_ok=True)
 
@@ -97,9 +91,33 @@ class Patcher(object):
             self._custom_exe_path = True
             self.executable_path = executable_path
 
+        # Set the correct repository to download the Chromedriver from
+        if self.is_old_chromedriver:
+            self.url_repo = "https://chromedriver.storage.googleapis.com"
+        else:
+            self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
+
         self.version_main = version_main
         self.version_full = None
 
+    def _set_platform_name(self):
+        """
+        Set the platform and exe name based on the platform undetected_chromedriver is running on
+        in order to download the correct chromedriver.
+        """
+        if self.platform.endswith("win32"):
+            self.platform_name = "win32"
+            self.exe_name %= ".exe"
+        if self.platform.endswith(("linux", "linux2")):
+            self.platform_name = "linux64"
+            self.exe_name %= ""
+        if self.platform.endswith("darwin"):
+            if self.is_old_chromedriver:
+                self.platform_name = "mac64"
+            else:
+                self.platform_name = "mac-x64"
+            self.exe_name %= ""
+
     def auto(self, executable_path=None, force=False, version_main=None, _=None):
         """
 
@@ -111,16 +129,15 @@ class Patcher(object):
         Returns:
 
         """
-        # if self.user_multi_procs and \
-        #         self.user_multi_procs != -1:
-        #     # -1 being a skip value used later in this block
-        #
         p = pathlib.Path(self.data_path)
-        with Lock():
-            files = list(p.rglob("*chromedriver*?"))
-            for file in files:
-                if self.is_binary_patched(file):
-                    self.executable_path = str(file)
+        if self.user_multi_procs:
+            with Lock():
+                files = list(p.rglob("*chromedriver*"))
+                most_recent = max(files, key=lambda f: f.stat().st_mtime)
+                files.remove(most_recent)
+                list(map(lambda f: f.unlink(), files))
+                if self.is_binary_patched(most_recent):
+                    self.executable_path = str(most_recent)
                     return True
 
         if executable_path:
@@ -202,7 +219,11 @@ class Patcher(object):
     def cleanup_unused_files(self):
         p = pathlib.Path(self.data_path)
         items = list(p.glob("*undetected*"))
-        print(items)
+        for item in items:
+            try:
+                item.unlink()
+            except OSError:
+                pass
 
     def patch(self):
         self.patch_exe()
@@ -214,12 +235,32 @@ class Patcher(object):
         :return: version string
         :rtype: LooseVersion
         """
-        path = "/latest_release"
-        if self.version_main:
-            path += f"_{self.version_main}"
-        path = path.upper()
+        # Endpoint for old versions of Chromedriver (114 and below)
+        if self.is_old_chromedriver:
+            path = f"/latest_release_{self.version_main}"
+            path = path.upper()
+            logger.debug("getting release number from %s" % path)
+            return LooseVersion(urlopen(self.url_repo + path).read().decode())
+
+        # Endpoint for new versions of Chromedriver (115+)
+        if not self.version_main:
+            # Fetch the latest version
+            path = "/last-known-good-versions-with-downloads.json"
+            logger.debug("getting release number from %s" % path)
+            with urlopen(self.url_repo + path) as conn:
+                response = conn.read().decode()
+
+            last_versions = json.loads(response)
+            return LooseVersion(last_versions["channels"]["Stable"]["version"])
+
+        # Fetch the latest minor version of the major version provided
+        path = "/latest-versions-per-milestone-with-downloads.json"
         logger.debug("getting release number from %s" % path)
-        return LooseVersion(urlopen(self.url_repo + path).read().decode())
+        with urlopen(self.url_repo + path) as conn:
+            response = conn.read().decode()
+
+        major_versions = json.loads(response)
+        return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
 
     def parse_exe_version(self):
         with io.open(self.executable_path, "rb") as f:
@@ -234,10 +275,16 @@ class Patcher(object):
 
         :return: path to downloaded file
         """
-        u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name)
-        logger.debug("downloading from %s" % u)
-        # return urlretrieve(u, filename=self.data_path)[0]
-        return urlretrieve(u)[0]
+        zip_name = f"chromedriver_{self.platform_name}.zip"
+        if self.is_old_chromedriver:
+            download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
+        else:
+            zip_name = zip_name.replace("_", "-", 1)
+            download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
+            download_url %= (self.version_full.vstring, self.platform_name, zip_name)
+
+        logger.debug("downloading from %s" % download_url)
+        return urlretrieve(download_url)[0]
 
     def unzip_package(self, fp):
         """
@@ -245,6 +292,12 @@ class Patcher(object):
 
         :return: path to unpacked executable
         """
+        exe_path = self.exe_name
+        if not self.is_old_chromedriver:
+            # The new chromedriver unzips into its own folder
+            zip_name = f"chromedriver-{self.platform_name}"
+            exe_path = os.path.join(zip_name, self.exe_name)
+
         logger.debug("unzipping %s" % fp)
         try:
             os.unlink(self.zip_path)
@@ -253,10 +306,10 @@ class Patcher(object):
 
         os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
         with zipfile.ZipFile(fp, mode="r") as zf:
-            zf.extract(self.exe_name, self.zip_path)
-        os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path)
+            zf.extractall(self.zip_path)
+        os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
         os.remove(fp)
-        os.rmdir(self.zip_path)
+        shutil.rmtree(self.zip_path)
         os.chmod(self.executable_path, 0o755)
         return self.executable_path
 