undetected_chromedriver 3.0.0 -> 3.0.1

Summary of the changes carried by this patch:
  * __init__.py: bump __version__ to "3.0.1".
  * cdp.py: replace CDPObjectBase with CDPObject (a dict whose keys are also
    reachable as attributes); tab_activate() falls back to the first listed
    tab when no id is given; tab_list() uses GET instead of POST; post()
    accepts an optional dict that is passed as json=.
  * patcher.py: Patcher.auto() becomes an instance method and accepts
    version_main; it now returns the result of self.patch().
  * v2.py: Chrome() gains version_main and patcher_force_close; adds the
    --no-sandbox argument; rewrites the preferences file with json.dump();
    adds clear_cdp_listeners() and tab_new().

diff --git a/undetected_chromedriver/__init__.py b/undetected_chromedriver/__init__.py
index 661cc0f..7dc335a 100644
--- a/undetected_chromedriver/__init__.py
+++ b/undetected_chromedriver/__init__.py
@@ -30,7 +30,7 @@ from urllib.request import urlopen, urlretrieve
 from selenium.webdriver import Chrome as _Chrome, ChromeOptions as _ChromeOptions
 
 logger = logging.getLogger(__name__)
-__version__ = "3.0.0"
+__version__ = "3.0.1"
 
 TARGET_VERSION = 0
 
diff --git a/undetected_chromedriver/cdp.py b/undetected_chromedriver/cdp.py
index 7def6ca..bf9b7e8 100644
--- a/undetected_chromedriver/cdp.py
+++ b/undetected_chromedriver/cdp.py
@@ -11,41 +11,40 @@ import websockets
 log = logging.getLogger(__name__)
 
 
-class CDPObjectBase(dict):
-    def __init__(self, *a, **kw):
-        super().__init__(**kw)
-        for k in self:
-            if isinstance(self[k], Mapping):
-                self[k] = self.__class__(self[k])  # noqa
-            elif isinstance(self[k], Sequence) and not isinstance(
-                self[k], (str, bytes)
-            ):
-                self[k] = self[k].__class__(self.__class__(i) for i in self[k])
-            else:
-                self[k] = self[k]
+class CDPObject(dict):
+    def __init__(self, *a, **k):
+        super().__init__(*a, **k)
+        self.__dict__ = self
+        for k in self.__dict__:
+            if isinstance(self.__dict__[k], dict):
+                self.__dict__[k] = CDPObject(self.__dict__[k])
+            elif isinstance(self.__dict__[k], list):
+                for i in range(len(self.__dict__[k])):
+                    if isinstance(self.__dict__[k][i], dict):
+                        self.__dict__[k][i] = CDPObject(self.__dict__[k][i])
 
     def __repr__(self):
         tpl = f"{self.__class__.__name__}(\n\t{{}}\n\t)"
         return tpl.format("\n ".join(f"{k} = {v}" for k, v in self.items()))
 
 
-class PageElement(CDPObjectBase):
+class PageElement(CDPObject):
     pass
 
 
 class CDP:
     log = logging.getLogger("CDP")
 
-    endpoints = {
+    endpoints = CDPObject({
         "json": "/json",
         "protocol": "/json/protocol",
         "list": "/json/list",
         "new": "/json/new?{url}",
         "activate": "/json/activate/{id}",
         "close": "/json/close/{id}",
-    }
+    })
 
-    def __init__(self, options: "ChromeOptions"):
+    def __init__(self, options: "ChromeOptions"):  # noqa
         self.server_addr = "http://{0}:{1}".format(*options.debugger_address.split(":"))
 
         self._reqid = 0
@@ -53,15 +52,19 @@ class CDP:
         self._last_resp = None
         self._last_json = None
 
-        resp = self.get(self.endpoints["json"])
+        resp = self.get(self.endpoints.json)  # noqa
         self.sessionId = resp[0]["id"]
         self.wsurl = resp[0]["webSocketDebuggerUrl"]
 
-    def tab_activate(self, id):
+    def tab_activate(self, id=None):
+        if not id:
+            active_tab = self.tab_list()[0]
+            id = active_tab.id  # noqa
+            self.wsurl = active_tab.webSocketDebuggerUrl  # noqa
         return self.post(self.endpoints["activate"].format(id=id))
 
     def tab_list(self):
-        retval = self.post(self.endpoints["list"])
+        retval = self.get(self.endpoints["list"])
         return [PageElement(o) for o in retval]
 
     def tab_new(self, url):
@@ -92,8 +95,10 @@ class CDP:
         else:
             return self._last_json
 
-    def post(self, uri):
-        resp = self._session.post(self.server_addr + uri)
+    def post(self, uri, data: dict = None):
+        if not data:
+            data = {}
+        resp = self._session.post(self.server_addr + uri, json=data)
         try:
             self._last_resp = resp
             self._last_json = resp.json()
diff --git a/undetected_chromedriver/patcher.py b/undetected_chromedriver/patcher.py
index 621e07f..c86cdb6 100644
--- a/undetected_chromedriver/patcher.py
+++ b/undetected_chromedriver/patcher.py
@@ -71,25 +71,25 @@ class Patcher(object):
         self.version_main = version_main
         self.version_full = None
 
-    @classmethod
-    def auto(cls, executable_path=None, force=False):
+
+    def auto(self, executable_path=None, force=False, version_main=None):
         """
-
-        Args:
-            force:
-
-        Returns:
-
         """
-        i = cls(executable_path, force=force)
+        if executable_path:
+            self.executable_path = executable_path
+        if version_main:
+            self.version_main = version_main
+        if force is True:
+            self.force = force
+
         try:
-            os.unlink(i.executable_path)
+            os.unlink(self.executable_path)
         except PermissionError:
-            if i.force:
-                cls.force_kill_instances(i.executable_path)
-                return i.auto(force=False)
+            if self.force:
+                self.force_kill_instances(self.executable_path)
+                return self.auto(force=not self.force)
         try:
-            if i.is_binary_patched():
+            if self.is_binary_patched():
                 # assumes already running AND patched
                 return True
         except PermissionError:
@@ -98,12 +98,12 @@ class Patcher(object):
         except FileNotFoundError:
             pass
 
-        release = i.fetch_release_number()
-        i.version_main = release.version[0]
-        i.version_full = release
-        i.unzip_package(i.fetch_package())
-        i.patch()
-        return i
+        release = self.fetch_release_number()
+        self.version_main = release.version[0]
+        self.version_full = release
+        self.unzip_package(self.fetch_package())
+        # i.patch()
+        return self.patch()
 
     def patch(self):
         self.patch_exe()
diff --git a/undetected_chromedriver/v2.py b/undetected_chromedriver/v2.py
index 44450cb..78a74fd 100644
--- a/undetected_chromedriver/v2.py
+++ b/undetected_chromedriver/v2.py
@@ -80,6 +80,8 @@ class Chrome(selenium.webdriver.Chrome):
         log_level=0,
         headless=False,
         delay=5,
+        version_main=None,
+        patcher_force_close=False,
     ):
         """
         Creates a new instance of the chrome driver.
@@ -104,7 +106,7 @@ class Chrome(selenium.webdriver.Chrome):
             this enables the handling of wire messages
             when enabled, you can subscribe to CDP events by using:
 
-                driver.on_cdp_event("Network.dataReceived", yourcallback)
+                driver.add_cdp_listener("Network.dataReceived", yourcallback)
                 # yourcallback is an callable which accepts exactly 1 dict as parameter
 
         service_args: list of str, optional, default: None
@@ -135,9 +137,18 @@ class Chrome(selenium.webdriver.Chrome):
             (`with` statement) to bypass, for example CloudFlare.
             5 seconds is a foolproof value.
 
-        """
+        version_main: int, optional, default: None (=auto)
+            if you, for god knows whatever reason, use
+            an older version of Chrome. You can specify its full rounded version number
+            here. Example: 87 for all versions of 87
 
-        patcher = Patcher(executable_path=executable_path)
+        patcher_force_close: bool, optional, default: False
+            instructs the patcher to do whatever it can to access the chromedriver binary
+            if the file is locked, it will force shutdown all instances.
+            setting it is not recommended, unless you know the implications and think
+            you might need it.
+        """
+        patcher = Patcher(executable_path=executable_path, force=patcher_force_close, version_main=version_main)
         patcher.auto()
 
         if not options:
@@ -237,7 +248,10 @@ class Chrome(selenium.webdriver.Chrome):
             options.headless = True
             options.add_argument("--window-size=1920,1080")
             options.add_argument("--start-maximized")
-
+            options.add_argument("--no-sandbox")
+            # fixes "could not connect to chrome" error when running
+            # on linux using privileged user like root (which i don't recommend)
+
         options.add_argument(
             "--log-level=%d" % log_level
             or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
@@ -255,7 +269,8 @@ class Chrome(selenium.webdriver.Chrome):
                     # fixing the restore-tabs-nag
                     config["profile"]["exit_type"] = None
                 fs.seek(0, 0)
-                fs.write(json.dumps(config, indent=4))
+                json.dump(config, fs)
+                fs.truncate()  # the rewritten JSON may be shorter than the old contents
                 logger.debug("fixed exit_type flag")
         except Exception as e:
             logger.debug("did not find a bad exit_type flag ")
@@ -270,7 +285,6 @@ class Chrome(selenium.webdriver.Chrome):
             stdin=subprocess.PIPE,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            #creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
             close_fds=True,
         )
 
@@ -283,7 +297,7 @@ class Chrome(selenium.webdriver.Chrome):
             service_log_path=service_log_path,
             keep_alive=keep_alive,
         )
-
+        # intentional
         # self.webdriver = selenium.webdriver.chrome.webdriver.WebDriver(
         #     executable_path=patcher.executable_path,
         #     port=port,
@@ -306,6 +320,7 @@ class Chrome(selenium.webdriver.Chrome):
             reactor.start()
             self.reactor = reactor
 
+
         if options.headless:
             self._configure_headless()
 
@@ -493,6 +508,28 @@ class Chrome(selenium.webdriver.Chrome):
             self.reactor.add_event_handler(event_name, callback)
             return self.reactor.handlers
         return False
+
+    def clear_cdp_listeners(self):
+        if self.reactor and isinstance(self.reactor, Reactor):
+            self.reactor.handlers.clear()
+
+    def tab_new(self, url:str):
+        """
+        this opens a url in a new tab.
+        apparently, that passes all tests directly!
+
+        Parameters
+        ----------
+        url
+
+        Returns
+        -------
+
+        """
+        if not hasattr(self, 'cdp'):
+            from .cdp import CDP
+            self.cdp = CDP(self.options)
+        self.cdp.tab_new(url)
 
     def reconnect(self):
         try: