2023-02-04 14:02:46 -07:00
|
|
|
from typing import List
|
|
|
|
|
2022-11-28 15:40:41 -07:00
|
|
|
from selenium.webdriver.common.by import By
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
import selenium.webdriver.remote.webelement
|
|
|
|
|
|
|
|
|
|
|
|
class WebElement(selenium.webdriver.remote.webelement.WebElement):
|
2022-11-28 15:40:41 -07:00
|
|
|
def click_safe(self):
|
|
|
|
super().click()
|
|
|
|
self._parent.reconnect(0.1)
|
|
|
|
|
|
|
|
def children(
|
|
|
|
self, tag=None, recursive=False
|
2022-12-28 15:47:58 -07:00
|
|
|
) -> List[selenium.webdriver.remote.webelement.WebElement]:
|
2022-11-28 15:40:41 -07:00
|
|
|
"""
|
|
|
|
returns direct child elements of current element
|
|
|
|
:param tag: str, if supplied, returns <tag> nodes only
|
|
|
|
"""
|
|
|
|
script = "return [... arguments[0].children]"
|
|
|
|
if tag:
|
|
|
|
script += ".filter( node => node.tagName === '%s')" % tag.upper()
|
|
|
|
if recursive:
|
2022-12-14 14:22:22 -07:00
|
|
|
return list(_recursive_children(self, tag))
|
|
|
|
return list(self._parent.execute_script(script, self))
|
2022-11-28 15:40:41 -07:00
|
|
|
|
|
|
|
|
|
|
|
class UCWebElement(WebElement):
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
"""
|
|
|
|
Custom WebElement class which makes it easier to view elements when
|
|
|
|
working in an interactive environment.
|
|
|
|
|
|
|
|
standard webelement repr:
|
|
|
|
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
|
|
|
|
|
|
|
|
using this WebElement class:
|
|
|
|
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
|
|
|
|
|
|
|
|
"""
|
3.15
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:42:41 -06:00
|
|
|
|
2022-11-28 15:40:41 -07:00
|
|
|
def __init__(self, parent, id_):
|
|
|
|
super().__init__(parent, id_)
|
|
|
|
self._attrs = None
|
|
|
|
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
@property
|
|
|
|
def attrs(self):
|
2022-11-28 15:40:41 -07:00
|
|
|
if not self._attrs:
|
Patcher:
changed the way how patcher works (for those using multiple sessions/processes).
when not specifying a executable_path (the default, and recommended!), the filename
gets randomized to <somehex>_chromedriver[.exe]. this should fix the issue for multiprocessing
(although Chrome/driver itself has restrictions in this as well, see it using processhacker).
As i told before, webdriver is a purely io-based operation which only sends and pulls data. multiprocessing/threading isn't going to help much. You'd better use asyncio.)
find_chrome_executable:
added google-chrome-stable to the list, as some distro's have this name.
advanced_webelements: bool, optional, default: False
makes it easier to recognize elements like you know them from html/browser inspection, especially when working in an interactive environment
default webelement repr:
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
advanced webelement repr
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
note: when retrieving large amounts of elements ( example: find_elements_by_tag("*") ) and **print** them, it does take a little more time for all the repr's to fetch
Chrome() parameters
driver_executable_path=None
( = executable_path )
if you really need to specify your own chromedriver binary.
(don't log issues when you are not using the default. the downloading per session happens for a reason. remember this is a detection-focussed fork)
browser_executable_path=None
( = browser binary path )
to specify your browser in case you use exotic locations instead of the more default install folders
advanced_elements=False
if set to True, webelements get a nicer REPR showing. this is very convenient when working
interactively (like ipython for example).
<WebElement(<a class="mobile-show-inline-block mc-update-infos init-ok" href="#" id="main-cat-switcher-mobile">)>
instead of
<selenium.webdriver.remote.webelement.WebElement (session="85ff0f671512fa535630e71ee951b1f2", element="6357cb55-92c3-4c0f-9416-b174f9c1b8c4")>
2022-03-13 16:05:22 -06:00
|
|
|
self._attrs = self._parent.execute_script(
|
|
|
|
"""
|
|
|
|
var items = {};
|
|
|
|
for (index = 0; index < arguments[0].attributes.length; ++index)
|
|
|
|
{
|
|
|
|
items[arguments[0].attributes[index].name] = arguments[0].attributes[index].value
|
|
|
|
};
|
|
|
|
return items;
|
|
|
|
""",
|
|
|
|
self,
|
|
|
|
)
|
|
|
|
return self._attrs
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
strattrs = " ".join([f'{k}="{v}"' for k, v in self.attrs.items()])
|
|
|
|
if strattrs:
|
|
|
|
strattrs = " " + strattrs
|
2022-03-13 17:22:13 -06:00
|
|
|
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
|
2022-11-28 15:40:41 -07:00
|
|
|
|
|
|
|
|
|
|
|
def _recursive_children(element, tag: str = None, _results=None):
|
|
|
|
"""
|
|
|
|
returns all children of <element> recursively
|
|
|
|
|
|
|
|
:param element: `WebElement` object.
|
|
|
|
find children below this <element>
|
|
|
|
|
|
|
|
:param tag: str = None.
|
|
|
|
if provided, return only <tag> elements. example: 'a', or 'img'
|
|
|
|
:param _results: do not use!
|
|
|
|
"""
|
|
|
|
results = _results or set()
|
|
|
|
for element in element.children():
|
|
|
|
if tag:
|
|
|
|
if element.tag_name == tag:
|
|
|
|
results.add(element)
|
|
|
|
else:
|
|
|
|
results.add(element)
|
|
|
|
results |= _recursive_children(element, tag, results)
|
2022-12-14 14:22:22 -07:00
|
|
|
return results
|