41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
import chromedriver_autoinstaller
|
|
import undetected_chromedriver
|
|
from langchain_core.tools import tool
|
|
from selenium.webdriver.chromium.options import ChromiumOptions
|
|
|
|
from pers import GLOBALS
|
|
from pers.langchain.tools.tools import _print_func_call, PRINT_USAGE
|
|
|
|
# Character cap for tool results, going by the name. Nothing in the code shown
# here reads it -- TODO confirm where (or whether) the limit is enforced.
MAX_RESULT_LENGTH_CHAR = 5000
|
|
|
|
|
|
def get_chrome_webdriver():
    """Start a headless Chrome session through undetected_chromedriver.

    Runs ``chromedriver_autoinstaller.install()`` first, then builds a
    ``ChromiumOptions`` object carrying a fixed set of command-line switches
    and hands it to ``undetected_chromedriver.Chrome``.

    Returns:
        The started ``undetected_chromedriver.Chrome`` instance. Nothing here
        shuts it down; the caller owns the session and must quit it.
    """
    chromedriver_autoinstaller.install()

    # Switches are applied in this order, one add_argument() call per entry.
    launch_flags = (
        "--test-type",
        '--ignore-certificate-errors',
        '--disable-extensions',
        '--disable-infobars',
        "--incognito",
    )
    options = ChromiumOptions()
    for flag in launch_flags:
        options.add_argument(flag)

    return undetected_chromedriver.Chrome(headless=True, options=options)
|
|
|
|
|
|
def render_webpage(url: str) -> str:
    """Load ``url`` in a fresh headless browser and return the page source.

    A new driver is created for every call via ``get_chrome_webdriver()`` and
    is always shut down before this function returns or raises.

    Args:
        url: Address passed unchanged to the driver's ``get``.

    Returns:
        The value of the driver's ``page_source`` after navigation.

    Raises:
        Any exception raised by the driver while navigating, reading the
        source, or closing the window propagates to the caller; the driver is
        still quit first.
    """
    browser = get_chrome_webdriver()
    try:
        browser.get(url)
        html_source = browser.page_source
        browser.close()
    finally:
        # Without this, a failed navigation (bad URL, timeout, crash) would
        # skip the shutdown calls and leave the browser session running.
        browser.quit()
    return html_source
|
|
|
|
|
|
@tool('render_webpage')
def render_webpage_tool(url: str, reasoning: str):
    """Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool. Best for when you need to do complicated parsing of a webpage or are dealing with very long pages."""
    # NOTE(review): the docstring above is kept verbatim on purpose --
    # presumably the @tool decorator exposes it as the tool description, so
    # editing it would change runtime behavior. Confirm before rewording.

    # `reasoning` is only echoed in the usage trace; it does not affect the fetch.
    if PRINT_USAGE:
        call_args = {'url': url, 'reasoning': reasoning}
        _print_func_call('render_webpage', call_args)

    # Render the page in a browser, then hand the HTML to the shared document
    # manager and return whatever its create_retrieval() produces.
    page_html = render_webpage(url)
    GLOBALS.DocumentManager.load_data(page_html)
    return GLOBALS.DocumentManager.create_retrieval()
|