server-personification/pers/langchain/tools/browser.py

41 lines
1.4 KiB
Python

import chromedriver_autoinstaller
import undetected_chromedriver
from langchain_core.tools import tool
from selenium.webdriver.chromium.options import ChromiumOptions
from pers import GLOBALS
from pers.langchain.tools.tools import _print_func_call, PRINT_USAGE
# NOTE(review): not referenced in the visible part of this module; presumably an
# upper bound, in characters, on a tool result -- confirm against other modules.
MAX_RESULT_LENGTH_CHAR = 5000
def get_chrome_webdriver():
    """Build and return a headless Chrome driver from undetected_chromedriver.

    Runs ``chromedriver_autoinstaller.install()`` first, then configures a
    ``ChromiumOptions`` object with a fixed set of command-line flags and
    hands it to ``undetected_chromedriver.Chrome``.

    Returns:
        The newly created driver. The caller owns it and is responsible for
        shutting it down.
    """
    chromedriver_autoinstaller.install()

    # Flags are applied in this exact order.
    launch_flags = (
        "--test-type",
        "--ignore-certificate-errors",
        "--disable-extensions",
        "--disable-infobars",
        "--incognito",
    )
    options = ChromiumOptions()
    for flag in launch_flags:
        options.add_argument(flag)

    return undetected_chromedriver.Chrome(headless=True, options=options)
def render_webpage(url: str) -> str:
    """Load ``url`` in a fresh headless browser and return the page source.

    A new driver is created for every call via ``get_chrome_webdriver`` and
    is always shut down before returning, including when navigation or
    reading the page source raises.

    Args:
        url: Address of the page to load.

    Returns:
        The value of the driver's ``page_source`` after ``get(url)`` returns.

    Raises:
        Whatever the driver raises while navigating or reading the source;
        the exception propagates after the browser has been shut down.
    """
    browser = get_chrome_webdriver()
    try:
        browser.get(url)
        return browser.page_source
    finally:
        # quit() closes every window and ends the driver session, so a
        # separate close() is unnecessary; running it in `finally` prevents
        # an orphaned headless Chrome process when the fetch fails.
        browser.quit()
@tool('render_webpage')
def render_webpage_tool(url: str, reasoning: str):
    """Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool. Best for when you need to do complicated parsing of a webpage or are dealing with very long pages."""
    # `reasoning` is only used for the optional usage log below.
    if PRINT_USAGE:
        call_args = {'url': url, 'reasoning': reasoning}
        _print_func_call('render_webpage', call_args)

    # Render the page, hand the HTML to the shared document manager, then
    # return whatever retrieval object the manager builds from it.
    rendered_html = render_webpage(url)
    GLOBALS.DocumentManager.load_data(rendered_html)
    retrieval = GLOBALS.DocumentManager.create_retrieval()
    return retrieval