fix undetected-chromedriver issue, bump up read_webpage character limit, adjust tool descriptions

This commit is contained in:
Cyberes 2024-03-07 18:49:41 -07:00
parent f4848423b8
commit 57d47eb7fe
3 changed files with 8 additions and 6 deletions

View File

@ -15,7 +15,7 @@ def get_chrome_webdriver():
chrome_options.add_argument("--test-type")
chrome_options.add_argument('--ignore-certificate-errors')
chrome_options.add_argument('--disable-extensions')
chrome_options.add_argument('disable-infobars')
chrome_options.add_argument('--disable-infobars')
chrome_options.add_argument("--incognito")
driver = undetected_chromedriver.Chrome(headless=True, options=chrome_options)
return driver
@ -32,7 +32,7 @@ def render_webpage(url: str):
@tool('render_webpage')
def render_webpage_tool(url: str, reasoning: str):
"""Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool."""
"""Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool. Best for when you need to do complicated parsing of a webpage or are dealing with very long pages."""
if PRINT_USAGE:
_print_func_call('render_webpage', {'url': url, 'reasoning': reasoning})
html_source = render_webpage(url)

View File

@ -30,7 +30,7 @@ PUBLISH DATE: {publish_date}
TOP_IMAGE_URL: {top_image}
"""
MAX_RESULT_LENGTH_CHAR = 1000 * 4 # roughly 1,000 tokens
MAX_RESULT_LENGTH_CHAR = 1000 * 6
def page_result(text: str, cursor: int, max_length: int) -> str:
@ -121,7 +121,9 @@ class ReaderToolInput(BaseModel):
@tool(args_schema=ReaderToolInput)
def read_webpage(url: str, reasoning: str, include_body: bool = True, cursor: int = 0):
"""Fetch a webpage's text content. This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page."""
"""Fetch a webpage's text content.
This tool truncates the text content if it is longer than the context limit. You will see a line like `PAGE WAS TRUNCATED. TO CONTINUE READING, USE CURSOR=n.` when this happens, where `CURSOR=n` is the starting position for the next page. To continue reading, call this tool again with the `cursor` argument set to the position where you want to begin.
This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page. Best for when you need to simply read the page."""
if PRINT_USAGE:
_print_func_call('read_webpage', {'url': url, 'reasoning': reasoning})

View File

@ -8,10 +8,10 @@ newspaper3k
playwright
beautifulsoup4==4.12.3
chromedriver-autoinstaller==0.6.4
undetected-chromedriver==3.5.4
undetected-chromedriver==3.5.5
redis==5.0.1
async-timeout==4.0.3
pyyaml==6.0.1
py-cpuinfo==9.0.0
psutil==5.9.8
chromadb==0.4.22
chromadb==0.4.22