fix undetected-chromedriver issue, bump up read_webpage character limit, adjust tool descriptions
This commit is contained in:
parent
f4848423b8
commit
57d47eb7fe
|
@ -15,7 +15,7 @@ def get_chrome_webdriver():
|
|||
chrome_options.add_argument("--test-type")
|
||||
chrome_options.add_argument('--ignore-certificate-errors')
|
||||
chrome_options.add_argument('--disable-extensions')
|
||||
chrome_options.add_argument('disable-infobars')
|
||||
chrome_options.add_argument('--disable-infobars')
|
||||
chrome_options.add_argument("--incognito")
|
||||
driver = undetected_chromedriver.Chrome(headless=True, options=chrome_options)
|
||||
return driver
|
||||
|
@ -32,7 +32,7 @@ def render_webpage(url: str):
|
|||
|
||||
@tool('render_webpage')
|
||||
def render_webpage_tool(url: str, reasoning: str):
|
||||
"""Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool."""
|
||||
"""Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool. Best for when you need to do complicated parsing of a webpage or are dealing with very long pages."""
|
||||
if PRINT_USAGE:
|
||||
_print_func_call('render_webpage', {'url': url, 'reasoning': reasoning})
|
||||
html_source = render_webpage(url)
|
||||
|
|
|
@ -30,7 +30,7 @@ PUBLISH DATE: {publish_date}
|
|||
TOP_IMAGE_URL: {top_image}
|
||||
"""
|
||||
|
||||
MAX_RESULT_LENGTH_CHAR = 1000 * 4 # roughly 1,000 tokens
|
||||
MAX_RESULT_LENGTH_CHAR = 1000 * 6
|
||||
|
||||
|
||||
def page_result(text: str, cursor: int, max_length: int) -> str:
|
||||
|
@ -121,7 +121,9 @@ class ReaderToolInput(BaseModel):
|
|||
|
||||
@tool(args_schema=ReaderToolInput)
|
||||
def read_webpage(url: str, reasoning: str, include_body: bool = True, cursor: int = 0):
|
||||
"""Fetch a webpage's text content. This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page."""
|
||||
"""Fetch a webpage's text content.
|
||||
This tool truncates the text content if it is longer than the context limit. You will see a line like `PAGE WAS TRUNCATED. TO CONTINUE READING, USE CURSOR=n.` when this happens, where `CURSOR=n` is the starting position for the next page. To continue reading, call this tool again with the `cursor` argument set to the position where you want to begin.
|
||||
This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page. Best for when you need to simply read the page."""
|
||||
if PRINT_USAGE:
|
||||
_print_func_call('read_webpage', {'url': url, 'reasoning': reasoning})
|
||||
|
||||
|
|
|
@ -8,10 +8,10 @@ newspaper3k
|
|||
playwright
|
||||
beautifulsoup4==4.12.3
|
||||
chromedriver-autoinstaller==0.6.4
|
||||
undetected-chromedriver==3.5.4
|
||||
undetected-chromedriver==3.5.5
|
||||
redis==5.0.1
|
||||
async-timeout==4.0.3
|
||||
pyyaml==6.0.1
|
||||
py-cpuinfo==9.0.0
|
||||
psutil==5.9.8
|
||||
chromadb==0.4.22
|
||||
chromadb==0.4.22
|
||||
|
|
Reference in New Issue