diff --git a/pers/langchain/tools/browser.py b/pers/langchain/tools/browser.py index 640ae65..b1ded7d 100644 --- a/pers/langchain/tools/browser.py +++ b/pers/langchain/tools/browser.py @@ -15,7 +15,7 @@ def get_chrome_webdriver(): chrome_options.add_argument("--test-type") chrome_options.add_argument('--ignore-certificate-errors') chrome_options.add_argument('--disable-extensions') - chrome_options.add_argument('disable-infobars') + chrome_options.add_argument('--disable-infobars') chrome_options.add_argument("--incognito") driver = undetected_chromedriver.Chrome(headless=True, options=chrome_options) return driver @@ -32,7 +32,7 @@ def render_webpage(url: str): @tool('render_webpage') def render_webpage_tool(url: str, reasoning: str): - """Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool.""" + """Fetches the raw HTML of a webpage for use with the `retrieve_from_chroma` tool. Best for when you need to do complicated parsing of a webpage or are dealing with very long pages.""" if PRINT_USAGE: _print_func_call('render_webpage', {'url': url, 'reasoning': reasoning}) html_source = render_webpage(url) diff --git a/pers/langchain/tools/web_reader.py b/pers/langchain/tools/web_reader.py index 72e03f0..2ebdc87 100644 --- a/pers/langchain/tools/web_reader.py +++ b/pers/langchain/tools/web_reader.py @@ -30,7 +30,7 @@ PUBLISH DATE: {publish_date} TOP_IMAGE_URL: {top_image} """ -MAX_RESULT_LENGTH_CHAR = 1000 * 4 # roughly 1,000 tokens +MAX_RESULT_LENGTH_CHAR = 1000 * 6 def page_result(text: str, cursor: int, max_length: int) -> str: @@ -121,7 +121,9 @@ class ReaderToolInput(BaseModel): @tool(args_schema=ReaderToolInput) def read_webpage(url: str, reasoning: str, include_body: bool = True, cursor: int = 0): - """Fetch a webpage's text content. This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page.""" + """Fetch a webpage's text content. 
+ This tool truncates the text content if it is longer than the context limit. You will see a line like `PAGE WAS TRUNCATED. TO CONTINUE READING, USE CURSOR=n.` when this happens, where `CURSOR=n` is the starting position for the next page. To continue reading, call this tool with the `cursor` argument set to where you want to begin. + This function may not correctly parse complicated webpages, so use render_webpage if targeting specific HTML elements or expecting a complicated page. Best for when you need to simply read the page.""" if PRINT_USAGE: _print_func_call('read_webpage', {'url': url, 'reasoning': reasoning}) diff --git a/requirements.txt b/requirements.txt index f0e5252..673c485 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,10 +8,10 @@ newspaper3k playwright beautifulsoup4==4.12.3 chromedriver-autoinstaller==0.6.4 -undetected-chromedriver==3.5.4 +undetected-chromedriver==3.5.5 redis==5.0.1 async-timeout==4.0.3 pyyaml==6.0.1 py-cpuinfo==9.0.0 psutil==5.9.8 -chromadb==0.4.22 \ No newline at end of file +chromadb==0.4.22