From d22b0c1fbef747f3c38f6424a7a6d4c90ed408c3 Mon Sep 17 00:00:00 2001
From: drbh
Date: Wed, 2 Oct 2024 11:34:57 -0400
Subject: [PATCH] Unroll notify error into generate response (#2597)

* feat: unroll notify_error if no tool is chosen
* fix: expect simple message when no tool is selected
* fix: improve test to avoid notify_error
* fix: improve docs and indicate change in expected response
* fix: adjust linting in test file
---
 docs/source/basic_tutorials/using_guidance.md |  8 ++--
 ...rammar_tools_insufficient_information.json | 32 +++++-----------
 integration-tests/models/test_tools_llama.py  |  9 +----
 router/src/server.rs                          | 38 +++++++++++++------
 4 files changed, 43 insertions(+), 44 deletions(-)

diff --git a/docs/source/basic_tutorials/using_guidance.md b/docs/source/basic_tutorials/using_guidance.md
index dfa3f0e4..7e33e9a2 100644
--- a/docs/source/basic_tutorials/using_guidance.md
+++ b/docs/source/basic_tutorials/using_guidance.md
@@ -311,11 +311,13 @@ print(chat.choices[0].message.tool_calls)
 
 ```
 
-### OpenAI integration
+### OpenAI Integration
 
-TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
+Text Generation Inference (TGI) exposes an OpenAI-compatible API, so you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions. This makes it straightforward to use advanced features such as tools and grammar from applications already built on OpenAI's client.
 
-However there are some minor differences in the API, for example `tool_choice="auto"` will ALWAYS choose the tool for you. This is different from OpenAI's API where `tool_choice="auto"` will choose a tool if the model thinks it's necessary.
+Previously, TGI handled tool selection differently from OpenAI's API: `tool_choice="auto"` would always pick a tool for you. As of the latest version, TGI matches OpenAI's behavior: with `tool_choice="auto"`, a tool is selected only when the model deems it necessary. This makes the integration more predictable.
+
+Additionally, error notifications like `notify_error`, which previously indicated that no tool was chosen, are no longer returned. Instead, TGI proceeds to generate a response as if no tool had been selected, further improving consistency with OpenAI's API.
 
 ```python
 from openai import OpenAI
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
index 0cd3c67f..7f7f7884 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
@@ -1,38 +1,26 @@
 {
   "choices": [
     {
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": null,
+        "content": "There is a huge storm in the ocean",
         "name": null,
         "role": "assistant",
-        "tool_calls": [
-          {
-            "function": {
-              "arguments": {
-                "error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
-              },
-              "description": null,
-              "name": "notify_error"
-            },
-            "id": 0,
-            "type": "function"
-          }
-        ]
+        "tool_calls": null
       },
       "usage": null
     }
   ],
-  "created": 1712852597,
+  "created": 1727796440,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "1.4.5-native",
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "object": "chat.completion",
+  "system_fingerprint": "2.3.1-dev0-native",
   "usage": {
-    "completion_tokens": 39,
-    "prompt_tokens": 496,
-    "total_tokens": 535
+    "completion_tokens": 25,
+    "prompt_tokens": 600,
+    "total_tokens": 625
   }
 }
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
index c337afa1..3c222b72 100644
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -225,10 +225,6 @@ async def test_flash_llama_grammar_tools_insufficient_information(
         tools=tools,
         tool_choice="auto",
         messages=[
-            {
-                "role": "system",
-                "content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
-            },
             {
                 "role": "user",
                 "content": "Tell me a story about 3 sea creatures",
@@ -237,8 +233,5 @@ async def test_flash_llama_grammar_tools_insufficient_information(
         stream=False,
     )
 
-    assert responses.choices[0].message.content is None
-    assert (
-        responses.choices[0].message.tool_calls[0]["function"]["name"] == "notify_error"
-    )
+    assert responses.choices[0].message.content == "There is a huge storm in the ocean"
     assert responses == response_snapshot
diff --git a/router/src/server.rs b/router/src/server.rs
index 73b54321..fb06b245 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1246,17 +1246,33 @@ async fn chat_completions(
             if let Value::Object(ref mut props) = arguments {
                 props.remove("_name");
             }
-
-            let tool_calls = vec![ToolCall {
-                id: "0".to_string(),
-                r#type: "function".to_string(),
-                function: FunctionDefinition {
-                    description: None,
-                    name,
-                    arguments,
-                },
-            }];
-            (Some(tool_calls), None)
+            match name.as_str() {
+                "notify_error" => {
+                    // parse the error message
+                    let error_message = arguments
+                        .get("error")
+                        .and_then(Value::as_str)
+                        .ok_or_else(|| {
+                            InferError::ToolError(
+                                "No error message found in generated text".to_string(),
+                            )
+                        })?
+                        .to_string();
+                    (None, Some(error_message))
+                }
+                _ => {
+                    let tool_calls = vec![ToolCall {
+                        id: "0".to_string(),
+                        r#type: "function".to_string(),
+                        function: FunctionDefinition {
+                            description: None,
+                            name,
+                            arguments,
+                        },
+                    }];
+                    (Some(tool_calls), None)
+                }
+            }
         } else {
             (None, Some(generation.generated_text))
         };
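
Reviewer note (not part of the patch): the sketch below illustrates the client-visible effect of this change through OpenAI's Python client. The endpoint URL, placeholder API key, model alias, and tool schema are illustrative assumptions, not values taken from this diff.

```python
from openai import OpenAI

# Assumed local TGI endpoint; adjust to your deployment.
client = OpenAI(base_url="http://localhost:3000/v1", api_key="-")

# A hypothetical weather tool, mirroring the integration test's setup.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

chat = client.chat.completions.create(
    model="tgi",
    messages=[
        # A prompt no weather tool can answer: the model should decline
        # to call a tool and reply with plain text instead.
        {"role": "user", "content": "Tell me a story about 3 sea creatures"}
    ],
    tools=tools,
    tool_choice="auto",  # let the model decide whether a tool is needed
    stream=False,
)

# With this patch applied, content is populated and tool_calls is None.
assert chat.choices[0].message.content is not None
assert chat.choices[0].message.tool_calls is None
```

Before this patch, the same request produced `content=None` plus a single `notify_error` tool call, as the removed lines of the old snapshot above show.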