Revert "Unroll notify error into generate response" (#2605)

Revert "Unroll notify error into generate response (#2597)"

This reverts commit d22b0c1fbe.
This commit is contained in:
drbh 2024-10-03 17:56:40 -04:00 committed by GitHub
parent f6e2f05b16
commit 3011639ff7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 44 additions and 43 deletions

View File

@ -311,13 +311,11 @@ print(chat.choices[0].message.tool_calls)
``` ```
### OpenAI Integration ### OpenAI integration
Text Generation Inference (TGI) offers seamless integration with OpenAI's client libraries, allowing developers to interact with TGI's Messages API and Tool functions in a familiar way. This compatibility simplifies the implementation of advanced features, such as tools and grammar, within your applications using OpenAI's client. TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
Previously, TGI handled tool selection differently than OpenAI's API—`tool_choice="auto"` would always pick a tool for you. However, as of the latest version, TGI now mimics OpenAI's behavior more closely: `tool_choice="auto"` selects a tool only when the model deems it necessary, aligning with how OpenAI's API works. This enhancement ensures a smoother and more predictable integration experience. However there are some minor differences in the API, for example `tool_choice="auto"` will ALWAYS choose the tool for you. This is different from OpenAI's API where `tool_choice="auto"` will choose a tool if the model thinks it's necessary.
Additionally, error notifications like `notify_error`, which previously indicated that no tool was chosen, are no longer returned. Instead, TGI will proceed with generating a response as if no tool was selected, further improving consistency with OpenAI's API.
```python ```python
from openai import OpenAI from openai import OpenAI

View File

@ -1,26 +1,38 @@
{ {
"choices": [ "choices": [
{ {
"finish_reason": "stop", "finish_reason": "eos_token",
"index": 0, "index": 0,
"logprobs": null, "logprobs": null,
"message": { "message": {
"content": "There is a huge storm in the ocean", "content": null,
"name": null, "name": null,
"role": "assistant", "role": "assistant",
"tool_calls": null "tool_calls": [
{
"function": {
"arguments": {
"error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
},
"description": null,
"name": "notify_error"
},
"id": 0,
"type": "function"
}
]
}, },
"usage": null "usage": null
} }
], ],
"created": 1727796440, "created": 1712852597,
"id": "", "id": "",
"model": "meta-llama/Llama-3.1-8B-Instruct", "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "chat.completion", "object": "text_completion",
"system_fingerprint": "2.3.1-dev0-native", "system_fingerprint": "1.4.5-native",
"usage": { "usage": {
"completion_tokens": 25, "completion_tokens": 39,
"prompt_tokens": 600, "prompt_tokens": 496,
"total_tokens": 625 "total_tokens": 535
} }
} }

View File

@ -225,6 +225,10 @@ async def test_flash_llama_grammar_tools_insufficient_information(
tools=tools, tools=tools,
tool_choice="auto", tool_choice="auto",
messages=[ messages=[
{
"role": "system",
"content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
},
{ {
"role": "user", "role": "user",
"content": "Tell me a story about 3 sea creatures", "content": "Tell me a story about 3 sea creatures",
@ -233,5 +237,8 @@ async def test_flash_llama_grammar_tools_insufficient_information(
stream=False, stream=False,
) )
assert responses.choices[0].message.content == "There is a huge storm in the ocean" assert responses.choices[0].message.content is None
assert (
responses.choices[0].message.tool_calls[0]["function"]["name"] == "notify_error"
)
assert responses == response_snapshot assert responses == response_snapshot

View File

@ -1246,21 +1246,7 @@ async fn chat_completions(
if let Value::Object(ref mut props) = arguments { if let Value::Object(ref mut props) = arguments {
props.remove("_name"); props.remove("_name");
} }
match name.as_str() {
"notify_error" => {
// parse the error message
let error_message = arguments
.get("error")
.and_then(Value::as_str)
.ok_or_else(|| {
InferError::ToolError(
"No error message found in generated text".to_string(),
)
})?
.to_string();
(None, Some(error_message))
}
_ => {
let tool_calls = vec![ToolCall { let tool_calls = vec![ToolCall {
id: "0".to_string(), id: "0".to_string(),
r#type: "function".to_string(), r#type: "function".to_string(),
@ -1271,8 +1257,6 @@ async fn chat_completions(
}, },
}]; }];
(Some(tool_calls), None) (Some(tool_calls), None)
}
}
} else { } else {
(None, Some(generation.generated_text)) (None, Some(generation.generated_text))
}; };