Revert "Unroll notify error into generate response" (#2605)

Revert "Unroll notify error into generate response (#2597)" This reverts commit d22b0c1fbe.
2024-10-03 17:56:40 -04:00 · 2024-10-03 17:56:40 -04:00 · 3011639ff7
parent f6e2f05b16
commit 3011639ff7
4 changed files with 44 additions and 43 deletions
--- a/docs/source/basic_tutorials/using_guidance.md
+++ b/docs/source/basic_tutorials/using_guidance.md
@@ -311,13 +311,11 @@ print(chat.choices[0].message.tool_calls)

 ```

-### OpenAI Integration
+### OpenAI integration

-Text Generation Inference (TGI) offers seamless integration with OpenAI's client libraries, allowing developers to interact with TGI's Messages API and Tool functions in a familiar way. This compatibility simplifies the implementation of advanced features, such as tools and grammar, within your applications using OpenAI’s client.
+TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.

-Previously, TGI handled tool selection differently than OpenAI’s API—`tool_choice="auto"` would always pick a tool for you. However, as of the latest version, TGI now mimics OpenAI’s behavior more closely: `tool_choice="auto"` selects a tool only when the model deems it necessary, aligning with how OpenAI's API works. This enhancement ensures a smoother and more predictable integration experience.
-
-Additionally, error notifications like `notify_error`, which previously indicated that no tool was chosen, are no longer returned. Instead, TGI will proceed with generating a response as if no tool was selected, further improving consistency with OpenAI's API.
+However, there are some minor differences in the API; for example, `tool_choice="auto"` will ALWAYS choose the tool for you. This is different from OpenAI's API, where `tool_choice="auto"` will choose a tool only if the model thinks it's necessary.

 ```python
 from openai import OpenAI
--- a/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
+++ b/integration-tests/models/snapshots/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
@@ -1,26 +1,38 @@
 {
  "choices": [
    {
-      "finish_reason": "stop",
+      "finish_reason": "eos_token",
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "There is a huge storm in the ocean",
+        "content": null,
        "name": null,
        "role": "assistant",
-        "tool_calls": null
+        "tool_calls": [
+          {
+            "function": {
+              "arguments": {
+                "error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
+              },
+              "description": null,
+              "name": "notify_error"
+            },
+            "id": 0,
+            "type": "function"
+          }
+        ]
      },
      "usage": null
    }
  ],
-  "created": 1727796440,
+  "created": 1712852597,
  "id": "",
-  "model": "meta-llama/Llama-3.1-8B-Instruct",
-  "object": "chat.completion",
-  "system_fingerprint": "2.3.1-dev0-native",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "1.4.5-native",
  "usage": {
-    "completion_tokens": 25,
-    "prompt_tokens": 600,
-    "total_tokens": 625
+    "completion_tokens": 39,
+    "prompt_tokens": 496,
+    "total_tokens": 535
  }
 }
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -225,6 +225,10 @@ async def test_flash_llama_grammar_tools_insufficient_information(
        tools=tools,
        tool_choice="auto",
        messages=[
+            {
+                "role": "system",
+                "content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
+            },
            {
                "role": "user",
                "content": "Tell me a story about 3 sea creatures",
@@ -233,5 +237,8 @@ async def test_flash_llama_grammar_tools_insufficient_information(
        stream=False,
    )

-    assert responses.choices[0].message.content == "There is a huge storm in the ocean"
+    assert responses.choices[0].message.content is None
+    assert (
+        responses.choices[0].message.tool_calls[0]["function"]["name"] == "notify_error"
+    )
    assert responses == response_snapshot
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1246,21 +1246,7 @@ async fn chat_completions(
            if let Value::Object(ref mut props) = arguments {
                props.remove("_name");
            }
-            match name.as_str() {
-                "notify_error" => {
-                    // parse the error message
-                    let error_message = arguments
-                        .get("error")
-                        .and_then(Value::as_str)
-                        .ok_or_else(|| {
-                            InferError::ToolError(
-                                "No error message found in generated text".to_string(),
-                            )
-                        })?
-                        .to_string();
-                    (None, Some(error_message))
-                }
-                _ => {
+
            let tool_calls = vec![ToolCall {
                id: "0".to_string(),
                r#type: "function".to_string(),
@@ -1271,8 +1257,6 @@ async fn chat_completions(
                },
            }];
            (Some(tool_calls), None)
-                }
-            }
        } else {
            (None, Some(generation.generated_text))
        };