Update response type for `/v1/chat/completions` and `/v1/completions` (#1747)

`/v1/chat/completions` and `/v1/completions` have different output types depending on the `stream` parameter. This PR aims to fix the inconsistency in the auto-generated [openapi.json](https://huggingface.github.io/text-generation-inference/openapi.json) spec. cc @OlivierDehaene @drbh I reused what had been done for the `/` endpoint but haven't tested anything myself. Could you confirm this is the correct way of handling things? Also, should I update the openapi.json file manually? If so, how can I do it?
2024-04-16 19:26:32 +02:00 · 2024-04-16 19:26:32 +02:00 · 00f365353e
parent 7276d43495
commit 00f365353e
1 changed file with 11 additions and 3 deletions
--- a/router/src/server.rs
+++ b/router/src/server.rs
@ -548,7 +548,11 @@ async fn generate_stream_internal(
    path = "/v1/completions",
    request_body = CompletionRequest,
    responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+    ("application/json" = Completion),
+    ("text/event-stream" = CompletionCompleteChunk),
+    )),
    (status = 424, description = "Generation Error", body = ErrorResponse,
    example = json ! ({"error": "Request failed during generation"})),
    (status = 429, description = "Model is overloaded", body = ErrorResponse,
@ -652,7 +656,7 @@ async fn completions(
                })
                .map_or_else(
                    |e| {
-                        println!("Failed to serialize ChatCompletionChunk: {:?}", e);
+                        println!("Failed to serialize CompletionCompleteChunk: {:?}", e);
                        Event::default()
                    },
                    |data| data,
@ -725,7 +729,11 @@ async fn completions(
    path = "/v1/chat/completions",
    request_body = ChatRequest,
    responses(
-    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 200, description = "Generated Chat Completion",
+    content(
+    ("application/json" = ChatCompletion),
+    ("text/event-stream" = ChatCompletionChunk),
+    )),
    (status = 424, description = "Generation Error", body = ErrorResponse,
    example = json ! ({"error": "Request failed during generation"})),
    (status = 429, description = "Model is overloaded", body = ErrorResponse,