From 818aee37e5d52a1b9af707e1eff70bae1f0c4ef3 Mon Sep 17 00:00:00 2001
From: drbh
Date: Thu, 28 Mar 2024 12:01:46 -0400
Subject: [PATCH] fix: adjust logprob response logic (#1682)

This PR fixes a bug in `ChatCompletionLogprobs` where empty results were
returned whenever `top_tokens.len() == 0`. It also forwards the request's
`top_logprobs` value to `top_n_tokens` in the router, which was previously
hardcoded to `None`.

```bash
curl http://localhost:3000/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
  "model": "tgi",
  "logprobs": true,
  "messages": [
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": false,
  "max_tokens": 20
}'
```

response

```json
{"id":"","object":"text_completion","created":1711588522,"model":"google/gemma-2b-it","system_fingerprint":"1.4.4-native","choices":[{"index":0,"message":{"role":"assistant","content":"**Deep learning** is a subset of machine learning (ML) that emphasizes the creation of **artificial"},"logprobs":{"content":[{"token":"**","logprob":-0.22558594,"top_logprobs":[]},{"token":"Deep","logprob":-0.0014877319,"top_logprobs":[]},{"token":" learning","logprob":-0.12695312,"top_logprobs":[]},{"token":"**","logprob":-0.055664062,"top_logprobs":[]},{"token":" is","logprob":-0.00090026855,"top_logprobs":[]},{"token":" a","logprob":-0.006072998,"top_logprobs":[]},{"token":" subset","logprob":-2.25,"top_logprobs":[]},{"token":" of","logprob":-0.00031089783,"top_logprobs":[]},{"token":" machine","logprob":-0.091308594,"top_logprobs":[]},{"token":" learning","logprob":-0.00002348423,"top_logprobs":[]},{"token":" (","logprob":-1.671875,"top_logprobs":[]},{"token":"ML","logprob":-0.00040626526,"top_logprobs":[]},{"token":")","logprob":-0.00016212463,"top_logprobs":[]},{"token":" that","logprob":-0.13769531,"top_logprobs":[]},{"token":" emphasizes","logprob":-4.03125,"top_logprobs":[]},{"token":" the","logprob":-0.2890625,"top_logprobs":[]},{"token":" creation","logprob":-3.109375,"top_logprobs":[]},{"token":" of","logprob":-0.00024032593,"top_logprobs":[]},{"token":" **","logprob":-1.2265625,"top_logprobs":[]},{"token":"artificial","logprob":-0.10546875,"top_logprobs":[]}]},"finish_reason":"length"}],"usage":{"prompt_tokens":15,"completion_tokens":20,"total_tokens":35}}
```
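For illustration, here is a minimal standalone sketch (toy data and types, not the actual router code) of the iterator behavior this patch changes: `zip` stops at the shorter iterator, so an empty `top_tokens` used to swallow every token, while padding the exhausted side with `None` keeps one entry per token:

```rust
use std::iter;

fn main() {
    let tokens = vec!["Deep", "learning", "is"];
    // logprobs were requested, but top_n_tokens was None, so this is empty
    let top_tokens: Vec<Vec<&str>> = vec![];

    // Old behavior: zip truncates to the shorter side, dropping everything
    let old: Vec<_> = tokens.iter().zip(top_tokens.iter()).collect();
    assert!(old.is_empty());

    // New behavior: wrap in Some and chain an infinite stream of None,
    // so every token survives with an empty top_logprobs list
    let top_iter = top_tokens.iter().map(Some).chain(iter::repeat(None));
    let new: Vec<_> = tokens
        .iter()
        .zip(top_iter)
        .map(|(t, top)| (*t, top.map_or(0, |v| v.len())))
        .collect();
    assert_eq!(new.len(), tokens.len()); // one entry per token, 0 top logprobs each
}
```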
---
 router/src/lib.rs    | 32 +++++++++++++++++++++-----------
 router/src/server.rs |  2 +-
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/router/src/lib.rs b/router/src/lib.rs
index b231a31f..5415a956 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -385,23 +385,33 @@ impl From<(Token, Vec<Token>)> for ChatCompletionLogprobs {
 impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
     fn from(value: (Vec<Token>, Vec<Vec<Token>>)) -> Self {
         let (tokens, top_tokens) = value;
-        Self {
-            content: tokens
-                .into_iter()
-                .zip(top_tokens)
-                .map(|(t, top_t)| ChatCompletionLogprob {
-                    token: t.text,
-                    logprob: t.logprob,
-                    top_logprobs: top_t
+
+        // Create an iterator that produces None for top_tokens once it's exhausted
+        let top_tokens_iter = top_tokens
+            .into_iter()
+            .map(Some)
+            .chain(std::iter::repeat(None));
+
+        let content = tokens
+            .into_iter()
+            .zip(top_tokens_iter)
+            .map(|(t, top_t_option)| ChatCompletionLogprob {
+                token: t.text,
+                logprob: t.logprob,
+                top_logprobs: match top_t_option {
+                    Some(top_t) => top_t
                         .into_iter()
                         .map(|t| ChatCompletionTopLogprob {
                             token: t.text,
                             logprob: t.logprob,
                         })
                         .collect(),
-                })
-                .collect(),
-        }
+                    None => vec![], // Handle the case where there are no top tokens
+                },
+            })
+            .collect();
+
+        Self { content }
     }
 }
 
diff --git a/router/src/server.rs b/router/src/server.rs
index 652bc699..b328a8e8 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -857,7 +857,7 @@ async fn chat_completions(
             details: true,
             decoder_input_details: !stream,
             seed,
-            top_n_tokens: None,
+            top_n_tokens: req.top_logprobs,
             grammar: tool_grammar.clone(),
         },
     };
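With `top_n_tokens` now taken from the request rather than hardcoded to `None`, per-position alternatives can be requested through the `top_logprobs` field that the diff reads from `req`. A sketch of such a request, mirroring the example above (the value `2` is an arbitrary choice for illustration):

```bash
curl http://localhost:3000/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
  "model": "tgi",
  "logprobs": true,
  "top_logprobs": 2,
  "messages": [
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": false,
  "max_tokens": 20
}'
```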