fix: adjust logprob response logic (#1682)

This PR fixes a bug in the `ChatCompletionLogprobs` conversion: when `top_tokens.len() == 0`, the response's `logprobs.content` came back empty even though per-token logprobs were available.
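
The root cause is that `Iterator::zip` stops as soon as either iterator is exhausted, so zipping the generated tokens against an empty `top_tokens` dropped every entry. A minimal sketch of the failure and of the padding fix used here (plain tuples stand in for the router's `Token` type):

```rust
fn main() {
    // Stand-ins for the real token structs.
    let tokens = vec![("Deep", -0.0015_f32), ("learning", -0.127)];
    // `logprobs` was requested but no top-N tokens were generated.
    let top_tokens: Vec<Vec<(&str, f32)>> = vec![];

    // Buggy shape: zip truncates to the shorter iterator, producing nothing.
    let buggy: Vec<_> = tokens.iter().zip(top_tokens.iter()).collect();
    assert!(buggy.is_empty());

    // Fixed shape: pad the top-token side with `None` forever,
    // so every generated token keeps its logprob entry.
    let top_iter = top_tokens.iter().map(Some).chain(std::iter::repeat(None));
    let fixed: Vec<_> = tokens.iter().zip(top_iter).collect();
    assert_eq!(fixed.len(), tokens.len());
}
```

To reproduce against a running server, request `logprobs` without setting `top_logprobs`: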

```bash
 curl http://localhost:3000/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
  "model": "tgi",
  "logprobs": true,
  "messages": [
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": false,
  "max_tokens": 20
}'
```

Response (every token now gets its own logprob entry; the `top_logprobs` arrays are empty because the request did not set `top_logprobs`):

```json
{"id":"","object":"text_completion","created":1711588522,"model":"google/gemma-2b-it","system_fingerprint":"1.4.4-native","choices":[{"index":0,"message":{"role":"assistant","content":"**Deep learning** is a subset of machine learning (ML) that emphasizes the creation of **artificial"},"logprobs":{"content":[{"token":"**","logprob":-0.22558594,"top_logprobs":[]},{"token":"Deep","logprob":-0.0014877319,"top_logprobs":[]},{"token":" learning","logprob":-0.12695312,"top_logprobs":[]},{"token":"**","logprob":-0.055664062,"top_logprobs":[]},{"token":" is","logprob":-0.00090026855,"top_logprobs":[]},{"token":" a","logprob":-0.006072998,"top_logprobs":[]},{"token":" subset","logprob":-2.25,"top_logprobs":[]},{"token":" of","logprob":-0.00031089783,"top_logprobs":[]},{"token":" machine","logprob":-0.091308594,"top_logprobs":[]},{"token":" learning","logprob":-0.00002348423,"top_logprobs":[]},{"token":" (","logprob":-1.671875,"top_logprobs":[]},{"token":"ML","logprob":-0.00040626526,"top_logprobs":[]},{"token":")","logprob":-0.00016212463,"top_logprobs":[]},{"token":" that","logprob":-0.13769531,"top_logprobs":[]},{"token":" emphasizes","logprob":-4.03125,"top_logprobs":[]},{"token":" the","logprob":-0.2890625,"top_logprobs":[]},{"token":" creation","logprob":-3.109375,"top_logprobs":[]},{"token":" of","logprob":-0.00024032593,"top_logprobs":[]},{"token":" **","logprob":-1.2265625,"top_logprobs":[]},{"token":"artificial","logprob":-0.10546875,"top_logprobs":[]}]},"finish_reason":"length"}],"usage":{"prompt_tokens":15,"completion_tokens":20,"total_tokens":35}}
```
Commit 818aee37e5 (parent 6c4496a1a3) by drbh, committed 2024-03-28 12:01:46 -04:00. 2 changed files with 22 additions and 12 deletions.

```diff
@@ -385,23 +385,33 @@ impl From<(Token, Vec<Token>)> for ChatCompletionLogprobs {
 impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
     fn from(value: (Vec<Token>, Vec<Vec<Token>>)) -> Self {
         let (tokens, top_tokens) = value;
-        Self {
-            content: tokens
-                .into_iter()
-                .zip(top_tokens)
-                .map(|(t, top_t)| ChatCompletionLogprob {
-                    token: t.text,
-                    logprob: t.logprob,
-                    top_logprobs: top_t
-                        .into_iter()
-                        .map(|t| ChatCompletionTopLogprob {
-                            token: t.text,
-                            logprob: t.logprob,
-                        })
-                        .collect(),
-                })
-                .collect(),
-        }
+
+        // Create an iterator that produces None for top_tokens once it's exhausted
+        let top_tokens_iter = top_tokens
+            .into_iter()
+            .map(Some)
+            .chain(std::iter::repeat(None));
+
+        let content = tokens
+            .into_iter()
+            .zip(top_tokens_iter)
+            .map(|(t, top_t_option)| ChatCompletionLogprob {
+                token: t.text,
+                logprob: t.logprob,
+                top_logprobs: match top_t_option {
+                    Some(top_t) => top_t
+                        .into_iter()
+                        .map(|t| ChatCompletionTopLogprob {
+                            token: t.text,
+                            logprob: t.logprob,
+                        })
+                        .collect(),
+                    None => vec![], // Handle the case where there are no top tokens
+                },
+            })
+            .collect();
+
+        Self { content }
     }
 }
```

```diff
@@ -857,7 +857,7 @@ async fn chat_completions(
             details: true,
             decoder_input_details: !stream,
             seed,
-            top_n_tokens: None,
+            top_n_tokens: req.top_logprobs,
             grammar: tool_grammar.clone(),
         },
     };
```
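
This second hunk is what makes `top_logprobs` usable at all: `top_n_tokens` was previously hardcoded to `None`, so the value sent in the request was silently dropped. A condensed sketch of the wiring (the struct shapes and `Option<u32>` types here are simplified stand-ins, not the router's full definitions):

```rust
// Simplified stand-ins for the router's request types; only the fields
// relevant to this change are shown, and the Option<u32> types are assumed.
struct ChatRequest {
    logprobs: Option<bool>,
    top_logprobs: Option<u32>,
}

struct GenerateParameters {
    top_n_tokens: Option<u32>,
}

fn to_generate_parameters(req: &ChatRequest) -> GenerateParameters {
    GenerateParameters {
        // Before this fix: `top_n_tokens: None`, ignoring the request value.
        top_n_tokens: req.top_logprobs,
    }
}

fn main() {
    let req = ChatRequest { logprobs: Some(true), top_logprobs: Some(3) };
    let params = to_generate_parameters(&req);
    println!("logprobs={:?} -> top_n_tokens={:?}", req.logprobs, params.top_n_tokens);
    assert_eq!(params.top_n_tokens, Some(3));
}
```

With this in place, sending `"top_logprobs": N` alongside `"logprobs": true` populates the per-token `top_logprobs` arrays instead of always returning `[]`.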