fix: adjust logprob response logic (#1682)
This PR fixes a bug in `ChatCompletionLogprobs` where, if `top_tokens.len() == 0`, empty results were returned (the old code zipped `tokens` directly with `top_tokens`, so an empty `top_tokens` produced no entries at all).

```bash
curl http://localhost:3000/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
  "model": "tgi",
  "logprobs": true,
  "messages": [
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": false,
  "max_tokens": 20
}'
```

Response:

```json
{"id":"","object":"text_completion","created":1711588522,"model":"google/gemma-2b-it","system_fingerprint":"1.4.4-native","choices":[{"index":0,"message":{"role":"assistant","content":"**Deep learning** is a subset of machine learning (ML) that emphasizes the creation of **artificial"},"logprobs":{"content":[{"token":"**","logprob":-0.22558594,"top_logprobs":[]},{"token":"Deep","logprob":-0.0014877319,"top_logprobs":[]},{"token":" learning","logprob":-0.12695312,"top_logprobs":[]},{"token":"**","logprob":-0.055664062,"top_logprobs":[]},{"token":" is","logprob":-0.00090026855,"top_logprobs":[]},{"token":" a","logprob":-0.006072998,"top_logprobs":[]},{"token":" subset","logprob":-2.25,"top_logprobs":[]},{"token":" of","logprob":-0.00031089783,"top_logprobs":[]},{"token":" machine","logprob":-0.091308594,"top_logprobs":[]},{"token":" learning","logprob":-0.00002348423,"top_logprobs":[]},{"token":" (","logprob":-1.671875,"top_logprobs":[]},{"token":"ML","logprob":-0.00040626526,"top_logprobs":[]},{"token":")","logprob":-0.00016212463,"top_logprobs":[]},{"token":" that","logprob":-0.13769531,"top_logprobs":[]},{"token":" emphasizes","logprob":-4.03125,"top_logprobs":[]},{"token":" the","logprob":-0.2890625,"top_logprobs":[]},{"token":" creation","logprob":-3.109375,"top_logprobs":[]},{"token":" of","logprob":-0.00024032593,"top_logprobs":[]},{"token":" **","logprob":-1.2265625,"top_logprobs":[]},{"token":"artificial","logprob":-0.10546875,"top_logprobs":[]}]},"finish_reason":"length"}],"usage":{"prompt_tokens":15,"completion_tokens":20,"total_tokens":35}}
```
This commit is contained in:
parent
6c4496a1a3
commit
818aee37e5
|
@ -385,23 +385,33 @@ impl From<(Token, Vec<Token>)> for ChatCompletionLogprobs {
|
||||||
impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
|
impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
|
||||||
fn from(value: (Vec<Token>, Vec<Vec<Token>>)) -> Self {
|
fn from(value: (Vec<Token>, Vec<Vec<Token>>)) -> Self {
|
||||||
let (tokens, top_tokens) = value;
|
let (tokens, top_tokens) = value;
|
||||||
Self {
|
|
||||||
content: tokens
|
// Create an iterator that produces None for top_tokens once it's exhausted
|
||||||
|
let top_tokens_iter = top_tokens
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.zip(top_tokens)
|
.map(Some)
|
||||||
.map(|(t, top_t)| ChatCompletionLogprob {
|
.chain(std::iter::repeat(None));
|
||||||
|
|
||||||
|
let content = tokens
|
||||||
|
.into_iter()
|
||||||
|
.zip(top_tokens_iter)
|
||||||
|
.map(|(t, top_t_option)| ChatCompletionLogprob {
|
||||||
token: t.text,
|
token: t.text,
|
||||||
logprob: t.logprob,
|
logprob: t.logprob,
|
||||||
top_logprobs: top_t
|
top_logprobs: match top_t_option {
|
||||||
|
Some(top_t) => top_t
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|t| ChatCompletionTopLogprob {
|
.map(|t| ChatCompletionTopLogprob {
|
||||||
token: t.text,
|
token: t.text,
|
||||||
logprob: t.logprob,
|
logprob: t.logprob,
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect(),
|
||||||
|
None => vec![], // Handle the case where there are no top tokens
|
||||||
|
},
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect();
|
||||||
}
|
|
||||||
|
Self { content }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -857,7 +857,7 @@ async fn chat_completions(
|
||||||
details: true,
|
details: true,
|
||||||
decoder_input_details: !stream,
|
decoder_input_details: !stream,
|
||||||
seed,
|
seed,
|
||||||
top_n_tokens: None,
|
top_n_tokens: req.top_logprobs,
|
||||||
grammar: tool_grammar.clone(),
|
grammar: tool_grammar.clone(),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue