fix: add missing stop parameter for chat request (#1619)

This PR adds the missing `stop` parameter to the `ChatRequest` struct,
which allows callers to specify a list of stop sequences.
This commit is contained in:
drbh 2024-03-01 12:08:11 -05:00 committed by GitHub
parent 3dd7da2198
commit 7e08751378
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 1 deletions

View File

@ -611,6 +611,11 @@ pub(crate) struct ChatRequest {
#[schema(nullable = true, example = 0.1)] #[schema(nullable = true, example = 0.1)]
pub presence_penalty: Option<f32>, pub presence_penalty: Option<f32>,
/// Up to 4 sequences where the API will stop generating further tokens.
#[serde(default)]
#[schema(nullable = true, example = "null")]
pub stop: Option<Vec<String>>,
#[serde(default = "bool::default")] #[serde(default = "bool::default")]
pub stream: bool, pub stream: bool,

View File

@ -763,6 +763,7 @@ async fn chat_completions(
.map(|x| x + 2.0); .map(|x| x + 2.0);
let logprobs = req.logprobs.unwrap_or(false); let logprobs = req.logprobs.unwrap_or(false);
let seed = req.seed; let seed = req.seed;
let stop = req.stop.unwrap_or_default();
// apply chat template to flatten the request into a single input // apply chat template to flatten the request into a single input
let mut inputs = match infer.apply_chat_template(req.messages) { let mut inputs = match infer.apply_chat_template(req.messages) {
@ -850,7 +851,7 @@ async fn chat_completions(
do_sample: true, do_sample: true,
max_new_tokens, max_new_tokens,
return_full_text: None, return_full_text: None,
stop: Vec::new(), stop,
truncate: None, truncate: None,
watermark: false, watermark: false,
details: true, details: true,