diff --git a/router/src/lib.rs b/router/src/lib.rs
index a97b9b50..64f0fafa 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -611,6 +611,11 @@ pub(crate) struct ChatRequest {
     #[schema(nullable = true, example = 0.1)]
     pub presence_penalty: Option<f32>,
 
+    /// Up to 4 sequences where the API will stop generating further tokens.
+    #[serde(default)]
+    #[schema(nullable = true, example = "null")]
+    pub stop: Option<Vec<String>>,
+
     #[serde(default = "bool::default")]
     pub stream: bool,
 
diff --git a/router/src/server.rs b/router/src/server.rs
index 9c956a73..61aacd0b 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -763,6 +763,7 @@ async fn chat_completions(
         .map(|x| x + 2.0);
     let logprobs = req.logprobs.unwrap_or(false);
     let seed = req.seed;
+    let stop = req.stop.unwrap_or_default();
 
     // apply chat template to flatten the request into a single input
     let mut inputs = match infer.apply_chat_template(req.messages) {
@@ -850,7 +851,7 @@ async fn chat_completions(
             do_sample: true,
             max_new_tokens,
             return_full_text: None,
-            stop: Vec::new(),
+            stop,
             truncate: None,
             watermark: false,
             details: true,