diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs
index 1585a25f..3a70f68d 100644
--- a/benchmark/src/table.rs
+++ b/benchmark/src/table.rs
@@ -16,6 +16,7 @@ pub(crate) fn parameters_table(
     typical_p: Option<f32>,
     repetition_penalty: Option<f32>,
     frequency_penalty: Option<f32>,
+    no_repeat_ngram_size: Option<u32>,
     watermark: bool,
     do_sample: bool,
 ) -> Table {
@@ -35,6 +36,7 @@ pub(crate) fn parameters_table(
     builder.push_record(["Typical P", &format!("{typical_p:?}")]);
     builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
     builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]);
+    builder.push_record(["No Repeat Ngram Size", &format!("{no_repeat_ngram_size:?}")]);
     builder.push_record(["Watermark", &watermark.to_string()]);
     builder.push_record(["Do Sample", &do_sample.to_string()]);
 
diff --git a/clients/python/README.md b/clients/python/README.md
index 88239aa1..f611a25c 100644
--- a/clients/python/README.md
+++ b/clients/python/README.md
@@ -135,6 +135,10 @@ class Parameters:
     # Penalize new tokens based on their existing frequency in the text so far,
     # decreasing the model's likelihood to repeat the same line verbatim.
     frequency_penalty: Optional[float]
+    # n-grams are groups of "n" consecutive words, characters, or tokens taken from a sequence of text. Given the
+    # sentence: "She runs fast", the bi-grams (n=2) would be ("She", "runs") and ("runs", "fast"). Set this to the
+    # n-gram size whose n-grams must not be repeated in the completion.
+    no_repeat_ngram_size: Optional[int]
     # Whether to prepend the prompt to the generated text
     return_full_text: bool
     # Stop generating tokens if a member of `stop_sequences` is generated
diff --git a/docs/openapi.json b/docs/openapi.json
index ed9b0b96..ec4f214c 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1379,6 +1379,15 @@
             "nullable": true,
             "exclusiveMinimum": -2
           },
+          "no_repeat_ngram_size": {
+            "type": "integer",
+            "format": "int32",
+            "description": "If set to int > 0, all ngrams of that size can only occur once.",
+            "default": "null",
+            "example": 12,
+            "nullable": true,
+            "minimum": 0
+          },
           "grammar": {
             "allOf": [
               {
diff --git a/proto/v3/generate.proto b/proto/v3/generate.proto
index 926c878e..9979ea33 100644
--- a/proto/v3/generate.proto
+++ b/proto/v3/generate.proto
@@ -95,6 +95,8 @@ message NextTokenChooserParameters {
     float repetition_penalty = 7;
     /// frequency penalty
     float frequency_penalty = 9;
+    /// n-gram size for which repeated n-grams are disallowed
+    uint32 no_repeat_ngram_size = 12;
     /// token watermarking using "A Watermark for Large Language Models"
     bool watermark = 8;
     /// grammar (applied if not empty)
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 3d1a4103..c8be8131 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -687,6 +687,8 @@ pub struct ValidParameters {
     pub repetition_penalty: f32,
     /// / frequency penalty
     pub frequency_penalty: f32,
+    /// / n-gram size for which repeated n-grams are disallowed
+    pub no_repeat_ngram_size: u32,
     /// / token watermarking using "A Watermark for Large Language Models"
     pub watermark: bool,
     /// / grammar (applied if not empty)