diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs index 1585a25f..3a70f68d 100644 --- a/benchmark/src/table.rs +++ b/benchmark/src/table.rs @@ -16,6 +16,7 @@ pub(crate) fn parameters_table( typical_p: Option, repetition_penalty: Option, frequency_penalty: Option, + no_repeat_ngram_size: Option, watermark: bool, do_sample: bool, ) -> Table { @@ -35,6 +36,7 @@ pub(crate) fn parameters_table( builder.push_record(["Typical P", &format!("{typical_p:?}")]); builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]); builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]); + builder.push_record(["No Repeat Ngram Size", &format!("{no_repeat_ngram_size:?}")]); builder.push_record(["Watermark", &watermark.to_string()]); builder.push_record(["Do Sample", &do_sample.to_string()]); diff --git a/clients/python/README.md b/clients/python/README.md index 88239aa1..f611a25c 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -135,6 +135,10 @@ class Parameters: # Penalize new tokens based on their existing frequency in the text so far, # decreasing the model's likelihood to repeat the same line verbatim. frequency_penalty: Optional[float] + # n-grams are groups of "n" consecutive words, characters, or tokens taken from a sequence of text. Given the + # sentence: "She runs fast", the bi-grams (n=2) would be ("She", "runs") and ("runs", "fast"). Set this to a positive + # integer to prevent any n-gram of that size from occurring more than once in the completion. 
+ no_repeat_ngram_size: Optional[int] # Whether to prepend the prompt to the generated text return_full_text: bool # Stop generating tokens if a member of `stop_sequences` is generated diff --git a/docs/openapi.json b/docs/openapi.json index ed9b0b96..ec4f214c 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1379,6 +1379,15 @@ "nullable": true, "exclusiveMinimum": -2 }, + "no_repeat_ngram_size": { + "type": "integer", + "format": "int32", + "description": "If set to int > 0, all ngrams of that size can only occur once.", + "default": "null", + "example": 12, + "nullable": true, + "minimum": 0 + }, "grammar": { "allOf": [ { diff --git a/proto/v3/generate.proto b/proto/v3/generate.proto index 926c878e..9979ea33 100644 --- a/proto/v3/generate.proto +++ b/proto/v3/generate.proto @@ -95,6 +95,8 @@ message NextTokenChooserParameters { float repetition_penalty = 7; /// frequency penalty float frequency_penalty = 9; + /// no_repeat_ngram_size + uint32 no_repeat_ngram_size = 12; /// token watermarking using "A Watermark for Large Language Models" bool watermark = 8; /// grammar (applied if not empty) diff --git a/router/src/validation.rs b/router/src/validation.rs index 3d1a4103..c8be8131 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -687,6 +687,8 @@ pub struct ValidParameters { pub repetition_penalty: f32, /// / frequency penalty pub frequency_penalty: f32, + /// / no_repeat_ngram_size + pub no_repeat_ngram_size: u32, /// / token watermarking using "A Watermark for Large Language Models" pub watermark: bool, /// / grammar (applied if not empty)