update docs

Nathan Brake 2024-07-15 13:55:43 +00:00 committed by erikkaum
parent ea915ad7d7
commit 10b940559a
5 changed files with 19 additions and 0 deletions

View File

@@ -16,6 +16,7 @@ pub(crate) fn parameters_table(
     typical_p: Option<f32>,
     repetition_penalty: Option<f32>,
     frequency_penalty: Option<f32>,
+    no_repeat_ngram_size: Option<u32>,
     watermark: bool,
     do_sample: bool,
 ) -> Table {
@@ -35,6 +36,7 @@ pub(crate) fn parameters_table(
     builder.push_record(["Typical P", &format!("{typical_p:?}")]);
     builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
     builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]);
+    builder.push_record(["No Repeat Ngram Size", &format!("{no_repeat_ngram_size:?}")]);
     builder.push_record(["Watermark", &watermark.to_string()]);
     builder.push_record(["Do Sample", &do_sample.to_string()]);

View File

@@ -135,6 +135,10 @@ class Parameters:
     # Penalize new tokens based on their existing frequency in the text so far,
     # decreasing the model's likelihood to repeat the same line verbatim.
     frequency_penalty: Optional[float]
+    # n-grams are groups of "n" consecutive words, characters, or tokens taken from a sequence of text. Given the
+    # sentence: "She runs fast", the bi-grams (n=2) would be ("she", "runs") and ("runs", "fast"). Set this to avoid
+    # generating the same n-grams in the completion.
+    no_repeat_ngram_size: Optional[int]
     # Whether to prepend the prompt to the generated text
     return_full_text: bool
     # Stop generating tokens if a member of `stop_sequences` is generated
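
The comment added above defines n-grams by example. As a quick standalone illustration (not part of this commit), the bi-grams of that sentence can be enumerated like so:

```python
# Minimal sketch: enumerate the n-grams of a token sequence, matching the
# definition given in the docstring above.
def ngrams(tokens, n):
    """Return all n-grams (tuples of n consecutive items) of a sequence."""
    return [tuple(tokens[i : i + n]) for i in range(len(tokens) - n + 1)]

print(ngrams(["she", "runs", "fast"], 2))
# [('she', 'runs'), ('runs', 'fast')]
```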

View File

@@ -1379,6 +1379,15 @@
         "nullable": true,
         "exclusiveMinimum": -2
       },
+      "no_repeat_ngram_size": {
+        "type": "integer",
+        "format": "int32",
+        "description": "If set to int > 0, all ngrams of that size can only occur once.",
+        "default": "null",
+        "example": 12,
+        "nullable": true,
+        "minimum": 0
+      },
       "grammar": {
         "allOf": [
           {
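
The new field lands in the `parameters` object of the `/generate` request body. Here is a minimal sketch of a call that exercises it; the server URL, prompt, and parameter values are placeholders, not part of this commit:

```python
# Sketch of a /generate request using the field added by this commit.
# Assumes a server listening on localhost:8080.
import requests

resp = requests.post(
    "http://localhost:8080/generate",
    json={
        "inputs": "Write a short poem about the sea.",
        "parameters": {
            "max_new_tokens": 64,
            # Per the schema above: no 3-gram may occur more than once.
            "no_repeat_ngram_size": 3,
        },
    },
    timeout=60,
)
print(resp.json()["generated_text"])
```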

View File

@@ -95,6 +95,8 @@ message NextTokenChooserParameters {
     float repetition_penalty = 7;
     /// frequency penalty
     float frequency_penalty = 9;
+    /// no_repeat_ngram_size
+    uint32 no_repeat_ngram_size = 12;
     /// token watermarking using "A Watermark for Large Language Models"
     bool watermark = 8;
     /// grammar (applied if not empty)

View File

@@ -687,6 +687,8 @@ pub struct ValidParameters {
     pub repetition_penalty: f32,
     /// / frequency penalty
     pub frequency_penalty: f32,
+    /// / no_repeat_ngram_size
+    pub no_repeat_ngram_size: u32,
     /// / token watermarking using "A Watermark for Large Language Models"
     pub watermark: bool,
     /// / grammar (applied if not empty)
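
To make the semantics of the new `ValidParameters` field concrete, here is a minimal pure-Python sketch of how a no-repeat-ngram constraint is typically enforced at decode time. The behavior is inferred from the schema description above ("all ngrams of that size can only occur once"); it is not the server's actual implementation, and the function name is hypothetical:

```python
# Hypothetical sketch of no-repeat-ngram filtering at decode time.
import math

def ban_repeating_ngrams(token_ids, logits, ngram_size):
    """Mask out tokens that would complete an n-gram already present in
    token_ids, so each n-gram of the given size occurs at most once."""
    if ngram_size <= 0 or len(token_ids) < ngram_size:
        return logits  # no complete n-gram generated yet; nothing to ban
    # The last (n-1) generated tokens form the prefix of the next n-gram.
    prefix = tuple(token_ids[len(token_ids) - ngram_size + 1 :])
    banned = set()
    for i in range(len(token_ids) - ngram_size + 1):
        window = tuple(token_ids[i : i + ngram_size])
        if window[:-1] == prefix:
            banned.add(window[-1])
    return [-math.inf if tok in banned else score
            for tok, score in enumerate(logits)]
```

Setting the size to 2, for instance, guarantees that no token pair ever repeats in the output, at the cost of occasionally blocking legitimate repetition such as proper names or code identifiers.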