From 455cada527152a7bd2e17385b705873fdb015e51 Mon Sep 17 00:00:00 2001
From: Lucain <lucain@huggingface.co>
Date: Tue, 23 Apr 2024 16:22:12 +0200
Subject: [PATCH] Add attribute descriptions for `GenerateParameters` (#1798)

Once https://github.com/huggingface/huggingface.js/pull/629 gets merged,
we will rely on TGI's specs to generate jsonschema types for
`text_generation` and `chat_completion`.

This PR adds some documentation for `GenerationParameters`'s properties
so that they get documented in the downstream tools (TGI docs but also
`huggingface.js`/`huggingface_hub` inference clients). I mostly took
inspiration from [the python
client](https://github.com/huggingface/text-generation-inference/blob/main/clients/python/text_generation/types.py)
for the descriptions.
---
 router/src/lib.rs | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 53982c36..9e847fe2 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -168,9 +168,12 @@ pub struct Info {
 
 #[derive(Clone, Debug, Deserialize, ToSchema, Default)]
 pub(crate) struct GenerateParameters {
+    /// Generate best_of sequences and return the one if the highest token logprobs.
     #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
     pub best_of: Option<usize>,
+
+    /// The value used to module the logits distribution.
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0.0,
@@ -179,6 +182,9 @@ pub(crate) struct GenerateParameters {
         example = 0.5
     )]
     pub temperature: Option<f32>,
+
+    /// The parameter for repetition penalty. 1.0 means no penalty.
+    /// See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0.0,
@@ -187,6 +193,10 @@ pub(crate) struct GenerateParameters {
         example = 1.03
     )]
     pub repetition_penalty: Option<f32>,
+
+    /// The parameter for frequency penalty. 1.0 means no penalty
+    /// Penalize new tokens based on their existing frequency in the text so far,
+    /// decreasing the model's likelihood to repeat the same line verbatim.
     #[serde(default)]
     #[schema(
         exclusive_minimum = -2.0,
@@ -195,9 +205,13 @@ pub(crate) struct GenerateParameters {
         example = 0.1
     )]
     pub frequency_penalty: Option<f32>,
+
+    /// The number of highest probability vocabulary tokens to keep for top-k-filtering.
     #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
     pub top_k: Option<i32>,
+
+    /// Top-p value for nucleus sampling.
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0.0,
@@ -207,6 +221,9 @@ pub(crate) struct GenerateParameters {
         example = 0.95
     )]
     pub top_p: Option<f32>,
+
+    /// Typical Decoding mass
+    /// See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0.0,
@@ -216,30 +233,48 @@ pub(crate) struct GenerateParameters {
         example = 0.95
     )]
     pub typical_p: Option<f32>,
+
+    /// Activate logits sampling.
     #[serde(default)]
     #[schema(default = "false", example = true)]
     pub do_sample: bool,
+
+    /// Maximum number of tokens to generate.
     #[serde(default = "default_max_new_tokens")]
     #[schema(nullable = true, default = "100", example = "20")]
     pub max_new_tokens: Option<u32>,
+
+    /// Whether to prepend the prompt to the generated text
     #[serde(default)]
     #[schema(nullable = true, default = "null", example = false)]
     pub return_full_text: Option<bool>,
+
+    /// Stop generating tokens if a member of `stop` is generated.
     #[serde(default)]
     #[schema(inline, max_items = 4, example = json ! (["photographer"]))]
     pub stop: Vec<String>,
+
+    /// Truncate inputs tokens to the given size.
     #[serde(default)]
     #[schema(nullable = true, default = "null", example = "null")]
     pub truncate: Option<usize>,
+
+    /// Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
     #[serde(default)]
     #[schema(default = "false", example = true)]
     pub watermark: bool,
+
+    /// Whether to return generation details.
     #[serde(default)]
     #[schema(default = "true")]
     pub details: bool,
+
+    /// Whether to return decoder input token logprobs and ids.
     #[serde(default)]
     #[schema(default = "false")]
     pub decoder_input_details: bool,
+
+    /// Random sampling seed.
     #[serde(default)]
     #[schema(
         exclusive_minimum = 0,
@@ -248,9 +283,13 @@ pub(crate) struct GenerateParameters {
         example = "null"
     )]
     pub seed: Option<u64>,
+
+    /// The number of highest probability vocabulary tokens to keep for top-n-filtering.
     #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
     pub top_n_tokens: Option<u32>,
+
+    /// Grammar constraints for the generation.
     #[serde(default)]
     #[schema(nullable = true, default = "null", example = "null")]
     pub grammar: Option<GrammarType>,