Add attribute descriptions for `GenerateParameters` (#1798)

Once https://github.com/huggingface/huggingface.js/pull/629 gets merged, we will rely on TGI's specs to generate jsonschema types for `text_generation` and `chat_completion`. This PR adds some documentation for `GenerateParameters`'s properties so that they get documented in the downstream tools (TGI docs but also `huggingface.js`/`huggingface_hub` inference clients). I mostly took inspiration from [the Python client](https://github.com/huggingface/text-generation-inference/blob/main/clients/python/text_generation/types.py) for the descriptions.
2024-04-23 16:22:12 +02:00 · 2024-04-23 16:22:12 +02:00 · 455cada527
parent ed72e92126
commit 455cada527
1 changed files with 39 additions and 0 deletions
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@ -168,9 +168,12 @@ pub struct Info {

 #[derive(Clone, Debug, Deserialize, ToSchema, Default)]
 pub(crate) struct GenerateParameters {
+    /// Generate best_of sequences and return the one with the highest token logprobs.
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
    pub best_of: Option<usize>,
+
+    /// The value used to modulate the logits distribution.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
@ -179,6 +182,9 @@ pub(crate) struct GenerateParameters {
        example = 0.5
    )]
    pub temperature: Option<f32>,
+
+    /// The parameter for repetition penalty. 1.0 means no penalty.
+    /// See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
@ -187,6 +193,10 @@ pub(crate) struct GenerateParameters {
        example = 1.03
    )]
    pub repetition_penalty: Option<f32>,
+
+    /// The parameter for frequency penalty. 1.0 means no penalty.
+    /// Penalize new tokens based on their existing frequency in the text so far,
+    /// decreasing the model's likelihood to repeat the same line verbatim.
    #[serde(default)]
    #[schema(
        exclusive_minimum = -2.0,
@ -195,9 +205,13 @@ pub(crate) struct GenerateParameters {
        example = 0.1
    )]
    pub frequency_penalty: Option<f32>,
+
+    /// The number of highest probability vocabulary tokens to keep for top-k-filtering.
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
    pub top_k: Option<i32>,
+
+    /// Top-p value for nucleus sampling.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
@ -207,6 +221,9 @@ pub(crate) struct GenerateParameters {
        example = 0.95
    )]
    pub top_p: Option<f32>,
+
+    /// Typical Decoding mass.
+    /// See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
@ -216,30 +233,48 @@ pub(crate) struct GenerateParameters {
        example = 0.95
    )]
    pub typical_p: Option<f32>,
+
+    /// Activate logits sampling.
    #[serde(default)]
    #[schema(default = "false", example = true)]
    pub do_sample: bool,
+
+    /// Maximum number of tokens to generate.
    #[serde(default = "default_max_new_tokens")]
    #[schema(nullable = true, default = "100", example = "20")]
    pub max_new_tokens: Option<u32>,
+
+    /// Whether to prepend the prompt to the generated text.
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = false)]
    pub return_full_text: Option<bool>,
+
+    /// Stop generating tokens if a member of `stop` is generated.
    #[serde(default)]
    #[schema(inline, max_items = 4, example = json ! (["photographer"]))]
    pub stop: Vec<String>,
+
+    /// Truncate input tokens to the given size.
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = "null")]
    pub truncate: Option<usize>,
+
+    /// Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
    #[serde(default)]
    #[schema(default = "false", example = true)]
    pub watermark: bool,
+
+    /// Whether to return generation details.
    #[serde(default)]
    #[schema(default = "true")]
    pub details: bool,
+
+    /// Whether to return decoder input token logprobs and ids.
    #[serde(default)]
    #[schema(default = "false")]
    pub decoder_input_details: bool,
+
+    /// Random sampling seed.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0,
@ -248,9 +283,13 @@ pub(crate) struct GenerateParameters {
        example = "null"
    )]
    pub seed: Option<u64>,
+
+    /// The number of highest probability vocabulary tokens to keep for top-n-filtering.
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
    pub top_n_tokens: Option<u32>,
+
+    /// Grammar constraints for the generation.
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = "null")]
    pub grammar: Option<GrammarType>,