Add attribute descriptions for `GenerateParameters` (#1798)

Once https://github.com/huggingface/huggingface.js/pull/629 gets merged,
we will rely on TGI's specs to generate jsonschema types for
`text_generation` and `chat_completion`.

This PR adds some documentation for `GenerateParameters`'s properties
so that they get documented in the downstream tools (TGI docs but also
`huggingface.js`/`huggingface_hub` inference clients). I mostly took
inspiration from [the python
client](https://github.com/huggingface/text-generation-inference/blob/main/clients/python/text_generation/types.py)
for the descriptions.
This commit is contained in:
Lucain 2024-04-23 16:22:12 +02:00 committed by GitHub
parent ed72e92126
commit 455cada527
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 39 additions and 0 deletions

View File

@ -168,9 +168,12 @@ pub struct Info {
#[derive(Clone, Debug, Deserialize, ToSchema, Default)]
pub(crate) struct GenerateParameters {
/// Generate best_of sequences and return the one with the highest token logprobs.
#[serde(default)]
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
pub best_of: Option<usize>,
/// The value used to modulate the logits distribution.
#[serde(default)]
#[schema(
exclusive_minimum = 0.0,
@ -179,6 +182,9 @@ pub(crate) struct GenerateParameters {
example = 0.5
)]
pub temperature: Option<f32>,
/// The parameter for repetition penalty. 1.0 means no penalty.
/// See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
#[serde(default)]
#[schema(
exclusive_minimum = 0.0,
@ -187,6 +193,10 @@ pub(crate) struct GenerateParameters {
example = 1.03
)]
pub repetition_penalty: Option<f32>,
/// The parameter for frequency penalty. 1.0 means no penalty.
/// Penalize new tokens based on their existing frequency in the text so far,
/// decreasing the model's likelihood to repeat the same line verbatim.
#[serde(default)]
#[schema(
exclusive_minimum = -2.0,
@ -195,9 +205,13 @@ pub(crate) struct GenerateParameters {
example = 0.1
)]
pub frequency_penalty: Option<f32>,
/// The number of highest probability vocabulary tokens to keep for top-k-filtering.
#[serde(default)]
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
pub top_k: Option<i32>,
/// Top-p value for nucleus sampling.
#[serde(default)]
#[schema(
exclusive_minimum = 0.0,
@ -207,6 +221,9 @@ pub(crate) struct GenerateParameters {
example = 0.95
)]
pub top_p: Option<f32>,
/// Typical decoding mass.
/// See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
#[serde(default)]
#[schema(
exclusive_minimum = 0.0,
@ -216,30 +233,48 @@ pub(crate) struct GenerateParameters {
example = 0.95
)]
pub typical_p: Option<f32>,
/// Activate logits sampling.
#[serde(default)]
#[schema(default = "false", example = true)]
pub do_sample: bool,
/// Maximum number of tokens to generate.
#[serde(default = "default_max_new_tokens")]
#[schema(nullable = true, default = "100", example = "20")]
pub max_new_tokens: Option<u32>,
/// Whether to prepend the prompt to the generated text.
#[serde(default)]
#[schema(nullable = true, default = "null", example = false)]
pub return_full_text: Option<bool>,
/// Stop generating tokens if a member of `stop` is generated.
#[serde(default)]
#[schema(inline, max_items = 4, example = json ! (["photographer"]))]
pub stop: Vec<String>,
/// Truncate input tokens to the given size.
#[serde(default)]
#[schema(nullable = true, default = "null", example = "null")]
pub truncate: Option<usize>,
/// Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
#[serde(default)]
#[schema(default = "false", example = true)]
pub watermark: bool,
/// Whether to return generation details.
#[serde(default)]
#[schema(default = "true")]
pub details: bool,
/// Whether to return decoder input token logprobs and ids.
#[serde(default)]
#[schema(default = "false")]
pub decoder_input_details: bool,
/// Random sampling seed.
#[serde(default)]
#[schema(
exclusive_minimum = 0,
@ -248,9 +283,13 @@ pub(crate) struct GenerateParameters {
example = "null"
)]
pub seed: Option<u64>,
/// The number of highest probability vocabulary tokens to keep for top-n-filtering.
#[serde(default)]
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
pub top_n_tokens: Option<u32>,
/// Grammar constraints for the generation.
#[serde(default)]
#[schema(nullable = true, default = "null", example = "null")]
pub grammar: Option<GrammarType>,