Add attribute descriptions for `GenerateParameters` (#1798)
Once https://github.com/huggingface/huggingface.js/pull/629 gets merged, we will rely on TGI's specs to generate jsonschema types for `text_generation` and `chat_completion`. This PR adds some documentation for `GenerateParameters`'s properties so that they get documented in the downstream tools (TGI docs but also `huggingface.js`/`huggingface_hub` inference clients). I mostly took inspiration from [the python client](https://github.com/huggingface/text-generation-inference/blob/main/clients/python/text_generation/types.py) for the descriptions.
This commit is contained in:
parent
ed72e92126
commit
455cada527
|
@ -168,9 +168,12 @@ pub struct Info {
|
|||
|
||||
#[derive(Clone, Debug, Deserialize, ToSchema, Default)]
|
||||
pub(crate) struct GenerateParameters {
|
||||
    /// Generate best_of sequences and return the one with the highest token log probabilities.
|
||||
#[serde(default)]
|
||||
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
|
||||
pub best_of: Option<usize>,
|
||||
|
||||
    /// The value used to modulate the logits distribution.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = 0.0,
|
||||
|
@ -179,6 +182,9 @@ pub(crate) struct GenerateParameters {
|
|||
example = 0.5
|
||||
)]
|
||||
pub temperature: Option<f32>,
|
||||
|
||||
/// The parameter for repetition penalty. 1.0 means no penalty.
|
||||
/// See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = 0.0,
|
||||
|
@ -187,6 +193,10 @@ pub(crate) struct GenerateParameters {
|
|||
example = 1.03
|
||||
)]
|
||||
pub repetition_penalty: Option<f32>,
|
||||
|
||||
    /// The parameter for frequency penalty. 0.0 means no penalty.
|
||||
/// Penalize new tokens based on their existing frequency in the text so far,
|
||||
/// decreasing the model's likelihood to repeat the same line verbatim.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = -2.0,
|
||||
|
@ -195,9 +205,13 @@ pub(crate) struct GenerateParameters {
|
|||
example = 0.1
|
||||
)]
|
||||
pub frequency_penalty: Option<f32>,
|
||||
|
||||
/// The number of highest probability vocabulary tokens to keep for top-k-filtering.
|
||||
#[serde(default)]
|
||||
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
|
||||
pub top_k: Option<i32>,
|
||||
|
||||
/// Top-p value for nucleus sampling.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = 0.0,
|
||||
|
@ -207,6 +221,9 @@ pub(crate) struct GenerateParameters {
|
|||
example = 0.95
|
||||
)]
|
||||
pub top_p: Option<f32>,
|
||||
|
||||
/// Typical Decoding mass
|
||||
/// See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = 0.0,
|
||||
|
@ -216,30 +233,48 @@ pub(crate) struct GenerateParameters {
|
|||
example = 0.95
|
||||
)]
|
||||
pub typical_p: Option<f32>,
|
||||
|
||||
/// Activate logits sampling.
|
||||
#[serde(default)]
|
||||
#[schema(default = "false", example = true)]
|
||||
pub do_sample: bool,
|
||||
|
||||
/// Maximum number of tokens to generate.
|
||||
#[serde(default = "default_max_new_tokens")]
|
||||
#[schema(nullable = true, default = "100", example = "20")]
|
||||
pub max_new_tokens: Option<u32>,
|
||||
|
||||
/// Whether to prepend the prompt to the generated text
|
||||
#[serde(default)]
|
||||
#[schema(nullable = true, default = "null", example = false)]
|
||||
pub return_full_text: Option<bool>,
|
||||
|
||||
/// Stop generating tokens if a member of `stop` is generated.
|
||||
#[serde(default)]
|
||||
#[schema(inline, max_items = 4, example = json ! (["photographer"]))]
|
||||
pub stop: Vec<String>,
|
||||
|
||||
    /// Truncate input tokens to the given size.
|
||||
#[serde(default)]
|
||||
#[schema(nullable = true, default = "null", example = "null")]
|
||||
pub truncate: Option<usize>,
|
||||
|
||||
/// Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
|
||||
#[serde(default)]
|
||||
#[schema(default = "false", example = true)]
|
||||
pub watermark: bool,
|
||||
|
||||
/// Whether to return generation details.
|
||||
#[serde(default)]
|
||||
#[schema(default = "true")]
|
||||
pub details: bool,
|
||||
|
||||
/// Whether to return decoder input token logprobs and ids.
|
||||
#[serde(default)]
|
||||
#[schema(default = "false")]
|
||||
pub decoder_input_details: bool,
|
||||
|
||||
/// Random sampling seed.
|
||||
#[serde(default)]
|
||||
#[schema(
|
||||
exclusive_minimum = 0,
|
||||
|
@ -248,9 +283,13 @@ pub(crate) struct GenerateParameters {
|
|||
example = "null"
|
||||
)]
|
||||
pub seed: Option<u64>,
|
||||
|
||||
/// The number of highest probability vocabulary tokens to keep for top-n-filtering.
|
||||
#[serde(default)]
|
||||
#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
|
||||
pub top_n_tokens: Option<u32>,
|
||||
|
||||
/// Grammar constraints for the generation.
|
||||
#[serde(default)]
|
||||
#[schema(nullable = true, default = "null", example = "null")]
|
||||
pub grammar: Option<GrammarType>,
|
||||
|
|
Loading…
Reference in New Issue