diff --git a/router/src/lib.rs b/router/src/lib.rs index 53982c36..9e847fe2 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -168,9 +168,12 @@ pub struct Info { #[derive(Clone, Debug, Deserialize, ToSchema, Default)] pub(crate) struct GenerateParameters { + /// Generate best_of sequences and return the one with the highest token logprobs. #[serde(default)] #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)] pub best_of: Option<usize>, + + /// The value used to modulate the logits distribution. #[serde(default)] #[schema( exclusive_minimum = 0.0, @@ -179,6 +182,9 @@ pub(crate) struct GenerateParameters { example = 0.5 )] pub temperature: Option<f32>, + + /// The parameter for repetition penalty. 1.0 means no penalty. + /// See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. #[serde(default)] #[schema( exclusive_minimum = 0.0, @@ -187,6 +193,10 @@ pub(crate) struct GenerateParameters { example = 1.03 )] pub repetition_penalty: Option<f32>, + + /// The parameter for frequency penalty. 0.0 means no penalty. + /// Penalize new tokens based on their existing frequency in the text so far, + /// decreasing the model's likelihood to repeat the same line verbatim. #[serde(default)] #[schema( exclusive_minimum = -2.0, @@ -195,9 +205,13 @@ pub(crate) struct GenerateParameters { example = 0.1 )] pub frequency_penalty: Option<f32>, + + /// The number of highest probability vocabulary tokens to keep for top-k-filtering. #[serde(default)] #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)] pub top_k: Option<i32>, + + /// Top-p value for nucleus sampling. #[serde(default)] #[schema( exclusive_minimum = 0.0, @@ -207,6 +221,9 @@ pub(crate) struct GenerateParameters { example = 0.95 )] pub top_p: Option<f32>, + + /// Typical Decoding mass + /// See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information. 
#[serde(default)] #[schema( exclusive_minimum = 0.0, @@ -216,30 +233,48 @@ pub(crate) struct GenerateParameters { example = 0.95 )] pub typical_p: Option<f32>, + + /// Activate logits sampling. #[serde(default)] #[schema(default = "false", example = true)] pub do_sample: bool, + + /// Maximum number of tokens to generate. #[serde(default = "default_max_new_tokens")] #[schema(nullable = true, default = "100", example = "20")] pub max_new_tokens: Option<u32>, + + /// Whether to prepend the prompt to the generated text. #[serde(default)] #[schema(nullable = true, default = "null", example = false)] pub return_full_text: Option<bool>, + + /// Stop generating tokens if a member of `stop` is generated. #[serde(default)] #[schema(inline, max_items = 4, example = json ! (["photographer"]))] pub stop: Vec<String>, + + /// Truncate input tokens to the given size. #[serde(default)] #[schema(nullable = true, default = "null", example = "null")] pub truncate: Option<usize>, + + /// Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226). #[serde(default)] #[schema(default = "false", example = true)] pub watermark: bool, + + /// Whether to return generation details. #[serde(default)] #[schema(default = "true")] pub details: bool, + + /// Whether to return decoder input token logprobs and ids. #[serde(default)] #[schema(default = "false")] pub decoder_input_details: bool, + + /// Random sampling seed. #[serde(default)] #[schema( exclusive_minimum = 0, @@ -248,9 +283,13 @@ pub(crate) struct GenerateParameters { example = "null" )] pub seed: Option<u64>, + + /// The number of highest probability vocabulary tokens to keep for top-n-filtering. #[serde(default)] #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)] pub top_n_tokens: Option<u32>, + + /// Grammar constraints for the generation. #[serde(default)] #[schema(nullable = true, default = "null", example = "null")] pub grammar: Option<GrammarType>,