/// Text Generation Inference Webserver mod infer; mod queue; pub mod server; mod validation; use infer::Infer; use queue::{Entry, Queue}; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; use validation::Validation; #[derive(Clone, Debug, Deserialize, ToSchema)] pub(crate) struct GenerateParameters { #[serde(default)] #[schema( exclusive_minimum = 0.0, nullable = true, default = "null", example = 0.5 )] pub temperature: Option, #[serde(default)] #[schema( exclusive_minimum = 0.0, nullable = true, default = "null", example = 1.03 )] pub repetition_penalty: Option, #[serde(default)] #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)] pub top_k: Option, #[serde(default)] #[schema( exclusive_minimum = 0.0, maximum = 1.0, nullable = true, default = "null", example = 0.95 )] pub top_p: Option, #[serde(default)] #[schema( exclusive_minimum = 0.0, maximum = 1.0, nullable = true, default = "null", example = 0.95 )] pub typical_p: Option, #[serde(default)] #[schema(default = "false", example = true)] pub do_sample: bool, #[serde(default = "default_max_new_tokens")] #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")] pub max_new_tokens: u32, #[serde(default)] #[schema(default = "null", example = false)] pub return_full_text: Option, #[serde(default)] #[schema(inline, max_items = 4, example = json ! (["photographer"]))] pub stop: Vec, #[serde(default)] #[schema(default = "null", example = "null")] pub truncate: Option, #[serde(default)] #[schema(default = "false", example = true)] pub watermark: bool, #[serde(default)] #[schema(default = "true")] pub details: bool, #[serde(default)] pub seed: Option, } fn default_max_new_tokens() -> u32 { 20 } fn default_parameters() -> GenerateParameters { GenerateParameters { temperature: None, repetition_penalty: None, top_k: None, top_p: None, typical_p: None, do_sample: false, max_new_tokens: default_max_new_tokens(), return_full_text: None, stop: Vec::new(), truncate: None, watermark: false, details: false, seed: None, } } #[derive(Clone, Debug, Deserialize, ToSchema)] pub(crate) struct GenerateRequest { #[schema(example = "My name is Olivier and I")] pub inputs: String, #[serde(default = "default_parameters")] pub parameters: GenerateParameters, } #[derive(Clone, Debug, Deserialize, ToSchema)] pub(crate) struct CompatGenerateRequest { #[schema(example = "My name is Olivier and I")] pub inputs: String, #[serde(default = "default_parameters")] pub parameters: GenerateParameters, #[serde(default)] #[allow(dead_code)] pub stream: bool, } impl From for GenerateRequest { fn from(req: CompatGenerateRequest) -> Self { Self { inputs: req.inputs, parameters: req.parameters, } } } #[derive(Debug, Serialize, ToSchema)] pub struct PrefillToken { #[schema(example = 0)] id: u32, #[schema(example = "test")] text: String, #[schema(nullable = true, example = - 0.34)] logprob: f32, } #[derive(Debug, Serialize, ToSchema)] pub struct Token { #[schema(example = 0)] id: u32, #[schema(example = "test")] text: String, #[schema(nullable = true, example = - 0.34)] logprob: f32, #[schema(example = "false")] special: bool, } #[derive(Serialize, ToSchema)] #[serde(rename_all(serialize = "snake_case"))] pub(crate) enum FinishReason { #[schema(rename = "length")] Length, #[serde(rename = "eos_token")] #[schema(rename = "eos_token")] EndOfSequenceToken, #[schema(rename = "stop_sequence")] StopSequence, } #[derive(Serialize, ToSchema)] pub(crate) struct Details { #[schema(example = "length")] pub finish_reason: FinishReason, #[schema(example = 1)] pub generated_tokens: u32, #[schema(example = 42)] pub seed: Option, pub prefill: Vec, pub tokens: Vec, } #[derive(Serialize, ToSchema)] pub(crate) struct GenerateResponse { #[schema(example = "test")] pub generated_text: String, #[serde(skip_serializing_if = "Option::is_none")] pub details: Option
, } #[derive(Serialize, ToSchema)] pub(crate) struct StreamDetails { #[schema(example = "length")] pub finish_reason: FinishReason, #[schema(example = 1)] pub generated_tokens: u32, #[schema(example = 42)] pub seed: Option, } #[derive(Serialize, ToSchema)] pub(crate) struct StreamResponse { pub token: Token, #[schema(nullable = true, default = "null", example = "test")] pub generated_text: Option, #[schema(nullable = true, default = "null")] pub details: Option, } #[derive(Serialize, ToSchema)] pub(crate) struct ErrorResponse { pub error: String, pub error_type: String, }