hf_text-generation-inference/router/src/lib.rs

mod health;
/// Text Generation Inference Webserver
mod infer;
mod queue;
pub mod server;
mod validation;

use infer::Infer;
use queue::{Entry, Queue};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validation::Validation;

/// Hub type
///
/// Model metadata deserialized from a Hub payload. Only the fields declared
/// here are read.
#[derive(Clone, Debug, Deserialize)]
pub struct HubModelInfo {
    /// Model identifier; read from the `id` key of the payload.
    #[serde(rename(deserialize = "id"))]
    pub model_id: String,
    /// Model revision hash, when the payload carries one.
    pub sha: Option<String>,
    /// Pipeline tag, when the payload carries one.
    pub pipeline_tag: Option<String>,
}

// Serializable summary of the loaded model and of the router settings.
//
// The fields are grouped in three sections, marked by the existing
// `Model info`, `Router Parameters` and `Router Info` headers below.
#[derive(Clone, Debug, Serialize, ToSchema)]
pub struct Info {
    /// Model info
    #[schema(example = "bigscience/bloom-560m")]
    pub model_id: String,
    // Revision hash of the model; may be absent.
    #[schema(nullable = true, example = "e985a63cdc139290c5f700ff1929f0b5942cced2")]
    pub model_sha: Option<String>,
    // Data type of the model, as a string.
    #[schema(example = "torch.float16")]
    pub model_dtype: String,
    // Device type the model is placed on, as a string.
    #[schema(example = "cuda")]
    pub model_device_type: String,
    // Pipeline tag of the model; may be absent.
    #[schema(nullable = true, example = "text-generation")]
    pub model_pipeline_tag: Option<String>,
    /// Router Parameters
    #[schema(example = "128")]
    pub max_concurrent_requests: usize,
    #[schema(example = "2")]
    pub max_best_of: usize,
    #[schema(example = "4")]
    pub max_stop_sequences: usize,
    #[schema(example = "1024")]
    pub max_input_length: usize,
    #[schema(example = "2048")]
    pub max_total_tokens: usize,
    #[schema(example = "1.2")]
    pub waiting_served_ratio: f32,
    #[schema(example = "32000")]
    pub max_batch_total_tokens: u32,
    #[schema(example = "20")]
    pub max_waiting_tokens: usize,
    #[schema(example = "2")]
    pub validation_workers: usize,
    /// Router Info
    #[schema(example = "0.5.0")]
    pub version: &'static str,
    // Revision of the router build; may be absent.
    #[schema(nullable = true, example = "null")]
    pub sha: Option<&'static str>,
    // Docker label of the router build; may be absent.
    #[schema(nullable = true, example = "null")]
    pub docker_label: Option<&'static str>,
}

// Tunable options accepted in the `parameters` object of a generation request.
//
// Every field carries a serde default, so a request may supply any subset of
// them. The `#[schema(...)]` attributes feed the generated OpenAPI schema.
#[derive(Clone, Debug, Deserialize, ToSchema)]
pub(crate) struct GenerateParameters {
    // Unset unless the request provides it.
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
    pub best_of: Option<usize>,
    // Sampling temperature; unset unless the request provides it.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        nullable = true,
        default = "null",
        example = 0.5
    )]
    pub temperature: Option<f32>,
    // Repetition penalty; unset unless the request provides it.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        nullable = true,
        default = "null",
        example = 1.03
    )]
    pub repetition_penalty: Option<f32>,
    // Top-k value; unset unless the request provides it.
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
    pub top_k: Option<i32>,
    // Top-p value, documented in the range (0, 1].
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        maximum = 1.0,
        nullable = true,
        default = "null",
        example = 0.95
    )]
    pub top_p: Option<f32>,
    // Typical-p value, documented in the range (0, 1].
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        maximum = 1.0,
        nullable = true,
        default = "null",
        example = 0.95
    )]
    pub typical_p: Option<f32>,
    // Off unless the request enables it.
    #[serde(default)]
    #[schema(default = "false", example = true)]
    pub do_sample: bool,
    // Falls back to `default_max_new_tokens()` when omitted.
    #[serde(default = "default_max_new_tokens")]
    #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")]
    pub max_new_tokens: u32,
    // Unset unless the request provides it.
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = false)]
    pub return_full_text: Option<bool>,
    // Stop sequences; empty when omitted. The schema documents at most 4 items.
    #[serde(default)]
    #[schema(inline, max_items = 4, example = json ! (["photographer"]))]
    pub stop: Vec<String>,
    // Unset unless the request provides it.
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = "null")]
    pub truncate: Option<usize>,
    // Off unless the request enables it.
    #[serde(default)]
    #[schema(default = "false", example = true)]
    pub watermark: bool,
    // `#[serde(default)]` yields `false` for a bool and `default_parameters`
    // also uses `false`, so the documented default is "false" as well.
    #[serde(default)]
    #[schema(default = "false")]
    pub details: bool,
    // Sampling seed; unset unless the request provides it.
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0,
        nullable = true,
        default = "null",
        example = "null"
    )]
    pub seed: Option<u64>,
}

/// Value used for `max_new_tokens` when a request does not specify one.
fn default_max_new_tokens() -> u32 {
    const FALLBACK_MAX_NEW_TOKENS: u32 = 20;
    FALLBACK_MAX_NEW_TOKENS
}

/// Builds the parameter set used when a request carries no `parameters` object.
///
/// Every optional value is left unset, every flag is off, no stop sequence is
/// configured and `max_new_tokens` comes from `default_max_new_tokens()`.
fn default_parameters() -> GenerateParameters {
    let max_new_tokens = default_max_new_tokens();
    let stop = Vec::new();

    GenerateParameters {
        // Length and stopping.
        max_new_tokens,
        stop,
        truncate: None,
        // Sampling controls.
        do_sample: false,
        temperature: None,
        repetition_penalty: None,
        top_k: None,
        top_p: None,
        typical_p: None,
        seed: None,
        best_of: None,
        // Output shaping.
        return_full_text: None,
        details: false,
        watermark: false,
    }
}

// Deserialized body of a generation request: input text plus parameters.
#[derive(Clone, Debug, Deserialize, ToSchema)]
pub(crate) struct GenerateRequest {
    // Input text; this key is required (no serde default).
    #[schema(example = "My name is Olivier and I")]
    pub inputs: String,
    // Falls back to `default_parameters()` when the key is absent.
    #[serde(default = "default_parameters")]
    pub parameters: GenerateParameters,
}

// Same shape as `GenerateRequest` with an additional `stream` flag.
// Convertible into `GenerateRequest` through the `From` impl in this file.
#[derive(Clone, Debug, Deserialize, ToSchema)]
pub(crate) struct CompatGenerateRequest {
    // Input text; this key is required (no serde default).
    #[schema(example = "My name is Olivier and I")]
    pub inputs: String,
    // Falls back to `default_parameters()` when the key is absent.
    #[serde(default = "default_parameters")]
    pub parameters: GenerateParameters,
    // `false` when omitted. Nothing in this file reads the field (the `From`
    // conversion drops it), hence the `dead_code` allowance.
    #[serde(default)]
    #[allow(dead_code)]
    pub stream: bool,
}

impl From<CompatGenerateRequest> for GenerateRequest {
    fn from(req: CompatGenerateRequest) -> Self {
        Self {
            inputs: req.inputs,
            parameters: req.parameters,
        }
    }
}

// Serializable record for one prefill token: id, text and log-probability.
#[derive(Debug, Serialize, ToSchema)]
pub struct PrefillToken {
    // Token id.
    #[schema(example = 0)]
    id: u32,
    // Text of the token.
    #[schema(example = "test")]
    text: String,
    // Log-probability of the token. The schema marks it nullable although the
    // Rust type is a plain `f32` — presumably because a non-finite value is
    // emitted as JSON `null`; TODO confirm.
    #[schema(nullable = true, example = - 0.34)]
    logprob: f32,
}

// Serializable record for one token: id, text, log-probability and a
// `special` flag.
#[derive(Debug, Serialize, ToSchema)]
pub struct Token {
    // Token id.
    #[schema(example = 0)]
    id: u32,
    // Text of the token.
    #[schema(example = "test")]
    text: String,
    // Log-probability of the token. The schema marks it nullable although the
    // Rust type is a plain `f32` — presumably because a non-finite value is
    // emitted as JSON `null`; TODO confirm.
    #[schema(nullable = true, example = - 0.34)]
    logprob: f32,
    // Presumably marks special tokens — confirm against the producer.
    #[schema(example = "false")]
    special: bool,
}

// Reason reported for the end of a generation.
//
// Variants serialize in snake_case; the `schema(rename)` attributes keep the
// OpenAPI names aligned with the serialized ones.
#[derive(Serialize, ToSchema)]
#[serde(rename_all(serialize = "snake_case"))]
pub(crate) enum FinishReason {
    // Serialized as "length".
    #[schema(rename = "length")]
    Length,
    // Explicit rename: serialized as "eos_token" instead of the snake_case
    // form of the variant name.
    #[serde(rename = "eos_token")]
    #[schema(rename = "eos_token")]
    EndOfSequenceToken,
    // Serialized as "stop_sequence".
    #[schema(rename = "stop_sequence")]
    StopSequence,
}

// One entry of `Details::best_of_sequences`: a generated text together with
// the same per-generation fields that `Details` carries.
#[derive(Serialize, ToSchema)]
pub(crate) struct BestOfSequence {
    // Generated text of this sequence.
    #[schema(example = "test")]
    pub generated_text: String,
    // Why this sequence ended.
    #[schema(example = "length")]
    pub finish_reason: FinishReason,
    // Number of generated tokens.
    #[schema(example = 1)]
    pub generated_tokens: u32,
    // Seed, when there is one; serialized as `null` otherwise.
    #[schema(nullable = true, example = 42)]
    pub seed: Option<u64>,
    // Prefill tokens.
    pub prefill: Vec<PrefillToken>,
    // Generated tokens.
    pub tokens: Vec<Token>,
}

// Generation details carried by `GenerateResponse::details`.
#[derive(Serialize, ToSchema)]
pub(crate) struct Details {
    // Why the generation ended.
    #[schema(example = "length")]
    pub finish_reason: FinishReason,
    // Number of generated tokens.
    #[schema(example = 1)]
    pub generated_tokens: u32,
    // Seed, when there is one; serialized as `null` otherwise.
    #[schema(nullable = true, example = 42)]
    pub seed: Option<u64>,
    // Prefill tokens.
    pub prefill: Vec<PrefillToken>,
    // Generated tokens.
    pub tokens: Vec<Token>,
    // Additional sequences; the key is left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub best_of_sequences: Option<Vec<BestOfSequence>>,
}

// Serialized generation response: the generated text plus optional details.
#[derive(Serialize, ToSchema)]
pub(crate) struct GenerateResponse {
    // Generated text.
    #[schema(example = "test")]
    pub generated_text: String,
    // Generation details; the key is left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<Details>,
}

// Details carried by `StreamResponse::details`. Unlike `Details`, it holds no
// token lists and no best-of sequences.
#[derive(Serialize, ToSchema)]
pub(crate) struct StreamDetails {
    // Why the generation ended.
    #[schema(example = "length")]
    pub finish_reason: FinishReason,
    // Number of generated tokens.
    #[schema(example = 1)]
    pub generated_tokens: u32,
    // Seed, when there is one; serialized as `null` otherwise.
    #[schema(nullable = true, example = 42)]
    pub seed: Option<u64>,
}

// Serialized streaming message: one token, plus an optional generated text
// and optional details.
//
// Neither optional field uses `skip_serializing_if`, so `None` is written as
// an explicit `null`, matching the documented schema defaults.
#[derive(Serialize, ToSchema)]
pub(crate) struct StreamResponse {
    // Token carried by this message.
    pub token: Token,
    // Generated text, when set.
    #[schema(nullable = true, default = "null", example = "test")]
    pub generated_text: Option<String>,
    // Generation details, when set.
    #[schema(nullable = true, default = "null")]
    pub details: Option<StreamDetails>,
}

// Serialized error payload made of two strings.
#[derive(Serialize, ToSchema)]
pub(crate) struct ErrorResponse {
    // Error message.
    pub error: String,
    // Category of the error, as a string.
    pub error_type: String,
}

#[cfg(test)]
mod tests {
    use tokenizers::Tokenizer;

    /// Location of the cached tokenizer file, relative to the working directory.
    const TOKENIZER_PATH: &str = "tokenizer.json";
    /// Download source used when the cached file is missing.
    const TOKENIZER_URL: &str = "https://huggingface.co/gpt2/raw/main/tokenizer.json";

    /// Test helper returning a tokenizer loaded from `tokenizer.json`.
    ///
    /// The file is downloaded on first use and reused afterwards.
    ///
    /// # Panics
    ///
    /// Panics when the download fails, when the server answers with an error
    /// status, when the file cannot be written, or when it cannot be loaded
    /// as a tokenizer.
    pub(crate) async fn get_tokenizer() -> Tokenizer {
        if !std::path::Path::new(TOKENIZER_PATH).exists() {
            let content = reqwest::get(TOKENIZER_URL)
                .await
                .expect("failed to request the tokenizer file")
                // Reject 4xx/5xx answers: otherwise an error page would be
                // stored as `tokenizer.json` and reused by every later run.
                .error_for_status()
                .expect("tokenizer download returned an HTTP error status")
                .bytes()
                .await
                .expect("failed to read the tokenizer response body");
            std::fs::write(TOKENIZER_PATH, &content).expect("failed to write tokenizer.json");
        }
        Tokenizer::from_file(TOKENIZER_PATH).expect("failed to load tokenizer.json")
    }
}
feat(router): new healthcheck that skips the queue (#244) Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Co-authored-by: OlivierDehaene <olivier@huggingface.co> 2023-04-26 12:23:54 -06:00			`mod health;`
feat(router): use background task to manage request queue (#52) Co-authored-by: Nick Hill <nickhill@us.ibm.com> 2023-02-02 06:59:27 -07:00			`/// Text Generation Inference Webserver`
breaking(router): modify /generate API to only return generated text (#50) @njhill, @yk FYI generated_text was concatenated to the user prompt for legacy reason. We want to remove this behaviour as we don't think it is useful and even detrimonial to usability. We also remove the unused Vec. 2023-02-02 07:02:04 -07:00			`mod infer;`
feat(router): use background task to manage request queue (#52) Co-authored-by: Nick Hill <nickhill@us.ibm.com> 2023-02-02 06:59:27 -07:00			`mod queue;`
feat: Add arguments to CLI 2022-10-17 10:27:33 -06:00			`pub mod server;`
v0.1.0 2022-10-18 07:19:03 -06:00			`mod validation;`
feat: Improve error handling 2022-10-17 06:59:00 -06:00
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00			`use infer::Infer;`
feat(router): use background task to manage request queue (#52) Co-authored-by: Nick Hill <nickhill@us.ibm.com> 2023-02-02 06:59:27 -07:00			`use queue::{Entry, Queue};`
v0.1.0 2022-10-18 07:19:03 -06:00			`use serde::{Deserialize, Serialize};`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`use utoipa::ToSchema;`
feat: Improve error handling 2022-10-17 06:59:00 -06:00			`use validation::Validation;`
v0.1.0 2022-10-18 07:19:03 -06:00
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`/// Hub type`
			`#[derive(Clone, Debug, Deserialize)]`
feat(router): add device and dtype info (#215) 2023-04-21 07:36:29 -06:00			`pub struct HubModelInfo {`
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`#[serde(rename(deserialize = "id"))]`
			`pub model_id: String,`
			`pub sha: Option<String>,`
			`pub pipeline_tag: Option<String>,`
			`}`

			`#[derive(Clone, Debug, Serialize, ToSchema)]`
			`pub struct Info {`
feat(router): add endpoint info to /info route (#228) 2023-04-25 05:11:18 -06:00			`/// Model info`
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`#[schema(example = "bigscience/blomm-560m")]`
			`pub model_id: String,`
			`#[schema(nullable = true, example = "e985a63cdc139290c5f700ff1929f0b5942cced2")]`
			`pub model_sha: Option<String>,`
feat(router): add device and dtype info (#215) 2023-04-21 07:36:29 -06:00			`#[schema(example = "torch.float16")]`
			`pub model_dtype: String,`
			`#[schema(example = "cuda")]`
			`pub model_device_type: String,`
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`#[schema(nullable = true, example = "text-generation")]`
			`pub model_pipeline_tag: Option<String>,`
feat(router): add endpoint info to /info route (#228) 2023-04-25 05:11:18 -06:00			`/// Router Parameters`
			`#[schema(example = "128")]`
			`pub max_concurrent_requests: usize,`
			`#[schema(example = "2")]`
			`pub max_best_of: usize,`
			`#[schema(example = "4")]`
			`pub max_stop_sequences: usize,`
			`#[schema(example = "1024")]`
			`pub max_input_length: usize,`
			`#[schema(example = "2048")]`
			`pub max_total_tokens: usize,`
			`#[schema(example = "1.2")]`
			`pub waiting_served_ratio: f32,`
			`#[schema(example = "32000")]`
			`pub max_batch_total_tokens: u32,`
			`#[schema(example = "20")]`
			`pub max_waiting_tokens: usize,`
			`#[schema(example = "2")]`
			`pub validation_workers: usize,`
			`/// Router Info`
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`#[schema(example = "0.5.0")]`
			`pub version: &'static str,`
			`#[schema(nullable = true, example = "null")]`
			`pub sha: Option<&'static str>,`
chore(github): add templates (#264) 2023-05-02 07:43:19 -06:00			`#[schema(nullable = true, example = "null")]`
			`pub docker_label: Option<&'static str>,`
feat(router): add info route (#196) close #125 2023-04-18 08:16:06 -06:00			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Clone, Debug, Deserialize, ToSchema)]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub(crate) struct GenerateParameters {`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[serde(default)]`
			`#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]`
			`pub best_of: Option<usize>,`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[serde(default)]`
			`#[schema(`
			`exclusive_minimum = 0.0,`
			`nullable = true,`
			`default = "null",`
			`example = 0.5`
			`)]`
			`pub temperature: Option<f32>,`
			`#[serde(default)]`
			`#[schema(`
			`exclusive_minimum = 0.0,`
			`nullable = true,`
			`default = "null",`
			`example = 1.03`
			`)]`
			`pub repetition_penalty: Option<f32>,`
			`#[serde(default)]`
			`#[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]`
			`pub top_k: Option<i32>,`
			`#[serde(default)]`
			`#[schema(`
			`exclusive_minimum = 0.0,`
			`maximum = 1.0,`
			`nullable = true,`
			`default = "null",`
			`example = 0.95`
			`)]`
			`pub top_p: Option<f32>,`
feat(router): ask hf.co for pipelinetag to decide on compat_return_full_text (#89) 2023-02-28 02:19:32 -07:00			`#[serde(default)]`
feat: support typical sampling (#114) closes #112 2023-03-09 03:33:57 -07:00			`#[schema(`
			`exclusive_minimum = 0.0,`
			`maximum = 1.0,`
			`nullable = true,`
			`default = "null",`
			`example = 0.95`
			`)]`
			`pub typical_p: Option<f32>,`
			`#[serde(default)]`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(default = "false", example = true)]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub do_sample: bool,`
			`#[serde(default = "default_max_new_tokens")]`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub max_new_tokens: u32,`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`#[serde(default)]`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[schema(nullable = true, default = "null", example = false)]`
feat(router): ask hf.co for pipelinetag to decide on compat_return_full_text (#89) 2023-02-28 02:19:32 -07:00			`pub return_full_text: Option<bool>,`
			`#[serde(default)]`
feat(router): add legacy route for api-inference support (#88) 2023-02-27 06:56:58 -07:00			`#[schema(inline, max_items = 4, example = json ! (["photographer"]))]`
feat: Support stop sequences (#7) 2022-12-12 10:25:22 -07:00			`pub stop: Vec<String>,`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`#[serde(default)]`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[schema(nullable = true, default = "null", example = "null")]`
feat(router): support left truncation (#115) closes #111 2023-03-09 05:10:30 -07:00			`pub truncate: Option<usize>,`
			`#[serde(default)]`
feat(server): add logits watermark (#90) 2023-03-02 04:30:41 -07:00			`#[schema(default = "false", example = true)]`
			`pub watermark: bool,`
			`#[serde(default)]`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(default = "true")]`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`pub details: bool,`
feat: Support sampling seeding (#37) Co-authored-by: Yannic Kilcher <yk@users.noreply.github.com> 2023-01-30 07:36:16 -07:00			`#[serde(default)]`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[schema(`
			`exclusive_minimum = 0,`
			`nullable = true,`
			`default = "null",`
			`example = "null"`
			`)]`
feat: Support sampling seeding (#37) Co-authored-by: Yannic Kilcher <yk@users.noreply.github.com> 2023-01-30 07:36:16 -07:00			`pub seed: Option<u64>,`
v0.1.0 2022-10-18 07:19:03 -06:00			`}`

			`fn default_max_new_tokens() -> u32 {`
			`20`
			`}`

			`fn default_parameters() -> GenerateParameters {`
			`GenerateParameters {`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`best_of: None,`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`temperature: None,`
			`repetition_penalty: None,`
			`top_k: None,`
			`top_p: None,`
feat: support typical sampling (#114) closes #112 2023-03-09 03:33:57 -07:00			`typical_p: None,`
feat(router): ask hf.co for pipelinetag to decide on compat_return_full_text (#89) 2023-02-28 02:19:32 -07:00			`do_sample: false,`
v0.1.0 2022-10-18 07:19:03 -06:00			`max_new_tokens: default_max_new_tokens(),`
feat(router): ask hf.co for pipelinetag to decide on compat_return_full_text (#89) 2023-02-28 02:19:32 -07:00			`return_full_text: None,`
feat(server): add logits watermark (#90) 2023-03-02 04:30:41 -07:00			`stop: Vec::new(),`
feat(router): support left truncation (#115) closes #111 2023-03-09 05:10:30 -07:00			`truncate: None,`
feat(server): add logits watermark (#90) 2023-03-02 04:30:41 -07:00			`watermark: false,`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`details: false,`
feat: Support sampling seeding (#37) Co-authored-by: Yannic Kilcher <yk@users.noreply.github.com> 2023-01-30 07:36:16 -07:00			`seed: None,`
v0.1.0 2022-10-18 07:19:03 -06:00			`}`
			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Clone, Debug, Deserialize, ToSchema)]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub(crate) struct GenerateRequest {`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(example = "My name is Olivier and I")]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub inputs: String,`
			`#[serde(default = "default_parameters")]`
			`pub parameters: GenerateParameters,`
			`}`

feat(router): add legacy route for api-inference support (#88) 2023-02-27 06:56:58 -07:00			`#[derive(Clone, Debug, Deserialize, ToSchema)]`
			`pub(crate) struct CompatGenerateRequest {`
			`#[schema(example = "My name is Olivier and I")]`
			`pub inputs: String,`
			`#[serde(default = "default_parameters")]`
			`pub parameters: GenerateParameters,`
			`#[serde(default)]`
			`#[allow(dead_code)]`
			`pub stream: bool,`
			`}`

			`impl From<CompatGenerateRequest> for GenerateRequest {`
			`fn from(req: CompatGenerateRequest) -> Self {`
			`Self {`
			`inputs: req.inputs,`
			`parameters: req.parameters,`
			`}`
			`}`
			`}`

feat(server): add special token bool (#85) 2023-02-24 07:55:57 -07:00			`#[derive(Debug, Serialize, ToSchema)]`
			`pub struct PrefillToken {`
			`#[schema(example = 0)]`
			`id: u32,`
			`#[schema(example = "test")]`
			`text: String,`
feat(router): add legacy route for api-inference support (#88) 2023-02-27 06:56:58 -07:00			`#[schema(nullable = true, example = - 0.34)]`
feat(server): add special token bool (#85) 2023-02-24 07:55:57 -07:00			`logprob: f32,`
			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Debug, Serialize, ToSchema)]`
			`pub struct Token {`
			`#[schema(example = 0)]`
			`id: u32,`
			`#[schema(example = "test")]`
			`text: String,`
feat(router): add legacy route for api-inference support (#88) 2023-02-27 06:56:58 -07:00			`#[schema(nullable = true, example = - 0.34)]`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`logprob: f32,`
feat(server): add special token bool (#85) 2023-02-24 07:55:57 -07:00			`#[schema(example = "false")]`
			`special: bool,`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`}`

			`#[derive(Serialize, ToSchema)]`
			`#[serde(rename_all(serialize = "snake_case"))]`
			`pub(crate) enum FinishReason {`
			`#[schema(rename = "length")]`
			`Length,`
			`#[serde(rename = "eos_token")]`
			`#[schema(rename = "eos_token")]`
			`EndOfSequenceToken,`
			`#[schema(rename = "stop_sequence")]`
			`StopSequence,`
			`}`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[derive(Serialize, ToSchema)]`
			`pub(crate) struct BestOfSequence {`
			`#[schema(example = "test")]`
			`pub generated_text: String,`
			`#[schema(example = "length")]`
			`pub finish_reason: FinishReason,`
			`#[schema(example = 1)]`
			`pub generated_tokens: u32,`
			`#[schema(nullable = true, example = 42)]`
			`pub seed: Option<u64>,`
			`pub prefill: Vec<PrefillToken>,`
			`pub tokens: Vec<Token>,`
			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Serialize, ToSchema)]`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`pub(crate) struct Details {`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(example = "length")]`
			`pub finish_reason: FinishReason,`
			`#[schema(example = 1)]`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`pub generated_tokens: u32,`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[schema(nullable = true, example = 42)]`
feat: Support sampling seeding (#37) Co-authored-by: Yannic Kilcher <yk@users.noreply.github.com> 2023-01-30 07:36:16 -07:00			`pub seed: Option<u64>,`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`pub prefill: Vec<PrefillToken>,`
			`pub tokens: Vec<Token>,`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[serde(skip_serializing_if = "Option::is_none")]`
			`pub best_of_sequences: Option<Vec<BestOfSequence>>,`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Serialize, ToSchema)]`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00			`pub(crate) struct GenerateResponse {`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(example = "test")]`
v0.1.0 2022-10-18 07:19:03 -06:00			`pub generated_text: String,`
feat: Return logprobs (#8) 2022-12-15 09:03:56 -07:00			`#[serde(skip_serializing_if = "Option::is_none")]`
			`pub details: Option<Details>,`
v0.1.0 2022-10-18 07:19:03 -06:00			`}`
feat(server): Support bitsandbytes 2022-10-27 06:25:29 -06:00
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Serialize, ToSchema)]`
			`pub(crate) struct StreamDetails {`
			`#[schema(example = "length")]`
			`pub finish_reason: FinishReason,`
			`#[schema(example = 1)]`
			`pub generated_tokens: u32,`
feat(router): add best_of parameter (#117) 2023-03-09 07:30:54 -07:00			`#[schema(nullable = true, example = 42)]`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`pub seed: Option<u64>,`
			`}`

			`#[derive(Serialize, ToSchema)]`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00			`pub(crate) struct StreamResponse {`
			`pub token: Token,`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(nullable = true, default = "null", example = "test")]`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00			`pub generated_text: Option<String>,`
feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[schema(nullable = true, default = "null")]`
			`pub details: Option<StreamDetails>,`
feat: Add token streaming using ServerSideEvents support (#41) 2023-01-31 09:04:00 -07:00			`}`

feat(router): refactor API and add openAPI schemas (#53) 2023-02-03 04:43:37 -07:00			`#[derive(Serialize, ToSchema)]`
feat(server): Support bitsandbytes 2022-10-27 06:25:29 -06:00			`pub(crate) struct ErrorResponse {`
			`pub error: String,`
feat(clients): Python client (#103) 2023-03-07 10:52:22 -07:00			`pub error_type: String,`
feat(server): Support bitsandbytes 2022-10-27 06:25:29 -06:00			`}`
feat(router): add tests to validation (#237) 2023-04-26 08:14:40 -06:00
			`#[cfg(test)]`
feat(router): new healthcheck that skips the queue (#244) Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Co-authored-by: OlivierDehaene <olivier@huggingface.co> 2023-04-26 12:23:54 -06:00			`mod tests {`
feat(router): add tests to validation (#237) 2023-04-26 08:14:40 -06:00			`use std::io::Write;`
			`use tokenizers::Tokenizer;`

feat(router): new healthcheck that skips the queue (#244) Co-authored-by: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Co-authored-by: OlivierDehaene <olivier@huggingface.co> 2023-04-26 12:23:54 -06:00			`pub(crate) async fn get_tokenizer() -> Tokenizer {`
			`if !std::path::Path::new("tokenizer.json").exists() {`
			`let content = reqwest::get("https://huggingface.co/gpt2/raw/main/tokenizer.json")`
			`.await`
			`.unwrap()`
			`.bytes()`
			`.await`
			`.unwrap();`
			`let mut file = std::fs::File::create("tokenizer.json").unwrap();`
feat(router): add tests to validation (#237) 2023-04-26 08:14:40 -06:00			`file.write_all(&content).unwrap();`
			`}`
			`Tokenizer::from_file("tokenizer.json").unwrap()`
			`}`
			`}`