From fef1a1c381d6a0d01b64d888032b4ed7f5bb113e Mon Sep 17 00:00:00 2001 From: OlivierDehaene Date: Thu, 30 Mar 2023 17:28:14 +0200 Subject: [PATCH] v0.4.3 (#152) --- Cargo.lock | 6 +++--- benchmark/Cargo.lock | 2 +- docs/openapi.json | 2 +- launcher/Cargo.toml | 2 +- router/Cargo.toml | 2 +- router/client/Cargo.toml | 2 +- router/src/main.rs | 10 +++------- server/pyproject.toml | 2 +- 8 files changed, 12 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a5c60a1b..8e469f23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2212,7 +2212,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.4.2" +version = "0.4.3" dependencies = [ "futures", "grpc-metadata", @@ -2229,7 +2229,7 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "0.4.2" +version = "0.4.3" dependencies = [ "clap 4.1.8", "ctrlc", @@ -2244,7 +2244,7 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "0.4.2" +version = "0.4.3" dependencies = [ "async-stream", "axum", diff --git a/benchmark/Cargo.lock b/benchmark/Cargo.lock index 872e797e..8441014b 100644 --- a/benchmark/Cargo.lock +++ b/benchmark/Cargo.lock @@ -2069,7 +2069,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.4.2" +version = "0.4.3" dependencies = [ "futures", "grpc-metadata", diff --git a/docs/openapi.json b/docs/openapi.json index e384f5c5..11fa1e35 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -11,7 +11,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "0.4.2" + "version": "0.4.3" }, "paths": { "/generate": { diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml index c238e9ac..318052c9 100644 --- a/launcher/Cargo.toml +++ b/launcher/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-launcher" -version = "0.4.2" +version = "0.4.3" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Launcher" diff --git a/router/Cargo.toml b/router/Cargo.toml index 43a89991..77577eea 100644 --- a/router/Cargo.toml +++ b/router/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-router" -version = "0.4.2" +version = "0.4.3" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Webserver" diff --git a/router/client/Cargo.toml b/router/client/Cargo.toml index 9e6242fa..401742dc 100644 --- a/router/client/Cargo.toml +++ b/router/client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-client" -version = "0.4.2" +version = "0.4.3" edition = "2021" [dependencies] diff --git a/router/src/main.rs b/router/src/main.rs index f6028574..81c6aeef 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -37,7 +37,7 @@ struct Args { max_waiting_tokens: usize, #[clap(default_value = "3000", long, short, env)] port: u16, - #[clap(default_value = "/tmp/text-generation-server-0", long, env)] + #[clap(default_value = "/tmp/text-generation-0", long, env)] master_shard_uds_path: String, #[clap(default_value = "bigscience/bloom", long, env)] tokenizer_name: String, @@ -76,8 +76,6 @@ fn main() -> Result<(), std::io::Error> { panic!("validation_workers must be > 0"); } - init_logging(otlp_endpoint, json_output); - // CORS allowed origins // map to go inside the option and then map to parse from String to HeaderValue // Finally, convert to AllowOrigin @@ -91,21 +89,17 @@ fn main() -> Result<(), std::io::Error> { // Tokenizer instance // This will only be used to validate payloads - tracing::info!("Loading tokenizer"); let local_path = Path::new(&tokenizer_name); let tokenizer = if local_path.exists() && local_path.is_dir() && local_path.join("tokenizer.json").exists() { // Load local tokenizer - tracing::info!("Found local tokenizer"); Tokenizer::from_file(local_path.join("tokenizer.json")).unwrap() } else { // Download and instantiate tokenizer // We need to download it outside of the Tokio runtime - tracing::info!("Downloading tokenizer"); Tokenizer::from_pretrained(tokenizer_name.clone(), None).unwrap() }; - tracing::info!("Tokenizer loaded"); // Launch Tokio runtime tokio::runtime::Builder::new_multi_thread() @@ -113,6 +107,8 @@ fn main() -> Result<(), std::io::Error> { .build() .unwrap() .block_on(async { + init_logging(otlp_endpoint, json_output); + // Get pipeline tag let model_info = reqwest::get(format!( "https://huggingface.co/api/models/{tokenizer_name}" diff --git a/server/pyproject.toml b/server/pyproject.toml index 948b3f49..e9dc624c 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation-server" -version = "0.4.2" +version = "0.4.3" description = "Text Generation Inference Python gRPC Server" authors = ["Olivier Dehaene "]