v0.4.3 (#152)
This commit is contained in:
parent
84722f3e33
commit
fef1a1c381
|
@ -2212,7 +2212,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-client"
|
name = "text-generation-client"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"grpc-metadata",
|
"grpc-metadata",
|
||||||
|
@ -2229,7 +2229,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-launcher"
|
name = "text-generation-launcher"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap 4.1.8",
|
"clap 4.1.8",
|
||||||
"ctrlc",
|
"ctrlc",
|
||||||
|
@ -2244,7 +2244,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-router"
|
name = "text-generation-router"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"axum",
|
"axum",
|
||||||
|
|
|
@ -2069,7 +2069,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-client"
|
name = "text-generation-client"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"grpc-metadata",
|
"grpc-metadata",
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
"name": "Apache 2.0",
|
"name": "Apache 2.0",
|
||||||
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
||||||
},
|
},
|
||||||
"version": "0.4.2"
|
"version": "0.4.3"
|
||||||
},
|
},
|
||||||
"paths": {
|
"paths": {
|
||||||
"/generate": {
|
"/generate": {
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "text-generation-launcher"
|
name = "text-generation-launcher"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Olivier Dehaene"]
|
authors = ["Olivier Dehaene"]
|
||||||
description = "Text Generation Launcher"
|
description = "Text Generation Launcher"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "text-generation-router"
|
name = "text-generation-router"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Olivier Dehaene"]
|
authors = ["Olivier Dehaene"]
|
||||||
description = "Text Generation Webserver"
|
description = "Text Generation Webserver"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "text-generation-client"
|
name = "text-generation-client"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
|
@ -37,7 +37,7 @@ struct Args {
|
||||||
max_waiting_tokens: usize,
|
max_waiting_tokens: usize,
|
||||||
#[clap(default_value = "3000", long, short, env)]
|
#[clap(default_value = "3000", long, short, env)]
|
||||||
port: u16,
|
port: u16,
|
||||||
#[clap(default_value = "/tmp/text-generation-server-0", long, env)]
|
#[clap(default_value = "/tmp/text-generation-0", long, env)]
|
||||||
master_shard_uds_path: String,
|
master_shard_uds_path: String,
|
||||||
#[clap(default_value = "bigscience/bloom", long, env)]
|
#[clap(default_value = "bigscience/bloom", long, env)]
|
||||||
tokenizer_name: String,
|
tokenizer_name: String,
|
||||||
|
@ -76,8 +76,6 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
panic!("validation_workers must be > 0");
|
panic!("validation_workers must be > 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
init_logging(otlp_endpoint, json_output);
|
|
||||||
|
|
||||||
// CORS allowed origins
|
// CORS allowed origins
|
||||||
// map to go inside the option and then map to parse from String to HeaderValue
|
// map to go inside the option and then map to parse from String to HeaderValue
|
||||||
// Finally, convert to AllowOrigin
|
// Finally, convert to AllowOrigin
|
||||||
|
@ -91,21 +89,17 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
|
|
||||||
// Tokenizer instance
|
// Tokenizer instance
|
||||||
// This will only be used to validate payloads
|
// This will only be used to validate payloads
|
||||||
tracing::info!("Loading tokenizer");
|
|
||||||
let local_path = Path::new(&tokenizer_name);
|
let local_path = Path::new(&tokenizer_name);
|
||||||
let tokenizer =
|
let tokenizer =
|
||||||
if local_path.exists() && local_path.is_dir() && local_path.join("tokenizer.json").exists()
|
if local_path.exists() && local_path.is_dir() && local_path.join("tokenizer.json").exists()
|
||||||
{
|
{
|
||||||
// Load local tokenizer
|
// Load local tokenizer
|
||||||
tracing::info!("Found local tokenizer");
|
|
||||||
Tokenizer::from_file(local_path.join("tokenizer.json")).unwrap()
|
Tokenizer::from_file(local_path.join("tokenizer.json")).unwrap()
|
||||||
} else {
|
} else {
|
||||||
// Download and instantiate tokenizer
|
// Download and instantiate tokenizer
|
||||||
// We need to download it outside of the Tokio runtime
|
// We need to download it outside of the Tokio runtime
|
||||||
tracing::info!("Downloading tokenizer");
|
|
||||||
Tokenizer::from_pretrained(tokenizer_name.clone(), None).unwrap()
|
Tokenizer::from_pretrained(tokenizer_name.clone(), None).unwrap()
|
||||||
};
|
};
|
||||||
tracing::info!("Tokenizer loaded");
|
|
||||||
|
|
||||||
// Launch Tokio runtime
|
// Launch Tokio runtime
|
||||||
tokio::runtime::Builder::new_multi_thread()
|
tokio::runtime::Builder::new_multi_thread()
|
||||||
|
@ -113,6 +107,8 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
.build()
|
.build()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.block_on(async {
|
.block_on(async {
|
||||||
|
init_logging(otlp_endpoint, json_output);
|
||||||
|
|
||||||
// Get pipeline tag
|
// Get pipeline tag
|
||||||
let model_info = reqwest::get(format!(
|
let model_info = reqwest::get(format!(
|
||||||
"https://huggingface.co/api/models/{tokenizer_name}"
|
"https://huggingface.co/api/models/{tokenizer_name}"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "text-generation-server"
|
name = "text-generation-server"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
description = "Text Generation Inference Python gRPC Server"
|
description = "Text Generation Inference Python gRPC Server"
|
||||||
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue