feat: add distributed tracing (#62)
This commit is contained in:
parent
e520d5b349
commit
9af454142a
|
@ -23,6 +23,8 @@ jobs:
|
||||||
toolchain: 1.65.0
|
toolchain: 1.65.0
|
||||||
override: true
|
override: true
|
||||||
components: rustfmt, clippy
|
components: rustfmt, clippy
|
||||||
|
- name: Install Protoc
|
||||||
|
uses: arduino/setup-protoc@v1
|
||||||
- name: Loading cache.
|
- name: Loading cache.
|
||||||
uses: actions/cache@v2
|
uses: actions/cache@v2
|
||||||
id: model_cache
|
id: model_cache
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,6 +2,7 @@
|
||||||
members = [
|
members = [
|
||||||
"router",
|
"router",
|
||||||
"router/client",
|
"router/client",
|
||||||
|
"router/grpc-metadata",
|
||||||
"launcher"
|
"launcher"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
10
Dockerfile
10
Dockerfile
|
@ -1,4 +1,10 @@
|
||||||
FROM rust:1.65 as router-builder
|
FROM rust:1.67 as router-builder
|
||||||
|
|
||||||
|
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
||||||
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
||||||
|
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
||||||
|
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
|
||||||
|
rm -f $PROTOC_ZIP
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
|
@ -10,7 +16,7 @@ WORKDIR /usr/src/router
|
||||||
|
|
||||||
RUN cargo install --path .
|
RUN cargo install --path .
|
||||||
|
|
||||||
FROM rust:1.65 as launcher-builder
|
FROM rust:1.67 as launcher-builder
|
||||||
|
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
|
|
25
README.md
25
README.md
|
@ -27,6 +27,7 @@ to power LLMs api-inference widgets.
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
- [API Documentation](#api-documentation)
|
- [API Documentation](#api-documentation)
|
||||||
- [A note on Shared Memory](#a-note-on-shared-memory-shm)
|
- [A note on Shared Memory](#a-note-on-shared-memory-shm)
|
||||||
|
- [Distributed Tracing](#distributed-tracing)
|
||||||
- [Local Install](#local-install)
|
- [Local Install](#local-install)
|
||||||
- [CUDA Kernels](#cuda-kernels)
|
- [CUDA Kernels](#cuda-kernels)
|
||||||
- [Run BLOOM](#run-bloom)
|
- [Run BLOOM](#run-bloom)
|
||||||
|
@ -46,6 +47,7 @@ to power LLMs api-inference widgets.
|
||||||
- Logits warpers (temperature scaling, topk, repetition penalty ...)
|
- Logits warpers (temperature scaling, topk, repetition penalty ...)
|
||||||
- Stop sequences
|
- Stop sequences
|
||||||
- Log probabilities
|
- Log probabilities
|
||||||
|
- Distributed tracing with Open Telemetry
|
||||||
|
|
||||||
## Officially supported models
|
## Officially supported models
|
||||||
|
|
||||||
|
@ -102,6 +104,11 @@ curl 127.0.0.1:8080/generate_stream \
|
||||||
You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route.
|
You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route.
|
||||||
The Swagger UI is also available at: [https://huggingface.github.io/text-generation-inference](https://huggingface.github.io/text-generation-inference).
|
The Swagger UI is also available at: [https://huggingface.github.io/text-generation-inference](https://huggingface.github.io/text-generation-inference).
|
||||||
|
|
||||||
|
### Distributed Tracing
|
||||||
|
|
||||||
|
`text-generation-inference` is instrumented with distributed tracing using OpenTelemetry. You can use this feature
|
||||||
|
by setting the address to an OTLP collector with the `--otlp-endpoint` argument.
|
||||||
|
|
||||||
### A note on Shared Memory (shm)
|
### A note on Shared Memory (shm)
|
||||||
|
|
||||||
[`NCCL`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/index.html) is a communication framework used by
|
[`NCCL`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/index.html) is a communication framework used by
|
||||||
|
@ -142,6 +149,24 @@ conda create -n text-generation-inference python=3.9
|
||||||
conda activate text-generation-inference
|
conda activate text-generation-inference
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You may also need to install Protoc.
|
||||||
|
|
||||||
|
On Linux:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
PROTOC_ZIP=protoc-21.12-linux-x86_64.zip
|
||||||
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP
|
||||||
|
sudo unzip -o $PROTOC_ZIP -d /usr/local bin/protoc
|
||||||
|
sudo unzip -o $PROTOC_ZIP -d /usr/local 'include/*'
|
||||||
|
rm -f $PROTOC_ZIP
|
||||||
|
```
|
||||||
|
|
||||||
|
On MacOS, using Homebrew:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
brew install protobuf
|
||||||
|
```
|
||||||
|
|
||||||
Then run:
|
Then run:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -38,7 +38,8 @@ function sample_example(inputs, max_new_tokens, name) {
|
||||||
parameters: {
|
parameters: {
|
||||||
max_new_tokens: max_new_tokens,
|
max_new_tokens: max_new_tokens,
|
||||||
do_sample: true,
|
do_sample: true,
|
||||||
top_p: 0.9
|
top_p: 0.9,
|
||||||
|
seed: 0
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
let params = {
|
let params = {
|
||||||
|
|
|
@ -6,14 +6,14 @@ authors = ["Olivier Dehaene"]
|
||||||
description = "Text Generation Launcher"
|
description = "Text Generation Launcher"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "4.0.15", features = ["derive", "env"] }
|
clap = { version = "4.1.4", features = ["derive", "env"] }
|
||||||
ctrlc = { version = "3.2.3", features = ["termination"] }
|
ctrlc = { version = "3.2.5", features = ["termination"] }
|
||||||
serde_json = "1.0.89"
|
serde_json = "1.0.93"
|
||||||
subprocess = "0.2.9"
|
subprocess = "0.2.9"
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
tracing-subscriber = { version = "0.3.16", features = ["json"] }
|
tracing-subscriber = { version = "0.3.16", features = ["json"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
float_eq = "1.0.1"
|
float_eq = "1.0.1"
|
||||||
reqwest = { version = "0.11.13", features = ["blocking", "json"] }
|
reqwest = { version = "0.11.14", features = ["blocking", "json"] }
|
||||||
serde = { version = "1.0.150", features = ["derive"] }
|
serde = { version = "1.0.152", features = ["derive"] }
|
||||||
|
|
|
@ -44,6 +44,8 @@ struct Args {
|
||||||
master_port: usize,
|
master_port: usize,
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
json_output: bool,
|
json_output: bool,
|
||||||
|
#[clap(long, env)]
|
||||||
|
otlp_endpoint: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> ExitCode {
|
fn main() -> ExitCode {
|
||||||
|
@ -62,6 +64,7 @@ fn main() -> ExitCode {
|
||||||
master_addr,
|
master_addr,
|
||||||
master_port,
|
master_port,
|
||||||
json_output,
|
json_output,
|
||||||
|
otlp_endpoint,
|
||||||
} = Args::parse();
|
} = Args::parse();
|
||||||
|
|
||||||
if json_output {
|
if json_output {
|
||||||
|
@ -99,6 +102,7 @@ fn main() -> ExitCode {
|
||||||
let status_sender = status_sender.clone();
|
let status_sender = status_sender.clone();
|
||||||
let shutdown = shutdown.clone();
|
let shutdown = shutdown.clone();
|
||||||
let shutdown_sender = shutdown_sender.clone();
|
let shutdown_sender = shutdown_sender.clone();
|
||||||
|
let otlp_endpoint = otlp_endpoint.clone();
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
shard_manager(
|
shard_manager(
|
||||||
model_id,
|
model_id,
|
||||||
|
@ -109,6 +113,7 @@ fn main() -> ExitCode {
|
||||||
num_shard,
|
num_shard,
|
||||||
master_addr,
|
master_addr,
|
||||||
master_port,
|
master_port,
|
||||||
|
otlp_endpoint,
|
||||||
status_sender,
|
status_sender,
|
||||||
shutdown,
|
shutdown,
|
||||||
shutdown_sender,
|
shutdown_sender,
|
||||||
|
@ -165,7 +170,7 @@ fn main() -> ExitCode {
|
||||||
"--port".to_string(),
|
"--port".to_string(),
|
||||||
port.to_string(),
|
port.to_string(),
|
||||||
"--master-shard-uds-path".to_string(),
|
"--master-shard-uds-path".to_string(),
|
||||||
format!("{}-0", shard_uds_path),
|
format!("{shard_uds_path}-0"),
|
||||||
"--tokenizer-name".to_string(),
|
"--tokenizer-name".to_string(),
|
||||||
model_id,
|
model_id,
|
||||||
];
|
];
|
||||||
|
@ -174,6 +179,12 @@ fn main() -> ExitCode {
|
||||||
argv.push("--json-output".to_string());
|
argv.push("--json-output".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OpenTelemetry
|
||||||
|
if let Some(otlp_endpoint) = otlp_endpoint {
|
||||||
|
argv.push("--otlp-endpoint".to_string());
|
||||||
|
argv.push(otlp_endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
let mut webserver = match Popen::create(
|
let mut webserver = match Popen::create(
|
||||||
&argv,
|
&argv,
|
||||||
PopenConfig {
|
PopenConfig {
|
||||||
|
@ -264,12 +275,13 @@ fn shard_manager(
|
||||||
world_size: usize,
|
world_size: usize,
|
||||||
master_addr: String,
|
master_addr: String,
|
||||||
master_port: usize,
|
master_port: usize,
|
||||||
|
otlp_endpoint: Option<String>,
|
||||||
status_sender: mpsc::Sender<ShardStatus>,
|
status_sender: mpsc::Sender<ShardStatus>,
|
||||||
shutdown: Arc<Mutex<bool>>,
|
shutdown: Arc<Mutex<bool>>,
|
||||||
_shutdown_sender: mpsc::Sender<()>,
|
_shutdown_sender: mpsc::Sender<()>,
|
||||||
) {
|
) {
|
||||||
// Get UDS path
|
// Get UDS path
|
||||||
let uds_string = format!("{}-{}", uds_path, rank);
|
let uds_string = format!("{uds_path}-{rank}");
|
||||||
let uds = Path::new(&uds_string);
|
let uds = Path::new(&uds_string);
|
||||||
// Clean previous runs
|
// Clean previous runs
|
||||||
fs::remove_file(uds).unwrap_or_default();
|
fs::remove_file(uds).unwrap_or_default();
|
||||||
|
@ -286,6 +298,7 @@ fn shard_manager(
|
||||||
"--json-output".to_string(),
|
"--json-output".to_string(),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Activate tensor parallelism
|
||||||
if world_size > 1 {
|
if world_size > 1 {
|
||||||
shard_argv.push("--sharded".to_string());
|
shard_argv.push("--sharded".to_string());
|
||||||
}
|
}
|
||||||
|
@ -294,11 +307,18 @@ fn shard_manager(
|
||||||
shard_argv.push("--quantize".to_string())
|
shard_argv.push("--quantize".to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Model optional revision
|
||||||
if let Some(revision) = revision {
|
if let Some(revision) = revision {
|
||||||
shard_argv.push("--revision".to_string());
|
shard_argv.push("--revision".to_string());
|
||||||
shard_argv.push(revision)
|
shard_argv.push(revision)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OpenTelemetry
|
||||||
|
if let Some(otlp_endpoint) = otlp_endpoint {
|
||||||
|
shard_argv.push("--otlp-endpoint".to_string());
|
||||||
|
shard_argv.push(otlp_endpoint);
|
||||||
|
}
|
||||||
|
|
||||||
let mut env = vec![
|
let mut env = vec![
|
||||||
("RANK".into(), rank.to_string().into()),
|
("RANK".into(), rank.to_string().into()),
|
||||||
("WORLD_SIZE".into(), world_size.to_string().into()),
|
("WORLD_SIZE".into(), world_size.to_string().into()),
|
||||||
|
|
|
@ -15,20 +15,24 @@ path = "src/main.rs"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-stream = "0.3.3"
|
async-stream = "0.3.3"
|
||||||
axum = { version = "0.6.4", features = ["json"] }
|
axum = { version = "0.6.4", features = ["json"] }
|
||||||
|
axum-tracing-opentelemetry = "0.9.0"
|
||||||
text-generation-client = { path = "client" }
|
text-generation-client = { path = "client" }
|
||||||
clap = { version = "4.0.15", features = ["derive", "env"] }
|
clap = { version = "4.1.4", features = ["derive", "env"] }
|
||||||
futures = "0.3.24"
|
futures = "0.3.26"
|
||||||
nohash-hasher = "0.2.0"
|
nohash-hasher = "0.2.0"
|
||||||
|
opentelemetry = { version = "0.18.0", features = ["rt-tokio"] }
|
||||||
|
opentelemetry-otlp = "0.11.0"
|
||||||
parking_lot = "0.12.1"
|
parking_lot = "0.12.1"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
serde = "1.0.145"
|
serde = "1.0.152"
|
||||||
serde_json = "1.0.85"
|
serde_json = "1.0.93"
|
||||||
thiserror = "1.0.37"
|
thiserror = "1.0.38"
|
||||||
tokenizers = "0.13.0"
|
tokenizers = "0.13.2"
|
||||||
tokio = { version = "1.21.1", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
||||||
tokio-stream = "0.1.11"
|
tokio-stream = "0.1.11"
|
||||||
tracing = "0.1.36"
|
tracing = "0.1.37"
|
||||||
tracing-subscriber = { version = "0.3.15", features = ["json"] }
|
tracing-opentelemetry = "0.18.0"
|
||||||
|
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
|
||||||
utoipa = { version = "3.0.1", features = ["axum_extras"] }
|
utoipa = { version = "3.0.1", features = ["axum_extras"] }
|
||||||
utoipa-swagger-ui = { version = "3.0.2", features = ["axum"] }
|
utoipa-swagger-ui = { version = "3.0.2", features = ["axum"] }
|
||||||
|
|
||||||
|
|
|
@ -5,13 +5,15 @@ edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
futures = "^0.3"
|
futures = "^0.3"
|
||||||
prost = "^0.9"
|
grpc-metadata = { path = "../grpc-metadata" }
|
||||||
|
prost = "^0.11"
|
||||||
thiserror = "^1.0"
|
thiserror = "^1.0"
|
||||||
tokio = { version = "^1.21", features = ["sync"] }
|
tokio = { version = "^1.25", features = ["sync"] }
|
||||||
tonic = "^0.6"
|
tonic = "^0.8"
|
||||||
tower = "^0.4"
|
tower = "^0.4"
|
||||||
tracing = "^0.1"
|
tracing = "^0.1"
|
||||||
tracing-error = "^0.2"
|
tracing-error = "^0.2"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
tonic-build = "0.6.2"
|
tonic-build = "0.8.4"
|
||||||
|
prost-build = "0.11.6"
|
||||||
|
|
|
@ -3,13 +3,17 @@ use std::fs;
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
println!("cargo:rerun-if-changed=../../proto/generate.proto");
|
println!("cargo:rerun-if-changed=../../proto/generate.proto");
|
||||||
fs::create_dir("src/pb").unwrap_or(());
|
fs::create_dir("src/pb").unwrap_or(());
|
||||||
|
|
||||||
|
let mut config = prost_build::Config::new();
|
||||||
|
config.protoc_arg("--experimental_allow_proto3_optional");
|
||||||
|
|
||||||
tonic_build::configure()
|
tonic_build::configure()
|
||||||
.build_client(true)
|
.build_client(true)
|
||||||
.build_server(false)
|
.build_server(false)
|
||||||
.out_dir("src/pb")
|
.out_dir("src/pb")
|
||||||
.include_file("mod.rs")
|
.include_file("mod.rs")
|
||||||
.compile(&["../../proto/generate.proto"], &["../../proto"])
|
.compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
|
||||||
.unwrap_or_else(|e| panic!("protobuf compilation failed: {}", e));
|
.unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
use crate::pb::generate::v1::text_generation_service_client::TextGenerationServiceClient;
|
use crate::pb::generate::v1::text_generation_service_client::TextGenerationServiceClient;
|
||||||
use crate::pb::generate::v1::*;
|
use crate::pb::generate::v1::*;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
use grpc_metadata::InjectTelemetryContext;
|
||||||
use tonic::transport::{Channel, Uri};
|
use tonic::transport::{Channel, Uri};
|
||||||
use tracing::*;
|
use tracing::instrument;
|
||||||
|
|
||||||
/// Text Generation Inference gRPC client
|
/// Text Generation Inference gRPC client
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -38,12 +39,8 @@ impl Client {
|
||||||
/// Returns a list of uris or unix sockets of all shards
|
/// Returns a list of uris or unix sockets of all shards
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub async fn service_discovery(&mut self) -> Result<Vec<String>> {
|
pub async fn service_discovery(&mut self) -> Result<Vec<String>> {
|
||||||
let request = tonic::Request::new(ServiceDiscoveryRequest {});
|
let request = tonic::Request::new(ServiceDiscoveryRequest {}).inject_context();
|
||||||
let response = self
|
let response = self.stub.service_discovery(request).await?;
|
||||||
.stub
|
|
||||||
.service_discovery(request)
|
|
||||||
.instrument(info_span!("service_discovery"))
|
|
||||||
.await?;
|
|
||||||
let urls = response
|
let urls = response
|
||||||
.into_inner()
|
.into_inner()
|
||||||
.urls
|
.urls
|
||||||
|
@ -60,11 +57,8 @@ impl Client {
|
||||||
/// Clear the past generations cache
|
/// Clear the past generations cache
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip(self))]
|
||||||
pub async fn clear_cache(&mut self) -> Result<()> {
|
pub async fn clear_cache(&mut self) -> Result<()> {
|
||||||
let request = tonic::Request::new(ClearCacheRequest {});
|
let request = tonic::Request::new(ClearCacheRequest {}).inject_context();
|
||||||
self.stub
|
self.stub.clear_cache(request).await?;
|
||||||
.clear_cache(request)
|
|
||||||
.instrument(info_span!("clear_cache"))
|
|
||||||
.await?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,15 +66,10 @@ impl Client {
|
||||||
///
|
///
|
||||||
/// Returns Generation for each request in batch
|
/// Returns Generation for each request in batch
|
||||||
/// and the next cached batch
|
/// and the next cached batch
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
|
||||||
pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
|
pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
|
||||||
let request = tonic::Request::new(PrefillRequest { batch: Some(batch) });
|
let request = tonic::Request::new(PrefillRequest { batch: Some(batch) }).inject_context();
|
||||||
let response = self
|
let response = self.stub.prefill(request).await?.into_inner();
|
||||||
.stub
|
|
||||||
.prefill(request)
|
|
||||||
.instrument(info_span!("prefill"))
|
|
||||||
.await?
|
|
||||||
.into_inner();
|
|
||||||
Ok((response.generations, response.batch))
|
Ok((response.generations, response.batch))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,18 +77,13 @@ impl Client {
|
||||||
///
|
///
|
||||||
/// Returns Generation for each request in batches
|
/// Returns Generation for each request in batches
|
||||||
/// and the next cached batch
|
/// and the next cached batch
|
||||||
#[instrument(skip(self))]
|
#[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
|
||||||
pub async fn decode(
|
pub async fn decode(
|
||||||
&mut self,
|
&mut self,
|
||||||
batches: Vec<Batch>,
|
batches: Vec<Batch>,
|
||||||
) -> Result<(Vec<Generation>, Option<Batch>)> {
|
) -> Result<(Vec<Generation>, Option<Batch>)> {
|
||||||
let request = tonic::Request::new(DecodeRequest { batches });
|
let request = tonic::Request::new(DecodeRequest { batches }).inject_context();
|
||||||
let response = self
|
let response = self.stub.decode(request).await?.into_inner();
|
||||||
.stub
|
|
||||||
.decode(request)
|
|
||||||
.instrument(info_span!("decode"))
|
|
||||||
.await?
|
|
||||||
.into_inner();
|
|
||||||
Ok((response.generations, response.batch))
|
Ok((response.generations, response.batch))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,21 +17,25 @@ use tonic::Status;
|
||||||
|
|
||||||
#[derive(Error, Debug, Clone)]
|
#[derive(Error, Debug, Clone)]
|
||||||
pub enum ClientError {
|
pub enum ClientError {
|
||||||
#[error("Could not connect to Text Generation server: {0:?}")]
|
#[error("Could not connect to Text Generation server: {0}")]
|
||||||
Connection(String),
|
Connection(String),
|
||||||
#[error("Server error: {0:?}")]
|
#[error("Server error: {0}")]
|
||||||
Generation(String),
|
Generation(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Status> for ClientError {
|
impl From<Status> for ClientError {
|
||||||
fn from(err: Status) -> Self {
|
fn from(err: Status) -> Self {
|
||||||
Self::Generation(err.message().to_string())
|
let err = Self::Generation(err.message().to_string());
|
||||||
|
tracing::error!("{err}");
|
||||||
|
err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<transport::Error> for ClientError {
|
impl From<transport::Error> for ClientError {
|
||||||
fn from(err: transport::Error) -> Self {
|
fn from(err: transport::Error) -> Self {
|
||||||
Self::Connection(err.to_string())
|
let err = Self::Connection(err.to_string());
|
||||||
|
tracing::error!("{err}");
|
||||||
|
err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ use crate::{Batch, Client, Generation};
|
||||||
use futures::future::join_all;
|
use futures::future::join_all;
|
||||||
use futures::future::select_all;
|
use futures::future::select_all;
|
||||||
use tonic::transport::Uri;
|
use tonic::transport::Uri;
|
||||||
|
use tracing::instrument;
|
||||||
|
|
||||||
/// Text Generation Inference gRPC multi client
|
/// Text Generation Inference gRPC multi client
|
||||||
pub struct ShardedClient {
|
pub struct ShardedClient {
|
||||||
|
@ -38,6 +39,7 @@ impl ShardedClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clear the past generations cache
|
/// Clear the past generations cache
|
||||||
|
#[instrument(skip(self))]
|
||||||
pub async fn clear_cache(&mut self) -> Result<()> {
|
pub async fn clear_cache(&mut self) -> Result<()> {
|
||||||
let futures: Vec<_> = self
|
let futures: Vec<_> = self
|
||||||
.clients
|
.clients
|
||||||
|
@ -51,6 +53,7 @@ impl ShardedClient {
|
||||||
///
|
///
|
||||||
/// Returns Generation for each request in batch
|
/// Returns Generation for each request in batch
|
||||||
/// and the next cached batch
|
/// and the next cached batch
|
||||||
|
#[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
|
||||||
pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
|
pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
|
||||||
let futures: Vec<_> = self
|
let futures: Vec<_> = self
|
||||||
.clients
|
.clients
|
||||||
|
@ -66,6 +69,7 @@ impl ShardedClient {
|
||||||
///
|
///
|
||||||
/// Returns Generation for each request in batches
|
/// Returns Generation for each request in batches
|
||||||
/// and the next cached batch
|
/// and the next cached batch
|
||||||
|
#[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
|
||||||
pub async fn decode(
|
pub async fn decode(
|
||||||
&mut self,
|
&mut self,
|
||||||
batches: Vec<Batch>,
|
batches: Vec<Batch>,
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
[package]
|
||||||
|
name = "grpc-metadata"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
opentelemetry = "0.18.0"
|
||||||
|
tonic = "^0.8"
|
||||||
|
tracing = "^0.1"
|
||||||
|
tracing-opentelemetry = "0.18.0"
|
|
@ -0,0 +1,62 @@
|
||||||
|
//! A crate to extract and inject a OpenTelemetry context from and to a gRPC request.
|
||||||
|
//! Inspired by: https://github.com/open-telemetry/opentelemetry-rust gRPC examples
|
||||||
|
|
||||||
|
use opentelemetry::global;
|
||||||
|
use opentelemetry::propagation::{Extractor, Injector};
|
||||||
|
use tracing_opentelemetry::OpenTelemetrySpanExt;
|
||||||
|
|
||||||
|
/// Extract context metadata from a gRPC request's metadata
|
||||||
|
struct MetadataExtractor<'a>(pub &'a tonic::metadata::MetadataMap);
|
||||||
|
|
||||||
|
impl<'a> Extractor for MetadataExtractor<'a> {
|
||||||
|
/// Get a value for a key from the MetadataMap. If the value can't be converted to &str, returns None
|
||||||
|
fn get(&self, key: &str) -> Option<&str> {
|
||||||
|
self.0.get(key).and_then(|metadata| metadata.to_str().ok())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all the keys from the MetadataMap.
|
||||||
|
fn keys(&self) -> Vec<&str> {
|
||||||
|
self.0
|
||||||
|
.keys()
|
||||||
|
.map(|key| match key {
|
||||||
|
tonic::metadata::KeyRef::Ascii(v) => v.as_str(),
|
||||||
|
tonic::metadata::KeyRef::Binary(v) => v.as_str(),
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inject context in the metadata of a gRPC request.
|
||||||
|
struct MetadataInjector<'a>(pub &'a mut tonic::metadata::MetadataMap);
|
||||||
|
|
||||||
|
impl<'a> Injector for MetadataInjector<'a> {
|
||||||
|
/// Set a key and value in the MetadataMap. Does nothing if the key or value are not valid inputs
|
||||||
|
fn set(&mut self, key: &str, value: String) {
|
||||||
|
if let Ok(key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) {
|
||||||
|
if let Ok(val) = value.parse() {
|
||||||
|
self.0.insert(key, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a context from the global context and inject the span into a gRPC request's metadata.
|
||||||
|
fn inject(metadata: &mut tonic::metadata::MetadataMap) {
|
||||||
|
global::get_text_map_propagator(|propagator| {
|
||||||
|
propagator.inject_context(
|
||||||
|
&tracing::Span::current().context(),
|
||||||
|
&mut MetadataInjector(metadata),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait InjectTelemetryContext {
|
||||||
|
fn inject_context(self) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> InjectTelemetryContext for tonic::Request<T> {
|
||||||
|
fn inject_context(mut self) -> Self {
|
||||||
|
inject(self.metadata_mut());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,7 +13,7 @@ use tokio::sync::{mpsc, Notify, Semaphore, TryAcquireError};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
use tracing::instrument;
|
use tracing::{info_span, instrument, Instrument, Span};
|
||||||
|
|
||||||
/// Inference struct
|
/// Inference struct
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -69,13 +69,21 @@ impl Infer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a new request to the queue and return a stream of InferStreamResponse
|
/// Add a new request to the queue and return a stream of InferStreamResponse
|
||||||
|
#[instrument(skip(self))]
|
||||||
pub(crate) async fn generate_stream(
|
pub(crate) async fn generate_stream(
|
||||||
&self,
|
&self,
|
||||||
request: GenerateRequest,
|
request: GenerateRequest,
|
||||||
) -> Result<UnboundedReceiverStream<Result<InferStreamResponse, InferError>>, InferError> {
|
) -> Result<UnboundedReceiverStream<Result<InferStreamResponse, InferError>>, InferError> {
|
||||||
// Limit concurrent requests by acquiring a permit from the semaphore
|
// Limit concurrent requests by acquiring a permit from the semaphore
|
||||||
// This permit will live as long as Entry
|
// This permit will live as long as Entry
|
||||||
let permit = self.clone().limit_concurrent_requests.try_acquire_owned()?;
|
let permit = self
|
||||||
|
.clone()
|
||||||
|
.limit_concurrent_requests
|
||||||
|
.try_acquire_owned()
|
||||||
|
.map_err(|err| {
|
||||||
|
tracing::error!("{err}");
|
||||||
|
err
|
||||||
|
})?;
|
||||||
|
|
||||||
// Validate request
|
// Validate request
|
||||||
let valid_request = self.validation.validate(request).await?;
|
let valid_request = self.validation.validate(request).await?;
|
||||||
|
@ -87,7 +95,9 @@ impl Infer {
|
||||||
self.queue.append(Entry {
|
self.queue.append(Entry {
|
||||||
request: valid_request,
|
request: valid_request,
|
||||||
response_tx,
|
response_tx,
|
||||||
time: Instant::now(),
|
span: Span::current(),
|
||||||
|
temp_span: None,
|
||||||
|
queue_time: Instant::now(),
|
||||||
batch_time: None,
|
batch_time: None,
|
||||||
_permit: permit,
|
_permit: permit,
|
||||||
});
|
});
|
||||||
|
@ -101,6 +111,7 @@ impl Infer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a new request to the queue and return a InferResponse
|
/// Add a new request to the queue and return a InferResponse
|
||||||
|
#[instrument(skip(self))]
|
||||||
pub(crate) async fn generate(
|
pub(crate) async fn generate(
|
||||||
&self,
|
&self,
|
||||||
request: GenerateRequest,
|
request: GenerateRequest,
|
||||||
|
@ -160,7 +171,9 @@ impl Infer {
|
||||||
start,
|
start,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
Err(InferError::IncompleteGeneration)
|
let err = InferError::IncompleteGeneration;
|
||||||
|
tracing::error!("{err}");
|
||||||
|
Err(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -169,7 +182,6 @@ impl Infer {
|
||||||
/// Will be launched in a background Tokio task
|
/// Will be launched in a background Tokio task
|
||||||
///
|
///
|
||||||
/// Batches requests and sends them to the inference server
|
/// Batches requests and sends them to the inference server
|
||||||
#[instrument(skip(client, queue, shared))]
|
|
||||||
async fn batching_task(
|
async fn batching_task(
|
||||||
mut client: ShardedClient,
|
mut client: ShardedClient,
|
||||||
max_batch_size: usize,
|
max_batch_size: usize,
|
||||||
|
@ -188,8 +200,10 @@ async fn batching_task(
|
||||||
// Get the next batch from the queue
|
// Get the next batch from the queue
|
||||||
// This batch might be smaller than the maximum batch size if there are not enough requests
|
// This batch might be smaller than the maximum batch size if there are not enough requests
|
||||||
// waiting in the queue
|
// waiting in the queue
|
||||||
while let Some((mut entries, batch)) = queue.next_batch(None, max_batch_size).await {
|
while let Some((mut entries, batch, span)) = queue.next_batch(None, max_batch_size).await {
|
||||||
let mut cached_batch = wrap_future(client.prefill(batch), &mut entries).await;
|
let mut cached_batch = wrap_future(client.prefill(batch), &mut entries)
|
||||||
|
.instrument(span)
|
||||||
|
.await;
|
||||||
let mut waiting_tokens = 1;
|
let mut waiting_tokens = 1;
|
||||||
|
|
||||||
// We loop until we do not receive any cached batch from the inference server (== until
|
// We loop until we do not receive any cached batch from the inference server (== until
|
||||||
|
@ -210,13 +224,27 @@ async fn batching_task(
|
||||||
};
|
};
|
||||||
|
|
||||||
// Try to get a new batch
|
// Try to get a new batch
|
||||||
if let Some((mut new_entries, new_batch)) = queue
|
if let Some((mut new_entries, new_batch, span)) = queue
|
||||||
.next_batch(min_size, max_batch_size - batch_size as usize)
|
.next_batch(min_size, max_batch_size - batch_size as usize)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
let new_batch_size = new_batch.size;
|
||||||
|
entries.iter_mut().for_each(|(_, entry)| {
|
||||||
|
// Create a new span to add the info that this entry is waiting
|
||||||
|
// because a new batch is being computed
|
||||||
|
let entry_waiting_span =
|
||||||
|
info_span!(parent: &entry.span, "waiting", batch_size = new_batch_size);
|
||||||
|
// Add relationship
|
||||||
|
entry_waiting_span.follows_from(&span);
|
||||||
|
// Update entry
|
||||||
|
entry.temp_span = Some(entry_waiting_span);
|
||||||
|
});
|
||||||
|
|
||||||
// Generate one token for this new batch to have the attention past in cache
|
// Generate one token for this new batch to have the attention past in cache
|
||||||
let new_cached_batch =
|
let new_cached_batch =
|
||||||
wrap_future(client.prefill(new_batch), &mut new_entries).await;
|
wrap_future(client.prefill(new_batch), &mut new_entries)
|
||||||
|
.instrument(span)
|
||||||
|
.await;
|
||||||
// Reset waiting counter
|
// Reset waiting counter
|
||||||
waiting_tokens = 1;
|
waiting_tokens = 1;
|
||||||
// Extend current batch with the new batch
|
// Extend current batch with the new batch
|
||||||
|
@ -226,8 +254,23 @@ async fn batching_task(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Create span for this batch to add context to inference calls
|
||||||
|
let next_batch_size = entries.len();
|
||||||
|
let next_batch_span =
|
||||||
|
info_span!(parent: None, "batch", batch_size = next_batch_size);
|
||||||
|
entries.iter_mut().for_each(|(_, entry)| {
|
||||||
|
// Create a new span to link the batch back to this entry
|
||||||
|
let entry_batch_span =
|
||||||
|
info_span!(parent: &entry.span, "infer", batch_size = next_batch_size);
|
||||||
|
// Add relationship
|
||||||
|
entry_batch_span.follows_from(&next_batch_span);
|
||||||
|
// Update entry
|
||||||
|
entry.temp_span = Some(entry_batch_span);
|
||||||
|
});
|
||||||
|
|
||||||
cached_batch = wrap_future(client.decode(batches), &mut entries).await;
|
cached_batch = wrap_future(client.decode(batches), &mut entries)
|
||||||
|
.instrument(next_batch_span)
|
||||||
|
.await;
|
||||||
waiting_tokens += 1;
|
waiting_tokens += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -235,6 +278,7 @@ async fn batching_task(
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Wrap a future inside a match statement to handle errors and send the responses to Infer
|
/// Wrap a future inside a match statement to handle errors and send the responses to Infer
|
||||||
|
#[instrument(skip_all)]
|
||||||
async fn wrap_future(
|
async fn wrap_future(
|
||||||
future: impl Future<Output = Result<(Vec<Generation>, Option<Batch>), ClientError>>,
|
future: impl Future<Output = Result<(Vec<Generation>, Option<Batch>), ClientError>>,
|
||||||
entries: &mut IntMap<u64, Entry>,
|
entries: &mut IntMap<u64, Entry>,
|
||||||
|
@ -246,24 +290,31 @@ async fn wrap_future(
|
||||||
}
|
}
|
||||||
// If we have an error, we discard the whole batch
|
// If we have an error, we discard the whole batch
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
send_error(err, entries);
|
send_errors(err, entries);
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send errors to Infer for all `entries`
|
/// Send errors to Infer for all `entries`
|
||||||
fn send_error(error: ClientError, entries: &mut IntMap<u64, Entry>) {
|
#[instrument(skip_all)]
|
||||||
|
fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
|
||||||
entries.drain().for_each(|(_, entry)| {
|
entries.drain().for_each(|(_, entry)| {
|
||||||
|
// Create and enter a span to link this function back to the entry
|
||||||
|
let _send_error_span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_error").entered();
|
||||||
|
let err = InferError::GenerationError(error.to_string());
|
||||||
|
tracing::error!("{err}");
|
||||||
|
|
||||||
// unwrap_or is valid here as we don't care if the receiver is gone.
|
// unwrap_or is valid here as we don't care if the receiver is gone.
|
||||||
entry
|
entry
|
||||||
.response_tx
|
.response_tx
|
||||||
.send(Err(InferError::GenerationError(error.to_string())))
|
.send(Err(err))
|
||||||
.unwrap_or(());
|
.unwrap_or(());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send one or multiple `InferStreamResponse` to Infer for all `entries`
|
/// Send one or multiple `InferStreamResponse` to Infer for all `entries`
|
||||||
|
#[instrument(skip_all)]
|
||||||
fn send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entry>) {
|
fn send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entry>) {
|
||||||
generations.into_iter().for_each(|generation| {
|
generations.into_iter().for_each(|generation| {
|
||||||
// Get entry
|
// Get entry
|
||||||
|
@ -272,6 +323,9 @@ fn send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entr
|
||||||
.get(&generation.request_id)
|
.get(&generation.request_id)
|
||||||
.expect("ID not found in entries. This is a bug.");
|
.expect("ID not found in entries. This is a bug.");
|
||||||
|
|
||||||
|
// Create and enter a span to link this function back to the entry
|
||||||
|
let _generation_span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_generation", generation = ?generation).entered();
|
||||||
|
|
||||||
if let Some(prefill_tokens) = generation.prefill_tokens {
|
if let Some(prefill_tokens) = generation.prefill_tokens {
|
||||||
// Send message
|
// Send message
|
||||||
// unwrap_or is valid here as we don't care if the receiver is gone.
|
// unwrap_or is valid here as we don't care if the receiver is gone.
|
||||||
|
@ -302,7 +356,7 @@ fn send_generations(generations: Vec<Generation>, entries: &mut IntMap<u64, Entr
|
||||||
.send(Ok(InferStreamResponse::End {
|
.send(Ok(InferStreamResponse::End {
|
||||||
token,
|
token,
|
||||||
generated_text,
|
generated_text,
|
||||||
queued: entry.time,
|
queued: entry.queue_time,
|
||||||
start: entry.batch_time.unwrap(),
|
start: entry.batch_time.unwrap(),
|
||||||
}))
|
}))
|
||||||
.unwrap_or(());
|
.unwrap_or(());
|
||||||
|
|
|
@ -1,9 +1,18 @@
|
||||||
use clap::Parser;
|
|
||||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
|
||||||
/// Text Generation Inference webserver entrypoint
|
/// Text Generation Inference webserver entrypoint
|
||||||
|
use clap::Parser;
|
||||||
|
use opentelemetry::sdk::propagation::TraceContextPropagator;
|
||||||
|
use opentelemetry::sdk::trace;
|
||||||
|
use opentelemetry::sdk::trace::Sampler;
|
||||||
|
use opentelemetry::sdk::Resource;
|
||||||
|
use opentelemetry::{global, KeyValue};
|
||||||
|
use opentelemetry_otlp::WithExportConfig;
|
||||||
|
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
|
||||||
use text_generation_client::ShardedClient;
|
use text_generation_client::ShardedClient;
|
||||||
use text_generation_router::server;
|
use text_generation_router::server;
|
||||||
use tokenizers::Tokenizer;
|
use tokenizers::Tokenizer;
|
||||||
|
use tracing_subscriber::layer::SubscriberExt;
|
||||||
|
use tracing_subscriber::util::SubscriberInitExt;
|
||||||
|
use tracing_subscriber::{EnvFilter, Layer};
|
||||||
|
|
||||||
/// App Configuration
|
/// App Configuration
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
|
@ -27,6 +36,8 @@ struct Args {
|
||||||
validation_workers: usize,
|
validation_workers: usize,
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
json_output: bool,
|
json_output: bool,
|
||||||
|
#[clap(long, env)]
|
||||||
|
otlp_endpoint: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<(), std::io::Error> {
|
fn main() -> Result<(), std::io::Error> {
|
||||||
|
@ -43,14 +54,9 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
tokenizer_name,
|
tokenizer_name,
|
||||||
validation_workers,
|
validation_workers,
|
||||||
json_output,
|
json_output,
|
||||||
|
otlp_endpoint,
|
||||||
} = args;
|
} = args;
|
||||||
|
|
||||||
if json_output {
|
|
||||||
tracing_subscriber::fmt().json().init();
|
|
||||||
} else {
|
|
||||||
tracing_subscriber::fmt().compact().init();
|
|
||||||
}
|
|
||||||
|
|
||||||
if validation_workers == 0 {
|
if validation_workers == 0 {
|
||||||
panic!("validation_workers must be > 0");
|
panic!("validation_workers must be > 0");
|
||||||
}
|
}
|
||||||
|
@ -67,6 +73,8 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
.build()
|
.build()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.block_on(async {
|
.block_on(async {
|
||||||
|
init_logging(otlp_endpoint, json_output);
|
||||||
|
|
||||||
// Instantiate sharded client from the master unix socket
|
// Instantiate sharded client from the master unix socket
|
||||||
let mut sharded_client = ShardedClient::connect_uds(master_shard_uds_path)
|
let mut sharded_client = ShardedClient::connect_uds(master_shard_uds_path)
|
||||||
.await
|
.await
|
||||||
|
@ -96,3 +104,58 @@ fn main() -> Result<(), std::io::Error> {
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Init logging using env variables LOG_LEVEL and LOG_FORMAT:
|
||||||
|
/// - otlp_endpoint is an optional URL to an Open Telemetry collector
|
||||||
|
/// - LOG_LEVEL may be TRACE, DEBUG, INFO, WARN or ERROR (default to INFO)
|
||||||
|
/// - LOG_FORMAT may be TEXT or JSON (default to TEXT)
|
||||||
|
fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
|
||||||
|
let mut layers = Vec::new();
|
||||||
|
|
||||||
|
// STDOUT/STDERR layer
|
||||||
|
let fmt_layer = tracing_subscriber::fmt::layer()
|
||||||
|
.with_file(true)
|
||||||
|
.with_line_number(true);
|
||||||
|
|
||||||
|
let fmt_layer = match json_output {
|
||||||
|
true => fmt_layer.json().flatten_event(true).boxed(),
|
||||||
|
false => fmt_layer.boxed(),
|
||||||
|
};
|
||||||
|
layers.push(fmt_layer);
|
||||||
|
|
||||||
|
// OpenTelemetry tracing layer
|
||||||
|
if let Some(otlp_endpoint) = otlp_endpoint {
|
||||||
|
global::set_text_map_propagator(TraceContextPropagator::new());
|
||||||
|
|
||||||
|
let tracer = opentelemetry_otlp::new_pipeline()
|
||||||
|
.tracing()
|
||||||
|
.with_exporter(
|
||||||
|
opentelemetry_otlp::new_exporter()
|
||||||
|
.tonic()
|
||||||
|
.with_endpoint(otlp_endpoint),
|
||||||
|
)
|
||||||
|
.with_trace_config(
|
||||||
|
trace::config()
|
||||||
|
.with_resource(Resource::new(vec![KeyValue::new(
|
||||||
|
"service.name",
|
||||||
|
"text-generation-inference.router",
|
||||||
|
)]))
|
||||||
|
.with_sampler(Sampler::AlwaysOn),
|
||||||
|
)
|
||||||
|
.install_batch(opentelemetry::runtime::Tokio);
|
||||||
|
|
||||||
|
if let Ok(tracer) = tracer {
|
||||||
|
layers.push(tracing_opentelemetry::layer().with_tracer(tracer).boxed());
|
||||||
|
axum_tracing_opentelemetry::init_propagator().unwrap();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter events with LOG_LEVEL
|
||||||
|
let env_filter =
|
||||||
|
EnvFilter::try_from_env("LOG_LEVEL").unwrap_or_else(|_| EnvFilter::new("info"));
|
||||||
|
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(env_filter)
|
||||||
|
.with(layers)
|
||||||
|
.init();
|
||||||
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ use text_generation_client::{Batch, Request};
|
||||||
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
|
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
|
||||||
use tokio::sync::{mpsc, oneshot, OwnedSemaphorePermit};
|
use tokio::sync::{mpsc, oneshot, OwnedSemaphorePermit};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
|
use tracing::{info_span, instrument, Span};
|
||||||
|
|
||||||
/// Queue entry
|
/// Queue entry
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -15,8 +16,12 @@ pub(crate) struct Entry {
|
||||||
pub request: ValidGenerateRequest,
|
pub request: ValidGenerateRequest,
|
||||||
/// Response sender to communicate between the Infer struct and the batching_task
|
/// Response sender to communicate between the Infer struct and the batching_task
|
||||||
pub response_tx: UnboundedSender<Result<InferStreamResponse, InferError>>,
|
pub response_tx: UnboundedSender<Result<InferStreamResponse, InferError>>,
|
||||||
/// Instant when this entry was created
|
/// Span that will live as long as entry
|
||||||
pub time: Instant,
|
pub span: Span,
|
||||||
|
/// Temporary span used as a guard when logging inference, wait times...
|
||||||
|
pub temp_span: Option<Span>,
|
||||||
|
/// Instant when this entry was queued
|
||||||
|
pub queue_time: Instant,
|
||||||
/// Instant when this entry was added to a batch
|
/// Instant when this entry was added to a batch
|
||||||
pub batch_time: Option<Instant>,
|
pub batch_time: Option<Instant>,
|
||||||
/// Permit
|
/// Permit
|
||||||
|
@ -42,13 +47,17 @@ impl Queue {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Append an entry to the queue
|
/// Append an entry to the queue
|
||||||
|
#[instrument(skip_all)]
|
||||||
pub(crate) fn append(&self, entry: Entry) {
|
pub(crate) fn append(&self, entry: Entry) {
|
||||||
// Send append command to the background task managing the state
|
// Send append command to the background task managing the state
|
||||||
// Unwrap is safe here
|
// Unwrap is safe here
|
||||||
self.queue_sender.send(QueueCommand::Append(entry)).unwrap();
|
self.queue_sender
|
||||||
|
.send(QueueCommand::Append(entry, Span::current()))
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the next batch
|
// Get the next batch
|
||||||
|
#[instrument(skip(self))]
|
||||||
pub(crate) async fn next_batch(
|
pub(crate) async fn next_batch(
|
||||||
&self,
|
&self,
|
||||||
min_size: Option<usize>,
|
min_size: Option<usize>,
|
||||||
|
@ -63,6 +72,7 @@ impl Queue {
|
||||||
min_size,
|
min_size,
|
||||||
max_size,
|
max_size,
|
||||||
response_sender,
|
response_sender,
|
||||||
|
span: Span::current(),
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// Await on response channel
|
// Await on response channel
|
||||||
|
@ -77,15 +87,16 @@ async fn queue_task(mut receiver: UnboundedReceiver<QueueCommand>) {
|
||||||
|
|
||||||
while let Some(cmd) = receiver.recv().await {
|
while let Some(cmd) = receiver.recv().await {
|
||||||
match cmd {
|
match cmd {
|
||||||
QueueCommand::Append(entry) => state.append(entry),
|
QueueCommand::Append(entry, span) => span.in_scope(|| state.append(entry)),
|
||||||
QueueCommand::NextBatch {
|
QueueCommand::NextBatch {
|
||||||
min_size,
|
min_size,
|
||||||
max_size,
|
max_size,
|
||||||
response_sender,
|
response_sender,
|
||||||
} => {
|
span,
|
||||||
|
} => span.in_scope(|| {
|
||||||
let next_batch = state.next_batch(min_size, max_size);
|
let next_batch = state.next_batch(min_size, max_size);
|
||||||
response_sender.send(next_batch).unwrap_or(());
|
response_sender.send(next_batch).unwrap_or(());
|
||||||
}
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -113,7 +124,12 @@ impl State {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Append an entry to the queue
|
/// Append an entry to the queue
|
||||||
fn append(&mut self, entry: Entry) {
|
fn append(&mut self, mut entry: Entry) {
|
||||||
|
// Create a span that will live as long as the entry is in the queue waiting to be batched
|
||||||
|
let queue_span = info_span!(parent: &entry.span, "queued");
|
||||||
|
entry.temp_span = Some(queue_span);
|
||||||
|
|
||||||
|
// Push entry in the queue
|
||||||
self.entries.push((self.next_id, entry));
|
self.entries.push((self.next_id, entry));
|
||||||
self.next_id += 1;
|
self.next_id += 1;
|
||||||
}
|
}
|
||||||
|
@ -133,6 +149,10 @@ impl State {
|
||||||
|
|
||||||
let next_batch_size = min(self.entries.len(), max_size);
|
let next_batch_size = min(self.entries.len(), max_size);
|
||||||
|
|
||||||
|
// Create span for this batch to add context to inference calls
|
||||||
|
let next_batch_span = info_span!(parent: None, "batch", batch_size = next_batch_size);
|
||||||
|
next_batch_span.follows_from(&Span::current());
|
||||||
|
|
||||||
let mut batch_requests = Vec::with_capacity(next_batch_size);
|
let mut batch_requests = Vec::with_capacity(next_batch_size);
|
||||||
let mut batch_entries =
|
let mut batch_entries =
|
||||||
IntMap::with_capacity_and_hasher(next_batch_size, BuildNoHashHasher::default());
|
IntMap::with_capacity_and_hasher(next_batch_size, BuildNoHashHasher::default());
|
||||||
|
@ -141,6 +161,14 @@ impl State {
|
||||||
self.entries
|
self.entries
|
||||||
.drain(..next_batch_size)
|
.drain(..next_batch_size)
|
||||||
.for_each(|(id, mut entry)| {
|
.for_each(|(id, mut entry)| {
|
||||||
|
// Create a new span to link the batch back to this entry
|
||||||
|
let entry_batch_span =
|
||||||
|
info_span!(parent: &entry.span, "infer", batch_size = next_batch_size);
|
||||||
|
// Add relationship
|
||||||
|
entry_batch_span.follows_from(&next_batch_span);
|
||||||
|
// Update entry
|
||||||
|
entry.temp_span = Some(entry_batch_span);
|
||||||
|
|
||||||
batch_requests.push(Request {
|
batch_requests.push(Request {
|
||||||
id,
|
id,
|
||||||
inputs: entry.request.inputs.clone(),
|
inputs: entry.request.inputs.clone(),
|
||||||
|
@ -162,19 +190,20 @@ impl State {
|
||||||
// Increment batch id
|
// Increment batch id
|
||||||
self.next_batch_id += 1;
|
self.next_batch_id += 1;
|
||||||
|
|
||||||
Some((batch_entries, batch))
|
Some((batch_entries, batch, next_batch_span))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type NextBatch = (IntMap<u64, Entry>, Batch);
|
type NextBatch = (IntMap<u64, Entry>, Batch, Span);
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum QueueCommand {
|
enum QueueCommand {
|
||||||
Append(Entry),
|
Append(Entry, Span),
|
||||||
NextBatch {
|
NextBatch {
|
||||||
min_size: Option<usize>,
|
min_size: Option<usize>,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
response_sender: oneshot::Sender<Option<NextBatch>>,
|
response_sender: oneshot::Sender<Option<NextBatch>>,
|
||||||
|
span: Span,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,6 +213,7 @@ mod tests {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
|
use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
|
||||||
use tokio::sync::{mpsc, Semaphore};
|
use tokio::sync::{mpsc, Semaphore};
|
||||||
|
use tracing::info_span;
|
||||||
|
|
||||||
fn default_entry() -> Entry {
|
fn default_entry() -> Entry {
|
||||||
let semaphore = Arc::new(Semaphore::new(1));
|
let semaphore = Arc::new(Semaphore::new(1));
|
||||||
|
@ -208,7 +238,9 @@ mod tests {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
response_tx,
|
response_tx,
|
||||||
time: Instant::now(),
|
span: info_span!("entry"),
|
||||||
|
temp_span: None,
|
||||||
|
queue_time: Instant::now(),
|
||||||
batch_time: None,
|
batch_time: None,
|
||||||
_permit: permit,
|
_permit: permit,
|
||||||
}
|
}
|
||||||
|
@ -244,7 +276,7 @@ mod tests {
|
||||||
state.append(default_entry());
|
state.append(default_entry());
|
||||||
state.append(default_entry());
|
state.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = state.next_batch(None, 2).unwrap();
|
let (entries, batch, _) = state.next_batch(None, 2).unwrap();
|
||||||
assert_eq!(entries.len(), 2);
|
assert_eq!(entries.len(), 2);
|
||||||
assert!(entries.contains_key(&0));
|
assert!(entries.contains_key(&0));
|
||||||
assert!(entries.contains_key(&1));
|
assert!(entries.contains_key(&1));
|
||||||
|
@ -273,7 +305,7 @@ mod tests {
|
||||||
state.append(default_entry());
|
state.append(default_entry());
|
||||||
state.append(default_entry());
|
state.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = state.next_batch(None, 1).unwrap();
|
let (entries, batch, _) = state.next_batch(None, 1).unwrap();
|
||||||
assert_eq!(entries.len(), 1);
|
assert_eq!(entries.len(), 1);
|
||||||
assert!(entries.contains_key(&0));
|
assert!(entries.contains_key(&0));
|
||||||
assert_eq!(batch.id, 0);
|
assert_eq!(batch.id, 0);
|
||||||
|
@ -285,7 +317,7 @@ mod tests {
|
||||||
|
|
||||||
state.append(default_entry());
|
state.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = state.next_batch(None, 3).unwrap();
|
let (entries, batch, _) = state.next_batch(None, 3).unwrap();
|
||||||
assert_eq!(entries.len(), 2);
|
assert_eq!(entries.len(), 2);
|
||||||
assert!(entries.contains_key(&1));
|
assert!(entries.contains_key(&1));
|
||||||
assert!(entries.contains_key(&2));
|
assert!(entries.contains_key(&2));
|
||||||
|
@ -317,7 +349,7 @@ mod tests {
|
||||||
queue.append(default_entry());
|
queue.append(default_entry());
|
||||||
queue.append(default_entry());
|
queue.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = queue.next_batch(None, 2).await.unwrap();
|
let (entries, batch, _) = queue.next_batch(None, 2).await.unwrap();
|
||||||
assert_eq!(entries.len(), 2);
|
assert_eq!(entries.len(), 2);
|
||||||
assert!(entries.contains_key(&0));
|
assert!(entries.contains_key(&0));
|
||||||
assert!(entries.contains_key(&1));
|
assert!(entries.contains_key(&1));
|
||||||
|
@ -337,7 +369,7 @@ mod tests {
|
||||||
queue.append(default_entry());
|
queue.append(default_entry());
|
||||||
queue.append(default_entry());
|
queue.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = queue.next_batch(None, 1).await.unwrap();
|
let (entries, batch, _) = queue.next_batch(None, 1).await.unwrap();
|
||||||
assert_eq!(entries.len(), 1);
|
assert_eq!(entries.len(), 1);
|
||||||
assert!(entries.contains_key(&0));
|
assert!(entries.contains_key(&0));
|
||||||
assert_eq!(batch.id, 0);
|
assert_eq!(batch.id, 0);
|
||||||
|
@ -345,7 +377,7 @@ mod tests {
|
||||||
|
|
||||||
queue.append(default_entry());
|
queue.append(default_entry());
|
||||||
|
|
||||||
let (entries, batch) = queue.next_batch(None, 3).await.unwrap();
|
let (entries, batch, _) = queue.next_batch(None, 3).await.unwrap();
|
||||||
assert_eq!(entries.len(), 2);
|
assert_eq!(entries.len(), 2);
|
||||||
assert!(entries.contains_key(&1));
|
assert!(entries.contains_key(&1));
|
||||||
assert!(entries.contains_key(&2));
|
assert!(entries.contains_key(&2));
|
||||||
|
|
|
@ -10,6 +10,7 @@ use axum::response::sse::{Event, KeepAlive, Sse};
|
||||||
use axum::response::IntoResponse;
|
use axum::response::IntoResponse;
|
||||||
use axum::routing::{get, post};
|
use axum::routing::{get, post};
|
||||||
use axum::{Json, Router};
|
use axum::{Json, Router};
|
||||||
|
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
||||||
use futures::Stream;
|
use futures::Stream;
|
||||||
use std::convert::Infallible;
|
use std::convert::Infallible;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
|
@ -18,7 +19,7 @@ use tokenizers::Tokenizer;
|
||||||
use tokio::signal;
|
use tokio::signal;
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
use tracing::instrument;
|
use tracing::{info_span, instrument, Instrument};
|
||||||
use utoipa::OpenApi;
|
use utoipa::OpenApi;
|
||||||
use utoipa_swagger_ui::SwaggerUi;
|
use utoipa_swagger_ui::SwaggerUi;
|
||||||
|
|
||||||
|
@ -75,7 +76,7 @@ async fn health(infer: Extension<Infer>) -> Result<(), (StatusCode, Json<ErrorRe
|
||||||
queue_time,
|
queue_time,
|
||||||
inference_time,
|
inference_time,
|
||||||
time_per_token,
|
time_per_token,
|
||||||
seed
|
seed,
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
async fn generate(
|
async fn generate(
|
||||||
|
@ -87,10 +88,7 @@ async fn generate(
|
||||||
|
|
||||||
// Inference
|
// Inference
|
||||||
let details = req.0.parameters.details;
|
let details = req.0.parameters.details;
|
||||||
let response = infer.generate(req.0).await.map_err(|err| {
|
let response = infer.generate(req.0).await?;
|
||||||
tracing::error!("{}", err.to_string());
|
|
||||||
err
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// Token details
|
// Token details
|
||||||
let details = match details {
|
let details = match details {
|
||||||
|
@ -135,11 +133,11 @@ async fn generate(
|
||||||
);
|
);
|
||||||
|
|
||||||
// Tracing metadata
|
// Tracing metadata
|
||||||
span.record("total_time", format!("{:?}", total_time));
|
span.record("total_time", format!("{total_time:?}"));
|
||||||
span.record("validation_time", format!("{:?}", validation_time));
|
span.record("validation_time", format!("{validation_time:?}"));
|
||||||
span.record("queue_time", format!("{:?}", queue_time));
|
span.record("queue_time", format!("{queue_time:?}"));
|
||||||
span.record("inference_time", format!("{:?}", inference_time));
|
span.record("inference_time", format!("{inference_time:?}"));
|
||||||
span.record("time_per_token", format!("{:?}", time_per_token));
|
span.record("time_per_token", format!("{time_per_token:?}"));
|
||||||
span.record("seed", format!("{:?}", response.generated_text.seed));
|
span.record("seed", format!("{:?}", response.generated_text.seed));
|
||||||
tracing::info!("Output: {}", response.generated_text.text);
|
tracing::info!("Output: {}", response.generated_text.text);
|
||||||
|
|
||||||
|
@ -181,7 +179,8 @@ async fn generate(
|
||||||
validation_time,
|
validation_time,
|
||||||
queue_time,
|
queue_time,
|
||||||
inference_time,
|
inference_time,
|
||||||
time_per_token
|
time_per_token,
|
||||||
|
seed,
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
async fn generate_stream(
|
async fn generate_stream(
|
||||||
|
@ -197,7 +196,7 @@ async fn generate_stream(
|
||||||
let mut error = false;
|
let mut error = false;
|
||||||
let details = req.0.parameters.details;
|
let details = req.0.parameters.details;
|
||||||
|
|
||||||
match infer.generate_stream(req.0).await {
|
match infer.generate_stream(req.0).instrument(info_span!(parent: &span, "async_stream")).await {
|
||||||
Ok(mut response_stream) => {
|
Ok(mut response_stream) => {
|
||||||
// Server-Sent Event stream
|
// Server-Sent Event stream
|
||||||
while let Some(response) = response_stream.next().await {
|
while let Some(response) = response_stream.next().await {
|
||||||
|
@ -243,13 +242,11 @@ async fn generate_stream(
|
||||||
|
|
||||||
// Tracing metadata
|
// Tracing metadata
|
||||||
span.record("total_time", format!("{:?}", total_time));
|
span.record("total_time", format!("{:?}", total_time));
|
||||||
span
|
span.record("validation_time", format!("{:?}", validation_time));
|
||||||
.record("validation_time", format!("{:?}", validation_time));
|
|
||||||
span.record("queue_time", format!("{:?}", queue_time));
|
span.record("queue_time", format!("{:?}", queue_time));
|
||||||
span
|
span.record("inference_time", format!("{:?}", inference_time));
|
||||||
.record("inference_time", format!("{:?}", inference_time));
|
span.record("time_per_token", format!("{:?}", time_per_token));
|
||||||
span
|
span.record("seed", format!("{:?}", generated_text.seed));
|
||||||
.record("time_per_token", format!("{:?}", time_per_token));
|
|
||||||
tracing::info!(parent: &span, "Output: {}", generated_text.text);
|
tracing::info!(parent: &span, "Output: {}", generated_text.text);
|
||||||
|
|
||||||
// StreamResponse
|
// StreamResponse
|
||||||
|
@ -264,19 +261,17 @@ async fn generate_stream(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Trace and yield error
|
// yield error
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
error = true;
|
error = true;
|
||||||
tracing::error!("{}", err.to_string());
|
|
||||||
yield Ok(Event::from(err))
|
yield Ok(Event::from(err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
// Trace and yield error
|
// yield error
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
error = true;
|
error = true;
|
||||||
tracing::error!("{}", err.to_string());
|
|
||||||
yield Ok(Event::from(err))
|
yield Ok(Event::from(err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -284,7 +279,7 @@ async fn generate_stream(
|
||||||
// Skip if we already sent an error
|
// Skip if we already sent an error
|
||||||
if !end_reached && !error {
|
if !end_reached && !error {
|
||||||
let err = InferError::IncompleteGeneration;
|
let err = InferError::IncompleteGeneration;
|
||||||
tracing::error!("{}", err.to_string());
|
tracing::error!("{err}");
|
||||||
yield Ok(Event::from(err))
|
yield Ok(Event::from(err))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -355,7 +350,8 @@ pub async fn run(
|
||||||
.route("/generate_stream", post(generate_stream))
|
.route("/generate_stream", post(generate_stream))
|
||||||
.route("/", get(health))
|
.route("/", get(health))
|
||||||
.route("/health", get(health))
|
.route("/health", get(health))
|
||||||
.layer(Extension(infer));
|
.layer(Extension(infer))
|
||||||
|
.layer(opentelemetry_tracing_layer());
|
||||||
|
|
||||||
// Run server
|
// Run server
|
||||||
axum::Server::bind(&addr)
|
axum::Server::bind(&addr)
|
||||||
|
@ -391,6 +387,7 @@ async fn shutdown_signal() {
|
||||||
}
|
}
|
||||||
|
|
||||||
tracing::info!("signal received, starting graceful shutdown");
|
tracing::info!("signal received, starting graceful shutdown");
|
||||||
|
opentelemetry::global::shutdown_tracer_provider();
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<i32> for FinishReason {
|
impl From<i32> for FinishReason {
|
||||||
|
|
|
@ -6,6 +6,7 @@ use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParamet
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokenizers::tokenizer::Tokenizer;
|
use tokenizers::tokenizer::Tokenizer;
|
||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
|
use tracing::{instrument, Span};
|
||||||
|
|
||||||
const MAX_MAX_NEW_TOKENS: u32 = 512;
|
const MAX_MAX_NEW_TOKENS: u32 = 512;
|
||||||
const MAX_STOP_SEQUENCES: usize = 4;
|
const MAX_STOP_SEQUENCES: usize = 4;
|
||||||
|
@ -36,6 +37,7 @@ impl Validation {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Validate a payload and get the number of tokens in the input
|
/// Validate a payload and get the number of tokens in the input
|
||||||
|
#[instrument(skip_all)]
|
||||||
pub(crate) async fn validate(
|
pub(crate) async fn validate(
|
||||||
&self,
|
&self,
|
||||||
request: GenerateRequest,
|
request: GenerateRequest,
|
||||||
|
@ -44,7 +46,10 @@ impl Validation {
|
||||||
let (sender, receiver) = oneshot::channel();
|
let (sender, receiver) = oneshot::channel();
|
||||||
// Send request to the background validation task
|
// Send request to the background validation task
|
||||||
// Unwrap is safe here
|
// Unwrap is safe here
|
||||||
self.sender.send((request, sender)).await.unwrap();
|
self.sender
|
||||||
|
.send((request, sender, Span::current()))
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
// Await on response channel
|
// Await on response channel
|
||||||
// Unwrap is safe here
|
// Unwrap is safe here
|
||||||
receiver.await.unwrap()
|
receiver.await.unwrap()
|
||||||
|
@ -97,10 +102,17 @@ fn validation_worker(
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
// Loop over requests
|
// Loop over requests
|
||||||
while let Some((request, response_tx)) = receiver.blocking_recv() {
|
while let Some((request, response_tx, parent_span)) = receiver.blocking_recv() {
|
||||||
response_tx
|
parent_span.in_scope(|| {
|
||||||
.send(validate(request, &tokenizer, max_input_length, &mut rng))
|
response_tx
|
||||||
.unwrap_or(())
|
.send(
|
||||||
|
validate(request, &tokenizer, max_input_length, &mut rng).map_err(|err| {
|
||||||
|
tracing::error!("{err}");
|
||||||
|
err
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
.unwrap_or(())
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,6 +215,7 @@ fn validate(
|
||||||
type ValidationRequest = (
|
type ValidationRequest = (
|
||||||
GenerateRequest,
|
GenerateRequest,
|
||||||
oneshot::Sender<Result<ValidGenerateRequest, ValidationError>>,
|
oneshot::Sender<Result<ValidGenerateRequest, ValidationError>>,
|
||||||
|
Span,
|
||||||
);
|
);
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
[toolchain]
|
[toolchain]
|
||||||
channel = "1.65.0"
|
channel = "1.67.0"
|
||||||
components = ["rustfmt", "clippy"]
|
components = ["rustfmt", "clippy"]
|
|
@ -1,6 +1,6 @@
|
||||||
gen-server:
|
gen-server:
|
||||||
# Compile protos
|
# Compile protos
|
||||||
pip install grpcio-tools==1.49.1 --no-cache-dir
|
pip install grpcio-tools==1.51.1 --no-cache-dir
|
||||||
mkdir text_generation/pb || true
|
mkdir text_generation/pb || true
|
||||||
python -m grpc_tools.protoc -I../proto --python_out=text_generation/pb --grpc_python_out=text_generation/pb ../proto/generate.proto
|
python -m grpc_tools.protoc -I../proto --python_out=text_generation/pb --grpc_python_out=text_generation/pb ../proto/generate.proto
|
||||||
find text_generation/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
|
find text_generation/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
|
||||||
|
|
|
@ -39,6 +39,14 @@ tests = ["attrs[tests-no-zope]", "zope.interface"]
|
||||||
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||||
tests_no_zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
tests_no_zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "backoff"
|
||||||
|
version = "2.2.1"
|
||||||
|
description = "Function decoration for backoff and retry"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7,<4.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitsandbytes"
|
name = "bitsandbytes"
|
||||||
version = "0.35.4"
|
version = "0.35.4"
|
||||||
|
@ -47,6 +55,22 @@ category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "certifi"
|
||||||
|
version = "2022.12.7"
|
||||||
|
description = "Python package for providing Mozilla's CA Bundle."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "charset-normalizer"
|
||||||
|
version = "3.0.1"
|
||||||
|
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "click"
|
name = "click"
|
||||||
version = "8.1.3"
|
version = "8.1.3"
|
||||||
|
@ -66,6 +90,20 @@ category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "Deprecated"
|
||||||
|
version = "1.2.13"
|
||||||
|
description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
wrapt = ">=1.10,<2"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["PyTest", "PyTest (<5)", "PyTest-Cov", "PyTest-Cov (<2.6)", "bump2version (<1)", "configparser (<5)", "importlib-metadata (<3)", "importlib-resources (<4)", "sphinx (<2)", "sphinxcontrib-websupport (<2)", "tox", "zipp (<2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "exceptiongroup"
|
name = "exceptiongroup"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
|
@ -154,6 +192,14 @@ grpcio = ">=1.51.1"
|
||||||
protobuf = ">=4.21.6,<5.0dev"
|
protobuf = ">=4.21.6,<5.0dev"
|
||||||
setuptools = "*"
|
setuptools = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "idna"
|
||||||
|
version = "3.4"
|
||||||
|
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "iniconfig"
|
name = "iniconfig"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
|
@ -179,7 +225,7 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "numpy"
|
name = "numpy"
|
||||||
version = "1.24.1"
|
version = "1.24.2"
|
||||||
description = "Fundamental package for array computing in Python"
|
description = "Fundamental package for array computing in Python"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
|
@ -233,6 +279,133 @@ python-versions = ">=3"
|
||||||
setuptools = "*"
|
setuptools = "*"
|
||||||
wheel = "*"
|
wheel = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-api"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Python API"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
deprecated = ">=1.2.6"
|
||||||
|
setuptools = ">=16.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-exporter-otlp"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Collector Exporters"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
opentelemetry-exporter-otlp-proto-grpc = "1.15.0"
|
||||||
|
opentelemetry-exporter-otlp-proto-http = "1.15.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-exporter-otlp-proto-grpc"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Collector Protobuf over gRPC Exporter"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""}
|
||||||
|
googleapis-common-protos = ">=1.52,<2.0"
|
||||||
|
grpcio = ">=1.0.0,<2.0.0"
|
||||||
|
opentelemetry-api = ">=1.12,<2.0"
|
||||||
|
opentelemetry-proto = "1.15.0"
|
||||||
|
opentelemetry-sdk = ">=1.12,<2.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["pytest-grpc"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-exporter-otlp-proto-http"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Collector Protobuf over HTTP Exporter"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""}
|
||||||
|
googleapis-common-protos = ">=1.52,<2.0"
|
||||||
|
opentelemetry-api = ">=1.12,<2.0"
|
||||||
|
opentelemetry-proto = "1.15.0"
|
||||||
|
opentelemetry-sdk = ">=1.12,<2.0"
|
||||||
|
requests = ">=2.7,<3.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
test = ["responses (==0.22.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-instrumentation"
|
||||||
|
version = "0.36b0"
|
||||||
|
description = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
opentelemetry-api = ">=1.4,<2.0"
|
||||||
|
setuptools = ">=16.0"
|
||||||
|
wrapt = ">=1.0.0,<2.0.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-instrumentation-grpc"
|
||||||
|
version = "0.36b0"
|
||||||
|
description = "OpenTelemetry gRPC instrumentation"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
opentelemetry-api = ">=1.12,<2.0"
|
||||||
|
opentelemetry-instrumentation = "0.36b0"
|
||||||
|
opentelemetry-sdk = ">=1.12,<2.0"
|
||||||
|
opentelemetry-semantic-conventions = "0.36b0"
|
||||||
|
wrapt = ">=1.0.0,<2.0.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
instruments = ["grpcio (>=1.27,<2.0)"]
|
||||||
|
test = ["opentelemetry-instrumentation-grpc[instruments]", "opentelemetry-sdk (>=1.12,<2.0)", "opentelemetry-test-utils (==0.36b0)", "protobuf (>=3.13,<4.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-proto"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Python Proto"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
protobuf = ">=3.19,<5.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-sdk"
|
||||||
|
version = "1.15.0"
|
||||||
|
description = "OpenTelemetry Python SDK"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
opentelemetry-api = "1.15.0"
|
||||||
|
opentelemetry-semantic-conventions = "0.36b0"
|
||||||
|
setuptools = ">=16.0"
|
||||||
|
typing-extensions = ">=3.7.4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opentelemetry-semantic-conventions"
|
||||||
|
version = "0.36b0"
|
||||||
|
description = "OpenTelemetry Semantic Conventions"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "packaging"
|
name = "packaging"
|
||||||
version = "23.0"
|
version = "23.0"
|
||||||
|
@ -300,6 +473,24 @@ category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=3.6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "requests"
|
||||||
|
version = "2.28.2"
|
||||||
|
description = "Python HTTP for Humans."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7, <4"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
certifi = ">=2017.4.17"
|
||||||
|
charset-normalizer = ">=2,<4"
|
||||||
|
idna = ">=2.5,<4"
|
||||||
|
urllib3 = ">=1.21.1,<1.27"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||||
|
use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "safetensors"
|
name = "safetensors"
|
||||||
version = "0.2.8"
|
version = "0.2.8"
|
||||||
|
@ -320,7 +511,7 @@ torch = ["torch"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "setuptools"
|
name = "setuptools"
|
||||||
version = "67.0.0"
|
version = "67.2.0"
|
||||||
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
description = "Easily download, build, install, upgrade, and uninstall Python packages"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
|
@ -382,6 +573,19 @@ category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "urllib3"
|
||||||
|
version = "1.26.14"
|
||||||
|
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
|
||||||
|
secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
|
||||||
|
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wheel"
|
name = "wheel"
|
||||||
version = "0.38.4"
|
version = "0.38.4"
|
||||||
|
@ -404,13 +608,21 @@ python-versions = ">=3.5"
|
||||||
[package.extras]
|
[package.extras]
|
||||||
dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
|
dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wrapt"
|
||||||
|
version = "1.14.1"
|
||||||
|
description = "Module for decorators, wrappers and monkey patching."
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
bnb = ["bitsandbytes"]
|
bnb = ["bitsandbytes"]
|
||||||
|
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "c920067f39ade631d1022c0560c3a4336c82d8c28355509bdfff751bbcfc01cd"
|
content-hash = "f3cab6881b52045770a90ec9be7415a0ee499d9e980892d544f68073700cf321"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
accelerate = [
|
accelerate = [
|
||||||
|
@ -421,10 +633,108 @@ attrs = [
|
||||||
{file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"},
|
{file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"},
|
||||||
{file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"},
|
{file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"},
|
||||||
]
|
]
|
||||||
|
backoff = [
|
||||||
|
{file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
|
||||||
|
{file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
|
||||||
|
]
|
||||||
bitsandbytes = [
|
bitsandbytes = [
|
||||||
{file = "bitsandbytes-0.35.4-py3-none-any.whl", hash = "sha256:201f168538ccfbd7594568a2f86c149cec8352782301076a15a783695ecec7fb"},
|
{file = "bitsandbytes-0.35.4-py3-none-any.whl", hash = "sha256:201f168538ccfbd7594568a2f86c149cec8352782301076a15a783695ecec7fb"},
|
||||||
{file = "bitsandbytes-0.35.4.tar.gz", hash = "sha256:b23db6b91cd73cb14faf9841a66bffa5c1722f9b8b57039ef2fb461ac22dd2a6"},
|
{file = "bitsandbytes-0.35.4.tar.gz", hash = "sha256:b23db6b91cd73cb14faf9841a66bffa5c1722f9b8b57039ef2fb461ac22dd2a6"},
|
||||||
]
|
]
|
||||||
|
certifi = [
|
||||||
|
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
|
||||||
|
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
|
||||||
|
]
|
||||||
|
charset-normalizer = [
|
||||||
|
{file = "charset-normalizer-3.0.1.tar.gz", hash = "sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-win32.whl", hash = "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-win32.whl", hash = "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-win32.whl", hash = "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-win32.whl", hash = "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-win32.whl", hash = "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-win32.whl", hash = "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8"},
|
||||||
|
{file = "charset_normalizer-3.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59"},
|
||||||
|
{file = "charset_normalizer-3.0.1-py3-none-any.whl", hash = "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24"},
|
||||||
|
]
|
||||||
click = [
|
click = [
|
||||||
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
|
{file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
|
||||||
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
|
{file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
|
||||||
|
@ -433,6 +743,10 @@ colorama = [
|
||||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||||
]
|
]
|
||||||
|
Deprecated = [
|
||||||
|
{file = "Deprecated-1.2.13-py2.py3-none-any.whl", hash = "sha256:64756e3e14c8c5eea9795d93c524551432a0be75629f8f29e67ab8caf076c76d"},
|
||||||
|
{file = "Deprecated-1.2.13.tar.gz", hash = "sha256:43ac5335da90c31c24ba028af536a91d41d53f9e6901ddb021bcc572ce44e38d"},
|
||||||
|
]
|
||||||
exceptiongroup = [
|
exceptiongroup = [
|
||||||
{file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"},
|
{file = "exceptiongroup-1.1.0-py3-none-any.whl", hash = "sha256:327cbda3da756e2de031a3107b81ab7b3770a602c4d16ca618298c526f4bec1e"},
|
||||||
{file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"},
|
{file = "exceptiongroup-1.1.0.tar.gz", hash = "sha256:bcb67d800a4497e1b404c2dd44fca47d3b7a5e5433dbab67f96c1a685cdfdf23"},
|
||||||
|
@ -547,6 +861,10 @@ grpcio-tools = [
|
||||||
{file = "grpcio_tools-1.51.1-cp39-cp39-win32.whl", hash = "sha256:40ef70e8c5d0310dedff9af502b520b4c7e215bce94094527fb959150a0c594a"},
|
{file = "grpcio_tools-1.51.1-cp39-cp39-win32.whl", hash = "sha256:40ef70e8c5d0310dedff9af502b520b4c7e215bce94094527fb959150a0c594a"},
|
||||||
{file = "grpcio_tools-1.51.1-cp39-cp39-win_amd64.whl", hash = "sha256:15b8acf4eaa0ebe37e2f69108de49efd935b7abe9c7e58ba737490b99906aa76"},
|
{file = "grpcio_tools-1.51.1-cp39-cp39-win_amd64.whl", hash = "sha256:15b8acf4eaa0ebe37e2f69108de49efd935b7abe9c7e58ba737490b99906aa76"},
|
||||||
]
|
]
|
||||||
|
idna = [
|
||||||
|
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
|
||||||
|
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
|
||||||
|
]
|
||||||
iniconfig = [
|
iniconfig = [
|
||||||
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
||||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||||
|
@ -556,34 +874,34 @@ loguru = [
|
||||||
{file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"},
|
{file = "loguru-0.6.0.tar.gz", hash = "sha256:066bd06758d0a513e9836fd9c6b5a75bfb3fd36841f4b996bc60b547a309d41c"},
|
||||||
]
|
]
|
||||||
numpy = [
|
numpy = [
|
||||||
{file = "numpy-1.24.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:179a7ef0889ab769cc03573b6217f54c8bd8e16cef80aad369e1e8185f994cd7"},
|
{file = "numpy-1.24.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eef70b4fc1e872ebddc38cddacc87c19a3709c0e3e5d20bf3954c147b1dd941d"},
|
||||||
{file = "numpy-1.24.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b09804ff570b907da323b3d762e74432fb07955701b17b08ff1b5ebaa8cfe6a9"},
|
{file = "numpy-1.24.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d2859428712785e8a8b7d2b3ef0a1d1565892367b32f915c4a4df44d0e64f5"},
|
||||||
{file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1b739841821968798947d3afcefd386fa56da0caf97722a5de53e07c4ccedc7"},
|
{file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6524630f71631be2dabe0c541e7675db82651eb998496bbe16bc4f77f0772253"},
|
||||||
{file = "numpy-1.24.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e3463e6ac25313462e04aea3fb8a0a30fb906d5d300f58b3bc2c23da6a15398"},
|
{file = "numpy-1.24.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a51725a815a6188c662fb66fb32077709a9ca38053f0274640293a14fdd22978"},
|
||||||
{file = "numpy-1.24.1-cp310-cp310-win32.whl", hash = "sha256:b31da69ed0c18be8b77bfce48d234e55d040793cebb25398e2a7d84199fbc7e2"},
|
{file = "numpy-1.24.2-cp310-cp310-win32.whl", hash = "sha256:2620e8592136e073bd12ee4536149380695fbe9ebeae845b81237f986479ffc9"},
|
||||||
{file = "numpy-1.24.1-cp310-cp310-win_amd64.whl", hash = "sha256:b07b40f5fb4fa034120a5796288f24c1fe0e0580bbfff99897ba6267af42def2"},
|
{file = "numpy-1.24.2-cp310-cp310-win_amd64.whl", hash = "sha256:97cf27e51fa078078c649a51d7ade3c92d9e709ba2bfb97493007103c741f1d0"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7094891dcf79ccc6bc2a1f30428fa5edb1e6fb955411ffff3401fb4ea93780a8"},
|
{file = "numpy-1.24.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7de8fdde0003f4294655aa5d5f0a89c26b9f22c0a58790c38fae1ed392d44a5a"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e418681372520c992805bb723e29d69d6b7aa411065f48216d8329d02ba032"},
|
{file = "numpy-1.24.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4173bde9fa2a005c2c6e2ea8ac1618e2ed2c1c6ec8a7657237854d42094123a0"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e274f0f6c7efd0d577744f52032fdd24344f11c5ae668fe8d01aac0422611df1"},
|
{file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cecaed30dc14123020f77b03601559fff3e6cd0c048f8b5289f4eeabb0eb281"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0044f7d944ee882400890f9ae955220d29b33d809a038923d88e4e01d652acd9"},
|
{file = "numpy-1.24.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a23f8440561a633204a67fb44617ce2a299beecf3295f0d13c495518908e910"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-win32.whl", hash = "sha256:442feb5e5bada8408e8fcd43f3360b78683ff12a4444670a7d9e9824c1817d36"},
|
{file = "numpy-1.24.2-cp311-cp311-win32.whl", hash = "sha256:e428c4fbfa085f947b536706a2fc349245d7baa8334f0c5723c56a10595f9b95"},
|
||||||
{file = "numpy-1.24.1-cp311-cp311-win_amd64.whl", hash = "sha256:de92efa737875329b052982e37bd4371d52cabf469f83e7b8be9bb7752d67e51"},
|
{file = "numpy-1.24.2-cp311-cp311-win_amd64.whl", hash = "sha256:557d42778a6869c2162deb40ad82612645e21d79e11c1dc62c6e82a2220ffb04"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b162ac10ca38850510caf8ea33f89edcb7b0bb0dfa5592d59909419986b72407"},
|
{file = "numpy-1.24.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d0a2db9d20117bf523dde15858398e7c0858aadca7c0f088ac0d6edd360e9ad2"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26089487086f2648944f17adaa1a97ca6aee57f513ba5f1c0b7ebdabbe2b9954"},
|
{file = "numpy-1.24.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c72a6b2f4af1adfe193f7beb91ddf708ff867a3f977ef2ec53c0ffb8283ab9f5"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:caf65a396c0d1f9809596be2e444e3bd4190d86d5c1ce21f5fc4be60a3bc5b36"},
|
{file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c29e6bd0ec49a44d7690ecb623a8eac5ab8a923bce0bea6293953992edf3a76a"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0677a52f5d896e84414761531947c7a330d1adc07c3a4372262f25d84af7bf7"},
|
{file = "numpy-1.24.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2eabd64ddb96a1239791da78fa5f4e1693ae2dadc82a76bc76a14cbb2b966e96"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-win32.whl", hash = "sha256:dae46bed2cb79a58d6496ff6d8da1e3b95ba09afeca2e277628171ca99b99db1"},
|
{file = "numpy-1.24.2-cp38-cp38-win32.whl", hash = "sha256:e3ab5d32784e843fc0dd3ab6dcafc67ef806e6b6828dc6af2f689be0eb4d781d"},
|
||||||
{file = "numpy-1.24.1-cp38-cp38-win_amd64.whl", hash = "sha256:6ec0c021cd9fe732e5bab6401adea5a409214ca5592cd92a114f7067febcba0c"},
|
{file = "numpy-1.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:76807b4063f0002c8532cfeac47a3068a69561e9c8715efdad3c642eb27c0756"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:28bc9750ae1f75264ee0f10561709b1462d450a4808cd97c013046073ae64ab6"},
|
{file = "numpy-1.24.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4199e7cfc307a778f72d293372736223e39ec9ac096ff0a2e64853b866a8e18a"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:84e789a085aabef2f36c0515f45e459f02f570c4b4c4c108ac1179c34d475ed7"},
|
{file = "numpy-1.24.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:adbdce121896fd3a17a77ab0b0b5eedf05a9834a18699db6829a64e1dfccca7f"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e669fbdcdd1e945691079c2cae335f3e3a56554e06bbd45d7609a6cf568c700"},
|
{file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889b2cc88b837d86eda1b17008ebeb679d82875022200c6e8e4ce6cf549b7acb"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef85cf1f693c88c1fd229ccd1055570cb41cdf4875873b7728b6301f12cd05bf"},
|
{file = "numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64bb98ac59b3ea3bf74b02f13836eb2e24e48e0ab0145bbda646295769bd780"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-win32.whl", hash = "sha256:87a118968fba001b248aac90e502c0b13606721b1343cdaddbc6e552e8dfb56f"},
|
{file = "numpy-1.24.2-cp39-cp39-win32.whl", hash = "sha256:63e45511ee4d9d976637d11e6c9864eae50e12dc9598f531c035265991910468"},
|
||||||
{file = "numpy-1.24.1-cp39-cp39-win_amd64.whl", hash = "sha256:ddc7ab52b322eb1e40521eb422c4e0a20716c271a306860979d450decbb51b8e"},
|
{file = "numpy-1.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:a77d3e1163a7770164404607b7ba3967fb49b24782a6ef85d9b5f54126cc39e5"},
|
||||||
{file = "numpy-1.24.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed5fb71d79e771ec930566fae9c02626b939e37271ec285e9efaf1b5d4370e7d"},
|
{file = "numpy-1.24.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92011118955724465fb6853def593cf397b4a1367495e0b59a7e69d40c4eb71d"},
|
||||||
{file = "numpy-1.24.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad2925567f43643f51255220424c23d204024ed428afc5aad0f86f3ffc080086"},
|
{file = "numpy-1.24.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9006288bcf4895917d02583cf3411f98631275bc67cce355a7f39f8c14338fa"},
|
||||||
{file = "numpy-1.24.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:cfa1161c6ac8f92dea03d625c2d0c05e084668f4a06568b77a25a89111621566"},
|
{file = "numpy-1.24.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:150947adbdfeceec4e5926d956a06865c1c690f2fd902efede4ca6fe2e657c3f"},
|
||||||
{file = "numpy-1.24.1.tar.gz", hash = "sha256:2386da9a471cc00a1f47845e27d916d5ec5346ae9696e01a8a34760858fe9dd2"},
|
{file = "numpy-1.24.2.tar.gz", hash = "sha256:003a9f530e880cb2cd177cba1af7220b9aa42def9c4afc2a2fc3ee6be7eb2b22"},
|
||||||
]
|
]
|
||||||
nvidia-cublas-cu11 = [
|
nvidia-cublas-cu11 = [
|
||||||
{file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"},
|
{file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"},
|
||||||
|
@ -602,6 +920,42 @@ nvidia-cudnn-cu11 = [
|
||||||
{file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"},
|
{file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"},
|
||||||
{file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"},
|
{file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"},
|
||||||
]
|
]
|
||||||
|
opentelemetry-api = [
|
||||||
|
{file = "opentelemetry_api-1.15.0-py3-none-any.whl", hash = "sha256:e6c2d2e42140fd396e96edf75a7ceb11073f4efb4db87565a431cc9d0f93f2e0"},
|
||||||
|
{file = "opentelemetry_api-1.15.0.tar.gz", hash = "sha256:79ab791b4aaad27acc3dc3ba01596db5b5aac2ef75c70622c6038051d6c2cded"},
|
||||||
|
]
|
||||||
|
opentelemetry-exporter-otlp = [
|
||||||
|
{file = "opentelemetry_exporter_otlp-1.15.0-py3-none-any.whl", hash = "sha256:79f22748b6a54808a0448093dfa189c8490e729f67c134d4c992533d9393b33e"},
|
||||||
|
{file = "opentelemetry_exporter_otlp-1.15.0.tar.gz", hash = "sha256:4f7c49751d9720e2e726e13b0bb958ccade4e29122c305d92c033da432c8d2c5"},
|
||||||
|
]
|
||||||
|
opentelemetry-exporter-otlp-proto-grpc = [
|
||||||
|
{file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0-py3-none-any.whl", hash = "sha256:c2a5492ba7d140109968135d641d06ce3c5bd73c50665f787526065d57d7fd1d"},
|
||||||
|
{file = "opentelemetry_exporter_otlp_proto_grpc-1.15.0.tar.gz", hash = "sha256:844f2a4bb9bcda34e4eb6fe36765e5031aacb36dc60ed88c90fc246942ea26e7"},
|
||||||
|
]
|
||||||
|
opentelemetry-exporter-otlp-proto-http = [
|
||||||
|
{file = "opentelemetry_exporter_otlp_proto_http-1.15.0-py3-none-any.whl", hash = "sha256:3ec2a02196c8a54bf5cbf7fe623a5238625638e83b6047a983bdf96e2bbb74c0"},
|
||||||
|
{file = "opentelemetry_exporter_otlp_proto_http-1.15.0.tar.gz", hash = "sha256:11b2c814249a49b22f6cca7a06b05701f561d577b747f3660dfd67b6eb9daf9c"},
|
||||||
|
]
|
||||||
|
opentelemetry-instrumentation = [
|
||||||
|
{file = "opentelemetry_instrumentation-0.36b0-py3-none-any.whl", hash = "sha256:83ba4ae7d5292b5b33e0f851cc5c76d8f91196b9b3527800fc13855c33383ac2"},
|
||||||
|
{file = "opentelemetry_instrumentation-0.36b0.tar.gz", hash = "sha256:e3ddac9b3b93408ef26c8ecbf38f717042977e16381bb4cd329a5b4cf16998cf"},
|
||||||
|
]
|
||||||
|
opentelemetry-instrumentation-grpc = [
|
||||||
|
{file = "opentelemetry_instrumentation_grpc-0.36b0-py3-none-any.whl", hash = "sha256:eaa246ed2083c97b13bab2555cb9d170e8433230a31476c4cab8a17fa03380a4"},
|
||||||
|
{file = "opentelemetry_instrumentation_grpc-0.36b0.tar.gz", hash = "sha256:dc89447c9eb6ea868970f6c13b4ffdac182cdd5a41dd215a0f5393ca6375be55"},
|
||||||
|
]
|
||||||
|
opentelemetry-proto = [
|
||||||
|
{file = "opentelemetry_proto-1.15.0-py3-none-any.whl", hash = "sha256:044b6d044b4d10530f250856f933442b8753a17f94ae37c207607f733fb9a844"},
|
||||||
|
{file = "opentelemetry_proto-1.15.0.tar.gz", hash = "sha256:9c4008e40ac8cab359daac283fbe7002c5c29c77ea2674ad5626a249e64e0101"},
|
||||||
|
]
|
||||||
|
opentelemetry-sdk = [
|
||||||
|
{file = "opentelemetry_sdk-1.15.0-py3-none-any.whl", hash = "sha256:555c533e9837766119bbccc7a80458c9971d853a6f1da683a2246cd5e53b4645"},
|
||||||
|
{file = "opentelemetry_sdk-1.15.0.tar.gz", hash = "sha256:98dbffcfeebcbff12c0c974292d6ea603180a145904cf838b1fe4d5c99078425"},
|
||||||
|
]
|
||||||
|
opentelemetry-semantic-conventions = [
|
||||||
|
{file = "opentelemetry_semantic_conventions-0.36b0-py3-none-any.whl", hash = "sha256:adc05635e87b9d3e007c9f530eed487fc3ef2177d02f82f674f28ebf9aff8243"},
|
||||||
|
{file = "opentelemetry_semantic_conventions-0.36b0.tar.gz", hash = "sha256:829dc221795467d98b773c04096e29be038d77526dc8d6ac76f546fb6279bf01"},
|
||||||
|
]
|
||||||
packaging = [
|
packaging = [
|
||||||
{file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"},
|
{file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"},
|
||||||
{file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
|
{file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"},
|
||||||
|
@ -688,6 +1042,10 @@ PyYAML = [
|
||||||
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
|
{file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
|
||||||
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
|
{file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
|
||||||
]
|
]
|
||||||
|
requests = [
|
||||||
|
{file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"},
|
||||||
|
{file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"},
|
||||||
|
]
|
||||||
safetensors = [
|
safetensors = [
|
||||||
{file = "safetensors-0.2.8-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:8df8af89a0b6b535b47c077e33e5cd4941ef4b067e7c1dd1a05647dec0cf2eea"},
|
{file = "safetensors-0.2.8-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:8df8af89a0b6b535b47c077e33e5cd4941ef4b067e7c1dd1a05647dec0cf2eea"},
|
||||||
{file = "safetensors-0.2.8-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9cabae651542b22d229b6439029fb4a3abc7868cb123af336b2877541ad9ab39"},
|
{file = "safetensors-0.2.8-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9cabae651542b22d229b6439029fb4a3abc7868cb123af336b2877541ad9ab39"},
|
||||||
|
@ -731,8 +1089,8 @@ safetensors = [
|
||||||
{file = "safetensors-0.2.8.tar.gz", hash = "sha256:2720b20a6a38c799dca79bd76caeeac2f7df585a9d4f7d59fa7e28eff9ccb27f"},
|
{file = "safetensors-0.2.8.tar.gz", hash = "sha256:2720b20a6a38c799dca79bd76caeeac2f7df585a9d4f7d59fa7e28eff9ccb27f"},
|
||||||
]
|
]
|
||||||
setuptools = [
|
setuptools = [
|
||||||
{file = "setuptools-67.0.0-py3-none-any.whl", hash = "sha256:9d790961ba6219e9ff7d9557622d2fe136816a264dd01d5997cfc057d804853d"},
|
{file = "setuptools-67.2.0-py3-none-any.whl", hash = "sha256:16ccf598aab3b506593c17378473978908a2734d7336755a8769b480906bec1c"},
|
||||||
{file = "setuptools-67.0.0.tar.gz", hash = "sha256:883131c5b6efa70b9101c7ef30b2b7b780a4283d5fc1616383cdf22c83cbefe6"},
|
{file = "setuptools-67.2.0.tar.gz", hash = "sha256:b440ee5f7e607bb8c9de15259dba2583dd41a38879a7abc1d43a71c59524da48"},
|
||||||
]
|
]
|
||||||
tomli = [
|
tomli = [
|
||||||
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
{file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
|
||||||
|
@ -769,6 +1127,10 @@ typing-extensions = [
|
||||||
{file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"},
|
{file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"},
|
||||||
{file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"},
|
{file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"},
|
||||||
]
|
]
|
||||||
|
urllib3 = [
|
||||||
|
{file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"},
|
||||||
|
{file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"},
|
||||||
|
]
|
||||||
wheel = [
|
wheel = [
|
||||||
{file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"},
|
{file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"},
|
||||||
{file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"},
|
{file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"},
|
||||||
|
@ -777,3 +1139,69 @@ win32-setctime = [
|
||||||
{file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
|
{file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
|
||||||
{file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
|
{file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
|
||||||
]
|
]
|
||||||
|
wrapt = [
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"},
|
||||||
|
{file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
|
||||||
|
{file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"},
|
||||||
|
{file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"},
|
||||||
|
{file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"},
|
||||||
|
{file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"},
|
||||||
|
{file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"},
|
||||||
|
{file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"},
|
||||||
|
{file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"},
|
||||||
|
]
|
||||||
|
|
|
@ -19,12 +19,15 @@ accelerate = "^0.15.0"
|
||||||
bitsandbytes = "^0.35.1"
|
bitsandbytes = "^0.35.1"
|
||||||
safetensors = "^0.2.4"
|
safetensors = "^0.2.4"
|
||||||
loguru = "^0.6.0"
|
loguru = "^0.6.0"
|
||||||
|
opentelemetry-api = "^1.15.0"
|
||||||
|
opentelemetry-exporter-otlp = "^1.15.0"
|
||||||
|
opentelemetry-instrumentation-grpc = "^0.36b0"
|
||||||
|
|
||||||
[tool.poetry.extras]
|
[tool.poetry.extras]
|
||||||
bnb = ["bitsandbytes"]
|
bnb = ["bitsandbytes"]
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
grpcio-tools = "^1.49.1"
|
grpcio-tools = "^1.51.1"
|
||||||
pytest = "^7.2.0"
|
pytest = "^7.2.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|
|
@ -7,6 +7,7 @@ from loguru import logger
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from text_generation import server, utils
|
from text_generation import server, utils
|
||||||
|
from text_generation.tracing import setup_tracing
|
||||||
|
|
||||||
app = typer.Typer()
|
app = typer.Typer()
|
||||||
|
|
||||||
|
@ -20,18 +21,8 @@ def serve(
|
||||||
uds_path: Path = "/tmp/text-generation",
|
uds_path: Path = "/tmp/text-generation",
|
||||||
logger_level: str = "INFO",
|
logger_level: str = "INFO",
|
||||||
json_output: bool = False,
|
json_output: bool = False,
|
||||||
|
otlp_endpoint: Optional[str] = None,
|
||||||
):
|
):
|
||||||
# Remove default handler
|
|
||||||
logger.remove()
|
|
||||||
logger.add(
|
|
||||||
sys.stdout,
|
|
||||||
format="{message}",
|
|
||||||
filter="text_generation",
|
|
||||||
level=logger_level,
|
|
||||||
serialize=json_output,
|
|
||||||
backtrace=True,
|
|
||||||
diagnose=False,
|
|
||||||
)
|
|
||||||
if sharded:
|
if sharded:
|
||||||
assert (
|
assert (
|
||||||
os.getenv("RANK", None) is not None
|
os.getenv("RANK", None) is not None
|
||||||
|
@ -46,6 +37,21 @@ def serve(
|
||||||
os.getenv("MASTER_PORT", None) is not None
|
os.getenv("MASTER_PORT", None) is not None
|
||||||
), "MASTER_PORT must be set when sharded is True"
|
), "MASTER_PORT must be set when sharded is True"
|
||||||
|
|
||||||
|
# Remove default handler
|
||||||
|
logger.remove()
|
||||||
|
logger.add(
|
||||||
|
sys.stdout,
|
||||||
|
format="{message}",
|
||||||
|
filter="text_generation",
|
||||||
|
level=logger_level,
|
||||||
|
serialize=json_output,
|
||||||
|
backtrace=True,
|
||||||
|
diagnose=False,
|
||||||
|
)
|
||||||
|
# Setup OpenTelemetry distributed tracing
|
||||||
|
if otlp_endpoint is not None:
|
||||||
|
setup_tracing(shard=os.getenv("RANK", 0), otlp_endpoint=otlp_endpoint)
|
||||||
|
|
||||||
server.serve(model_id, revision, sharded, quantize, uds_path)
|
server.serve(model_id, revision, sharded, quantize, uds_path)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from opentelemetry import trace
|
||||||
from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizerBase
|
from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizerBase
|
||||||
from typing import Optional, Tuple, List, Type
|
from typing import Optional, Tuple, List, Type
|
||||||
|
|
||||||
|
@ -9,6 +10,8 @@ from text_generation.models.types import Batch, PrefillTokens, Generation, Gener
|
||||||
from text_generation.pb import generate_pb2
|
from text_generation.pb import generate_pb2
|
||||||
from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling
|
from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling
|
||||||
|
|
||||||
|
tracer = trace.get_tracer(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class CausalLMBatch(Batch):
|
class CausalLMBatch(Batch):
|
||||||
|
@ -94,6 +97,7 @@ class CausalLMBatch(Batch):
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@tracer.start_as_current_span("concatenate")
|
||||||
def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch":
|
def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch":
|
||||||
# Used for padding
|
# Used for padding
|
||||||
total_batch_size = sum(batch.size for batch in batches)
|
total_batch_size = sum(batch.size for batch in batches)
|
||||||
|
@ -286,6 +290,7 @@ class CausalLM(Model):
|
||||||
)
|
)
|
||||||
return outputs.logits, outputs.past_key_values
|
return outputs.logits, outputs.past_key_values
|
||||||
|
|
||||||
|
@tracer.start_as_current_span("generate_token")
|
||||||
def generate_token(
|
def generate_token(
|
||||||
self, batch: CausalLMBatch
|
self, batch: CausalLMBatch
|
||||||
) -> Tuple[List[Generation], Optional[CausalLMBatch]]:
|
) -> Tuple[List[Generation], Optional[CausalLMBatch]]:
|
||||||
|
@ -331,8 +336,9 @@ class CausalLM(Model):
|
||||||
all_input_ids,
|
all_input_ids,
|
||||||
) in enumerate(iterator):
|
) in enumerate(iterator):
|
||||||
# Select next token
|
# Select next token
|
||||||
tokens, logprobs = next_token_chooser(all_input_ids.view(1, -1), logits)
|
next_token_id, logprobs = next_token_chooser(
|
||||||
next_token_id = tokens[-1].view(1, 1)
|
all_input_ids.view(1, -1), logits
|
||||||
|
)
|
||||||
|
|
||||||
# Append next token to all tokens
|
# Append next token to all tokens
|
||||||
all_input_ids = torch.cat([all_input_ids, next_token_id])
|
all_input_ids = torch.cat([all_input_ids, next_token_id])
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from opentelemetry import trace
|
||||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PreTrainedTokenizerBase
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PreTrainedTokenizerBase
|
||||||
from typing import Optional, Tuple, List, Type
|
from typing import Optional, Tuple, List, Type
|
||||||
|
|
||||||
|
@ -9,6 +10,8 @@ from text_generation.models.types import GeneratedText, Batch, Generation, Prefi
|
||||||
from text_generation.pb import generate_pb2
|
from text_generation.pb import generate_pb2
|
||||||
from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling
|
from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling
|
||||||
|
|
||||||
|
tracer = trace.get_tracer(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Seq2SeqLMBatch(Batch):
|
class Seq2SeqLMBatch(Batch):
|
||||||
|
@ -107,6 +110,7 @@ class Seq2SeqLMBatch(Batch):
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@tracer.start_as_current_span("concatenate")
|
||||||
def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
|
def concatenate(cls, batches: List["Seq2SeqLMBatch"]) -> "Seq2SeqLMBatch":
|
||||||
"""Concatenate multiple batches together by padding internal torch tensors"""
|
"""Concatenate multiple batches together by padding internal torch tensors"""
|
||||||
|
|
||||||
|
@ -361,6 +365,7 @@ class Seq2SeqLM(Model):
|
||||||
outputs.past_key_values,
|
outputs.past_key_values,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@tracer.start_as_current_span("generate_token")
|
||||||
def generate_token(
|
def generate_token(
|
||||||
self, batch: Seq2SeqLMBatch
|
self, batch: Seq2SeqLMBatch
|
||||||
) -> Tuple[List[Generation], Optional[Seq2SeqLMBatch]]:
|
) -> Tuple[List[Generation], Optional[Seq2SeqLMBatch]]:
|
||||||
|
@ -418,7 +423,7 @@ class Seq2SeqLM(Model):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Append next token to decoder tokens
|
# Append next token to decoder tokens
|
||||||
decoder_input_ids = torch.cat([decoder_input_ids, next_token_id])
|
decoder_input_ids = torch.cat([decoder_input_ids, next_token_id.squeeze(1)])
|
||||||
new_decoder_input_length = decoder_input_length + 1
|
new_decoder_input_length = decoder_input_length + 1
|
||||||
|
|
||||||
# Generated token
|
# Generated token
|
||||||
|
|
|
@ -13,6 +13,7 @@ from text_generation.cache import Cache
|
||||||
from text_generation.interceptor import ExceptionInterceptor
|
from text_generation.interceptor import ExceptionInterceptor
|
||||||
from text_generation.models import Model, get_model
|
from text_generation.models import Model, get_model
|
||||||
from text_generation.pb import generate_pb2_grpc, generate_pb2
|
from text_generation.pb import generate_pb2_grpc, generate_pb2
|
||||||
|
from text_generation.tracing import UDSOpenTelemetryAioServerInterceptor
|
||||||
|
|
||||||
|
|
||||||
class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
|
||||||
|
@ -100,7 +101,12 @@ def serve(
|
||||||
logger.exception("Error when initializing model")
|
logger.exception("Error when initializing model")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
server = aio.server(interceptors=[ExceptionInterceptor()])
|
server = aio.server(
|
||||||
|
interceptors=[
|
||||||
|
ExceptionInterceptor(),
|
||||||
|
UDSOpenTelemetryAioServerInterceptor(),
|
||||||
|
]
|
||||||
|
)
|
||||||
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
||||||
TextGenerationService(model, Cache(), server_urls), server
|
TextGenerationService(model, Cache(), server_urls), server
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
import grpc
|
||||||
|
|
||||||
|
from opentelemetry import trace
|
||||||
|
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||||
|
from opentelemetry.instrumentation.grpc._aio_server import (
|
||||||
|
OpenTelemetryAioServerInterceptor,
|
||||||
|
)
|
||||||
|
from opentelemetry.semconv.trace import SpanAttributes
|
||||||
|
from opentelemetry.sdk.resources import Resource
|
||||||
|
from opentelemetry.sdk.trace import TracerProvider
|
||||||
|
from opentelemetry.sdk.trace.export import (
|
||||||
|
BatchSpanProcessor,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class UDSOpenTelemetryAioServerInterceptor(OpenTelemetryAioServerInterceptor):
|
||||||
|
def __init__(self):
|
||||||
|
super().__init__(trace.get_tracer(__name__))
|
||||||
|
|
||||||
|
def _start_span(self, handler_call_details, context, set_status_on_exception=False):
|
||||||
|
"""
|
||||||
|
Rewrite _start_span method to support Unix Domain Socket gRPC contexts
|
||||||
|
"""
|
||||||
|
|
||||||
|
# standard attributes
|
||||||
|
attributes = {
|
||||||
|
SpanAttributes.RPC_SYSTEM: "grpc",
|
||||||
|
SpanAttributes.RPC_GRPC_STATUS_CODE: grpc.StatusCode.OK.value[0],
|
||||||
|
}
|
||||||
|
|
||||||
|
# if we have details about the call, split into service and method
|
||||||
|
if handler_call_details.method:
|
||||||
|
service, method = handler_call_details.method.lstrip("/").split("/", 1)
|
||||||
|
attributes.update(
|
||||||
|
{
|
||||||
|
SpanAttributes.RPC_METHOD: method,
|
||||||
|
SpanAttributes.RPC_SERVICE: service,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# add some attributes from the metadata
|
||||||
|
metadata = dict(context.invocation_metadata())
|
||||||
|
if "user-agent" in metadata:
|
||||||
|
attributes["rpc.user_agent"] = metadata["user-agent"]
|
||||||
|
|
||||||
|
# We use gRPC over a UNIX socket
|
||||||
|
attributes.update({SpanAttributes.NET_TRANSPORT: "unix"})
|
||||||
|
|
||||||
|
return self._tracer.start_as_current_span(
|
||||||
|
name=handler_call_details.method,
|
||||||
|
kind=trace.SpanKind.SERVER,
|
||||||
|
attributes=attributes,
|
||||||
|
set_status_on_exception=set_status_on_exception,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_tracing(shard: int, otlp_endpoint: str):
|
||||||
|
resource = Resource.create(
|
||||||
|
attributes={"service.name": f"text-generation-inference.server-{shard}"}
|
||||||
|
)
|
||||||
|
span_exporter = OTLPSpanExporter(endpoint=otlp_endpoint, insecure=True)
|
||||||
|
span_processor = BatchSpanProcessor(span_exporter)
|
||||||
|
|
||||||
|
trace.set_tracer_provider(TracerProvider(resource=resource))
|
||||||
|
trace.get_tracer_provider().add_span_processor(span_processor)
|
|
@ -36,16 +36,14 @@ class Sampling:
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
|
|
||||||
def __call__(self, logits):
|
def __call__(self, logits):
|
||||||
probs = torch.nn.functional.softmax(logits, dim=-1)
|
probs = torch.nn.functional.softmax(logits)
|
||||||
next_tokens = torch.multinomial(
|
next_tokens = torch.multinomial(probs, num_samples=1, generator=self.generator)
|
||||||
probs, num_samples=1, generator=self.generator
|
|
||||||
).squeeze(1)
|
|
||||||
return next_tokens
|
return next_tokens
|
||||||
|
|
||||||
|
|
||||||
class Greedy:
|
class Greedy:
|
||||||
def __call__(self, logits):
|
def __call__(self, logits):
|
||||||
return logits.argmax(dim=-1)
|
return logits.argmax()
|
||||||
|
|
||||||
|
|
||||||
class NextTokenChooser:
|
class NextTokenChooser:
|
||||||
|
@ -87,8 +85,9 @@ class NextTokenChooser:
|
||||||
logprobs = torch.log_softmax(scores, -1)
|
logprobs = torch.log_softmax(scores, -1)
|
||||||
|
|
||||||
# Choose tokens
|
# Choose tokens
|
||||||
next_ids = self.choice(scores)
|
next_id = self.choice(scores[-1])
|
||||||
return next_ids, logprobs
|
|
||||||
|
return next_id.view(1, 1), logprobs
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pb(
|
def from_pb(
|
||||||
|
@ -163,6 +162,7 @@ def initialize_torch_distributed():
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
from torch.distributed import ProcessGroupNCCL
|
from torch.distributed import ProcessGroupNCCL
|
||||||
|
|
||||||
# Set the device id.
|
# Set the device id.
|
||||||
assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
|
assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
|
||||||
device = rank % torch.cuda.device_count()
|
device = rank % torch.cuda.device_count()
|
||||||
|
@ -181,7 +181,7 @@ def initialize_torch_distributed():
|
||||||
world_size=world_size,
|
world_size=world_size,
|
||||||
rank=rank,
|
rank=rank,
|
||||||
timeout=timedelta(seconds=60),
|
timeout=timedelta(seconds=60),
|
||||||
pg_options=options
|
pg_options=options,
|
||||||
)
|
)
|
||||||
|
|
||||||
return torch.distributed.group.WORLD, rank, world_size
|
return torch.distributed.group.WORLD, rank, world_size
|
||||||
|
|
Loading…
Reference in New Issue