Prepping 1.1.0 (#1066)
# What does this PR do? Upgrade all relevant versions and dependencies. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution. Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change. Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one has reviewed your PR after a week has passed, don't hesitate to post a new comment @-mentioning the same people---sometimes notifications get lost. --> <!-- Remove if not applicable --> Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. 
<!-- Your PR will be replied to more quickly if you can figure out the right person to tag with @ @OlivierDehaene OR @Narsil -->
This commit is contained in:
parent
8672cad2cb
commit
a049864270
File diff suppressed because it is too large
Load Diff
|
@ -8,7 +8,7 @@ members = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "1.0.3"
|
version = "1.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Olivier Dehaene"]
|
authors = ["Olivier Dehaene"]
|
||||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||||
|
|
|
@ -87,7 +87,7 @@ The easiest way of getting started is using the official Docker container:
|
||||||
model=tiiuae/falcon-7b-instruct
|
model=tiiuae/falcon-7b-instruct
|
||||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||||
|
|
||||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.3 --model-id $model
|
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
|
||||||
```
|
```
|
||||||
**Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
|
**Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
|
||||||
|
|
||||||
|
|
|
@ -14,18 +14,19 @@ name = "text-generation-benchmark"
|
||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
average = "0.13"
|
average = "0.14"
|
||||||
clap = { version = "4.1.4", features = ["derive", "env"] }
|
clap = { version = "4.4.5", features = ["derive", "env"] }
|
||||||
crossterm = "0.26"
|
crossterm = "0.27"
|
||||||
float-ord = "0.3.2"
|
float-ord = "0.3.2"
|
||||||
serde = {version = "1.0.142", features = ["derive"]}
|
serde = {version = "1.0.188", features = ["derive"]}
|
||||||
serde_json = "1.0"
|
serde_json = "1.0"
|
||||||
tabled = "0.12.0"
|
tabled = "0.14.0"
|
||||||
text-generation-client = { path = "../router/client" }
|
text-generation-client = { path = "../router/client" }
|
||||||
thiserror = "1.0.38"
|
thiserror = "1.0.48"
|
||||||
tokenizers = "0.13.3"
|
tokenizers = { version = "0.14.0", features = ["http"] }
|
||||||
tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "macros"] }
|
||||||
tui = {package = "ratatui", version = "0.20", default-features = false, features = ["crossterm"]}
|
tui = {package = "ratatui", version = "0.23", default-features = false, features = ["crossterm"]}
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
|
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||||
|
hf-hub = "0.3.1"
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ use tokio::sync::mpsc;
|
||||||
use tui::backend::Backend;
|
use tui::backend::Backend;
|
||||||
use tui::layout::{Alignment, Constraint, Direction, Layout};
|
use tui::layout::{Alignment, Constraint, Direction, Layout};
|
||||||
use tui::style::{Color, Modifier, Style};
|
use tui::style::{Color, Modifier, Style};
|
||||||
use tui::text::{Span, Spans};
|
use tui::text::{Line, Span};
|
||||||
use tui::widgets::{
|
use tui::widgets::{
|
||||||
Axis, BarChart, Block, Borders, Chart, Dataset, Gauge, GraphType, Paragraph, Tabs,
|
Axis, BarChart, Block, Borders, Chart, Dataset, Gauge, GraphType, Paragraph, Tabs,
|
||||||
};
|
};
|
||||||
|
@ -244,7 +244,7 @@ impl App {
|
||||||
.batch_size
|
.batch_size
|
||||||
.iter()
|
.iter()
|
||||||
.map(|b| {
|
.map(|b| {
|
||||||
Spans::from(vec![Span::styled(
|
Line::from(vec![Span::styled(
|
||||||
format!("Batch: {b}"),
|
format!("Batch: {b}"),
|
||||||
Style::default().fg(Color::White),
|
Style::default().fg(Color::White),
|
||||||
)])
|
)])
|
||||||
|
@ -468,7 +468,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
|
||||||
// Latency p50/p90/p99 texts
|
// Latency p50/p90/p99 texts
|
||||||
let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
|
let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
|
||||||
for (i, (name, value)) in latency_percentiles.iter().enumerate() {
|
for (i, (name, value)) in latency_percentiles.iter().enumerate() {
|
||||||
let span = Spans::from(vec![Span::styled(
|
let span = Line::from(vec![Span::styled(
|
||||||
format!("{name}: {value:.2} ms"),
|
format!("{name}: {value:.2} ms"),
|
||||||
Style::default().fg(colors[i]),
|
Style::default().fg(colors[i]),
|
||||||
)]);
|
)]);
|
||||||
|
@ -483,16 +483,16 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Average/High/Low spans
|
/// Average/High/Low spans
|
||||||
fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Spans<'a>> {
|
fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Line<'a>> {
|
||||||
vec![
|
vec![
|
||||||
Spans::from(vec![Span::styled(
|
Line::from(vec![Span::styled(
|
||||||
format!(
|
format!(
|
||||||
"Average: {:.2} {unit}",
|
"Average: {:.2} {unit}",
|
||||||
data.iter().sum::<f64>() / data.len() as f64
|
data.iter().sum::<f64>() / data.len() as f64
|
||||||
),
|
),
|
||||||
Style::default().fg(Color::LightBlue),
|
Style::default().fg(Color::LightBlue),
|
||||||
)]),
|
)]),
|
||||||
Spans::from(vec![Span::styled(
|
Line::from(vec![Span::styled(
|
||||||
format!(
|
format!(
|
||||||
"Lowest: {:.2} {unit}",
|
"Lowest: {:.2} {unit}",
|
||||||
data.iter()
|
data.iter()
|
||||||
|
@ -501,7 +501,7 @@ fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Spans<'a>> {
|
||||||
),
|
),
|
||||||
Style::default().fg(Color::Reset),
|
Style::default().fg(Color::Reset),
|
||||||
)]),
|
)]),
|
||||||
Spans::from(vec![Span::styled(
|
Line::from(vec![Span::styled(
|
||||||
format!(
|
format!(
|
||||||
"Highest: {:.2} {unit}",
|
"Highest: {:.2} {unit}",
|
||||||
data.iter()
|
data.iter()
|
||||||
|
|
|
@ -33,7 +33,7 @@ pub async fn run(
|
||||||
watermark: bool,
|
watermark: bool,
|
||||||
do_sample: bool,
|
do_sample: bool,
|
||||||
client: ShardedClient,
|
client: ShardedClient,
|
||||||
) -> Result<(), crossterm::ErrorKind> {
|
) -> Result<(), std::io::Error> {
|
||||||
let parameters = NextTokenChooserParameters {
|
let parameters = NextTokenChooserParameters {
|
||||||
temperature: temperature.unwrap_or(1.0),
|
temperature: temperature.unwrap_or(1.0),
|
||||||
top_k: top_k.unwrap_or(0),
|
top_k: top_k.unwrap_or(0),
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
"name": "Apache 2.0",
|
"name": "Apache 2.0",
|
||||||
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
||||||
},
|
},
|
||||||
"version": "1.0.3"
|
"version": "1.1.0"
|
||||||
},
|
},
|
||||||
"paths": {
|
"paths": {
|
||||||
"/": {
|
"/": {
|
||||||
|
|
|
@ -19,6 +19,6 @@ docker run --gpus all \
|
||||||
--shm-size 1g \
|
--shm-size 1g \
|
||||||
-e HUGGING_FACE_HUB_TOKEN=$token \
|
-e HUGGING_FACE_HUB_TOKEN=$token \
|
||||||
-p 8080:80 \
|
-p 8080:80 \
|
||||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.1 \
|
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 \
|
||||||
--model-id $model
|
--model-id $model
|
||||||
```
|
```
|
||||||
|
|
|
@ -8,7 +8,7 @@ Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/
|
||||||
model=tiiuae/falcon-7b-instruct
|
model=tiiuae/falcon-7b-instruct
|
||||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||||
|
|
||||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.3 --model-id $model
|
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
|
||||||
```
|
```
|
||||||
|
|
||||||
<Tip warning={true}>
|
<Tip warning={true}>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "text-generation-integration-tests"
|
name = "text-generation-integration-tests"
|
||||||
version = "1.0.3"
|
version = "1.1.0"
|
||||||
description = "Text Generation Inference integration tests"
|
description = "Text Generation Inference integration tests"
|
||||||
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
||||||
|
|
||||||
|
|
|
@ -7,17 +7,17 @@ authors.workspace = true
|
||||||
homepage.workspace = true
|
homepage.workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
clap = { version = "4.1.4", features = ["derive", "env"] }
|
clap = { version = "4.4.5", features = ["derive", "env"] }
|
||||||
ctrlc = { version = "3.2.5", features = ["termination"] }
|
ctrlc = { version = "3.4.1", features = ["termination"] }
|
||||||
nix = "0.26.2"
|
nix = "0.27.1"
|
||||||
serde = { version = "1.0.152", features = ["derive"] }
|
serde = { version = "1.0.188", features = ["derive"] }
|
||||||
serde_json = "1.0.93"
|
serde_json = "1.0.107"
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
|
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
float_eq = "1.0.1"
|
float_eq = "1.0.1"
|
||||||
reqwest = { version = "0.11.14", features = ["blocking", "json"] }
|
reqwest = { version = "0.11.20", features = ["blocking", "json"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
vergen = { version = "8.0.0", features = ["build", "cargo", "git", "gitcl", "rustc", "si"] }
|
vergen = { version = "8.2.5", features = ["build", "cargo", "git", "gitcl", "rustc", "si"] }
|
||||||
|
|
|
@ -15,36 +15,38 @@ name = "text-generation-router"
|
||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-stream = "0.3.3"
|
async-stream = "0.3.5"
|
||||||
axum = { version = "0.6.4", features = ["json"] }
|
axum = { version = "0.6.20", features = ["json"] }
|
||||||
axum-tracing-opentelemetry = "0.10.0"
|
axum-tracing-opentelemetry = "0.14.1"
|
||||||
text-generation-client = { path = "client" }
|
text-generation-client = { path = "client" }
|
||||||
clap = { version = "4.1.4", features = ["derive", "env"] }
|
clap = { version = "4.4.5", features = ["derive", "env"] }
|
||||||
flume = "0.10.14"
|
flume = "0.11.0"
|
||||||
futures = "0.3.26"
|
futures = "0.3.28"
|
||||||
metrics = "0.21.0"
|
metrics = "0.21.1"
|
||||||
metrics-exporter-prometheus = { version = "0.12.1", features = [] }
|
metrics-exporter-prometheus = { version = "0.12.1", features = [] }
|
||||||
nohash-hasher = "0.2.0"
|
nohash-hasher = "0.2.0"
|
||||||
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
|
opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
|
||||||
opentelemetry-otlp = "0.12.0"
|
opentelemetry-otlp = "0.13.0"
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
reqwest = { version = "0.11.14", features = [] }
|
reqwest = { version = "0.11.20", features = [] }
|
||||||
serde = "1.0.152"
|
serde = "1.0.188"
|
||||||
serde_json = "1.0.93"
|
serde_json = "1.0.107"
|
||||||
thiserror = "1.0.38"
|
thiserror = "1.0.48"
|
||||||
tokenizers = "0.13.3"
|
tokenizers = { version = "0.14.0", features = ["http"] }
|
||||||
tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
|
||||||
tower-http = { version = "0.4.0", features = ["cors"] }
|
tower-http = { version = "0.4.4", features = ["cors"] }
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
tracing-opentelemetry = "0.19.0"
|
tracing-opentelemetry = "0.21.0"
|
||||||
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
|
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||||
utoipa = { version = "3.0.1", features = ["axum_extras"] }
|
utoipa = { version = "3.5.0", features = ["axum_extras"] }
|
||||||
utoipa-swagger-ui = { version = "3.0.2", features = ["axum"] }
|
utoipa-swagger-ui = { version = "3.1.5", features = ["axum"] }
|
||||||
ngrok = { version = "0.12.3", features = ["axum"], optional = true }
|
ngrok = { version = "0.13.1", features = ["axum"], optional = true }
|
||||||
|
hf-hub = "0.3.1"
|
||||||
|
init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }
|
vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["ngrok"]
|
default = ["ngrok"]
|
||||||
ngrok = ["dep:ngrok"]
|
ngrok = ["dep:ngrok"]
|
||||||
|
|
|
@ -8,13 +8,13 @@ homepage.workspace = true
|
||||||
[dependencies]
|
[dependencies]
|
||||||
futures = "^0.3"
|
futures = "^0.3"
|
||||||
grpc-metadata = { path = "../grpc-metadata" }
|
grpc-metadata = { path = "../grpc-metadata" }
|
||||||
prost = "^0.11"
|
prost = "^0.12"
|
||||||
thiserror = "^1.0"
|
thiserror = "^1.0"
|
||||||
tokio = { version = "^1.25", features = ["sync"] }
|
tokio = { version = "^1.32", features = ["sync"] }
|
||||||
tonic = "^0.9"
|
tonic = "^0.10"
|
||||||
tower = "^0.4"
|
tower = "^0.4"
|
||||||
tracing = "^0.1"
|
tracing = "^0.1"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
tonic-build = "0.9.2"
|
tonic-build = "0.10.1"
|
||||||
prost-build = "0.11.6"
|
prost-build = "0.12.1"
|
||||||
|
|
|
@ -4,7 +4,7 @@ version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
opentelemetry = "^0.19"
|
opentelemetry = "^0.20"
|
||||||
tonic = "^0.9"
|
tonic = "^0.10"
|
||||||
tracing = "^0.1"
|
tracing = "^0.1"
|
||||||
tracing-opentelemetry = "^0.19"
|
tracing-opentelemetry = "^0.21"
|
||||||
|
|
|
@ -324,7 +324,7 @@ fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
|
||||||
|
|
||||||
if let Ok(tracer) = tracer {
|
if let Ok(tracer) = tracer {
|
||||||
layers.push(tracing_opentelemetry::layer().with_tracer(tracer).boxed());
|
layers.push(tracing_opentelemetry::layer().with_tracer(tracer).boxed());
|
||||||
axum_tracing_opentelemetry::init_propagator().unwrap();
|
init_tracing_opentelemetry::init_propagator().unwrap();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@ use axum::response::sse::{Event, KeepAlive, Sse};
|
||||||
use axum::response::{IntoResponse, Response};
|
use axum::response::{IntoResponse, Response};
|
||||||
use axum::routing::{get, post};
|
use axum::routing::{get, post};
|
||||||
use axum::{http, Json, Router};
|
use axum::{http, Json, Router};
|
||||||
use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
|
use axum_tracing_opentelemetry::middleware::OtelAxumLayer;
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use futures::Stream;
|
use futures::Stream;
|
||||||
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
|
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
|
||||||
|
@ -396,7 +396,7 @@ async fn generate_stream(
|
||||||
// StreamResponse
|
// StreamResponse
|
||||||
let stream_token = StreamResponse {
|
let stream_token = StreamResponse {
|
||||||
token,
|
token,
|
||||||
top_tokens: top_tokens,
|
top_tokens,
|
||||||
generated_text: None,
|
generated_text: None,
|
||||||
details: None,
|
details: None,
|
||||||
};
|
};
|
||||||
|
@ -458,7 +458,7 @@ async fn generate_stream(
|
||||||
|
|
||||||
let stream_token = StreamResponse {
|
let stream_token = StreamResponse {
|
||||||
token,
|
token,
|
||||||
top_tokens: top_tokens,
|
top_tokens,
|
||||||
generated_text: Some(output_text),
|
generated_text: Some(output_text),
|
||||||
details
|
details
|
||||||
};
|
};
|
||||||
|
@ -695,7 +695,7 @@ pub async fn run(
|
||||||
.layer(Extension(compat_return_full_text))
|
.layer(Extension(compat_return_full_text))
|
||||||
.layer(Extension(infer))
|
.layer(Extension(infer))
|
||||||
.layer(Extension(prom_handle.clone()))
|
.layer(Extension(prom_handle.clone()))
|
||||||
.layer(opentelemetry_tracing_layer())
|
.layer(OtelAxumLayer::default())
|
||||||
.layer(cors_layer);
|
.layer(cors_layer);
|
||||||
|
|
||||||
if ngrok {
|
if ngrok {
|
||||||
|
@ -792,7 +792,7 @@ async fn shutdown_signal() {
|
||||||
|
|
||||||
impl From<i32> for FinishReason {
|
impl From<i32> for FinishReason {
|
||||||
fn from(finish_reason: i32) -> Self {
|
fn from(finish_reason: i32) -> Self {
|
||||||
let finish_reason = text_generation_client::FinishReason::from_i32(finish_reason).unwrap();
|
let finish_reason = text_generation_client::FinishReason::try_from(finish_reason).unwrap();
|
||||||
match finish_reason {
|
match finish_reason {
|
||||||
text_generation_client::FinishReason::Length => FinishReason::Length,
|
text_generation_client::FinishReason::Length => FinishReason::Length,
|
||||||
text_generation_client::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
|
text_generation_client::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
|
||||||
|
|
|
@ -276,7 +276,7 @@ impl Validation {
|
||||||
truncate: truncate.unwrap_or(self.max_input_length) as u32,
|
truncate: truncate.unwrap_or(self.max_input_length) as u32,
|
||||||
parameters,
|
parameters,
|
||||||
stopping_parameters,
|
stopping_parameters,
|
||||||
top_n_tokens: top_n_tokens,
|
top_n_tokens,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "text-generation-server"
|
name = "text-generation-server"
|
||||||
version = "1.0.3"
|
version = "1.1.0"
|
||||||
description = "Text Generation Inference Python gRPC Server"
|
description = "Text Generation Inference Python gRPC Server"
|
||||||
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
||||||
|
|
||||||
|
|
|
@ -9,19 +9,19 @@ certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.13"
|
charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
|
click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
|
colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
|
||||||
datasets==2.14.4 ; python_version >= "3.9" and python_version < "3.13"
|
datasets==2.14.5 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
|
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
|
dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
|
einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
filelock==3.12.3 ; python_version >= "3.9" and python_version < "3.13"
|
filelock==3.12.4 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
|
frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
|
fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
fsspec[http]==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
|
fsspec[http]==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
googleapis-common-protos==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
|
googleapis-common-protos==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
grpc-interceptor==0.15.3 ; python_version >= "3.9" and python_version < "3.13"
|
grpc-interceptor==0.15.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
grpcio-reflection==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
|
grpcio-reflection==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
grpcio-status==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
|
grpcio-status==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
grpcio==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
|
grpcio==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
hf-transfer==0.1.3 ; python_version >= "3.9" and python_version < "3.13"
|
hf-transfer==0.1.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
|
huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
|
idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
|
@ -32,7 +32,7 @@ mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
|
multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13"
|
multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
networkx==3.1 ; python_version >= "3.9" and python_version < "3.13"
|
networkx==3.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
numpy==1.25.2 ; python_version >= "3.9" and python_version < "3.13"
|
numpy==1.26.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
|
@ -43,32 +43,32 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13
|
||||||
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
|
packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pandas==2.0.3 ; python_version >= "3.9" and python_version < "3.13"
|
pandas==2.1.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13"
|
peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pillow==10.0.0 ; python_version >= "3.9" and python_version < "3.13"
|
pillow==10.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
protobuf==4.24.2 ; python_version >= "3.9" and python_version < "3.13"
|
protobuf==4.24.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
psutil==5.9.5 ; python_version >= "3.9" and python_version < "3.13"
|
psutil==5.9.5 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pyarrow==13.0.0 ; python_version >= "3.9" and python_version < "3.13"
|
pyarrow==13.0.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13"
|
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pytz==2023.3 ; python_version >= "3.9" and python_version < "3.13"
|
pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
regex==2023.8.8 ; python_version >= "3.9" and python_version < "3.13"
|
regex==2023.8.8 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
|
requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
|
safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
scipy==1.11.2 ; python_version >= "3.9" and python_version < "3.13"
|
scipy==1.11.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
|
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
setuptools==68.1.2 ; python_version >= "3.9" and python_version < "3.13"
|
setuptools==68.2.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
|
sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
texttable==1.6.7 ; python_version >= "3.9" and python_version < "3.13"
|
texttable==1.6.7 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
|
tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
torch==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
torch==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
|
tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
transformers==4.32.1 ; python_version >= "3.9" and python_version < "3.13"
|
transformers==4.33.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
|
typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
|
typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13"
|
tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
urllib3==2.0.4 ; python_version >= "3.9" and python_version < "3.13"
|
urllib3==2.0.5 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
|
win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
|
||||||
wrapt==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
wrapt==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
xxhash==3.3.0 ; python_version >= "3.9" and python_version < "3.13"
|
xxhash==3.3.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
|
|
Loading…
Reference in New Issue