Set maximum grpc message receive size to 2GiB (#2075)

* Set maximum grpc message receive size to 2GiB. The previous default was 4 MiB, which is too small for the payloads multi-modal models send.
* Update to Rust 1.79.0.
* Fix up formatting to make the PR pass.
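For scale: (1 << 31) - 1 = 2,147,483,647 bytes, i.e. i32::MAX, one byte short of 2 GiB. gRPC's message-length channel arguments are signed 32-bit integers, so this is the largest limit that can be expressed. A minimal standalone sketch of the option in action, assuming grpc-python with asyncio (the address and lifecycle code are illustrative; only the grpc.max_receive_message_length argument comes from this commit):

```python
import asyncio
import grpc

MAX_MESSAGE_LENGTH = (1 << 31) - 1  # i32::MAX, one byte short of 2 GiB

async def main() -> None:
    # Without this option, the server rejects any inbound message larger
    # than the 4 MiB default with status RESOURCE_EXHAUSTED.
    server = grpc.aio.server(
        options=[("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH)]
    )
    server.add_insecure_port("localhost:50051")  # illustrative address
    await server.start()
    await server.wait_for_termination()

asyncio.run(main())
```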
This commit is contained in:
parent 0f7d38e774
commit c8c7ccd31e
@@ -33,9 +33,9 @@ jobs:
       - name: Install Rust
         uses: actions-rs/toolchain@v1
         with:
-          # Released on: 02 May, 2024
-          # https://releases.rs/docs/1.78.0/
-          toolchain: 1.78.0
+          # Released on: June 13, 2024
+          # https://releases.rs/docs/1.79.0/
+          toolchain: 1.79.0
           override: true
           components: rustfmt, clippy
       - name: Install Protoc

(In several documentation hunks below, removed and re-added lines are textually identical; the differences are presumably whitespace-only, from the "fix up formatting" part of this commit.)

@@ -130,4 +130,4 @@ For answers to common questions about this code of conduct, see the FAQ at
 [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
 [Mozilla CoC]: https://github.com/mozilla/diversity
 [FAQ]: https://www.contributor-covenant.org/faq
-[translations]: https://www.contributor-covenant.org/translations
+[translations]: https://www.contributor-covenant.org/translations

@@ -55,10 +55,10 @@ feedback.
 The text-generation-inference library is robust and reliable thanks to users who report the problems they encounter.

 Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the
-library itself, and not your code.
+already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the
+library itself, and not your code.

-Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so
+Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so
 we can quickly resolve it:

 * Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies).

@@ -79,20 +79,20 @@ that in your issue report.

 If there is a new feature you'd like to see in text-generation-inference, please open an issue and describe:

-1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it
-   a feature related to something you need for a project? Is it something you worked on and think it could benefit
+1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it
+   a feature related to something you need for a project? Is it something you worked on and think it could benefit
    the community?

    Whatever it is, we'd love to hear about it!

-2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better
+2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better
    we'll be able to help you.
 3. Provide a *code snippet* that demonstrates the feature's usage.
 4. If the feature is related to a paper, please include a link.

 If your issue is well written we're already 80% of the way there by the time you create it.

-We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE)
+We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE)
 to help you get started with your issue.

 ## Do you want to implement a new model?

@@ -107,14 +107,14 @@ If you are willing to contribute the model yourself, let us know so we can help

 ## Do you want to add documentation?

-We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know
-how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be
+We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know
+how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be
 happy to make the changes or help you make a contribution if you're interested!

 ## I want to become a maintainer of the project. How do I get there?

 TGI is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have
 motivated individuals from other organizations join us as maintainers with the goal of making TGI the best inference
-service.
+service.

-If you are such an individual (or organization), please reach out to us and let's collaborate.
+If you are such an individual (or organization), please reach out to us and let's collaborate.

@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

@@ -1,4 +1,4 @@
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

(The Rust hunks below replace the legacy std::f64::NAN module constant with the f64::NAN associated constant; newer clippy versions flag the module form via the legacy_numeric_constants lint.)

@@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                 "Lowest: {:.2} {unit}",
                 data.iter()
                     .min_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
             ),
             Style::default().fg(Color::Reset),
         )]),

@@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                 "Highest: {:.2} {unit}",
                 data.iter()
                     .max_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
             ),
             Style::default().fg(Color::Reset),
         )]),

@@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
     let min_latency: f64 = *latency_iter
         .clone()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max_latency: f64 = *latency_iter
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let min_throughput: f64 = *throughput_iter
         .clone()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max_throughput: f64 = *throughput_iter
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);

     // Char min max values
     let min_x = if zoom {

@@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
     let min = data
         .iter()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max = data
         .iter()
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     (average, *min, *max)
 }

 fn px(data: &[f64], p: u32) -> f64 {
     let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
-    *data.get(i).unwrap_or(&std::f64::NAN)
+    *data.get(i).unwrap_or(&f64::NAN)
 }

 fn format_value(value: f64, unit: &'static str) -> String {

@@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f
         .iter()
         .map(|&p| {
             let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
-            (format!("p{p}"), *values.get(i).unwrap_or(&std::f64::NAN))
+            (format!("p{p}"), *values.get(i).unwrap_or(&f64::NAN))
         })
         .collect()
 }

@@ -1,5 +1,5 @@
 [toolchain]
-# Released on: 02 May, 2024
-# https://releases.rs/docs/1.78.0/
-channel = "1.78.0"
+# Released on: June 13, 2024
+# https://releases.rs/docs/1.79.0/
+channel = "1.79.0"
 components = ["rustfmt", "clippy"]

@@ -240,7 +240,11 @@ def serve(
         interceptors=[
             ExceptionInterceptor(),
             UDSOpenTelemetryAioServerInterceptor(),
-        ]
+        ],
+        options=[
+            # Set the maximum possible message length: i32::MAX
+            ("grpc.max_receive_message_length", (1 << 31) - 1)
+        ],
     )
     generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
         TextGenerationService(model, Cache(), quantize, server_urls), server
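The hunk above raises only the server's receive limit. A client that sends large requests or receives large responses needs the mirror-image channel options; a minimal sketch, assuming grpc-python (the Unix-socket target and the commented stub line are illustrative, not part of this commit):

```python
import grpc

MAX_MESSAGE_LENGTH = (1 << 31) - 1  # match the server's new receive limit

channel = grpc.insecure_channel(
    "unix:///tmp/text-generation-server-0",  # hypothetical socket path
    options=[
        # Allow large outbound requests (e.g. image payloads) ...
        ("grpc.max_send_message_length", MAX_MESSAGE_LENGTH),
        # ... and large inbound responses.
        ("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH),
    ],
)
# stub = generate_pb2_grpc.TextGenerationServiceStub(channel)
```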