Set maximum grpc message receive size to 2GiB (#2075)

* Set maximum grpc message receive size to 2GiB

The previous default was 4MiB, which doesn't really work well for
multi-modal models.

* Update to Rust 1.79.0

* Fixup formatting to make PR pass
This commit is contained in:
Daniël de Kok 2024-06-17 16:40:44 +02:00 committed by GitHub
parent 0f7d38e774
commit c8c7ccd31e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 36 additions and 32 deletions

View File

@ -33,9 +33,9 @@ jobs:
- name: Install Rust - name: Install Rust
uses: actions-rs/toolchain@v1 uses: actions-rs/toolchain@v1
with: with:
# Released on: 02 May, 2024 # Released on: June 13, 2024
# https://releases.rs/docs/1.78.0/ # https://releases.rs/docs/1.79.0/
toolchain: 1.78.0 toolchain: 1.79.0
override: true override: true
components: rustfmt, clippy components: rustfmt, clippy
- name: Install Protoc - name: Install Protoc

View File

@ -1,5 +1,5 @@
# Rust builder # Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

View File

@ -1,5 +1,5 @@
# Rust builder # Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

View File

@ -1,4 +1,4 @@
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

View File

@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
"Lowest: {:.2} {unit}", "Lowest: {:.2} {unit}",
data.iter() data.iter()
.min_by(|a, b| a.total_cmp(b)) .min_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN) .unwrap_or(&f64::NAN)
), ),
Style::default().fg(Color::Reset), Style::default().fg(Color::Reset),
)]), )]),
@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
"Highest: {:.2} {unit}", "Highest: {:.2} {unit}",
data.iter() data.iter()
.max_by(|a, b| a.total_cmp(b)) .max_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN) .unwrap_or(&f64::NAN)
), ),
Style::default().fg(Color::Reset), Style::default().fg(Color::Reset),
)]), )]),
@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
let min_latency: f64 = *latency_iter let min_latency: f64 = *latency_iter
.clone() .clone()
.min_by(|a, b| a.total_cmp(b)) .min_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
let max_latency: f64 = *latency_iter let max_latency: f64 = *latency_iter
.max_by(|a, b| a.total_cmp(b)) .max_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
let min_throughput: f64 = *throughput_iter let min_throughput: f64 = *throughput_iter
.clone() .clone()
.min_by(|a, b| a.total_cmp(b)) .min_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
let max_throughput: f64 = *throughput_iter let max_throughput: f64 = *throughput_iter
.max_by(|a, b| a.total_cmp(b)) .max_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
// Char min max values // Char min max values
let min_x = if zoom { let min_x = if zoom {

View File

@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
let min = data let min = data
.iter() .iter()
.min_by(|a, b| a.total_cmp(b)) .min_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
let max = data let max = data
.iter() .iter()
.max_by(|a, b| a.total_cmp(b)) .max_by(|a, b| a.total_cmp(b))
.unwrap_or(&std::f64::NAN); .unwrap_or(&f64::NAN);
(average, *min, *max) (average, *min, *max)
} }
fn px(data: &[f64], p: u32) -> f64 { fn px(data: &[f64], p: u32) -> f64 {
let i = (f64::from(p) / 100.0 * data.len() as f64) as usize; let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
*data.get(i).unwrap_or(&std::f64::NAN) *data.get(i).unwrap_or(&f64::NAN)
} }
fn format_value(value: f64, unit: &'static str) -> String { fn format_value(value: f64, unit: &'static str) -> String {

View File

@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f
.iter() .iter()
.map(|&p| { .map(|&p| {
let i = (f64::from(p) / 100.0 * values.len() as f64) as usize; let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
(format!("p{p}"), *values.get(i).unwrap_or(&std::f64::NAN)) (format!("p{p}"), *values.get(i).unwrap_or(&f64::NAN))
}) })
.collect() .collect()
} }

View File

@ -1,5 +1,5 @@
[toolchain] [toolchain]
# Released on: 02 May, 2024 # Released on: June 13, 2024
# https://releases.rs/docs/1.78.0/ # https://releases.rs/docs/1.79.0/
channel = "1.78.0" channel = "1.79.0"
components = ["rustfmt", "clippy"] components = ["rustfmt", "clippy"]

View File

@ -240,7 +240,11 @@ def serve(
interceptors=[ interceptors=[
ExceptionInterceptor(), ExceptionInterceptor(),
UDSOpenTelemetryAioServerInterceptor(), UDSOpenTelemetryAioServerInterceptor(),
] ],
options=[
# Set the maximum possible message length: i32::MAX
("grpc.max_receive_message_length", (1 << 31) - 1)
],
) )
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server( generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
TextGenerationService(model, Cache(), quantize, server_urls), server TextGenerationService(model, Cache(), quantize, server_urls), server