Set maximum grpc message receive size to 2GiB (#2075)
* Set maximum gRPC message receive size to 2GiB. The previous default was 4MiB, which doesn't really work well for multi-modal models.
* Update to Rust 1.79.0
* Fixup formatting to make PR pass
This commit is contained in:
parent
0f7d38e774
commit
c8c7ccd31e
|
@ -33,9 +33,9 @@ jobs:
|
||||||
- name: Install Rust
|
- name: Install Rust
|
||||||
uses: actions-rs/toolchain@v1
|
uses: actions-rs/toolchain@v1
|
||||||
with:
|
with:
|
||||||
# Released on: 02 May, 2024
|
# Released on: June 13, 2024
|
||||||
# https://releases.rs/docs/1.78.0/
|
# https://releases.rs/docs/1.79.0/
|
||||||
toolchain: 1.78.0
|
toolchain: 1.79.0
|
||||||
override: true
|
override: true
|
||||||
components: rustfmt, clippy
|
components: rustfmt, clippy
|
||||||
- name: Install Protoc
|
- name: Install Protoc
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Rust builder
|
# Rust builder
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Rust builder
|
# Rust builder
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
|
||||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||||
|
|
|
@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
|
||||||
"Lowest: {:.2} {unit}",
|
"Lowest: {:.2} {unit}",
|
||||||
data.iter()
|
data.iter()
|
||||||
.min_by(|a, b| a.total_cmp(b))
|
.min_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN)
|
.unwrap_or(&f64::NAN)
|
||||||
),
|
),
|
||||||
Style::default().fg(Color::Reset),
|
Style::default().fg(Color::Reset),
|
||||||
)]),
|
)]),
|
||||||
|
@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
|
||||||
"Highest: {:.2} {unit}",
|
"Highest: {:.2} {unit}",
|
||||||
data.iter()
|
data.iter()
|
||||||
.max_by(|a, b| a.total_cmp(b))
|
.max_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN)
|
.unwrap_or(&f64::NAN)
|
||||||
),
|
),
|
||||||
Style::default().fg(Color::Reset),
|
Style::default().fg(Color::Reset),
|
||||||
)]),
|
)]),
|
||||||
|
@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
|
||||||
let min_latency: f64 = *latency_iter
|
let min_latency: f64 = *latency_iter
|
||||||
.clone()
|
.clone()
|
||||||
.min_by(|a, b| a.total_cmp(b))
|
.min_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
let max_latency: f64 = *latency_iter
|
let max_latency: f64 = *latency_iter
|
||||||
.max_by(|a, b| a.total_cmp(b))
|
.max_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
let min_throughput: f64 = *throughput_iter
|
let min_throughput: f64 = *throughput_iter
|
||||||
.clone()
|
.clone()
|
||||||
.min_by(|a, b| a.total_cmp(b))
|
.min_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
let max_throughput: f64 = *throughput_iter
|
let max_throughput: f64 = *throughput_iter
|
||||||
.max_by(|a, b| a.total_cmp(b))
|
.max_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
|
|
||||||
// Char min max values
|
// Char min max values
|
||||||
let min_x = if zoom {
|
let min_x = if zoom {
|
||||||
|
|
|
@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
|
||||||
let min = data
|
let min = data
|
||||||
.iter()
|
.iter()
|
||||||
.min_by(|a, b| a.total_cmp(b))
|
.min_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
let max = data
|
let max = data
|
||||||
.iter()
|
.iter()
|
||||||
.max_by(|a, b| a.total_cmp(b))
|
.max_by(|a, b| a.total_cmp(b))
|
||||||
.unwrap_or(&std::f64::NAN);
|
.unwrap_or(&f64::NAN);
|
||||||
(average, *min, *max)
|
(average, *min, *max)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn px(data: &[f64], p: u32) -> f64 {
|
fn px(data: &[f64], p: u32) -> f64 {
|
||||||
let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
|
let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
|
||||||
*data.get(i).unwrap_or(&std::f64::NAN)
|
*data.get(i).unwrap_or(&f64::NAN)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn format_value(value: f64, unit: &'static str) -> String {
|
fn format_value(value: f64, unit: &'static str) -> String {
|
||||||
|
|
|
@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&p| {
|
.map(|&p| {
|
||||||
let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
|
let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
|
||||||
(format!("p{p}"), *values.get(i).unwrap_or(&std::f64::NAN))
|
(format!("p{p}"), *values.get(i).unwrap_or(&f64::NAN))
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[toolchain]
|
[toolchain]
|
||||||
# Released on: 02 May, 2024
|
# Released on: June 13, 2024
|
||||||
# https://releases.rs/docs/1.78.0/
|
# https://releases.rs/docs/1.79.0/
|
||||||
channel = "1.78.0"
|
channel = "1.79.0"
|
||||||
components = ["rustfmt", "clippy"]
|
components = ["rustfmt", "clippy"]
|
||||||
|
|
|
@ -240,7 +240,11 @@ def serve(
|
||||||
interceptors=[
|
interceptors=[
|
||||||
ExceptionInterceptor(),
|
ExceptionInterceptor(),
|
||||||
UDSOpenTelemetryAioServerInterceptor(),
|
UDSOpenTelemetryAioServerInterceptor(),
|
||||||
]
|
],
|
||||||
|
options=[
|
||||||
|
# Set the maximum possible message length: i32::MAX
|
||||||
|
("grpc.max_receive_message_length", (1 << 31) - 1)
|
||||||
|
],
|
||||||
)
|
)
|
||||||
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
||||||
TextGenerationService(model, Cache(), quantize, server_urls), server
|
TextGenerationService(model, Cache(), quantize, server_urls), server
|
||||||
|
|
Loading…
Reference in New Issue