diff --git a/router/src/queue.rs b/router/src/queue.rs index 52ea16ca..20f25b09 100644 --- a/router/src/queue.rs +++ b/router/src/queue.rs @@ -200,6 +200,10 @@ impl State { } } + // Pad prefill_token_budget to be a multiple of block size + let prefill_token_budget = + ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size; + // Create span for this batch to add context to inference calls let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty); next_batch_span.follows_from(&Span::current());