From c2c98725f868027bc66366e3cf2cc13dd4ba90b3 Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Fri, 12 Apr 2024 10:59:04 +0200
Subject: [PATCH] fix(router): fix a possible deadlock in next_batch (#1731)

---
 router/src/queue.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/router/src/queue.rs b/router/src/queue.rs
index 52ea16ca..20f25b09 100644
--- a/router/src/queue.rs
+++ b/router/src/queue.rs
@@ -200,6 +200,10 @@ impl State {
             }
         }
 
+        // Pad prefill_token_budget to be a multiple of block size
+        let prefill_token_budget =
+            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
+
         // Create span for this batch to add context to inference calls
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
         next_batch_span.follows_from(&Span::current());