Fixing the free algorithm to handle cases where the common prefix is smaller.
This commit is contained in:
Nicolas Patry 2024-08-29 09:17:00 +02:00
parent 9c839ca5df
commit bef2f6bdaa
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
1 changed file with 6 additions and 5 deletions

View File

@@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
)
// Unwrap, failing is a programming error.
.expect("Failed to store prefill tokens");
// We can have a prefill with the following structure:
//
// |---| From the prefix cache.
@@ -182,12 +181,14 @@ impl Allocator for RadixAllocator {
// This means that while processing this request there was a
// partially overlapping request that had A..=E in its
// prefill. In this case we need to free the blocks D E.
if prefix_len > allocation.cached_prefix_len {
self.free_blocks.extend(
&blocks[allocation.cached_prefix_len / self.block_size as usize
..prefix_len / self.block_size as usize],
);
}
}
}
// Free non-prefill blocks.
self.free_blocks