Fix the free algorithm to handle cases where the common prefix is smaller than the cached prefix.
This commit is contained in:
Nicolas Patry 2024-08-29 09:17:00 +02:00
parent 9c839ca5df
commit bef2f6bdaa
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
1 changed file with 6 additions and 5 deletions

View File

@@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
) )
// Unwrap, failing is a programming error. // Unwrap, failing is a programming error.
.expect("Failed to store prefill tokens"); .expect("Failed to store prefill tokens");
// We can have a prefill with the following structure: // We can have a prefill with the following structure:
// //
// |---| From the prefix cache. // |---| From the prefix cache.
@@ -182,10 +181,12 @@ impl Allocator for RadixAllocator {
// This means that while processing this request there was a // This means that while processing this request there was a
// partially overlapping request that had A..=E in its // partially overlapping request that had A..=E in its
// prefill. In this case we need to free the blocks D E. // prefill. In this case we need to free the blocks D E.
self.free_blocks.extend( if prefix_len > allocation.cached_prefix_len {
&blocks[allocation.cached_prefix_len / self.block_size as usize self.free_blocks.extend(
..prefix_len / self.block_size as usize], &blocks[allocation.cached_prefix_len / self.block_size as usize
); ..prefix_len / self.block_size as usize],
);
}
} }
} }