Fix the free algorithm to handle cases where the common prefix is
smaller.
This commit is contained in:
parent
9c839ca5df
commit
bef2f6bdaa
|
@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
|
|||
)
|
||||
// Unwrap, failing is a programming error.
|
||||
.expect("Failed to store prefill tokens");
|
||||
|
||||
// We can have a prefill with the following structure:
|
||||
//
|
||||
// |---| From the prefix cache.
|
||||
|
@ -182,12 +181,14 @@ impl Allocator for RadixAllocator {
|
|||
// This means that while processing this request there was a
|
||||
// partially overlapping request that had A..=E in its
|
||||
// prefill. In this case we need to free the blocks D E.
|
||||
if prefix_len > allocation.cached_prefix_len {
|
||||
self.free_blocks.extend(
|
||||
&blocks[allocation.cached_prefix_len / self.block_size as usize
|
||||
..prefix_len / self.block_size as usize],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Free non-prefill blocks.
|
||||
self.free_blocks
|
||||
|
|
Loading…
Reference in New Issue