Fix the free algorithm to handle cases where the common prefix is
smaller than the allocation's cached prefix length.
This commit is contained in:
parent
9c839ca5df
commit
bef2f6bdaa
|
@ -172,7 +172,6 @@ impl Allocator for RadixAllocator {
|
||||||
)
|
)
|
||||||
// Unwrap, failing is a programming error.
|
// Unwrap, failing is a programming error.
|
||||||
.expect("Failed to store prefill tokens");
|
.expect("Failed to store prefill tokens");
|
||||||
|
|
||||||
// We can have a prefill with the following structure:
|
// We can have a prefill with the following structure:
|
||||||
//
|
//
|
||||||
// |---| From the prefix cache.
|
// |---| From the prefix cache.
|
||||||
|
@ -182,10 +181,12 @@ impl Allocator for RadixAllocator {
|
||||||
// This means that while processing this request there was a
|
// This means that while processing this request there was a
|
||||||
// partially overlapping request that had A..=E in its
|
// partially overlapping request that had A..=E in its
|
||||||
// prefill. In this case we need to free the blocks D E.
|
// prefill. In this case we need to free the blocks D E.
|
||||||
self.free_blocks.extend(
|
if prefix_len > allocation.cached_prefix_len {
|
||||||
&blocks[allocation.cached_prefix_len / self.block_size as usize
|
self.free_blocks.extend(
|
||||||
..prefix_len / self.block_size as usize],
|
&blocks[allocation.cached_prefix_len / self.block_size as usize
|
||||||
);
|
..prefix_len / self.block_size as usize],
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue