Allowing window_size_left (dummy version).

This commit is contained in:
Nicolas Patry 2024-08-17 12:04:21 +02:00
parent f2bdc65098
commit f55278de2d
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
2 changed files with 11 additions and 4 deletions

View File

@ -16,6 +16,11 @@ pub struct RadixAllocator {
/// Blocks that are immediately available for allocation. /// Blocks that are immediately available for allocation.
free_blocks: Vec<u32>, free_blocks: Vec<u32>,
#[allow(dead_code)]
// This isn't used because the prefix needs to match without the windowing
// mechanism. At worst this over-allocates; it is not necessarily wrong.
window_size: Option<u32>,
} }
impl RadixAllocator { impl RadixAllocator {
@ -25,9 +30,9 @@ impl RadixAllocator {
"Radix tree allocator only works with block_size=1, was: {}", "Radix tree allocator only works with block_size=1, was: {}",
block_size block_size
); );
if window_size.is_some() { // if window_size.is_some() {
unimplemented!("Window size not supported in the prefix-caching block allocator yet"); // unimplemented!("Window size not supported in the prefix-caching block allocator yet");
} // }
RadixAllocator { RadixAllocator {
allocation_id: 0, allocation_id: 0,
@ -36,6 +41,7 @@ impl RadixAllocator {
// Block 0 is reserved for health checks. // Block 0 is reserved for health checks.
free_blocks: (1..n_blocks).collect(), free_blocks: (1..n_blocks).collect(),
window_size,
} }
} }

View File

@ -233,7 +233,7 @@ if ATTENTION == "flashinfer":
causal=True, causal=True,
softcap=0.0, softcap=0.0,
): ):
assert window_size_left == -1, "Windowing is not supported with flash infer" # assert window_size_left == -1, "Windowing is not supported with flash infer"
from text_generation_server.layers.attention.flashinfer import ( from text_generation_server.layers.attention.flashinfer import (
prefill_with_paged_kv_state, prefill_with_paged_kv_state,
) )
@ -244,6 +244,7 @@ if ATTENTION == "flashinfer":
paged_kv_cache=(key_cache, value_cache), paged_kv_cache=(key_cache, value_cache),
logits_soft_cap=softcap, logits_soft_cap=softcap,
sm_scale=softmax_scale, sm_scale=softmax_scale,
window_left=window_size_left,
) )
elif V2: elif V2: