fix(server): fix cohere (#2249)
This commit is contained in:
parent
da82c63a4f
commit
1d1b1efa01
|
@ -259,8 +259,8 @@ class FlashCohereAttention(torch.nn.Module):
|
||||||
cu_seqlen_prefill,
|
cu_seqlen_prefill,
|
||||||
kv_cache,
|
kv_cache,
|
||||||
block_tables,
|
block_tables,
|
||||||
input_lengths,
|
|
||||||
slots,
|
slots,
|
||||||
|
input_lengths,
|
||||||
max_s,
|
max_s,
|
||||||
):
|
):
|
||||||
qkv = self.query_key_value(hidden_states)
|
qkv = self.query_key_value(hidden_states)
|
||||||
|
|
Loading…
Reference in New Issue