fix(server): fix cohere (#2249)
This commit is contained in:
parent
da82c63a4f
commit
1d1b1efa01
|
@ -259,8 +259,8 @@ class FlashCohereAttention(torch.nn.Module):
|
|||
cu_seqlen_prefill,
|
||||
kv_cache,
|
||||
block_tables,
|
||||
input_lengths,
|
||||
slots,
|
||||
input_lengths,
|
||||
max_s,
|
||||
):
|
||||
qkv = self.query_key_value(hidden_states)
|
||||
|
|
Loading…
Reference in New Issue