Fix after rebase.

This commit is contained in:
Nicolas Patry 2024-05-23 12:42:19 +00:00
parent 1b86d0f31d
commit cacba5f21f
1 changed file with 4 additions and 1 deletion

View File

@@ -1,5 +1,6 @@
import torch import torch
from text_generation_server.utils.import_utils import SYSTEM from text_generation_server.utils.import_utils import SYSTEM
from text_generation_server.models.globals import FLASH_DECODING
_PARTITION_SIZE = 512 _PARTITION_SIZE = 512
@@ -125,7 +126,9 @@ def attention(
else: else:
from vllm._C import ops from vllm._C import ops
use_v1 = max_s <= 8192 and (max_num_partitions == 1 or num_seqs * num_heads > 512) use_v1 = max_s <= 8192 and (
max_num_partitions == 1 or num_seqs * num_heads > 512
)
if use_v1: if use_v1:
ops.paged_attention_v1( ops.paged_attention_v1(
out, out,