fix: include create_exllama_buffers and set_device for exllama (#2407)

This commit is contained in:
drbh 2024-08-12 17:59:37 -04:00 committed by GitHub
parent 9a7830bd28
commit 8a7749b8fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 4 additions and 0 deletions

View File

@@ -422,12 +422,16 @@ elif CAN_EXLLAMA:
# Choose which exllama implementation to import: the exllamav2 module when V2
# is truthy, otherwise the original exllama module. Both branches bind the
# linear-layer class to the same alias (ExllamaQuantLinear) and import
# create_exllama_buffers and set_device under their own names, so the names
# bound here are identical regardless of the branch taken.
# NOTE(review): every import carries `noqa: F401` (unused-import suppression),
# so these names are presumably re-exported for other modules — confirm
# against callers.
if V2:
from text_generation_server.layers.gptq.exllamav2 import (
QuantLinear as ExllamaQuantLinear, # noqa: F401
create_exllama_buffers, # noqa: F401
set_device, # noqa: F401
)
# String flag recording that the exllamav2 branch was taken.
HAS_EXLLAMA = "2"
else:
from text_generation_server.layers.gptq.exllama import (
Ex4bitLinear as ExllamaQuantLinear, # noqa: F401
create_exllama_buffers, # noqa: F401
set_device, # noqa: F401
)
# String flag recording that the original exllama branch was taken.
HAS_EXLLAMA = "1"