fix: include create_exllama_buffers and set_device for exllama (#2407)
This commit is contained in:
parent
9a7830bd28
commit
8a7749b8fb
|
@@ -422,12 +422,16 @@ elif CAN_EXLLAMA:
|
|||
if V2:
|
||||
from text_generation_server.layers.gptq.exllamav2 import (
|
||||
QuantLinear as ExllamaQuantLinear, # noqa: F401
|
||||
create_exllama_buffers, # noqa: F401
|
||||
set_device, # noqa: F401
|
||||
)
|
||||
|
||||
HAS_EXLLAMA = "2"
|
||||
else:
|
||||
from text_generation_server.layers.gptq.exllama import (
|
||||
Ex4bitLinear as ExllamaQuantLinear, # noqa: F401
|
||||
create_exllama_buffers, # noqa: F401
|
||||
set_device, # noqa: F401
|
||||
)
|
||||
|
||||
HAS_EXLLAMA = "1"
|
||||
|
|
Loading…
Reference in New Issue