fix: include create_exllama_buffers and set_device for exllama (#2407)
parent 9a7830bd28
commit 8a7749b8fb
@@ -422,12 +422,16 @@ elif CAN_EXLLAMA:
         if V2:
             from text_generation_server.layers.gptq.exllamav2 import (
                 QuantLinear as ExllamaQuantLinear,  # noqa: F401
+                create_exllama_buffers,  # noqa: F401
+                set_device,  # noqa: F401
             )
 
             HAS_EXLLAMA = "2"
         else:
             from text_generation_server.layers.gptq.exllama import (
                 Ex4bitLinear as ExllamaQuantLinear,  # noqa: F401
+                create_exllama_buffers,  # noqa: F401
+                set_device,  # noqa: F401
            )
 
             HAS_EXLLAMA = "1"
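The added `# noqa: F401` imports are re-exports: they make create_exllama_buffers and set_device importable from text_generation_server.layers.gptq no matter which exllama version is selected. A minimal sketch of a downstream caller follows, assuming the helpers keep their usual shapes (set_device(device) pins the target GPU, create_exllama_buffers(max_total_tokens) allocates scratch buffers); the warmup_exllama wrapper itself is hypothetical and only illustrates the intended call order, not code from this commit.

import torch

from text_generation_server.layers.gptq import (
    HAS_EXLLAMA,
    create_exllama_buffers,
    set_device,
)


def warmup_exllama(device: torch.device, max_total_tokens: int) -> None:
    # Hypothetical warmup helper: prepare the exllama kernels before serving.
    if HAS_EXLLAMA:
        # Both names resolve through the gptq package only because the diff
        # above re-exports them from exllama / exllamav2.
        set_device(device)
        create_exllama_buffers(max_total_tokens)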