Support bit widths other than 4
This commit is contained in:
parent
67d687609b
commit
f90c61a340
|
@ -151,7 +151,7 @@ def get_linear(weight, bias, quantize):
|
|||
f"The passed weight is not `gptq` compatible, loader needs to be updated."
|
||||
)
|
||||
|
||||
if use_triton_kernel:
|
||||
if use_triton_kernel or bits != 4:
|
||||
linear = QuantLinear(
|
||||
qweight,
|
||||
qzeros,
|
||||
|
|
Loading…
Reference in New Issue