`server quantize`: expose groupsize option (#2225)
This commit is contained in:
parent
06d0e880e0
commit
2cb1842852
|
@@ -341,6 +341,7 @@ def quantize(
|
||||||
upload_to_model_id: Optional[str] = None,
|
upload_to_model_id: Optional[str] = None,
|
||||||
percdamp: float = 0.01,
|
percdamp: float = 0.01,
|
||||||
act_order: bool = False,
|
act_order: bool = False,
|
||||||
|
groupsize: int = 128,
|
||||||
):
|
):
|
||||||
if revision is None:
|
if revision is None:
|
||||||
revision = "main"
|
revision = "main"
|
||||||
|
@@ -355,7 +356,7 @@ def quantize(
|
||||||
quantize(
|
quantize(
|
||||||
model_id=model_id,
|
model_id=model_id,
|
||||||
bits=4,
|
bits=4,
|
||||||
groupsize=128,
|
groupsize=groupsize,
|
||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
revision=revision,
|
revision=revision,
|
||||||
trust_remote_code=trust_remote_code,
|
trust_remote_code=trust_remote_code,
|
||||||
|
|
Loading…
Reference in New Issue