`server quantize`: expose groupsize option (#2225)

This commit is contained in:
Daniël de Kok 2024-07-16 08:36:05 +02:00 committed by GitHub
parent 06d0e880e0
commit 2cb1842852
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 2 additions and 1 deletion

View File

@@ -341,6 +341,7 @@ def quantize(
upload_to_model_id: Optional[str] = None, upload_to_model_id: Optional[str] = None,
percdamp: float = 0.01, percdamp: float = 0.01,
act_order: bool = False, act_order: bool = False,
groupsize: int = 128,
): ):
if revision is None: if revision is None:
revision = "main" revision = "main"
@@ -355,7 +356,7 @@ def quantize(
quantize( quantize(
model_id=model_id, model_id=model_id,
bits=4, bits=4,
groupsize=128, groupsize=groupsize,
output_dir=output_dir, output_dir=output_dir,
revision=revision, revision=revision,
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,