From 2967b8168c3788c2d8e009c7c5b16e3b6a01a667 Mon Sep 17 00:00:00 2001 From: fxmarty <9808326+fxmarty@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:16:27 +0200 Subject: [PATCH] fix post refactor --- server/text_generation_server/utils/quantization.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/quantization.py b/server/text_generation_server/utils/quantization.py index 07975bea..29782321 100644 --- a/server/text_generation_server/utils/quantization.py +++ b/server/text_generation_server/utils/quantization.py @@ -77,7 +77,11 @@ def _get_quantizer_config(model_id, revision): if "version" in data and data["version"] == "GEMM": quant_method = "awq" except Exception: - pass + if self.quant_method is None: + if "awq" in model_id.lower(): + self.quant_method = "awq" + elif "gptq" in model_id.lower(): + self.quant_method = "gptq" return _QuantizerConfig( bits=bits,