From 083c2de9f83c6070116a386d5c87e7b6c7538313 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:45:47 +0100
Subject: [PATCH] fix: fix quant linear autotune (do_bench `percentiles` -> `quantiles`)

---
 server/text_generation_server/utils/gptq/custom_autotune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/utils/gptq/custom_autotune.py b/server/text_generation_server/utils/gptq/custom_autotune.py
index 589d89ef..1eb40f1e 100644
--- a/server/text_generation_server/utils/gptq/custom_autotune.py
+++ b/server/text_generation_server/utils/gptq/custom_autotune.py
@@ -88,7 +88,7 @@ class Autotuner(triton.KernelInterface):
             # In testings using only 40 reps seems to be close enough and it appears to be what PyTorch uses
             # PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
             return triton.testing.do_bench(
-                kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
+                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
             )
         except triton.OutOfResources:
             return (float("inf"), float("inf"), float("inf"))