fix: fix quant linear autotune

This commit is contained in:
OlivierDehaene 2023-12-14 16:45:47 +01:00
parent 773aabdda6
commit 083c2de9f8
1 changed files with 1 additions and 1 deletions

View File

@ -88,7 +88,7 @@ class Autotuner(triton.KernelInterface):
# In testing, using only 40 reps seems to be close enough, and it appears to be what PyTorch uses
# PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
return triton.testing.do_bench(
kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
)
except triton.OutOfResources:
return (float("inf"), float("inf"), float("inf"))