diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py index d039e1e7..8703eb94 100644 --- a/server/text_generation_server/layers/attention/cuda.py +++ b/server/text_generation_server/layers/attention/cuda.py @@ -293,6 +293,7 @@ else: max_s, softmax_scale, window_size_left=-1, + causal=None, softcap=None, ): if window_size_left != -1: