From 1cebccc72b3233f705724be43ea760239d5d2717 Mon Sep 17 00:00:00 2001
From: drbh
Date: Tue, 13 Aug 2024 10:19:46 -0400
Subject: [PATCH] fix: adds causal to attention params (#2408)

fix: adds causal to attention params to check when using flash attn v1
---
 server/text_generation_server/layers/attention/cuda.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py
index d039e1e7..8703eb94 100644
--- a/server/text_generation_server/layers/attention/cuda.py
+++ b/server/text_generation_server/layers/attention/cuda.py
@@ -293,6 +293,7 @@ else:
         max_s,
         softmax_scale,
         window_size_left=-1,
+        causal=None,
         softcap=None,
     ):
         if window_size_left != -1:
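
For context, a minimal sketch of the patched wrapper as it might look after this change. Only the lines visible in the hunk (max_s through the window_size_left check, plus the added causal=None) come from the patch; the leading q/k/v/cu_seqlens parameters and the elided body are assumptions for illustration.

    # Sketch of the flash attn v1 fallback wrapper in
    # server/text_generation_server/layers/attention/cuda.py after the patch.
    def attention(
        q,            # assumed parameter, not shown in the hunk
        k,            # assumed parameter, not shown in the hunk
        v,            # assumed parameter, not shown in the hunk
        cu_seqlens,   # assumed parameter, not shown in the hunk
        max_s,
        softmax_scale,
        window_size_left=-1,
        causal=None,  # added by this patch so callers can pass causal= uniformly
        softcap=None,
    ):
        if window_size_left != -1:
            raise NotImplementedError(
                "window_size_left is only available with flash attn v2"
            )
        # ... dispatch to the flash attn v1 kernel (elided) ...

The point of accepting the keyword: the v2 wrapper already takes causal, so shared call sites pass it unconditionally. Before this patch, selecting the v1 fallback would raise "TypeError: attention() got an unexpected keyword argument 'causal'" at the first call.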