From 29b8d19cdf83d7fd26a4f17015228363a7163522 Mon Sep 17 00:00:00 2001 From: drbh Date: Tue, 6 Aug 2024 07:49:53 -0400 Subject: [PATCH] fix: return the out tensor rather than the function's return value (#2361) --- server/text_generation_server/layers/attention/cuda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py index 2b898831..96b654d0 100644 --- a/server/text_generation_server/layers/attention/cuda.py +++ b/server/text_generation_server/layers/attention/cuda.py @@ -292,8 +292,7 @@ else: ) out = torch.empty_like(q) - - return flash_attn_cuda.fwd( + flash_attn_cuda.fwd( q, k, v, @@ -309,4 +308,5 @@ else: False, 0, None, - )[0] + ) + return out