fix(server): fix flash causal (#218)

2023-04-21 19:42:16 +02:00 · 2023-04-21 19:42:16 +02:00 · 86bca365df
parent afc5b999d0
commit 86bca365df
1 changed files with 4 additions and 1 deletions
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@ -453,7 +453,10 @@ class FlashCausalLM(Model):
                )
            # Set in batch in case it needs to be used later in concatenate()
            batch.past_pad = self.past_pad
-            if len(batch) != 1:
+            if len(batch) == 1:
+                # present is already pre-padded
+                batch.past_key_values = present
+            else:
                # Add padding after each sequence
                # This will have the correct shape after the final past_key_values concatenation before the model
                # forward