fix(server): empty_cache when stopped
parent c58a0c185b
commit a2cf1bdb2f
@@ -991,6 +991,7 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None
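For context, a minimal sketch of the pattern this one-line change applies, assuming a simplified generate_token-style method (the method name, return shape, and surrounding structure here are illustrative assumptions, not the exact upstream implementation): once every request in the batch has stopped, deleting the batch drops the last references to its CUDA tensors, and torch.cuda.empty_cache() then releases the now-unused cached blocks back to the device instead of holding them for a batch that will never run again.

    # Sketch only: method name and return types are assumptions for illustration.
    from typing import List, Optional, Tuple

    import torch


    class FlashCausalLM:
        def generate_token(self, batch) -> Tuple[List[object], Optional[object]]:
            generations: List[object] = []
            stopped = True

            # ... run the forward pass, collect generations, and set
            # stopped = False if any request still needs more tokens ...

            if stopped:
                # Drop the batch so its CUDA tensors lose their last reference,
                # then ask the caching allocator to release the freed blocks.
                del batch
                torch.cuda.empty_cache()
                # No need to return a batch if we know that all requests stopped
                return generations, None

            return generations, batch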