From a2cf1bdb2fc0570dfca8b9ed2c8322f2040c3c07 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Sat, 15 Jul 2023 13:57:31 +0200
Subject: [PATCH] fix(server): empty_cache when stopped

---
 server/text_generation_server/models/flash_causal_lm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 4e5804f5..d034d472 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -991,6 +991,7 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None
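
For context: del batch only drops the Python references to the batch tensors; PyTorch's caching allocator keeps the freed blocks reserved until torch.cuda.empty_cache() returns them to the driver, which is what the added line does once every request in the batch has stopped. A minimal standalone sketch of that behaviour, separate from the patch itself, with an illustrative tensor standing in for the server's batch state:

    import torch

    # Stand-in for the batch tensors the server drops; the size is illustrative.
    assert torch.cuda.is_available()
    big = torch.empty(256, 1024, 1024, device="cuda")  # ~1 GiB of fp32

    print("reserved before del:", torch.cuda.memory_reserved())
    del big  # references are gone, but the caching allocator keeps the blocks
    print("reserved after del:", torch.cuda.memory_reserved())

    torch.cuda.empty_cache()  # release unused cached blocks back to the driver
    print("reserved after empty_cache:", torch.cuda.memory_reserved())

The reserved figure only drops after the empty_cache() call, which is presumably why the patch places it right after del batch: once all requests have stopped, the memory held for that batch becomes available to the rest of the GPU rather than staying parked in the allocator's cache.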