From a2cf1bdb2fc0570dfca8b9ed2c8322f2040c3c07 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Sat, 15 Jul 2023 13:57:31 +0200
Subject: [PATCH] fix(server): empty_cache when stopped

---
 server/text_generation_server/models/flash_causal_lm.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 4e5804f5..d034d472 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -991,6 +991,7 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None
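
For context: del batch only drops the Python references to the batch tensors; PyTorch's caching allocator keeps the freed blocks reserved until torch.cuda.empty_cache() returns them to the driver, which is what the added line does once every request in the batch has stopped. A minimal standalone sketch of that behaviour, separate from the patch itself, with an illustrative tensor standing in for the server's batch state:

    import torch

    # Stand-in for the batch tensors the server drops; the size is illustrative.
    assert torch.cuda.is_available()
    big = torch.empty(256, 1024, 1024, device="cuda")  # ~1 GiB of fp32

    print("reserved before del:", torch.cuda.memory_reserved())
    del big  # references are gone, but the caching allocator keeps the blocks
    print("reserved after del:", torch.cuda.memory_reserved())

    torch.cuda.empty_cache()  # release unused cached blocks back to the driver
    print("reserved after empty_cache:", torch.cuda.memory_reserved())

The reserved figure only drops after the empty_cache() call, which is presumably why the patch places it right after del batch: once all requests have stopped, the memory held for that batch becomes available to the rest of the GPU rather than staying parked in the allocator's cache.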