fix(server): empty_cache when stopped
parent c58a0c185b
commit a2cf1bdb2f
@@ -991,6 +991,7 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
+            torch.cuda.empty_cache()
             # No need to return a batch if we know that all requests stopped
             return generations, None
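For context, a minimal sketch of the pattern this one-line change applies, assuming a simplified generate_token-style method (the method name, return shape, and surrounding structure here are illustrative assumptions, not the exact upstream implementation): once every request in the batch has stopped, deleting the batch drops the last references to its CUDA tensors, and torch.cuda.empty_cache() then releases the now-unused cached blocks back to the device instead of holding them for a batch that will never run again.

    # Sketch only: method name and return types are assumptions for illustration.
    from typing import List, Optional, Tuple

    import torch


    class FlashCausalLM:
        def generate_token(self, batch) -> Tuple[List[object], Optional[object]]:
            generations: List[object] = []
            stopped = True

            # ... run the forward pass, collect generations, and set
            # stopped = False if any request still needs more tokens ...

            if stopped:
                # Drop the batch so its CUDA tensors lose their last reference,
                # then ask the caching allocator to release the freed blocks.
                del batch
                torch.cuda.empty_cache()
                # No need to return a batch if we know that all requests stopped
                return generations, None

            return generations, batch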