fix: better warmup error

2023-10-25 10:18:58 +02:00 · 2023-10-25 10:18:58 +02:00 · 96a982ad8f
parent f9910d13e2
commit 96a982ad8f
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -670,7 +670,7 @@ class FlashCausalLM(Model):
                self.device,
            )
            _, batch = self.generate_token(batch)
-        except Exception as e:
+        except torch.cuda.OutOfMemoryError as e:
            raise RuntimeError(
                f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
                f"You need to decrease `--max-batch-prefill-tokens`"