fix: better warmup error
This commit is contained in:
parent
f9910d13e2
commit
96a982ad8f
|
@@ -670,7 +670,7 @@ class FlashCausalLM(Model):
|
||||||
self.device,
|
self.device,
|
||||||
)
|
)
|
||||||
_, batch = self.generate_token(batch)
|
_, batch = self.generate_token(batch)
|
||||||
except Exception as e:
|
except torch.cuda.OutOfMemoryError as e:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
|
f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
|
||||||
f"You need to decrease `--max-batch-prefill-tokens`"
|
f"You need to decrease `--max-batch-prefill-tokens`"
|
||||||
|
|
Loading…
Reference in New Issue