From 072f267cc3c30ea2a859c3f1594f99fc6e976b40 Mon Sep 17 00:00:00 2001
From: "Yang, Bo"
Date: Wed, 23 Aug 2023 14:23:59 -0700
Subject: [PATCH] Initialize v_cache to avoid NaNs (#12)

---
 server/text_generation_server/models/flash_causal_lm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 10cf8e8..795ce37 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -53,8 +53,9 @@ class CacheManager:
                 dtype=dtype,
                 device=device,
             ),
-            torch.empty(
+            torch.full(
                 (num_blocks, num_heads, head_size, self.block_size),
+                self.v_cache_initial_value,
                 dtype=dtype,
                 device=device,
             ),