From 3961e32390ad16659b561bfd8f1dbd36b874fedf Mon Sep 17 00:00:00 2001
From: shaltielshmid
Date: Tue, 23 Jul 2024 16:00:07 +0300
Subject: [PATCH] [WIP] Add support for Mistral-Nemo by supporting head_dim
 through config (#2254)

* Support passing head_dim through config

* Using `head_dim` as a fallback is necessary since it's a non standard
  key in mistralConfig (as defined in transformers).

* Shorter diff.

---------

Co-authored-by: Nicolas Patry
---
 .../models/custom_modeling/flash_mistral_modeling.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index eeb3c45f..eca01bbb 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -149,15 +149,14 @@ class MistralAttention(torch.nn.Module):
             bias=False,
         )
 
-        head_size = config.hidden_size // config.num_attention_heads
         self.query_key_value = TensorParallelMultiAdapterLinear.load(
             query_key_value,
             layer_id,
             ["q_proj", "k_proj", "v_proj"],
             sizes=[
-                head_size * config.num_attention_heads,
-                head_size * config.num_key_value_heads,
-                head_size * config.num_key_value_heads,
+                self.head_size * config.num_attention_heads,
+                self.head_size * config.num_key_value_heads,
+                self.head_size * config.num_key_value_heads,
            ],
             process_group=weights.process_group,
         )
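
Note: the `+` lines above rely on `self.head_size` being set elsewhere in
`MistralAttention.__init__`, which is not shown in this hunk. The commit
message explains that `head_dim` is a non-standard key in the Mistral config
(as defined in transformers), so a fallback to the usual
`hidden_size // num_attention_heads` is needed when it is absent. A minimal
sketch of that fallback logic, under the assumption of a Mistral-style
`config` object (the helper name `resolve_head_size` is hypothetical, not
part of the patch):

    # Sketch (assumption, not the exact code in this file): resolve the head
    # size from a Mistral-style config, preferring the optional `head_dim`
    # key that Mistral-Nemo sets and falling back to the standard derivation.
    def resolve_head_size(config) -> int:
        head_dim = getattr(config, "head_dim", None)
        if head_dim is not None:
            return head_dim
        return config.hidden_size // config.num_attention_heads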