hotfix: fix flashllama
parent 03c9388bf7
commit 27ff1871b5
@@ -692,7 +692,7 @@ class FlashLlamaForCausalLM(torch.nn.Module):
         logits, speculative_logits = self.lm_head(hidden_states)
 
         # Used in Granite
-        if not self.logits_scaled:
+        if self.logits_scaling is not None and not self.logits_scaled:
            logits /= self.logits_scaling
            if speculative_logits is not None:
                speculative_logits /= self.logits_scaling
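The hotfix adds an `is not None` guard before the Granite logits scaling is applied. Below is a minimal, hypothetical sketch of that branch in isolation; the `ScaledLMHead` class and the `fake_lm_head` stub are illustrative stand-ins rather than code from this repository, and it assumes `logits_scaling` is None for checkpoints that define no scaling factor and a float for Granite-style models.

import torch

class ScaledLMHead:
    # Minimal sketch of the fixed scaling branch, not the real FlashLlamaForCausalLM.
    def __init__(self, lm_head, logits_scaling=None, logits_scaled=False):
        self.lm_head = lm_head                # callable returning (logits, speculative_logits)
        self.logits_scaling = logits_scaling  # assumed None unless the model defines a scaling factor
        self.logits_scaled = logits_scaled    # assumed True only when scaling was already applied upstream

    def __call__(self, hidden_states):
        logits, speculative_logits = self.lm_head(hidden_states)
        # The added `is not None` check is the fix: without it, `logits /= None`
        # raises a TypeError on models that have no scaling factor.
        if self.logits_scaling is not None and not self.logits_scaled:
            logits /= self.logits_scaling
            if speculative_logits is not None:
                speculative_logits /= self.logits_scaling
        return logits, speculative_logits

def fake_lm_head(h):
    # Hypothetical stand-in for self.lm_head.
    return h.clone(), None

hidden = torch.randn(2, 8)
ScaledLMHead(fake_lm_head)(hidden)                       # no scaling factor: branch is skipped
ScaledLMHead(fake_lm_head, logits_scaling=16.0)(hidden)  # Granite-style: logits divided in place

Under the pre-fix condition `if not self.logits_scaled:`, the first call would attempt `logits /= None` and fail, which is presumably what this hotfix addresses.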