From f3435bab8c047d7e6fb8c03f1bf63f6f0702e3e1 Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Sun, 21 Jul 2024 16:48:04 +0000
Subject: [PATCH] fix(server): fix deepseekv2 loading (#2266)

---
 .../models/custom_modeling/flash_deepseek_v2_modeling.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
index 8ffbd143..f5b2ba0e 100644
--- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
@@ -34,7 +34,6 @@ from text_generation_server.layers.attention.common import Seqlen
 from text_generation_server.layers.layernorm import FastRMSNorm
 from text_generation_server.layers.rotary import PositionRotaryEmbedding, get_mscale
 from text_generation_server.utils.import_utils import SYSTEM
-from text_generation_server.utils.log import log_once
 from text_generation_server.utils.weights import Weights
 from torch import nn
 from transformers.activations import ACT2FN
@@ -240,7 +239,6 @@ class DeepseekV2Attention(torch.nn.Module):
                     if config.attention_bias
                     else None
                 ),
-                quantize=config.quantize,
             )
             self.q_a_layernorm = FastRMSNorm.load(
                 prefix=f"{prefix}.q_a_layernorm",
@@ -261,7 +259,6 @@ class DeepseekV2Attention(torch.nn.Module):
                 if config.attention_bias
                 else None
             ),
-            quantize=config.quantize,
         )
         self.kv_a_layernorm = FastRMSNorm.load(
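
Note on the fix: the patch drops a stale `quantize=config.quantize` keyword from two
projection-loading calls in `DeepseekV2Attention` (and an unused `log_once` import),
presumably because an earlier refactor moved quantization handling into the
weight-loading path, so the loader no longer accepts that keyword and DeepSeek-V2
checkpoints failed at load time. Below is a minimal, self-contained sketch of that
failure mode; the `get_linear` signature and `Config` here are illustrative stand-ins,
not text-generation-server's actual API.

# Illustrative stand-ins -- not text-generation-server's real classes.
from dataclasses import dataclass

import torch
from torch import nn


@dataclass
class Config:
    hidden_size: int = 16
    quantize: str | None = "awq"  # hypothetical quantization setting


def get_linear(weight: torch.Tensor, bias: torch.Tensor | None) -> nn.Module:
    # Post-refactor loader sketch: quantization is assumed to be resolved
    # upstream from the weight itself, so there is no `quantize` parameter.
    linear = nn.Linear(weight.shape[1], weight.shape[0], bias=bias is not None)
    with torch.no_grad():
        linear.weight.copy_(weight)
        if bias is not None:
            linear.bias.copy_(bias)
    return linear


config = Config()
weight = torch.randn(config.hidden_size, config.hidden_size)

# Old call site (what the patch removes): the stale keyword fails at model
# load with "TypeError: get_linear() got an unexpected keyword argument
# 'quantize'".
try:
    get_linear(weight=weight, bias=None, quantize=config.quantize)
except TypeError as e:
    print(e)

# Fixed call site, matching the patch: drop the keyword entirely.
proj = get_linear(weight=weight, bias=None)
print(proj)

Since the error is raised inside the attention module's constructor, it surfaces for
every DeepSeek-V2 load regardless of whether quantization is enabled, which is why
removing the keyword, rather than guarding it, is the right fix.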