diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py index 12be08cd..328f239b 100644 --- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py @@ -15,7 +15,6 @@ from typing import List, Optional, Tuple -from moe_kernels.fused_moe import grouped_topk import torch import torch.distributed from text_generation_server.layers import ( @@ -41,6 +40,9 @@ from torch import nn from transformers.activations import ACT2FN from transformers.configuration_utils import PretrainedConfig +if SYSTEM != "ipex": + from moe_kernels.fused_moe import grouped_topk + if SYSTEM == "rocm": try: from vllm import _custom_C