From f478aa77ade6d3f2baca72ae148afb73ef5bf748 Mon Sep 17 00:00:00 2001
From: "Wang, Yi"
Date: Fri, 20 Sep 2024 16:02:55 +0800
Subject: [PATCH] hotfix: ipex fails since cuda moe kernel is not supported
 (#2532)

Signed-off-by: Wang, Yi A
---
 .../models/custom_modeling/flash_deepseek_v2_modeling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
index 12be08cd..328f239b 100644
--- a/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_deepseek_v2_modeling.py
@@ -15,7 +15,6 @@
 
 from typing import List, Optional, Tuple
 
-from moe_kernels.fused_moe import grouped_topk
 import torch
 import torch.distributed
 from text_generation_server.layers import (
@@ -41,6 +40,9 @@
 from torch import nn
 from transformers.activations import ACT2FN
 from transformers.configuration_utils import PretrainedConfig
+if SYSTEM != "ipex":
+    from moe_kernels.fused_moe import grouped_topk
+
 if SYSTEM == "rocm":
     try:
         from vllm import _custom_C
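
For readers outside the TGI tree, here is a minimal sketch of the guard
pattern the patch applies: probe the accelerator backend once at import
time, and only import the compiled CUDA/ROCm MoE kernels when that import
can actually succeed. The _detect_system() helper below is a hypothetical
stand-in for TGI's SYSTEM constant (imported from
text_generation_server.utils.import_utils in the real file), not the
project's actual detection logic.

    import importlib.util

    import torch


    def _detect_system() -> str:
        # Hypothetical backend probe. ipex is checked first because an
        # ipex-only install does not ship the CUDA extensions that
        # moe_kernels links against.
        if importlib.util.find_spec("intel_extension_for_pytorch") is not None:
            return "ipex"
        if torch.version.hip is not None:
            return "rocm"
        if torch.cuda.is_available():
            return "cuda"
        return "cpu"


    SYSTEM = _detect_system()

    # The fix itself: an eager module-scope import of moe_kernels loads its
    # CUDA extension and raises ImportError on ipex installs, so the import
    # is gated on the detected backend instead.
    if SYSTEM != "ipex":
        from moe_kernels.fused_moe import grouped_topk  # noqa: F401

Gating the import rather than every call site is the lightest possible fix:
grouped_topk is presumably only reached on the CUDA/ROCm code paths, so the
ipex path never touches the missing symbol and the module stays importable
on every backend it previously supported.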