diff --git a/Dockerfile_intel b/Dockerfile_intel index 37faf22e..ea38b081 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -83,12 +83,7 @@ RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dea RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ - | gpg --dearmor > /tmp/intel-for-pytorch-gpu-dev-keyring.gpg - -RUN mv /tmp/intel-for-pytorch-gpu-dev-keyring.gpg /usr/share/keyrings - -RUN echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" > /tmp/intel-for-pytorch-gpu-dev.list +RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" > /tmp/intel-for-pytorch-gpu-dev.list RUN mv /tmp/intel-for-pytorch-gpu-dev.list /etc/apt/sources.list.d diff --git a/server/text_generation_server/layers/moe/__init__.py b/server/text_generation_server/layers/moe/__init__.py index f528fcd0..a5ae7ff4 100644 --- a/server/text_generation_server/layers/moe/__init__.py +++ b/server/text_generation_server/layers/moe/__init__.py @@ -27,10 +27,10 @@ from text_generation_server.utils.weights import ( if SYSTEM == "rocm": from .fused_moe_rocm import grouped_topk from vllm.model_executor.layers.fused_moe import fused_topk -elif SYSTEM != "ipex": - from moe_kernels.fused_moe import fused_topk, grouped_topk -else: +elif SYSTEM == "ipex": from intel_extension_for_pytorch.llm.modules import GatedMLPMOE +else: + from moe_kernels.fused_moe import fused_topk, grouped_topk # NOTE: we are using a protocol here, because multiple inherance is not nice.