hotfix : enable intel ipex cpu and xpu in python3.11 (#2517)
enable intel ipex cpu and xpu in python3.11

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent 628334d336
commit 3ac7df2b6d
@@ -44,9 +44,35 @@ RUN cargo build --profile release-opt

 # Text Generation Inference base image for Intel
-FROM intel/intel-extension-for-pytorch:2.1.30-xpu AS xpu
+FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu

 USER root

+ARG MAMBA_VERSION=23.1.0-1
+ARG PYTHON_VERSION='3.11.10'
+# Automatically set by buildx
+ARG TARGETPLATFORM
+ENV PATH /opt/conda/bin:$PATH
+
+# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
+# Install mamba
+# translating Docker's TARGETPLATFORM into mamba arches
+RUN case ${TARGETPLATFORM} in \
+        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
+        *)             MAMBA_ARCH=x86_64  ;; \
+    esac && \
+    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
+RUN chmod +x ~/mambaforge.sh && \
+    bash ~/mambaforge.sh -b -p /opt/conda && \
+    rm ~/mambaforge.sh
+
+RUN case ${TARGETPLATFORM} in \
+        "linux/arm64") exit 1 ;; \
+        *)             /opt/conda/bin/conda update -y conda && \
+                       /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
+# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
+RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
+    dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb

@@ -56,7 +82,7 @@ RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dea
 RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
     | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

-RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev ninja-build pciutils
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit xpu-smi cmake ninja-build pciutils

 # Text Generation Inference base env
 ENV HF_HOME=/data \

@@ -65,9 +91,7 @@ ENV HF_HOME=/data \

 WORKDIR /usr/src
-RUN wget https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0.post1%2Bcxx11.abi-cp310-cp310-linux_x86_64.whl && pip install torch-2.1.0.post1+cxx11.abi-cp310-cp310-linux_x86_64.whl
-RUN pip install https://github.com/intel/intel-xpu-backend-for-triton/releases/download/v2.1.0/triton-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout -b distributed origin/dev/distributed
+RUN pip install torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi torchaudio==2.3.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir

 # Install server
 COPY proto proto

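Not part of the diff: with the source build replaced by the prebuilt 2.3.110+xpu wheels and the conda-provided Python 3.11, a small smoke test along the following lines (a sketch, not shipped by the commit) could confirm that the xpu stage picks up the intended interpreter and sees the XPU device.

    # hypothetical smoke test for the xpu stage, not part of this commit
    import sys

    import torch
    import intel_extension_for_pytorch as ipex  # registers the xpu backend with torch

    assert sys.version_info[:2] == (3, 11), f"unexpected interpreter: {sys.version}"
    print("torch:", torch.__version__)             # expected 2.3.1+cxx11.abi
    print("ipex:", ipex.__version__)               # expected 2.3.110+xpu
    print("xpu available:", torch.xpu.is_available())
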
@@ -82,14 +106,12 @@ ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
 ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
 ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
 ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
-ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:
-ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/conda/lib
+ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
 ENV CCL_ZE_IPC_EXCHANGE=sockets
 ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
 ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include

-RUN pip uninstall -y intel-extension-for-pytorch && cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && rm -rf /usr/src/intel-extension-for-pytorch

 # Install benchmarker
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
 # Install router

@@ -133,12 +155,19 @@ RUN chmod +x ~/mambaforge.sh && \
     bash ~/mambaforge.sh -b -p /opt/conda && \
     rm ~/mambaforge.sh

+RUN case ${TARGETPLATFORM} in \
+        "linux/arm64") exit 1 ;; \
+        *)             /opt/conda/bin/conda update -y conda && \
+                       /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
+    esac && \
+    /opt/conda/bin/conda clean -ya
+
 RUN conda install -c conda-forge gperftools mkl

-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.4.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.19.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240612%2Bcpu-cp310-cp310-linux_x86_64.whl
-RUN pip install triton numa
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.4.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.19.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240612%2Bcpu-cp311-cp311-linux_x86_64.whl
+RUN pip install triton py-libnuma

 WORKDIR /usr/src

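The CPU-stage nightly wheels above move from cp310 to cp311 tags so that they match the interpreter installed by the new conda step, and the NUMA bindings switch from the old numa package to py-libnuma. A small stdlib-only check (hypothetical, not part of the commit) of the wheel tag pip would expect inside the image:

    # hypothetical check of the wheel tag expected inside the cpu stage
    import sys

    assert sys.version_info[:2] == (3, 11), sys.version
    print(f"cp{sys.version_info.major}{sys.version_info.minor}")  # prints cp311
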
@@ -151,10 +180,10 @@ RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update
 RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .

 ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
-ENV CCL_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
-ENV I_MPI_ROOT=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch
-ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
-ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.10/site-packages/oneccl_bindings_for_pytorch/lib
+ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
+ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
+ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"

 # Install server

@@ -22,9 +22,9 @@ def attention(

     # We do not need to check window_size_left (not supported) here, so it is already checked ahead of time at model load.
     ipex.llm.functional.varlen_attention(
-        q,
-        key_cache,
-        value_cache,
+        q.contiguous() if q.device.type == "xpu" else q,
+        key_cache.contiguous() if key_cache.device.type == "xpu" else key_cache,
+        value_cache.contiguous() if value_cache.device.type == "xpu" else value_cache,
         out,
         seqlen.cu_seqlen_q,
         seqlen.cu_seqlen_q,

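For context (not part of the diff): the three rewritten arguments all apply the same guard, forcing contiguity only for XPU tensors so the CPU path avoids an unnecessary copy. A minimal sketch of that pattern, with a hypothetical helper name:

    import torch

    def xpu_contiguous(t: torch.Tensor) -> torch.Tensor:
        # Copy to contiguous memory only when the tensor lives on an XPU device;
        # CPU tensors are returned unchanged, so no extra allocation on that path.
        return t.contiguous() if t.device.type == "xpu" else t
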
@@ -82,7 +82,7 @@ def init_cpu_threads_env(rank_id: int, world_size: int):
     import numa
     import psutil

-    nodes = numa.get_max_node() + 1
+    nodes = numa.info.get_max_node() + 1
     rank_per_node = math.ceil(world_size / nodes)
     num_cpus_per_nodes = int(psutil.cpu_count(logical=False) / nodes)
     node_id = int(rank_id / rank_per_node)

@@ -91,18 +91,22 @@ def init_cpu_threads_env(rank_id: int, world_size: int):
         num_cpus_per_rank = max(int(num_cpus_per_nodes / rank_per_node), 1)
     else:
         num_cpus_per_rank = int(os.getenv("OMP_NUM_THREADS"))
-    if len(numa.get_membind()) == nodes:
-        numa.set_membind([node_id])
+    if len(numa.memory.get_membind_nodes()) == nodes:
+        numa.memory.set_membind_nodes((node_id))
     torch.set_num_threads(num_cpus_per_rank)
-    if len(numa.get_affinity(0)) == psutil.cpu_count(logical=True):
+    if len(numa.schedule.get_affinitive_cpus(0)) == psutil.cpu_count(logical=True):
         cpu_start = num_cpus_per_rank * rank_offset_per_node
-        numa.set_affinity(
+        numa.schedule.run_on_cpus(
             0,
-            list(numa.node_to_cpus(node_id))[
-                cpu_start : cpu_start + num_cpus_per_rank
-            ],
+            *(
+                numa.info.node_to_cpus(node_id)[
+                    cpu_start : cpu_start + num_cpus_per_rank
+                ]
+            ),
         )
-    logger.info(f"affinity={numa.get_affinity(0)}, membind = {numa.get_membind()}")
+    logger.info(
+        f"affinity={numa.schedule.get_affinitive_cpus(0)}, membind = {numa.memory.get_membind_nodes()}"
+    )


 @dataclass

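For reference (not part of the diff): py-libnuma splits the old flat numa API into numa.info, numa.memory and numa.schedule submodules, which is what the rewritten calls above use. A minimal pinning sketch with the same calls, assuming a NUMA-capable Linux host where pid 0 means the calling process:

    # sketch of the py-libnuma calls used in init_cpu_threads_env above
    import numa

    node_id = 0                                  # pin everything to NUMA node 0
    numa.memory.set_membind_nodes(node_id)       # allocate memory on node 0 only
    cpus = numa.info.node_to_cpus(node_id)       # CPUs belonging to node 0
    numa.schedule.run_on_cpus(0, *cpus)          # restrict the process to those CPUs
    print(numa.schedule.get_affinitive_cpus(0), numa.memory.get_membind_nodes())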