227 lines
9.6 KiB
Plaintext
227 lines
9.6 KiB
Plaintext
ARG PLATFORM=xpu
|
|
|
|
FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
|
|
WORKDIR /usr/src
|
|
|
|
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
|
|
|
FROM chef AS planner
|
|
COPY Cargo.lock Cargo.lock
|
|
COPY Cargo.toml Cargo.toml
|
|
COPY rust-toolchain.toml rust-toolchain.toml
|
|
COPY proto proto
|
|
COPY benchmark benchmark
|
|
COPY router router
|
|
COPY backends backends
|
|
COPY launcher launcher
|
|
RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
FROM chef AS builder
|
|
|
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
|
python3.11-dev
|
|
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
|
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
|
unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
|
|
rm -f $PROTOC_ZIP
|
|
|
|
COPY --from=planner /usr/src/recipe.json recipe.json
|
|
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
|
|
|
|
ARG GIT_SHA
|
|
ARG DOCKER_LABEL
|
|
|
|
COPY Cargo.lock Cargo.lock
|
|
COPY Cargo.toml Cargo.toml
|
|
COPY rust-toolchain.toml rust-toolchain.toml
|
|
COPY proto proto
|
|
COPY benchmark benchmark
|
|
COPY router router
|
|
COPY backends backends
|
|
COPY launcher launcher
|
|
RUN cargo build --profile release-opt --frozen
|
|
|
|
|
|
# Text Generation Inference base image for Intel
|
|
|
|
FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu
|
|
|
|
USER root
|
|
|
|
ARG MAMBA_VERSION=23.1.0-1
|
|
ARG PYTHON_VERSION='3.11.10'
|
|
# Automatically set by buildx
|
|
ARG TARGETPLATFORM
|
|
ENV PATH=/opt/conda/bin:$PATH
|
|
|
|
# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
|
|
# Install mamba
|
|
# translating Docker's TARGETPLATFORM into mamba arches
|
|
RUN case ${TARGETPLATFORM} in \
|
|
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
|
|
*) MAMBA_ARCH=x86_64 ;; \
|
|
esac && \
|
|
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
|
|
RUN chmod +x ~/mambaforge.sh && \
|
|
bash ~/mambaforge.sh -b -p /opt/conda && \
|
|
rm ~/mambaforge.sh
|
|
|
|
RUN case ${TARGETPLATFORM} in \
|
|
"linux/arm64") exit 1 ;; \
|
|
*) /opt/conda/bin/conda update -y conda && \
|
|
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
|
|
esac && \
|
|
/opt/conda/bin/conda clean -ya
|
|
|
|
# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
|
|
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
|
|
dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb
|
|
|
|
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null
|
|
|
|
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
|
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
|
|
|
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit xpu-smi cmake ninja-build pciutils
|
|
|
|
# Text Generation Inference base env
|
|
ENV HF_HOME=/data \
|
|
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
|
PORT=80
|
|
|
|
|
|
WORKDIR /usr/src
|
|
RUN pip install torch==2.3.1+cxx11.abi torchvision==0.18.1+cxx11.abi torchaudio==2.3.1+cxx11.abi intel-extension-for-pytorch==2.3.110+xpu oneccl_bind_pt==2.3.100+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ --no-cache-dir
|
|
RUN pip install triton-xpu==3.0.0b2 --no-cache-dir
|
|
|
|
# Install server
|
|
COPY proto proto
|
|
COPY server server
|
|
COPY server/Makefile server/Makefile
|
|
RUN cd server && \
|
|
make gen-server && \
|
|
pip install -r requirements_intel.txt && \
|
|
pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
|
|
|
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
|
|
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
|
|
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
|
|
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
|
|
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/conda/lib
|
|
ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
|
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
|
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
|
|
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
|
|
ENV TORCH_LLM_ALLREDUCE=1
|
|
ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
|
|
|
|
# Install benchmarker
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
|
# Install router
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
|
# Install launcher
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
|
|
|
|
|
# Text Generation Inference base image for Intel-cpu
|
|
FROM ubuntu:22.04 AS cpu
|
|
|
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
|
curl \
|
|
ca-certificates \
|
|
make \
|
|
g++-12 \
|
|
gcc-12 \
|
|
git \
|
|
wget \
|
|
cmake \
|
|
libnuma-dev
|
|
|
|
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 12
|
|
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
|
|
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
|
|
RUN update-alternatives --set cc /usr/bin/gcc
|
|
|
|
RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
|
|
RUN update-alternatives --set c++ /usr/bin/g++
|
|
|
|
|
|
ENV HUGGINGFACE_HUB_CACHE=/data \
|
|
HF_HUB_ENABLE_HF_TRANSFER=1 \
|
|
PORT=80
|
|
|
|
ARG MAMBA_VERSION=23.1.0-1
|
|
ARG PYTHON_VERSION='3.11.10'
|
|
# Automatically set by buildx
|
|
ARG TARGETPLATFORM
|
|
ENV PATH /opt/conda/bin:$PATH
|
|
|
|
# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
|
|
# Install mamba
|
|
# translating Docker's TARGETPLATFORM into mamba arches
|
|
RUN case ${TARGETPLATFORM} in \
|
|
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
|
|
*) MAMBA_ARCH=x86_64 ;; \
|
|
esac && \
|
|
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
|
|
RUN chmod +x ~/mambaforge.sh && \
|
|
bash ~/mambaforge.sh -b -p /opt/conda && \
|
|
rm ~/mambaforge.sh
|
|
|
|
RUN case ${TARGETPLATFORM} in \
|
|
"linux/arm64") exit 1 ;; \
|
|
*) /opt/conda/bin/conda update -y conda && \
|
|
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
|
|
esac && \
|
|
/opt/conda/bin/conda clean -ya
|
|
|
|
RUN conda install -c conda-forge gperftools mkl
|
|
|
|
|
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl
|
|
|
|
RUN pip install triton py-libnuma
|
|
|
|
WORKDIR /usr/src
|
|
|
|
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout f86e93e4890dc2c989024d148d415c9aa8a1649f
|
|
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
|
|
|
|
RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
|
|
|
|
RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
|
|
|
|
ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
|
|
ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
|
ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
|
|
ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
|
|
ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
|
|
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"
|
|
|
|
# Install server
|
|
COPY proto proto
|
|
COPY server server
|
|
COPY server/Makefile server/Makefile
|
|
RUN cd server && \
|
|
make gen-server && \
|
|
pip install -r requirements_intel.txt && \
|
|
pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
|
|
|
# Install benchmarker
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
|
# Install router
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
|
# Install launcher
|
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
|
|
|
FROM ${PLATFORM} AS final
|
|
ENV ATTENTION=paged
|
|
ENV PREFIX_CACHING=0
|
|
ENV PREFILL_CHUNKING=0
|
|
ENV CUDA_GRAPHS=0
|
|
ENTRYPOINT ["text-generation-launcher"]
|
|
CMD ["--json-output"]
|