# Selects which stage becomes the "final" image: "xpu" (Intel GPU) or "cpu".
ARG PLATFORM=xpu

# cargo-chef base: used by both the planner (recipe) and builder (cook/build) stages.
FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
WORKDIR /usr/src

# Use the sparse crates.io index protocol for faster dependency resolution.
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
# Planner stage: compute the cargo-chef dependency recipe. Only the manifests
# and workspace members are copied so the recipe layer stays cached unless
# dependency metadata actually changes.
FROM chef AS planner
COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY backends backends
COPY launcher launcher
RUN cargo chef prepare --recipe-path recipe.json
# Builder stage: cook (pre-build) dependencies from the planner recipe, then
# build the workspace binaries (benchmark, router, launcher).
FROM chef AS builder

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    python3.11-dev && \
    rm -rf /var/lib/apt/lists/*

# Install protoc for tonic/prost code generation from the proto/ definitions.
# -f makes curl fail the build on an HTTP error instead of saving the error page.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -fOL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

# Build dependencies first (cached independently of application source changes).
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --profile release-opt --recipe-path recipe.json

# Build-metadata args; declared late so changing them only invalidates the
# application-build layers, not the dependency cook above.
ARG GIT_SHA
ARG DOCKER_LABEL

COPY Cargo.lock Cargo.lock
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY backends backends
COPY launcher launcher
# --frozen: fail if Cargo.lock is out of date instead of touching the network.
RUN cargo build --profile release-opt --frozen
# Text Generation Inference base image for Intel

FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu

USER root

ARG MAMBA_VERSION=23.1.0-1
ARG PYTHON_VERSION='3.11.10'
# Automatically set by buildx
ARG TARGETPLATFORM

ENV PATH=/opt/conda/bin:$PATH

# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
    *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh
# arm64 is unsupported for this stage: fail fast rather than build a broken image.
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") exit 1 ;; \
    *) /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    rm -f ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb

# Intel graphics and oneAPI apt repositories (signed-by keyrings).
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" > /tmp/intel-for-pytorch-gpu-dev.list
RUN mv /tmp/intel-for-pytorch-gpu-dev.list /etc/apt/sources.list.d
# apt-get (not apt) for stable scripting behavior; clean lists in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    intel-basekit=2024.2.1-98 xpu-smi cmake ninja-build pciutils intel-pti-dev-0.9 && \
    rm -rf /var/lib/apt/lists/*

# Text Generation Inference base env
ENV HF_HOME=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

WORKDIR /usr/src

# Pinned IPEX-dev XPU wheels for torch / torchaudio / torchvision / IPEX / oneCCL.
RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torch-2.5.0a0%2Bgite84e33f-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchaudio-2.5.0a0%2B56bc006-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/torchvision-0.20.0a0%2B8e8a208-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.5.10%2Bgit9d489a8-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/oneccl_bind_pt-2.5.0%2Bxpu-cp311-cp311-linux_x86_64.whl --no-cache-dir

RUN pip install triton-xpu==3.0.0b2 --no-cache-dir

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
    make gen-server && \
    pip install -r requirements_intel.txt && \
    pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

# oneAPI runtime environment (CCL/MPI/MKL/compiler library and binary paths).
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV CCL_ZE_IPC_EXCHANGE=sockets
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
#ENV TORCH_LLM_ALLREDUCE=1
#ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0

# Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
# Text Generation Inference base image for Intel-cpu
FROM ubuntu:22.04 AS cpu

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    make \
    g++-12 \
    gcc-12 \
    git \
    wget \
    cmake \
    libnuma-dev && \
    rm -rf /var/lib/apt/lists/*

# Make gcc-12/g++-12 the default toolchain (needed to build IPEX / torch-ccl).
RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 12
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 12
RUN update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30
RUN update-alternatives --set cc /usr/bin/gcc
RUN update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30
RUN update-alternatives --set c++ /usr/bin/g++

ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

ARG MAMBA_VERSION=23.1.0-1
ARG PYTHON_VERSION='3.11.10'
# Automatically set by buildx
ARG TARGETPLATFORM
ENV PATH=/opt/conda/bin:$PATH

# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
# Install mamba
# translating Docker's TARGETPLATFORM into mamba arches
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
    *) MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
RUN chmod +x ~/mambaforge.sh && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# arm64 is unsupported for this stage: fail fast rather than build a broken image.
RUN case ${TARGETPLATFORM} in \
    "linux/arm64") exit 1 ;; \
    *) /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

RUN conda install -c conda-forge gperftools mkl

# Pinned PyTorch nightly CPU wheels matching the IPEX commit built below.
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torch-2.5.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchvision-0.20.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl --no-cache-dir
RUN pip install https://download.pytorch.org/whl/nightly/cpu/torchaudio-2.4.0.dev20240815%2Bcpu-cp311-cp311-linux_x86_64.whl --no-cache-dir

RUN pip install triton py-libnuma --no-cache-dir

WORKDIR /usr/src

# Build IPEX and torch-ccl from pinned source revisions.
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout 2e1c98f74ec1b35ad8dd1ebe7dd4b25470f2fd41
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0

RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .

# tcmalloc (gperftools) noticeably improves allocator performance for inference.
ENV LD_PRELOAD=/opt/conda/lib/libtcmalloc.so
ENV CCL_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
ENV I_MPI_ROOT=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch
ENV FI_PROVIDER_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib/prov:/usr/lib64/libfabric
ENV LD_LIBRARY_PATH=/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/opt/mpi/libfabric/lib:/opt/conda/lib/python3.11/site-packages/oneccl_bindings_for_pytorch/lib
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib/"

# Install server
COPY proto proto
COPY server server
COPY server/Makefile server/Makefile
RUN cd server && \
    make gen-server && \
    pip install -r requirements_intel.txt && \
    pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir

# Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
# Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
# Final stage: selected at build time via --build-arg PLATFORM=xpu|cpu.
FROM ${PLATFORM} AS final
# Runtime defaults for TGI on Intel: paged attention, no prefix caching,
# no prefill chunking, and CUDA graphs disabled (no NVIDIA GPU present).
ENV ATTENTION=paged
ENV PREFIX_CACHING=0
ENV PREFILL_CHUNKING=0
ENV CUDA_GRAPHS=0
ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"]