2024-06-17 08:40:44 -06:00
|
|
|
# Common Rust toolchain stage with cargo-chef available; the planner and
# builder stages are both derived from it.
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef

# Everything Rust-related is staged and built under /usr/src.
WORKDIR /usr/src

# Make cargo use the sparse index protocol when talking to crates.io.
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
|
|
|
|
|
|
|
# Planner stage: derive the cargo-chef dependency recipe from the workspace
# manifests and member crates.
# `AS` is upper-cased to match the other stage declarations in this file.
FROM chef AS planner

# Workspace manifest and toolchain pin.
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml

# Workspace members and the protobuf definitions they are built against.
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher

# Writes recipe.json into the working directory (/usr/src); the builder stage
# copies it from there.
RUN cargo chef prepare --recipe-path recipe.json
|
|
|
|
|
|
|
|
# Builder stage: installs protoc, then pre-builds the Rust dependencies.
FROM chef AS builder

# Fetch the protoc 21.12 release archive, unpack the compiler binary and its
# bundled include files into /usr/local, and delete the archive in the same
# layer.
RUN PROTOC_ARCHIVE=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ARCHIVE && \
    unzip -o $PROTOC_ARCHIVE -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ARCHIVE -d /usr/local 'include/*' && \
    rm -f $PROTOC_ARCHIVE

# Bring in the recipe produced by the planner stage and cook it with the
# release-opt profile.
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
|
2024-04-26 07:48:58 -06:00
|
|
|
|
2024-06-06 10:51:42 -06:00
|
|
|
# Build arguments without defaults; they are declared after the dependency
# cook step, so changing them does not invalidate that cached layer.
# NOTE(review): presumably consumed by the crates' build scripts - confirm.
ARG GIT_SHA
ARG DOCKER_LABEL

# Full workspace sources for the actual build.
COPY Cargo.toml Cargo.toml
COPY rust-toolchain.toml rust-toolchain.toml
COPY proto proto
COPY benchmark benchmark
COPY router router
COPY launcher launcher

# Compile the workspace with the release-opt profile; the resulting binaries
# are picked up from /usr/src/target/release-opt by the base stage.
RUN cargo build --profile release-opt
|
2024-04-26 07:48:58 -06:00
|
|
|
|
|
|
|
|
|
|
|
# Text Generation Inference base image for Intel
FROM intel/intel-extension-for-pytorch:2.1.30-xpu AS base

# The package installation steps below need root privileges.
USER root

# libssl.so.1.1 is not installed on Ubuntu 22.04 by default, install it.
# The downloaded .deb is deleted in the same layer so it does not remain in
# the image.
# NOTE(review): fetched over plain HTTP without a checksum - consider
# verifying the package before installing it.
RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    dpkg -i ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \
    rm -f ./libssl1.1_1.1.1f-1ubuntu2_amd64.deb

# Store the Intel graphics repository signing key as a dearmored keyring.
RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null

# Store the oneAPI signing key and register the oneAPI apt source that is
# signed by it.
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
    | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list

# Use apt-get (stable CLI for scripts) for both update and install, and drop
# the package lists in the same layer to keep them out of the image.
RUN apt-get update && \
    apt-get install -y intel-basekit xpu-smi cmake python3-dev ninja-build && \
    rm -rf /var/lib/apt/lists/*
|
2024-04-26 07:48:58 -06:00
|
|
|
|
|
|
|
# Text Generation Inference base env:
#   HUGGINGFACE_HUB_CACHE     - cache directory is set to /data
#   HF_HUB_ENABLE_HF_TRANSFER - set to 1
#   PORT                      - set to 80
ENV HUGGINGFACE_HUB_CACHE=/data \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Subsequent relative paths (downloads, clones, COPY targets) resolve here.
WORKDIR /usr/src
|
2024-06-06 11:12:57 -06:00
|
|
|
# Install the XPU torch 2.1.0.post1 wheel. The local file name carries '+'
# where the URL has the escaped '%2B'. The wheel is removed in the same layer
# and pip's cache is disabled so neither ends up in the image.
RUN wget https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/xpu/torch-2.1.0.post1%2Bcxx11.abi-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir torch-2.1.0.post1+cxx11.abi-cp310-cp310-linux_x86_64.whl && \
    rm -f torch-2.1.0.post1+cxx11.abi-cp310-cp310-linux_x86_64.whl

# Check out the intel-extension-for-pytorch sources on a local branch that
# tracks origin/dev/gqa_rope; they are built by a later RUN in this stage.
# NOTE(review): the branch is not pinned to a commit, so rebuilds may pick up
# different sources.
RUN git clone https://github.com/intel/intel-extension-for-pytorch && \
    cd intel-extension-for-pytorch && \
    git checkout -b group_rope origin/dev/gqa_rope
|
2024-04-26 07:48:58 -06:00
|
|
|
|
|
|
|
# Install server
COPY proto proto
COPY server server
# NOTE(review): this looks redundant with the directory COPY above - confirm
# before removing.
COPY server/Makefile server/Makefile

# Run the gen-server make target, then install the Intel requirements and
# the server package with its accelerate, peft and outlines extras.
# Both pip invocations pass --no-cache-dir so pip's download cache is not
# stored in the layer.
RUN cd server && \
    make gen-server && \
    pip install -r requirements_intel.txt --no-cache-dir && \
    pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
|
|
|
|
|
|
|
# oneAPI environment: component roots, library/include search paths and
# binaries under /opt/intel/oneapi.
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib

# No trailing ':' here: an empty LD_LIBRARY_PATH entry makes the dynamic
# loader search the current working directory for shared libraries.
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64

# The libfabric bin directory is listed once; a repeated entry does not
# change command lookup.
ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

ENV CCL_ZE_IPC_EXCHANGE=sockets
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
|
|
|
|
|
|
|
|
# Replace the preinstalled intel-extension-for-pytorch package with a build
# from the cloned sources (submodules included), then delete the source tree.
# The build is configured through the environment variables set on the
# setup.py command.
RUN pip uninstall -y intel-extension-for-pytorch && \
    cd intel-extension-for-pytorch && \
    git submodule update --init --recursive && \
    USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=OFF BUILD_WITH_CPU=OFF USE_XETLA=ON python setup.py install && \
    rm -rf /usr/src/intel-extension-for-pytorch
|
2024-04-26 07:48:58 -06:00
|
|
|
|
|
|
|
# Copy the three release-opt binaries produced by the builder stage into
# /usr/local/bin.

# Install benchmarker
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark

# Install router
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router

# Install launcher
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
2024-04-26 07:48:58 -06:00
|
|
|
|
|
|
|
# Final image: the base stage as-is, with the launcher as the entrypoint.
FROM base

# Exec-form entrypoint; arguments given to `docker run` replace the default
# CMD below and are appended to the launcher command line.
ENTRYPOINT ["text-generation-launcher"]

# Default launcher argument.
CMD ["--json-output"]
|