still working on dockerfile

parent cc1db8a0ba
commit b2b6cdabaa
@@ -40,7 +40,7 @@ RUN cargo build --release
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 # FROM debian:bullseye-slim as pytorch-install
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as pytorch-install
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-install
 
 ARG PYTORCH_VERSION=2.0.1
 ARG PYTHON_VERSION=3.9
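
Note: the switch from -base to -devel matters because the -base CUDA images ship only the runtime libraries, while -devel adds nvcc and the CUDA headers needed to compile PyTorch and the kernels built in later stages. A quick way to see the difference (a sketch using the tags from the diff):

    # nvcc exists in the -devel image; the same command fails on -base
    docker run --rm nvidia/cuda:11.8.0-devel-ubuntu20.04 nvcc --version
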
@@ -88,7 +88,11 @@ RUN git clone --recursive https://github.com/pytorch/pytorch && \
 
 WORKDIR /pytorch
 
-RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
+# Write the Pytorch version into the version.txt file because it isn't always the same as the tag
+RUN echo $PYTORCH_VERSION > version.txt
+
+RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython mkl mkl-include && \
+    /opt/conda/bin/conda install -c pytorch magma-cuda118 && \
     /opt/conda/bin/python -mpip install -r requirements.txt && \
     /opt/conda/bin/conda clean -ya
 
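
Note: the added packages line up with a from-source PyTorch build: ninja parallelizes the C++/CUDA compile, mkl and mkl-include supply the BLAS backend, and magma-cuda118 is the MAGMA build matching CUDA 11.8. Writing version.txt first is needed because PyTorch's build scripts read that file to stamp torch.__version__, and the checked-out tag isn't always identical. To confirm the finished build picked these up (a sketch):

    # Print the compile-time configuration of the built torch;
    # the output should mention MKL, MAGMA, and CUDA 11.8.
    python -c "import torch; print(torch.__config__.show())"
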
@@ -102,8 +106,12 @@ RUN python setup.py clean && \
     cd .. && \
     rm -rf pytorch
 
+# BUILD_TEST=0 \
+
 # Make sure we built everything properly. Build will fail if CUDA isn't available.
-RUN python -c "import torch; exit(1 if not torch.cuda.is_available() else 0)"
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
 
+# RUN pip freeze | grep "torch"
+
 # ==============================================================================
 
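
Note: the new check is deliberately weaker in a useful way. torch.cuda.is_available() requires a visible GPU and driver at image build time, which docker build normally doesn't have; torch.version.cuda only reports the CUDA version torch was compiled against, so it verifies the build on GPU-less machines. (The unchanged comment above it now overstates the requirement, since no GPU is needed anymore.) Roughly, on a CUDA-enabled torch without a GPU (a sketch):

    python -c "import torch; print(torch.version.cuda)"        # prints 11.8
    python -c "import torch; print(torch.cuda.is_available())" # prints False
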
@@ -127,7 +135,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile
 
 # Build specific version of flash attention
-RUN MAX_WORKERS=4 make build-flash-attention
+RUN MAX_JOBS=5 make build-flash-attention
 
 # ==============================================================================
 
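
Note: MAX_JOBS is the variable that PyTorch's cpp_extension build machinery (which flash-attention's setup.py builds through) actually reads to cap parallel compile jobs; MAX_WORKERS was presumably being ignored. Capping jobs trades build speed against peak memory, since each parallel nvcc process is memory-hungry. The same substitution is applied to the flash-attention v2 and vllm hunks below. The general pattern (a sketch):

    # Limit an extension build to 4 parallel nvcc jobs to avoid OOM:
    MAX_JOBS=4 python setup.py build
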
@@ -137,7 +145,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att-v2 Makefile
 
 # Build specific version of flash attention v2
-RUN MAX_WORKERS=4 make build-flash-attention-v2
+RUN MAX_JOBS=10 make build-flash-attention-v2
 
 # ==============================================================================
 
@@ -157,7 +165,7 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN python setup.py build
+RUN BUILD_EXTENSIONS=True MAX_JOBS=5 python setup.py build
 
 # ==============================================================================
 
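
Note: BUILD_EXTENSIONS=True presumably makes the custom kernels' setup.py compile its CUDA extensions rather than skip them; the flag name is taken from the diff and its handling lives in server/custom_kernels, so it is worth verifying before relying on it (a sketch):

    # Find where the flag is actually consumed:
    grep -rn "BUILD_EXTENSIONS" server/
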
@@ -167,12 +175,13 @@ WORKDIR /usr/src
 COPY server/Makefile-vllm Makefile
 
 # Build specific version of vllm
-RUN MAX_WORKERS=4 make build-vllm
+RUN MAX_JOBS=5 make build-vllm
 
 # ==============================================================================
 
 # Text Generation Inference base image
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base
+# nvidia/cuda:11.8.0-base-ubuntu20.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as base
 
 # Conda env
 ENV PATH=/opt/conda/bin:$PATH \
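
Note: the runtime base image also moves to -devel, with the old -base tag kept as a comment. This makes the final image considerably larger, but it keeps nvcc and the CUDA headers available at run time, which later pip installs (auto-gptq below, for instance) may need to compile their own kernels. A follow-up could return to a slimmer -base or -runtime tag once nothing compiles at run time; to compare candidate footprints (a sketch, assuming the tags are pulled locally):

    docker images nvidia/cuda --format '{{.Tag}}\t{{.Size}}'
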
@@ -204,12 +213,15 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 
 # Copy build artifacts from custom kernels builder
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy build artifacts from exllama kernels builder
 COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy builds artifacts from vllm builder
 COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
+
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
 
@@ -217,10 +229,17 @@ RUN pip install einops --no-cache-dir
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
 
 RUN cd server && \
     make gen-server && \
-    pip install -r requirements.txt && \
-    pip install ".[bnb, accelerate, quantize]" --no-cache-dir
+    sed -i '/torch/d' requirements.txt && \
+    pip install -r requirements.txt
+
+RUN pip freeze | grep torch
+
+RUN cd server && \
+    pip install ".[bnb, accelerate, quantize]" --no-cache-dir && \
+    pip install optimum auto-gptq
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
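
Note: this hunk reorders the server install so pip cannot clobber the torch that was built from source: sed -i '/torch/d' strips any torch pin from requirements.txt before installing it, and the standalone RUN pip freeze | grep torch doubles as a build-time assertion, since grep exits non-zero (failing the layer) when no torch line is present. Be aware the bare /torch/ pattern deletes every requirement whose name merely contains "torch" (a hypothetical torchvision pin included), which may well be intended here; if only the torch pin itself should go, a tighter pattern works (a sketch, GNU sed):

    # Delete only lines whose package name is exactly torch:
    sed -i '/^torch\b/d' requirements.txt
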
@@ -245,6 +264,6 @@ ENTRYPOINT ["./entrypoint.sh"]
 # ==============================================================================
 
 # Final image
-FROM text-generation-inference-base
+FROM base
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
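
Note: FROM text-generation-inference-base referenced a stage name that never appears in this Dockerfile; FROM base correctly targets the stage declared earlier with "as base", so the final image inherits everything installed there and only swaps in the launcher entrypoint. Intermediate stages remain individually buildable, e.g. (a sketch):

    # Build only the base stage, useful for debugging:
    docker build --target base -t tgi-base .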