From 170c912d71c8141b49824bbb6b86e5ba481ea576 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Wed, 6 Sep 2023 23:32:43 -0600 Subject: [PATCH] reorganize dockerfile --- other/non-avx tgi docker/Dockerfile | 30 +++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/other/non-avx tgi docker/Dockerfile b/other/non-avx tgi docker/Dockerfile index 528e54b..9855f9c 100644 --- a/other/non-avx tgi docker/Dockerfile +++ b/other/non-avx tgi docker/Dockerfile @@ -36,6 +36,7 @@ COPY launcher launcher RUN cargo build --release # ============================================================================== +# Build PyTorch # Python builder # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile @@ -103,12 +104,15 @@ WORKDIR /pytorch # Write the Pytorch version into the version.txt file because it isn't always the same as the tag we checked out RUN echo $PYTORCH_VERSION > version.txt -RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython mkl mkl-include cudnn && \ - /opt/conda/bin/conda install -c pytorch magma-cuda118 && \ - /opt/conda/bin/python -mpip install -r requirements.txt && \ - /opt/conda/bin/conda clean -ya +RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython && \ + /opt/conda/bin/python -mpip install -r requirements.txt + +# Install things for building PyTorch +RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn && \ + /opt/conda/bin/conda install -c pytorch magma-cuda118 # https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/reqs/train-conda-build.requirements.txt +# TODO: remove what we don't need RUN /opt/conda/bin/conda install -y \ jemalloc \ astunparse \ @@ -133,6 +137,8 @@ RUN /opt/conda/bin/conda install -y \ types-dataclasses \ typing-extensions +RUN /opt/conda/bin/conda clean -ya + # Use Intel OpenMP with optimizations. See the documentation for details. # https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html # Intel OpenMP thread blocking time in ms. @@ -185,12 +191,10 @@ RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)" RUN nm -D /opt/conda/lib/python3.9/site-packages/torch/lib/libtorch.so # ============================================================================== +# Set up the kernel-builder FROM pytorch-install as kernel-builder -# Copy installed PyTorch from Conda environment -# COPY --from=pytorch-install /opt/conda /opt/conda - RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ ninja-build \ && rm -rf /var/lib/apt/lists/* @@ -199,8 +203,8 @@ RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \ /opt/conda/bin/conda clean -ya # ============================================================================== - # Build Flash Attention CUDA kernels + FROM kernel-builder as flash-att-builder WORKDIR /usr/src COPY server/Makefile-flash-att Makefile @@ -209,8 +213,8 @@ COPY server/Makefile-flash-att Makefile RUN MAX_JOBS=5 make build-flash-attention # ============================================================================== - # Build Flash Attention v2 CUDA kernels + FROM kernel-builder as flash-att-v2-builder WORKDIR /usr/src COPY server/Makefile-flash-att-v2 Makefile @@ -219,8 +223,8 @@ COPY server/Makefile-flash-att-v2 Makefile RUN MAX_JOBS=10 make build-flash-attention-v2 # ============================================================================== - # Build Transformers exllama kernels + FROM kernel-builder as exllama-kernels-builder WORKDIR /usr/src COPY server/exllama_kernels/ . @@ -229,8 +233,8 @@ COPY server/exllama_kernels/ . RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build # ============================================================================== - # Build Transformers CUDA kernels + FROM kernel-builder as custom-kernels-builder WORKDIR /usr/src COPY server/custom_kernels/ . @@ -239,8 +243,8 @@ COPY server/custom_kernels/ . RUN MAX_JOBS=5 python setup.py build # ============================================================================== - # Build vllm CUDA kernels + FROM kernel-builder as vllm-builder WORKDIR /usr/src COPY server/Makefile-vllm Makefile @@ -308,6 +312,8 @@ RUN cd server && \ pip install ".[bnb, accelerate, quantize]" --no-cache-dir && \ pip install optimum auto-gptq +RUN /opt/conda/bin/conda clean -ya + # Fix the error # /opt/conda/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32 RUN cp /opt/conda/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda118.so /opt/conda/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cpu.so