From b2b6cdabaa6b4892f4133d9e9f17912c24003f3e Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Wed, 6 Sep 2023 12:01:32 -0600
Subject: [PATCH] still working on dockerfile

---
 other/non-avx tgi docker/Dockerfile | 41 +++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/other/non-avx tgi docker/Dockerfile b/other/non-avx tgi docker/Dockerfile
index 3f8e549..4ed39b8 100644
--- a/other/non-avx tgi docker/Dockerfile
+++ b/other/non-avx tgi docker/Dockerfile
@@ -40,7 +40,7 @@ RUN cargo build --release
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 # FROM debian:bullseye-slim as pytorch-install
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as pytorch-install
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-install
 
 ARG PYTORCH_VERSION=2.0.1
 ARG PYTHON_VERSION=3.9
@@ -88,7 +88,11 @@ RUN git clone --recursive https://github.com/pytorch/pytorch && \
 
 WORKDIR /pytorch
 
-RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
+# Write the Pytorch version into the version.txt file because it isn't always the same as the tag
+RUN echo $PYTORCH_VERSION > version.txt
+
+RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython mkl mkl-include && \
+    /opt/conda/bin/conda install -c pytorch magma-cuda118 && \
     /opt/conda/bin/python -mpip install -r requirements.txt && \
     /opt/conda/bin/conda clean -ya
 
@@ -102,8 +106,12 @@ RUN python setup.py clean && \
     cd .. && \
     rm -rf pytorch
 
+# BUILD_TEST=0 \
+
 # Make sure we built everything properly. Build will fail if CUDA isn't available.
-RUN python -c "import torch; exit(1 if not torch.cuda.is_available() else 0)"
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
+
+# RUN pip freeze | grep "torch"
 
 # ==============================================================================
 
@@ -127,7 +135,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile
 
 # Build specific version of flash attention
-RUN MAX_WORKERS=4 make build-flash-attention
+RUN MAX_JOBS=5 make build-flash-attention
 
 # ==============================================================================
 
@@ -137,7 +145,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att-v2 Makefile
 
 # Build specific version of flash attention v2
-RUN MAX_WORKERS=4 make build-flash-attention-v2
+RUN MAX_JOBS=10 make build-flash-attention-v2
 
 # ==============================================================================
 
@@ -157,7 +165,7 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN python setup.py build
+RUN BUILD_EXTENSIONS=True MAX_JOBS=5 python setup.py build
 
 # ==============================================================================
 
@@ -167,12 +175,13 @@ WORKDIR /usr/src
 COPY server/Makefile-vllm Makefile
 
 # Build specific version of vllm
-RUN MAX_WORKERS=4 make build-vllm
+RUN MAX_JOBS=5 make build-vllm
 
 # ==============================================================================
 
 # Text Generation Inference base image
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base
+# nvidia/cuda:11.8.0-base-ubuntu20.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as base
 
 # Conda env
 ENV PATH=/opt/conda/bin:$PATH \
@@ -204,12 +213,15 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 
 # Copy build artifacts from custom kernels builder
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+
 # Copy build artifacts from exllama kernels builder
 COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy builds artifacts from vllm builder
 COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
+
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
 
@@ -217,10 +229,17 @@ RUN pip install einops --no-cache-dir
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
+
 RUN cd server && \
     make gen-server && \
-    pip install -r requirements.txt && \
-    pip install ".[bnb, accelerate, quantize]" --no-cache-dir
+    sed -i '/torch/d' requirements.txt && \
+    pip install -r requirements.txt
+
+RUN pip freeze | grep torch
+
+RUN cd server && \
+    pip install ".[bnb, accelerate, quantize]" --no-cache-dir && \
+    pip install optimum auto-gptq
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -245,6 +264,6 @@ ENTRYPOINT ["./entrypoint.sh"]
 # ==============================================================================
 
 # Final image
-FROM text-generation-inference-base
+FROM base
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
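
Usage notes: a minimal sketch of how this patch might be applied and the image
built. The patch filename and the image tag below are assumptions (the filename
is inferred from the subject line, the tag is hypothetical); the Dockerfile
path comes from the diff header above.

    # Apply the patch on top of a checkout of the repository
    # (filename assumed from "git format-patch" naming of the subject)
    git am 0001-still-working-on-dockerfile.patch

    # Build from the repository root; quote the path because it contains
    # spaces. The tag "tgi-non-avx" is a placeholder, not from the patch.
    docker build -f "other/non-avx tgi docker/Dockerfile" -t tgi-non-avx .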