From b2b6cdabaa6b4892f4133d9e9f17912c24003f3e Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Wed, 6 Sep 2023 12:01:32 -0600
Subject: [PATCH] still working on dockerfile

---
 other/non-avx tgi docker/Dockerfile | 41 +++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/other/non-avx tgi docker/Dockerfile b/other/non-avx tgi docker/Dockerfile
index 3f8e549..4ed39b8 100644
--- a/other/non-avx tgi docker/Dockerfile
+++ b/other/non-avx tgi docker/Dockerfile
@@ -40,7 +40,7 @@ RUN cargo build --release
 # Python builder
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 # FROM debian:bullseye-slim as pytorch-install
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as pytorch-install
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-install
 
 ARG PYTORCH_VERSION=2.0.1
 ARG PYTHON_VERSION=3.9
@@ -88,7 +88,11 @@ RUN git clone --recursive https://github.com/pytorch/pytorch && \
 
 WORKDIR /pytorch
 
-RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
+# Write the Pytorch version into the version.txt file because it isn't always the same as the tag
+RUN echo $PYTORCH_VERSION > version.txt
+
+RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython mkl mkl-include && \
+    /opt/conda/bin/conda install -c pytorch magma-cuda118 && \
     /opt/conda/bin/python -mpip install -r requirements.txt && \
     /opt/conda/bin/conda clean -ya
 
@@ -102,8 +106,12 @@ RUN python setup.py clean && \
     cd .. && \
     rm -rf pytorch
 
+# BUILD_TEST=0 \
+
 # Make sure we built everything properly. Build will fail if CUDA isn't available.
-RUN python -c "import torch; exit(1 if not torch.cuda.is_available() else 0)"
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
+
+# RUN pip freeze | grep "torch"
 
 # ==============================================================================
 
@@ -127,7 +135,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att Makefile
 
 # Build specific version of flash attention
-RUN MAX_WORKERS=4 make build-flash-attention
+RUN MAX_JOBS=5 make build-flash-attention
 
 # ==============================================================================
 
@@ -137,7 +145,7 @@ WORKDIR /usr/src
 COPY server/Makefile-flash-att-v2 Makefile
 
 # Build specific version of flash attention v2
-RUN MAX_WORKERS=4 make build-flash-attention-v2
+RUN MAX_JOBS=10 make build-flash-attention-v2
 
 # ==============================================================================
 
@@ -157,7 +165,7 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN python setup.py build
+RUN BUILD_EXTENSIONS=True MAX_JOBS=5 python setup.py build
 
 # ==============================================================================
 
@@ -167,12 +175,13 @@ WORKDIR /usr/src
 COPY server/Makefile-vllm Makefile
 
 # Build specific version of vllm
-RUN MAX_WORKERS=4 make build-vllm
+RUN MAX_JOBS=5 make build-vllm
 
 # ==============================================================================
 
 # Text Generation Inference base image
-FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base
+# nvidia/cuda:11.8.0-base-ubuntu20.04
+FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as base
 
 # Conda env
 ENV PATH=/opt/conda/bin:$PATH \
@@ -204,12 +213,15 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 
 # Copy build artifacts from custom kernels builder
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+
 # Copy build artifacts from exllama kernels builder
 COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy builds artifacts from vllm builder
 COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
+RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
+
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
 
@@ -217,10 +229,17 @@ RUN pip install einops --no-cache-dir
 COPY proto proto
 COPY server server
 COPY server/Makefile server/Makefile
+
 RUN cd server && \
     make gen-server && \
-    pip install -r requirements.txt && \
-    pip install ".[bnb, accelerate, quantize]" --no-cache-dir
+    sed -i '/torch/d' requirements.txt && \
+    pip install -r requirements.txt
+
+RUN pip freeze | grep torch
+
+RUN cd server && \
+    pip install ".[bnb, accelerate, quantize]" --no-cache-dir && \
+    pip install optimum auto-gptq
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
@@ -245,6 +264,6 @@ ENTRYPOINT ["./entrypoint.sh"]
 # ==============================================================================
 
 # Final image
-FROM text-generation-inference-base
+FROM base
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
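
Usage notes: a minimal sketch of how this patch might be applied and the image
built. The patch filename and the image tag below are assumptions (the filename
is inferred from the subject line, the tag is hypothetical); the Dockerfile
path comes from the diff header above.

    # Apply the patch on top of a checkout of the repository
    # (filename assumed from "git format-patch" naming of the subject)
    git am 0001-still-working-on-dockerfile.patch

    # Build from the repository root; quote the path because it contains
    # spaces. The tag "tgi-non-avx" is a placeholder, not from the patch.
    docker build -f "other/non-avx tgi docker/Dockerfile" -t tgi-non-avx .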