diff --git a/other/non-avx tgi docker/Dockerfile b/other/non-avx tgi docker/Dockerfile index 9855f9c..378c9e3 100644 --- a/other/non-avx tgi docker/Dockerfile +++ b/other/non-avx tgi docker/Dockerfile @@ -1,3 +1,7 @@ +# syntax=docker/dockerfile:1 + +# DOCKER_BUILDKIT=1 DOCKER_CLI_EXPERIMENTAL=enabled + # Rust builder FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef WORKDIR /usr/src @@ -40,16 +44,13 @@ RUN cargo build --release # Python builder # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile -# FROM debian:bullseye-slim as pytorch-install -FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-install +FROM ubuntu:20.04 as dev-base +# FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-build ARG PYTORCH_VERSION=2.0.1 ARG PYTHON_VERSION=3.9 # Keep in sync with `server/pyproject.toml ARG CUDA_VERSION=11.8 -ARG MAMBA_VERSION=23.1.0-1 -ARG CUDA_CHANNEL=nvidia -ARG INSTALL_CHANNEL=pytorch # Automatically set by buildx ARG TARGETPLATFORM @@ -58,24 +59,26 @@ ENV PATH /opt/conda/bin:$PATH RUN apt-get update +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gnupg2 + # Add new repo to install gcc 11 on Ubuntu 20.04 RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" >> /etc/apt/sources.list && \ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 60C317803A41BA51845E371A1E9377A2BA9EF27F && \ apt-get update -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - gcc-11 \ - g++-11 \ - build-essential \ - ca-certificates \ - ccache \ - curl \ - git \ - cmake \ - libjpeg-dev \ - libpng-dev \ - ninja-build \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + gcc-11 \ + g++-11 \ + build-essential \ + ca-certificates \ + ccache \ + cmake \ + curl \ + git \ + ninja-build \ + libjpeg-dev \ + libpng-dev && \ + rm -rf /var/lib/apt/lists/* RUN 
/usr/sbin/update-ccache-symlinks RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache ENV PATH /opt/conda/bin:$PATH @@ -84,38 +87,43 @@ ENV PATH /opt/conda/bin:$PATH RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11 # Install conda -# translating Docker's TARGETPLATFORM into mamba arches +FROM dev-base as pytorch-build +# Automatically set by buildx +ARG TARGETPLATFORM +# translating Docker's TARGETPLATFORM into miniconda arches RUN case ${TARGETPLATFORM} in \ - "linux/arm64") MAMBA_ARCH=aarch64 ;; \ - *) MAMBA_ARCH=x86_64 ;; \ + "linux/arm64") MINICONDA_ARCH=aarch64 ;; \ + *) MINICONDA_ARCH=x86_64 ;; \ esac && \ - curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" -RUN chmod +x ~/mambaforge.sh && \ - bash ~/mambaforge.sh -b -p /opt/conda && \ - rm ~/mambaforge.sh + curl -fsSL -v -o ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh" + +# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431 RUN chmod +x ~/miniconda.sh && \ + bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh RUN git clone --recursive https://github.com/pytorch/pytorch && \ cd pytorch && \ - git checkout v${PYTORCH_VERSION} && \ - git submodule update --init --recursive + git checkout v${PYTORCH_VERSION} WORKDIR /pytorch +RUN git submodule update --init --recursive + # Write the Pytorch version into the version.txt file because it isn't always the same as the tag we checked out RUN echo $PYTORCH_VERSION > version.txt -RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython && \ - /opt/conda/bin/python -mpip install -r requirements.txt - # Install things for building PyTorch RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn && \ - /opt/conda/bin/conda install -c 
pytorch magma-cuda118 + /opt/conda/bin/conda install -y libgcc && \ + /opt/conda/bin/conda install -y -c conda-forge libstdcxx-ng=12 && \ + /opt/conda/bin/conda install -y -c pytorch magma-cuda118 && \ + /opt/conda/bin/conda install -y -c "nvidia/label/cuda-11.8.0" cuda==11.8 # https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/reqs/train-conda-build.requirements.txt # TODO: remove what we don't need RUN /opt/conda/bin/conda install -y \ jemalloc \ - astunparse \ ccache \ cmake \ expecttest \ @@ -124,7 +132,6 @@ RUN /opt/conda/bin/conda install -y \ git \ hypothesis \ jinja2 \ - libjpeg-turbo \ libpng \ networkx \ ninja \ @@ -134,7 +141,6 @@ RUN /opt/conda/bin/conda install -y \ requests \ setuptools \ sympy \ - types-dataclasses \ typing-extensions RUN /opt/conda/bin/conda clean -ya @@ -151,13 +157,21 @@ ENV LD_PRELOAD=/opt/conda/lib/libiomp5.so:${LD_PRELOAD} ENV LD_PRELOAD=/opt/conda/lib/libjemalloc.so:${LD_PRELOAD} ENV MALLOC_CONF="background_thread:true,metadata_thp:auto,dirty_decay_ms:30000,muzzy_decay_ms:30000" +RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \ + /opt/conda/bin/python -mpip install --no-cache-dir -r requirements.txt +RUN /opt/conda/bin/conda clean -ya + +RUN cat version.txt + +RUN make triton + # Install PyTorch without AVX2 # https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/docker-compose.yaml#L124 # print(torch.__config__.show().split("\n"), sep="\n") RUN --mount=type=cache,target=/opt/ccache \ python setup.py clean && \ - - BLAS_INFO=mklBUILD_TYPE=Release \ + BLAS_INFO=mkl \ + BUILD_TYPE=Release \ CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor 
-Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow" \ LAPACK_INFO=mkl \ PERF_WITH_AVX=1 \ @@ -177,35 +191,24 @@ RUN --mount=type=cache,target=/opt/ccache \ USE_NNPACK=ON \ USE_OPENMP=ON \ USE_ROCM=OFF \ - BUILD_TEST=0 \ - TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ - CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ - CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \ - python setup.py install && \ - cd .. && \ - rm -rf pytorch + BUILD_TEST=0 \ + CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \ + TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \ + CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \ + python setup.py install && \ + cd .. && \ + rm -rf pytorch + # Make sure we built everything properly. Build will fail if CUDA isn't available. 
-RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)" +# RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)" RUN nm -D /opt/conda/lib/python3.9/site-packages/torch/lib/libtorch.so -# ============================================================================== -# Set up the kernel-builder - -FROM pytorch-install as kernel-builder - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - ninja-build \ - && rm -rf /var/lib/apt/lists/* - -RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \ - /opt/conda/bin/conda clean -ya - # ============================================================================== # Build Flash Attention CUDA kernels -FROM kernel-builder as flash-att-builder +FROM pytorch-build as flash-att-builder WORKDIR /usr/src COPY server/Makefile-flash-att Makefile @@ -215,7 +218,7 @@ RUN MAX_JOBS=5 make build-flash-attention # ============================================================================== # Build Flash Attention v2 CUDA kernels -FROM kernel-builder as flash-att-v2-builder +FROM pytorch-build as flash-att-v2-builder WORKDIR /usr/src COPY server/Makefile-flash-att-v2 Makefile @@ -225,7 +228,7 @@ RUN MAX_JOBS=10 make build-flash-attention-v2 # ============================================================================== # Build Transformers exllama kernels -FROM kernel-builder as exllama-kernels-builder +FROM pytorch-build as exllama-kernels-builder WORKDIR /usr/src COPY server/exllama_kernels/ . @@ -235,7 +238,7 @@ RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build # ============================================================================== # Build Transformers CUDA kernels -FROM kernel-builder as custom-kernels-builder +FROM pytorch-build as custom-kernels-builder WORKDIR /usr/src COPY server/custom_kernels/ . 
@@ -245,7 +248,7 @@ RUN MAX_JOBS=5 python setup.py build # ============================================================================== # Build vllm CUDA kernels -FROM kernel-builder as vllm-builder +FROM pytorch-build as vllm-builder WORKDIR /usr/src COPY server/Makefile-vllm Makefile @@ -256,7 +259,7 @@ RUN MAX_JOBS=5 make build-vllm # Text Generation Inference base image # nvidia/cuda:11.8.0-base-ubuntu20.04 -FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as base +FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base # Conda env ENV PATH=/opt/conda/bin:$PATH \ @@ -277,7 +280,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins && rm -rf /var/lib/apt/lists/* # Copy conda with PyTorch installed -COPY --from=pytorch-install /opt/conda /opt/conda +COPY --from=pytorch-build /opt/conda /opt/conda # Copy build artifacts from flash attention builder COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages @@ -334,8 +337,8 @@ RUN python3 -c "import torch; import text_generation_server.utils.gptq.exllama" # Make sure our special dependencies were compiled and copied correctly -RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)" -RUN python -c "import torch; torch.cuda.is_available()" +# RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)" +# RUN python -c "import torch; torch.cuda.is_available()" RUN python -c "import torch; import flash_attn_2_cuda" RUN python -c "import torch; import flash_attn_cuda" RUN python -c "import torch; import vllm_cache_ops"