still working on dockerfile
This commit is contained in:
parent
170c912d71
commit
4c49aa525a
|
@ -1,3 +1,7 @@
|
||||||
|
# syntax = docker/dockerfile:experimental
|
||||||
|
|
||||||
|
# DOCKER_BUILDKIT=1 DOCKER_CLI_EXPERIMENTAL=enabled
|
||||||
|
|
||||||
# Rust builder
|
# Rust builder
|
||||||
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
|
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
|
@ -40,16 +44,13 @@ RUN cargo build --release
|
||||||
|
|
||||||
# Python builder
|
# Python builder
|
||||||
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
|
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
|
||||||
# FROM debian:bullseye-slim as pytorch-install
|
FROM ubuntu:20.04 as dev-base
|
||||||
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-install
|
# FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as pytorch-build
|
||||||
|
|
||||||
ARG PYTORCH_VERSION=2.0.1
|
ARG PYTORCH_VERSION=2.0.1
|
||||||
ARG PYTHON_VERSION=3.9
|
ARG PYTHON_VERSION=3.9
|
||||||
# Keep in sync with `server/pyproject.toml
|
# Keep in sync with `server/pyproject.toml
|
||||||
ARG CUDA_VERSION=11.8
|
ARG CUDA_VERSION=11.8
|
||||||
ARG MAMBA_VERSION=23.1.0-1
|
|
||||||
ARG CUDA_CHANNEL=nvidia
|
|
||||||
ARG INSTALL_CHANNEL=pytorch
|
|
||||||
|
|
||||||
# Automatically set by buildx
|
# Automatically set by buildx
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
|
@ -58,24 +59,26 @@ ENV PATH /opt/conda/bin:$PATH
|
||||||
|
|
||||||
RUN apt-get update
|
RUN apt-get update
|
||||||
|
|
||||||
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gnupg2
|
||||||
|
|
||||||
# Add new repo to install gcc 11 on Ubuntu 20.04
|
# Add new repo to install gcc 11 on Ubuntu 20.04
|
||||||
RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" >> /etc/apt/sources.list && \
|
RUN echo "deb http://ppa.launchpad.net/ubuntu-toolchain-r/test/ubuntu focal main" >> /etc/apt/sources.list && \
|
||||||
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 60C317803A41BA51845E371A1E9377A2BA9EF27F && \
|
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 60C317803A41BA51845E371A1E9377A2BA9EF27F && \
|
||||||
apt-get update
|
apt-get update
|
||||||
|
|
||||||
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
gcc-11 \
|
gcc-11 \
|
||||||
g++-11 \
|
g++-11 \
|
||||||
build-essential \
|
build-essential \
|
||||||
ca-certificates \
|
ca-certificates \
|
||||||
ccache \
|
ccache \
|
||||||
|
cmake \
|
||||||
curl \
|
curl \
|
||||||
git \
|
git \
|
||||||
cmake \
|
|
||||||
libjpeg-dev \
|
|
||||||
libpng-dev \
|
|
||||||
ninja-build \
|
ninja-build \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
libjpeg-dev \
|
||||||
|
libpng-dev && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
RUN /usr/sbin/update-ccache-symlinks
|
RUN /usr/sbin/update-ccache-symlinks
|
||||||
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
|
RUN mkdir /opt/ccache && ccache --set-config=cache_dir=/opt/ccache
|
||||||
ENV PATH /opt/conda/bin:$PATH
|
ENV PATH /opt/conda/bin:$PATH
|
||||||
|
@ -84,38 +87,43 @@ ENV PATH /opt/conda/bin:$PATH
|
||||||
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11
|
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 60 --slave /usr/bin/g++ g++ /usr/bin/g++-11
|
||||||
|
|
||||||
# Install conda
|
# Install conda
|
||||||
# translating Docker's TARGETPLATFORM into mamba arches
|
FROM dev-base as pytorch-build
|
||||||
|
# Automatically set by buildx
|
||||||
|
ARG TARGETPLATFORM
|
||||||
|
# translating Docker's TARGETPLATFORM into miniconda arches
|
||||||
RUN case ${TARGETPLATFORM} in \
|
RUN case ${TARGETPLATFORM} in \
|
||||||
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
|
"linux/arm64") MINICONDA_ARCH=aarch64 ;; \
|
||||||
*) MAMBA_ARCH=x86_64 ;; \
|
*) MINICONDA_ARCH=x86_64 ;; \
|
||||||
esac && \
|
esac && \
|
||||||
curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
|
curl -fsSL -v -o ~/miniconda.sh -O "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"
|
||||||
RUN chmod +x ~/mambaforge.sh && \
|
|
||||||
bash ~/mambaforge.sh -b -p /opt/conda && \
|
# Manually invoke bash on miniconda script per https://github.com/conda/conda/issues/10431
|
||||||
rm ~/mambaforge.sh
|
RUN chmod +x ~/miniconda.sh && \
|
||||||
|
bash ~/miniconda.sh -b -p /opt/conda && \
|
||||||
|
rm ~/miniconda.sh
|
||||||
|
|
||||||
RUN git clone --recursive https://github.com/pytorch/pytorch && \
|
RUN git clone --recursive https://github.com/pytorch/pytorch && \
|
||||||
cd pytorch && \
|
cd pytorch && \
|
||||||
git checkout v${PYTORCH_VERSION} && \
|
git checkout v${PYTORCH_VERSION}
|
||||||
git submodule update --init --recursive
|
|
||||||
|
|
||||||
WORKDIR /pytorch
|
WORKDIR /pytorch
|
||||||
|
|
||||||
|
RUN git submodule update --init --recursive
|
||||||
|
|
||||||
# Write the Pytorch version into the version.txt file because it isn't always the same as the tag we checked out
|
# Write the Pytorch version into the version.txt file because it isn't always the same as the tag we checked out
|
||||||
RUN echo $PYTORCH_VERSION > version.txt
|
RUN echo $PYTORCH_VERSION > version.txt
|
||||||
|
|
||||||
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake ninja conda-build pyyaml numpy ipython && \
|
|
||||||
/opt/conda/bin/python -mpip install -r requirements.txt
|
|
||||||
|
|
||||||
# Install things for building PyTorch
|
# Install things for building PyTorch
|
||||||
RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn && \
|
RUN /opt/conda/bin/conda install -y mkl mkl-include cudnn && \
|
||||||
/opt/conda/bin/conda install -c pytorch magma-cuda118
|
/opt/conda/bin/conda install libgcc && \
|
||||||
|
/opt/conda/bin/conda install -c conda-forge libstdcxx-ng=12 && \
|
||||||
|
/opt/conda/bin/conda install -c pytorch magma-cuda118 && \
|
||||||
|
/opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8
|
||||||
|
|
||||||
# https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/reqs/train-conda-build.requirements.txt
|
# https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/reqs/train-conda-build.requirements.txt
|
||||||
# TODO: remove what we don't need
|
# TODO: remove what we don't need
|
||||||
RUN /opt/conda/bin/conda install -y \
|
RUN /opt/conda/bin/conda install -y \
|
||||||
jemalloc \
|
jemalloc \
|
||||||
astunparse \
|
|
||||||
ccache \
|
ccache \
|
||||||
cmake \
|
cmake \
|
||||||
expecttest \
|
expecttest \
|
||||||
|
@ -124,7 +132,6 @@ RUN /opt/conda/bin/conda install -y \
|
||||||
git \
|
git \
|
||||||
hypothesis \
|
hypothesis \
|
||||||
jinja2 \
|
jinja2 \
|
||||||
libjpeg-turbo \
|
|
||||||
libpng \
|
libpng \
|
||||||
networkx \
|
networkx \
|
||||||
ninja \
|
ninja \
|
||||||
|
@ -134,7 +141,6 @@ RUN /opt/conda/bin/conda install -y \
|
||||||
requests \
|
requests \
|
||||||
setuptools \
|
setuptools \
|
||||||
sympy \
|
sympy \
|
||||||
types-dataclasses \
|
|
||||||
typing-extensions
|
typing-extensions
|
||||||
|
|
||||||
RUN /opt/conda/bin/conda clean -ya
|
RUN /opt/conda/bin/conda clean -ya
|
||||||
|
@ -151,13 +157,21 @@ ENV LD_PRELOAD=/opt/conda/lib/libiomp5.so:${LD_PRELOAD}
|
||||||
ENV LD_PRELOAD=/opt/conda/lib/libjemalloc.so:${LD_PRELOAD}
|
ENV LD_PRELOAD=/opt/conda/lib/libjemalloc.so:${LD_PRELOAD}
|
||||||
ENV MALLOC_CONF="background_thread:true,metadata_thp:auto,dirty_decay_ms:30000,muzzy_decay_ms:30000"
|
ENV MALLOC_CONF="background_thread:true,metadata_thp:auto,dirty_decay_ms:30000,muzzy_decay_ms:30000"
|
||||||
|
|
||||||
|
RUN /opt/conda/bin/conda install -y python=${PYTHON_VERSION} cmake conda-build pyyaml numpy ipython && \
|
||||||
|
/opt/conda/bin/python -mpip install -r requirements.txt
|
||||||
|
RUN /opt/conda/bin/conda clean -ya
|
||||||
|
|
||||||
|
RUN cat version.txt
|
||||||
|
|
||||||
|
RUN make triton
|
||||||
|
|
||||||
# Install PyTorch without AVX2
|
# Install PyTorch without AVX2
|
||||||
# https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/docker-compose.yaml#L124
|
# https://github.com/cresset-template/cresset/blob/37c7b5df7236d3b9d96c4908efe5af8bc90066e3/docker-compose.yaml#L124
|
||||||
# print(torch.__config__.show().split("\n"), sep="\n")
|
# print(torch.__config__.show().split("\n"), sep="\n")
|
||||||
RUN --mount=type=cache,target=/opt/ccache \
|
RUN --mount=type=cache,target=/opt/ccache \
|
||||||
python setup.py clean && \
|
python setup.py clean && \
|
||||||
|
BLAS_INFO=mkl \
|
||||||
BLAS_INFO=mklBUILD_TYPE=Release \
|
BUILD_TYPE=Release \
|
||||||
CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow" \
|
CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow" \
|
||||||
LAPACK_INFO=mkl \
|
LAPACK_INFO=mkl \
|
||||||
PERF_WITH_AVX=1 \
|
PERF_WITH_AVX=1 \
|
||||||
|
@ -178,34 +192,23 @@ RUN --mount=type=cache,target=/opt/ccache \
|
||||||
USE_OPENMP=ON \
|
USE_OPENMP=ON \
|
||||||
USE_ROCM=OFF \
|
USE_ROCM=OFF \
|
||||||
BUILD_TEST=0 \
|
BUILD_TEST=0 \
|
||||||
|
CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \
|
||||||
TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
|
||||||
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
|
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
|
||||||
CMAKE_ARGS='-DDISABLE_AVX2:BOOL=TRUE -DCXX_AVX2_FOUND:BOOL=FALSE -DC_AVX2_FOUND:BOOL=FALSE -DDISABLE_AVX512F:BOOL=TRUE' \
|
|
||||||
python setup.py install && \
|
python setup.py install && \
|
||||||
cd .. && \
|
cd .. && \
|
||||||
rm -rf pytorch
|
rm -rf pytorch
|
||||||
|
|
||||||
|
|
||||||
# Make sure we built everything properly. Build will fail if CUDA isn't available.
|
# Make sure we built everything properly. Build will fail if CUDA isn't available.
|
||||||
RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
|
# RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
|
||||||
|
|
||||||
RUN nm -D /opt/conda/lib/python3.9/site-packages/torch/lib/libtorch.so
|
RUN nm -D /opt/conda/lib/python3.9/site-packages/torch/lib/libtorch.so
|
||||||
|
|
||||||
# ==============================================================================
|
|
||||||
# Set up the kernel-builder
|
|
||||||
|
|
||||||
FROM pytorch-install as kernel-builder
|
|
||||||
|
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
|
||||||
ninja-build \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
RUN /opt/conda/bin/conda install -c "nvidia/label/cuda-11.8.0" cuda==11.8 && \
|
|
||||||
/opt/conda/bin/conda clean -ya
|
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Build Flash Attention CUDA kernels
|
# Build Flash Attention CUDA kernels
|
||||||
|
|
||||||
FROM kernel-builder as flash-att-builder
|
FROM pytorch-build as flash-att-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-flash-att Makefile
|
COPY server/Makefile-flash-att Makefile
|
||||||
|
|
||||||
|
@ -215,7 +218,7 @@ RUN MAX_JOBS=5 make build-flash-attention
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Build Flash Attention v2 CUDA kernels
|
# Build Flash Attention v2 CUDA kernels
|
||||||
|
|
||||||
FROM kernel-builder as flash-att-v2-builder
|
FROM pytorch-build as flash-att-v2-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-flash-att-v2 Makefile
|
COPY server/Makefile-flash-att-v2 Makefile
|
||||||
|
|
||||||
|
@ -225,7 +228,7 @@ RUN MAX_JOBS=10 make build-flash-attention-v2
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Build Transformers exllama kernels
|
# Build Transformers exllama kernels
|
||||||
|
|
||||||
FROM kernel-builder as exllama-kernels-builder
|
FROM pytorch-build as exllama-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/exllama_kernels/ .
|
COPY server/exllama_kernels/ .
|
||||||
|
|
||||||
|
@ -235,7 +238,7 @@ RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Build Transformers CUDA kernels
|
# Build Transformers CUDA kernels
|
||||||
|
|
||||||
FROM kernel-builder as custom-kernels-builder
|
FROM pytorch-build as custom-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/custom_kernels/ .
|
COPY server/custom_kernels/ .
|
||||||
|
|
||||||
|
@ -245,7 +248,7 @@ RUN MAX_JOBS=5 python setup.py build
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Build vllm CUDA kernels
|
# Build vllm CUDA kernels
|
||||||
|
|
||||||
FROM kernel-builder as vllm-builder
|
FROM pytorch-build as vllm-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-vllm Makefile
|
COPY server/Makefile-vllm Makefile
|
||||||
|
|
||||||
|
@ -256,7 +259,7 @@ RUN MAX_JOBS=5 make build-vllm
|
||||||
|
|
||||||
# Text Generation Inference base image
|
# Text Generation Inference base image
|
||||||
# nvidia/cuda:11.8.0-base-ubuntu20.04
|
# nvidia/cuda:11.8.0-base-ubuntu20.04
|
||||||
FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 as base
|
FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base
|
||||||
|
|
||||||
# Conda env
|
# Conda env
|
||||||
ENV PATH=/opt/conda/bin:$PATH \
|
ENV PATH=/opt/conda/bin:$PATH \
|
||||||
|
@ -277,7 +280,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Copy conda with PyTorch installed
|
# Copy conda with PyTorch installed
|
||||||
COPY --from=pytorch-install /opt/conda /opt/conda
|
COPY --from=pytorch-build /opt/conda /opt/conda
|
||||||
|
|
||||||
# Copy build artifacts from flash attention builder
|
# Copy build artifacts from flash attention builder
|
||||||
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
|
@ -334,8 +337,8 @@ RUN python3 -c "import torch; import text_generation_server.utils.gptq.exllama"
|
||||||
|
|
||||||
|
|
||||||
# Make sure our special dependencies were compiled and copied correctly
|
# Make sure our special dependencies were compiled and copied correctly
|
||||||
RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
|
# RUN python -c "import torch; exit(1 if not torch.version.cuda else 0)"
|
||||||
RUN python -c "import torch; torch.cuda.is_available()"
|
# RUN python -c "import torch; torch.cuda.is_available()"
|
||||||
RUN python -c "import torch; import flash_attn_2_cuda"
|
RUN python -c "import torch; import flash_attn_2_cuda"
|
||||||
RUN python -c "import torch; import flash_attn_cuda"
|
RUN python -c "import torch; import flash_attn_cuda"
|
||||||
RUN python -c "import torch; import vllm_cache_ops"
|
RUN python -c "import torch; import vllm_cache_ops"
|
||||||
|
|
Loading…
Reference in New Issue