update dockerfile
This commit is contained in:
parent
dccab72549
commit
65506e19bf
|
@ -2,3 +2,4 @@ aml
|
|||
target
|
||||
server/transformers
|
||||
server/flash-attention
|
||||
hf_cache/
|
||||
|
|
File diff suppressed because it is too large
Load Diff
131
Dockerfile
131
Dockerfile
|
@ -72,17 +72,18 @@ RUN chmod +x ~/mambaforge.sh && \
|
|||
|
||||
# Install pytorch
|
||||
# On arm64 we exit with an error code
|
||||
RUN case ${TARGETPLATFORM} in \
|
||||
"linux/arm64") exit 1 ;; \
|
||||
*) /opt/conda/bin/conda update -y conda && \
|
||||
/opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
|
||||
esac && \
|
||||
/opt/conda/bin/conda clean -ya
|
||||
# RUN case ${TARGETPLATFORM} in \
|
||||
# "linux/arm64") exit 1 ;; \
|
||||
# *) /opt/conda/bin/conda update -y conda && \
|
||||
# /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=$PYTORCH_VERSION" "pytorch-cuda=$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" ;; \
|
||||
# esac && \
|
||||
# /opt/conda/bin/conda clean -ya
|
||||
RUN pip install torch --index-url https://download.pytorch.org/whl/cu121
|
||||
|
||||
# CUDA kernels builder image
|
||||
FROM pytorch-install as kernel-builder
|
||||
|
||||
ARG MAX_JOBS=8
|
||||
# ARG MAX_JOBS=64
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
ninja-build cmake \
|
||||
|
@ -106,64 +107,66 @@ WORKDIR /usr/src
|
|||
COPY server/Makefile-flash-att-v2 Makefile
|
||||
|
||||
# Build specific version of flash attention v2
|
||||
ENV TORCH_CUDA_ARCH_LIST="9.0+PTX"
|
||||
RUN make build-flash-attention-v2-cuda
|
||||
|
||||
# Build Transformers exllama kernels
|
||||
FROM kernel-builder as exllama-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/exllama_kernels/ .
|
||||
# # Build Transformers exllama kernels
|
||||
# FROM kernel-builder as exllama-kernels-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/exllama_kernels/ .
|
||||
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||
# RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||
|
||||
# Build Transformers exllamav2 kernels
|
||||
FROM kernel-builder as exllamav2-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/exllamav2_kernels/ .
|
||||
# FROM kernel-builder as exllamav2-kernels-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/exllamav2_kernels/ .
|
||||
|
||||
# Build exllamav2 kernels
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||
# RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||
|
||||
# Build Transformers awq kernels
|
||||
FROM kernel-builder as awq-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/Makefile-awq Makefile
|
||||
# Build specific version of awq
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
||||
# FROM kernel-builder as awq-kernels-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/Makefile-awq Makefile
|
||||
# # Build specific version of awq
|
||||
# RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
||||
|
||||
# Build eetq kernels
|
||||
FROM kernel-builder as eetq-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/Makefile-eetq Makefile
|
||||
# Build specific version of eetq
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
|
||||
# # Build eetq kernels
|
||||
# FROM kernel-builder as eetq-kernels-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/Makefile-eetq Makefile
|
||||
# # Build specific version of eetq
|
||||
# RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
|
||||
|
||||
# Build Transformers CUDA kernels
|
||||
FROM kernel-builder as custom-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/custom_kernels/ .
|
||||
# Build custom kernels
|
||||
RUN python setup.py build
|
||||
# # Build Transformers CUDA kernels
|
||||
# FROM kernel-builder as custom-kernels-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/custom_kernels/ .
|
||||
# # Build custom kernels
|
||||
# RUN python setup.py build
|
||||
|
||||
# Build vllm CUDA kernels
|
||||
FROM kernel-builder as vllm-builder
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
|
||||
ENV TORCH_CUDA_ARCH_LIST="9.0+PTX"
|
||||
|
||||
COPY server/Makefile-vllm Makefile
|
||||
|
||||
# Build specific version of vllm
|
||||
RUN make build-vllm-cuda
|
||||
|
||||
# Build mamba kernels
|
||||
FROM kernel-builder as mamba-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/Makefile-selective-scan Makefile
|
||||
RUN make build-all
|
||||
# # Build mamba kernels
|
||||
# FROM kernel-builder as mamba-builder
|
||||
# WORKDIR /usr/src
|
||||
# COPY server/Makefile-selective-scan Makefile
|
||||
# RUN make build-all
|
||||
|
||||
# Text Generation Inference base image
|
||||
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as base
|
||||
FROM pytorch-install
|
||||
|
||||
|
||||
# Conda env
|
||||
ENV PATH=/opt/conda/bin:$PATH \
|
||||
|
@ -184,7 +187,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
|||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy conda with PyTorch installed
|
||||
COPY --from=pytorch-install /opt/conda /opt/conda
|
||||
# COPY --from=pytorch-install /opt/conda /opt/conda
|
||||
|
||||
# Copy build artifacts from flash attention builder
|
||||
COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
|
@ -194,23 +197,23 @@ COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.lin
|
|||
# Copy build artifacts from flash attention v2 builder
|
||||
COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
|
||||
# Copy build artifacts from custom kernels builder
|
||||
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# Copy build artifacts from exllama kernels builder
|
||||
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# Copy build artifacts from exllamav2 kernels builder
|
||||
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# Copy build artifacts from awq kernels builder
|
||||
COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# Copy build artifacts from eetq kernels builder
|
||||
COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from custom kernels builder
|
||||
# COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from exllama kernels builder
|
||||
# COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from exllamav2 kernels builder
|
||||
# COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from awq kernels builder
|
||||
# COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from eetq kernels builder
|
||||
# COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
|
||||
# Copy build artifacts from vllm builder
|
||||
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
|
||||
|
||||
# Copy build artifacts from mamba builder
|
||||
COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||
COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||
# # Copy build artifacts from mamba builder
|
||||
# COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||
# COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||
|
||||
# Install flash-attention dependencies
|
||||
RUN pip install einops --no-cache-dir
|
||||
|
@ -237,18 +240,18 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
|
|||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# AWS Sagemaker compatible image
|
||||
FROM base as sagemaker
|
||||
# FROM base as sagemaker
|
||||
|
||||
COPY sagemaker-entrypoint.sh entrypoint.sh
|
||||
RUN chmod +x entrypoint.sh
|
||||
# COPY sagemaker-entrypoint.sh entrypoint.sh
|
||||
# RUN chmod +x entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["./entrypoint.sh"]
|
||||
# ENTRYPOINT ["./entrypoint.sh"]
|
||||
|
||||
# Final image
|
||||
FROM base
|
||||
# # Final image
|
||||
# FROM base
|
||||
|
||||
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
||||
RUN chmod +x /tgi-entrypoint.sh
|
||||
# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
|
||||
# RUN chmod +x /tgi-entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/tgi-entrypoint.sh"]
|
||||
CMD ["--json-output"]
|
||||
# ENTRYPOINT ["/tgi-entrypoint.sh"]
|
||||
# CMD ["--json-output"]
|
||||
|
|
|
@ -48,7 +48,7 @@ minijinja = { git = "https://github.com/mitsuhiko/minijinja.git", rev = "5cd4efb
|
|||
futures-util = "0.3.30"
|
||||
regex = "1.10.3"
|
||||
once_cell = "1.19.0"
|
||||
image = "0.25.1"
|
||||
image = "0.24.9"
|
||||
base64 = "0.22.0"
|
||||
|
||||
[build-dependencies]
|
||||
|
|
|
@ -55,7 +55,7 @@ def initialize_torch_distributed():
|
|||
backend = "nccl"
|
||||
options = ProcessGroupNCCL.Options()
|
||||
options.is_high_priority_stream = True
|
||||
options._timeout = timedelta(seconds=60)
|
||||
options._timeout = timedelta(seconds=30)
|
||||
else:
|
||||
try:
|
||||
import oneccl_bindings_for_pytorch
|
||||
|
@ -79,7 +79,7 @@ def initialize_torch_distributed():
|
|||
backend=backend,
|
||||
world_size=WORLD_SIZE,
|
||||
rank=RANK,
|
||||
timeout=timedelta(seconds=60),
|
||||
timeout=timedelta(seconds=30),
|
||||
pg_options=options,
|
||||
)
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue