Fix nccl regression on PyTorch 2.3 upgrade (#2099)
* Fix NCCL issue
* Add note in Dockerfile
* Use v2.22.3, which also fixes @samsamoa's repro
* Poetry actually can't handle the conflict between torch and nccl
* Set LD_PRELOAD
This commit is contained in:
parent
87ebb6477b
commit
4c50b6d04b
|
@ -40,7 +40,9 @@ RUN cargo build --profile release-opt
|
||||||
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
|
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
|
||||||
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS pytorch-install
|
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS pytorch-install
|
||||||
|
|
||||||
|
# NOTE: When updating the PyTorch version, remember to remove the `pip install nvidia-nccl-cu12==2.22.3` step further down in this Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
|
||||||
ARG PYTORCH_VERSION=2.3.0
|
ARG PYTORCH_VERSION=2.3.0
|
||||||
|
|
||||||
ARG PYTHON_VERSION=3.10
|
ARG PYTHON_VERSION=3.10
|
||||||
# Keep in sync with `server/pyproject.toml`
|
# Keep in sync with `server/pyproject.toml`
|
||||||
ARG CUDA_VERSION=12.1
|
ARG CUDA_VERSION=12.1
|
||||||
|
@ -241,7 +243,10 @@ COPY server/Makefile server/Makefile
|
||||||
RUN cd server && \
|
RUN cd server && \
|
||||||
make gen-server && \
|
make gen-server && \
|
||||||
pip install -r requirements_cuda.txt && \
|
pip install -r requirements_cuda.txt && \
|
||||||
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
|
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir && \
|
||||||
|
pip install nvidia-nccl-cu12==2.22.3
|
||||||
|
|
||||||
|
ENV LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
|
||||||
|
|
||||||
# Deps before the binaries
|
# Deps before the binaries
|
||||||
# The binaries change on every build given we burn the SHA into them
|
# The binaries change on every build given we burn the SHA into them
|
||||||
|
|
|
@ -35,5 +35,5 @@ run-dev:
|
||||||
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
|
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
|
||||||
|
|
||||||
export-requirements:
|
export-requirements:
|
||||||
poetry export -o requirements_cuda.txt --without-hashes
|
poetry export -o requirements_cuda.txt --without-hashes --with cuda
|
||||||
poetry export -o requirements_rocm.txt --without-hashes
|
poetry export -o requirements_rocm.txt --without-hashes
|
||||||
|
|
Loading…
Reference in New Issue