Fix nccl regression on PyTorch 2.3 upgrade (#2099)

* fix nccl issue
* add note in dockerfile
* use v2.22.3 that also fixes @samsamoa's repro
* poetry actually can't handle the conflict between torch and nccl
* set LD_PRELOAD
2024-07-08 17:52:10 +02:00 · 2024-07-08 17:52:10 +02:00 · 4c50b6d04b
parent 87ebb6477b
commit 4c50b6d04b
2 changed files with 7 additions and 2 deletions
--- a/7
+++ b/7
@@ -40,7 +40,9 @@ RUN cargo build --profile release-opt
 # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS pytorch-install
 # NOTE: When updating the PyTorch version, remember to remove `pip install nvidia-nccl-cu12==2.22.3` further down in this Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
 ARG PYTORCH_VERSION=2.3.0
 ARG PYTHON_VERSION=3.10
 # Keep in sync with `server/pyproject.toml`
 ARG CUDA_VERSION=12.1
@@ -241,7 +243,10 @@ COPY server/Makefile server/Makefile
 RUN cd server && \
    make gen-server && \
    pip install -r requirements_cuda.txt && \
-    pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
+    pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir && \
    pip install nvidia-nccl-cu12==2.22.3
 ENV LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
 # Deps before the binaries
 # The binaries change on every build given we burn the SHA into them
--- a/server/Makefile
+++ b/server/Makefile
@@ -35,5 +35,5 @@ run-dev:
 	SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
 export-requirements:
-	poetry export -o requirements_cuda.txt --without-hashes
+	poetry export -o requirements_cuda.txt --without-hashes --with cuda
 	poetry export -o requirements_rocm.txt --without-hashes