diff --git a/Dockerfile b/Dockerfile index aee4a172..c3a4623e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -252,7 +252,10 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements_cuda.txt && \ - pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir + pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir && \ + pip install nvidia-nccl-cu12==2.22.3 + +ENV LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2 # Deps before the binaries # The binaries change on every build given we burn the SHA into them diff --git a/server/Makefile b/server/Makefile index af5ffa59..209fc44e 100644 --- a/server/Makefile +++ b/server/Makefile @@ -30,6 +30,7 @@ install: install-cuda install-cuda: install-server install-flash-attention-v2-cuda install-vllm-cuda install-flash-attention install-fbgemm pip install -e ".[bnb]" + pip install nvidia-nccl-cu12==2.22.3 install-rocm: install-server install-flash-attention-v2-rocm install-vllm-rocm