diff --git a/Dockerfile b/Dockerfile
index 138fd8c5..a0681bf0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -188,7 +188,7 @@ RUN make build-all
 FROM kernel-builder AS flashinfer-builder
 WORKDIR /usr/src
 COPY server/Makefile-flashinfer Makefile
-RUN make build-flashinfer
+RUN make install-flashinfer
 
 # Text Generation Inference base image
 FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
@@ -242,7 +242,7 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
 # Copy build artifacts from mamba builder
 COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
 COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
-COPY --from=flashinfer-builder /usr/src/flashinfer/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
+COPY --from=flashinfer-builder /opt/conda/lib/python3.10/site-packages/flashinfer/ /opt/conda/lib/python3.10/site-packages/flashinfer/
 
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer
index c377eb0e..3abb0491 100644
--- a/server/Makefile-flashinfer
+++ b/server/Makefile-flashinfer
@@ -1,12 +1,2 @@
-flashinfer_commit := v0.1.5
-
-build-flashinfer:
-	git clone https://github.com/flashinfer-ai/flashinfer.git flashinfer && \
-	cd flashinfer && git fetch && git checkout $(flashinfer_commit) && \
-	git submodule update --init --recursive && \
-	cd python/ && \
-	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py build
-
-install-flashinfer: build-flashinfer
-	cd flashinfer/python/ && \
-	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py install
+install-flashinfer:
+	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4
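
A minimal sanity check after building the image, to confirm the wheel-installed flashinfer ends up where the new COPY expects it. The `tgi:dev` tag is a hypothetical local build tag, the `--entrypoint` override assumes the image's default entrypoint is a launcher rather than Python, and importing flashinfer is assumed to need a CUDA-capable host:

    # Build the image, then import flashinfer from the final stage's Python.
    docker build -t tgi:dev .
    docker run --rm --gpus all --entrypoint python tgi:dev \
        -c "import flashinfer; print(flashinfer.__file__)"
    # Expect a path under /opt/conda/lib/python3.10/site-packages/flashinfer/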