diff --git a/Dockerfile b/Dockerfile
index 138fd8c5..a0681bf0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -188,7 +188,7 @@ RUN make build-all
 FROM kernel-builder AS flashinfer-builder
 WORKDIR /usr/src
 COPY server/Makefile-flashinfer Makefile
-RUN make build-flashinfer
+RUN make install-flashinfer
 
 # Text Generation Inference base image
 FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
@@ -242,7 +242,7 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
 # Copy build artifacts from mamba builder
 COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
 COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
-COPY --from=flashinfer-builder /usr/src/flashinfer/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
+COPY --from=flashinfer-builder /opt/conda/lib/python3.10/site-packages/flashinfer/ /opt/conda/lib/python3.10/site-packages/flashinfer/
 
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer
index c377eb0e..3abb0491 100644
--- a/server/Makefile-flashinfer
+++ b/server/Makefile-flashinfer
@@ -1,12 +1,2 @@
-flashinfer_commit := v0.1.5
-
-build-flashinfer:
-	git clone https://github.com/flashinfer-ai/flashinfer.git flashinfer && \
-	cd flashinfer && git fetch && git checkout $(flashinfer_commit) && \
-	git submodule update --init --recursive && \
-	cd python/ && \
-	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py build
-
-install-flashinfer: build-flashinfer
-	cd flashinfer/python/ && \
-	CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py install
+install-flashinfer:
+	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4
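
A minimal sanity check after building the image, to confirm the wheel-installed flashinfer ends up where the new COPY expects it. The `tgi:dev` tag is a hypothetical local build tag, the `--entrypoint` override assumes the image's default entrypoint is a launcher rather than Python, and importing flashinfer is assumed to need a CUDA-capable host:

    # Build the image, then import flashinfer from the final stage's Python.
    docker build -t tgi:dev .
    docker run --rm --gpus all --entrypoint python tgi:dev \
        -c "import flashinfer; print(flashinfer.__file__)"
    # Expect a path under /opt/conda/lib/python3.10/site-packages/flashinfer/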