Use the prebuilt flashinfer wheel (0.1.5) instead of building flashinfer from source.
This commit is contained in:
parent
9d4c5d39fe
commit
f2bdc65098
|
@ -188,7 +188,7 @@ RUN make build-all
|
||||||
FROM kernel-builder AS flashinfer-builder
|
FROM kernel-builder AS flashinfer-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-flashinfer Makefile
|
COPY server/Makefile-flashinfer Makefile
|
||||||
RUN make build-flashinfer
|
RUN make install-flashinfer
|
||||||
|
|
||||||
# Text Generation Inference base image
|
# Text Generation Inference base image
|
||||||
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
|
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS base
|
||||||
|
@ -242,7 +242,7 @@ COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/c
|
||||||
# Copy build artifacts from mamba builder
|
# Copy build artifacts from mamba builder
|
||||||
COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
COPY --from=mamba-builder /usr/src/mamba/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||||
COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
COPY --from=mamba-builder /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
||||||
COPY --from=flashinfer-builder /usr/src/flashinfer/build/lib.linux-x86_64-cpython-310/ /opt/conda/lib/python3.10/site-packages
|
COPY --from=flashinfer-builder /opt/conda/lib/python3.10/site-packages/flashinfer/ /opt/conda/lib/python3.10/site-packages/flashinfer/
|
||||||
|
|
||||||
# Install flash-attention dependencies
|
# Install flash-attention dependencies
|
||||||
RUN pip install einops --no-cache-dir
|
RUN pip install einops --no-cache-dir
|
||||||
|
|
|
@ -1,12 +1,2 @@
|
||||||
flashinfer_commit := v0.1.5
|
install-flashinfer:
|
||||||
|
pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4
|
||||||
build-flashinfer:
|
|
||||||
git clone https://github.com/flashinfer-ai/flashinfer.git flashinfer && \
|
|
||||||
cd flashinfer && git fetch && git checkout $(flashinfer_commit) && \
|
|
||||||
git submodule update --init --recursive && \
|
|
||||||
cd python/ && \
|
|
||||||
CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py build
|
|
||||||
|
|
||||||
install-flashinfer: build-flashinfer
|
|
||||||
cd flashinfer/python/ && \
|
|
||||||
CUDA_ARCH_LIST="8.0;9.0a" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_90a,code=sm_90a" TORCH_CUDA_ARCH_LIST="8.0;9.0a" python setup.py install
|
|
||||||
|
|
Loading…
Reference in New Issue