feat: compile vllm for cuda after flash_attn
commit ad94f299f4
parent 8253f83034
@@ -159,6 +159,12 @@ jobs:
 MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
 cd ../layer_norm
 MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
+# build the kernels for vllm as well
+cd ../..
+git clone https://github.com/Narsil/vllm.git
+cd vllm
+git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
+python setup.py bdist_wheel --dist-dir=../dist
 # Generate a custom name for the wheel to include CUDA and Torch versions
 tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
 wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")