feat: compile vllm for cuda after flash_attn

This commit is contained in:
drbh 2024-05-26 23:21:07 -04:00
parent 8253f83034
commit ad94f299f4
1 changed file with 6 additions and 0 deletions

View File

@@ -159,6 +159,12 @@ jobs:
# Build wheels for the auxiliary kernel packages; MAX_JOBS=2 caps parallel
# compile jobs (nvcc builds are memory-hungry on shared CI runners).
# NOTE(review): this hunk starts mid-step — the working directory here is set
# by earlier (unseen) lines; confirm against the full workflow file.
MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
cd ../layer_norm
MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
# build the kernels for vllm as well
# NOTE(review): ../.. presumably returns to the repository root so the clone
# lands alongside the checkout — verify the layout matches.
cd ../..
git clone https://github.com/Narsil/vllm.git
cd vllm
# Pin the fork to an exact commit so the CI build is reproducible.
git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
# NOTE(review): unlike the builds above, this one has no MAX_JOBS cap —
# confirm the vllm build fits in runner memory or cap it too.
python setup.py bdist_wheel --dist-dir=../dist
# Generate a custom name for the wheel to include CUDA and Torch versions
# The sed inserts "+cu<ver>torch<ver>cxx11abi<bool>" at the 2nd '-' of the
# wheel filename, i.e. as a PEP 440 local-version tag after the version field.
# (${{ matrix.cxx11_abi }} is a GitHub Actions expression expanded before the
# shell runs.)
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")