feat: compile vllm for cuda after flash_attn

This commit is contained in:
drbh 2024-05-26 23:21:07 -04:00
parent 8253f83034
commit ad94f299f4
1 changed file with 6 additions and 0 deletions

View File

@@ -159,6 +159,12 @@ jobs:
# Build wheels for the auxiliary kernel packages; MAX_JOBS=2 caps parallel
# compile jobs (nvcc builds are memory-hungry on shared CI runners).
# NOTE(review): this hunk starts mid-step — the working directory here is set
# by earlier (unseen) lines; confirm against the full workflow file.
MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
cd ../layer_norm
MAX_JOBS=2 python setup.py bdist_wheel --dist-dir=../../../dist
# build the kernels for vllm as well
# NOTE(review): ../.. presumably returns to the repository root so the clone
# lands alongside the checkout — verify the layout matches.
cd ../..
git clone https://github.com/Narsil/vllm.git
cd vllm
# Pin the fork to an exact commit so the CI build is reproducible.
git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
# NOTE(review): unlike the builds above, this one has no MAX_JOBS cap —
# confirm the vllm build fits in runner memory or cap it too.
python setup.py bdist_wheel --dist-dir=../dist
# Generate a custom name for the wheel to include CUDA and Torch versions
# The sed inserts "+cu<ver>torch<ver>cxx11abi<bool>" at the 2nd '-' of the
# wheel filename, i.e. as a PEP 440 local-version tag after the version field.
# (${{ matrix.cxx11_abi }} is a GitHub Actions expression expanded before the
# shell runs.)
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")