diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att
index 5570863b..29e75bc4 100644
--- a/server/Makefile-flash-att
+++ b/server/Makefile-flash-att
@@ -3,12 +3,10 @@ flash_att_commit := 3a9bfd076f98746c73362328958dbc68d145fbec
 build-flash-attention:
 	if [ ! -d 'flash-attention' ]; then \
 		pip install -U packaging ninja --no-cache-dir && \
-		git clone https://github.com/HazyResearch/flash-attention.git && \
-		cd flash-attention && git fetch && git checkout $(flash_att_commit) && \
-		MAX_JOBS=8 python setup.py build && cd csrc/layer_norm && python setup.py build && cd ../rotary && python setup.py build; \
+		git clone https://github.com/HazyResearch/flash-attention.git; \
 	fi
+	cd flash-attention && git fetch && git checkout $(flash_att_commit) && \
+	MAX_JOBS=8 python setup.py build && cd csrc/layer_norm && python setup.py build && cd ../rotary && python setup.py build
 
 install-flash-attention: build-flash-attention
-	if [ ! -d 'flash-attention' ]; then \
-		cd flash-attntion && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install; \
-	fi
+	cd flash-attention && git checkout $(flash_att_commit) && MAX_JOBS=8 python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index b67803fe..ba90a74d 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -5,9 +5,8 @@ build-flash-attention-v2-cuda:
 	pip install -U packaging wheel
 	pip install flash-attn==$(flash_att_v2_commit_cuda)
 
-install-flash-attention-v2-cuda:
-	pip install -U packaging wheel
-	pip install flash-attn==$(flash_att_v2_commit_cuda)
+install-flash-attention-v2-cuda: build-flash-attention-v2-cuda
+	echo "Flash v2 installed"
 
 build-flash-attention-v2-rocm:
 	if [ ! -d 'flash-attention-v2' ]; then \
@@ -18,7 +17,5 @@ build-flash-attention-v2-rocm:
 	fi
 
 install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
-	if [ ! -d 'flash-attention-v2' ]; then \
-		cd flash-attention-v2 && \
-		GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install; \
-	fi
+	cd flash-attention-v2 && \
+	GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install
diff --git a/server/Makefile-vllm b/server/Makefile-vllm
index de3b4611..ded2f5d2 100644
--- a/server/Makefile-vllm
+++ b/server/Makefile-vllm
@@ -1,26 +1,23 @@
+commit_cuda := b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
+commit_rocm := ca6913b3c2ffacdcb7d15e914dc34adbc6c89479
 build-vllm-cuda:
 	if [ ! -d 'vllm' ]; then \
 		pip install -U ninja packaging --no-cache-dir && \
-		git clone https://github.com/Narsil/vllm.git vllm &&\
-		cd vllm && \
-		git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa &&\
-		python setup.py build; \
+		git clone https://github.com/Narsil/vllm.git vllm; \
 	fi
+	cd vllm && git fetch && git checkout $(commit_cuda) && python setup.py build
+
 install-vllm-cuda: build-vllm-cuda
-	if [ ! -d 'vllm' ]; then \
-		cd vllm && pip install -e .; \
-	fi
+	cd vllm && git fetch && git checkout $(commit_cuda) && pip install -e .
 
 build-vllm-rocm:
 	if [ ! -d 'vllm' ]; then \
 		pip install -U ninja packaging --no-cache-dir && \
-		git clone https://github.com/fxmarty/rocm-vllm.git vllm && \
-		cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479 && \
-		PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build; \
+		git clone https://github.com/fxmarty/rocm-vllm.git vllm; \
 	fi
+	cd vllm && git fetch && git checkout $(commit_rocm) && \
+	PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
 
 install-vllm-rocm: build-vllm-rocm
-	if [ ! -d 'vllm' ]; then \
-		cd vllm && \
-		PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .; \
-	fi
+	cd vllm && git fetch && git checkout $(commit_rocm) && \
+	PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .