local-llm-server/other/vllm/build-vllm.sh

#!/bin/bash
# Expected to be run as root in some sort of container
cd /tmp || exit
if [ ! -d /tmp/vllm-gptq ]; then
    git clone https://github.com/chu-tianxiang/vllm-gptq.git
    cd vllm-gptq || exit
else
    cd vllm-gptq || exit
    git pull
fi
if [ ! -d /root/miniconda3 ]; then
    # Download to /tmp explicitly: the working directory is /tmp/vllm-gptq at
    # this point, so a bare -O would put the installer somewhere the bash call
    # below can't find it. -b runs the installer non-interactively and installs
    # to the default prefix, /root/miniconda3.
    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/Miniconda3-latest-Linux-x86_64.sh
    bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b
    rm /tmp/Miniconda3-latest-Linux-x86_64.sh
fi
eval "$(/root/miniconda3/bin/conda shell.bash hook)"
if [ ! -d /root/miniconda3/envs/vllm-gptq ]; then
    conda create --name vllm-gptq -c conda-forge python=3.11 -y
    conda activate vllm-gptq
    pip install ninja
    conda install -y -c "nvidia/label/cuda-11.8.0" cuda==11.8
    conda install -y cudatoolkit cudnn
else
    conda activate vllm-gptq
fi
pip install -r requirements.txt
# Point the extension build at the CUDA toolkit installed into the conda env
CUDA_HOME=/root/miniconda3/envs/vllm-gptq python setup.py bdist_wheel
echo -e "\n\n===\nOUTPUT:"
find /tmp/vllm-gptq -name '*.whl'
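# setuptools writes the wheel to dist/ inside the checkout, so the find above
# should surface it there. It can then be installed into a serving environment
# with pip; the exact filename depends on the vllm-gptq version and Python ABI,
# so a glob is used here:
#   pip install /tmp/vllm-gptq/dist/*.whl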