# Patch summary: pins torch==2.0.1 from the cu118 wheel index before and after the
# vLLM install in the Dockerfile, adds a `--progress=plain` hint to the README,
# and adds section comments and progress echoes to start-container.sh.
#
# NOTE(review): this patch was restored from a copy whose line breaks had been
# collapsed. The number of whitespace-only context lines per hunk follows from
# the hunk line counts, but their exact positions and the leading indentation
# of indented lines were inferred -- confirm against the target files before
# applying.
# NOTE(review): in start-container.sh, `touch /storage/vllm/cmd.txt` runs before
# the `[ ! -f /storage/vllm/cmd.txt ]` check, so the default-arguments branch
# cannot fire. Left untouched here because those are unchanged context lines.
# NOTE(review): `[ -f /storage/vllm/ssh ]` only matches a regular file, while the
# `cp -r` under it suggests a directory may be expected -- verify.
diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile
index d6f43ed..226ec11 100644
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@@ -18,7 +18,10 @@ RUN rm /tmp/rathole.zip
 RUN cp /tmp/rathole /app
 
 RUN python3 -m venv /venv
-RUN /venv/bin/pip install torch --index-url https://download.pytorch.org/whl/cu118
+
+# Install PyTorch before installing VLLM in an attempt to ensure we use the right
+# version for our CUDA install. (VLLM wants 2.0.1)
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
 
 WORKDIR /local-llm-server
 
@@ -27,6 +30,12 @@ WORKDIR /local-llm-server
 # ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 
+# Uninstall PyTorch since VLLM may have installed a version that is incompatible with
+# our CUDA version.
+RUN /venv/bin/pip3 uninstall -y torch
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+
+
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
 
 RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
diff --git a/other/vllm/Docker/README.md b/other/vllm/Docker/README.md
index 8468160..265f441 100644
--- a/other/vllm/Docker/README.md
+++ b/other/vllm/Docker/README.md
@@ -31,4 +31,5 @@ You **must** have a GPU attached to your system when building the container (req
 
 1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8.
 2. `sudo docker build .`
-   If you want to build the latest VLLM, add `--no-cache`.
+   If you want to build the latest VLLM, add `--no-cache`
+   Don't forget about `--progress=plain`
diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh
index fa3d2ca..f1667f6 100644
--- a/other/vllm/Docker/start-container.sh
+++ b/other/vllm/Docker/start-container.sh
@@ -1,28 +1,35 @@
 #!/bin/bash
 
-echo "LAUNCHING CONTAINER..."
-
 # Update the container repository and make sure pip dependancies are up to date.
+echo "UPDATING CONTAINER..."
 cd /local-llm-server || exit
 git fetch
 git reset --hard origin/master
 /venv/bin/pip install -r requirements.txt
 
+# Create the required directories and files.
+echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 chown -R apiserver:apiserver /storage/vllm
 touch /storage/vllm/cmd.txt
 touch /storage/vllm/rathole-client.toml
 
+# The user can store SSH auth and authorized_keys to streamline SSH login.
 if [ -f /storage/vllm/ssh ]; then
     cp -r /storage/vllm/ssh /root/.ssh
     echo "Copied ssh from /storage"
 fi
 
+# If the user has not created the VLLM commandline arg file, create the default.
 if [ ! -f /storage/vllm/cmd.txt ]; then
     echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 
+# Copy the idling notebook to storage. This will create a blank notebook every
+# time the container is started.
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
 
-# Start the services
+echo "LAUNCHING SERVICES..."
+
+# Start the services and launch the container.
 /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf