diff --git a/other/vllm/Docker/DOCKER.md b/other/vllm/Docker/DOCKER.md deleted file mode 100644 index 6abf6bf..0000000 --- a/other/vllm/Docker/DOCKER.md +++ /dev/null @@ -1,15 +0,0 @@ -**A Docker container for running VLLM on Paperspace Gradient notebooks.** - -1. Run `jupyter server --generate-config` and `jupyter server password` on your local machine, then copy Jupyter's config directory to `./jupyter` -2. Place your Rathole client config at `./rathole-client.toml` -3. `docker build . -t "paperspace-vllm"` - -To test on your local machine, run this command: - -```bash -docker run --shm-size 14g --gpus all \ - -v /storage/models/awq/MythoMax-L2-13B-AWQ:/models/MythoMax-L2-13B-AWQ \ - -p 7000:7000 -p 8888:8888 \ - -e API_SERVER_ARGS="--model /models/MythoMax-L2-13B-AWQ --quantization awq --max-num-batched-tokens 99999 --gpu-memory-utilization 1" \ - vllm-cloud -``` \ No newline at end of file diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile index d3c02e8..7ebe7b0 100644 --- a/other/vllm/Docker/Dockerfile +++ b/other/vllm/Docker/Dockerfile @@ -1,87 +1,50 @@ -FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build - -RUN apt-get update && \ - apt-get install -y git python3-pip python3-venv wget unzip && \ - rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip setuptools wheel - -RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server - -WORKDIR /local-llm-server - -RUN python3 -m venv /venv -RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm - -RUN python3 -m venv /jupyterlab -RUN /jupyterlab/bin/pip install jupyterlab -RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" - -RUN mkdir -p /app -RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip -RUN unzip -j /tmp/rathole.zip -d /tmp -RUN rm /tmp/rathole.zip -RUN cp /tmp/rathole /app - -# The local local-llm-server repo may be cached, 
so we will fetch and reset to the remote every time. -# Also, make sure there weren't any pip deps added. -ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache -RUN git fetch; git reset --hard origin/master -RUN /venv/bin/pip install -r requirements.txt - -FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime - -RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/* +FROM cyberes/vllm-paperspace-base as runtime RUN useradd -ms /bin/bash apiserver RUN usermod -s /bin/bash root +# Required packages RUN apt-get update && \ - apt-get install -y python3 python3-pip wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \ + apt-get install -y python3 python3-pip supervisor && \ + rm -rf /var/lib/apt/lists/* +RUN pip3 install --upgrade pip setuptools wheel + +# Useful Python packages +RUN pip3 install glances + +# Useful tools +RUN apt-get update && \ + apt-get install -y wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \ rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip setuptools wheel -RUN pip3 install glances +# Update the git repo +RUN cd /local-llm-server && git reset --hard && git pull # Enable root SSH login RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config - # Disable password SSH login RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config - -# Create the necessary directory for SSH +# Create the necessary directory for sshd RUN mkdir /var/run/sshd -ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache - -COPY --from=build /local-llm-server /local-llm-server -COPY --from=build /venv /venv -COPY --from=build /app /app -COPY --from=build /jupyterlab /jupyterlab - -RUN cp /local-llm-server/other/vllm/Docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf -RUN cp /local-llm-server/other/vllm/Docker/start-vllm.sh /app/start-vllm.sh -RUN cp 
/local-llm-server/other/vllm/Docker/start-container.sh /app/start.sh - -# Copy your secrets in -# COPY ./jupyter /app/jupyter +COPY supervisord.conf /etc/supervisor/supervisord.conf +COPY start-vllm.sh /app/start-vllm.sh +COPY init-container.sh /app/init.sh +COPY start-container.sh /app/start.sh RUN mkdir -p /var/log/app/ RUN chown -R apiserver:apiserver /local-llm-server && \ chown -R apiserver:apiserver /app && \ chown -R apiserver:apiserver /var/log/app/ +RUN git config --global --add safe.directory /local-llm-server +RUN chmod +x /app/init.sh RUN chmod +x /app/start.sh ENV SHELL="/bin/bash" -# SSH -EXPOSE 22 - -# VLLM -EXPOSE 7000 - -# Jupyter +# Expose Jupyter. We don't need to expose VLLM or SSH since rathole will tunnel those. EXPOSE 8888 CMD /app/start.sh diff --git a/other/vllm/Docker/Dockerfile.base b/other/vllm/Docker/Dockerfile.base new file mode 100644 index 0000000..bcd4d6f --- /dev/null +++ b/other/vllm/Docker/Dockerfile.base @@ -0,0 +1,43 @@ +# This container builds and assembles the Python parts of the Docker container. +# It is used as the base for the resulting container, which avoids having to re-push +# the large PyTorch parts every time the application is rebuilt. 
+ +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build + +RUN apt-get update && \ + apt-get install -y git python3-pip python3-venv wget unzip && \ + rm -rf /var/lib/apt/lists/* +RUN pip install --upgrade pip setuptools wheel + +RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server + +RUN python3 -m venv /jupyterlab +RUN /jupyterlab/bin/pip install jupyterlab +RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +RUN mkdir -p /app +RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip +RUN unzip -j /tmp/rathole.zip -d /tmp +RUN rm /tmp/rathole.zip +RUN cp /tmp/rathole /app + +RUN python3 -m venv /venv +RUN /venv/bin/pip3 install --upgrade pip setuptools wheel + +# Install PyTorch before installing VLLM to ensure we use the right version for our CUDA install. +RUN wget -q -O - https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt | grep -E 'torch*' > /tmp/torch_version +RUN /venv/bin/pip3 install "$(cat /tmp/torch_version)" --index-url https://download.pytorch.org/whl/cu118 + +# WORKDIR /local-llm-server + +# Don't build VLLM because we don't do that on the inference server. Just install from pip. +# RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm + +RUN /venv/bin/pip install vllm + +FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as base + +COPY --from=build /local-llm-server /local-llm-server +COPY --from=build /venv /venv +COPY --from=build /app /app +COPY --from=build /jupyterlab /jupyterlab diff --git a/other/vllm/Docker/README.md b/other/vllm/Docker/README.md new file mode 100644 index 0000000..97faf32 --- /dev/null +++ b/other/vllm/Docker/README.md @@ -0,0 +1,47 @@ +**A Docker container for running VLLM on Paperspace Gradient notebooks.** + +### Running + +1. In Paperspace, create a new notebook. +2. Click `Start from Scratch`. +3. 
Select your GPU and set the auto-shutdown timeout to 6 hours. +4. Click the `View Advanced Options` button at the bottom of the page. Enter these details in the form that appears: + - Container Name: `cyberes/vllm-paperspace:latest` + - Container Command: `/app/start.sh` +5. Start the notebook. It may take up to five minutes for them to pull and start the custom image. +6. Once the container is started, open the log viewer by clicking the icon in the bottom left of the screen. You should see errors from rathole and VLLM as a result of the blank config files. The container will create a new directory in your mounted + storage: `/storage/vllm/`. +7. Enter your rathole client config in `/storage/vllm/rathole-client.toml`. If you need a visual text editor, first link the directory back to the Jupyter home: `ln -s /storage/vllm /notebooks` +8. Restart rathole with `supervisorctl restart rathole` and then view the log: `tail -f /var/log/app/rathole.log`. If you see lines that start with `INFO` and end with `Control channel established`, rathole has connected and is working. Error messages will begin + with `ERROR`. +9. Download an AWQ quantization from [TheBloke](https://huggingface.co/TheBloke) to `/storage/vllm/models/`. +10. Enter your VLLM commandline args in `/storage/vllm/cmd.txt`. You need to set `--model` to the path of the model you want to load. +11. Restart VLLM with `supervisorctl restart vllm` and then view the log: `tail -f /var/log/app/vllm.log`. It may take up to three minutes to load. When you see the line: + ``` + INFO: Uvicorn running on http://0.0.0.0:7000 (Press CTRL+C to quit) + ``` +       VLLM is running and ready for queries. + +12. In `/notebooks` (the home directory of Jupyter), the notebook `idle.ipynb` will automatically be created. Run this notebook so Paperspace does not shut down your machine due to "inactivity". You **must** keep the running notebook open in a + browser tab.
+ +### Building + +You **must** have a GPU attached to your system when building the container (required for building VLLM). + +1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8. +2. `bash build-docker.sh` + +To run the container on your local machine: + +```bash +sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 cyberes/vllm-paperspace:latest +``` + +You will need to create a directory to mount inside the container (for example: `/home/user/testing123/`). Within this should be the folder `models` that holds the model to load, `rathole-client.toml`, and `cmd.txt`. + +If you need to debug something, you can start a shell inside the container: + +```bash +sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 --entrypoint bash cyberes/vllm-paperspace:latest +``` diff --git a/other/vllm/Docker/build-docker.sh b/other/vllm/Docker/build-docker.sh new file mode 100644 index 0000000..f95ad4f --- /dev/null +++ b/other/vllm/Docker/build-docker.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Build and push the container. + +git pull || exit +sudo docker build . -f Dockerfile.base -t cyberes/vllm-paperspace-base --no-cache && sudo docker push cyberes/vllm-paperspace-base:latest || exit +sudo docker build . 
-t cyberes/vllm-paperspace && sudo docker push cyberes/vllm-paperspace:latest diff --git a/other/vllm/Docker/idle.ipynb b/other/vllm/Docker/idle.ipynb new file mode 100644 index 0000000..057e227 --- /dev/null +++ b/other/vllm/Docker/idle.ipynb @@ -0,0 +1,40 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "49ae6555-572b-4463-ba01-cc4331932a6c", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "i = 0\n", + "while True:\n", + " print(i)\n", + " i += 1\n", + " time.sleep(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/other/vllm/Docker/init-container.sh b/other/vllm/Docker/init-container.sh new file mode 100644 index 0000000..111646c --- /dev/null +++ b/other/vllm/Docker/init-container.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Create the required directories and files. +echo "SETTING UP FILE SYSTEM..." +mkdir -p /storage/vllm/ +chown -R apiserver:apiserver /storage/vllm +touch /storage/vllm/cmd.txt +touch /storage/vllm/rathole-client.toml + +# The user can store SSH auth and authorized_keys to streamline SSH login. +if [ -f /storage/vllm/ssh ]; then + cp -r /storage/vllm/ssh /root/.ssh + echo "Copied ssh from /storage" +fi + +# If the user has not created the VLLM commandline arg file, create the default. +if [ ! -f /storage/vllm/cmd.txt ]; then + echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt +fi + +# Copy the idling notebook to storage. This will create a blank notebook every time the container is started. 
+cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh index 0b98702..05587a1 100644 --- a/other/vllm/Docker/start-container.sh +++ b/other/vllm/Docker/start-container.sh @@ -1,13 +1,4 @@ #!/bin/bash -mkdir -p /storage/vllm/ -chown -R apiserver:apiserver /storage/vllm -touch /storage/vllm/cmd.txt -touch /storage/vllm/rathole-client.toml - -if [ -f /storage/vllm/ssh ]; then - cp -r /storage/vllm/ssh /root/.ssh - echo "Copied ssh from /storage" -fi - -/usr/bin/supervisord +# Start the services and launch the container. +/usr/bin/supervisord -c /etc/supervisor/supervisord.conf diff --git a/other/vllm/Docker/start-vllm.sh b/other/vllm/Docker/start-vllm.sh index 906bc30..209e90a 100644 --- a/other/vllm/Docker/start-vllm.sh +++ b/other/vllm/Docker/start-vllm.sh @@ -6,9 +6,4 @@ for pid in $vllm_pid; do kill -9 $pid done -cd /local-llm-server -git fetch -git reset --hard origin/master -/venv/bin/pip install -r requirements.txt - /venv/bin/python /local-llm-server/other/vllm/vllm_api_server.py --host 0.0.0.0 --port 7000 --max-log-len 100 $(cat /storage/vllm/cmd.txt) diff --git a/other/vllm/Docker/supervisord.conf b/other/vllm/Docker/supervisord.conf index 9361bdb..800cb27 100644 --- a/other/vllm/Docker/supervisord.conf +++ b/other/vllm/Docker/supervisord.conf @@ -1,5 +1,25 @@ [supervisord] -nodaemon=true +nodaemon = true +user=root +pidfile = /var/run/supervisord.pid +logfile = /var/log/app/supervisord.log +directory = /tmp + +[unix_http_server] +file=/var/run/supervisor.sock +chmod=0770 + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[supervisorctl] +serverurl=unix:///var/run/supervisor.sock + +[program:startup] +command=/app/init.sh +autostart=true +autorestart=false +startsecs=0 [program:vllm] command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log' @@ -24,9 +44,20 @@ 
user=apiserver environment=HOME="/home/apiserver",USER="apiserver" [program:jupyter] -command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True +command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True --notebook-dir /notebooks environment=SHELL="/bin/bash" -; JUPYTER_CONFIG_DIR="/app/jupyter" +autostart=true +autorestart=true +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/fd/2 +stderr_logfile_maxbytes=0 [program:ssh] command=/usr/sbin/sshd -D +autostart=true +autorestart=true +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/fd/2 +stderr_logfile_maxbytes=0 diff --git a/other/vllm/Docker/update-container.sh b/other/vllm/Docker/update-container.sh new file mode 100755 index 0000000..d44d6d9 --- /dev/null +++ b/other/vllm/Docker/update-container.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Run this script to update the container. +# Will restart VLLM as well. + +cd /local-llm-server || exit + +git fetch +git reset --hard origin/master + +supervisorctl restart vllm