From ee44371fdfdf22c17eaed28f64bc9ffec5c4dc43 Mon Sep 17 00:00:00 2001
From: Cyberes <cyberes@evulid.cc>
Date: Fri, 27 Oct 2023 19:05:27 -0600
Subject: [PATCH] Merge branch 'master' into cluster

---
 other/vllm/Docker/DOCKER.md           | 15 -----
 other/vllm/Docker/Dockerfile          | 81 ++++++++-------------------
 other/vllm/Docker/Dockerfile.base     | 43 ++++++++++++++
 other/vllm/Docker/README.md           | 47 ++++++++++++++++
 other/vllm/Docker/build-docker.sh     |  7 +++
 other/vllm/Docker/idle.ipynb          | 40 +++++++++++++
 other/vllm/Docker/init-container.sh   | 22 ++++++++
 other/vllm/Docker/start-container.sh  | 13 +----
 other/vllm/Docker/start-vllm.sh       |  5 --
 other/vllm/Docker/supervisord.conf    | 37 +++++++++++-
 other/vllm/Docker/update-container.sh | 11 ++++
 11 files changed, 228 insertions(+), 93 deletions(-)
 delete mode 100644 other/vllm/Docker/DOCKER.md
 create mode 100644 other/vllm/Docker/Dockerfile.base
 create mode 100644 other/vllm/Docker/README.md
 create mode 100644 other/vllm/Docker/build-docker.sh
 create mode 100644 other/vllm/Docker/idle.ipynb
 create mode 100644 other/vllm/Docker/init-container.sh
 create mode 100755 other/vllm/Docker/update-container.sh

diff --git a/other/vllm/Docker/DOCKER.md b/other/vllm/Docker/DOCKER.md
deleted file mode 100644
index 6abf6bf..0000000
--- a/other/vllm/Docker/DOCKER.md
+++ /dev/null
@@ -1,15 +0,0 @@
-**A Docker container for running VLLM on Paperspace Gradient notebooks.**
-
-1. Run `jupyter server --generate-config` and `jupyter server password` on your local machine, then copy Jupyter's config directory to `./jupyter`
-2. Place your Rathole client config at `./rathole-client.toml`
-3. `docker build . -t "paperspace-vllm"`
-
-To test on your local machine, run this command:
-
-```bash
-docker run --shm-size 14g --gpus all \
-  -v /storage/models/awq/MythoMax-L2-13B-AWQ:/models/MythoMax-L2-13B-AWQ \
-  -p 7000:7000 -p 8888:8888 \
-  -e API_SERVER_ARGS="--model /models/MythoMax-L2-13B-AWQ --quantization awq --max-num-batched-tokens 99999 --gpu-memory-utilization 1" \
-  vllm-cloud
-```
\ No newline at end of file
diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile
index d3c02e8..7ebe7b0 100644
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@@ -1,87 +1,50 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build
-
-RUN apt-get update && \
-    apt-get install -y git python3-pip python3-venv wget unzip && \
-    rm -rf /var/lib/apt/lists/*
-RUN pip3 install --upgrade pip setuptools wheel
-
-RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
-
-WORKDIR /local-llm-server
-
-RUN python3 -m venv /venv
-RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
-
-RUN python3 -m venv /jupyterlab
-RUN /jupyterlab/bin/pip install jupyterlab
-RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
-
-RUN mkdir -p /app
-RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
-RUN unzip -j /tmp/rathole.zip -d /tmp
-RUN rm /tmp/rathole.zip
-RUN cp /tmp/rathole /app
-
-# The local local-llm-server repo may be cached, so we will fetch and reset to the remote every time.
-# Also, make sure there weren't any pip deps added.
-ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
-RUN git fetch; git reset --hard origin/master
-RUN /venv/bin/pip install -r requirements.txt
-
-FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
-
-RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
+FROM cyberes/vllm-paperspace-base as runtime
 
 RUN useradd -ms /bin/bash apiserver
 RUN usermod -s /bin/bash root
 
+# Required packages
 RUN apt-get update && \
-    apt-get install -y python3 python3-pip wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \
+    apt-get install -y python3 python3-pip supervisor  && \
+    rm -rf /var/lib/apt/lists/*
+RUN pip3 install --upgrade pip setuptools wheel
+
+# Useful Python packages
+RUN pip3 install glances
+
+# Useful tools
+RUN apt-get update && \
+    apt-get install -y wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \
     rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install --upgrade pip setuptools wheel
-RUN pip3 install glances
+# Bring the /local-llm-server checkout up to date: discard any local changes, then pull.
+RUN cd /local-llm-server && git reset --hard && git pull
 
 # Enable root SSH login
 RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
-
 # Disable password SSH login
 RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
-
-# Create the necessary directory for SSH
+# Create the necessary directory for sshd
 RUN mkdir /var/run/sshd
 
-ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
-
-COPY --from=build /local-llm-server /local-llm-server
-COPY --from=build /venv /venv
-COPY --from=build /app /app
-COPY --from=build /jupyterlab /jupyterlab
-
-RUN cp /local-llm-server/other/vllm/Docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
-RUN cp /local-llm-server/other/vllm/Docker/start-vllm.sh /app/start-vllm.sh
-RUN cp /local-llm-server/other/vllm/Docker/start-container.sh /app/start.sh
-
-# Copy your secrets in
-# COPY ./jupyter /app/jupyter
+COPY supervisord.conf /etc/supervisor/supervisord.conf
+COPY start-vllm.sh /app/start-vllm.sh
+COPY init-container.sh /app/init.sh
+COPY start-container.sh /app/start.sh
 
 RUN mkdir -p /var/log/app/
 
 RUN chown -R apiserver:apiserver /local-llm-server && \
     chown -R apiserver:apiserver /app && \
     chown -R apiserver:apiserver /var/log/app/
+RUN git config --global --add safe.directory /local-llm-server
 
+RUN chmod +x /app/init.sh
 RUN chmod +x /app/start.sh
 
 ENV SHELL="/bin/bash"
 
-# SSH
-EXPOSE 22
-
-# VLLM
-EXPOSE 7000
-
-# Jupyter
+# Expose Jupyter. We don't need to expose VLLM or SSH since rathole will tunnel those.
 EXPOSE 8888
 
 CMD /app/start.sh
diff --git a/other/vllm/Docker/Dockerfile.base b/other/vllm/Docker/Dockerfile.base
new file mode 100644
index 0000000..bcd4d6f
--- /dev/null
+++ b/other/vllm/Docker/Dockerfile.base
@@ -0,0 +1,43 @@
+# This container builds and assembles the Python parts of the Docker container.
+# It is used as the base for the resulting container, which avoids having to re-push
+# the large PyTorch parts every time the application is rebuilt.
+
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build
+
+RUN apt-get update && \
+    apt-get install -y git python3-pip python3-venv wget unzip && \
+    rm -rf /var/lib/apt/lists/*
+RUN pip install --upgrade pip setuptools wheel
+
+RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
+
+RUN python3 -m venv /jupyterlab
+RUN /jupyterlab/bin/pip install jupyterlab
+RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
+
+RUN mkdir -p /app
+RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
+RUN unzip -j /tmp/rathole.zip -d /tmp
+RUN rm /tmp/rathole.zip
+RUN cp /tmp/rathole /app
+
+RUN python3 -m venv /venv
+RUN /venv/bin/pip3 install --upgrade pip setuptools wheel
+
+# Install PyTorch before VLLM so we get the cu118 build. The pattern is anchored so only the `torch` requirement is selected (not e.g. torchvision).
+RUN wget -q -O - https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt | grep -E '^torch([^A-Za-z0-9._-]|$)' > /tmp/torch_version
+RUN /venv/bin/pip3 install "$(cat /tmp/torch_version)" --index-url https://download.pytorch.org/whl/cu118
+
+# WORKDIR /local-llm-server
+
+# Don't build VLLM because we don't do that on the inference server. Just install from pip.
+# RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
+
+RUN /venv/bin/pip install vllm
+
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base
+
+COPY --from=build /local-llm-server /local-llm-server
+COPY --from=build /venv /venv
+COPY --from=build /app /app
+COPY --from=build /jupyterlab /jupyterlab
diff --git a/other/vllm/Docker/README.md b/other/vllm/Docker/README.md
new file mode 100644
index 0000000..97faf32
--- /dev/null
+++ b/other/vllm/Docker/README.md
@@ -0,0 +1,47 @@
+**A Docker container for running VLLM on Paperspace Gradient notebooks.**
+
+### Running
+
+1. In Paperspace, create a new notebook.
+2. Click `Start from Scratch`.
+3. Select your GPU and set the auto-shutdown timeout to 6 hours.
+4. Click the `View Advanced Options` button at the bottom of the page. Enter these details in the form that appears:
+    - Container Name: `cyberes/vllm-paperspace:latest`
+    - Container Command: `/app/start.sh`
+5. Start the notebook. It may take up to five minutes for Paperspace to pull and start the custom image.
+6. Once the container is started, open the log viewer by clicking the icon in the bottom left of the screen. You should see errors from rathole and VLLM as a result of the blank config files. The container will create a new directory in your mounted
+   storage: `/storage/vllm/`.
+7. Enter your rathole client config in `/storage/vllm/rathole-client.toml`. If you need a visual text editor, first link the directory back to the Jupyter home: `ln -s /storage/vllm /notebooks`
+8. Restart rathole with `supervisorctl restart rathole` and then view the log: `tail -f /var/log/app/rathole.log`. If you see lines that start with `INFO` and end with `Control channel established`, rathole has connected and is working. Error messages will begin
+   with `ERROR`.
+9. Download an AWQ quantization from [TheBloke](https://huggingface.co/TheBloke) to `/storage/vllm/models/`.
+10. Enter your VLLM commandline args in `/storage/vllm/cmd.txt`. You need to set `--model` to the path of the model you want to load.
+11. Restart VLLM with `supervisorctl restart vllm` and then view the log: `tail -f /var/log/app/vllm.log`. It may take up to three minutes to load. When you see the line:
+   ```
+  INFO:     Uvicorn running on http://0.0.0.0:7000 (Press CTRL+C to quit)
+   ```
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;VLLM is running and ready for queries.
+
+12. In `/notebooks` (the home directory of Jupyter), the notebook `idle.ipynb` will automatically be created. Run this notebook so Paperspace does not shut down your machine due to "inactivity". You **must** keep the running notebook open in a
+    browser tab.
+
+### Building
+
+You **must** have a GPU attached to your system when building the container (required for building VLLM).
+
+1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8.
+2. `bash build-docker.sh`
+
+To run the container on your local machine:
+
+```bash
+sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 cyberes/vllm-paperspace:latest
+```
+
+You will need to create a directory to mount inside the container (for example: `/home/user/testing123/`). Within this should be the folder `models` that holds the model to load, `rathole-client.toml`, and `cmd.txt`.
+
+If you need to debug something, you can start a shell inside the container:
+
+```bash
+sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 --entrypoint bash cyberes/vllm-paperspace:latest
+```
diff --git a/other/vllm/Docker/build-docker.sh b/other/vllm/Docker/build-docker.sh
new file mode 100644
index 0000000..f95ad4f
--- /dev/null
+++ b/other/vllm/Docker/build-docker.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Pull the latest repo state, rebuild the base image without cache and push it, then build and push the application image. Exits if the pull or the base build/push fails.
+
+git pull || exit
+sudo docker build . -f Dockerfile.base -t cyberes/vllm-paperspace-base --no-cache && sudo docker push cyberes/vllm-paperspace-base:latest  || exit
+sudo docker build . -t cyberes/vllm-paperspace && sudo docker push cyberes/vllm-paperspace:latest
diff --git a/other/vllm/Docker/idle.ipynb b/other/vllm/Docker/idle.ipynb
new file mode 100644
index 0000000..057e227
--- /dev/null
+++ b/other/vllm/Docker/idle.ipynb
@@ -0,0 +1,40 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49ae6555-572b-4463-ba01-cc4331932a6c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "i = 0\n",
+    "while True:\n",
+    "    print(i)\n",
+    "    i += 1\n",
+    "    time.sleep(1)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/other/vllm/Docker/init-container.sh b/other/vllm/Docker/init-container.sh
new file mode 100644
index 0000000..111646c
--- /dev/null
+++ b/other/vllm/Docker/init-container.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Create the storage directory and make sure an (initially empty) rathole config exists.
+echo "SETTING UP FILE SYSTEM..."
+mkdir -p /storage/vllm/
+touch /storage/vllm/rathole-client.toml
+
+# The user can store an SSH directory (auth, authorized_keys) to streamline SSH login; test with -d because it is a directory.
+if [ -d /storage/vllm/ssh ]; then
+  cp -r /storage/vllm/ssh /root/.ssh
+  echo "Copied ssh from /storage"
+fi
+
+# If the user has not created the VLLM commandline arg file, create the default.
+# cmd.txt must not be touched beforehand, otherwise this check would never fire.
+if [ ! -f /storage/vllm/cmd.txt ]; then
+  echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
+fi
+chown -R apiserver:apiserver /storage/vllm  # done after the files exist so they are owned by apiserver too
+
+# Copy the idling notebook to storage. This will create a blank notebook every time the container is started.
+cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh
index 0b98702..05587a1 100644
--- a/other/vllm/Docker/start-container.sh
+++ b/other/vllm/Docker/start-container.sh
@@ -1,13 +1,4 @@
 #!/bin/bash
 
-mkdir -p /storage/vllm/
-chown -R apiserver:apiserver /storage/vllm
-touch /storage/vllm/cmd.txt
-touch /storage/vllm/rathole-client.toml
-
-if [ -f /storage/vllm/ssh ]; then
-  cp -r /storage/vllm/ssh /root/.ssh
-  echo "Copied ssh from /storage"
-fi
-
-/usr/bin/supervisord
+# Start the services and launch the container. `exec` replaces this shell so supervisord receives stop signals directly.
+exec /usr/bin/supervisord -c /etc/supervisor/supervisord.conf
diff --git a/other/vllm/Docker/start-vllm.sh b/other/vllm/Docker/start-vllm.sh
index 906bc30..209e90a 100644
--- a/other/vllm/Docker/start-vllm.sh
+++ b/other/vllm/Docker/start-vllm.sh
@@ -6,9 +6,4 @@ for pid in $vllm_pid; do
   kill -9 $pid
 done
 
-cd /local-llm-server
-git fetch
-git reset --hard origin/master
-/venv/bin/pip install -r requirements.txt
-
 /venv/bin/python /local-llm-server/other/vllm/vllm_api_server.py --host 0.0.0.0 --port 7000 --max-log-len 100 $(cat /storage/vllm/cmd.txt)
diff --git a/other/vllm/Docker/supervisord.conf b/other/vllm/Docker/supervisord.conf
index 9361bdb..800cb27 100644
--- a/other/vllm/Docker/supervisord.conf
+++ b/other/vllm/Docker/supervisord.conf
@@ -1,5 +1,25 @@
 [supervisord]
-nodaemon=true
+nodaemon = true
+user=root
+pidfile = /var/run/supervisord.pid
+logfile = /var/log/app/supervisord.log
+directory = /tmp
+
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0770
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
+[program:startup]
+command=/app/init.sh
+autostart=true
+autorestart=false
+startsecs=0
 
 [program:vllm]
 command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log'
@@ -24,9 +44,20 @@ user=apiserver
 environment=HOME="/home/apiserver",USER="apiserver"
 
 [program:jupyter]
-command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True
+command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True --notebook-dir /notebooks
 environment=SHELL="/bin/bash"
-; JUPYTER_CONFIG_DIR="/app/jupyter"
+autostart=true
+autorestart=true
+stdout_logfile=/dev/fd/1
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/fd/2
+stderr_logfile_maxbytes=0
 
 [program:ssh]
 command=/usr/sbin/sshd -D
+autostart=true
+autorestart=true
+stdout_logfile=/dev/fd/1
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/fd/2
+stderr_logfile_maxbytes=0
diff --git a/other/vllm/Docker/update-container.sh b/other/vllm/Docker/update-container.sh
new file mode 100755
index 0000000..d44d6d9
--- /dev/null
+++ b/other/vllm/Docker/update-container.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Run this script to update the /local-llm-server checkout in the container.
+# Discards local changes (hard reset to origin/master), then restarts VLLM via supervisorctl.
+
+cd /local-llm-server || exit
+
+git fetch
+git reset --hard origin/master
+
+supervisorctl restart vllm