Merge branch 'master' into cluster

2023-10-27 19:05:27 -06:00 · 2023-10-27 19:05:27 -06:00 · ee44371fdf
parent 28c250385d
commit ee44371fdf
11 changed files with 228 additions and 93 deletions
--- a/other/vllm/Docker/DOCKER.md
+++ b/other/vllm/Docker/DOCKER.md
@ -1,15 +0,0 @@
 **A Docker container for running VLLM on Paperspace Gradient notebooks.**
 1. Run `jupyter server --generate-config` and `jupyter server password` on your local machine, then copy Jupyter's config directory to `./jupyter`
 2. Place your Rathole client config at `./rathole-client.toml`
 3. `docker build . -t "paperspace-vllm"`
 To test on your local machine, run this command:
 ```bash
 docker run --shm-size 14g --gpus all \
  -v /storage/models/awq/MythoMax-L2-13B-AWQ:/models/MythoMax-L2-13B-AWQ \
  -p 7000:7000 -p 8888:8888 \
  -e API_SERVER_ARGS="--model /models/MythoMax-L2-13B-AWQ --quantization awq --max-num-batched-tokens 99999 --gpu-memory-utilization 1" \
  vllm-cloud
 ```
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@ -1,87 +1,50 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build
+FROM cyberes/vllm-paperspace-base as runtime
 RUN apt-get update && \
    apt-get install -y git python3-pip python3-venv wget unzip && \
    rm -rf /var/lib/apt/lists/*
 RUN pip3 install --upgrade pip setuptools wheel
 RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
 WORKDIR /local-llm-server
 RUN python3 -m venv /venv
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 RUN python3 -m venv /jupyterlab
 RUN /jupyterlab/bin/pip install jupyterlab
 RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
 RUN mkdir -p /app
 RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
 RUN unzip -j /tmp/rathole.zip -d /tmp
 RUN rm /tmp/rathole.zip
 RUN cp /tmp/rathole /app
 # The local local-llm-server repo may be cached, so we will fetch and reset to the remote every time.
 # Also, make sure there weren't any pip deps added.
 ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 RUN git fetch; git reset --hard origin/master
 RUN /venv/bin/pip install -r requirements.txt
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
 RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
 RUN useradd -ms /bin/bash apiserver
 RUN usermod -s /bin/bash root
 # Required packages
 RUN apt-get update && \
-    apt-get install -y python3 python3-pip wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \
+    apt-get install -y python3 python3-pip supervisor  && \
    rm -rf /var/lib/apt/lists/*
 RUN pip3 install --upgrade pip setuptools wheel
 # Useful Python packages
 RUN pip3 install glances
 # Useful tools
 RUN apt-get update && \
    apt-get install -y wget aria2 git-lfs git openssh-server openssh-client nano tmux file && \
    rm -rf /var/lib/apt/lists/*
-RUN pip3 install --upgrade pip setuptools wheel
+# Update the git repo
-RUN pip3 install glances
+RUN cd /local-llm-server && git reset --hard && git pull
 # Enable root SSH login
 RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
 # Disable password SSH login
 RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config
-
+# Create the necessary directory for sshd
 # Create the necessary directory for SSH
 RUN mkdir /var/run/sshd
-ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
+COPY supervisord.conf /etc/supervisor/supervisord.conf
-
+COPY start-vllm.sh /app/start-vllm.sh
-COPY --from=build /local-llm-server /local-llm-server
+COPY init-container.sh /app/init.sh
-COPY --from=build /venv /venv
+COPY start-container.sh /app/start.sh
 COPY --from=build /app /app
 COPY --from=build /jupyterlab /jupyterlab
 RUN cp /local-llm-server/other/vllm/Docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 RUN cp /local-llm-server/other/vllm/Docker/start-vllm.sh /app/start-vllm.sh
 RUN cp /local-llm-server/other/vllm/Docker/start-container.sh /app/start.sh
 # Copy your secrets in
 # COPY ./jupyter /app/jupyter
 RUN mkdir -p /var/log/app/
 RUN chown -R apiserver:apiserver /local-llm-server && \
    chown -R apiserver:apiserver /app && \
    chown -R apiserver:apiserver /var/log/app/
 RUN git config --global --add safe.directory /local-llm-server
 RUN chmod +x /app/init.sh
 RUN chmod +x /app/start.sh
 ENV SHELL="/bin/bash"
-# SSH
+# Expose Jupyter. We don't need to expose VLLM or SSH since rathole will tunnel those.
 EXPOSE 22
 # VLLM
 EXPOSE 7000
 # Jupyter
 EXPOSE 8888
 CMD /app/start.sh
--- a/other/vllm/Docker/Dockerfile.base
+++ b/other/vllm/Docker/Dockerfile.base
@ -0,0 +1,43 @@
 # This container builds and assembles the Python parts of the Docker container.
 # It is used as the base for the resulting container, which avoids having to re-push
 # the large PyTorch parts every time the application is rebuilt.
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build
 # Run every RUN step under bash with pipefail so a failing command on the left side
 # of a pipe (e.g. the wget below) fails the build step itself.
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN apt-get update && \
    apt-get install -y git python3-pip python3-venv wget unzip && \
    rm -rf /var/lib/apt/lists/*
 RUN pip install --upgrade pip setuptools wheel
 RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
 # JupyterLab lives in its own venv so it does not share packages with the VLLM venv.
 RUN python3 -m venv /jupyterlab
 RUN /jupyterlab/bin/pip install jupyterlab
 RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
 # Download the rathole binary into /app. Fetch, extract and clean up in a single layer
 # so the zip archive is never stored in an image layer.
 RUN mkdir -p /app && \
    wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip && \
    unzip -j /tmp/rathole.zip -d /tmp && \
    rm /tmp/rathole.zip && \
    cp /tmp/rathole /app
 RUN python3 -m venv /venv
 RUN /venv/bin/pip3 install --upgrade pip setuptools wheel
 # Install PyTorch before installing VLLM to ensure we use the right version for our CUDA install.
 # The pattern is anchored to the exact package name "torch" so that other requirements whose
 # names merely start with "torch" cannot end up in the version file.
 RUN wget -q -O - https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt | grep -E '^torch([^[:alnum:]_-]|$)' > /tmp/torch_version
 RUN /venv/bin/pip3 install "$(cat /tmp/torch_version)" --index-url https://download.pytorch.org/whl/cu118
 # WORKDIR /local-llm-server
 # Don't build VLLM because we don't do that on the inference server. Just install from pip.
 # RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 RUN /venv/bin/pip install vllm
 # Final stage: only the assembled artifacts are copied onto the smaller CUDA "base" image.
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base
 COPY --from=build /local-llm-server /local-llm-server
 COPY --from=build /venv /venv
 COPY --from=build /app /app
 COPY --from=build /jupyterlab /jupyterlab
--- a/other/vllm/Docker/README.md
+++ b/other/vllm/Docker/README.md
@ -0,0 +1,47 @@
 **A Docker container for running VLLM on Paperspace Gradient notebooks.**
 ### Running
 1. In Paperspace, create a new notebook.
 2. Click `Start from Scratch`.
 3. Select your GPU and set the auto-shutdown timeout to 6 hours.
 4. Click the `View Advanced Options` button at the bottom of the page. Enter these details in the form that appears:
    - Container Name: `cyberes/vllm-paperspace:latest`
    - Container Command: `/app/start.sh`
 5. Start the notebook. It may take up to five minutes for Paperspace to pull and start the custom image.
 6. Once the container is started, open the log viewer by clicking the icon in the bottom left of the screen. You should see errors from rathole and VLLM as a result of the blank config files. The container will create a new directory in your mounted
   storage: `/storage/vllm/`.
 7. Enter your rathole client config in `/storage/vllm/rathole-client.toml`. If you need a visual text editor, first link the directory back to the Jupyter home: `ln -s /storage/vllm /notebooks`
 8. Restart rathole with `supervisorctl restart rathole` and then view the log: `tail -f /var/log/app/rathole.log`. If you see lines that start with `INFO` and end with `Control channel established`, rathole has connected and is working. Error messages will begin
   with `ERROR`.
 9. Download an AWQ quantization from [TheBloke](https://huggingface.co/TheBloke) to `/storage/vllm/models/`.
 10. Enter your VLLM commandline args in `/storage/vllm/cmd.txt`. You need to set `--model` to the path of the model you want to load.
 11. Restart VLLM with `supervisorctl restart vllm` and then view the log: `tail -f /var/log/app/vllm.log`. It may take up to three minutes to load. When you see the line:
   ```
  INFO:     Uvicorn running on http://0.0.0.0:7000 (Press CTRL+C to quit)
   ```
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;VLLM is running and ready for queries.
 12. In `/notebooks` (the home directory of Jupyter), the notebook `idle.ipynb` will automatically be created. Run this notebook so Paperspace does not shut down your machine due to "inactivity". You **must** keep the running notebook open in a
    browser tab.
 ### Building
 You **must** have a GPU attached to your system when building the container (required for building VLLM).
 1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8.
 2. `bash build-docker.sh`
 To run the container on your local machine:
 ```bash
 sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 cyberes/vllm-paperspace:latest
 ```
 You will need to create a directory to mount inside the container (for example: `/home/user/testing123/`). Within this should be the folder `models` that holds the model to load, `rathole-client.toml`, and `cmd.txt`.
 If you need to debug something, you can start a shell inside the container:
 ```bash
 sudo docker run -it --shm-size 14g --gpus all -v /home/user/testing123/notebooks:/notebooks -v /home/user/testing123/storage:/storage -p 8888:8888 --entrypoint bash cyberes/vllm-paperspace:latest
 ```
--- a/other/vllm/Docker/build-docker.sh
+++ b/other/vllm/Docker/build-docker.sh
@ -0,0 +1,7 @@
 #!/bin/bash
 # Build and push both container images: the base image first, then the
 # application image.
 BASE_IMAGE="cyberes/vllm-paperspace-base"
 APP_IMAGE="cyberes/vllm-paperspace"
 # Sync the working tree first; stop if the pull fails.
 git pull || exit
 # The base image is always rebuilt from scratch (--no-cache) and pushed.
 # Any failure here aborts the script with that command's exit status.
 sudo docker build . -f Dockerfile.base -t "$BASE_IMAGE" --no-cache || exit
 sudo docker push "$BASE_IMAGE:latest" || exit
 # Application image: only pushed when its build succeeded.
 sudo docker build . -t "$APP_IMAGE" && sudo docker push "$APP_IMAGE:latest"
--- a/other/vllm/Docker/idle.ipynb
+++ b/other/vllm/Docker/idle.ipynb
@ -0,0 +1,40 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49ae6555-572b-4463-ba01-cc4331932a6c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "i = 0\n",
    "while True:\n",
    "    print(i)\n",
    "    i += 1\n",
    "    time.sleep(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/other/vllm/Docker/init-container.sh
+++ b/other/vllm/Docker/init-container.sh
@ -0,0 +1,22 @@
 #!/bin/bash
 # Create the required directories and files.
 echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 # If the user has not created the VLLM commandline arg file, create the default.
 # This check must run before anything else creates cmd.txt; touching the file
 # first would make the check always false and the default would never be written.
 if [ ! -f /storage/vllm/cmd.txt ]; then
  echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 # Make sure the rathole client config exists (blank until the user fills it in).
 touch /storage/vllm/rathole-client.toml
 # Hand ownership to apiserver after the files exist so the newly created ones are covered too.
 chown -R apiserver:apiserver /storage/vllm
 # The user can store SSH auth and authorized_keys to streamline SSH login.
 # /storage/vllm/ssh is a directory, so test with -d and copy its contents into
 # /root/.ssh; copying the contents avoids nesting a "ssh" directory inside an
 # already existing /root/.ssh.
 if [ -d /storage/vllm/ssh ]; then
  mkdir -p /root/.ssh
  cp -r /storage/vllm/ssh/. /root/.ssh/
  echo "Copied ssh from /storage"
 fi
 # Copy the idling notebook to storage. This will create a blank notebook every time the container is started.
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
--- a/other/vllm/Docker/start-container.sh
+++ b/other/vllm/Docker/start-container.sh
@ -1,13 +1,4 @@
 #!/bin/bash
-mkdir -p /storage/vllm/
+# Start the services and launch the container.
-chown -R apiserver:apiserver /storage/vllm
+/usr/bin/supervisord -c /etc/supervisor/supervisord.conf
 touch /storage/vllm/cmd.txt
 touch /storage/vllm/rathole-client.toml
 if [ -f /storage/vllm/ssh ]; then
  cp -r /storage/vllm/ssh /root/.ssh
  echo "Copied ssh from /storage"
 fi
 /usr/bin/supervisord
--- a/other/vllm/Docker/start-vllm.sh
+++ b/other/vllm/Docker/start-vllm.sh
@ -6,9 +6,4 @@ for pid in $vllm_pid; do
  kill -9 $pid
 done
 cd /local-llm-server
 git fetch
 git reset --hard origin/master
 /venv/bin/pip install -r requirements.txt
 /venv/bin/python /local-llm-server/other/vllm/vllm_api_server.py --host 0.0.0.0 --port 7000 --max-log-len 100 $(cat /storage/vllm/cmd.txt)
--- a/other/vllm/Docker/supervisord.conf
+++ b/other/vllm/Docker/supervisord.conf
@ -1,5 +1,25 @@
 ; Global supervisord settings for the container.
 [supervisord]
 ; Stay in the foreground instead of daemonizing.
 nodaemon = true
 user=root
 pidfile = /var/run/supervisord.pid
 logfile = /var/log/app/supervisord.log
 directory = /tmp
 ; Control socket; the same path is used as serverurl in [supervisorctl] below.
 [unix_http_server]
 file=/var/run/supervisor.sock
 chmod=0770
 ; RPC interface that supervisorctl talks to over the socket.
 [rpcinterface:supervisor]
 supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
 [supervisorctl]
 serverurl=unix:///var/run/supervisor.sock
 ; One-shot initialisation script: started automatically and not restarted when it exits.
 [program:startup]
 command=/app/init.sh
 autostart=true
 autorestart=false
 ; startsecs=0: the program does not need to stay running to count as started.
 startsecs=0
 [program:vllm]
 command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log'
@ -24,9 +44,20 @@ user=apiserver
 environment=HOME="/home/apiserver",USER="apiserver"
 [program:jupyter]
-command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True
+command=/jupyterlab/bin/jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True --notebook-dir /notebooks
 environment=SHELL="/bin/bash"
-; JUPYTER_CONFIG_DIR="/app/jupyter"
+autostart=true
 autorestart=true
 stdout_logfile=/dev/fd/1
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/fd/2
 stderr_logfile_maxbytes=0
 [program:ssh]
 command=/usr/sbin/sshd -D
 autostart=true
 autorestart=true
 stdout_logfile=/dev/fd/1
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/fd/2
 stderr_logfile_maxbytes=0
--- a/other/vllm/Docker/update-container.sh
+++ b/other/vllm/Docker/update-container.sh
@ -0,0 +1,11 @@
 #!/bin/bash
 # Update the container in place: reset the local checkout to the remote
 # master branch, then restart VLLM.
 REPO_DIR=/local-llm-server
 # Stop if the checkout is missing instead of running git in another directory.
 cd "$REPO_DIR" || exit
 # Fetch the remote state and discard local changes so the tree matches origin/master.
 git fetch
 git reset --hard origin/master
 # Restart the VLLM service through supervisor.
 supervisorctl restart vllm