From d9d3e03f59bc6c7057214dc3e0baf52536db7ce9 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Sun, 15 Oct 2023 15:38:48 -0600 Subject: [PATCH] docker: split into base and runtime --- other/vllm/Docker/Dockerfile | 41 ++-------------------------- other/vllm/Docker/Dockerfile.base | 39 ++++++++++++++++++++++++++ other/vllm/Docker/init-container.sh | 25 +++++++++++++++++ other/vllm/Docker/start-container.sh | 26 ------------------ other/vllm/Docker/supervisord.conf | 6 ++++ 5 files changed, 72 insertions(+), 65 deletions(-) create mode 100644 other/vllm/Docker/Dockerfile.base create mode 100644 other/vllm/Docker/init-container.sh diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile index 6e4b29a..2d16c95 100644 --- a/other/vllm/Docker/Dockerfile +++ b/other/vllm/Docker/Dockerfile @@ -1,37 +1,4 @@ -FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build - -RUN apt-get update && \ - apt-get install -y git python3-pip python3-venv wget unzip && \ - rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip setuptools wheel - -RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server - -RUN python3 -m venv /jupyterlab -RUN /jupyterlab/bin/pip install jupyterlab -RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" - -RUN mkdir -p /app -RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip -RUN unzip -j /tmp/rathole.zip -d /tmp -RUN rm /tmp/rathole.zip -RUN cp /tmp/rathole /app - -RUN python3 -m venv /venv -RUN /venv/bin/pip3 install --upgrade pip setuptools wheel - -# Install PyTorch before installing VLLM in an attempt to ensure we use the right -# version for our CUDA install. (VLLM wants 2.0.1) -RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118 - -WORKDIR /local-llm-server - -# We don't need to rebuild VLLM every time we build the container. But if we need -# to, uncomment the following line. 
-# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache -RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm - -FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime +FROM cyberes/paperspace-vllm-base as runtime RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/* @@ -57,13 +24,9 @@ RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/s # Create the necessary directory for sshd RUN mkdir /var/run/sshd -COPY --from=build /local-llm-server /local-llm-server -COPY --from=build /venv /venv -COPY --from=build /app /app -COPY --from=build /jupyterlab /jupyterlab - COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY start-vllm.sh /app/start-vllm.sh +COPY init-container.sh /app/init.sh COPY start-container.sh /app/start.sh RUN mkdir -p /var/log/app/ diff --git a/other/vllm/Docker/Dockerfile.base b/other/vllm/Docker/Dockerfile.base new file mode 100644 index 0000000..a4cee9a --- /dev/null +++ b/other/vllm/Docker/Dockerfile.base @@ -0,0 +1,39 @@ +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build + +RUN apt-get update && \ + apt-get install -y git python3-pip python3-venv wget unzip && \ + rm -rf /var/lib/apt/lists/* +RUN pip3 install --upgrade pip setuptools wheel + +RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server + +RUN python3 -m venv /jupyterlab +RUN /jupyterlab/bin/pip install jupyterlab +RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +RUN mkdir -p /app +RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip +RUN unzip -j /tmp/rathole.zip -d /tmp +RUN rm /tmp/rathole.zip +RUN cp /tmp/rathole /app + +RUN python3 -m venv /venv +RUN /venv/bin/pip3 install --upgrade pip setuptools wheel + +# Install PyTorch before installing VLLM in an attempt to ensure we use the right +# version for our CUDA install. (VLLM wants 2.0.1) +RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118 + +WORKDIR /local-llm-server + +# We don't need to rebuild VLLM every time we build the container. But if we need +# to, uncomment the following line. +# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache +RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm + +FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as base + +COPY --from=build /local-llm-server /local-llm-server +COPY --from=build /venv /venv +COPY --from=build /app /app +COPY --from=build /jupyterlab /jupyterlab diff --git a/other/vllm/Docker/init-container.sh b/other/vllm/Docker/init-container.sh new file mode 100644 index 0000000..9719c57 --- /dev/null +++ b/other/vllm/Docker/init-container.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Create the required directories and files. +echo "SETTING UP FILE SYSTEM..." +mkdir -p /storage/vllm/ +chown -R apiserver:apiserver /storage/vllm +touch /storage/vllm/cmd.txt +touch /storage/vllm/rathole-client.toml + +# The user can store SSH auth and authorized_keys to streamline SSH login. +if [ -f /storage/vllm/ssh ]; then + cp -r /storage/vllm/ssh /root/.ssh + echo "Copied ssh from /storage" +fi + +# If the user has not created the VLLM commandline arg file, create the default. +if [ ! -f /storage/vllm/cmd.txt ]; then + echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt +fi + +# Copy the idling notebook to storage. 
This will create a blank notebook every +# time the container is started. +cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb + +# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh index eeae2da..401f9f6 100644 --- a/other/vllm/Docker/start-container.sh +++ b/other/vllm/Docker/start-container.sh @@ -1,30 +1,4 @@ #!/bin/bash -# Create the required directories and files. -echo "SETTING UP FILE SYSTEM..." -mkdir -p /storage/vllm/ -chown -R apiserver:apiserver /storage/vllm -touch /storage/vllm/cmd.txt -touch /storage/vllm/rathole-client.toml - -# The user can store SSH auth and authorized_keys to streamline SSH login. -if [ -f /storage/vllm/ssh ]; then - cp -r /storage/vllm/ssh /root/.ssh - echo "Copied ssh from /storage" -fi - -# If the user has not created the VLLM commandline arg file, create the default. -if [ ! -f /storage/vllm/cmd.txt ]; then - echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt -fi - -# Copy the idling notebook to storage. This will create a blank notebook every -# time the container is started. -cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb - -# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh - -echo "LAUNCHING SERVICES..." - # Start the services and launch the container. /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf diff --git a/other/vllm/Docker/supervisord.conf b/other/vllm/Docker/supervisord.conf index acdcb07..4dd7bc7 100644 --- a/other/vllm/Docker/supervisord.conf +++ b/other/vllm/Docker/supervisord.conf @@ -1,6 +1,12 @@ [supervisord] nodaemon=true +[program:startup] +command=/app/init.sh +autostart=true +autorestart=false +startsecs=0 + [program:vllm] command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log' autostart=true
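
The point of the split: everything expensive (cloning local-llm-server, building the jupyterlab and vLLM virtualenvs, fetching rathole) now lives in Dockerfile.base and only needs rebuilding when those dependencies change, while the runtime Dockerfile starts FROM the prebuilt cyberes/paperspace-vllm-base image and layers on supervisor, sshd, and the startup scripts. The per-boot setup likewise moves out of start-container.sh into init-container.sh, which supervisord now runs once at container start through the new [program:startup] entry (autorestart=false, startsecs=0). A minimal build sketch, assuming both images are built with other/vllm/Docker as the context and treating the runtime tag cyberes/paperspace-vllm as a placeholder (only cyberes/paperspace-vllm-base is named by this commit):

    #!/bin/bash
    # Sketch only: the runtime tag and the build context below are assumptions,
    # not part of this commit.
    set -e
    cd other/vllm/Docker

    # Rebuild the base image only when vLLM, PyTorch, or jupyterlab need updating.
    docker build -f Dockerfile.base -t cyberes/paperspace-vllm-base .
    docker push cyberes/paperspace-vllm-base

    # Day-to-day builds are now just the thin runtime layer on top of the base.
    docker build -f Dockerfile -t cyberes/paperspace-vllm .

The base image has to exist locally or in the registry before the runtime build starts, since the runtime Dockerfile's first instruction is FROM cyberes/paperspace-vllm-base.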