docker: split into base and runtime

2023-10-15 15:38:48 -06:00 · 2023-10-15 15:38:48 -06:00 · d9d3e03f59
parent ac5cf18784
commit d9d3e03f59
5 changed files with 72 additions and 65 deletions
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@ -1,37 +1,4 @@
-FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build
+FROM cyberes/paperspace-vllm-base as runtime
 # NOTE(review): only the two FROM lines above kept their +/- markers in this
 # view. The hunk header (-1,37 +1,4) suggests most of the lines below are
 # removed by this commit -- confirm against the real patch.
 # OS packages used by the steps below: git (clone / pip git+https), pip and
 # venv (Python environments), wget and unzip (rathole download). The apt lists
 # are removed in the same layer.
 RUN apt-get update && \
    apt-get install -y git python3-pip python3-venv wget unzip && \
    rm -rf /var/lib/apt/lists/*
 # Upgrade the packaging tools of the system Python.
 RUN pip3 install --upgrade pip setuptools wheel
 # Check out the server sources at /local-llm-server.
 RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
 # JupyterLab lives in its own virtualenv at /jupyterlab; its announcements
 # extension is disabled.
 RUN python3 -m venv /jupyterlab
 RUN /jupyterlab/bin/pip install jupyterlab
 RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
 # Download the rathole v0.4.8 release zip, extract it flat (-j drops archive
 # directories) into /tmp, delete the zip and copy the binary to /app.
 RUN mkdir -p /app
 RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
 RUN unzip -j /tmp/rathole.zip -d /tmp
 RUN rm /tmp/rathole.zip
 RUN cp /tmp/rathole /app
 # Second virtualenv at /venv; torch and vLLM are installed into it below.
 RUN python3 -m venv /venv
 RUN /venv/bin/pip3 install --upgrade pip setuptools wheel
 # Install PyTorch before installing VLLM in an attempt to ensure we use the right
 # version for our CUDA install. (VLLM wants 2.0.1)
 RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
 WORKDIR /local-llm-server
 # We don't need to rebuild VLLM every time we build the container. But if we need
 # to, uncomment the following line.
 # ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 # vLLM is installed straight from the upstream git repository, with no tag or
 # commit pinned.
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 # Runtime stage on the CUDA "base" image; supervisor is installed here.
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
 RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
@ -57,13 +24,9 @@ RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/s
 # Create the necessary directory for sshd
 RUN mkdir /var/run/sshd
 # Artifacts pulled from the stage named `build`.
 # NOTE(review): this commit switches the first FROM to an external base image;
 # confirm that a `build` stage still exists in this file, or that these four
 # lines are among those the commit removes.
 COPY --from=build /local-llm-server /local-llm-server
 COPY --from=build /venv /venv
 COPY --from=build /app /app
 COPY --from=build /jupyterlab /jupyterlab
 # Supervisor configuration and the container scripts, copied from the build
 # context.
 COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY start-vllm.sh /app/start-vllm.sh
 # init-container.sh is installed as /app/init.sh and start-container.sh as
 # /app/start.sh.
 COPY init-container.sh /app/init.sh
 COPY start-container.sh /app/start.sh
 # Create the /var/log/app/ directory.
 RUN mkdir -p /var/log/app/
--- a/other/vllm/Docker/Dockerfile.base
+++ b/other/vllm/Docker/Dockerfile.base
@ -0,0 +1,39 @@
 # Build stage. Nothing from this stage reaches the final image except the four
 # paths copied into the `base` stage at the bottom of this file.
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build
 # OS packages used below: git (clone / pip git+https), pip and venv (Python
 # environments), wget and unzip (rathole download). The apt lists are removed
 # in the same layer that created them.
 RUN apt-get update && \
    apt-get install -y git python3-pip python3-venv wget unzip && \
    rm -rf /var/lib/apt/lists/*
 RUN pip3 install --upgrade pip setuptools wheel
 RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
 # JupyterLab gets its own virtualenv, separate from /venv. Creating the
 # environment, installing into it and disabling the announcements extension
 # are one logical step, so they share one layer.
 RUN python3 -m venv /jupyterlab && \
    /jupyterlab/bin/pip install jupyterlab && \
    /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
 # Fetch the rathole v0.4.8 binary into /app. Download, extraction and removal
 # of the archive happen in a single RUN so the zip is never committed to a
 # layer; -j extracts the archive members without their directory paths.
 RUN mkdir -p /app && \
    wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip && \
    unzip -j /tmp/rathole.zip -d /tmp && \
    rm /tmp/rathole.zip && \
    cp /tmp/rathole /app
 # Virtualenv that receives torch and vLLM.
 RUN python3 -m venv /venv && \
    /venv/bin/pip3 install --upgrade pip setuptools wheel
 # Install PyTorch before installing VLLM in an attempt to ensure we use the right
 # version for our CUDA install. (VLLM wants 2.0.1)
 RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
 WORKDIR /local-llm-server
 # We don't need to rebuild VLLM every time we build the container. But if we need
 # to, uncomment the following line.
 # ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 # NOTE(review): vLLM is installed from the tip of the upstream repository, so
 # rebuilds are not reproducible; consider pinning a tag or commit.
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 # Published stage: the CUDA "base" image plus the artifacts built above.
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base
 COPY --from=build /local-llm-server /local-llm-server
 COPY --from=build /venv /venv
 COPY --from=build /app /app
 COPY --from=build /jupyterlab /jupyterlab
--- a/other/vllm/Docker/init-container.sh
+++ b/other/vllm/Docker/init-container.sh
@ -0,0 +1,25 @@
 #!/bin/bash
 # Container initialisation: prepares /storage/vllm, writes a default vLLM
 # argument file when none exists, restores saved SSH material for root and
 # copies the idle notebook into /notebooks.
 # Create the required directories and files.
 echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 chown -R apiserver:apiserver /storage/vllm
 # If the user has not created the VLLM commandline arg file, create the default.
 # This test must run before anything else creates cmd.txt: touching the file
 # first would make the test always false and the default would never be written.
 if [ ! -f /storage/vllm/cmd.txt ]; then
  echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 touch /storage/vllm/rathole-client.toml
 # The user can store SSH auth and authorized_keys to streamline SSH login.
 # The saved material is a directory, so test with -d (-f is false for
 # directories) and merge its contents into /root/.ssh; copying the directory
 # itself would nest it as /root/.ssh/ssh whenever /root/.ssh already exists.
 if [ -d /storage/vllm/ssh ]; then
  mkdir -p /root/.ssh
  cp -r /storage/vllm/ssh/. /root/.ssh/
  echo "Copied ssh from /storage"
 fi
 # Copy the idling notebook to storage. This will create a blank notebook every
 # time the container is started.
 # Make sure the target directory exists so the copy cannot fail on a fresh volume.
 mkdir -p /notebooks
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
 # cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh
--- a/other/vllm/Docker/start-container.sh
+++ b/other/vllm/Docker/start-container.sh
@ -1,30 +1,4 @@
 #!/bin/bash
 # NOTE(review): the hunk header (-1,30 +1,4) suggests the file-system setup
 # below is removed from this script by the commit, leaving only the service
 # launch at the end -- the +/- markers are missing in this view, so confirm
 # against the real patch.
 # Create the required directories and files.
 echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 chown -R apiserver:apiserver /storage/vllm
 # NOTE(review): cmd.txt is created here, so the `[ ! -f ... ]` test further
 # down is always false and the default arguments are never written.
 touch /storage/vllm/cmd.txt
 touch /storage/vllm/rathole-client.toml
 # The user can store SSH auth and authorized_keys to streamline SSH login.
 # NOTE(review): -f is false for a directory, yet `cp -r` and the comment above
 # suggest /storage/vllm/ssh is one -- confirm whether -d was intended.
 if [ -f /storage/vllm/ssh ]; then
  cp -r /storage/vllm/ssh /root/.ssh
  echo "Copied ssh from /storage"
 fi
 # If the user has not created the VLLM commandline arg file, create the default.
 if [ ! -f /storage/vllm/cmd.txt ]; then
  echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 # Copy the idling notebook to storage. This will create a blank notebook every
 # time the container is started.
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
 # cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh
 echo "LAUNCHING SERVICES..."
 # Start the services and launch the container.
 # supervisord is pointed at the configuration file under /etc/supervisor/conf.d/.
 /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
--- a/other/vllm/Docker/supervisord.conf
+++ b/other/vllm/Docker/supervisord.conf
@ -1,6 +1,12 @@
 ; Global supervisord settings.
 [supervisord]
 ; Run supervisord in the foreground instead of daemonizing.
 nodaemon=true
 ; Runs /app/init.sh when supervisord starts.
 ; NOTE(review): no priority is set, so nothing here orders this script ahead
 ; of the other programs in this file -- confirm they tolerate starting before
 ; it has finished.
 [program:startup]
 command=/app/init.sh
 autostart=true
 ; Do not restart the script once it exits.
 autorestart=false
 ; The program does not have to stay running for any length of time to count
 ; as successfully started.
 startsecs=0
 [program:vllm]
 command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log'
 autostart=true