From d9d3e03f59bc6c7057214dc3e0baf52536db7ce9 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Sun, 15 Oct 2023 15:38:48 -0600 Subject: [PATCH] docker: split into base and runtime --- other/vllm/Docker/Dockerfile | 41 ++-------------------------- other/vllm/Docker/Dockerfile.base | 39 ++++++++++++++++++++++++++ other/vllm/Docker/init-container.sh | 25 +++++++++++++++++ other/vllm/Docker/start-container.sh | 26 ------------------ other/vllm/Docker/supervisord.conf | 6 ++++ 5 files changed, 72 insertions(+), 65 deletions(-) create mode 100644 other/vllm/Docker/Dockerfile.base create mode 100644 other/vllm/Docker/init-container.sh diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile index 6e4b29a..2d16c95 100644 --- a/other/vllm/Docker/Dockerfile +++ b/other/vllm/Docker/Dockerfile @@ -1,37 +1,4 @@ -FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build - -RUN apt-get update && \ - apt-get install -y git python3-pip python3-venv wget unzip && \ - rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade pip setuptools wheel - -RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server - -RUN python3 -m venv /jupyterlab -RUN /jupyterlab/bin/pip install jupyterlab -RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" - -RUN mkdir -p /app -RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip -RUN unzip -j /tmp/rathole.zip -d /tmp -RUN rm /tmp/rathole.zip -RUN cp /tmp/rathole /app - -RUN python3 -m venv /venv -RUN /venv/bin/pip3 install --upgrade pip setuptools wheel - -# Install PyTorch before installing VLLM in an attempt to ensure we use the right -# version for our CUDA install. (VLLM wants 2.0.1) -RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118 - -WORKDIR /local-llm-server - -# We don't need to rebuild VLLM every time we build the container. But if we need -# to, uncomment the following line. 
-# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache -RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm - -FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime +FROM cyberes/paperspace-vllm-base as runtime RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/* @@ -57,13 +24,9 @@ RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/s # Create the necessary directory for sshd RUN mkdir /var/run/sshd -COPY --from=build /local-llm-server /local-llm-server -COPY --from=build /venv /venv -COPY --from=build /app /app -COPY --from=build /jupyterlab /jupyterlab - COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY start-vllm.sh /app/start-vllm.sh +COPY init-container.sh /app/init.sh COPY start-container.sh /app/start.sh RUN mkdir -p /var/log/app/ diff --git a/other/vllm/Docker/Dockerfile.base b/other/vllm/Docker/Dockerfile.base new file mode 100644 index 0000000..a4cee9a --- /dev/null +++ b/other/vllm/Docker/Dockerfile.base @@ -0,0 +1,39 @@ +FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build + +RUN apt-get update && \ + apt-get install -y git python3-pip python3-venv wget unzip && \ + rm -rf /var/lib/apt/lists/* +RUN pip3 install --upgrade pip setuptools wheel + +RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server + +RUN python3 -m venv /jupyterlab +RUN /jupyterlab/bin/pip install jupyterlab +RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements" + +RUN mkdir -p /app +RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip +RUN unzip -j /tmp/rathole.zip -d /tmp +RUN rm /tmp/rathole.zip +RUN cp /tmp/rathole /app + +RUN python3 -m venv /venv +RUN /venv/bin/pip3 install --upgrade pip setuptools wheel + +# Install PyTorch before installing VLLM in an attempt to ensure we use the right +# version for our CUDA install. (VLLM wants 2.0.1) +RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118 + +WORKDIR /local-llm-server + +# We don't need to rebuild VLLM every time we build the container. But if we need +# to, uncomment the following line. +# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache +RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm + +FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as base + +COPY --from=build /local-llm-server /local-llm-server +COPY --from=build /venv /venv +COPY --from=build /app /app +COPY --from=build /jupyterlab /jupyterlab diff --git a/other/vllm/Docker/init-container.sh b/other/vllm/Docker/init-container.sh new file mode 100644 index 0000000..9719c57 --- /dev/null +++ b/other/vllm/Docker/init-container.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Create the required directories and files. +echo "SETTING UP FILE SYSTEM..." +mkdir -p /storage/vllm/ +chown -R apiserver:apiserver /storage/vllm +touch /storage/vllm/cmd.txt +touch /storage/vllm/rathole-client.toml + +# The user can store SSH auth and authorized_keys to streamline SSH login. +if [ -f /storage/vllm/ssh ]; then + cp -r /storage/vllm/ssh /root/.ssh + echo "Copied ssh from /storage" +fi + +# If the user has not created the VLLM commandline arg file, create the default. +if [ ! -f /storage/vllm/cmd.txt ]; then + echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt +fi + +# Copy the idling notebook to storage. 
This will create a blank notebook every +# time the container is started. +cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb + +# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh index eeae2da..401f9f6 100644 --- a/other/vllm/Docker/start-container.sh +++ b/other/vllm/Docker/start-container.sh @@ -1,30 +1,4 @@ #!/bin/bash -# Create the required directories and files. -echo "SETTING UP FILE SYSTEM..." -mkdir -p /storage/vllm/ -chown -R apiserver:apiserver /storage/vllm -touch /storage/vllm/cmd.txt -touch /storage/vllm/rathole-client.toml - -# The user can store SSH auth and authorized_keys to streamline SSH login. -if [ -f /storage/vllm/ssh ]; then - cp -r /storage/vllm/ssh /root/.ssh - echo "Copied ssh from /storage" -fi - -# If the user has not created the VLLM commandline arg file, create the default. -if [ ! -f /storage/vllm/cmd.txt ]; then - echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt -fi - -# Copy the idling notebook to storage. This will create a blank notebook every -# time the container is started. -cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb - -# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh - -echo "LAUNCHING SERVICES..." - # Start the services and launch the container. /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf diff --git a/other/vllm/Docker/supervisord.conf b/other/vllm/Docker/supervisord.conf index acdcb07..4dd7bc7 100644 --- a/other/vllm/Docker/supervisord.conf +++ b/other/vllm/Docker/supervisord.conf @@ -1,6 +1,12 @@ [supervisord] nodaemon=true +[program:startup] +command=/app/init.sh +autostart=true +autorestart=false +startsecs=0 + [program:vllm] command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log' autostart=true
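
The point of the split: everything expensive (cloning local-llm-server, building the jupyterlab and vLLM virtualenvs, fetching rathole) now lives in Dockerfile.base and only needs rebuilding when those dependencies change, while the runtime Dockerfile starts FROM the prebuilt cyberes/paperspace-vllm-base image and layers on supervisor, sshd, and the startup scripts. The per-boot setup likewise moves out of start-container.sh into init-container.sh, which supervisord now runs once at container start through the new [program:startup] entry (autorestart=false, startsecs=0). A minimal build sketch, assuming both images are built with other/vllm/Docker as the context and treating the runtime tag cyberes/paperspace-vllm as a placeholder (only cyberes/paperspace-vllm-base is named by this commit):

    #!/bin/bash
    # Sketch only: the runtime tag and the build context below are assumptions,
    # not part of this commit.
    set -e
    cd other/vllm/Docker

    # Rebuild the base image only when vLLM, PyTorch, or jupyterlab need updating.
    docker build -f Dockerfile.base -t cyberes/paperspace-vllm-base .
    docker push cyberes/paperspace-vllm-base

    # Day-to-day builds are now just the thin runtime layer on top of the base.
    docker build -f Dockerfile -t cyberes/paperspace-vllm .

The base image has to exist locally or in the registry before the runtime build starts, since the runtime Dockerfile's first instruction is FROM cyberes/paperspace-vllm-base.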