docker: split into base and runtime
This commit is contained in:
parent
ac5cf18784
commit
d9d3e03f59
|
@ -1,37 +1,4 @@
|
||||||
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build
|
FROM cyberes/paperspace-vllm-base as runtime
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y git python3-pip python3-venv wget unzip && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
RUN pip3 install --upgrade pip setuptools wheel
|
|
||||||
|
|
||||||
RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
|
|
||||||
|
|
||||||
RUN python3 -m venv /jupyterlab
|
|
||||||
RUN /jupyterlab/bin/pip install jupyterlab
|
|
||||||
RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
|
|
||||||
|
|
||||||
RUN mkdir -p /app
|
|
||||||
RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
|
|
||||||
RUN unzip -j /tmp/rathole.zip -d /tmp
|
|
||||||
RUN rm /tmp/rathole.zip
|
|
||||||
RUN cp /tmp/rathole /app
|
|
||||||
|
|
||||||
RUN python3 -m venv /venv
|
|
||||||
RUN /venv/bin/pip3 install --upgrade pip setuptools wheel
|
|
||||||
|
|
||||||
# Install PyTorch before installing VLLM in an attempt to ensure we use the right
|
|
||||||
# version for our CUDA install. (VLLM wants 2.0.1)
|
|
||||||
RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
|
|
||||||
|
|
||||||
WORKDIR /local-llm-server
|
|
||||||
|
|
||||||
# We don't need to rebuild VLLM every time we build the container. But if we need
|
|
||||||
# to, uncomment the following line.
|
|
||||||
# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
|
|
||||||
RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
|
|
||||||
|
|
||||||
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
|
RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
@ -57,13 +24,9 @@ RUN sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/s
|
||||||
# Create the necessary directory for sshd
|
# Create the necessary directory for sshd
|
||||||
RUN mkdir /var/run/sshd
|
RUN mkdir /var/run/sshd
|
||||||
|
|
||||||
COPY --from=build /local-llm-server /local-llm-server
|
|
||||||
COPY --from=build /venv /venv
|
|
||||||
COPY --from=build /app /app
|
|
||||||
COPY --from=build /jupyterlab /jupyterlab
|
|
||||||
|
|
||||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
COPY start-vllm.sh /app/start-vllm.sh
|
COPY start-vllm.sh /app/start-vllm.sh
|
||||||
|
COPY init-container.sh /app/init.sh
|
||||||
COPY start-container.sh /app/start.sh
|
COPY start-container.sh /app/start.sh
|
||||||
|
|
||||||
RUN mkdir -p /var/log/app/
|
RUN mkdir -p /var/log/app/
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y git python3-pip python3-venv wget unzip && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
RUN pip3 install --upgrade pip setuptools wheel
|
||||||
|
|
||||||
|
RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
|
||||||
|
|
||||||
|
RUN python3 -m venv /jupyterlab
|
||||||
|
RUN /jupyterlab/bin/pip install jupyterlab
|
||||||
|
RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"
|
||||||
|
|
||||||
|
RUN mkdir -p /app
|
||||||
|
RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip
|
||||||
|
RUN unzip -j /tmp/rathole.zip -d /tmp
|
||||||
|
RUN rm /tmp/rathole.zip
|
||||||
|
RUN cp /tmp/rathole /app
|
||||||
|
|
||||||
|
RUN python3 -m venv /venv
|
||||||
|
RUN /venv/bin/pip3 install --upgrade pip setuptools wheel
|
||||||
|
|
||||||
|
# Install PyTorch before installing VLLM in an attempt to ensure we use the right
|
||||||
|
# version for our CUDA install. (VLLM wants 2.0.1)
|
||||||
|
RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
|
||||||
|
|
||||||
|
WORKDIR /local-llm-server
|
||||||
|
|
||||||
|
# We don't need to rebuild VLLM every time we build the container. But if we need
|
||||||
|
# to, uncomment the following line.
|
||||||
|
# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
|
||||||
|
RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
|
||||||
|
|
||||||
|
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as base
|
||||||
|
|
||||||
|
COPY --from=build /local-llm-server /local-llm-server
|
||||||
|
COPY --from=build /venv /venv
|
||||||
|
COPY --from=build /app /app
|
||||||
|
COPY --from=build /jupyterlab /jupyterlab
|
|
@ -0,0 +1,25 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Create the required directories and files.
|
||||||
|
echo "SETTING UP FILE SYSTEM..."
|
||||||
|
mkdir -p /storage/vllm/
|
||||||
|
chown -R apiserver:apiserver /storage/vllm
|
||||||
|
touch /storage/vllm/cmd.txt
|
||||||
|
touch /storage/vllm/rathole-client.toml
|
||||||
|
|
||||||
|
# The user can store SSH auth and authorized_keys to streamline SSH login.
|
||||||
|
# /storage/vllm/ssh is expected to be a directory (it is copied recursively to
# /root/.ssh), so test with -d; -f only matches regular files and would skip it.
if [ -d /storage/vllm/ssh ]; then
|
||||||
|
cp -r /storage/vllm/ssh /root/.ssh
|
||||||
|
echo "Copied ssh from /storage"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If the user has not created the VLLM commandline arg file, create the default.
|
||||||
|
# cmd.txt is always created by the earlier `touch`, so a plain existence test
# never fires. Use -s so a missing OR empty file receives the default arguments,
# while a non-empty user-provided file is left untouched.
if [ ! -s /storage/vllm/cmd.txt ]; then
|
||||||
|
echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Copy the idling notebook to storage. This will create a blank notebook every
|
||||||
|
# time the container is started.
|
||||||
|
cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
|
||||||
|
|
||||||
|
# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh
|
|
@ -1,30 +1,4 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# Create the required directories and files.
|
|
||||||
echo "SETTING UP FILE SYSTEM..."
|
|
||||||
mkdir -p /storage/vllm/
|
|
||||||
chown -R apiserver:apiserver /storage/vllm
|
|
||||||
touch /storage/vllm/cmd.txt
|
|
||||||
touch /storage/vllm/rathole-client.toml
|
|
||||||
|
|
||||||
# The user can store SSH auth and authorized_keys to streamline SSH login.
|
|
||||||
if [ -f /storage/vllm/ssh ]; then
|
|
||||||
cp -r /storage/vllm/ssh /root/.ssh
|
|
||||||
echo "Copied ssh from /storage"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If the user has not created the VLLM commandline arg file, create the default.
|
|
||||||
if [ ! -f /storage/vllm/cmd.txt ]; then
|
|
||||||
echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Copy the idling notebook to storage. This will create a blank notebook every
|
|
||||||
# time the container is started.
|
|
||||||
cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
|
|
||||||
|
|
||||||
# cp /local-llm-server/other/vllm/Docker/update-container.sh /notebooks/update-container.sh
|
|
||||||
|
|
||||||
echo "LAUNCHING SERVICES..."
|
|
||||||
|
|
||||||
# Start the services and launch the container.
|
# Start the services and launch the container.
|
||||||
/usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
|
/usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
|
|
@ -1,6 +1,12 @@
|
||||||
[supervisord]
|
[supervisord]
|
||||||
nodaemon=true
|
nodaemon=true
|
||||||
|
|
||||||
|
[program:startup]
|
||||||
|
command=/app/init.sh
|
||||||
|
autostart=true
|
||||||
|
autorestart=false
|
||||||
|
startsecs=0
|
||||||
|
|
||||||
[program:vllm]
|
[program:vllm]
|
||||||
command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log'
|
command=/bin/bash -c 'bash /app/start-vllm.sh 2>&1 | tee -a /var/log/app/vllm.log'
|
||||||
autostart=true
|
autostart=true
|
||||||
|
|
Reference in New Issue