docker: ???
commit 21ff182ec1
parent f849e17944
@@ -18,7 +18,10 @@ RUN rm /tmp/rathole.zip
 RUN cp /tmp/rathole /app
 
 RUN python3 -m venv /venv
 
-RUN /venv/bin/pip install torch --index-url https://download.pytorch.org/whl/cu118
+# Install PyTorch before installing VLLM in an attempt to ensure we use the right
+# version for our CUDA install. (VLLM wants 2.0.1)
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+
 
 WORKDIR /local-llm-server
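
Pinning torch==2.0.1 against the cu118 wheel index keeps pip from resolving a build made for a newer CUDA. As a quick sanity check after building, something like the following (the image ID is a placeholder) should report the version as 2.0.1+cu118:

    # Inspect the installed wheel; "2.0.1+cu118" marks the CUDA 11.8 build.
    docker run --rm <image-id> /venv/bin/pip show torch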
@@ -27,6 +30,12 @@ WORKDIR /local-llm-server
 # ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 
+# Uninstall PyTorch since VLLM may have installed a version that is incompatible with
+# our CUDA version.
+RUN /venv/bin/pip3 uninstall -y torch
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+
+
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
 
 RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
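
Installing VLLM from git can pull in its own torch wheel built against the default CUDA, which is why this hunk uninstalls torch and reinstalls the cu118 build afterwards. A hedged end-to-end check, assuming the NVIDIA Container Toolkit is installed on the host and the image ID is filled in:

    # Verify the final torch build matches the CUDA 11.8 runtime image.
    docker run --rm --gpus all <image-id> /venv/bin/python \
        -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"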
@@ -31,4 +31,5 @@ You **must** have a GPU attached to your system when building the container (req
 
 1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8.
 2. `sudo docker build .`
 If you want to build the latest VLLM, add `--no-cache`.
+Don't forget about `--progress=plain`
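
Putting the README's flags together, a complete build invocation might look like this (the -t tag is illustrative, not part of the repo):

    # --no-cache forces a fresh clone of VLLM; --progress=plain prints full RUN output.
    sudo docker build --no-cache --progress=plain -t local-llm-server .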
@@ -1,28 +1,35 @@
 #!/bin/bash
 
+echo "LAUNCHING CONTAINER..."
+
 # Update the container repository and make sure pip dependencies are up to date.
+echo "UPDATING CONTAINER..."
 cd /local-llm-server || exit
 git fetch
 git reset --hard origin/master
 /venv/bin/pip install -r requirements.txt
 
+# Create the required directories and files.
+echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 chown -R apiserver:apiserver /storage/vllm
 touch /storage/vllm/cmd.txt
 touch /storage/vllm/rathole-client.toml
 
+# The user can store SSH auth and authorized_keys to streamline SSH login.
 if [ -f /storage/vllm/ssh ]; then
     cp -r /storage/vllm/ssh /root/.ssh
     echo "Copied ssh from /storage"
 fi
 
+# If the user has not created the VLLM command-line arg file, create the default.
 if [ ! -f /storage/vllm/cmd.txt ]; then
     echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 
+# Copy the idling notebook to storage. This will create a blank notebook every
+# time the container is started.
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
 
-# Start the services
+echo "LAUNCHING SERVICES..."
+
+# Start the services and launch the container.
 /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
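
One thing worth noting in the launch script: `touch /storage/vllm/cmd.txt` runs before the `[ ! -f ... ]` test, so by the time the test executes the file always exists and the default arguments are never written. A guard keyed on the file being empty would preserve the apparent intent; this is a sketch of an assumption, not code from the repo:

    # Write the defaults only when cmd.txt is empty; the earlier touch
    # already guarantees the file exists.
    if [ ! -s /storage/vllm/cmd.txt ]; then
        echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
    fi

The same caveat applies to /storage/vllm/rathole-client.toml, which is created empty and has to be filled in by the user before the rathole client can connect.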