docker: ???
commit 21ff182ec1
parent f849e17944
@@ -18,7 +18,10 @@ RUN rm /tmp/rathole.zip
 RUN cp /tmp/rathole /app
 
 RUN python3 -m venv /venv
 
-RUN /venv/bin/pip install torch --index-url https://download.pytorch.org/whl/cu118
+# Install PyTorch before installing VLLM in an attempt to ensure we use the right
+# version for our CUDA install. (VLLM wants 2.0.1)
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+
 
 WORKDIR /local-llm-server
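
Pinning torch==2.0.1 against the cu118 wheel index keeps pip from resolving a build made for a newer CUDA. As a quick sanity check after building, something like the following (the image ID is a placeholder) should report the version as 2.0.1+cu118:

    # Inspect the installed wheel; "2.0.1+cu118" marks the CUDA 11.8 build.
    docker run --rm <image-id> /venv/bin/pip show torch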
@@ -27,6 +30,12 @@ WORKDIR /local-llm-server
 # ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
 RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
 
+# Uninstall PyTorch since VLLM may have installed a version that is incompatible with
+# our CUDA version.
+RUN /venv/bin/pip3 uninstall -y torch
+RUN /venv/bin/pip3 install torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118
+
+
 FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as runtime
 
 RUN apt-get update && apt-get install -y supervisor && rm -rf /var/lib/apt/lists/*
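
Installing VLLM from git can pull in its own torch wheel built against the default CUDA, which is why this hunk uninstalls torch and reinstalls the cu118 build afterwards. A hedged end-to-end check, assuming the NVIDIA Container Toolkit is installed on the host and the image ID is filled in:

    # Verify the final torch build matches the CUDA 11.8 runtime image.
    docker run --rm --gpus all <image-id> /venv/bin/python \
        -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"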
@@ -31,4 +31,5 @@ You **must** have a GPU attached to your system when building the container (req
 
 1. Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) and CUDA 11.8.
 2. `sudo docker build .`
 If you want to build the latest VLLM, add `--no-cache`.
+Don't forget about `--progress=plain`
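
Putting the README's flags together, a complete build invocation might look like this (the -t tag is illustrative, not part of the repo):

    # --no-cache forces a fresh clone of VLLM; --progress=plain prints full RUN output.
    sudo docker build --no-cache --progress=plain -t local-llm-server .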
@@ -1,28 +1,35 @@
 #!/bin/bash
 
+echo "LAUNCHING CONTAINER..."
+
 # Update the container repository and make sure pip dependencies are up to date.
+echo "UPDATING CONTAINER..."
 cd /local-llm-server || exit
 git fetch
 git reset --hard origin/master
 /venv/bin/pip install -r requirements.txt
 
+# Create the required directories and files.
+echo "SETTING UP FILE SYSTEM..."
 mkdir -p /storage/vllm/
 chown -R apiserver:apiserver /storage/vllm
 touch /storage/vllm/cmd.txt
 touch /storage/vllm/rathole-client.toml
 
+# The user can store SSH auth and authorized_keys to streamline SSH login.
 if [ -f /storage/vllm/ssh ]; then
     cp -r /storage/vllm/ssh /root/.ssh
     echo "Copied ssh from /storage"
 fi
 
+# If the user has not created the VLLM command-line arg file, create the default.
 if [ ! -f /storage/vllm/cmd.txt ]; then
     echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
 fi
 
+# Copy the idling notebook to storage. This will create a blank notebook every
+# time the container is started.
 cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb
 
-# Start the services
+echo "LAUNCHING SERVICES..."
+
+# Start the services and launch the container.
 /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
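
One thing worth noting in the launch script: `touch /storage/vllm/cmd.txt` runs before the `[ ! -f ... ]` test, so by the time the test executes the file always exists and the default arguments are never written. A guard keyed on the file being empty would preserve the apparent intent; this is a sketch of an assumption, not code from the repo:

    # Write the defaults only when cmd.txt is empty; the earlier touch
    # already guarantees the file exists.
    if [ ! -s /storage/vllm/cmd.txt ]; then
        echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt
    fi

The same caveat applies to /storage/vllm/rathole-client.toml, which is created empty and has to be filled in by the user before the rathole client can connect.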