add vllm dockerfile

2023-09-26 14:48:34 -06:00 · 2023-09-26 14:48:34 -06:00 · e3c57d874a
parent e0af2ea9c5
commit e3c57d874a
3 changed files with 73 additions and 0 deletions
--- a/other/vllm/Docker/DOCKER.md
+++ b/other/vllm/Docker/DOCKER.md
@ -0,0 +1 @@
+`docker run --shm-size 14g --gpus all -v /storage/models/awq/MythoMax-L2-13B-AWQ:/models/MythoMax-L2-13B-AWQ -e API_SERVER_ARGS="--model /models/MythoMax-L2-13B-AWQ --quantization awq --host 0.0.0.0 --port 7000 --max-num-batched-tokens 8192 --gpu-memory-utilization 1" -d cyberes_vllm_cloud`
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@ -0,0 +1,45 @@
+# Build stage: clones local-llm-server, builds its virtualenv (including vLLM
+# installed from git) and downloads the rathole client. The runtime stage
+# copies /local-llm-server, /venv and /app out of this stage.
+FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build
+
+RUN apt-get update && apt-get install -y git python3-pip python3-venv wget unzip && rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server
+
+WORKDIR /local-llm-server
+
+# The dependency layers sit above the cache buster below, so they are reused
+# between builds.
+# NOTE(review): requirements.txt is read from the cached clone, not from the
+# refreshed checkout further down -- confirm that is acceptable.
+RUN python3 -m venv /venv
+RUN /venv/bin/pip install --no-cache-dir -r requirements.txt
+RUN /venv/bin/pip install --no-cache-dir git+https://github.com/vllm-project/vllm
+
+# Cache buster: the URL returns different bytes on every request, so this
+# layer and everything after it is rebuilt on each build.
+ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
+
+# Chained with && (not ;) so a failed fetch aborts the build instead of
+# silently resetting to the stale origin/master from the cached clone.
+RUN git fetch && git reset --hard origin/master
+
+# COPY creates /app, so no separate mkdir is needed.
+COPY ./rathole-client.toml /app/client.toml
+
+# Download, unpack and clean up in a single layer.
+RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip \
+    && unzip -j /tmp/rathole.zip -d /tmp \
+    && cp /tmp/rathole /app \
+    && rm /tmp/rathole.zip
+
+# Runtime stage: CUDA base image plus the artifacts produced by the build
+# stage, with supervisord launching the services.
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
+
+# All OS packages in one layer, with the apt lists removed in that same layer.
+# Installed before the COPY --from=build steps so these layers stay cached
+# even though the build stage output changes on every build.
+RUN apt-get update && apt-get install -y aria2 python3 python3-pip supervisor wget && rm -rf /var/lib/apt/lists/*
+RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel
+RUN pip install --no-cache-dir jupyterlab
+
+# Create the service account before copying so COPY --chown can resolve the
+# user and group names.
+RUN useradd -ms /bin/bash apiserver
+
+# --chown replaces a separate `chown -R` layer, which would store a second
+# copy of the whole tree.
+COPY --from=build --chown=apiserver:apiserver /local-llm-server /local-llm-server
+COPY --from=build /venv /venv
+COPY --from=build /app /app
+RUN cp /local-llm-server/other/vllm/Docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
+
+# API server
+EXPOSE 7000
+# Jupyter (Dockerfile comments must be on their own line; a trailing
+# "# jupyter" would be parsed as extra EXPOSE arguments).
+EXPOSE 8888
+
+# No USER instruction: supervisord starts as root and switches the programs
+# that set user=apiserver in supervisord.conf to that account.
+# `exec` replaces the shell so supervisord receives container signals directly.
+CMD ["bash", "-c", "source /etc/bash.bashrc && exec /usr/bin/supervisord"]
+
+# To test your config, uncomment this and comment out the other CMD.
+#CMD ["/app/rathole", "-c", "/app/client.toml"]
--- a/other/vllm/Docker/supervisord.conf
+++ b/other/vllm/Docker/supervisord.conf
@ -0,0 +1,27 @@
+[supervisord]
+; Run supervisord in the foreground instead of daemonizing.
+nodaemon=true
+
+[program:api_server]
+; vLLM API server, started under the apiserver account.
+; The ENV_API_SERVER_ARGS expansion at the end of the command is filled in by
+; supervisord from the API_SERVER_ARGS environment variable, which must be set
+; when the container starts.
+user=apiserver
+environment=HOME="/home/apiserver",USER="apiserver"
+command=/venv/bin/python /local-llm-server/other/vllm/vllm_api_server.py --host 0.0.0.0 --port 7000 %(ENV_API_SERVER_ARGS)s
+autostart=true
+autorestart=true
+; Send program output to supervisord's own stdout/stderr; a max size of 0
+; turns off size-based log rotation.
+stdout_logfile=/dev/fd/1
+stderr_logfile=/dev/fd/2
+stdout_logfile_maxbytes=0
+stderr_logfile_maxbytes=0
+
+[program:proxy]
+; rathole client, started under the apiserver account with the config file
+; at /app/client.toml.
+user=apiserver
+environment=HOME="/home/apiserver",USER="apiserver"
+command=/app/rathole -c /app/client.toml
+autostart=true
+autorestart=true
+; Send program output to supervisord's own stdout/stderr; a max size of 0
+; turns off size-based log rotation.
+stdout_logfile=/dev/fd/1
+stderr_logfile=/dev/fd/2
+stdout_logfile_maxbytes=0
+stderr_logfile_maxbytes=0
+
+[program:jupyter]
+; JupyterLab. No user= is set, so it runs as the account supervisord itself
+; runs as (hence --allow-root).
+; The "&&" after sourcing /etc/bash.bashrc is required: without a separator
+; bash tries to source a file named
+; "/etc/bash.bashrcPIP_DISABLE_PIP_VERSION_CHECK=1" and jupyter never starts.
+command=bash -c "source /etc/bash.bashrc && PIP_DISABLE_PIP_VERSION_CHECK=1 jupyter lab --allow-root --ip=0.0.0.0 --no-browser --ServerApp.trust_xheaders=True --ServerApp.disable_check_xsrf=False --ServerApp.allow_remote_access=True --ServerApp.allow_origin='*' --ServerApp.allow_credentials=True"
+; Same start/restart and logging settings as the other programs in this file.
+autostart=true
+autorestart=true
+stdout_logfile=/dev/fd/1
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/fd/2
+stderr_logfile_maxbytes=0
				`@ -0,0 +1 @@`
				`docker run --shm-size 14g --gpus all -v /storage/models/awq/MythoMax-L2-13B-AWQ:/models/MythoMax-L2-13B-AWQ -e API_SERVER_ARGS="--model /models/MythoMax-L2-13B-AWQ --quantization awq --host 0.0.0.0 --port 7000 --max-num-batched-tokens 8192 --gpu-memory-utilization 1" -d cyberes_vllm_cloud`