# This stage builds and assembles the Python parts of the container. It is used
# as the base for the final application image, which avoids having to re-push
# the large PyTorch layers every time the application is rebuilt.
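#
# A typical invocation (the image tag is illustrative, not defined in this file):
#   docker build -t local-llm-server-base .
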
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build

RUN apt-get update && \
    apt-get install -y git python3-pip python3-venv wget unzip && \
    rm -rf /var/lib/apt/lists/*
RUN pip install --upgrade pip setuptools wheel

RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server

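# JupyterLab lives in its own venv, separate from the inference venv (/venv)
# below; the labextension call disables the JupyterLab announcements pop-up.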
RUN python3 -m venv /jupyterlab
RUN /jupyterlab/bin/pip install jupyterlab
RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"

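# rathole (https://github.com/rapiz1/rathole) is a reverse-tunnel tool for NAT
# traversal; its binary is staged in /app for the final image.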
RUN mkdir -p /app
RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip && \
    unzip -j /tmp/rathole.zip -d /tmp && \
    rm /tmp/rathole.zip && \
    cp /tmp/rathole /app

RUN python3 -m venv /venv
RUN /venv/bin/pip3 install --upgrade pip setuptools wheel

# Install PyTorch before installing vLLM so we get the version vLLM expects,
# built against our CUDA install.
RUN wget -q -O - https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt | grep -E '^torch' > /tmp/torch_version
RUN /venv/bin/pip3 install "$(cat /tmp/torch_version)" --index-url https://download.pytorch.org/whl/cu118
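# /tmp/torch_version ends up holding vLLM's torch requirement line, e.g.
# "torch == X.Y.Z" (the exact pin tracks vLLM's requirements.txt at build time).
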
# WORKDIR /local-llm-server

# Don't build vLLM from source since we don't do that on the inference server;
# just install it from pip.
# RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm
RUN /venv/bin/pip install vllm

FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base

COPY --from=build /local-llm-server /local-llm-server
COPY --from=build /venv /venv
COPY --from=build /app /app
COPY --from=build /jupyterlab /jupyterlab

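# No ENTRYPOINT or CMD here: as the header comment says, this image serves as
# the base for the application image, which adds the runtime configuration.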