# This Dockerfile builds and assembles the Python parts of the application image.
# The result is used as the base for the final container, which avoids having to
# re-push the large PyTorch layers every time the application is rebuilt.

# Build stage: CUDA 11.8 "devel" image plus the OS tooling used by the steps
# below (git, pip/venv, wget, unzip).
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build

# update + install + list cleanup share one layer so a cached, stale package
# index is never reused by a later install.
# NOTE(review): recommended packages are deliberately left enabled -- later
# source builds may rely on packages that python3-pip pulls in as
# recommendations; confirm before adding --no-install-recommends.
RUN apt-get update && \
    apt-get install -y \
        git \
        python3-pip \
        python3-venv \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Bring the system-wide packaging tools up to date; --no-cache-dir keeps pip's
# download cache out of the layer.
RUN pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Fetch the local-llm-server sources into /local-llm-server; the final stage
# copies this directory out of the build stage.
RUN git clone \
        https://git.evulid.cc/cyberes/local-llm-server.git \
        /local-llm-server

# JupyterLab gets its own virtual environment (/jupyterlab), separate from the
# application's /venv. Creating, populating and configuring it is one logical
# step, so it is a single layer. The last command disables the apputils
# "announcements" extension. --no-cache-dir keeps pip's download cache out of
# the layer.
RUN python3 -m venv /jupyterlab && \
    /jupyterlab/bin/pip install --no-cache-dir jupyterlab && \
    /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"

# rathole release to bundle; override with --build-arg RATHOLE_VERSION=x.y.z.
ARG RATHOLE_VERSION=0.4.8

# Download the prebuilt x86_64 Linux rathole binary and place it in /app.
# Download, extraction and cleanup share one layer so the archive is never
# persisted in a layer of its own. `unzip -j` discards any directory prefix
# stored in the archive, so the binary lands directly in /tmp.
# NOTE(review): the download is not checksum-verified -- consider pinning a
# sha256 for the release asset.
RUN mkdir -p /app && \
    wget --progress=dot:giga \
        "https://github.com/rapiz1/rathole/releases/download/v${RATHOLE_VERSION}/rathole-x86_64-unknown-linux-gnu.zip" \
        -O /tmp/rathole.zip && \
    unzip -j /tmp/rathole.zip -d /tmp && \
    mv /tmp/rathole /app/rathole && \
    rm /tmp/rathole.zip

# Application virtual environment (/venv); it is copied into the final stage.
# The packaging tools are upgraded inside the venv before anything else is
# installed into it.
RUN python3 -m venv /venv && \
    /venv/bin/pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch before installing vLLM in an attempt to ensure we use the
# build that matches our CUDA install (cu118). (vLLM wants 2.0.1.)
# --no-cache-dir keeps the large wheel out of pip's on-disk cache in this layer.
RUN /venv/bin/pip3 install --no-cache-dir torch==2.0.1 --index-url https://download.pytorch.org/whl/cu118

WORKDIR /local-llm-server

# Git ref (branch, tag or commit) of vLLM to install. The default tracks the
# upstream main branch; pass --build-arg VLLM_REF=<tag-or-sha> for a
# reproducible build. Changing the value also invalidates the cached layer of
# the install step below.
ARG VLLM_REF=main

# We don't need to rebuild vLLM every time we build the container. To force a
# rebuild without changing VLLM_REF, uncomment the following line.
# ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache
RUN /venv/bin/pip install --no-cache-dir "git+https://github.com/vllm-project/vllm@${VLLM_REF}"

# Final stage: the smaller CUDA "base" image plus the artifacts assembled in
# the build stage (application sources, both virtual environments, and /app).
# NOTE(review): the copied virtual environments were created with the build
# stage's python3, and this stage does not install Python itself -- presumably
# the image built on top of this one does; confirm.
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base

COPY --from=build /local-llm-server /local-llm-server
COPY --from=build /venv /venv
COPY --from=build /app /app
COPY --from=build /jupyterlab /jupyterlab