local-llm-server/other/vllm/Docker/Dockerfile.base

# This Dockerfile builds and assembles the Python parts of the final image.
# The result is used as the base image for the application container, which
# avoids having to re-push the large PyTorch parts every time the application is rebuilt.

# Build stage: starts from the CUDA "devel" image and installs the tooling
# needed to fetch sources and assemble the virtual environments.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS build

# DEBIAN_FRONTEND is set inline so apt never waits on an interactive prompt
# and the setting does not persist in the image environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git \
        python3-pip \
        python3-venv \
        unzip \
        wget && \
    rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Check out the application source; it is copied into the final stage.
# NOTE(review): this clones the default branch with no pinned ref, so the
# checked-out revision depends on when this layer was built.
RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server

# Dedicated virtual environment for JupyterLab, kept separate from /venv.
# Creation, installation and the extension tweak form one logical step, so
# they run in a single layer. The announcements extension is disabled right
# after JupyterLab is installed.
RUN python3 -m venv /jupyterlab && \
    /jupyterlab/bin/pip install --no-cache-dir jupyterlab && \
    /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"

# Fetch the rathole v0.4.8 release binary and place it in /app.
# Download, extraction, copy and cleanup run in a single layer so the
# temporary archive is never committed to a layer of its own.
# NOTE(review): the downloaded archive is not verified against a checksum.
RUN mkdir -p /app && \
    wget -nv https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip && \
    unzip -j /tmp/rathole.zip -d /tmp && \
    cp /tmp/rathole /app && \
    rm /tmp/rathole.zip

# Virtual environment that holds PyTorch and vLLM.
RUN python3 -m venv /venv
RUN /venv/bin/pip3 install --no-cache-dir --upgrade pip setuptools wheel

# Install PyTorch before installing VLLM to ensure we use the right version for our CUDA install.
# The torch requirement line is taken from vLLM's requirements.txt. The file is
# downloaded first and filtered afterwards (no pipe), and the pattern is
# anchored so that only the `torch` requirement itself is selected rather than
# every line that merely contains "torc". Installing with `-r` lets pip parse
# the line as a requirements entry, including any trailing comment.
# NOTE(review): requirements.txt is read from the `main` branch while vllm
# below is installed unpinned, so the two can drift apart.
RUN wget -nv -O /tmp/vllm_requirements.txt https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt && \
    grep -E '^torch([^A-Za-z0-9_.-]|$)' /tmp/vllm_requirements.txt > /tmp/torch_version && \
    /venv/bin/pip3 install --no-cache-dir -r /tmp/torch_version --index-url https://download.pytorch.org/whl/cu118

# WORKDIR /local-llm-server

# Don't build VLLM because we don't do that on the inference server. Just install from pip.
# RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm

RUN /venv/bin/pip install --no-cache-dir vllm

# Final stage: starts from the CUDA "base" image and receives only the
# artifacts assembled in the build stage.
# NOTE(review): no Python interpreter is installed in this stage; the copied
# virtual environments presumably rely on a downstream image providing one —
# confirm.
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS base

# Application checkout.
COPY --from=build /local-llm-server /local-llm-server
# Virtual environment with PyTorch and vLLM.
COPY --from=build /venv /venv
# Directory holding the rathole binary.
COPY --from=build /app /app
# Virtual environment with JupyterLab.
COPY --from=build /jupyterlab /jupyterlab
docker: git clone 2023-10-15 15:43:37 -06:00			`# This container builds and assembles the Python parts of the Docker container.`
			`# It is used as the base for the resulting container, which avoids having to re-push`
			`# the large PyTorch parts every time the application is rebuilt.`

docker: split into base and runtime 2023-10-15 15:38:48 -06:00			`FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as build`

			`RUN apt-get update && \`
			`apt-get install -y git python3-pip python3-venv wget unzip && \`
			`rm -rf /var/lib/apt/lists/*`
docked: adjust 2023-10-15 16:09:48 -06:00			`RUN pip install --upgrade pip setuptools wheel`
docker: split into base and runtime 2023-10-15 15:38:48 -06:00
			`RUN git clone https://git.evulid.cc/cyberes/local-llm-server.git /local-llm-server`

			`RUN python3 -m venv /jupyterlab`
docker: revert 2023-10-15 16:05:14 -06:00			`RUN /jupyterlab/bin/pip install jupyterlab`
docker: split into base and runtime 2023-10-15 15:38:48 -06:00			`RUN /jupyterlab/bin/jupyter labextension disable "@jupyterlab/apputils-extension:announcements"`

			`RUN mkdir -p /app`
			`RUN wget https://github.com/rapiz1/rathole/releases/download/v0.4.8/rathole-x86_64-unknown-linux-gnu.zip -O /tmp/rathole.zip`
			`RUN unzip -j /tmp/rathole.zip -d /tmp`
			`RUN rm /tmp/rathole.zip`
			`RUN cp /tmp/rathole /app`

			`RUN python3 -m venv /venv`
			`RUN /venv/bin/pip3 install --upgrade pip setuptools wheel`

pull torch version from github 2023-10-18 09:35:51 -06:00			`# Install PyTorch before installing VLLM to ensure we use the right version for our CUDA install.`
			`RUN wget -q -O - https://raw.githubusercontent.com/vllm-project/vllm/main/requirements.txt \| grep -E 'torch*' > /tmp/torch_version`
			`RUN /venv/bin/pip3 install "$(cat /tmp/torch_version)" --index-url https://download.pytorch.org/whl/cu118`
docker: split into base and runtime 2023-10-15 15:38:48 -06:00
docked: adjust 2023-10-15 16:09:48 -06:00			`# WORKDIR /local-llm-server`
docker: split into base and runtime 2023-10-15 15:38:48 -06:00
Update 'other/vllm/Docker/Dockerfile.base' 2023-10-17 12:45:31 -06:00			`# Don't build VLLM because we don't do that on the inference server. Just install from pip.`
			`# RUN /venv/bin/pip install git+https://github.com/vllm-project/vllm`

fix base vllm dockerfile 2023-10-18 09:24:39 -06:00			`RUN /venv/bin/pip install vllm`
docker: split into base and runtime 2023-10-15 15:38:48 -06:00
			`FROM nvidia/cuda:11.8.0-base-ubuntu22.04 as base`

			`COPY --from=build /local-llm-server /local-llm-server`
			`COPY --from=build /venv /venv`
			`COPY --from=build /app /app`
			`COPY --from=build /jupyterlab /jupyterlab`