diff --git a/.github/workflows/build_trtllm.yaml b/.github/workflows/build_trtllm.yaml
index 265510b0..a350cd1e 100644
--- a/.github/workflows/build_trtllm.yaml
+++ b/.github/workflows/build_trtllm.yaml
@@ -43,19 +43,29 @@ jobs:
           aws-region: us-east-1
           role-to-assume: ${{ secrets.AWS_ROLE_GITHUB_TGI_TEST }}
-      - name: "Install OS dependencies"
-        run: apt update && apt install -y cmake gcc g++ ninja-build openmpi-devel
-      - name: "Install sccache"
-        run: cargo install sccache --locked
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v3
+        with:
+          install: true
+          buildkitd-config: /tmp/buildkitd.toml
+
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          file: Dockerfile_trtllm
+          target: runtime
+          push: false
+          platforms: 'linux/amd64'
+          build-args: |
+            BUILD_TYPE=debug
+            SCCACHE_BUCKET=${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
+            SCCACHE_REGION=us-east-1
+            SCCACHE_S3_USE_SSL=false
+            SCCACHE_S3_KEY_PREFIX=tgi+trtllm
+            RUSTC_WRAPPER=sccache
-      - name: "Build TensorRT-LLM Backend"
-        env:
-          SCCACHE_BUCKET: ${{ secrets.AWS_S3_BUCKET_GITHUB_TGI_TEST }}
-          SCCACHE_REGION: "us-east-1"
-          SCCACHE_S3_USE_SSL: false
-          SCCACHE_S3_KEY_PREFIX: "tgi+trtllm"
-          RUSTC_WRAPPER: sccache
-        run: cargo build --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
diff --git a/Dockerfile_trtllm b/Dockerfile_trtllm
index b4523ea5..769aa927 100644
--- a/Dockerfile_trtllm
+++ b/Dockerfile_trtllm
@@ -1,13 +1,6 @@
 ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
 ARG OMPI_VERSION="4.1.7rc1"
-
-# Build dependencies resolver stage
-FROM lukemathwalker/cargo-chef:latest AS chef
-WORKDIR /usr/src/text-generation-inference/backends/trtllm
-
-FROM chef AS planner
-COPY . .
-RUN cargo chef prepare --recipe-path recipe.json
+ARG BUILD_TYPE=release
 
 # CUDA dependent dependencies resolver stage
 FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS cuda-builder
@@ -62,18 +55,19 @@ FROM cuda-builder AS tgi-builder
 WORKDIR /usr/src/text-generation-inference
 
 # Install Rust
+ENV PATH="/root/.cargo/bin:$PATH"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y && \
     chmod -R a+w /root/.rustup && \
-    chmod -R a+w /root/.cargo
-
-ENV PATH="/root/.cargo/bin:$PATH"
-RUN cargo install cargo-chef
-
-# Cache dependencies
-COPY --from=planner /usr/src/text-generation-inference/backends/trtllm/recipe.json .
-RUN cargo chef cook --release --recipe-path recipe.json
+    chmod -R a+w /root/.cargo && \
+    cargo install sccache --locked
 
 # Build actual TGI
+ARG BUILD_TYPE
+# sccache S3 cache configuration (values forwarded from the CI build-args)
+ARG SCCACHE_BUCKET
+ARG SCCACHE_REGION
+ARG SCCACHE_S3_USE_SSL
+ARG SCCACHE_S3_KEY_PREFIX
 ARG CUDA_ARCH_LIST
 ENV CMAKE_PREFIX_PATH="/usr/local/mpi:/usr/local/tensorrt:$CMAKE_PREFIX_PATH"
 ENV LD_LIBRARY_PATH="/usr/local/mpi/lib:$LD_LIBRARY_PATH"
@@ -83,8 +77,7 @@ COPY . .
 COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
 RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
-    cd backends/trtllm && \
-    CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release
+    CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX RUSTC_WRAPPER=sccache cargo build $([ "${BUILD_TYPE}" = "release" ] && echo "--release") --package text-generation-backends-trtllm --bin text-generation-backends-trtllm
 
 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 AS runtime
 RUN apt update && apt install -y libucx0 pipx python3-minimal python3-dev python3-pip python3-venv && \