diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 8304c8d1..6c752de8 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -15,6 +15,10 @@ jobs:
   build-and-push-image:
     runs-on: ubuntu-latest
     steps:
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v2.0.0
+        with:
+          install: true
       - name: Tailscale
         uses: tailscale/github-action@v1
         with:
@@ -65,5 +69,5 @@ jobs:
           platforms: 'linux/amd64'
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=registry,ref=ghcr.io/huggingface/text-generation-inference:latest
-          cache-to: type=inline
\ No newline at end of file
+          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
+          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
\ No newline at end of file
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 8b659fe2..27caff11 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -3,6 +3,7 @@ name: Server Tests
 on:
   pull_request:
     paths:
+      - ".github/workflows/tests.yaml"
       - "server/**"
       - "proto/**"
       - "router/**"
@@ -11,6 +12,12 @@ on:
 jobs:
   run_tests:
     runs-on: ubuntu-20.04
+
+    env:
+      SCCACHE_GHA_ENABLED: "on"
+      RUSTC_WRAPPER: /usr/local/bin/sccache
+      SCCACHE: 0.3.3
+
     steps:
       - uses: actions/checkout@v2
       - name: Set up Python
@@ -25,12 +32,28 @@ jobs:
           components: rustfmt, clippy
       - name: Install Protoc
         uses: arduino/setup-protoc@v1
-      - name: Loading cache.
-        uses: actions/cache@v2
-        id: model_cache
+      - name: Install sccache
+        run: |
+          curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache
+          chmod +x /usr/local/bin/sccache
+      - name: configure sccache
+        uses: actions/github-script@v6
         with:
-          path: ~/.cache/huggingface/
-          key: models
+          script: |
+            core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
+            core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
+            core.exportVariable('SCCACHE_GHA_CACHE_TO', 'sccache-${{runner.os}}-${{github.ref_name}}');
+            core.exportVariable('SCCACHE_GHA_CACHE_FROM', 'sccache-${{runner.os}}-main,sccache-${{runner.os}}-');
+      - name: cargo registry cache
+        uses: actions/cache@v3
+        with:
+          key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-${{ github.sha }}
+          restore-keys: |
+            cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-
+            cargo-${{ runner.os }}-
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
       - name: Install
         run: |
           make install
@@ -41,3 +64,6 @@ jobs:
       - name: Run Rust tests
         run: |
           cargo test
+      - name: sccache stats
+        run: |
+          /usr/local/bin/sccache --show-stats
diff --git a/Dockerfile b/Dockerfile
index 228909dd..5fbf8985 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,15 @@
-FROM rust:1.67 as router-builder
+FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef
+WORKDIR /usr/src
+
+FROM chef as planner
+COPY Cargo.toml Cargo.toml
+COPY rust-toolchain.toml rust-toolchain.toml
+COPY proto proto
+COPY router router
+COPY launcher launcher
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
 
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
@@ -6,26 +17,15 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
     rm -f $PROTOC_ZIP
 
-WORKDIR /usr/src
+COPY --from=planner /usr/src/recipe.json recipe.json
+RUN cargo chef cook --release --recipe-path recipe.json
 
+COPY Cargo.toml Cargo.toml
 COPY rust-toolchain.toml rust-toolchain.toml
 COPY proto proto
 COPY router router
-
-WORKDIR /usr/src/router
-
-RUN cargo install --path .
-
-FROM rust:1.67 as launcher-builder
-
-WORKDIR /usr/src
-
-COPY rust-toolchain.toml rust-toolchain.toml
 COPY launcher launcher
-
-WORKDIR /usr/src/launcher
-
-RUN cargo install --path .
+RUN cargo build --release
 
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
 
@@ -69,9 +69,9 @@ RUN cd server && \
     /opt/miniconda/envs/text-generation/bin/pip install ".[bnb]" --no-cache-dir
 
 # Install router
-COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/local/bin/text-generation-router
+COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
 # Install launcher
-COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
+COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
 
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]
\ No newline at end of file