From 8b295aa498408ab526ce36bb726b5eaafa5e1593 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 8 Oct 2024 09:42:50 +0200 Subject: [PATCH] Upgrade minor rust version (Fixes rust build compilation cache) (#2617) * Upgrade minor rust version (Fixes rust build compilation cache) * Black --- Dockerfile | 5 +++-- Dockerfile_amd | 5 +++-- Dockerfile_intel | 7 ++++--- rust-toolchain.toml | 2 +- server/text_generation_server/layers/attention/kv_cache.py | 6 ++---- 5 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80e5b681..daeb9309 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -32,6 +32,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -39,7 +40,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Python builder # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile diff --git a/Dockerfile_amd b/Dockerfile_amd index 0b059f8c..4bb6407a 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -31,6 +31,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -38,7 +39,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Text Generation Inference base image for RoCm FROM rocm/dev-ubuntu-22.04:6.2 AS base diff --git a/Dockerfile_intel b/Dockerfile_intel index 7ab6bba1..b1f7b79c 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -1,6 +1,6 @@ ARG PLATFORM=xpu -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -32,6 +32,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -39,7 +40,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Text Generation Inference base image for Intel @@ -52,7 +53,7 @@ ARG MAMBA_VERSION=23.1.0-1 ARG PYTHON_VERSION='3.11.10' # Automatically set by buildx ARG TARGETPLATFORM -ENV PATH /opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:$PATH # TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda. # Install mamba diff --git a/rust-toolchain.toml b/rust-toolchain.toml index f392b161..12d58532 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] # Released on: June 13, 2024 # https://releases.rs/docs/1.79.0/ -channel = "1.80.0" +channel = "1.80.1" components = ["rustfmt", "clippy"] diff --git a/server/text_generation_server/layers/attention/kv_cache.py b/server/text_generation_server/layers/attention/kv_cache.py index ced4b5b4..3960c954 100644 --- a/server/text_generation_server/layers/attention/kv_cache.py +++ b/server/text_generation_server/layers/attention/kv_cache.py @@ -24,10 +24,8 @@ class KVCache: ): """Construct the key-value cache for a layer.""" - if ( - dtype == torch.float8_e5m2 - and (ATTENTION != "flashinfer" - or SYSTEM != "cuda") + if dtype == torch.float8_e5m2 and ( + ATTENTION != "flashinfer" or SYSTEM != "cuda" ): raise ValueError( "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"