diff --git a/Dockerfile b/Dockerfile index 80e5b681..daeb9309 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -32,6 +32,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -39,7 +40,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Python builder # Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile diff --git a/Dockerfile_amd b/Dockerfile_amd index 0b059f8c..4bb6407a 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -31,6 +31,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -38,7 +39,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Text Generation Inference base image for RoCm FROM rocm/dev-ubuntu-22.04:6.2 AS base diff --git a/Dockerfile_intel b/Dockerfile_intel index 7ab6bba1..b1f7b79c 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -1,6 +1,6 @@ ARG PLATFORM=xpu -FROM lukemathwalker/cargo-chef:latest-rust-1.80 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -32,6 +32,7 @@ RUN cargo chef cook --profile release-opt --recipe-path recipe.json ARG GIT_SHA ARG DOCKER_LABEL +COPY Cargo.lock Cargo.lock COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto @@ -39,7 +40,7 @@ COPY benchmark benchmark COPY router router COPY backends backends COPY launcher launcher -RUN cargo build --profile release-opt +RUN cargo build --profile release-opt --frozen # Text Generation Inference base image for Intel @@ -52,7 +53,7 @@ ARG MAMBA_VERSION=23.1.0-1 ARG PYTHON_VERSION='3.11.10' # Automatically set by buildx ARG TARGETPLATFORM -ENV PATH /opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:$PATH # TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda. # Install mamba diff --git a/rust-toolchain.toml b/rust-toolchain.toml index f392b161..12d58532 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,5 +1,5 @@ [toolchain] # Released on: June 13, 2024 # https://releases.rs/docs/1.79.0/ -channel = "1.80.0" +channel = "1.80.1" components = ["rustfmt", "clippy"] diff --git a/server/text_generation_server/layers/attention/kv_cache.py b/server/text_generation_server/layers/attention/kv_cache.py index ced4b5b4..3960c954 100644 --- a/server/text_generation_server/layers/attention/kv_cache.py +++ b/server/text_generation_server/layers/attention/kv_cache.py @@ -24,10 +24,8 @@ class KVCache: ): """Construct the key-value cache for a layer.""" - if ( - dtype == torch.float8_e5m2 - and (ATTENTION != "flashinfer" - or SYSTEM != "cuda") + if dtype == torch.float8_e5m2 and ( + ATTENTION != "flashinfer" or SYSTEM != "cuda" ): raise ValueError( "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"