diff --git a/.github/workflows/ci_build.yaml b/.github/workflows/ci_build.yaml index d62297e4..5ca2854a 100644 --- a/.github/workflows/ci_build.yaml +++ b/.github/workflows/ci_build.yaml @@ -10,6 +10,7 @@ on: paths: - ".github/workflows/build.yaml" - "integration-tests/**" + - "backends/**" - "server/**" - "proto/**" - "router/**" diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs index 49e2bc8f..d82355de 100644 --- a/backends/v3/src/backend.rs +++ b/backends/v3/src/backend.rs @@ -35,9 +35,16 @@ impl BackendV3 { window_size: Option, speculate: u32, ) -> Self { + let flashdecoding = if let Ok(flashdecoding) = std::env::var("FLASH_DECODING") { + matches!(flashdecoding.to_lowercase().as_str(), "1" | "true") + } else { + false + }; + let block_size = if flashdecoding { 256 } else { 16 }; + let queue = Queue::new( requires_padding, - 16, + block_size, window_size, speculate, max_batch_total_tokens,