From db1114955a967eda0481b246e83c70a9fc44bddd Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 2 Dec 2024 07:00:03 +0100
Subject: [PATCH] chunking by default.

---
 server/text_generation_server/models/globals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py
index dfae8ed2..8d988ad5 100644
--- a/server/text_generation_server/models/globals.py
+++ b/server/text_generation_server/models/globals.py
@@ -12,7 +12,7 @@ PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in {
     "1",
     "true",
 }
-PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "0").lower() in {"1", "true"}
+PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "1").lower() in {"1", "true"}
 log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
 _expected = {"paged", "flashdecoding", "flashinfer"}
 assert (