diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index dfae8ed2..8d988ad5 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -12,7 +12,7 @@ PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in { "1", "true", } -PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "0").lower() in {"1", "true"} +PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "1").lower() in {"1", "true"} log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}") _expected = {"paged", "flashdecoding", "flashinfer"} assert (