Make prefix caching (with the flashinfer attention backend) the default, and run the full release tests.
This commit is contained in:
parent
310778e02a
commit
52c813527a
|
@ -38,7 +38,7 @@ impl BackendV3 {
|
|||
let prefix_caching = if let Ok(prefix_caching) = std::env::var("USE_PREFIX_CACHING") {
|
||||
matches!(prefix_caching.as_str(), "true" | "1")
|
||||
} else {
|
||||
false
|
||||
true
|
||||
};
|
||||
let attention = if let Ok(attention) = std::env::var("ATTENTION") {
|
||||
attention
|
||||
|
|
|
@ -5,7 +5,7 @@ from typing import Dict, Optional
|
|||
|
||||
from text_generation_server.utils.log import log_master
|
||||
|
||||
PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", "0").lower() in {"1", "true"}
|
||||
PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", "1").lower() in {"1", "true"}
|
||||
log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
|
||||
ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")
|
||||
_expected = {"paged", "flashdecoding", "flashinfer"}
|
||||
|
|
Loading…
Reference in New Issue