Fix FP8 KV-cache condition (#2611)

Update kv_cache.py
Florian Zimmermeister 2024-10-07 09:34:19 +02:00 committed by GitHub
parent 2358c2bb54
commit 0da4df4b96
1 changed file with 2 additions and 2 deletions


@@ -26,8 +26,8 @@ class KVCache:
         if (
             dtype == torch.float8_e5m2
-            and ATTENTION != "flashinfer"
-            and SYSTEM != "cuda"
+            and (ATTENTION != "flashinfer"
+            or SYSTEM != "cuda")
         ):
             raise ValueError(
                 "float8_e5m2 KV cache is currently only supported for flashinfer on CUDA"