fix: only run test when cuda is available
parent a8108bc0da
commit 1ddde382bd
@@ -1,14 +1,12 @@
 import torch
 import pytest
 from text_generation_server.models.globals import ATTENTION, BLOCK_SIZE
-from text_generation_server.layers.attention import KVCache
 from text_generation_server.utils.import_utils import SYSTEM
 
-def test_kvcache_memory():
-    if SYSTEM == "cuda":
-        kvcache_memory()
-    else:
-        pytest.skip("Test only runs on CUDA")
+# only include this import when CUDA is available
+if SYSTEM == "cuda":
+    from text_generation_server.layers.attention import KVCache
+
 
 def kvcache_memory():
     num_blocks = 8188
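The first hunk moves the KVCache import behind the SYSTEM check, presumably because importing text_generation_server.layers.attention fails on machines without CUDA, which would otherwise break collection of this test file everywhere. A minimal alternative sketch, assuming the import does raise ImportError on unsupported systems: pytest.importorskip performs the import at collection time and skips the whole file when it fails.

    import pytest

    # Alternative to the manual guard (a sketch, not the commit's approach):
    # importorskip imports the module by name and skips every test in this
    # file if the import raises ImportError, e.g. on a machine without CUDA.
    attention = pytest.importorskip("text_generation_server.layers.attention")
    KVCache = attention.KVCache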
@@ -34,11 +32,18 @@ def kvcache_memory():
     available_memory_after_kv_cache = torch.cuda.memory_allocated(device)
     kv_cache_memory = available_memory_after_kv_cache - current_memory
     kv_cache_memory_mb = kv_cache_memory / 1024 / 1024
 
     print(f"KV Cache memory: {kv_cache_memory}")
     assert kv_cache_memory_mb > 1023
     assert kv_cache_memory_mb < 1025
 
 
+# only include this test when CUDA is available
+if SYSTEM == "cuda":
+
+    def test_kvcache_memory():
+        kvcache_memory()
+
+
 if __name__ == "__main__":
     test_kvcache_memory()
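The second hunk inverts the original approach: instead of defining test_kvcache_memory on every platform and skipping at runtime via pytest.skip, the test is only defined at all when SYSTEM is "cuda", so pytest never collects it elsewhere. The more common idiom is a skipif marker, sketched below under the assumption that SYSTEM is importable on every platform; a marker alone would not have been enough here, since the module-level KVCache import also had to be guarded.

    import pytest

    from text_generation_server.utils.import_utils import SYSTEM

    # Sketch of the skipif idiom: the test is always collected, but is
    # reported as skipped (with the given reason) on non-CUDA systems.
    # kvcache_memory is the helper defined earlier in this file.
    @pytest.mark.skipif(SYSTEM != "cuda", reason="Test only runs on CUDA")
    def test_kvcache_memory():
        kvcache_memory()

The trade-off is visibility: a marked test still shows up in the report as skipped with its reason, while a conditionally defined one disappears from the test list entirely.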