From 1ddde382bd327ead7e7055637c472cf0d89e8d2b Mon Sep 17 00:00:00 2001
From: David Holtz
Date: Wed, 9 Oct 2024 18:43:53 +0000
Subject: [PATCH] fix: only run test when cuda is available

---
 server/tests/utils/test_kv_cache.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/server/tests/utils/test_kv_cache.py b/server/tests/utils/test_kv_cache.py
index 8335089e..d8256eca 100644
--- a/server/tests/utils/test_kv_cache.py
+++ b/server/tests/utils/test_kv_cache.py
@@ -1,14 +1,12 @@
 import torch
 import pytest
 from text_generation_server.models.globals import ATTENTION, BLOCK_SIZE
-from text_generation_server.layers.attention import KVCache
 from text_generation_server.utils.import_utils import SYSTEM
 
 
-def test_kvcache_memory():
-    if SYSTEM == "cuda":
-        kvcache_memory()
-    else:
-        pytest.skip("Test only runs on CUDA")
+# only include this import when CUDA is available
+if SYSTEM == "cuda":
+    from text_generation_server.layers.attention import KVCache
+
 def kvcache_memory():
     num_blocks = 8188
@@ -34,11 +32,18 @@ def kvcache_memory():
     available_memory_after_kv_cache = torch.cuda.memory_allocated(device)
     kv_cache_memory = available_memory_after_kv_cache - current_memory
     kv_cache_memory_mb = kv_cache_memory / 1024 / 1024
-
+    print(f"KV Cache memory: {kv_cache_memory}")
     assert kv_cache_memory_mb > 1023
     assert kv_cache_memory_mb < 1025
 
 
+# only include this test when CUDA is available
+if SYSTEM == "cuda":
+
+    def test_kvcache_memory():
+        kvcache_memory()
+
+
 if __name__ == "__main__":
     test_kvcache_memory()
 
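
Note (not part of the patch): because test_kvcache_memory is now only defined when SYSTEM == "cuda", running the file directly on a non-CUDA host raises a NameError at the __main__ guard. A minimal alternative sketch, assuming the same SYSTEM flag and the kvcache_memory helper defined in this module, that keeps the test visible to pytest on every platform:

    import pytest

    from text_generation_server.utils.import_utils import SYSTEM

    # Guard only the CUDA-specific import; the test itself is always defined.
    if SYSTEM == "cuda":
        from text_generation_server.layers.attention import KVCache


    @pytest.mark.skipif(SYSTEM != "cuda", reason="Test only runs on CUDA")
    def test_kvcache_memory():
        kvcache_memory()


    if __name__ == "__main__":
        # Explicit guard for direct runs; skipif only takes effect under pytest.
        if SYSTEM == "cuda":
            test_kvcache_memory()

With skipif, pytest reports the test as skipped on non-CUDA systems rather than silently not collecting it, which is usually easier to spot in CI output.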