fix: only run test when cuda is available

Author: David Holtz
Date:   2024-10-09 18:43:53 +00:00
Parent: a8108bc0da
Commit: 1ddde382bd

1 changed file with 12 additions and 7 deletions


@@ -1,14 +1,12 @@
 import torch
-import pytest
 from text_generation_server.models.globals import ATTENTION, BLOCK_SIZE
-from text_generation_server.layers.attention import KVCache
 from text_generation_server.utils.import_utils import SYSTEM
 
-def test_kvcache_memory():
-    if SYSTEM == "cuda":
-        kvcache_memory()
-    else:
-        pytest.skip("Test only runs on CUDA")
+# only include this import when CUDA is available
+if SYSTEM == "cuda":
+    from text_generation_server.layers.attention import KVCache
 
 def kvcache_memory():
     num_blocks = 8188
@@ -34,11 +32,18 @@ def kvcache_memory():
     available_memory_after_kv_cache = torch.cuda.memory_allocated(device)
     kv_cache_memory = available_memory_after_kv_cache - current_memory
     kv_cache_memory_mb = kv_cache_memory / 1024 / 1024
     print(f"KV Cache memory: {kv_cache_memory}")
     assert kv_cache_memory_mb > 1023
     assert kv_cache_memory_mb < 1025
 
+# only include this test when CUDA is available
+if SYSTEM == "cuda":
+
+    def test_kvcache_memory():
+        kvcache_memory()
+
 
 if __name__ == "__main__":
     test_kvcache_memory()
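
For readers skimming the change, a minimal, self-contained sketch of the resulting pattern follows. It assumes only that torch is installed; SYSTEM is recomputed locally here as a stand-in for text_generation_server.utils.import_utils.SYSTEM, and expensive_gpu_check is a hypothetical placeholder for the real kvcache_memory measurement.

    # test_cuda_only.py -- illustrative sketch, not the actual test file
    import torch

    # stand-in for text_generation_server.utils.import_utils.SYSTEM
    SYSTEM = "cuda" if torch.cuda.is_available() else "cpu"

    # guard the CUDA-specific import: on a CPU-only machine the real
    # import (KVCache) can fail at module load time, before any
    # pytest.skip() inside a test body would get a chance to run
    if SYSTEM == "cuda":
        # from text_generation_server.layers.attention import KVCache
        pass

    def expensive_gpu_check():
        # hypothetical placeholder for the real measurement: allocate a
        # KV cache and diff torch.cuda.memory_allocated() before/after
        assert torch.cuda.is_available()

    # guard the test definition itself: on CPU-only machines pytest
    # collects nothing here instead of collecting and then skipping
    if SYSTEM == "cuda":

        def test_expensive_gpu_check():
            expensive_gpu_check()

The trade-off versus pytest.mark.skipif is visibility: skipif would still report the test as skipped, but it requires all module-level imports to succeed everywhere; guarding both the import and the test definition keeps the module importable on any system, at the cost of the test not appearing at all in CPU-only runs.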