From 1ddde382bd327ead7e7055637c472cf0d89e8d2b Mon Sep 17 00:00:00 2001
From: David Holtz
Date: Wed, 9 Oct 2024 18:43:53 +0000
Subject: [PATCH] fix: only run test when cuda is available

---
 server/tests/utils/test_kv_cache.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/server/tests/utils/test_kv_cache.py b/server/tests/utils/test_kv_cache.py
index 8335089e..d8256eca 100644
--- a/server/tests/utils/test_kv_cache.py
+++ b/server/tests/utils/test_kv_cache.py
@@ -1,14 +1,12 @@
 import torch
 import pytest
 from text_generation_server.models.globals import ATTENTION, BLOCK_SIZE
-from text_generation_server.layers.attention import KVCache
 from text_generation_server.utils.import_utils import SYSTEM
 
 
-def test_kvcache_memory():
-    if SYSTEM == "cuda":
-        kvcache_memory()
-    else:
-        pytest.skip("Test only runs on CUDA")
+# only include this import when CUDA is available
+if SYSTEM == "cuda":
+    from text_generation_server.layers.attention import KVCache
+
 def kvcache_memory():
     num_blocks = 8188
@@ -34,11 +32,18 @@ def kvcache_memory():
     available_memory_after_kv_cache = torch.cuda.memory_allocated(device)
     kv_cache_memory = available_memory_after_kv_cache - current_memory
     kv_cache_memory_mb = kv_cache_memory / 1024 / 1024
-
+    print(f"KV Cache memory: {kv_cache_memory}")
     assert kv_cache_memory_mb > 1023
     assert kv_cache_memory_mb < 1025
 
 
+# only include this test when CUDA is available
+if SYSTEM == "cuda":
+
+    def test_kvcache_memory():
+        kvcache_memory()
+
+
 if __name__ == "__main__":
     test_kvcache_memory()
 
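
Note (not part of the patch): because test_kvcache_memory is now only defined when SYSTEM == "cuda", running the file directly on a non-CUDA host raises a NameError at the __main__ guard. A minimal alternative sketch, assuming the same SYSTEM flag and the kvcache_memory helper defined in this module, that keeps the test visible to pytest on every platform:

    import pytest

    from text_generation_server.utils.import_utils import SYSTEM

    # Guard only the CUDA-specific import; the test itself is always defined.
    if SYSTEM == "cuda":
        from text_generation_server.layers.attention import KVCache


    @pytest.mark.skipif(SYSTEM != "cuda", reason="Test only runs on CUDA")
    def test_kvcache_memory():
        kvcache_memory()


    if __name__ == "__main__":
        # Explicit guard for direct runs; skipif only takes effect under pytest.
        if SYSTEM == "cuda":
            test_kvcache_memory()

With skipif, pytest reports the test as skipped on non-CUDA systems rather than silently not collecting it, which is usually easier to spot in CI output.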