From b5fadc4c284bb6e598961d06c23b19ca2a61b1b7 Mon Sep 17 00:00:00 2001
From: "Yang, Bo" <pop.atry@gmail.com>
Date: Wed, 2 Aug 2023 09:51:54 -0700
Subject: [PATCH] Don't enable custom kernels if CUDA is not available (#6)

---
 .../models/custom_modeling/bloom_modeling.py                    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/text_generation_server/models/custom_modeling/bloom_modeling.py b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
index 047a187..297d5c6 100644
--- a/server/text_generation_server/models/custom_modeling/bloom_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
@@ -40,7 +40,7 @@ from text_generation_server.utils.layers import (
 )
 
 CUSTOM_KERNELS_ENABLED = False
-if not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True":
+if torch.cuda.is_available() and not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True":
     try:
         from custom_kernels import fused_bloom_attention_cuda