From afc7ded84fbd50106847f84b68fdae6f6f17158c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 30 Sep 2024 08:48:47 +0200
Subject: [PATCH] Remove compute capability lazy cell (#2580)

Remove compute capability lock

`get_cuda_capability` is currently called only once, so caching its
result in a `LazyLock` to avoid repeated calls is not necessary yet.
---
 launcher/src/gpu.rs  | 7 +------
 launcher/src/main.rs | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/launcher/src/gpu.rs b/launcher/src/gpu.rs
index 755d246a..b565220e 100644
--- a/launcher/src/gpu.rs
+++ b/launcher/src/gpu.rs
@@ -1,9 +1,4 @@
-use std::sync::LazyLock;
-
-pub static COMPUTE_CAPABILITY: LazyLock<Option<(usize, usize)>> =
-    LazyLock::new(get_cuda_capability);
-
-fn get_cuda_capability() -> Option<(usize, usize)> {
+pub fn get_cuda_capability() -> Option<(usize, usize)> {
     use pyo3::prelude::*;
 
     let py_get_capability = |py: Python| -> PyResult<(isize, isize)> {
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 583220a6..474a72d3 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -66,7 +66,7 @@ fn get_config(
 }
 
 fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) -> (String, String) {
-    let compute_capability = *gpu::COMPUTE_CAPABILITY;
+    let compute_capability = gpu::get_cuda_capability();
     let mut prefix_caching: Option<String> = std::env::var("USE_PREFIX_CACHING").ok();
     let mut attention: Option<String> = std::env::var("ATTENTION").ok();
     if let Some(config) = config {