From afc7ded84fbd50106847f84b68fdae6f6f17158c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Mon, 30 Sep 2024 08:48:47 +0200 Subject: [PATCH] Remove compute capability lazy cell (#2580) Remove compute capability lock We are only calling the `get_cuda_capability` function once, so avoiding the cost of multiple calls is not really necessary yet. --- launcher/src/gpu.rs | 7 +------ launcher/src/main.rs | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/launcher/src/gpu.rs b/launcher/src/gpu.rs index 755d246a..b565220e 100644 --- a/launcher/src/gpu.rs +++ b/launcher/src/gpu.rs @@ -1,9 +1,4 @@ -use std::sync::LazyLock; - -pub static COMPUTE_CAPABILITY: LazyLock<Option<(usize, usize)>> = - LazyLock::new(get_cuda_capability); - -fn get_cuda_capability() -> Option<(usize, usize)> { +pub fn get_cuda_capability() -> Option<(usize, usize)> { use pyo3::prelude::*; let py_get_capability = |py: Python| -> PyResult<(isize, isize)> { diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 583220a6..474a72d3 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -66,7 +66,7 @@ fn get_config( } fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) -> (String, String) { - let compute_capability = *gpu::COMPUTE_CAPABILITY; + let compute_capability = gpu::get_cuda_capability(); let mut prefix_caching: Option<String> = std::env::var("USE_PREFIX_CACHING").ok(); let mut attention: Option<String> = std::env::var("ATTENTION").ok(); if let Some(config) = config {