From 5e38d3534c50486bed1c8e275d57fb630340b3bf Mon Sep 17 00:00:00 2001
From: Mohit Sharma <mohit21sharma.ms@gmail.com>
Date: Tue, 25 Jun 2024 15:45:04 +0000
Subject: [PATCH] update launcher

---
 launcher/src/main.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index ef346cd6..af5ab066 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -236,10 +236,10 @@ struct Args {
     #[clap(long, env, value_enum)]
     dtype: Option<Dtype>,
 
-    // Specify the data type for KV cache. By default, it uses the model's data type.
-    // CUDA 11.8+ supports `fp8(fp8_e4m3)` and 'fp8_e5m2', while ROCm (AMD GPU) supports `fp8(fp8_e4m3)'.
-    // If 'fp8_e4m3' is chosen, a model checkpoint with scales for the KV cache should be provided.
-    // If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy."
+    /// Specify the data type for KV cache. By default, it uses the model's data type.
+    /// CUDA 11.8+ supports `fp8(fp8_e4m3)` and `fp8_e5m2`, while ROCm (AMD GPU) supports `fp8(fp8_e4m3fnuz)`.
+    /// If `fp8` is chosen, a model checkpoint with scales for the KV cache should be provided.
+    /// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy.
     #[clap(long, env, value_enum)]
     kv_cache_dtype: Option<KvDtype>,