From 5e38d3534c50486bed1c8e275d57fb630340b3bf Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 25 Jun 2024 15:45:04 +0000 Subject: [PATCH] update launcher --- launcher/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index ef346cd6..af5ab066 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -236,10 +236,10 @@ struct Args { #[clap(long, env, value_enum)] dtype: Option, - // Specify the data type for KV cache. By default, it uses the model's data type. - // CUDA 11.8+ supports `fp8(fp8_e4m3)` and 'fp8_e5m2', while ROCm (AMD GPU) supports `fp8(fp8_e4m3)'. - // If 'fp8_e4m3' is chosen, a model checkpoint with scales for the KV cache should be provided. - // If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy." + /// Specify the data type for KV cache. By default, it uses the model's data type. + /// CUDA 11.8+ supports `fp8(fp8_e4m3)` and `fp8_e5m2`, while ROCm (AMD GPU) supports `fp8(fp8_e4m3fnuz)`. + /// If `fp8` is chosen, a model checkpoint with scales for the KV cache should be provided. + /// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy. #[clap(long, env, value_enum)] kv_cache_dtype: Option,