update launcher

This commit is contained in:
Mohit Sharma 2024-06-25 15:45:04 +00:00
parent 15b351b4a9
commit 5e38d3534c
1 changed file with 4 additions and 4 deletions

View File

@@ -236,10 +236,10 @@ struct Args {
#[clap(long, env, value_enum)]
dtype: Option<Dtype>,
// Specify the data type for KV cache. By default, it uses the model's data type.
// CUDA 11.8+ supports `fp8(fp8_e4m3)` and 'fp8_e5m2', while ROCm (AMD GPU) supports `fp8(fp8_e4m3)'.
// If 'fp8_e4m3' is chosen, a model checkpoint with scales for the KV cache should be provided.
// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy."
/// Specify the data type for KV cache. By default, it uses the model's data type.
/// CUDA 11.8+ supports `fp8(fp8_e4m3)` and `fp8_e5m2`, while ROCm (AMD GPU) supports `fp8(fp8_e4m3fnuz)`.
/// If `fp8` is chosen, a model checkpoint with scales for the KV cache should be provided.
/// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy.
#[clap(long, env, value_enum)]
kv_cache_dtype: Option<KvDtype>,