update launcher
This commit is contained in:
parent
15b351b4a9
commit
5e38d3534c
|
@ -236,10 +236,10 @@ struct Args {
|
|||
#[clap(long, env, value_enum)]
|
||||
dtype: Option<Dtype>,
|
||||
|
||||
// Specify the data type for KV cache. By default, it uses the model's data type.
|
||||
// CUDA 11.8+ supports `fp8(fp8_e4m3)` and 'fp8_e5m2', while ROCm (AMD GPU) supports `fp8(fp8_e4m3)'.
|
||||
// If 'fp8_e4m3' is chosen, a model checkpoint with scales for the KV cache should be provided.
|
||||
// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy."
|
||||
/// Specify the data type for KV cache. By default, it uses the model's data type.
|
||||
/// CUDA 11.8+ supports `fp8(fp8_e4m3)` and `fp8_e5m2`, while ROCm (AMD GPU) supports `fp8(fp8_e4m3fnuz)`.
|
||||
/// If `fp8` is chosen, a model checkpoint with scales for the KV cache should be provided.
|
||||
/// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy.
|
||||
#[clap(long, env, value_enum)]
|
||||
kv_cache_dtype: Option<KvDtype>,
|
||||
|
||||
|
|
Loading…
Reference in New Issue