update launcher
This commit is contained in:
parent
15b351b4a9
commit
5e38d3534c
|
@ -236,10 +236,10 @@ struct Args {
|
||||||
#[clap(long, env, value_enum)]
|
#[clap(long, env, value_enum)]
|
||||||
dtype: Option<Dtype>,
|
dtype: Option<Dtype>,
|
||||||
|
|
||||||
// Specify the data type for KV cache. By default, it uses the model's data type.
|
/// Specify the data type for KV cache. By default, it uses the model's data type.
|
||||||
// CUDA 11.8+ supports `fp8(fp8_e4m3)` and 'fp8_e5m2', while ROCm (AMD GPU) supports `fp8(fp8_e4m3)'.
|
/// CUDA 11.8+ supports `fp8(fp8_e4m3)` and `fp8_e5m2`, while ROCm (AMD GPU) supports `fp8(fp8_e4m3fnuz)`.
|
||||||
// If 'fp8_e4m3' is chosen, a model checkpoint with scales for the KV cache should be provided.
|
/// If 'fp8' is chosen, a model checkpoint with scales for the KV cache should be provided.
|
||||||
// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy."
|
/// If not provided, the KV cache scaling factors default to 1.0, which may impact accuracy.
|
||||||
#[clap(long, env, value_enum)]
|
#[clap(long, env, value_enum)]
|
||||||
kv_cache_dtype: Option<KvDtype>,
|
kv_cache_dtype: Option<KvDtype>,
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue