feat: add tokenizer-config-path to launcher args (#1495)

This PR adds the `tokenizer-config-path` argument to the launcher and passes it
through to the router.

Fixes:
https://github.com/huggingface/text-generation-inference/pull/1427
This commit is contained in:
drbh 2024-01-26 12:01:33 -05:00 committed by GitHub
parent 650fea1834
commit d9758851be
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 0 deletions

View File

@ -354,6 +354,14 @@ Options:
[env: NGROK_EDGE=]
```
## TOKENIZER_CONFIG_PATH
```shell
--tokenizer-config-path <TOKENIZER_CONFIG_PATH>
The path to the tokenizer config file. This path is used to load the tokenizer configuration which may include a `chat_template`. If not provided, the default config will be used from the model hub
[env: TOKENIZER_CONFIG_PATH=]
```
## ENV
```shell

View File

@ -368,6 +368,11 @@ struct Args {
#[clap(long, env)] #[clap(long, env)]
ngrok_edge: Option<String>, ngrok_edge: Option<String>,
/// The path to the tokenizer config file. This path is used to load the tokenizer configuration which may
/// include a `chat_template`. If not provided, the default config will be used from the model hub.
#[clap(long, env)]
tokenizer_config_path: Option<String>,
/// Display a lot of information about your runtime environment /// Display a lot of information about your runtime environment
#[clap(long, short, action)] #[clap(long, short, action)]
env: bool, env: bool,
@ -1016,6 +1021,12 @@ fn spawn_webserver(
args.model_id, args.model_id,
]; ];
// Tokenizer config path
if let Some(ref tokenizer_config_path) = args.tokenizer_config_path {
router_args.push("--tokenizer-config-path".to_string());
router_args.push(tokenizer_config_path.to_string());
}
// Model optional max batch total tokens // Model optional max batch total tokens
if let Some(max_batch_total_tokens) = args.max_batch_total_tokens { if let Some(max_batch_total_tokens) = args.max_batch_total_tokens {
router_args.push("--max-batch-total-tokens".to_string()); router_args.push("--max-batch-total-tokens".to_string());