feat: allow null eos and bos tokens in config (#1791)

This PR fixes an error when loading a tokenizer config in which the eos or
bos token is null, as in:
[Qwen/Qwen1.5-72B-Chat](https://huggingface.co/Qwen/Qwen1.5-72B-Chat/blob/main/tokenizer_config.json)

Resolves
https://github.com/huggingface/text-generation-inference/issues/1545 and
is related to https://github.com/QwenLM/Qwen1.5/issues/162
This commit is contained in:
drbh 2024-04-23 10:26:54 -04:00 committed by GitHub
parent 455cada527
commit 9be1db3101
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 1 deletions

View File

@ -116,6 +116,7 @@ mod token_serde {
))
}
}
Value::Null => Ok(None),
_ => Err(de::Error::custom("invalid token format")),
}
}

View File

@ -254,7 +254,10 @@ async fn main() -> Result<(), RouterError> {
// Load tokenizer config if found locally, or check if we can get it from the API if needed
let tokenizer_config = if let Some(path) = tokenizer_config_path {
tracing::info!("Using local tokenizer config from user specified path");
tracing::info!(
"Using local tokenizer config from user specified path {}",
path
);
HubTokenizerConfig::from_file(&std::path::PathBuf::from(path))
} else if local_model {
tracing::info!("Using local tokenizer config");