feat: allow null eos and bos tokens in config (#1791)

This PR fixes a failure when loading a tokenizer config in which the eos or bos token is null, as in [Qwen/Qwen1.5-72B-Chat](https://huggingface.co/Qwen/Qwen1.5-72B-Chat/blob/main/tokenizer_config.json). It resolves https://github.com/huggingface/text-generation-inference/issues/1545 and is related to https://github.com/QwenLM/Qwen1.5/issues/162
2024-04-23 10:26:54 -04:00 · 2024-04-23 10:26:54 -04:00 · 9be1db3101
parent 455cada527
commit 9be1db3101
2 changed files with 5 additions and 1 deletions
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@ -116,6 +116,7 @@ mod token_serde {
                    ))
                }
            }
+            Value::Null => Ok(None),
            _ => Err(de::Error::custom("invalid token format")),
        }
    }
--- a/router/src/main.rs
+++ b/router/src/main.rs
@ -254,7 +254,10 @@ async fn main() -> Result<(), RouterError> {

    // Load tokenizer config if found locally, or check if we can get it from the API if needed
    let tokenizer_config = if let Some(path) = tokenizer_config_path {
-        tracing::info!("Using local tokenizer config from user specified path");
+        tracing::info!(
+            "Using local tokenizer config from user specified path {}",
+            path
+        );
        HubTokenizerConfig::from_file(&std::path::PathBuf::from(path))
    } else if local_model {
        tracing::info!("Using local tokenizer config");