From 155f9c98e2a272d255d42f0da8d2e88cc1eaa18a Mon Sep 17 00:00:00 2001
From: drbh
Date: Mon, 12 Aug 2024 10:58:40 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20validate=20template=20variables=20befor?=
 =?UTF-8?q?e=20apply=20and=20improve=20sliding=20wi=E2=80=A6=20(#2403)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: validate template variables before apply and improve sliding window check

* fix: improve missing template var test
---
 router/src/infer/chat_template.rs              | 61 +++++++++++++++++--
 router/src/infer/mod.rs                        |  3 +
 router/src/server.rs                           |  1 +
 router/src/validation.rs                       |  2 +-
 .../text_generation_server/models/__init__.py  | 18 +++---
 5 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index 7c2753ed..ef4beee2 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use crate::infer::InferError;
 use crate::{
     ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
@@ -16,6 +18,7 @@ pub(crate) struct ChatTemplate {
     bos_token: Option<String>,
     eos_token: Option<String>,
     use_default_tool_template: bool,
+    variables: HashSet<String>,
 }
 
 impl ChatTemplate {
@@ -30,19 +33,22 @@ impl ChatTemplate {
         let template_str = template.into_boxed_str();
         env.add_function("raise_exception", raise_exception);
 
-        // check if contains the tools variable within the template
-        let use_default_tool_template =
-            !template_str.as_ref().replace(' ', "").contains("{{tools}}");
         // leaking env and template_str as read-only, static resources for performance.
         let template = Box::leak(env)
             .template_from_str(Box::leak(template_str))
             .unwrap();
 
+        // get the list of variables that are used in the template
+        let variables = template.undeclared_variables(true);
+        // check if the `tools` variable is used in the template
+        let use_default_tool_template = !variables.contains("tools");
+
         Self {
             template,
             bos_token: bos_token.map(|token| token.as_str().to_string()),
             eos_token: eos_token.map(|token| token.as_str().to_string()),
             use_default_tool_template,
+            variables,
         }
     }
 
@@ -64,6 +70,11 @@ impl ChatTemplate {
 
         let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
 
+        // check if guideline is expected but not provided
+        if self.variables.contains("guideline") && guideline.is_none() {
+            return Err(InferError::MissingTemplateVariable("guideline".to_string()));
+        }
+
         self.template
             .render(ChatTemplateInputs {
                 guideline,
@@ -82,7 +93,8 @@ impl ChatTemplate {
 #[cfg(test)]
 mod tests {
     use crate::infer::chat_template::raise_exception;
-    use crate::{ChatTemplateInputs, TextMessage};
+    use crate::infer::ChatTemplate;
+    use crate::{ChatTemplateInputs, Message, MessageContent, TextMessage, TokenizerConfigToken};
     use minijinja::Environment;
 
     #[test]
@@ -770,4 +782,45 @@ mod tests {
             assert_eq!(result, target);
         }
     }
+
+    #[test]
+    fn test_chat_template_invalid_with_guideline() {
+        let ct = ChatTemplate::new(
+            "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(),
+            Some(TokenizerConfigToken::String("".to_string())),
+            Some(TokenizerConfigToken::String("".to_string())),
+        );
+
+        // convert TextMessage to Message
+        let msgs: Vec<Message> = vec![
+            Message {
+                name: None,
+                role: "user".to_string(),
+                content: MessageContent::SingleText(
+                    "I'd like to show off how chat templating works!".to_string(),
+                ),
+            },
+            Message {
+                name: None,
+                role: "assistant".to_string(),
+                content: MessageContent::SingleText(
+                    "I'm doing great. How can I help you today?".to_string(),
+                ),
+            },
+            Message {
+                name: None,
+                role: "user".to_string(),
+                content: MessageContent::SingleText("Hello, how are you?".to_string()),
+            },
+        ];
+
+        let result = ct.apply(None, msgs, None);
+
+        match result {
+            Ok(_) => panic!("Should have failed since no guideline is provided"),
+            Err(e) => {
+                assert_eq!(e.to_string(), "Missing template variable: guideline")
+            }
+        }
+    }
 }
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index 58d5cf9a..c9354d9a 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -337,6 +337,8 @@ pub enum InferError {
     IncompleteGeneration,
     #[error("Template error: {0}")]
     TemplateError(#[from] minijinja::Error),
+    #[error("Missing template variable: {0}")]
+    MissingTemplateVariable(String),
     #[error("Tool error: {0}")]
     ToolError(String),
 }
@@ -349,6 +351,7 @@ impl InferError {
             InferError::ValidationError(_) => "validation",
             InferError::IncompleteGeneration => "incomplete_generation",
             InferError::TemplateError(_) => "template_error",
+            InferError::MissingTemplateVariable(_) => "missing_template_variable",
             InferError::ToolError(_) => "tool_error",
         }
     }
diff --git a/router/src/server.rs b/router/src/server.rs
index 8c0bd762..99ec077f 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -2297,6 +2297,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
             InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
             InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
+            InferError::MissingTemplateVariable(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
         };
 
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 5011158a..0024723c 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -830,7 +830,7 @@ mod tests {
             .await
         {
             // Err(ValidationError::MaxNewTokens(1, 10)) => (),
-            Ok((_s, 0, 10)) => (),
+            Ok((_s, _, 0, 10)) => (),
             r => panic!("Unexpected not max new tokens: {r:?}"),
         }
     }
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 960b426b..4fa9e66d 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -497,17 +497,15 @@ def get_model(
         else -1
     )
 
-    if max_input_tokens is not None and max_input_tokens <= sliding_window:
-        sliding_window = -1
+    should_use_sliding_window = (
+        sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
+    )
 
-    if (
-        (sliding_window is not None and sliding_window != -1)
-        and not SUPPORTS_WINDOWING
-        and max_input_tokens > sliding_window
-    ):
-        raise ValueError(
-            f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
-        )
+    if should_use_sliding_window:
+        if max_input_tokens is not None and max_input_tokens > sliding_window:
+            raise ValueError(
+                f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
+            )
 
     if model_type == DEEPSEEK_V2:
         if FLASH_ATTENTION:
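-- 
Note on the mechanism this patch relies on: minijinja's
`Template::undeclared_variables(nested)` returns the set of variable names a
template reads but never defines, which is what lets `ChatTemplate::new`
discover `tools` and `guideline` up front. Below is a minimal, self-contained
sketch of that API using only the `minijinja` crate; the `guard` template text
and the `question` variable are illustrative stand-ins, not code from the
patch.

    use minijinja::Environment;

    fn main() {
        let mut env = Environment::new();
        // A toy template reading two inputs; `guideline` mirrors the variable
        // the patch checks for before rendering.
        env.add_template("guard", "Principle: {{ guideline }}\nQuestion: {{ question }}")
            .unwrap();
        let template = env.get_template("guard").unwrap();

        // `undeclared_variables(true)` also walks nested attribute lookups
        // (e.g. `messages[-1].content` in the real templates); the patch
        // passes `true` for the same reason.
        let variables = template.undeclared_variables(true);
        assert!(variables.contains("guideline"));
        assert!(variables.contains("question"));

        // With the set in hand, a caller can fail fast with a typed error
        // instead of rendering a template whose required input is missing.
        if variables.contains("guideline") {
            println!("this template requires a `guideline` argument");
        }
    }

Because the set is computed once in `ChatTemplate::new`, the per-request
guideline check in `apply` is a constant-time `HashSet` lookup, and the old
whitespace-stripping string search for `{{tools}}` becomes unnecessary.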