feat: validate template variables before apply and improve sliding window check (#2403)
* feat: validate template variables before apply and improve sliding window check
* fix: improve missing template var test
This commit is contained in:
parent
136bcc8128
commit
155f9c98e2
|
@ -1,3 +1,5 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use crate::infer::InferError;
|
use crate::infer::InferError;
|
||||||
use crate::{
|
use crate::{
|
||||||
ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
|
ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
|
||||||
|
@ -16,6 +18,7 @@ pub(crate) struct ChatTemplate {
|
||||||
bos_token: Option<String>,
|
bos_token: Option<String>,
|
||||||
eos_token: Option<String>,
|
eos_token: Option<String>,
|
||||||
use_default_tool_template: bool,
|
use_default_tool_template: bool,
|
||||||
|
variables: HashSet<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChatTemplate {
|
impl ChatTemplate {
|
||||||
|
@ -30,19 +33,22 @@ impl ChatTemplate {
|
||||||
let template_str = template.into_boxed_str();
|
let template_str = template.into_boxed_str();
|
||||||
env.add_function("raise_exception", raise_exception);
|
env.add_function("raise_exception", raise_exception);
|
||||||
|
|
||||||
// check if contains the tools variable within the template
|
|
||||||
let use_default_tool_template =
|
|
||||||
!template_str.as_ref().replace(' ', "").contains("{{tools}}");
|
|
||||||
// leaking env and template_str as read-only, static resources for performance.
|
// leaking env and template_str as read-only, static resources for performance.
|
||||||
let template = Box::leak(env)
|
let template = Box::leak(env)
|
||||||
.template_from_str(Box::leak(template_str))
|
.template_from_str(Box::leak(template_str))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// get the list of variables that are used in the template
|
||||||
|
let variables = template.undeclared_variables(true);
|
||||||
|
// check if the `tools` variable is used in the template
|
||||||
|
let use_default_tool_template = !variables.contains("tools");
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
template,
|
template,
|
||||||
bos_token: bos_token.map(|token| token.as_str().to_string()),
|
bos_token: bos_token.map(|token| token.as_str().to_string()),
|
||||||
eos_token: eos_token.map(|token| token.as_str().to_string()),
|
eos_token: eos_token.map(|token| token.as_str().to_string()),
|
||||||
use_default_tool_template,
|
use_default_tool_template,
|
||||||
|
variables,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,6 +70,11 @@ impl ChatTemplate {
|
||||||
|
|
||||||
let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
|
let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
|
||||||
|
|
||||||
|
// check if guideline is expected but not provided
|
||||||
|
if self.variables.contains("guideline") && guideline.is_none() {
|
||||||
|
return Err(InferError::MissingTemplateVariable("guideline".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
self.template
|
self.template
|
||||||
.render(ChatTemplateInputs {
|
.render(ChatTemplateInputs {
|
||||||
guideline,
|
guideline,
|
||||||
|
@ -82,7 +93,8 @@ impl ChatTemplate {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::infer::chat_template::raise_exception;
|
use crate::infer::chat_template::raise_exception;
|
||||||
use crate::{ChatTemplateInputs, TextMessage};
|
use crate::infer::ChatTemplate;
|
||||||
|
use crate::{ChatTemplateInputs, Message, MessageContent, TextMessage, TokenizerConfigToken};
|
||||||
use minijinja::Environment;
|
use minijinja::Environment;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -770,4 +782,45 @@ mod tests {
|
||||||
assert_eq!(result, target);
|
assert_eq!(result, target);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_chat_template_invalid_with_guideline() {
|
||||||
|
let ct = ChatTemplate::new(
|
||||||
|
"{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n<end_of_turn>\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(),
|
||||||
|
Some(TokenizerConfigToken::String("<s>".to_string())),
|
||||||
|
Some(TokenizerConfigToken::String("</s>".to_string())),
|
||||||
|
);
|
||||||
|
|
||||||
|
// convert TextMessage to Message
|
||||||
|
let msgs: Vec<Message> = vec![
|
||||||
|
Message {
|
||||||
|
name: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: MessageContent::SingleText(
|
||||||
|
"I'd like to show off how chat templating works!".to_string(),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Message {
|
||||||
|
name: None,
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: MessageContent::SingleText(
|
||||||
|
"I'm doing great. How can I help you today?".to_string(),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Message {
|
||||||
|
name: None,
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: MessageContent::SingleText("Hello, how are you?".to_string()),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
let result = ct.apply(None, msgs, None);
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(_) => panic!("Should have failed since no guideline is provided"),
|
||||||
|
Err(e) => {
|
||||||
|
assert_eq!(e.to_string(), "Missing template vatiable: guideline")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -337,6 +337,8 @@ pub enum InferError {
|
||||||
IncompleteGeneration,
|
IncompleteGeneration,
|
||||||
#[error("Template error: {0}")]
|
#[error("Template error: {0}")]
|
||||||
TemplateError(#[from] minijinja::Error),
|
TemplateError(#[from] minijinja::Error),
|
||||||
|
#[error("Missing template vatiable: {0}")]
|
||||||
|
MissingTemplateVariable(String),
|
||||||
#[error("Tool error: {0}")]
|
#[error("Tool error: {0}")]
|
||||||
ToolError(String),
|
ToolError(String),
|
||||||
}
|
}
|
||||||
|
@ -349,6 +351,7 @@ impl InferError {
|
||||||
InferError::ValidationError(_) => "validation",
|
InferError::ValidationError(_) => "validation",
|
||||||
InferError::IncompleteGeneration => "incomplete_generation",
|
InferError::IncompleteGeneration => "incomplete_generation",
|
||||||
InferError::TemplateError(_) => "template_error",
|
InferError::TemplateError(_) => "template_error",
|
||||||
|
InferError::MissingTemplateVariable(_) => "missing_template_variable",
|
||||||
InferError::ToolError(_) => "tool_error",
|
InferError::ToolError(_) => "tool_error",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2297,6 +2297,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
|
||||||
InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
||||||
InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
|
InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
|
||||||
InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
||||||
|
InferError::MissingTemplateVariable(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
||||||
InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -830,7 +830,7 @@ mod tests {
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
// Err(ValidationError::MaxNewTokens(1, 10)) => (),
|
// Err(ValidationError::MaxNewTokens(1, 10)) => (),
|
||||||
Ok((_s, 0, 10)) => (),
|
Ok((_s, _, 0, 10)) => (),
|
||||||
r => panic!("Unexpected not max new tokens: {r:?}"),
|
r => panic!("Unexpected not max new tokens: {r:?}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -497,17 +497,15 @@ def get_model(
|
||||||
else -1
|
else -1
|
||||||
)
|
)
|
||||||
|
|
||||||
if max_input_tokens is not None and max_input_tokens <= sliding_window:
|
should_use_sliding_window = (
|
||||||
sliding_window = -1
|
sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
|
||||||
|
)
|
||||||
|
|
||||||
if (
|
if should_use_sliding_window:
|
||||||
(sliding_window is not None and sliding_window != -1)
|
if max_input_tokens is not None and max_input_tokens > sliding_window:
|
||||||
and not SUPPORTS_WINDOWING
|
raise ValueError(
|
||||||
and max_input_tokens > sliding_window
|
f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
|
||||||
):
|
)
|
||||||
raise ValueError(
|
|
||||||
f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
|
|
||||||
)
|
|
||||||
|
|
||||||
if model_type == DEEPSEEK_V2:
|
if model_type == DEEPSEEK_V2:
|
||||||
if FLASH_ATTENTION:
|
if FLASH_ATTENTION:
|
||||||
|
|
Loading…
Reference in New Issue