From 155f9c98e2a272d255d42f0da8d2e88cc1eaa18a Mon Sep 17 00:00:00 2001
From: drbh
Date: Mon, 12 Aug 2024 10:58:40 -0400
Subject: [PATCH] =?UTF-8?q?feat:=20validate=20template=20variables=20befor?=
 =?UTF-8?q?e=20apply=20and=20improve=20sliding=20wi=E2=80=A6=20(#2403)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: validate template variables before apply and improve sliding window check

* fix: improve missing template var test
---
 router/src/infer/chat_template.rs              | 61 +++++++++++++++++--
 router/src/infer/mod.rs                        |  3 +
 router/src/server.rs                           |  1 +
 router/src/validation.rs                       |  2 +-
 .../text_generation_server/models/__init__.py  | 18 +++---
 5 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index 7c2753ed..ef4beee2 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -1,3 +1,5 @@
+use std::collections::HashSet;
+
 use crate::infer::InferError;
 use crate::{
     ChatTemplateInputs, GrammarType, Message, MessageChunk, TextMessage, TokenizerConfigToken,
@@ -16,6 +18,7 @@ pub(crate) struct ChatTemplate {
     bos_token: Option<String>,
     eos_token: Option<String>,
     use_default_tool_template: bool,
+    variables: HashSet<String>,
 }
 
 impl ChatTemplate {
@@ -30,19 +33,22 @@ impl ChatTemplate {
         let template_str = template.into_boxed_str();
         env.add_function("raise_exception", raise_exception);
 
-        // check if contains the tools variable within the template
-        let use_default_tool_template =
-            !template_str.as_ref().replace(' ', "").contains("{{tools}}");
         // leaking env and template_str as read-only, static resources for performance.
         let template = Box::leak(env)
             .template_from_str(Box::leak(template_str))
             .unwrap();
 
+        // get the list of variables that are used in the template
+        let variables = template.undeclared_variables(true);
+        // check if the `tools` variable is used in the template
+        let use_default_tool_template = !variables.contains("tools");
+
         Self {
             template,
             bos_token: bos_token.map(|token| token.as_str().to_string()),
             eos_token: eos_token.map(|token| token.as_str().to_string()),
             use_default_tool_template,
+            variables,
         }
     }
 
@@ -64,6 +70,11 @@ impl ChatTemplate {
 
         let messages: Vec<TextMessage> = messages.into_iter().map(|c| c.into()).collect();
 
+        // check if guideline is expected but not provided
+        if self.variables.contains("guideline") && guideline.is_none() {
+            return Err(InferError::MissingTemplateVariable("guideline".to_string()));
+        }
+
         self.template
             .render(ChatTemplateInputs {
                 guideline,
@@ -82,7 +93,8 @@ impl ChatTemplate {
 #[cfg(test)]
 mod tests {
     use crate::infer::chat_template::raise_exception;
-    use crate::{ChatTemplateInputs, TextMessage};
+    use crate::infer::ChatTemplate;
+    use crate::{ChatTemplateInputs, Message, MessageContent, TextMessage, TokenizerConfigToken};
     use minijinja::Environment;
 
     #[test]
@@ -770,4 +782,45 @@ mod tests {
             assert_eq!(result, target);
         }
     }
+
+    #[test]
+    fn test_chat_template_invalid_with_guideline() {
+        let ct = ChatTemplate::new(
+            "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(),
+            Some(TokenizerConfigToken::String("".to_string())),
+            Some(TokenizerConfigToken::String("".to_string())),
+        );
+
+        // convert TextMessage to Message
+        let msgs: Vec<Message> = vec![
+            Message {
+                name: None,
+                role: "user".to_string(),
+                content: MessageContent::SingleText(
+                    "I'd like to show off how chat templating works!".to_string(),
+                ),
+            },
+            Message {
+                name: None,
+                role: "assistant".to_string(),
+                content: MessageContent::SingleText(
+                    "I'm doing great. How can I help you today?".to_string(),
+                ),
+            },
+            Message {
+                name: None,
+                role: "user".to_string(),
+                content: MessageContent::SingleText("Hello, how are you?".to_string()),
+            },
+        ];
+
+        let result = ct.apply(None, msgs, None);
+
+        match result {
+            Ok(_) => panic!("Should have failed since no guideline is provided"),
+            Err(e) => {
+                assert_eq!(e.to_string(), "Missing template variable: guideline")
+            }
+        }
+    }
 }
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index 58d5cf9a..c9354d9a 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -337,6 +337,8 @@ pub enum InferError {
     IncompleteGeneration,
     #[error("Template error: {0}")]
     TemplateError(#[from] minijinja::Error),
+    #[error("Missing template variable: {0}")]
+    MissingTemplateVariable(String),
     #[error("Tool error: {0}")]
     ToolError(String),
 }
@@ -349,6 +351,7 @@ impl InferError {
             InferError::ValidationError(_) => "validation",
             InferError::IncompleteGeneration => "incomplete_generation",
             InferError::TemplateError(_) => "template_error",
+            InferError::MissingTemplateVariable(_) => "missing_template_variable",
             InferError::ToolError(_) => "tool_error",
         }
     }
diff --git a/router/src/server.rs b/router/src/server.rs
index 8c0bd762..99ec077f 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -2297,6 +2297,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
             InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
             InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
+            InferError::MissingTemplateVariable(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::ToolError(_) => StatusCode::UNPROCESSABLE_ENTITY,
         };
 
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 5011158a..0024723c 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -830,7 +830,7 @@ mod tests {
             .await
         {
             // Err(ValidationError::MaxNewTokens(1, 10)) => (),
-            Ok((_s, 0, 10)) => (),
+            Ok((_s, _, 0, 10)) => (),
             r => panic!("Unexpected not max new tokens: {r:?}"),
         }
     }
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 960b426b..4fa9e66d 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -497,17 +497,15 @@ def get_model(
         else -1
     )
 
-    if max_input_tokens is not None and max_input_tokens <= sliding_window:
-        sliding_window = -1
+    should_use_sliding_window = (
+        sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
+    )
 
-    if (
-        (sliding_window is not None and sliding_window != -1)
-        and not SUPPORTS_WINDOWING
-        and max_input_tokens > sliding_window
-    ):
-        raise ValueError(
-            f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
-        )
+    if should_use_sliding_window:
+        if max_input_tokens is not None and max_input_tokens > sliding_window:
+            raise ValueError(
+                f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
+            )
 
     if model_type == DEEPSEEK_V2:
         if FLASH_ATTENTION:
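-- 
Note on the mechanism this patch relies on: minijinja's
`Template::undeclared_variables(nested)` returns the set of variable names a
template reads but never defines, which is what lets `ChatTemplate::new`
discover `tools` and `guideline` up front. Below is a minimal, self-contained
sketch of that API using only the `minijinja` crate; the `guard` template text
and the `question` variable are illustrative stand-ins, not code from the
patch.

    use minijinja::Environment;

    fn main() {
        let mut env = Environment::new();
        // A toy template reading two inputs; `guideline` mirrors the variable
        // the patch checks for before rendering.
        env.add_template("guard", "Principle: {{ guideline }}\nQuestion: {{ question }}")
            .unwrap();
        let template = env.get_template("guard").unwrap();

        // `undeclared_variables(true)` also walks nested attribute lookups
        // (e.g. `messages[-1].content` in the real templates); the patch
        // passes `true` for the same reason.
        let variables = template.undeclared_variables(true);
        assert!(variables.contains("guideline"));
        assert!(variables.contains("question"));

        // With the set in hand, a caller can fail fast with a typed error
        // instead of rendering a template whose required input is missing.
        if variables.contains("guideline") {
            println!("this template requires a `guideline` argument");
        }
    }

Because the set is computed once in `ChatTemplate::new`, the per-request
guideline check in `apply` is a constant-time `HashSet` lookup, and the old
whitespace-stripping string search for `{{tools}}` becomes unnecessary.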