From 87004ae711e71977ac9404de60d3ab06eec4c959 Mon Sep 17 00:00:00 2001
From: Wauplin <lucainp@gmail.com>
Date: Wed, 20 Nov 2024 13:47:59 +0100
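Subject: [PATCH] Remove guideline from API

Drop the `guideline` field from `ChatRequest`, `ChatTemplateInputs` and
the OpenAPI schema. `ChatTemplate::apply` and
`Infer::apply_chat_template` no longer take a guideline argument, the
`variables` set kept on `ChatTemplate` for the missing-guideline check is
removed, and the shieldgemma template tests that relied on it are
deleted.
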
---
 docs/openapi.json                 |  7 ----
 router/src/infer/chat_template.rs | 64 -------------------------------
 router/src/infer/mod.rs           |  3 +-
 router/src/lib.rs                 | 14 ++-----
 4 files changed, 4 insertions(+), 84 deletions(-)
diff --git a/docs/openapi.json b/docs/openapi.json
index f42f9390..08dc865f 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -994,13 +994,6 @@
             "example": "1.0",
             "nullable": true
           },
-          "guideline": {
-            "type": "string",
-            "description": "A guideline to be used in the chat_template",
-            "default": "null",
-            "example": "null",
-            "nullable": true
-          },
           "logit_bias": {
             "type": "array",
             "items": {
diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs
index 1071d0ba..1bad4a36 100644
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@@ -2,7 +2,6 @@ use crate::infer::InferError;
 use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool};
 use minijinja::{Environment, ErrorKind, Template};
 use minijinja_contrib::pycompat;
-use std::collections::HashSet;
 
 /// Raise a exception (custom function) used in the chat templates
 pub(crate) fn raise_exception(err_text: String) -> Result<String, minijinja::Error> {
@@ -15,7 +14,6 @@ pub(crate) struct ChatTemplate {
     bos_token: Option<String>,
     eos_token: Option<String>,
     use_default_tool_template: bool,
-    variables: HashSet<String>,
 }
 
 impl ChatTemplate {
@@ -47,21 +45,14 @@ impl ChatTemplate {
             bos_token: bos_token.map(|token| token.as_str().to_string()),
             eos_token: eos_token.map(|token| token.as_str().to_string()),
             use_default_tool_template,
-            variables,
         }
     }
 
     pub(crate) fn apply(
         &self,
-        guideline: Option<&str>,
         mut messages: Vec<Message>,
         tools_and_prompt: Option<(Vec<Tool>, String)>,
     ) -> Result<String, InferError> {
-        // check if guideline is expected but not provided
-        if self.variables.contains("guideline") && guideline.is_none() {
-            return Err(InferError::MissingTemplateVariable("guideline".to_string()));
-        }
-
         let tools = match tools_and_prompt {
             Some((tools, tool_prompt)) => {
                 // check if the `tools` variable is used in the template
@@ -87,7 +78,6 @@ impl ChatTemplate {
 
         self.template
             .render(ChatTemplateInputs {
-                guideline,
                 messages,
                 bos_token: self.bos_token.as_deref(),
                 eos_token: self.eos_token.as_deref(),
@@ -756,19 +746,6 @@ mod tests {
                 },
                 target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
             },
-            ChatTemplateTestItem {
-                name: "google/shieldgemma-9b",
-                chat_template: "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n    {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n    {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-2].content }}\n    {{- \"\\n<end_of_turn>\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Chatbot Response: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the Chatbot Response violate the above principle? Your answer must  start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n",
-                input: ChatTemplateInputs {
-                    messages: example_chat_with_system.clone(),
-                    add_generation_prompt: false,
-                    bos_token: Some("<s>"),
-                    eos_token: Some("</s>"),
-                    guideline: Some("Do not use offensive language."),
-                    ..Default::default()
-                },
-                target: "<s>You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\n\n<start_of_turn>\nHuman Question: I'd like to show off how chat templating works!\n<end_of_turn>\n\nOur safety principle is defined in the below:\n\n* Do not use offensive language.\n\n===\n\nDoes the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\n\n",
-            },
         ];
 
         #[allow(unused_variables)] // name is unused
@@ -794,47 +771,6 @@ mod tests {
         }
     }
 
-    #[test]
-    fn test_chat_template_invalid_with_guideline() {
-        let ct = ChatTemplate::new(
-            "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n    {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n    {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-2].content }}\n    {{- \"\\n<end_of_turn>\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Chatbot Response: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the Chatbot Response violate the above principle? Your answer must  start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(),
-            Some(TokenizerConfigToken::String("<s>".to_string())),
-            Some(TokenizerConfigToken::String("</s>".to_string())),
-        );
-
-        // convert TextMessage to Message
-        let msgs: Vec<Message> = vec![
-            Message {
-                name: None,
-                role: "user".to_string(),
-                content: MessageContent::SingleText(
-                    "I'd like to show off how chat templating works!".to_string(),
-                ),
-            },
-            Message {
-                name: None,
-                role: "assistant".to_string(),
-                content: MessageContent::SingleText(
-                    "I'm doing great. How can I help you today?".to_string(),
-                ),
-            },
-            Message {
-                name: None,
-                role: "user".to_string(),
-                content: MessageContent::SingleText("Hello, how are you?".to_string()),
-            },
-        ];
-
-        let result = ct.apply(None, msgs, None);
-
-        match result {
-            Ok(_) => panic!("Should have failed since no guideline is provided"),
-            Err(e) => {
-                assert_eq!(e.to_string(), "Missing template vatiable: guideline")
-            }
-        }
-    }
-
     #[test]
     fn test_chat_template_with_default_tool_template() {
         let ct = ChatTemplate::new(
diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs
index d3d6bc59..1351b87e 100644
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@@ -159,14 +159,13 @@ impl Infer {
     #[instrument(skip_all)]
     pub(crate) fn apply_chat_template(
         &self,
-        guideline: Option<String>,
         messages: Vec<Message>,
         tools_and_prompt: Option<(Vec<Tool>, String)>,
     ) -> Result<String, InferError> {
         self.chat_template
             .as_ref()
             .ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
-            .apply(guideline.as_deref(), messages, tools_and_prompt)
+            .apply(messages, tools_and_prompt)
             .map_err(|e| {
                 metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
                 tracing::error!("{e}");
diff --git a/router/src/lib.rs b/router/src/lib.rs
index 7f093b41..ea697c3a 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -909,11 +909,6 @@ pub(crate) struct ChatRequest {
     #[schema(nullable = true, default = "null", example = "null")]
     pub response_format: Option<GrammarType>,
 
-    /// A guideline to be used in the chat_template
-    #[serde(default)]
-    #[schema(nullable = true, default = "null", example = "null")]
-    pub guideline: Option<String>,
-
     /// Options for streaming response. Only set this when you set stream: true.
     #[serde(default)]
     #[schema(nullable = true, example = "null")]
@@ -934,7 +929,6 @@ impl ChatRequest {
             tool_prompt,
             temperature,
             response_format,
-            guideline,
             presence_penalty,
             frequency_penalty,
             top_p,
@@ -962,7 +956,7 @@ impl ChatRequest {
 
         let (inputs, grammar, using_tools) = match response_format {
             Some(format) => {
-                let inputs = infer.apply_chat_template(guideline, messages, None)?;
+                let inputs = infer.apply_chat_template(messages, None)?;
                 (inputs, Some(format), false)
             }
             None => {
@@ -971,7 +965,6 @@ impl ChatRequest {
                         Some((updated_tools, tool_schema)) => {
                             let grammar = GrammarType::Json(serde_json::json!(tool_schema));
                             let inputs: String = infer.apply_chat_template(
-                                guideline,
                                 messages,
                                 Some((updated_tools, tool_prompt)),
                             )?;
@@ -979,13 +972,13 @@ impl ChatRequest {
                         }
                         None => {
                             // same as if no response_format or tools are set
-                            let inputs = infer.apply_chat_template(guideline, messages, None)?;
+                            let inputs = infer.apply_chat_template(messages, None)?;
                             (inputs, None, false)
                         }
                     }
                 } else {
                     // if no response_format or tools are set simply apply the chat template to generate inputs
-                    let inputs = infer.apply_chat_template(guideline, messages, None)?;
+                    let inputs = infer.apply_chat_template(messages, None)?;
                     (inputs, None, false)
                 }
             }
@@ -1163,7 +1156,6 @@ pub(crate) struct ChatTemplateInputs<'a> {
     eos_token: Option<&'a str>,
     add_generation_prompt: bool,
     tools: Option<Vec<Tool>>,
-    guideline: Option<&'a str>,
 }
 
 #[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]