diff --git a/docs/openapi.json b/docs/openapi.json index f42f9390..08dc865f 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -994,13 +994,6 @@ "example": "1.0", "nullable": true }, - "guideline": { - "type": "string", - "description": "A guideline to be used in the chat_template", - "default": "null", - "example": "null", - "nullable": true - }, "logit_bias": { "type": "array", "items": { diff --git a/router/src/infer/chat_template.rs b/router/src/infer/chat_template.rs index 1071d0ba..1bad4a36 100644 --- a/router/src/infer/chat_template.rs +++ b/router/src/infer/chat_template.rs @@ -2,7 +2,6 @@ use crate::infer::InferError; use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool}; use minijinja::{Environment, ErrorKind, Template}; use minijinja_contrib::pycompat; -use std::collections::HashSet; /// Raise a exception (custom function) used in the chat templates pub(crate) fn raise_exception(err_text: String) -> Result { @@ -15,7 +14,6 @@ pub(crate) struct ChatTemplate { bos_token: Option, eos_token: Option, use_default_tool_template: bool, - variables: HashSet, } impl ChatTemplate { @@ -47,21 +45,14 @@ impl ChatTemplate { bos_token: bos_token.map(|token| token.as_str().to_string()), eos_token: eos_token.map(|token| token.as_str().to_string()), use_default_tool_template, - variables, } } pub(crate) fn apply( &self, - guideline: Option<&str>, mut messages: Vec, tools_and_prompt: Option<(Vec, String)>, ) -> Result { - // check if guideline is expected but not provided - if self.variables.contains("guideline") && guideline.is_none() { - return Err(InferError::MissingTemplateVariable("guideline".to_string())); - } - let tools = match tools_and_prompt { Some((tools, tool_prompt)) => { // check if the `tools` variable is used in the template @@ -87,7 +78,6 @@ impl ChatTemplate { self.template .render(ChatTemplateInputs { - guideline, messages, bos_token: self.bos_token.as_deref(), eos_token: self.eos_token.as_deref(), @@ -756,19 +746,6 @@ mod tests { }, target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!", }, - ChatTemplateTestItem { - name: "google/shieldgemma-9b", - chat_template: "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n", - input: ChatTemplateInputs { - messages: example_chat_with_system.clone(), - add_generation_prompt: false, - bos_token: Some(""), - eos_token: Some(""), - guideline: Some("Do not use offensive language."), - ..Default::default() - }, - target: "You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\n\n\nHuman Question: I'd like to show off how chat templating works!\n\n\nOur safety principle is defined in the below:\n\n* Do not use offensive language.\n\n===\n\nDoes the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\n\n", - }, ]; #[allow(unused_variables)] // name is unused @@ -794,47 +771,6 @@ mod tests { } } - #[test] - fn test_chat_template_invalid_with_guideline() { - let ct = ChatTemplate::new( - "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n\\n\" }}\n {{- \"\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(), - Some(TokenizerConfigToken::String("".to_string())), - Some(TokenizerConfigToken::String("".to_string())), - ); - - // convert TextMessage to Message - let msgs: Vec = vec![ - Message { - name: None, - role: "user".to_string(), - content: MessageContent::SingleText( - "I'd like to show off how chat templating works!".to_string(), - ), - }, - Message { - name: None, - role: "assistant".to_string(), - content: MessageContent::SingleText( - "I'm doing great. How can I help you today?".to_string(), - ), - }, - Message { - name: None, - role: "user".to_string(), - content: MessageContent::SingleText("Hello, how are you?".to_string()), - }, - ]; - - let result = ct.apply(None, msgs, None); - - match result { - Ok(_) => panic!("Should have failed since no guideline is provided"), - Err(e) => { - assert_eq!(e.to_string(), "Missing template vatiable: guideline") - } - } - } - #[test] fn test_chat_template_with_default_tool_template() { let ct = ChatTemplate::new( diff --git a/router/src/infer/mod.rs b/router/src/infer/mod.rs index d3d6bc59..1351b87e 100644 --- a/router/src/infer/mod.rs +++ b/router/src/infer/mod.rs @@ -159,14 +159,13 @@ impl Infer { #[instrument(skip_all)] pub(crate) fn apply_chat_template( &self, - guideline: Option, messages: Vec, tools_and_prompt: Option<(Vec, String)>, ) -> Result { self.chat_template .as_ref() .ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))? - .apply(guideline.as_deref(), messages, tools_and_prompt) + .apply(messages, tools_and_prompt) .map_err(|e| { metrics::counter!("tgi_request_failure", "err" => "template").increment(1); tracing::error!("{e}"); diff --git a/router/src/lib.rs b/router/src/lib.rs index 7f093b41..ea697c3a 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -909,11 +909,6 @@ pub(crate) struct ChatRequest { #[schema(nullable = true, default = "null", example = "null")] pub response_format: Option, - /// A guideline to be used in the chat_template - #[serde(default)] - #[schema(nullable = true, default = "null", example = "null")] - pub guideline: Option, - /// Options for streaming response. Only set this when you set stream: true. #[serde(default)] #[schema(nullable = true, example = "null")] @@ -934,7 +929,6 @@ impl ChatRequest { tool_prompt, temperature, response_format, - guideline, presence_penalty, frequency_penalty, top_p, @@ -962,7 +956,7 @@ impl ChatRequest { let (inputs, grammar, using_tools) = match response_format { Some(format) => { - let inputs = infer.apply_chat_template(guideline, messages, None)?; + let inputs = infer.apply_chat_template(messages, None)?; (inputs, Some(format), false) } None => { @@ -971,7 +965,6 @@ impl ChatRequest { Some((updated_tools, tool_schema)) => { let grammar = GrammarType::Json(serde_json::json!(tool_schema)); let inputs: String = infer.apply_chat_template( - guideline, messages, Some((updated_tools, tool_prompt)), )?; @@ -979,13 +972,13 @@ impl ChatRequest { } None => { // same as if no response_format or tools are set - let inputs = infer.apply_chat_template(guideline, messages, None)?; + let inputs = infer.apply_chat_template(messages, None)?; (inputs, None, false) } } } else { // if no response_format or tools are set simply apply the chat template to generate inputs - let inputs = infer.apply_chat_template(guideline, messages, None)?; + let inputs = infer.apply_chat_template(messages, None)?; (inputs, None, false) } } @@ -1163,7 +1156,6 @@ pub(crate) struct ChatTemplateInputs<'a> { eos_token: Option<&'a str>, add_generation_prompt: bool, tools: Option>, - guideline: Option<&'a str>, } #[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]