Remove guideline from API
This commit is contained in:
parent
bd6e8b3c13
commit
87004ae711
|
@ -994,13 +994,6 @@
|
||||||
"example": "1.0",
|
"example": "1.0",
|
||||||
"nullable": true
|
"nullable": true
|
||||||
},
|
},
|
||||||
"guideline": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "A guideline to be used in the chat_template",
|
|
||||||
"default": "null",
|
|
||||||
"example": "null",
|
|
||||||
"nullable": true
|
|
||||||
},
|
|
||||||
"logit_bias": {
|
"logit_bias": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
|
|
|
@ -2,7 +2,6 @@ use crate::infer::InferError;
|
||||||
use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool};
|
use crate::{ChatTemplateInputs, Message, MessageChunk, TextMessage, TokenizerConfigToken, Tool};
|
||||||
use minijinja::{Environment, ErrorKind, Template};
|
use minijinja::{Environment, ErrorKind, Template};
|
||||||
use minijinja_contrib::pycompat;
|
use minijinja_contrib::pycompat;
|
||||||
use std::collections::HashSet;
|
|
||||||
|
|
||||||
/// Raise a exception (custom function) used in the chat templates
|
/// Raise a exception (custom function) used in the chat templates
|
||||||
pub(crate) fn raise_exception(err_text: String) -> Result<String, minijinja::Error> {
|
pub(crate) fn raise_exception(err_text: String) -> Result<String, minijinja::Error> {
|
||||||
|
@ -15,7 +14,6 @@ pub(crate) struct ChatTemplate {
|
||||||
bos_token: Option<String>,
|
bos_token: Option<String>,
|
||||||
eos_token: Option<String>,
|
eos_token: Option<String>,
|
||||||
use_default_tool_template: bool,
|
use_default_tool_template: bool,
|
||||||
variables: HashSet<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChatTemplate {
|
impl ChatTemplate {
|
||||||
|
@ -47,21 +45,14 @@ impl ChatTemplate {
|
||||||
bos_token: bos_token.map(|token| token.as_str().to_string()),
|
bos_token: bos_token.map(|token| token.as_str().to_string()),
|
||||||
eos_token: eos_token.map(|token| token.as_str().to_string()),
|
eos_token: eos_token.map(|token| token.as_str().to_string()),
|
||||||
use_default_tool_template,
|
use_default_tool_template,
|
||||||
variables,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn apply(
|
pub(crate) fn apply(
|
||||||
&self,
|
&self,
|
||||||
guideline: Option<&str>,
|
|
||||||
mut messages: Vec<Message>,
|
mut messages: Vec<Message>,
|
||||||
tools_and_prompt: Option<(Vec<Tool>, String)>,
|
tools_and_prompt: Option<(Vec<Tool>, String)>,
|
||||||
) -> Result<String, InferError> {
|
) -> Result<String, InferError> {
|
||||||
// check if guideline is expected but not provided
|
|
||||||
if self.variables.contains("guideline") && guideline.is_none() {
|
|
||||||
return Err(InferError::MissingTemplateVariable("guideline".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
let tools = match tools_and_prompt {
|
let tools = match tools_and_prompt {
|
||||||
Some((tools, tool_prompt)) => {
|
Some((tools, tool_prompt)) => {
|
||||||
// check if the `tools` variable is used in the template
|
// check if the `tools` variable is used in the template
|
||||||
|
@ -87,7 +78,6 @@ impl ChatTemplate {
|
||||||
|
|
||||||
self.template
|
self.template
|
||||||
.render(ChatTemplateInputs {
|
.render(ChatTemplateInputs {
|
||||||
guideline,
|
|
||||||
messages,
|
messages,
|
||||||
bos_token: self.bos_token.as_deref(),
|
bos_token: self.bos_token.as_deref(),
|
||||||
eos_token: self.eos_token.as_deref(),
|
eos_token: self.eos_token.as_deref(),
|
||||||
|
@ -756,19 +746,6 @@ mod tests {
|
||||||
},
|
},
|
||||||
target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
|
target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
|
||||||
},
|
},
|
||||||
ChatTemplateTestItem {
|
|
||||||
name: "google/shieldgemma-9b",
|
|
||||||
chat_template: "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n<end_of_turn>\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n",
|
|
||||||
input: ChatTemplateInputs {
|
|
||||||
messages: example_chat_with_system.clone(),
|
|
||||||
add_generation_prompt: false,
|
|
||||||
bos_token: Some("<s>"),
|
|
||||||
eos_token: Some("</s>"),
|
|
||||||
guideline: Some("Do not use offensive language."),
|
|
||||||
..Default::default()
|
|
||||||
},
|
|
||||||
target: "<s>You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\n\n<start_of_turn>\nHuman Question: I'd like to show off how chat templating works!\n<end_of_turn>\n\nOur safety principle is defined in the below:\n\n* Do not use offensive language.\n\n===\n\nDoes the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\n\n",
|
|
||||||
},
|
|
||||||
];
|
];
|
||||||
|
|
||||||
#[allow(unused_variables)] // name is unused
|
#[allow(unused_variables)] // name is unused
|
||||||
|
@ -794,47 +771,6 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_chat_template_invalid_with_guideline() {
|
|
||||||
let ct = ChatTemplate::new(
|
|
||||||
"{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Human Question: \" + messages[-2].content }}\n {{- \"\\n<end_of_turn>\\n\" }}\n {{- \"<start_of_turn>\\n\" }}\n {{- \"Chatbot Response: \" + messages[-1].content }}\n {{- \"\\n<end_of_turn>\\n\\n\" }}\n {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n {{- \"* \" + guideline + \"\\n\" }}\n {{- \"\\n===\\n\\n\" }}\n {{- \"Does the Chatbot Response violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n".to_string(),
|
|
||||||
Some(TokenizerConfigToken::String("<s>".to_string())),
|
|
||||||
Some(TokenizerConfigToken::String("</s>".to_string())),
|
|
||||||
);
|
|
||||||
|
|
||||||
// convert TextMessage to Message
|
|
||||||
let msgs: Vec<Message> = vec![
|
|
||||||
Message {
|
|
||||||
name: None,
|
|
||||||
role: "user".to_string(),
|
|
||||||
content: MessageContent::SingleText(
|
|
||||||
"I'd like to show off how chat templating works!".to_string(),
|
|
||||||
),
|
|
||||||
},
|
|
||||||
Message {
|
|
||||||
name: None,
|
|
||||||
role: "assistant".to_string(),
|
|
||||||
content: MessageContent::SingleText(
|
|
||||||
"I'm doing great. How can I help you today?".to_string(),
|
|
||||||
),
|
|
||||||
},
|
|
||||||
Message {
|
|
||||||
name: None,
|
|
||||||
role: "user".to_string(),
|
|
||||||
content: MessageContent::SingleText("Hello, how are you?".to_string()),
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
let result = ct.apply(None, msgs, None);
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Ok(_) => panic!("Should have failed since no guideline is provided"),
|
|
||||||
Err(e) => {
|
|
||||||
assert_eq!(e.to_string(), "Missing template vatiable: guideline")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_chat_template_with_default_tool_template() {
|
fn test_chat_template_with_default_tool_template() {
|
||||||
let ct = ChatTemplate::new(
|
let ct = ChatTemplate::new(
|
||||||
|
|
|
@ -159,14 +159,13 @@ impl Infer {
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
pub(crate) fn apply_chat_template(
|
pub(crate) fn apply_chat_template(
|
||||||
&self,
|
&self,
|
||||||
guideline: Option<String>,
|
|
||||||
messages: Vec<Message>,
|
messages: Vec<Message>,
|
||||||
tools_and_prompt: Option<(Vec<Tool>, String)>,
|
tools_and_prompt: Option<(Vec<Tool>, String)>,
|
||||||
) -> Result<String, InferError> {
|
) -> Result<String, InferError> {
|
||||||
self.chat_template
|
self.chat_template
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
|
.ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
|
||||||
.apply(guideline.as_deref(), messages, tools_and_prompt)
|
.apply(messages, tools_and_prompt)
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
|
metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
|
||||||
tracing::error!("{e}");
|
tracing::error!("{e}");
|
||||||
|
|
|
@ -909,11 +909,6 @@ pub(crate) struct ChatRequest {
|
||||||
#[schema(nullable = true, default = "null", example = "null")]
|
#[schema(nullable = true, default = "null", example = "null")]
|
||||||
pub response_format: Option<GrammarType>,
|
pub response_format: Option<GrammarType>,
|
||||||
|
|
||||||
/// A guideline to be used in the chat_template
|
|
||||||
#[serde(default)]
|
|
||||||
#[schema(nullable = true, default = "null", example = "null")]
|
|
||||||
pub guideline: Option<String>,
|
|
||||||
|
|
||||||
/// Options for streaming response. Only set this when you set stream: true.
|
/// Options for streaming response. Only set this when you set stream: true.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
#[schema(nullable = true, example = "null")]
|
#[schema(nullable = true, example = "null")]
|
||||||
|
@ -934,7 +929,6 @@ impl ChatRequest {
|
||||||
tool_prompt,
|
tool_prompt,
|
||||||
temperature,
|
temperature,
|
||||||
response_format,
|
response_format,
|
||||||
guideline,
|
|
||||||
presence_penalty,
|
presence_penalty,
|
||||||
frequency_penalty,
|
frequency_penalty,
|
||||||
top_p,
|
top_p,
|
||||||
|
@ -962,7 +956,7 @@ impl ChatRequest {
|
||||||
|
|
||||||
let (inputs, grammar, using_tools) = match response_format {
|
let (inputs, grammar, using_tools) = match response_format {
|
||||||
Some(format) => {
|
Some(format) => {
|
||||||
let inputs = infer.apply_chat_template(guideline, messages, None)?;
|
let inputs = infer.apply_chat_template(messages, None)?;
|
||||||
(inputs, Some(format), false)
|
(inputs, Some(format), false)
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -971,7 +965,6 @@ impl ChatRequest {
|
||||||
Some((updated_tools, tool_schema)) => {
|
Some((updated_tools, tool_schema)) => {
|
||||||
let grammar = GrammarType::Json(serde_json::json!(tool_schema));
|
let grammar = GrammarType::Json(serde_json::json!(tool_schema));
|
||||||
let inputs: String = infer.apply_chat_template(
|
let inputs: String = infer.apply_chat_template(
|
||||||
guideline,
|
|
||||||
messages,
|
messages,
|
||||||
Some((updated_tools, tool_prompt)),
|
Some((updated_tools, tool_prompt)),
|
||||||
)?;
|
)?;
|
||||||
|
@ -979,13 +972,13 @@ impl ChatRequest {
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
// same as if no response_format or tools are set
|
// same as if no response_format or tools are set
|
||||||
let inputs = infer.apply_chat_template(guideline, messages, None)?;
|
let inputs = infer.apply_chat_template(messages, None)?;
|
||||||
(inputs, None, false)
|
(inputs, None, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// if no response_format or tools are set simply apply the chat template to generate inputs
|
// if no response_format or tools are set simply apply the chat template to generate inputs
|
||||||
let inputs = infer.apply_chat_template(guideline, messages, None)?;
|
let inputs = infer.apply_chat_template(messages, None)?;
|
||||||
(inputs, None, false)
|
(inputs, None, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1163,7 +1156,6 @@ pub(crate) struct ChatTemplateInputs<'a> {
|
||||||
eos_token: Option<&'a str>,
|
eos_token: Option<&'a str>,
|
||||||
add_generation_prompt: bool,
|
add_generation_prompt: bool,
|
||||||
tools: Option<Vec<Tool>>,
|
tools: Option<Vec<Tool>>,
|
||||||
guideline: Option<&'a str>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]
|
#[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]
|
||||||
|
|
Loading…
Reference in New Issue