feat: add guideline to chat request and template (#2391)

* feat: add guideline to chat request and template
* fix: add template test and update docs
2024-08-09 10:56:45 -04:00 · 2024-08-09 10:56:45 -04:00 · 0d06aed02d
parent 7a48a84784
commit 0d06aed02d
5 changed files with 37 additions and 3 deletions
--- a/docs/openapi.json
+++ b/docs/openapi.json
@ -819,6 +819,13 @@
            "example": "1.0",
            "nullable": true
          },
+          "guideline": {
+            "type": "string",
+            "description": "A guideline to be used in the chat_template",
+            "default": "null",
+            "example": "null",
+            "nullable": true
+          },
          "logit_bias": {
            "type": "array",
            "items": {
--- a/router/src/infer/chat_template.rs
+++ b/router/src/infer/chat_template.rs
@ -48,6 +48,7 @@ impl ChatTemplate {

    pub(crate) fn apply(
        &self,
+        guideline: Option<&str>,
        mut messages: Vec<Message>,
        grammar_with_prompt: Option<(GrammarType, String)>,
    ) -> Result<String, InferError> {
@ -65,6 +66,7 @@ impl ChatTemplate {

        self.template
            .render(ChatTemplateInputs {
+                guideline,
                messages,
                bos_token: self.bos_token.as_deref(),
                eos_token: self.eos_token.as_deref(),
@ -731,6 +733,19 @@ mod tests {
                },
                target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
            },
+            ChatTemplateTestItem {
+                name: "google/shieldgemma-9b",
+                chat_template: "{{- bos_token }}\n{%- if messages[-1].role == 'user' %}\n    {{- \"You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- else %}\n    {{- \"You are a policy expert trying to help determine whether an AI response to a prompt is in violation of the defined safety policies.\\n\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Human Question: \" + messages[-2].content }}\n    {{- \"\\n<end_of_turn>\\n\" }}\n    {{- \"<start_of_turn>\\n\" }}\n    {{- \"Chatbot Response: \" + messages[-1].content }}\n    {{- \"\\n<end_of_turn>\\n\\n\" }}\n    {{- \"Our safety principle is defined in the below:\\n\\n\" }}\n    {{- \"* \" + guideline + \"\\n\" }}\n    {{- \"\\n===\\n\\n\" }}\n    {{- \"Does the Chatbot Response violate the above principle? Your answer must  start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\\n\\n\" }}\n{%- endif %}\n\n",
+                input: ChatTemplateInputs {
+                    messages: example_chat_with_system.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                    guideline: Some("Do not use offensive language."),
+                    ..Default::default()
+                },
+                target: "<s>You are a policy expert trying to help determine whether a user prompt is in violation of the defined safety policies.\n\n<start_of_turn>\nHuman Question: I'd like to show off how chat templating works!\n<end_of_turn>\n\nOur safety principle is defined in the below:\n\n* Do not use offensive language.\n\n===\n\nDoes the human question violate the above principle? Your answer must start with 'Yes' or 'No'. And then walk through step by step to be sure we answer correctly.\n\n",
+            },
        ];

        #[allow(unused_variables)] // name is unused
--- a/router/src/infer/mod.rs
+++ b/router/src/infer/mod.rs
@ -138,13 +138,14 @@ impl Infer {
    #[instrument(skip_all)]
    pub(crate) fn apply_chat_template(
        &self,
+        guideline: Option<String>,
        messages: Vec<Message>,
        grammar_with_prompt: Option<(GrammarType, String)>,
    ) -> Result<String, InferError> {
        self.chat_template
            .as_ref()
            .ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
-            .apply(messages, grammar_with_prompt)
+            .apply(guideline.as_deref(), messages, grammar_with_prompt)
            .map_err(|e| {
                metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
                tracing::error!("{e}");
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@ -858,6 +858,11 @@ pub(crate) struct ChatRequest {
    #[serde(default)]
    #[schema(nullable = true, default = "null", example = "null")]
    pub response_format: Option<GrammarType>,
+
+    /// A guideline to be used in the chat_template
+    #[serde(default)]
+    #[schema(nullable = true, default = "null", example = "null")]
+    pub guideline: Option<String>,
 }

 fn default_tool_prompt() -> Option<String> {
@ -965,6 +970,7 @@ pub(crate) struct ChatTemplateInputs<'a> {
    add_generation_prompt: bool,
    tools: Option<&'a str>,
    tools_prompt: Option<&'a str>,
+    guideline: Option<&'a str>,
 }

 #[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]
--- a/router/src/server.rs
+++ b/router/src/server.rs
@ -141,6 +141,7 @@ async fn get_chat_tokenize(
        tool_prompt,
        temperature,
        response_format,
+        guideline,
        ..
    } = req;

@ -151,6 +152,7 @@ async fn get_chat_tokenize(
        tools,
        tool_choice,
        &tool_prompt,
+        guideline,
        messages,
    )?;

@ -1123,6 +1125,7 @@ async fn chat_completions(
        tool_prompt,
        temperature,
        response_format,
+        guideline,
        ..
    } = req;

@ -1142,6 +1145,7 @@ async fn chat_completions(
        tools,
        tool_choice,
        &tool_prompt,
+        guideline,
        messages,
    )?;

@ -2402,6 +2406,7 @@ fn prepare_chat_input(
    tools: Option<Vec<Tool>>,
    tool_choice: ToolChoice,
    tool_prompt: &str,
+    guideline: Option<String>,
    messages: Vec<Message>,
 ) -> Result<PreparedInput, InferError> {
    if response_format.is_some() && tools.is_some() {
@ -2411,7 +2416,7 @@ fn prepare_chat_input(
    }

    if let Some(format) = response_format {
-        let inputs = infer.apply_chat_template(messages, None)?;
+        let inputs = infer.apply_chat_template(guideline, messages, None)?;
        return Ok((inputs, Some(format), None));
    }

@ -2423,6 +2428,6 @@ fn prepare_chat_input(
    let tools_grammar_prompt = tool_grammar
        .as_ref()
        .map(|t| (GrammarType::Json(serde_json::json!(t)), tool_prompt.into()));
-    let inputs = infer.apply_chat_template(messages, tools_grammar_prompt)?;
+    let inputs = infer.apply_chat_template(guideline, messages, tools_grammar_prompt)?;
    Ok((inputs, grammar, tool_grammar))
 }