From c5cd90dcef3b8a8772b2f6b1d8e769b16aae0687 Mon Sep 17 00:00:00 2001
From: nai-degen <khoners@protonmail.com>
Date: Wed, 13 Dec 2023 23:03:57 -0600
Subject: [PATCH] adjusts prompt transform to discourage Gemini from speaking
 for user

---
 src/proxy/google-ai.ts                        | 11 +++++-----
 .../transform-outbound-payload.ts             | 21 ++++++++++++++++---
 .../transformers/google-ai-to-openai.ts       | 13 +++++++++---
 3 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/src/proxy/google-ai.ts b/src/proxy/google-ai.ts
index 87d869d..d77d9ad 100644
--- a/src/proxy/google-ai.ts
+++ b/src/proxy/google-ai.ts
@@ -79,10 +79,12 @@ const googleAIResponseHandler: ProxyResHandlerWithBody = async (
 };
 
 function transformGoogleAIResponse(
-  googleAIResp: Record<string, any>,
+  resBody: Record<string, any>,
   req: Request
 ): Record<string, any> {
   const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
+  const parts = resBody.candidates[0].content?.parts ?? [{ text: "" }];
+  const content = parts[0].text.replace(/^(.{0,50}?): /, () => "");
   return {
     id: "goo-" + v4(),
     object: "chat.completion",
@@ -95,11 +97,8 @@ function transformGoogleAIResponse(
     },
     choices: [
       {
-        message: {
-          role: "assistant",
-          content: googleAIResp.candidates[0].content.parts[0].text,
-        },
-        finish_reason: googleAIResp.candidates[0].finishReason,
+        message: { role: "assistant", content },
+        finish_reason: resBody.candidates[0].finishReason,
         index: 0,
       },
     ],
diff --git a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
index aac0a07..efd60dc 100644
--- a/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/preprocessors/transform-outbound-payload.ts
@@ -346,14 +346,29 @@ function openaiToGoogleAI(
   const foundNames = new Set<string>();
   const contents = messages
     .map((m) => {
+      const role = m.role === "assistant" ? "model" : "user";
       // Detects character names so we can set stop sequences for them as Gemini
       // is prone to continuing as the next character.
+      // If no name is available, fall back to a generic name ("Character" or
+      // "User") so we can still set a stop sequence for it, though generic
+      // names don't work as well as real names.
       const text = flattenOpenAIMessageContent(m.content);
-      const name = m.name?.trim() || text.match(/^(.*?): /)?.[1]?.trim();
-      if (name) foundNames.add(name);
+      const propName = m.name?.trim();
+      const textName = text.match(/^(.*?): /)?.[1]?.trim();
+      const name =
+        propName || textName || (role === "model" ? "Character" : "User");
 
+      foundNames.add(name);
+
+      // Prefixing messages with their character name seems to help avoid
+      // Gemini trying to continue as the next character, or at the very least
+      // ensures it will hit the stop sequence.  Otherwise it will start a new
+      // paragraph and switch perspectives.
+      // The response will be very likely to include this prefix so frontends
+      // will need to strip it out.
+      const textPrefix = propName ? `${propName}: ` : "";
       return {
-        parts: [{ text }],
+        parts: [{ text: textPrefix + text }],
         role: m.role === "assistant" ? ("model" as const) : ("user" as const),
       };
     })
diff --git a/src/proxy/middleware/response/streaming/transformers/google-ai-to-openai.ts b/src/proxy/middleware/response/streaming/transformers/google-ai-to-openai.ts
index d1e4cec..8d6b1e5 100644
--- a/src/proxy/middleware/response/streaming/transformers/google-ai-to-openai.ts
+++ b/src/proxy/middleware/response/streaming/transformers/google-ai-to-openai.ts
@@ -22,7 +22,7 @@ type GoogleAIStreamEvent = {
  * chat.completion.chunk SSE.
  */
 export const googleAIToOpenAI: StreamingCompletionTransformer = (params) => {
-  const { data } = params;
+  const { data, index } = params;
 
   const rawEvent = parseEvent(data);
   if (!rawEvent.data || rawEvent.data === "[DONE]") {
@@ -35,7 +35,14 @@ export const googleAIToOpenAI: StreamingCompletionTransformer = (params) => {
   }
 
   const parts = completionEvent.candidates[0].content.parts;
-  const text = parts[0]?.text ?? "";
+  let content = parts[0]?.text ?? "";
+
+  // If this is the first chunk, try stripping speaker names from the response
+  // e.g. "John: Hello" -> "Hello"
+  if (index === 0) {
+    content = content.replace(/^(.*?): /, "").trim();
+  }
+
   const newEvent = {
     id: "goo-" + params.fallbackId,
     object: "chat.completion.chunk" as const,
@@ -44,7 +51,7 @@ export const googleAIToOpenAI: StreamingCompletionTransformer = (params) => {
     choices: [
       {
         index: 0,
-        delta: { content: text },
+        delta: { content },
         finish_reason: completionEvent.candidates[0].finishReason ?? null,
       },
     ],