From cd754bd6012e49b7e74c45fd48519ac87024044b Mon Sep 17 00:00:00 2001
From: khanon
Date: Mon, 5 Jun 2023 03:12:34 +0000
Subject: [PATCH] Allow overriding default Claude model selection
 (khanon/oai-reverse-proxy!19)

---
 .env.example                                 |  7 +++++
 .../request/transform-outbound-payload.ts    | 29 ++++++++++++-------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/.env.example b/.env.example
index 982b055..5f78bef 100644
--- a/.env.example
+++ b/.env.example
@@ -35,6 +35,13 @@ OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 ANTHROPIC_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
+# TEMPORARY: This will eventually be replaced by a more robust system.
+# You can adjust the models used when sending OpenAI prompts to /anthropic.
+# Refer to Anthropic's docs for more info (note that they don't list older
+# versions of the models, but they still work).
+# CLAUDE_SMALL_MODEL=claude-v1.2
+# CLAUDE_BIG_MODEL=claude-v1-100k
+
 # You can require a Bearer token for requests when using proxy_token gatekeeper.
 # PROXY_KEY=your-secret-key
 
diff --git a/src/proxy/middleware/request/transform-outbound-payload.ts b/src/proxy/middleware/request/transform-outbound-payload.ts
index f022a2f..336c429 100644
--- a/src/proxy/middleware/request/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/transform-outbound-payload.ts
@@ -119,17 +119,26 @@ function openaiToAnthropic(body: any, req: Request) {
     })
     .join("") + "\n\nAssistant: ";
 
-  // When translating from OpenAI to Anthropic, we obviously can't use the
-  // provided OpenAI model name as-is. We will instead select a Claude model,
-  // choosing either the 100k token model or the 9k token model depending on
-  // the length of the prompt. I'm not bringing in the full OpenAI tokenizer for
-  // this so we'll use Anthropic's guideline of ~28000 characters to about 8k
-  // tokens (https://console.anthropic.com/docs/prompt-design#prompt-length)
-  // as the cutoff, minus a little bit for safety.
+  // Claude 1.2 has been selected as the default for smaller prompts because it
+  // is said to be less pozzed than the newer 1.3 model. But this is not based
+  // on any empirical testing, just speculation based on Anthropic stating that
+  // 1.3 is "safer and less susceptible to adversarial attacks" than 1.2.
+  // From my own interactions, both are pretty easy to jailbreak so I don't
+  // think there's much of a difference, honestly.
 
-  // For smaller prompts we use 1.2 because it's not as annoying as 1.3
-  // For big prompts (v1, auto-selects the latest model) is all we can use.
-  const model = prompt.length > 25000 ? "claude-v1-100k" : "claude-v1.2";
+  // If you want to override the model selection, you can set the
+  // CLAUDE_BIG_MODEL and CLAUDE_SMALL_MODEL environment variables in your
+  // .env file.
+
+  // Using "v1" of a model will automatically select the latest version of that
+  // model on the Anthropic side.
+
+  const CLAUDE_BIG = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
+  const CLAUDE_SMALL = process.env.CLAUDE_SMALL_MODEL || "claude-v1.2";
+
+  // TODO: Finish implementing tokenizer for more accurate model selection.
+  // This currently uses _character count_, not token count.
+  const model = prompt.length > 25000 ? CLAUDE_BIG : CLAUDE_SMALL;
 
   // wip
   // const tokens = countTokens({
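
For reference, the selection rule this patch introduces reads as the following
standalone sketch. pickClaudeModel is a hypothetical helper name used only for
illustration; the actual patch inlines this logic inside openaiToAnthropic() in
transform-outbound-payload.ts.

// Standalone sketch (not part of the patch) of the model-selection rule above.
// pickClaudeModel is a hypothetical helper; the patch inlines this logic.
function pickClaudeModel(prompt: string): string {
  // Defaults mirror the patch; both can be overridden via .env.
  const big = process.env.CLAUDE_BIG_MODEL || "claude-v1-100k";
  const small = process.env.CLAUDE_SMALL_MODEL || "claude-v1.2";
  // Character count stands in for token count until the tokenizer lands.
  // Anthropic's guideline of ~28,000 characters to roughly 8k tokens, minus
  // a safety margin, yields the 25,000-character cutoff.
  return prompt.length > 25000 ? big : small;
}

With the defaults, a 30,000-character prompt resolves to claude-v1-100k; setting
CLAUDE_BIG_MODEL=claude-v1 in .env would instead route the same prompt to the
latest v1 model, per the auto-selection behavior the patch comments describe.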