From d54acad6ad71919b23f984ae4c6f76026f48956f Mon Sep 17 00:00:00 2001
From: nai-degen <khoners@protonmail.com>
Date: Thu, 15 Aug 2024 11:55:13 -0500
Subject: [PATCH] adds support for sonnet 8192 output tokens on anthropic api

---
 src/proxy/anthropic.ts                        | 99 ++++++-------------
 .../request/onproxyreq/check-model-family.ts  | 10 +-
 .../request/preprocessors/sign-aws-request.ts |  1 +
 3 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts
index 643efef..bc7e627 100644
--- a/src/proxy/anthropic.ts
+++ b/src/proxy/anthropic.ts
@@ -46,7 +46,7 @@ const getModelsResponse = () => {
     "claude-3-haiku-20240307",
     "claude-3-opus-20240229",
     "claude-3-sonnet-20240229",
-    "claude-3-5-sonnet-20240620"
+    "claude-3-5-sonnet-20240620",
   ];
 
   const models = claudeVariants.map((id) => ({
@@ -70,7 +70,7 @@ const handleModelRequest: RequestHandler = (_req, res) => {
 };
 
 /** Only used for non-streaming requests. */
-const anthropicResponseHandler: ProxyResHandlerWithBody = async (
+const anthropicBlockingResponseHandler: ProxyResHandlerWithBody = async (
   _proxyRes,
   req,
   res,
@@ -179,6 +179,28 @@ export function transformAnthropicChatResponseToOpenAI(
   };
 }
 
+/**
+ * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
+ * model, reassigns it to Claude 3 Sonnet. Non-string models are left as-is.
+ */
+function maybeReassignModel(req: Request) {
+  const model = req.body.model;
+  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
+  req.body.model = "claude-3-sonnet-20240229";
+}
+
+/**
+ * Adds the `anthropic-beta` opt-in header when more than 4096 output tokens
+ * are requested. Reads `max_tokens` (chat), else `max_tokens_to_sample`.
+ * https://docs.anthropic.com/en/release-notes/api#july-15th-2024
+ */
+function setAnthropicBetaHeader(req: Request) {
+  const outputTokens = req.body.max_tokens ?? req.body.max_tokens_to_sample;
+  if (outputTokens > 4096) {
+    req.headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
+  }
+}
+
 const anthropicProxy = createQueueMiddleware({
   proxyMiddleware: createProxyMiddleware({
     target: "https://api.anthropic.com",
@@ -189,7 +211,7 @@ const anthropicProxy = createQueueMiddleware({
       proxyReq: createOnProxyReqHandler({
         pipeline: [addKey, addAnthropicPreamble, finalizeBody],
       }),
-      proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
+      proxyRes: createOnProxyResHandler([anthropicBlockingResponseHandler]),
       error: handleProxyError,
     },
     // Abusing pathFilter to rewrite the paths dynamically.
@@ -213,6 +235,11 @@ const anthropicProxy = createQueueMiddleware({
   }),
 });
 
+const nativeAnthropicChatPreprocessor = createPreprocessorMiddleware(
+  { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "anthropic" },
+  { afterTransform: [setAnthropicBetaHeader] }
+);
+
 const nativeTextPreprocessor = createPreprocessorMiddleware({
   inApi: "anthropic-text",
   outApi: "anthropic-text",
@@ -268,11 +295,7 @@ anthropicRouter.get("/v1/models", handleModelRequest);
 anthropicRouter.post(
   "/v1/messages",
   ipLimiter,
-  createPreprocessorMiddleware({
-    inApi: "anthropic-chat",
-    outApi: "anthropic-chat",
-    service: "anthropic",
-  }),
+  nativeAnthropicChatPreprocessor,
   anthropicProxy
 );
 // Anthropic text completion endpoint. Translates to Anthropic chat completion
@@ -292,65 +315,5 @@ anthropicRouter.post(
   preprocessOpenAICompatRequest,
   anthropicProxy
 );
-// Temporarily force Anthropic Text to Anthropic Chat for frontends which do not
-// yet support the new model. Forces claude-3. Will be removed once common
-// frontends have been updated.
-anthropicRouter.post(
-  "/v1/:type(sonnet|opus)/:action(complete|messages)",
-  ipLimiter,
-  handleAnthropicTextCompatRequest,
-  createPreprocessorMiddleware({
-    inApi: "anthropic-text",
-    outApi: "anthropic-chat",
-    service: "anthropic",
-  }),
-  anthropicProxy
-);
-
-function handleAnthropicTextCompatRequest(
-  req: Request,
-  res: Response,
-  next: any
-) {
-  const type = req.params.type;
-  const action = req.params.action;
-  const alreadyInChatFormat = Boolean(req.body.messages);
-  const compatModel = `claude-3-${type}-20240229`;
-  req.log.info(
-    { type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
-    "Handling Anthropic compatibility request"
-  );
-
-  if (action === "messages" || alreadyInChatFormat) {
-    return sendErrorToClient({
-      req,
-      res,
-      options: {
-        title: "Unnecessary usage of compatibility endpoint",
-        message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
-        format: "unknown",
-        statusCode: 400,
-        reqId: req.id,
-        obj: {
-          requested_endpoint: "/anthropic/" + type,
-          correct_endpoint: "/anthropic",
-        },
-      },
-    });
-  }
-
-  req.body.model = compatModel;
-  next();
-}
-
-/**
- * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
- * model, reassigns it to Claude 3 Sonnet.
- */
-function maybeReassignModel(req: Request) {
-  const model = req.body.model;
-  if (!model.startsWith("gpt-")) return;
-  req.body.model = "claude-3-sonnet-20240229";
-}
 
 export const anthropic = anthropicRouter;
diff --git a/src/proxy/middleware/request/onproxyreq/check-model-family.ts b/src/proxy/middleware/request/onproxyreq/check-model-family.ts
index e764be6..138b1ee 100644
--- a/src/proxy/middleware/request/onproxyreq/check-model-family.ts
+++ b/src/proxy/middleware/request/onproxyreq/check-model-family.ts
@@ -1,14 +1,16 @@
-import { HPMRequestCallback } from "../index";
 import { config } from "../../../../config";
 import { ForbiddenError } from "../../../../shared/errors";
 import { getModelFamilyForRequest } from "../../../../shared/models";
+import { HPMRequestCallback } from "../index";
 
 /**
  * Ensures the selected model family is enabled by the proxy configuration.
- **/
-export const checkModelFamily: HPMRequestCallback = (_proxyReq, req, res) => {
+ */
+export const checkModelFamily: HPMRequestCallback = (_proxyReq, req) => {
   const family = getModelFamilyForRequest(req);
   if (!config.allowedModelFamilies.includes(family)) {
-    throw new ForbiddenError(`Model family '${family}' is not enabled on this proxy`);
+    throw new ForbiddenError(
+      `Model family '${family}' is not enabled on this proxy`
+    );
   }
 };
diff --git a/src/proxy/middleware/request/preprocessors/sign-aws-request.ts b/src/proxy/middleware/request/preprocessors/sign-aws-request.ts
index eb8ec68..d27c058 100644
--- a/src/proxy/middleware/request/preprocessors/sign-aws-request.ts
+++ b/src/proxy/middleware/request/preprocessors/sign-aws-request.ts
@@ -35,6 +35,7 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
 
   const credential = getCredentialParts(req);
   const host = AMZ_HOST.replace("%REGION%", credential.region);
+
   // AWS only uses 2023-06-01 and does not actually check this header, but we
   // set it so that the stream adapter always selects the correct transformer.
   req.headers["anthropic-version"] = "2023-06-01";