Adds support for 8192 output tokens with Claude 3.5 Sonnet on the Anthropic API
This commit is contained in:
parent
5e1fffe07d
commit
d54acad6ad
|
@ -46,7 +46,7 @@ const getModelsResponse = () => {
|
|||
"claude-3-haiku-20240307",
|
||||
"claude-3-opus-20240229",
|
||||
"claude-3-sonnet-20240229",
|
||||
"claude-3-5-sonnet-20240620"
|
||||
"claude-3-5-sonnet-20240620",
|
||||
];
|
||||
|
||||
const models = claudeVariants.map((id) => ({
|
||||
|
@ -70,7 +70,7 @@ const handleModelRequest: RequestHandler = (_req, res) => {
|
|||
};
|
||||
|
||||
/** Only used for non-streaming requests. */
|
||||
const anthropicResponseHandler: ProxyResHandlerWithBody = async (
|
||||
const anthropicBlockingResponseHandler: ProxyResHandlerWithBody = async (
|
||||
_proxyRes,
|
||||
req,
|
||||
res,
|
||||
|
@ -179,6 +179,28 @@ export function transformAnthropicChatResponseToOpenAI(
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model (any model id starting with `gpt-`), reassigns it to Claude 3 Sonnet.
 *
 * Requests whose `model` is missing or not a string are left untouched instead
 * of throwing, so the problem can be reported by whatever validates the body
 * afterwards rather than surfacing as a TypeError here.
 *
 * @param req Incoming request; `req.body.model` is overwritten in place when
 *   it names an OpenAI model.
 */
function maybeReassignModel(req: Request) {
  const model: unknown = req.body?.model;
  // Guard before calling `startsWith`: the field is client-supplied.
  if (typeof model !== "string" || !model.startsWith("gpt-")) return;
  req.body.model = "claude-3-sonnet-20240229";
}
|
||||
|
||||
/**
 * If the client requests more than 4096 output tokens, the request must carry
 * a particular `anthropic-beta` header.
 * https://docs.anthropic.com/en/release-notes/api#july-15th-2024
 *
 * The Messages API names the output limit `max_tokens`, while the legacy Text
 * Completions API names it `max_tokens_to_sample`. Both fields are inspected
 * so the header is set regardless of which request shape reaches this hook.
 *
 * @param req Incoming request; `req.headers["anthropic-beta"]` is set in place
 *   when either limit exceeds 4096.
 */
function setAnthropicBetaHeader(req: Request) {
  const { max_tokens, max_tokens_to_sample } = req.body;
  // A missing field compares as `undefined > 4096`, which is false, so absent
  // limits never trigger the header.
  const wantsExtendedOutput = [max_tokens, max_tokens_to_sample].some(
    (limit) => limit > 4096
  );
  if (wantsExtendedOutput) {
    req.headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15";
  }
}
|
||||
|
||||
const anthropicProxy = createQueueMiddleware({
|
||||
proxyMiddleware: createProxyMiddleware({
|
||||
target: "https://api.anthropic.com",
|
||||
|
@ -189,7 +211,7 @@ const anthropicProxy = createQueueMiddleware({
|
|||
proxyReq: createOnProxyReqHandler({
|
||||
pipeline: [addKey, addAnthropicPreamble, finalizeBody],
|
||||
}),
|
||||
proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
|
||||
proxyRes: createOnProxyResHandler([anthropicBlockingResponseHandler]),
|
||||
error: handleProxyError,
|
||||
},
|
||||
// Abusing pathFilter to rewrite the paths dynamically.
|
||||
|
@ -213,6 +235,11 @@ const anthropicProxy = createQueueMiddleware({
|
|||
}),
|
||||
});
|
||||
|
||||
// Preprocessor for native Anthropic chat requests (anthropic-chat in and out).
// Registers setAnthropicBetaHeader under `afterTransform` — presumably run once
// the request body has been transformed; confirm against
// createPreprocessorMiddleware.
const nativeAnthropicChatPreprocessor = createPreprocessorMiddleware(
|
||||
{ inApi: "anthropic-chat", outApi: "anthropic-chat", service: "anthropic" },
|
||||
{ afterTransform: [setAnthropicBetaHeader] }
|
||||
);
|
||||
|
||||
const nativeTextPreprocessor = createPreprocessorMiddleware({
|
||||
inApi: "anthropic-text",
|
||||
outApi: "anthropic-text",
|
||||
|
@ -268,11 +295,7 @@ anthropicRouter.get("/v1/models", handleModelRequest);
|
|||
anthropicRouter.post(
|
||||
"/v1/messages",
|
||||
ipLimiter,
|
||||
createPreprocessorMiddleware({
|
||||
inApi: "anthropic-chat",
|
||||
outApi: "anthropic-chat",
|
||||
service: "anthropic",
|
||||
}),
|
||||
nativeAnthropicChatPreprocessor,
|
||||
anthropicProxy
|
||||
);
|
||||
// Anthropic text completion endpoint. Translates to Anthropic chat completion
|
||||
|
@ -292,65 +315,5 @@ anthropicRouter.post(
|
|||
preprocessOpenAICompatRequest,
|
||||
anthropicProxy
|
||||
);
|
||||
// Temporarily force Anthropic Text to Anthropic Chat for frontends which do not
|
||||
// yet support the new model. Forces claude-3. Will be removed once common
|
||||
// frontends have been updated.
|
||||
anthropicRouter.post(
|
||||
"/v1/:type(sonnet|opus)/:action(complete|messages)",
|
||||
ipLimiter,
|
||||
handleAnthropicTextCompatRequest,
|
||||
createPreprocessorMiddleware({
|
||||
inApi: "anthropic-text",
|
||||
outApi: "anthropic-chat",
|
||||
service: "anthropic",
|
||||
}),
|
||||
anthropicProxy
|
||||
);
|
||||
|
||||
/**
 * Middleware for the temporary text-to-chat compatibility route
 * (`/v1/:type(sonnet|opus)/:action(complete|messages)`).
 *
 * Logs the request, then does one of two things:
 * - rejects it with a 400 when the client already speaks the chat format,
 *   i.e. the `messages` action was requested or the body has a `messages`
 *   field;
 * - otherwise forces `req.body.model` to the Claude 3 variant named by the
 *   `type` route parameter and passes control to the next middleware.
 *
 * @param req Incoming request; `req.body.model` is overwritten on success.
 * @param res Response used to report the error to the client.
 * @param next Continuation, invoked only when the request is accepted.
 */
function handleAnthropicTextCompatRequest(
  req: Request,
  res: Response,
  next: (err?: unknown) => void
) {
  const { type, action } = req.params;
  const alreadyInChatFormat = Boolean(req.body.messages);
  const compatModel = `claude-3-${type}-20240229`;
  req.log.info(
    { type, inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling Anthropic compatibility request"
  );

  // Clients that already send chat-format requests should use the regular
  // endpoint; this route is only for clients still sending text prompts.
  if (action === "messages" || alreadyInChatFormat) {
    return sendErrorToClient({
      req,
      res,
      options: {
        title: "Unnecessary usage of compatibility endpoint",
        message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/anthropic\` proxy endpoint instead.`,
        format: "unknown",
        statusCode: 400,
        reqId: req.id,
        obj: {
          requested_endpoint: "/anthropic/" + type,
          correct_endpoint: "/anthropic",
        },
      },
    });
  }

  req.body.model = compatModel;
  next();
}
|
||||
|
||||
/**
 * If a client using the OpenAI compatibility endpoint requests an actual OpenAI
 * model (any model id starting with `gpt-`), reassigns it to Claude 3 Sonnet.
 *
 * A missing or non-string `model` is ignored rather than causing a TypeError,
 * leaving the request body unchanged.
 *
 * @param req Incoming request; `req.body.model` is overwritten in place when
 *   it names an OpenAI model.
 */
function maybeReassignModel(req: Request) {
  const requestedModel: unknown = req.body?.model;
  const isOpenAIModel =
    typeof requestedModel === "string" && requestedModel.startsWith("gpt-");
  if (!isOpenAIModel) return;
  req.body.model = "claude-3-sonnet-20240229";
}
|
||||
|
||||
export const anthropic = anthropicRouter;
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
import { HPMRequestCallback } from "../index";
|
||||
import { config } from "../../../../config";
|
||||
import { ForbiddenError } from "../../../../shared/errors";
|
||||
import { getModelFamilyForRequest } from "../../../../shared/models";
|
||||
import { HPMRequestCallback } from "../index";
|
||||
|
||||
/**
|
||||
* Ensures the selected model family is enabled by the proxy configuration.
|
||||
**/
|
||||
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req, res) => {
|
||||
*/
|
||||
export const checkModelFamily: HPMRequestCallback = (_proxyReq, req) => {
|
||||
const family = getModelFamilyForRequest(req);
|
||||
if (!config.allowedModelFamilies.includes(family)) {
|
||||
throw new ForbiddenError(`Model family '${family}' is not enabled on this proxy`);
|
||||
throw new ForbiddenError(
|
||||
`Model family '${family}' is not enabled on this proxy`
|
||||
);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -35,6 +35,7 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
|
|||
|
||||
const credential = getCredentialParts(req);
|
||||
const host = AMZ_HOST.replace("%REGION%", credential.region);
|
||||
|
||||
// AWS only uses 2023-06-01 and does not actually check this header, but we
|
||||
// set it so that the stream adapter always selects the correct transformer.
|
||||
req.headers["anthropic-version"] = "2023-06-01";
|
||||
|
|
Loading…
Reference in New Issue