Implement Anthropic Chat Completions endpoint and Claude 3 (khanon/oai-reverse-proxy!64)
This commit is contained in: parent b90abbda88, commit db318ec237
@@ -42,6 +42,8 @@ const getModelsResponse = () => {
     "claude-2",
     "claude-2.0",
     "claude-2.1",
+    "claude-3-opus-20240229",
+    "claude-3-sonnet-20240229",
   ];

   const models = claudeVariants.map((id) => ({
@@ -81,8 +83,16 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
   }

   if (req.inboundApi === "openai") {
-    req.log.info("Transforming Anthropic response to OpenAI format");
-    body = transformAnthropicResponse(body, req);
+    req.log.info("Transforming Anthropic text to OpenAI format");
+    body = transformAnthropicTextResponseToOpenAI(body, req);
   }

+  if (
+    req.inboundApi === "anthropic-text" &&
+    req.outboundApi === "anthropic-chat"
+  ) {
+    req.log.info("Transforming Anthropic text to Anthropic chat format");
+    body = transformAnthropicChatResponseToAnthropicText(body, req);
+  }
+
   if (req.tokenizerInfo) {
@@ -92,13 +102,32 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
   res.status(200).json(body);
 };

+function transformAnthropicChatResponseToAnthropicText(
+  anthropicBody: Record<string, any>,
+  req: Request
+): Record<string, any> {
+  return {
+    type: "completion",
+    id: "trans-" + anthropicBody.id,
+    completion: anthropicBody.content
+      .map((part: { type: string; text: string }) =>
+        part.type === "text" ? part.text : ""
+      )
+      .join(""),
+    stop_reason: anthropicBody.stop_reason,
+    stop: anthropicBody.stop_sequence,
+    model: anthropicBody.model,
+    usage: anthropicBody.usage,
+  };
+}
+
 /**
  * Transforms a model response from the Anthropic API to match those from the
  * OpenAI API, for users using Claude via the OpenAI-compatible endpoint. This
  * is only used for non-streaming requests as streaming requests are handled
  * on-the-fly.
  */
-function transformAnthropicResponse(
+function transformAnthropicTextResponseToOpenAI(
   anthropicBody: Record<string, any>,
   req: Request
 ): Record<string, any> {
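Note: `transformAnthropicChatResponseToAnthropicText` above is a pure shape mapping. A standalone sketch of what it does to a Messages API response (the real helper also receives the Express `req` for logging; the sample values are illustrative):

```ts
const chatResponse = {
  id: "msg_0123",
  content: [{ type: "text", text: "Hello!" }],
  stop_reason: "end_turn",
  stop_sequence: null,
  model: "claude-3-sonnet-20240229",
  usage: { input_tokens: 10, output_tokens: 3 },
};

// Equivalent of the helper's output: only text blocks survive into the
// flattened `completion` string; other block types are dropped as "".
const textResponse = {
  type: "completion",
  id: "trans-" + chatResponse.id,
  completion: chatResponse.content
    .map((part) => (part.type === "text" ? part.text : ""))
    .join(""),
  stop_reason: chatResponse.stop_reason,
  stop: chatResponse.stop_sequence,
  model: chatResponse.model,
  usage: chatResponse.usage,
};
// => { type: "completion", id: "trans-msg_0123", completion: "Hello!", ... }
```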
@@ -139,36 +168,96 @@ const anthropicProxy = createQueueMiddleware({
     proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
     error: handleProxyError,
   },
-  pathRewrite: {
-    // Send OpenAI-compat requests to the real Anthropic endpoint.
-    "^/v1/chat/completions": "/v1/complete",
+  // Abusing pathFilter to rewrite the paths dynamically.
+  pathFilter: (pathname, req) => {
+    const isText = req.outboundApi === "anthropic-text";
+    const isChat = req.outboundApi === "anthropic-chat";
+    if (isChat && pathname === "/v1/complete") {
+      req.url = "/v1/messages";
+    }
+    if (isText && pathname === "/v1/chat/completions") {
+      req.url = "/v1/complete";
+    }
+    if (isChat && pathname === "/v1/claude-3/complete") {
+      req.url = "/v1/messages";
+    }
+    return true;
   },
 }),
 });

+const nativeTextPreprocessor = createPreprocessorMiddleware({
+  inApi: "anthropic-text",
+  outApi: "anthropic-text",
+  service: "anthropic",
+});
+
+const textToChatPreprocessor = createPreprocessorMiddleware({
+  inApi: "anthropic-text",
+  outApi: "anthropic-chat",
+  service: "anthropic",
+});
+
+/**
+ * Routes text completion prompts to anthropic-chat if they need translation
+ * (claude-3 based models do not support the old text completion endpoint).
+ */
+const claudeTextCompletionRouter: RequestHandler = (req, res, next) => {
+  if (req.body.model?.startsWith("claude-3")) {
+    textToChatPreprocessor(req, res, next);
+  } else {
+    nativeTextPreprocessor(req, res, next);
+  }
+};
+
 const anthropicRouter = Router();
 anthropicRouter.get("/v1/models", handleModelRequest);
-// Native Anthropic chat completion endpoint.
+// Anthropic text completion endpoint. Dynamic routing based on model.
 anthropicRouter.post(
   "/v1/complete",
   ipLimiter,
+  claudeTextCompletionRouter,
+  anthropicProxy
+);
+// Native Anthropic chat completion endpoint.
+anthropicRouter.post(
+  "/v1/messages",
+  ipLimiter,
   createPreprocessorMiddleware({
-    inApi: "anthropic",
-    outApi: "anthropic",
+    inApi: "anthropic-chat",
+    outApi: "anthropic-chat",
     service: "anthropic",
   }),
   anthropicProxy
 );
-// OpenAI-to-Anthropic compatibility endpoint.
+// OpenAI-to-Anthropic Text compatibility endpoint.
 anthropicRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "openai", outApi: "anthropic", service: "anthropic" },
+    { inApi: "openai", outApi: "anthropic-text", service: "anthropic" },
     { afterTransform: [maybeReassignModel] }
   ),
   anthropicProxy
 );
+// Temporary force Anthropic Text to Anthropic Chat for frontends which do not
+// yet support the new model. Forces claude-3. Will be removed once common
+// frontends have been updated.
+anthropicRouter.post(
+  "/v1/claude-3/complete",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "anthropic-text", outApi: "anthropic-chat", service: "anthropic" },
+    {
+      beforeTransform: [
+        (req) => {
+          req.body.model = "claude-3-sonnet-20240229";
+        },
+      ],
+    }
+  ),
+  anthropicProxy
+);

 function maybeReassignModel(req: Request) {
   const model = req.body.model;
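Note: a rough sketch of the dynamic routing above. claude-3 models reject the legacy text completion API, so they get the text-to-chat preprocessor and the `pathFilter` later rewrites the upstream path; the function and names below are illustrative only:

```ts
// Illustration of claudeTextCompletionRouter + pathFilter combined behavior.
function routesForModel(model?: string) {
  if (model?.startsWith("claude-3")) {
    return { preprocess: "anthropic-text -> anthropic-chat", upstream: "/v1/messages" };
  }
  return { preprocess: "anthropic-text -> anthropic-text", upstream: "/v1/complete" };
}

routesForModel("claude-3-opus-20240229"); // translated, proxied to /v1/messages
routesForModel("claude-2.1");             // passthrough, proxied to /v1/complete
```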
@@ -32,6 +32,7 @@ const getModelsResponse = () => {
   const variants = [
     "anthropic.claude-v2",
     "anthropic.claude-v2:1",
+    "anthropic.claude-3-sonnet-20240229-v1:0"
   ];

   const models = variants.map((id) => ({

@@ -145,7 +146,7 @@ awsRouter.post(
   "/v1/complete",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "anthropic", outApi: "anthropic", service: "aws" },
+    { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
     { afterTransform: [maybeReassignModel] }
   ),
   awsProxy

@@ -155,7 +156,7 @@ awsRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "openai", outApi: "anthropic", service: "aws" },
+    { inApi: "openai", outApi: "anthropic-text", service: "aws" },
     { afterTransform: [maybeReassignModel] }
   ),
   awsProxy
@@ -11,6 +11,8 @@ const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
 const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
 const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
+const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
+const ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT = "/v1/claude-3/complete";

 export function isTextGenerationRequest(req: Request) {
   return (

@@ -19,6 +21,8 @@ export function isTextGenerationRequest(req: Request) {
       OPENAI_CHAT_COMPLETION_ENDPOINT,
       OPENAI_TEXT_COMPLETION_ENDPOINT,
       ANTHROPIC_COMPLETION_ENDPOINT,
+      ANTHROPIC_MESSAGES_ENDPOINT,
+      ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT,
     ].some((endpoint) => req.path.startsWith(endpoint))
   );
 }

@@ -91,6 +95,7 @@ export const classifyErrorAndSend = (
     });
   } catch (error) {
     req.log.error(error, `Error writing error response headers, giving up.`);
     res.end();
   }
 };
@@ -199,11 +204,24 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
       return body.choices[0].message.content || "";
     case "openai-text":
       return body.choices[0].text;
-    case "anthropic":
+    case "anthropic-chat":
+      if (!body.content) {
+        req.log.error(
+          { body: JSON.stringify(body) },
+          "Received empty Anthropic chat completion"
+        );
+        return "";
+      }
+      return body.content
+        .map(({ text, type }: { type: string; text: string }) =>
+          type === "text" ? text : `[Unsupported content type: ${type}]`
+        )
+        .join("\n");
+    case "anthropic-text":
       if (!body.completion) {
         req.log.error(
           { body: JSON.stringify(body) },
-          "Received empty Anthropic completion"
+          "Received empty Anthropic text completion"
         );
         return "";
       }
@@ -229,7 +247,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
       return body.model;
     case "openai-image":
       return req.body.model;
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       // Anthropic confirms the model in the response, but AWS Claude doesn't.
       return body.model || req.body.model;
     case "google-ai":

@@ -28,7 +28,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
   switch (req.outboundApi) {
     // If we are translating between API formats we may need to select a model
     // for the user, because the provided model is for the inbound API.
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       assignedKey = keyPool.get("claude-v1", req.service);
       break;
     case "openai-text":

@@ -71,6 +72,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
       if (key.organizationId) {
         proxyReq.setHeader("OpenAI-Organization", key.organizationId);
       }
       proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
       break;
+    case "mistral-ai":
+      proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
+      break;
@@ -8,6 +8,10 @@ export const finalizeBody: HPMRequestCallback = (proxyReq, req) => {
   if (req.outboundApi === "openai-image") {
     delete req.body.stream;
   }
+  // For anthropic text to chat requests, remove undefined prompt.
+  if (req.outboundApi === "anthropic-chat") {
+    delete req.body.prompt;
+  }

   const updatedBody = JSON.stringify(req.body);
   proxyReq.setHeader("Content-Length", Buffer.byteLength(updatedBody));

@@ -114,7 +114,7 @@ const handleTestMessage: RequestHandler = (req, res) => {
   if (method !== "POST") {
     return;
   }

   if (isTestMessage(body)) {
     req.log.info({ body }, "Received test message. Skipping API call.");
     res.json({

@@ -145,6 +145,9 @@ function isTestMessage(body: any) {
       messages[0].content === "Hi"
     );
   } else {
-    return prompt?.trim() === "Human: Hi\n\nAssistant:";
+    return (
+      prompt?.trim() === "Human: Hi\n\nAssistant:" ||
+      prompt?.startsWith("Hi\n\n")
+    );
   }
 }
@@ -2,6 +2,7 @@ import { RequestPreprocessor } from "../index";
 import { countTokens } from "../../../../shared/tokenization";
 import { assertNever } from "../../../../shared/utils";
 import {
+  AnthropicChatMessage,
   GoogleAIChatMessage,
   MistralAIChatMessage,
   OpenAIChatMessage,

@@ -28,7 +29,13 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
       result = await countTokens({ req, prompt, service });
       break;
     }
-    case "anthropic": {
+    case "anthropic-chat": {
+      req.outputTokens = req.body.max_tokens;
+      const prompt: AnthropicChatMessage[] = req.body.messages;
+      result = await countTokens({ req, prompt, service });
+      break;
+    }
+    case "anthropic-text": {
       req.outputTokens = req.body.max_tokens_to_sample;
       const prompt: string = req.body.prompt;
       result = await countTokens({ req, prompt, service });

@@ -6,6 +6,7 @@ import { UserInputError } from "../../../../shared/errors";
 import {
   MistralAIChatMessage,
   OpenAIChatMessage,
+  flattenAnthropicMessages,
 } from "../../../../shared/api-schemas";

 const rejectedClients = new Map<string, number>();

@@ -53,7 +54,9 @@ function getPromptFromRequest(req: Request) {
   const service = req.outboundApi;
   const body = req.body;
   switch (service) {
-    case "anthropic":
+    case "anthropic-chat":
+      return flattenAnthropicMessages(body.messages);
+    case "anthropic-text":
       return body.prompt;
     case "openai":
     case "mistral-ai":
@@ -2,7 +2,10 @@ import express from "express";
 import { Sha256 } from "@aws-crypto/sha256-js";
 import { SignatureV4 } from "@smithy/signature-v4";
 import { HttpRequest } from "@smithy/protocol-http";
-import { AnthropicV1CompleteSchema } from "../../../../shared/api-schemas/anthropic";
+import {
+  AnthropicV1TextSchema,
+  AnthropicV1MessagesSchema,
+} from "../../../../shared/api-schemas/anthropic";
 import { keyPool } from "../../../../shared/key-management";
 import { RequestPreprocessor } from "../index";

@@ -22,19 +25,37 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
   let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
   req.body.prompt = preamble + req.body.prompt;

-  // AWS supports only a subset of Anthropic's parameters and is more strict
-  // about unknown parameters.
+  // AWS uses mostly the same parameters as Anthropic, with a few removed params
+  // and much stricter validation on unused parameters. Rather than treating it
+  // as a separate schema we will use the anthropic ones and strip the unused
+  // parameters.
   // TODO: This should happen in transform-outbound-payload.ts
-  const strippedParams = AnthropicV1CompleteSchema.pick({
-    prompt: true,
-    max_tokens_to_sample: true,
-    stop_sequences: true,
-    temperature: true,
-    top_k: true,
-    top_p: true,
-  })
-    .strip()
-    .parse(req.body);
+  let strippedParams: Record<string, unknown>;
+  if (req.inboundApi === "anthropic-chat") {
+    strippedParams = AnthropicV1MessagesSchema
+      .pick({
+        messages: true,
+        max_tokens: true,
+        stop_sequences: true,
+        temperature: true,
+        top_k: true,
+        top_p: true,
+      })
+      .strip()
+      .parse(req.body);
+  } else {
+    strippedParams = AnthropicV1TextSchema
+      .pick({
+        prompt: true,
+        max_tokens_to_sample: true,
+        stop_sequences: true,
+        temperature: true,
+        top_k: true,
+        top_p: true,
+      })
+      .strip()
+      .parse(req.body);
+  }

   const credential = getCredentialParts(req);
   const host = AMZ_HOST.replace("%REGION%", credential.region);
@@ -1,4 +1,7 @@
-import { openAIToAnthropic } from "../../../../shared/api-schemas/anthropic";
+import {
+  anthropicTextToAnthropicChat,
+  openAIToAnthropicText,
+} from "../../../../shared/api-schemas/anthropic";
 import { openAIToOpenAIText } from "../../../../shared/api-schemas/openai-text";
 import { openAIToOpenAIImage } from "../../../../shared/api-schemas/openai-image";
 import { openAIToGoogleAI } from "../../../../shared/api-schemas/google-ai";

@@ -41,8 +44,16 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
     return;
   }

-  if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
-    req.body = openAIToAnthropic(req);
+  if (
+    req.inboundApi === "anthropic-text" &&
+    req.outboundApi === "anthropic-chat"
+  ) {
+    req.body = anthropicTextToAnthropicChat(req);
     return;
   }

+  if (req.inboundApi === "openai" && req.outboundApi === "anthropic-text") {
+    req.body = openAIToAnthropicText(req);
+    return;
+  }
+
@@ -29,7 +29,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     case "openai-text":
       proxyMax = OPENAI_MAX_CONTEXT;
       break;
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       proxyMax = CLAUDE_MAX_CONTEXT;
       break;
     case "google-ai":
@@ -10,9 +10,11 @@ import {
 import { ProxyResHandlerWithBody } from ".";
 import { assertNever } from "../../../shared/utils";
 import {
+  AnthropicChatMessage, flattenAnthropicMessages,
   MistralAIChatMessage,
   OpenAIChatMessage,
 } from "../../../shared/api-schemas";
+import { APIFormat } from "../../../shared/key-management";

 /** If prompt logging is enabled, enqueues the prompt for logging. */
 export const logPrompt: ProxyResHandlerWithBody = async (

@@ -33,7 +35,7 @@ export const logPrompt: ProxyResHandlerWithBody = async (
   if (!loggable) return;

   const promptPayload = getPromptForRequest(req, responseBody);
-  const promptFlattened = flattenMessages(promptPayload);
+  const promptFlattened = flattenMessages(promptPayload, req.outboundApi);
   const response = getCompletionFromBody(req, responseBody);
   const model = getModelFromBody(req, responseBody);

@@ -57,13 +59,19 @@ type OaiImageResult = {
 const getPromptForRequest = (
   req: Request,
   responseBody: Record<string, any>
-): string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult => {
+):
+  | string
+  | OpenAIChatMessage[]
+  | AnthropicChatMessage[]
+  | MistralAIChatMessage[]
+  | OaiImageResult => {
   // Since the prompt logger only runs after the request has been proxied, we
   // can assume the body has already been transformed to the target API's
   // format.
   switch (req.outboundApi) {
     case "openai":
     case "mistral-ai":
+    case "anthropic-chat":
       return req.body.messages;
     case "openai-text":
       return req.body.prompt;

@@ -75,7 +83,7 @@ const getPromptForRequest = (
         quality: req.body.quality,
         revisedPrompt: responseBody.data[0].revised_prompt,
       };
-    case "anthropic":
+    case "anthropic-text":
       return req.body.prompt;
     case "google-ai":
       return req.body.prompt.text;

@@ -85,11 +93,20 @@ const getPromptForRequest = (
 };

 const flattenMessages = (
-  val: string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult
+  val:
+    | string
+    | OpenAIChatMessage[]
+    | MistralAIChatMessage[]
+    | OaiImageResult
+    | AnthropicChatMessage[],
+  format: APIFormat,
 ): string => {
   if (typeof val === "string") {
     return val.trim();
   }
+  if (format === "anthropic-chat") {
+    return flattenAnthropicMessages(val as AnthropicChatMessage[]);
+  }
   if (Array.isArray(val)) {
     return val
       .map(({ content, role }) => {
@@ -0,0 +1,49 @@
+import { OpenAIChatCompletionStreamEvent } from "../index";
+
+export type AnthropicChatCompletionResponse = {
+  id: string;
+  type: "message";
+  role: "assistant";
+  content: { type: "text"; text: string }[];
+  model: string;
+  stop_reason: string | null;
+  stop_sequence: string | null;
+  usage: { input_tokens: number; output_tokens: number };
+};
+
+/**
+ * Given a list of OpenAI chat completion events, compiles them into a single
+ * finalized Anthropic chat completion response so that non-streaming middleware
+ * can operate on it as if it were a blocking response.
+ */
+export function mergeEventsForAnthropicChat(
+  events: OpenAIChatCompletionStreamEvent[]
+): AnthropicChatCompletionResponse {
+  let merged: AnthropicChatCompletionResponse = {
+    id: "",
+    type: "message",
+    role: "assistant",
+    content: [],
+    model: "",
+    stop_reason: null,
+    stop_sequence: null,
+    usage: { input_tokens: 0, output_tokens: 0 },
+  };
+  merged = events.reduce((acc, event, i) => {
+    // The first event will only contain role assignment and response metadata
+    if (i === 0) {
+      acc.id = event.id;
+      acc.model = event.model;
+      acc.content = [{ type: "text", text: "" }];
+      return acc;
+    }
+
+    acc.stop_reason = event.choices[0].finish_reason ?? "";
+    if (event.choices[0].delta.content) {
+      acc.content[0].text += event.choices[0].delta.content;
+    }
+
+    return acc;
+  }, merged);
+  return merged;
+}
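Note: a usage sketch for the new aggregator (import path and sample values assumed for illustration). The first chunk seeds id/model and an empty text block; later chunks append delta content and record the finish reason:

```ts
import { mergeEventsForAnthropicChat } from "./anthropic-chat";

const merged = mergeEventsForAnthropicChat([
  {
    id: "chatcmpl-1", object: "chat.completion.chunk", created: 0,
    model: "claude-3-sonnet-20240229",
    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }],
  },
  {
    id: "chatcmpl-1", object: "chat.completion.chunk", created: 0,
    model: "claude-3-sonnet-20240229",
    choices: [{ index: 0, delta: { content: "Hi there" }, finish_reason: "stop" }],
  },
]);
// merged.content => [{ type: "text", text: "Hi there" }]
// merged.stop_reason => "stop"
```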
@@ -1,6 +1,6 @@
 import { OpenAIChatCompletionStreamEvent } from "../index";

-export type AnthropicCompletionResponse = {
+export type AnthropicTextCompletionResponse = {
   completion: string;
   stop_reason: string;
   truncated: boolean;

@@ -15,10 +15,10 @@ export type AnthropicCompletionResponse = {
  * finalized Anthropic completion response so that non-streaming middleware
  * can operate on it as if it were a blocking response.
  */
-export function mergeEventsForAnthropic(
+export function mergeEventsForAnthropicText(
   events: OpenAIChatCompletionStreamEvent[]
-): AnthropicCompletionResponse {
-  let merged: AnthropicCompletionResponse = {
+): AnthropicTextCompletionResponse {
+  let merged: AnthropicTextCompletionResponse = {
     log_id: "",
     exception: null,
     model: "",
@@ -1,9 +1,12 @@
 import { APIFormat } from "../../../../shared/key-management";
 import { assertNever } from "../../../../shared/utils";
 import {
-  mergeEventsForAnthropic,
+  anthropicV2ToOpenAI,
+  mergeEventsForAnthropicChat,
+  mergeEventsForAnthropicText,
   mergeEventsForOpenAIChat,
   mergeEventsForOpenAIText,
+  AnthropicV2StreamEvent,
   OpenAIChatCompletionStreamEvent,
 } from "./index";

@@ -20,8 +23,30 @@ export class EventAggregator {
     this.format = format;
   }

-  addEvent(event: OpenAIChatCompletionStreamEvent) {
-    this.events.push(event);
+  addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
+    if (eventIsOpenAIEvent(event)) {
+      this.events.push(event);
+    } else {
+      // horrible special case. previously all transformers' target format was
+      // openai, so the event aggregator could conveniently assume all incoming
+      // events were in openai format.
+      // now we have added anthropic-chat-to-text, so aggregator needs to know
+      // how to collapse events from two formats.
+      // because that is annoying, we will simply transform anthropic events to
+      // openai (even if the client didn't ask for openai) so we don't have to
+      // write aggregation logic for anthropic chat (which is also a troublesome
+      // stateful format).
+      const openAIEvent = anthropicV2ToOpenAI({
+        data: `event: completion\ndata: ${JSON.stringify(event)}\n\n`,
+        lastPosition: -1,
+        index: 0,
+        fallbackId: event.log_id || "event-aggregator-fallback",
+        fallbackModel: event.model || "claude-3-fallback",
+      });
+      if (openAIEvent.event) {
+        this.events.push(openAIEvent.event);
+      }
+    }
   }

   getFinalResponse() {

@@ -32,8 +57,10 @@ export class EventAggregator {
         return mergeEventsForOpenAIChat(this.events);
       case "openai-text":
         return mergeEventsForOpenAIText(this.events);
-      case "anthropic":
-        return mergeEventsForAnthropic(this.events);
+      case "anthropic-text":
+        return mergeEventsForAnthropicText(this.events);
+      case "anthropic-chat":
+        return mergeEventsForAnthropicChat(this.events);
       case "openai-image":
         throw new Error(`SSE aggregation not supported for ${this.format}`);
       default:

@@ -41,3 +68,9 @@ export class EventAggregator {
     }
   }
 }

+function eventIsOpenAIEvent(
+  event: any
+): event is OpenAIChatCompletionStreamEvent {
+  return event?.object === "chat.completion.chunk";
+}
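Note: roughly what the `addEvent` special case above does to a raw Anthropic V2 text event (a sketch; `anthropicV2ToOpenAI` is the transformer imported at the top of the file, sample values assumed):

```ts
// The V2 event is re-wrapped as SSE text and run through anthropicV2ToOpenAI,
// so this.events only ever holds OpenAI-shaped chunks.
const v2Event = { log_id: "evt-1", completion: "Hello", stop_reason: null };
const wrapped = `event: completion\ndata: ${JSON.stringify(v2Event)}\n\n`;
// anthropicV2ToOpenAI({ data: wrapped, lastPosition: -1, index: 0,
//   fallbackId: v2Event.log_id, fallbackModel: "claude-3-fallback" })
// => { position, event: { object: "chat.completion.chunk",
//      choices: [{ delta: { content: "Hello" }, ... }] } }
```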
@@ -1,9 +1,17 @@
-export type SSEResponseTransformArgs = {
+export type SSEResponseTransformArgs<S = Record<string, any>> = {
   data: string;
   lastPosition: number;
   index: number;
   fallbackId: string;
   fallbackModel: string;
+  state?: S;
 };

+export type AnthropicV2StreamEvent = {
+  log_id?: string;
+  model?: string;
+  completion: string;
+  stop_reason: string | null;
+};
+
 export type OpenAIChatCompletionStreamEvent = {

@@ -16,17 +24,24 @@ export type OpenAIChatCompletionStreamEvent = {
     delta: { role?: string; content?: string };
     finish_reason: string | null;
   }[];
-}
+};

-export type StreamingCompletionTransformer = (
-  params: SSEResponseTransformArgs
-) => { position: number; event?: OpenAIChatCompletionStreamEvent };
+export type StreamingCompletionTransformer<
+  T = OpenAIChatCompletionStreamEvent,
+  S = any,
+> = (params: SSEResponseTransformArgs<S>) => {
+  position: number;
+  event?: T;
+  state?: S;
+};

 export { openAITextToOpenAIChat } from "./transformers/openai-text-to-openai";
 export { anthropicV1ToOpenAI } from "./transformers/anthropic-v1-to-openai";
 export { anthropicV2ToOpenAI } from "./transformers/anthropic-v2-to-openai";
+export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-anthropic-v2";
 export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
 export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
 export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
 export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
-export { mergeEventsForAnthropic } from "./aggregators/anthropic";
+export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
+export { mergeEventsForAnthropicChat } from "./aggregators/anthropic-chat";
@@ -3,27 +3,27 @@ export type ServerSentEvent = { id?: string; type?: string; data: string };
 /** Given a string of SSE data, parse it into a `ServerSentEvent` object. */
 export function parseEvent(event: string) {
   const buffer: ServerSentEvent = { data: "" };
-  return event.split(/\r?\n/).reduce(parseLine, buffer)
+  return event.split(/\r?\n/).reduce(parseLine, buffer);
 }

 function parseLine(event: ServerSentEvent, line: string) {
   const separator = line.indexOf(":");
-  const field = separator === -1 ? line : line.slice(0,separator);
+  const field = separator === -1 ? line : line.slice(0, separator);
   const value = separator === -1 ? "" : line.slice(separator + 1);

   switch (field) {
-    case 'id':
-      event.id = value.trim()
-      break
-    case 'event':
-      event.type = value.trim()
-      break
-    case 'data':
-      event.data += value.trimStart()
-      break
+    case "id":
+      event.id = value.trim();
+      break;
+    case "event":
+      event.type = value.trim();
+      break;
+    case "data":
+      event.data += value.trimStart();
+      break;
     default:
-      break
+      break;
   }

-  return event
-}
+  return event;
+}
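Note: quick check of the tidied parser above — a multi-line SSE frame reduces into a single ServerSentEvent (sample frame assumed):

```ts
import { parseEvent } from "./parse-sse";

const evt = parseEvent(
  'event: content_block_delta\ndata: {"type":"content_block_delta"}'
);
// => { data: '{"type":"content_block_delta"}', type: "content_block_delta" }
```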
@@ -3,11 +3,13 @@ import { logger } from "../../../../logger";
 import { APIFormat } from "../../../../shared/key-management";
 import { assertNever } from "../../../../shared/utils";
 import {
+  anthropicChatToAnthropicV2,
   anthropicV1ToOpenAI,
+  AnthropicV2StreamEvent,
   anthropicV2ToOpenAI,
-  googleAIToOpenAI,
   OpenAIChatCompletionStreamEvent,
   openAITextToOpenAIChat,
+  googleAIToOpenAI,
   passthroughToOpenAI,
   StreamingCompletionTransformer,
 } from "./index";

@@ -26,9 +28,13 @@ type SSEMessageTransformerOptions = TransformOptions & {
  */
 export class SSEMessageTransformer extends Transform {
   private lastPosition: number;
+  private transformState: any;
   private msgCount: number;
   private readonly inputFormat: APIFormat;
-  private readonly transformFn: StreamingCompletionTransformer;
+  private readonly transformFn: StreamingCompletionTransformer<
+    // TODO: Refactor transformers to not assume only OpenAI events as output
+    OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+  >;
   private readonly log;
   private readonly fallbackId: string;
   private readonly fallbackModel: string;

@@ -58,15 +64,20 @@ export class SSEMessageTransformer extends Transform {
   _transform(chunk: Buffer, _encoding: BufferEncoding, callback: Function) {
     try {
       const originalMessage = chunk.toString();
-      const { event: transformedMessage, position: newPosition } =
-        this.transformFn({
-          data: originalMessage,
-          lastPosition: this.lastPosition,
-          index: this.msgCount++,
-          fallbackId: this.fallbackId,
-          fallbackModel: this.fallbackModel,
-        });
+      const {
+        event: transformedMessage,
+        position: newPosition,
+        state,
+      } = this.transformFn({
+        data: originalMessage,
+        lastPosition: this.lastPosition,
+        index: this.msgCount++,
+        fallbackId: this.fallbackId,
+        fallbackModel: this.fallbackModel,
+        state: this.transformState,
+      });
       this.lastPosition = newPosition;
+      this.transformState = state;

       // Special case for Azure OpenAI, which is 99% the same as OpenAI but
       // sometimes emits an extra event at the beginning of the stream with the

@@ -84,7 +95,7 @@ export class SSEMessageTransformer extends Transform {
       // Some events may not be transformed, e.g. ping events
       if (!transformedMessage) return callback();

-      if (this.msgCount === 1) {
+      if (this.msgCount === 1 && eventIsOpenAIEvent(transformedMessage)) {
        // TODO: does this need to be skipped for passthroughToOpenAI?
        this.push(createInitialMessage(transformedMessage));
      }

@@ -98,20 +109,30 @@ export class SSEMessageTransformer extends Transform {
   }
 }

+function eventIsOpenAIEvent(
+  event: any
+): event is OpenAIChatCompletionStreamEvent {
+  return event?.object === "chat.completion.chunk";
+}
+
 function getTransformer(
   responseApi: APIFormat,
   version?: string
-): StreamingCompletionTransformer {
+): StreamingCompletionTransformer<
+  OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+> {
   switch (responseApi) {
     case "openai":
     case "mistral-ai":
       return passthroughToOpenAI;
     case "openai-text":
       return openAITextToOpenAIChat;
-    case "anthropic":
+    case "anthropic-text":
       return version === "2023-01-01"
         ? anthropicV1ToOpenAI
         : anthropicV2ToOpenAI;
+    case "anthropic-chat":
+      return anthropicChatToAnthropicV2;
     case "google-ai":
       return googleAIToOpenAI;
     case "openai-image":

@@ -67,7 +67,7 @@ export class SSEStreamAdapter extends Transform {
       default:
         this.log.error({ message, type }, "Received bad AWS stream event");
         return makeCompletionSSE({
-          format: "anthropic",
+          format: "anthropic-text",
           title: "Proxy stream error",
           message:
             "The proxy received an unrecognized error from AWS while streaming.",
@@ -0,0 +1,129 @@
+import {
+  AnthropicV2StreamEvent,
+  StreamingCompletionTransformer,
+} from "../index";
+import { parseEvent, ServerSentEvent } from "../parse-sse";
+import { logger } from "../../../../../logger";
+
+const log = logger.child({
+  module: "sse-transformer",
+  transformer: "anthropic-chat-to-anthropic-v2",
+});
+
+export type AnthropicChatEventType =
+  | "message_start"
+  | "content_block_start"
+  | "content_block_delta"
+  | "content_block_stop"
+  | "message_delta"
+  | "message_stop";
+
+type AnthropicChatStartEvent = {
+  type: "message_start";
+  message: {
+    id: string;
+    type: "message";
+    role: "assistant";
+    content: [];
+    model: string;
+    stop_reason: null;
+    stop_sequence: null;
+    usage: { input_tokens: number; output_tokens: number };
+  };
+};
+
+type AnthropicChatContentBlockStartEvent = {
+  type: "content_block_start";
+  index: number;
+  content_block: { type: "text"; text: string };
+};
+
+export type AnthropicChatContentBlockDeltaEvent = {
+  type: "content_block_delta";
+  index: number;
+  delta: { type: "text_delta"; text: string };
+};
+
+type AnthropicChatContentBlockStopEvent = {
+  type: "content_block_stop";
+  index: number;
+};
+
+type AnthropicChatMessageDeltaEvent = {
+  type: "message_delta";
+  delta: {
+    stop_reason: string;
+    stop_sequence: null;
+    usage: { output_tokens: number };
+  };
+};
+
+type AnthropicChatMessageStopEvent = {
+  type: "message_stop";
+};
+
+type AnthropicChatTransformerState = { content: string };
+
+/**
+ * Transforms an incoming Anthropic Chat SSE to an equivalent Anthropic V2
+ * Text SSE.
+ * For now we assume there is only one content block and message delta. In the
+ * future Anthropic may add multi-turn responses or multiple content blocks
+ * (probably for multimodal responses, image generation, etc) but as far as I
+ * can tell this is not yet implemented.
+ */
+export const anthropicChatToAnthropicV2: StreamingCompletionTransformer<
+  AnthropicV2StreamEvent,
+  AnthropicChatTransformerState
+> = (params) => {
+  const { data } = params;
+
+  const rawEvent = parseEvent(data);
+  if (!rawEvent.data || !rawEvent.type) {
+    return { position: -1 };
+  }
+
+  const deltaEvent = asAnthropicChatDelta(rawEvent);
+  if (!deltaEvent) {
+    return { position: -1 };
+  }
+
+  const newEvent = {
+    log_id: params.fallbackId,
+    model: params.fallbackModel,
+    completion: deltaEvent.delta.text,
+    stop_reason: null,
+  };
+
+  return { position: -1, event: newEvent };
+};
+
+export function asAnthropicChatDelta(
+  event: ServerSentEvent
+): AnthropicChatContentBlockDeltaEvent | null {
+  if (
+    !event.type ||
+    !["content_block_start", "content_block_delta"].includes(event.type)
+  ) {
+    return null;
+  }
+
+  try {
+    const parsed = JSON.parse(event.data);
+    if (parsed.type === "content_block_delta") {
+      return parsed;
+    } else if (parsed.type === "content_block_start") {
+      return {
+        type: "content_block_delta",
+        index: parsed.index,
+        delta: { type: "text_delta", text: parsed.content_block?.text ?? "" },
+      };
+    } else {
+      // noinspection ExceptionCaughtLocallyJS
+      throw new Error("Invalid event type");
+    }
+  } catch (error) {
+    log.warn({ error: error.stack, event }, "Received invalid event");
+  }
+  return null;
+}
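Note: a sketch of the new transformer applied to a single Messages API delta frame. The `fallbackId`/`fallbackModel` values stand in for per-request values the proxy tracks, and the import path is assumed:

```ts
import { anthropicChatToAnthropicV2 } from "../index";

const result = anthropicChatToAnthropicV2({
  data:
    "event: content_block_delta\n" +
    'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}',
  lastPosition: -1,
  index: 0,
  fallbackId: "req-1",
  fallbackModel: "claude-3-sonnet-20240229",
});
// result.event =>
//   { log_id: "req-1", model: "claude-3-sonnet-20240229",
//     completion: "Hello", stop_reason: null }
```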
@@ -0,0 +1,45 @@
+import { StreamingCompletionTransformer } from "../index";
+import { parseEvent } from "../parse-sse";
+import { logger } from "../../../../../logger";
+import { asAnthropicChatDelta } from "./anthropic-chat-to-anthropic-v2";
+
+const log = logger.child({
+  module: "sse-transformer",
+  transformer: "anthropic-chat-to-openai",
+});
+
+/**
+ * Transforms an incoming Anthropic Chat SSE to an equivalent OpenAI
+ * chat.completion.chunks SSE.
+ */
+export const anthropicChatToOpenAI: StreamingCompletionTransformer = (
+  params
+) => {
+  const { data } = params;
+
+  const rawEvent = parseEvent(data);
+  if (!rawEvent.data || !rawEvent.type) {
+    return { position: -1 };
+  }
+
+  const deltaEvent = asAnthropicChatDelta(rawEvent);
+  if (!deltaEvent) {
+    return { position: -1 };
+  }
+
+  const newEvent = {
+    id: params.fallbackId,
+    object: "chat.completion.chunk" as const,
+    created: Date.now(),
+    model: params.fallbackModel,
+    choices: [
+      {
+        index: params.index,
+        delta: { content: deltaEvent.delta.text },
+        finish_reason: null,
+      },
+    ],
+  };
+
+  return { position: -1, event: newEvent };
+};
@@ -1,4 +1,7 @@
-import { StreamingCompletionTransformer } from "../index";
+import {
+  AnthropicV2StreamEvent,
+  StreamingCompletionTransformer,
+} from "../index";
 import { parseEvent, ServerSentEvent } from "../parse-sse";
 import { logger } from "../../../../../logger";

@@ -7,13 +10,6 @@ const log = logger.child({
   transformer: "anthropic-v2-to-openai",
 });

-type AnthropicV2StreamEvent = {
-  log_id?: string;
-  model?: string;
-  completion: string;
-  stop_reason: string;
-};
-
 /**
  * Transforms an incoming Anthropic SSE (2023-06-01 API) to an equivalent
  * OpenAI chat.completion.chunk SSE.
@@ -91,6 +91,7 @@ export type ServiceInfo = {
   openai2?: string;
   "openai-image"?: string;
   anthropic?: string;
+  "anthropic-claude-3"?: string;
   "google-ai"?: string;
   "mistral-ai"?: string;
   aws?: string;

@@ -132,6 +133,7 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
   },
   anthropic: {
     anthropic: `%BASE%/anthropic`,
+    "anthropic-claude-3 (temporary compatibility endpoint)": `%BASE%/anthropic/claude-3`,
   },
   "google-ai": {
     "google-ai": `%BASE%/google-ai`,
@@ -6,30 +6,64 @@ import {
   OpenAIChatMessage,
   OpenAIV1ChatCompletionSchema,
 } from "./openai";
 import { logger } from "../../logger";

 const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic;

-// https://console.anthropic.com/docs/api/reference#-v1-complete
-export const AnthropicV1CompleteSchema = z
+const AnthropicV1BaseSchema = z
   .object({
     model: z.string().max(100),
-    prompt: z.string({
-      required_error:
-        "No prompt found. Are you sending an OpenAI-formatted request to the Claude endpoint?",
-    }),
-    max_tokens_to_sample: z.coerce
-      .number()
-      .int()
-      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
     stop_sequences: z.array(z.string().max(500)).optional(),
     stream: z.boolean().optional().default(false),
     temperature: z.coerce.number().optional().default(1),
     top_k: z.coerce.number().optional(),
     top_p: z.coerce.number().optional(),
     metadata: z.object({ user_id: z.string().optional() }).optional(),
   })
   .strip();

-export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
+// https://docs.anthropic.com/claude/reference/complete_post [deprecated]
+export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
+  z.object({
+    prompt: z.string(),
+    max_tokens_to_sample: z.coerce
+      .number()
+      .int()
+      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
+  })
+);
+
+// https://docs.anthropic.com/claude/reference/messages_post
+export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
+  z.object({
+    messages: z
+      .array(
+        z.object({
+          role: z.enum(["user", "assistant"]),
+          content: z.union([
+            z.string(),
+            z.array(z.object({ type: z.string().max(100), text: z.string() })),
+          ]),
+        })
+      )
+      .min(1)
+      .refine((v) => v[0].role === "user", {
+        message: `First message must be have 'user' role. Use 'system' parameter to start with a system message.`,
+      }),
+    max_tokens: z
+      .number()
+      .int()
+      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
+    system: z.string().optional(),
+  })
+);
+export type AnthropicChatMessage = z.infer<
+  typeof AnthropicV1MessagesSchema
+>["messages"][0];
+
+export function openAIMessagesToClaudeTextPrompt(
+  messages: OpenAIChatMessage[]
+) {
   return (
     messages
       .map((m) => {

@@ -51,13 +85,13 @@ export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
   );
 }

-export function openAIToAnthropic(req: Request) {
+export function openAIToAnthropicText(req: Request) {
   const { body } = req;
   const result = OpenAIV1ChatCompletionSchema.safeParse(body);
   if (!result.success) {
     req.log.warn(
       { issues: result.error.issues, body },
-      "Invalid OpenAI-to-Anthropic request"
+      "Invalid OpenAI-to-Anthropic Text request"
     );
     throw result.error;
   }

@@ -65,7 +99,7 @@ export function openAIToAnthropic(req: Request) {
   req.headers["anthropic-version"] = "2023-06-01";

   const { messages, ...rest } = result.data;
-  const prompt = openAIMessagesToClaudePrompt(messages);
+  const prompt = openAIMessagesToClaudeTextPrompt(messages);

   let stops = rest.stop
     ? Array.isArray(rest.stop)

@@ -90,3 +124,105 @@ export function openAIToAnthropic(req: Request) {
     top_p: rest.top_p,
   };
 }

+/**
+ * Converts an older Anthropic Text Completion prompt to the newer Messages API
+ * by splitting the flat text into messages.
+ */
+export function anthropicTextToAnthropicChat(req: Request) {
+  const { body } = req;
+  const result = AnthropicV1TextSchema.safeParse(body);
+  if (!result.success) {
+    req.log.warn(
+      { issues: result.error.issues, body },
+      "Invalid Anthropic Text-to-Anthropic Chat request"
+    );
+    throw result.error;
+  }
+
+  req.headers["anthropic-version"] = "2023-06-01";
+
+  const { model, max_tokens_to_sample, prompt, ...rest } = result.data;
+  validateAnthropicTextPrompt(prompt);
+
+  // Iteratively slice the prompt into messages. Start from the beginning and
+  // look for the next `\n\nHuman:` or `\n\nAssistant:`. Anything before the
+  // first human message is a system message.
+  let index = prompt.indexOf("\n\nHuman:");
+  let remaining = prompt.slice(index);
+  const system = prompt.slice(0, index);
+  const messages: AnthropicChatMessage[] = [];
+  while (remaining) {
+    const isHuman = remaining.startsWith("\n\nHuman:");
+
+    // TODO: Are multiple consecutive human or assistant messages allowed?
+    // Currently we will enforce alternating turns.
+    const thisRole = isHuman ? "\n\nHuman:" : "\n\nAssistant:";
+    const nextRole = isHuman ? "\n\nAssistant:" : "\n\nHuman:";
+    const nextIndex = remaining.indexOf(nextRole);
+
+    // Collect text up to the next message, or the end of the prompt for the
+    // Assistant prefill if present.
+    const msg = remaining
+      .slice(0, nextIndex === -1 ? undefined : nextIndex)
+      .replace(thisRole, "")
+      .trimStart();
+
+    const role = isHuman ? "user" : "assistant";
+    messages.push({ role, content: msg });
+    remaining = remaining.slice(nextIndex);
+
+    if (nextIndex === -1) break;
+  }
+
+  // fix "messages: final assistant content cannot end with trailing whitespace"
+  const lastMessage = messages[messages.length - 1];
+  if (
+    lastMessage.role === "assistant" &&
+    typeof lastMessage.content === "string"
+  ) {
+    messages[messages.length - 1].content = lastMessage.content.trimEnd();
+  }
+
+  return {
+    model,
+    system,
+    messages,
+    max_tokens: max_tokens_to_sample,
+    ...rest,
+  };
+}
+
+function validateAnthropicTextPrompt(prompt: string) {
+  if (!prompt.includes("\n\nHuman:") || !prompt.includes("\n\nAssistant:")) {
+    throw new Error(
+      "Prompt must contain at least one human and one assistant message."
+    );
+  }
+  // First human message must be before first assistant message
+  const firstHuman = prompt.indexOf("\n\nHuman:");
+  const firstAssistant = prompt.indexOf("\n\nAssistant:");
+  if (firstAssistant < firstHuman) {
+    throw new Error(
+      "First Assistant message must come after the first Human message."
+    );
+  }
+}
+
+export function flattenAnthropicMessages(
+  messages: AnthropicChatMessage[]
+): string {
+  return messages
+    .map((msg) => {
+      const name = msg.role === "user" ? "\n\nHuman: " : "\n\nAssistant: ";
+      const parts = Array.isArray(msg.content)
+        ? msg.content
+        : [{ type: "text", text: msg.content }];
+      return `${name}: ${parts
+        .map(({ text, type }) =>
+          type === "text" ? text : `[Unsupported content type: ${type}]`
+        )
+        .join("\n")}`;
+    })
+    .join("\n\n");
}
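Note: a worked example of the prompt splitting performed by `anthropicTextToAnthropicChat` above (shapes only; the real function validates with `AnthropicV1TextSchema` and carries the remaining sampling params through, and the `max_tokens` value here is just illustrative):

```ts
const prompt = "You are a poet.\n\nHuman: Write a haiku.\n\nAssistant: Sure:";

// Approximate result: text before the first Human turn becomes `system`,
// turns alternate user/assistant, and a trailing assistant prefill survives
// with its trailing whitespace trimmed.
const converted = {
  system: "You are a poet.",
  messages: [
    { role: "user", content: "Write a haiku." },
    { role: "assistant", content: "Sure:" },
  ],
  max_tokens: 256, // renamed from max_tokens_to_sample
};
```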
@@ -1,6 +1,6 @@
 import { z } from "zod";
 import { APIFormat } from "../key-management";
-import { AnthropicV1CompleteSchema } from "./anthropic";
+import { AnthropicV1TextSchema, AnthropicV1MessagesSchema } from "./anthropic";
 import { OpenAIV1ChatCompletionSchema } from "./openai";
 import { OpenAIV1TextCompletionSchema } from "./openai-text";
 import { OpenAIV1ImagesGenerationSchema } from "./openai-image";

@@ -8,11 +8,13 @@ import { GoogleAIV1GenerateContentSchema } from "./google-ai";
 import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";

 export { OpenAIChatMessage } from "./openai";
+export { AnthropicChatMessage, flattenAnthropicMessages } from "./anthropic";
 export { GoogleAIChatMessage } from "./google-ai";
 export { MistralAIChatMessage } from "./mistral-ai";

 export const API_SCHEMA_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
-  anthropic: AnthropicV1CompleteSchema,
+  "anthropic-chat": AnthropicV1MessagesSchema,
+  "anthropic-text": AnthropicV1TextSchema,
   openai: OpenAIV1ChatCompletionSchema,
   "openai-text": OpenAIV1TextCompletionSchema,
   "openai-image": OpenAIV1ImagesGenerationSchema,
@@ -12,7 +12,9 @@ export type AnthropicModel =
   | "claude-v1"
   | "claude-v1-100k"
   | "claude-2"
-  | "claude-2.1";
+  | "claude-2.1"
+  | "claude-3-opus-20240229"
+  | "claude-3-sonnet-20240229";

 export type AnthropicKeyUpdate = Omit<
   Partial<AnthropicKey>,

@@ -9,11 +9,12 @@ import { KeyPool } from "./key-pool";
 /** The request and response format used by a model's API. */
 export type APIFormat =
   | "openai"
-  | "anthropic"
-  | "google-ai"
-  | "mistral-ai"
   | "openai-text"
-  | "openai-image";
+  | "openai-image"
+  | "anthropic-chat" // Anthropic's newer messages array format
+  | "anthropic-text" // Legacy flat string prompt format
+  | "google-ai"
+  | "mistral-ai";
 export type Model =
   | OpenAIModel
   | AnthropicModel
@@ -192,7 +192,8 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
     modelFamily = getAzureOpenAIModelFamily(model);
   } else {
     switch (req.outboundApi) {
-      case "anthropic":
+      case "anthropic-chat":
+      case "anthropic-text":
         modelFamily = getClaudeModelFamily(model);
         break;
       case "openai":
@@ -84,7 +84,7 @@ export function makeCompletionSSE({
         model,
       };
       break;
-    case "anthropic":
+    case "anthropic-text":
       event = {
         completion: content,
         stop_reason: title,

@@ -106,13 +106,14 @@ export function makeCompletionSSE({
         },
       ],
     });
+    case "anthropic-chat":
     case "openai-image":
       throw new Error(`SSE not supported for ${format} requests`);
     default:
       assertNever(format);
   }

-  if (format === "anthropic") {
+  if (format === "anthropic-text") {
     return (
       ["event: completion", `data: ${JSON.stringify(event)}`].join("\n") +
       "\n\n"
@@ -1,23 +1,27 @@
 import { getTokenizer } from "@anthropic-ai/tokenizer";
 import { Tiktoken } from "tiktoken/lite";
+import { AnthropicChatMessage } from "../api-schemas";

 let encoder: Tiktoken;
+let userRoleCount = 0;
+let assistantRoleCount = 0;

 export function init() {
   // they export a `countTokens` function too but it instantiates a new
   // tokenizer every single time and it is not fast...
   encoder = getTokenizer();
+  userRoleCount = encoder.encode("\n\nHuman: ", "all").length;
+  assistantRoleCount = encoder.encode("\n\nAssistant: ", "all").length;
   return true;
 }

-export function getTokenCount(prompt: string, _model: string) {
-  // Don't try tokenizing if the prompt is massive to prevent DoS.
-  // 500k characters should be sufficient for all supported models.
-  if (prompt.length > 500000) {
-    return {
-      tokenizer: "length fallback",
-      token_count: 100000,
-    };
+export function getTokenCount(prompt: string | AnthropicChatMessage[]) {
+  if (typeof prompt !== "string") {
+    return getTokenCountForMessages(prompt);
   }

+  if (prompt.length > 800000) {
+    throw new Error("Content is too large to tokenize.");
+  }
+
   return {

@@ -25,3 +29,36 @@ export function getTokenCount(prompt: string, _model: string) {
     token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
   };
 }

+function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
+  let numTokens = 0;
+
+  for (const message of messages) {
+    const { content, role } = message;
+    numTokens += role === "user" ? userRoleCount : assistantRoleCount;
+
+    const parts = Array.isArray(content)
+      ? content
+      : [{ type: "text", text: content }];
+
+    for (const part of parts) {
+      // We don't allow other content types for now because we can't estimate
+      // cost for them.
+      if (part.type !== "text") {
+        throw new Error(`Unsupported Anthropic content type: ${part.type}`);
+      }
+
+      if (part.text.length > 800000 || numTokens > 200000) {
+        throw new Error("Content is too large to tokenize.");
+      }
+
+      numTokens += encoder.encode(part.text.normalize("NFKC"), "all").length;
+    }
+  }
+
+  if (messages[messages.length - 1].role !== "assistant") {
+    numTokens += assistantRoleCount;
+  }
+
+  return { tokenizer: "@anthropic-ai/tokenizer", token_count: numTokens };
+}
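Note: a rough reading of the new message-based counting above (assuming `init()` has run so the role-prefix token counts are cached; import path omitted):

```ts
// Each turn costs its text tokens plus a fixed "\n\nHuman: "/"\n\nAssistant: "
// prefix; one trailing assistant prefix is added when the last turn is a user
// message, mirroring how the prompt would be flattened for the model.
const messages: AnthropicChatMessage[] = [
  { role: "user", content: "Hi" },
  { role: "assistant", content: "Hello!" },
  { role: "user", content: "Write a haiku." },
];
const { token_count, tokenizer } = getTokenCount(messages);
// tokenizer === "@anthropic-ai/tokenizer"; token_count includes 4 role prefixes
```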
@@ -29,13 +29,8 @@ export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
 }

 function getTextTokenCount(prompt: string) {
-  // Don't try tokenizing if the prompt is massive to prevent DoS.
-  // 500k characters should be sufficient for all supported models.
-  if (prompt.length > 500000) {
-    return {
-      tokenizer: "length fallback",
-      token_count: 100000,
-    };
+  if (prompt.length > 800000) {
+    throw new Error("Content is too large to tokenize.");
   }

   return {
@@ -60,16 +60,8 @@ export async function getTokenCount(
       textContent = value;
     }

-    // Break if we get a huge message or exceed the token limit to prevent
-    // DoS.
-    // 200k tokens allows for future 200k GPT-4 models and 500k characters
-    // is just a sanity check
-    if (textContent.length > 500000 || numTokens > 200000) {
-      numTokens = 200000;
-      return {
-        tokenizer: "tiktoken (prompt length limit exceeded)",
-        token_count: numTokens,
-      };
+    if (textContent.length > 800000 || numTokens > 200000) {
+      throw new Error("Content is too large to tokenize.");
     }

     numTokens += encoder.encode(textContent).length;
@@ -16,6 +16,7 @@ import {
 } from "./mistral";
 import { APIFormat } from "../key-management";
 import {
+  AnthropicChatMessage,
   GoogleAIChatMessage,
   MistralAIChatMessage,
   OpenAIChatMessage,

@@ -27,23 +28,60 @@ export async function init() {
   initMistralAI();
 }

-/** Tagged union via `service` field of the different types of requests that can
- * be made to the tokenization service, for both prompts and completions */
+type OpenAIChatTokenCountRequest = {
+  prompt: OpenAIChatMessage[];
+  completion?: never;
+  service: "openai";
+};
+
+type AnthropicChatTokenCountRequest = {
+  prompt: AnthropicChatMessage[];
+  completion?: never;
+  service: "anthropic-chat";
+};
+
+type GoogleAIChatTokenCountRequest = {
+  prompt: GoogleAIChatMessage[];
+  completion?: never;
+  service: "google-ai";
+};
+
+type MistralAIChatTokenCountRequest = {
+  prompt: MistralAIChatMessage[];
+  completion?: never;
+  service: "mistral-ai";
+};
+
+type FlatPromptTokenCountRequest = {
+  prompt: string;
+  completion?: never;
+  service: "openai-text" | "anthropic-text" | "google-ai";
+};
+
+type StringCompletionTokenCountRequest = {
+  prompt?: never;
+  completion: string;
+  service: APIFormat;
+};
+
+type OpenAIImageCompletionTokenCountRequest = {
+  prompt?: never;
+  completion?: never;
+  service: "openai-image";
+};
+
+/**
+ * Tagged union via `service` field of the different types of requests that can
+ * be made to the tokenization service, for both prompts and completions
+ */
 type TokenCountRequest = { req: Request } & (
-  | { prompt: OpenAIChatMessage[]; completion?: never; service: "openai" }
-  | {
-      prompt: string;
-      completion?: never;
-      service: "openai-text" | "anthropic" | "google-ai";
-    }
-  | { prompt?: GoogleAIChatMessage[]; completion?: never; service: "google-ai" }
-  | {
-      prompt: MistralAIChatMessage[];
-      completion?: never;
-      service: "mistral-ai";
-    }
-  | { prompt?: never; completion: string; service: APIFormat }
-  | { prompt?: never; completion?: never; service: "openai-image" }
+  | OpenAIChatTokenCountRequest
+  | AnthropicChatTokenCountRequest
+  | GoogleAIChatTokenCountRequest
+  | MistralAIChatTokenCountRequest
+  | FlatPromptTokenCountRequest
+  | StringCompletionTokenCountRequest
+  | OpenAIImageCompletionTokenCountRequest
 );

 type TokenCountResult = {
@@ -60,9 +98,14 @@ export async function countTokens({
 }: TokenCountRequest): Promise<TokenCountResult> {
   const time = process.hrtime();
   switch (service) {
-    case "anthropic":
+    case "anthropic-chat":
       return {
-        ...getClaudeTokenCount(prompt ?? completion, req.body.model),
+        ...getClaudeTokenCount(prompt ?? completion),
         tokenization_duration_ms: getElapsedMs(time),
       };
+    case "anthropic-text":
+      return {
+        ...getClaudeTokenCount(prompt ?? completion),
+        tokenization_duration_ms: getElapsedMs(time),
+      };
     case "openai":
@@ -400,7 +400,8 @@ function getModelFamilyForQuotaUsage(
     case "openai-text":
     case "openai-image":
       return getOpenAIModelFamily(model);
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       return getClaudeModelFamily(model);
     case "google-ai":
       return getGoogleAIModelFamily(model);