Implement Anthropic Chat Completions endpoint and Claude 3 (khanon/oai-reverse-proxy!64)

khanon 2024-03-04 19:06:46 +00:00
parent b90abbda88
commit db318ec237
34 changed files with 841 additions and 161 deletions

View File

@ -42,6 +42,8 @@ const getModelsResponse = () => {
"claude-2",
"claude-2.0",
"claude-2.1",
"claude-3-opus-20240229",
"claude-3-sonnet-20240229",
];
const models = claudeVariants.map((id) => ({
@ -81,8 +83,16 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
}
if (req.inboundApi === "openai") {
req.log.info("Transforming Anthropic response to OpenAI format");
body = transformAnthropicResponse(body, req);
req.log.info("Transforming Anthropic text to OpenAI format");
body = transformAnthropicTextResponseToOpenAI(body, req);
}
if (
req.inboundApi === "anthropic-text" &&
req.outboundApi === "anthropic-chat"
) {
req.log.info("Transforming Anthropic text to Anthropic chat format");
body = transformAnthropicChatResponseToAnthropicText(body, req);
}
if (req.tokenizerInfo) {
@ -92,13 +102,32 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
res.status(200).json(body);
};
function transformAnthropicChatResponseToAnthropicText(
anthropicBody: Record<string, any>,
req: Request
): Record<string, any> {
return {
type: "completion",
id: "trans-" + anthropicBody.id,
completion: anthropicBody.content
.map((part: { type: string; text: string }) =>
part.type === "text" ? part.text : ""
)
.join(""),
stop_reason: anthropicBody.stop_reason,
stop: anthropicBody.stop_sequence,
model: anthropicBody.model,
usage: anthropicBody.usage,
};
}
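// Illustrative example (hypothetical values) of the conversion performed by
// transformAnthropicChatResponseToAnthropicText: a Messages API response is
// flattened back into the legacy text-completion shape.
const exampleChatResponse = {
  id: "msg_abc123",
  type: "message",
  role: "assistant",
  content: [{ type: "text", text: "Hello there." }],
  model: "claude-3-sonnet-20240229",
  stop_reason: "end_turn",
  stop_sequence: null,
  usage: { input_tokens: 12, output_tokens: 4 },
};
// transformAnthropicChatResponseToAnthropicText(exampleChatResponse, req) =>
// {
//   type: "completion",
//   id: "trans-msg_abc123",
//   completion: "Hello there.",
//   stop_reason: "end_turn",
//   stop: null,
//   model: "claude-3-sonnet-20240229",
//   usage: { input_tokens: 12, output_tokens: 4 },
// }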
/**
* Transforms a model response from the Anthropic API to match those from the
* OpenAI API, for users using Claude via the OpenAI-compatible endpoint. This
* is only used for non-streaming requests as streaming requests are handled
* on-the-fly.
*/
function transformAnthropicResponse(
function transformAnthropicTextResponseToOpenAI(
anthropicBody: Record<string, any>,
req: Request
): Record<string, any> {
@ -139,36 +168,96 @@ const anthropicProxy = createQueueMiddleware({
proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
error: handleProxyError,
},
pathRewrite: {
// Send OpenAI-compat requests to the real Anthropic endpoint.
"^/v1/chat/completions": "/v1/complete",
// Abusing pathFilter to rewrite the paths dynamically.
pathFilter: (pathname, req) => {
const isText = req.outboundApi === "anthropic-text";
const isChat = req.outboundApi === "anthropic-chat";
if (isChat && pathname === "/v1/complete") {
req.url = "/v1/messages";
}
if (isText && pathname === "/v1/chat/completions") {
req.url = "/v1/complete";
}
if (isChat && pathname === "/v1/claude-3/complete") {
req.url = "/v1/messages";
}
return true;
},
}),
});
const nativeTextPreprocessor = createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-text",
service: "anthropic",
});
const textToChatPreprocessor = createPreprocessorMiddleware({
inApi: "anthropic-text",
outApi: "anthropic-chat",
service: "anthropic",
});
/**
* Routes text completion prompts to anthropic-chat if they need translation
* (claude-3 based models do not support the old text completion endpoint).
*/
const claudeTextCompletionRouter: RequestHandler = (req, res, next) => {
if (req.body.model?.startsWith("claude-3")) {
textToChatPreprocessor(req, res, next);
} else {
nativeTextPreprocessor(req, res, next);
}
};
const anthropicRouter = Router();
anthropicRouter.get("/v1/models", handleModelRequest);
// Native Anthropic chat completion endpoint.
// Anthropic text completion endpoint. Dynamic routing based on model.
anthropicRouter.post(
"/v1/complete",
ipLimiter,
claudeTextCompletionRouter,
anthropicProxy
);
// Native Anthropic chat completion endpoint.
anthropicRouter.post(
"/v1/messages",
ipLimiter,
createPreprocessorMiddleware({
inApi: "anthropic",
outApi: "anthropic",
inApi: "anthropic-chat",
outApi: "anthropic-chat",
service: "anthropic",
}),
anthropicProxy
);
// OpenAI-to-Anthropic compatibility endpoint.
// OpenAI-to-Anthropic Text compatibility endpoint.
anthropicRouter.post(
"/v1/chat/completions",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "openai", outApi: "anthropic", service: "anthropic" },
{ inApi: "openai", outApi: "anthropic-text", service: "anthropic" },
{ afterTransform: [maybeReassignModel] }
),
anthropicProxy
);
// Temporarily forces Anthropic Text to Anthropic Chat for frontends which do
// not yet support the new model. Forces claude-3. Will be removed once common
// frontends have been updated.
anthropicRouter.post(
"/v1/claude-3/complete",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "anthropic-text", outApi: "anthropic-chat", service: "anthropic" },
{
beforeTransform: [
(req) => {
req.body.model = "claude-3-sonnet-20240229";
},
],
}
),
anthropicProxy
);
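// Hypothetical usage sketch of the temporary claude-3 compatibility route
// registered above, assuming a runtime with global fetch and with
// `anthropicProxyBase` standing in for wherever this router is mounted.
// The route accepts a legacy text-completion body, overwrites the model with
// claude-3-sonnet-20240229, translates the prompt to the Messages API, and
// returns the result re-flattened into the text-completion shape.
async function exampleClaude3CompatRequest(anthropicProxyBase: string) {
  const res = await fetch(`${anthropicProxyBase}/v1/claude-3/complete`, {
    method: "POST",
    headers: { "Content-Type": "application/json" }, // proxy auth headers omitted
    body: JSON.stringify({
      model: "claude-2.1", // ignored; the beforeTransform hook above replaces it
      prompt: "\n\nHuman: Say hi.\n\nAssistant:",
      max_tokens_to_sample: 64,
    }),
  });
  return res.json();
}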
function maybeReassignModel(req: Request) {
const model = req.body.model;

View File

@ -32,6 +32,7 @@ const getModelsResponse = () => {
const variants = [
"anthropic.claude-v2",
"anthropic.claude-v2:1",
"anthropic.claude-3-sonnet-20240229-v1:0"
];
const models = variants.map((id) => ({
@ -145,7 +146,7 @@ awsRouter.post(
"/v1/complete",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "anthropic", outApi: "anthropic", service: "aws" },
{ inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
{ afterTransform: [maybeReassignModel] }
),
awsProxy
@ -155,7 +156,7 @@ awsRouter.post(
"/v1/chat/completions",
ipLimiter,
createPreprocessorMiddleware(
{ inApi: "openai", outApi: "anthropic", service: "aws" },
{ inApi: "openai", outApi: "anthropic-text", service: "aws" },
{ afterTransform: [maybeReassignModel] }
),
awsProxy

View File

@ -11,6 +11,8 @@ const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
const ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT = "/v1/claude-3/complete";
export function isTextGenerationRequest(req: Request) {
return (
@ -19,6 +21,8 @@ export function isTextGenerationRequest(req: Request) {
OPENAI_CHAT_COMPLETION_ENDPOINT,
OPENAI_TEXT_COMPLETION_ENDPOINT,
ANTHROPIC_COMPLETION_ENDPOINT,
ANTHROPIC_MESSAGES_ENDPOINT,
ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT,
].some((endpoint) => req.path.startsWith(endpoint))
);
}
@ -91,6 +95,7 @@ export const classifyErrorAndSend = (
});
} catch (error) {
req.log.error(error, `Error writing error response headers, giving up.`);
res.end();
}
};
@ -199,11 +204,24 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
return body.choices[0].message.content || "";
case "openai-text":
return body.choices[0].text;
case "anthropic":
case "anthropic-chat":
if (!body.content) {
req.log.error(
{ body: JSON.stringify(body) },
"Received empty Anthropic chat completion"
);
return "";
}
return body.content
.map(({ text, type }: { type: string; text: string }) =>
type === "text" ? text : `[Unsupported content type: ${type}]`
)
.join("\n");
case "anthropic-text":
if (!body.completion) {
req.log.error(
{ body: JSON.stringify(body) },
"Received empty Anthropic completion"
"Received empty Anthropic text completion"
);
return "";
}
@ -229,7 +247,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
return body.model;
case "openai-image":
return req.body.model;
case "anthropic":
case "anthropic-chat":
case "anthropic-text":
// Anthropic confirms the model in the response, but AWS Claude doesn't.
return body.model || req.body.model;
case "google-ai":

View File

@ -28,7 +28,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
switch (req.outboundApi) {
// If we are translating between API formats we may need to select a model
// for the user, because the provided model is for the inbound API.
case "anthropic":
case "anthropic-chat":
case "anthropic-text":
assignedKey = keyPool.get("claude-v1", req.service);
break;
case "openai-text":
@ -71,6 +72,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
if (key.organizationId) {
proxyReq.setHeader("OpenAI-Organization", key.organizationId);
}
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
break;
case "mistral-ai":
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
break;

View File

@ -8,6 +8,10 @@ export const finalizeBody: HPMRequestCallback = (proxyReq, req) => {
if (req.outboundApi === "openai-image") {
delete req.body.stream;
}
// For anthropic text to chat requests, remove undefined prompt.
if (req.outboundApi === "anthropic-chat") {
delete req.body.prompt;
}
const updatedBody = JSON.stringify(req.body);
proxyReq.setHeader("Content-Length", Buffer.byteLength(updatedBody));

View File

@ -114,7 +114,7 @@ const handleTestMessage: RequestHandler = (req, res) => {
if (method !== "POST") {
return;
}
if (isTestMessage(body)) {
req.log.info({ body }, "Received test message. Skipping API call.");
res.json({
@ -145,6 +145,9 @@ function isTestMessage(body: any) {
messages[0].content === "Hi"
);
} else {
return prompt?.trim() === "Human: Hi\n\nAssistant:";
return (
prompt?.trim() === "Human: Hi\n\nAssistant:" ||
prompt?.startsWith("Hi\n\n")
);
}
}

View File

@ -2,6 +2,7 @@ import { RequestPreprocessor } from "../index";
import { countTokens } from "../../../../shared/tokenization";
import { assertNever } from "../../../../shared/utils";
import {
AnthropicChatMessage,
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@ -28,7 +29,13 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
result = await countTokens({ req, prompt, service });
break;
}
case "anthropic": {
case "anthropic-chat": {
req.outputTokens = req.body.max_tokens;
const prompt: AnthropicChatMessage[] = req.body.messages;
result = await countTokens({ req, prompt, service });
break;
}
case "anthropic-text": {
req.outputTokens = req.body.max_tokens_to_sample;
const prompt: string = req.body.prompt;
result = await countTokens({ req, prompt, service });

View File

@ -6,6 +6,7 @@ import { UserInputError } from "../../../../shared/errors";
import {
MistralAIChatMessage,
OpenAIChatMessage,
flattenAnthropicMessages,
} from "../../../../shared/api-schemas";
const rejectedClients = new Map<string, number>();
@ -53,7 +54,9 @@ function getPromptFromRequest(req: Request) {
const service = req.outboundApi;
const body = req.body;
switch (service) {
case "anthropic":
case "anthropic-chat":
return flattenAnthropicMessages(body.messages);
case "anthropic-text":
return body.prompt;
case "openai":
case "mistral-ai":

View File

@ -2,7 +2,10 @@ import express from "express";
import { Sha256 } from "@aws-crypto/sha256-js";
import { SignatureV4 } from "@smithy/signature-v4";
import { HttpRequest } from "@smithy/protocol-http";
import { AnthropicV1CompleteSchema } from "../../../../shared/api-schemas/anthropic";
import {
AnthropicV1TextSchema,
AnthropicV1MessagesSchema,
} from "../../../../shared/api-schemas/anthropic";
import { keyPool } from "../../../../shared/key-management";
import { RequestPreprocessor } from "../index";
@ -22,19 +25,37 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
req.body.prompt = preamble + req.body.prompt;
// AWS supports only a subset of Anthropic's parameters and is more strict
// about unknown parameters.
// AWS uses mostly the same parameters as Anthropic, with a few removed params
// and much stricter validation on unused parameters. Rather than treating it
// as a separate schema we will use the anthropic ones and strip the unused
// parameters.
// TODO: This should happen in transform-outbound-payload.ts
const strippedParams = AnthropicV1CompleteSchema.pick({
prompt: true,
max_tokens_to_sample: true,
stop_sequences: true,
temperature: true,
top_k: true,
top_p: true,
})
.strip()
.parse(req.body);
let strippedParams: Record<string, unknown>;
if (req.inboundApi === "anthropic-chat") {
strippedParams = AnthropicV1MessagesSchema
.pick({
messages: true,
max_tokens: true,
stop_sequences: true,
temperature: true,
top_k: true,
top_p: true,
})
.strip()
.parse(req.body);
} else {
strippedParams = AnthropicV1TextSchema
.pick({
prompt: true,
max_tokens_to_sample: true,
stop_sequences: true,
temperature: true,
top_k: true,
top_p: true,
})
.strip()
.parse(req.body);
}
const credential = getCredentialParts(req);
const host = AMZ_HOST.replace("%REGION%", credential.region);

View File

@ -1,4 +1,7 @@
import { openAIToAnthropic } from "../../../../shared/api-schemas/anthropic";
import {
anthropicTextToAnthropicChat,
openAIToAnthropicText,
} from "../../../../shared/api-schemas/anthropic";
import { openAIToOpenAIText } from "../../../../shared/api-schemas/openai-text";
import { openAIToOpenAIImage } from "../../../../shared/api-schemas/openai-image";
import { openAIToGoogleAI } from "../../../../shared/api-schemas/google-ai";
@ -41,8 +44,16 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
return;
}
if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
req.body = openAIToAnthropic(req);
if (
req.inboundApi === "anthropic-text" &&
req.outboundApi === "anthropic-chat"
) {
req.body = anthropicTextToAnthropicChat(req);
return;
}
if (req.inboundApi === "openai" && req.outboundApi === "anthropic-text") {
req.body = openAIToAnthropicText(req);
return;
}

View File

@ -29,7 +29,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
case "openai-text":
proxyMax = OPENAI_MAX_CONTEXT;
break;
case "anthropic":
case "anthropic-chat":
case "anthropic-text":
proxyMax = CLAUDE_MAX_CONTEXT;
break;
case "google-ai":

View File

@ -10,9 +10,11 @@ import {
import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils";
import {
AnthropicChatMessage, flattenAnthropicMessages,
MistralAIChatMessage,
OpenAIChatMessage,
} from "../../../shared/api-schemas";
import { APIFormat } from "../../../shared/key-management";
/** If prompt logging is enabled, enqueues the prompt for logging. */
export const logPrompt: ProxyResHandlerWithBody = async (
@ -33,7 +35,7 @@ export const logPrompt: ProxyResHandlerWithBody = async (
if (!loggable) return;
const promptPayload = getPromptForRequest(req, responseBody);
const promptFlattened = flattenMessages(promptPayload);
const promptFlattened = flattenMessages(promptPayload, req.outboundApi);
const response = getCompletionFromBody(req, responseBody);
const model = getModelFromBody(req, responseBody);
@ -57,13 +59,19 @@ type OaiImageResult = {
const getPromptForRequest = (
req: Request,
responseBody: Record<string, any>
): string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult => {
):
| string
| OpenAIChatMessage[]
| AnthropicChatMessage[]
| MistralAIChatMessage[]
| OaiImageResult => {
// Since the prompt logger only runs after the request has been proxied, we
// can assume the body has already been transformed to the target API's
// format.
switch (req.outboundApi) {
case "openai":
case "mistral-ai":
case "anthropic-chat":
return req.body.messages;
case "openai-text":
return req.body.prompt;
@ -75,7 +83,7 @@ const getPromptForRequest = (
quality: req.body.quality,
revisedPrompt: responseBody.data[0].revised_prompt,
};
case "anthropic":
case "anthropic-text":
return req.body.prompt;
case "google-ai":
return req.body.prompt.text;
@ -85,11 +93,20 @@ const getPromptForRequest = (
};
const flattenMessages = (
val: string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult
val:
| string
| OpenAIChatMessage[]
| MistralAIChatMessage[]
| OaiImageResult
| AnthropicChatMessage[],
format: APIFormat,
): string => {
if (typeof val === "string") {
return val.trim();
}
if (format === "anthropic-chat") {
return flattenAnthropicMessages(val as AnthropicChatMessage[]);
}
if (Array.isArray(val)) {
return val
.map(({ content, role }) => {

View File

@ -0,0 +1,49 @@
import { OpenAIChatCompletionStreamEvent } from "../index";
export type AnthropicChatCompletionResponse = {
id: string;
type: "message";
role: "assistant";
content: { type: "text"; text: string }[];
model: string;
stop_reason: string | null;
stop_sequence: string | null;
usage: { input_tokens: number; output_tokens: number };
};
/**
* Given a list of OpenAI chat completion events, compiles them into a single
* finalized Anthropic chat completion response so that non-streaming middleware
* can operate on it as if it were a blocking response.
*/
export function mergeEventsForAnthropicChat(
events: OpenAIChatCompletionStreamEvent[]
): AnthropicChatCompletionResponse {
let merged: AnthropicChatCompletionResponse = {
id: "",
type: "message",
role: "assistant",
content: [],
model: "",
stop_reason: null,
stop_sequence: null,
usage: { input_tokens: 0, output_tokens: 0 },
};
merged = events.reduce((acc, event, i) => {
// The first event will only contain role assignment and response metadata
if (i === 0) {
acc.id = event.id;
acc.model = event.model;
acc.content = [{ type: "text", text: "" }];
return acc;
}
acc.stop_reason = event.choices[0].finish_reason ?? "";
if (event.choices[0].delta.content) {
acc.content[0].text += event.choices[0].delta.content;
}
return acc;
}, merged);
return merged;
}
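// Illustrative input/output for mergeEventsForAnthropicChat (hypothetical
// events): the first chunk contributes id/model, later chunks contribute
// delta text and the finish reason.
const exampleChunks: OpenAIChatCompletionStreamEvent[] = [
  { id: "chatcmpl-1", object: "chat.completion.chunk", created: 1, model: "claude-3-opus-20240229",
    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }] },
  { id: "chatcmpl-1", object: "chat.completion.chunk", created: 1, model: "claude-3-opus-20240229",
    choices: [{ index: 0, delta: { content: "Hello" }, finish_reason: null }] },
  { id: "chatcmpl-1", object: "chat.completion.chunk", created: 1, model: "claude-3-opus-20240229",
    choices: [{ index: 0, delta: {}, finish_reason: "stop" }] },
];
// mergeEventsForAnthropicChat(exampleChunks) =>
// { id: "chatcmpl-1", type: "message", role: "assistant",
//   content: [{ type: "text", text: "Hello" }],
//   model: "claude-3-opus-20240229", stop_reason: "stop", stop_sequence: null,
//   usage: { input_tokens: 0, output_tokens: 0 } }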

View File

@ -1,6 +1,6 @@
import { OpenAIChatCompletionStreamEvent } from "../index";
export type AnthropicCompletionResponse = {
export type AnthropicTextCompletionResponse = {
completion: string;
stop_reason: string;
truncated: boolean;
@ -15,10 +15,10 @@ export type AnthropicCompletionResponse = {
* finalized Anthropic completion response so that non-streaming middleware
* can operate on it as if it were a blocking response.
*/
export function mergeEventsForAnthropic(
export function mergeEventsForAnthropicText(
events: OpenAIChatCompletionStreamEvent[]
): AnthropicCompletionResponse {
let merged: AnthropicCompletionResponse = {
): AnthropicTextCompletionResponse {
let merged: AnthropicTextCompletionResponse = {
log_id: "",
exception: null,
model: "",

View File

@ -1,9 +1,12 @@
import { APIFormat } from "../../../../shared/key-management";
import { assertNever } from "../../../../shared/utils";
import {
mergeEventsForAnthropic,
anthropicV2ToOpenAI,
mergeEventsForAnthropicChat,
mergeEventsForAnthropicText,
mergeEventsForOpenAIChat,
mergeEventsForOpenAIText,
AnthropicV2StreamEvent,
OpenAIChatCompletionStreamEvent,
} from "./index";
@ -20,8 +23,30 @@ export class EventAggregator {
this.format = format;
}
addEvent(event: OpenAIChatCompletionStreamEvent) {
this.events.push(event);
addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
if (eventIsOpenAIEvent(event)) {
this.events.push(event);
} else {
// horrible special case. previously all transformers' target format was
// openai, so the event aggregator could conveniently assume all incoming
// events were in openai format.
// now we have added anthropic-chat-to-text, so aggregator needs to know
// how to collapse events from two formats.
// because that is annoying, we will simply transform anthropic events to
// openai (even if the client didn't ask for openai) so we don't have to
// write aggregation logic for anthropic chat (which is also a troublesome
// stateful format).
const openAIEvent = anthropicV2ToOpenAI({
data: `event: completion\ndata: ${JSON.stringify(event)}\n\n`,
lastPosition: -1,
index: 0,
fallbackId: event.log_id || "event-aggregator-fallback",
fallbackModel: event.model || "claude-3-fallback",
});
if (openAIEvent.event) {
this.events.push(openAIEvent.event);
}
}
}
getFinalResponse() {
@ -32,8 +57,10 @@ export class EventAggregator {
return mergeEventsForOpenAIChat(this.events);
case "openai-text":
return mergeEventsForOpenAIText(this.events);
case "anthropic":
return mergeEventsForAnthropic(this.events);
case "anthropic-text":
return mergeEventsForAnthropicText(this.events);
case "anthropic-chat":
return mergeEventsForAnthropicChat(this.events);
case "openai-image":
throw new Error(`SSE aggregation not supported for ${this.format}`);
default:
@ -41,3 +68,9 @@ export class EventAggregator {
}
}
}
function eventIsOpenAIEvent(
event: any
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
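// Hypothetical inputs to EventAggregator.addEvent: an OpenAI chunk passes the
// type guard above and is stored as-is, while an Anthropic V2 text event fails
// it and is first converted to an OpenAI chunk via anthropicV2ToOpenAI.
const exampleOpenAIChunk = {
  id: "chatcmpl-1",
  object: "chat.completion.chunk" as const,
  created: 1,
  model: "claude-3-sonnet-20240229",
  choices: [{ index: 0, delta: { content: "Hi" }, finish_reason: null }],
};
const exampleAnthropicV2Event = {
  completion: "Hi",
  stop_reason: null,
  model: "claude-3-sonnet-20240229",
};
// eventIsOpenAIEvent(exampleOpenAIChunk)      === true
// eventIsOpenAIEvent(exampleAnthropicV2Event) === false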

View File

@ -1,9 +1,17 @@
export type SSEResponseTransformArgs = {
export type SSEResponseTransformArgs<S = Record<string, any>> = {
data: string;
lastPosition: number;
index: number;
fallbackId: string;
fallbackModel: string;
state?: S;
};
export type AnthropicV2StreamEvent = {
log_id?: string;
model?: string;
completion: string;
stop_reason: string | null;
};
export type OpenAIChatCompletionStreamEvent = {
@ -16,17 +24,24 @@ export type OpenAIChatCompletionStreamEvent = {
delta: { role?: string; content?: string };
finish_reason: string | null;
}[];
}
};
export type StreamingCompletionTransformer = (
params: SSEResponseTransformArgs
) => { position: number; event?: OpenAIChatCompletionStreamEvent };
export type StreamingCompletionTransformer<
T = OpenAIChatCompletionStreamEvent,
S = any,
> = (params: SSEResponseTransformArgs<S>) => {
position: number;
event?: T;
state?: S;
};
export { openAITextToOpenAIChat } from "./transformers/openai-text-to-openai";
export { anthropicV1ToOpenAI } from "./transformers/anthropic-v1-to-openai";
export { anthropicV2ToOpenAI } from "./transformers/anthropic-v2-to-openai";
export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-anthropic-v2";
export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
export { mergeEventsForAnthropic } from "./aggregators/anthropic";
export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
export { mergeEventsForAnthropicChat } from "./aggregators/anthropic-chat";

View File

@ -3,27 +3,27 @@ export type ServerSentEvent = { id?: string; type?: string; data: string };
/** Given a string of SSE data, parse it into a `ServerSentEvent` object. */
export function parseEvent(event: string) {
const buffer: ServerSentEvent = { data: "" };
return event.split(/\r?\n/).reduce(parseLine, buffer)
return event.split(/\r?\n/).reduce(parseLine, buffer);
}
function parseLine(event: ServerSentEvent, line: string) {
const separator = line.indexOf(":");
const field = separator === -1 ? line : line.slice(0,separator);
const field = separator === -1 ? line : line.slice(0, separator);
const value = separator === -1 ? "" : line.slice(separator + 1);
switch (field) {
case 'id':
event.id = value.trim()
break
case 'event':
event.type = value.trim()
break
case 'data':
event.data += value.trimStart()
break
case "id":
event.id = value.trim();
break;
case "event":
event.type = value.trim();
break;
case "data":
event.data += value.trimStart();
break;
default:
break
break;
}
return event
}
return event;
}

View File

@ -3,11 +3,13 @@ import { logger } from "../../../../logger";
import { APIFormat } from "../../../../shared/key-management";
import { assertNever } from "../../../../shared/utils";
import {
anthropicChatToAnthropicV2,
anthropicV1ToOpenAI,
AnthropicV2StreamEvent,
anthropicV2ToOpenAI,
googleAIToOpenAI,
OpenAIChatCompletionStreamEvent,
openAITextToOpenAIChat,
googleAIToOpenAI,
passthroughToOpenAI,
StreamingCompletionTransformer,
} from "./index";
@ -26,9 +28,13 @@ type SSEMessageTransformerOptions = TransformOptions & {
*/
export class SSEMessageTransformer extends Transform {
private lastPosition: number;
private transformState: any;
private msgCount: number;
private readonly inputFormat: APIFormat;
private readonly transformFn: StreamingCompletionTransformer;
private readonly transformFn: StreamingCompletionTransformer<
// TODO: Refactor transformers to not assume only OpenAI events as output
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
>;
private readonly log;
private readonly fallbackId: string;
private readonly fallbackModel: string;
@ -58,15 +64,20 @@ export class SSEMessageTransformer extends Transform {
_transform(chunk: Buffer, _encoding: BufferEncoding, callback: Function) {
try {
const originalMessage = chunk.toString();
const { event: transformedMessage, position: newPosition } =
this.transformFn({
data: originalMessage,
lastPosition: this.lastPosition,
index: this.msgCount++,
fallbackId: this.fallbackId,
fallbackModel: this.fallbackModel,
});
const {
event: transformedMessage,
position: newPosition,
state,
} = this.transformFn({
data: originalMessage,
lastPosition: this.lastPosition,
index: this.msgCount++,
fallbackId: this.fallbackId,
fallbackModel: this.fallbackModel,
state: this.transformState,
});
this.lastPosition = newPosition;
this.transformState = state;
// Special case for Azure OpenAI, which is 99% the same as OpenAI but
// sometimes emits an extra event at the beginning of the stream with the
@ -84,7 +95,7 @@ export class SSEMessageTransformer extends Transform {
// Some events may not be transformed, e.g. ping events
if (!transformedMessage) return callback();
if (this.msgCount === 1) {
if (this.msgCount === 1 && eventIsOpenAIEvent(transformedMessage)) {
// TODO: does this need to be skipped for passthroughToOpenAI?
this.push(createInitialMessage(transformedMessage));
}
@ -98,20 +109,30 @@ export class SSEMessageTransformer extends Transform {
}
}
function eventIsOpenAIEvent(
event: any
): event is OpenAIChatCompletionStreamEvent {
return event?.object === "chat.completion.chunk";
}
function getTransformer(
responseApi: APIFormat,
version?: string
): StreamingCompletionTransformer {
): StreamingCompletionTransformer<
OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
> {
switch (responseApi) {
case "openai":
case "mistral-ai":
return passthroughToOpenAI;
case "openai-text":
return openAITextToOpenAIChat;
case "anthropic":
case "anthropic-text":
return version === "2023-01-01"
? anthropicV1ToOpenAI
: anthropicV2ToOpenAI;
case "anthropic-chat":
return anthropicChatToAnthropicV2;
case "google-ai":
return googleAIToOpenAI;
case "openai-image":

View File

@ -67,7 +67,7 @@ export class SSEStreamAdapter extends Transform {
default:
this.log.error({ message, type }, "Received bad AWS stream event");
return makeCompletionSSE({
format: "anthropic",
format: "anthropic-text",
title: "Proxy stream error",
message:
"The proxy received an unrecognized error from AWS while streaming.",

View File

@ -0,0 +1,129 @@
import {
AnthropicV2StreamEvent,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
const log = logger.child({
module: "sse-transformer",
transformer: "anthropic-chat-to-anthropic-v2",
});
export type AnthropicChatEventType =
| "message_start"
| "content_block_start"
| "content_block_delta"
| "content_block_stop"
| "message_delta"
| "message_stop";
type AnthropicChatStartEvent = {
type: "message_start";
message: {
id: string;
type: "message";
role: "assistant";
content: [];
model: string;
stop_reason: null;
stop_sequence: null;
usage: { input_tokens: number; output_tokens: number };
};
};
type AnthropicChatContentBlockStartEvent = {
type: "content_block_start";
index: number;
content_block: { type: "text"; text: string };
};
export type AnthropicChatContentBlockDeltaEvent = {
type: "content_block_delta";
index: number;
delta: { type: "text_delta"; text: string };
};
type AnthropicChatContentBlockStopEvent = {
type: "content_block_stop";
index: number;
};
type AnthropicChatMessageDeltaEvent = {
type: "message_delta";
delta: {
stop_reason: string;
stop_sequence: null;
usage: { output_tokens: number };
};
};
type AnthropicChatMessageStopEvent = {
type: "message_stop";
};
type AnthropicChatTransformerState = { content: string };
/**
* Transforms an incoming Anthropic Chat SSE to an equivalent Anthropic V2
* Text SSE.
* For now we assume there is only one content block and message delta. In the
* future Anthropic may add multi-turn responses or multiple content blocks
* (probably for multimodal responses, image generation, etc) but as far as I
* can tell this is not yet implemented.
*/
export const anthropicChatToAnthropicV2: StreamingCompletionTransformer<
AnthropicV2StreamEvent,
AnthropicChatTransformerState
> = (params) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || !rawEvent.type) {
return { position: -1 };
}
const deltaEvent = asAnthropicChatDelta(rawEvent);
if (!deltaEvent) {
return { position: -1 };
}
const newEvent = {
log_id: params.fallbackId,
model: params.fallbackModel,
completion: deltaEvent.delta.text,
stop_reason: null,
};
return { position: -1, event: newEvent };
};
export function asAnthropicChatDelta(
event: ServerSentEvent
): AnthropicChatContentBlockDeltaEvent | null {
if (
!event.type ||
!["content_block_start", "content_block_delta"].includes(event.type)
) {
return null;
}
try {
const parsed = JSON.parse(event.data);
if (parsed.type === "content_block_delta") {
return parsed;
} else if (parsed.type === "content_block_start") {
return {
type: "content_block_delta",
index: parsed.index,
delta: { type: "text_delta", text: parsed.content_block?.text ?? "" },
};
} else {
// noinspection ExceptionCaughtLocallyJS
throw new Error("Invalid event type");
}
} catch (error) {
log.warn({ error: error.stack, event }, "Received invalid event");
}
return null;
}
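// Illustrative transformation (hypothetical SSE payload): a content_block_delta
// event from the Messages API stream becomes an Anthropic V2 "completion" event
// carrying only the delta text.
const exampleChatSSE =
  "event: content_block_delta\n" +
  'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}';
// anthropicChatToAnthropicV2({
//   data: exampleChatSSE, lastPosition: -1, index: 1,
//   fallbackId: "evt-1", fallbackModel: "claude-3-sonnet-20240229",
// }) =>
// { position: -1,
//   event: { log_id: "evt-1", model: "claude-3-sonnet-20240229",
//            completion: "Hello", stop_reason: null } }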

View File

@ -0,0 +1,45 @@
import { StreamingCompletionTransformer } from "../index";
import { parseEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
import { asAnthropicChatDelta } from "./anthropic-chat-to-anthropic-v2";
const log = logger.child({
module: "sse-transformer",
transformer: "anthropic-chat-to-openai",
});
/**
* Transforms an incoming Anthropic Chat SSE to an equivalent OpenAI
* chat.completion.chunks SSE.
*/
export const anthropicChatToOpenAI: StreamingCompletionTransformer = (
params
) => {
const { data } = params;
const rawEvent = parseEvent(data);
if (!rawEvent.data || !rawEvent.type) {
return { position: -1 };
}
const deltaEvent = asAnthropicChatDelta(rawEvent);
if (!deltaEvent) {
return { position: -1 };
}
const newEvent = {
id: params.fallbackId,
object: "chat.completion.chunk" as const,
created: Date.now(),
model: params.fallbackModel,
choices: [
{
index: params.index,
delta: { content: deltaEvent.delta.text },
finish_reason: null,
},
],
};
return { position: -1, event: newEvent };
};

View File

@ -1,4 +1,7 @@
import { StreamingCompletionTransformer } from "../index";
import {
AnthropicV2StreamEvent,
StreamingCompletionTransformer,
} from "../index";
import { parseEvent, ServerSentEvent } from "../parse-sse";
import { logger } from "../../../../../logger";
@ -7,13 +10,6 @@ const log = logger.child({
transformer: "anthropic-v2-to-openai",
});
type AnthropicV2StreamEvent = {
log_id?: string;
model?: string;
completion: string;
stop_reason: string;
};
/**
* Transforms an incoming Anthropic SSE (2023-06-01 API) to an equivalent
* OpenAI chat.completion.chunk SSE.

View File

@ -91,6 +91,7 @@ export type ServiceInfo = {
openai2?: string;
"openai-image"?: string;
anthropic?: string;
"anthropic-claude-3"?: string;
"google-ai"?: string;
"mistral-ai"?: string;
aws?: string;
@ -132,6 +133,7 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
},
anthropic: {
anthropic: `%BASE%/anthropic`,
"anthropic-claude-3 (temporary compatibility endpoint)": `%BASE%/anthropic/claude-3`,
},
"google-ai": {
"google-ai": `%BASE%/google-ai`,

View File

@ -6,30 +6,64 @@ import {
OpenAIChatMessage,
OpenAIV1ChatCompletionSchema,
} from "./openai";
import { logger } from "../../logger";
const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic;
// https://console.anthropic.com/docs/api/reference#-v1-complete
export const AnthropicV1CompleteSchema = z
const AnthropicV1BaseSchema = z
.object({
model: z.string().max(100),
prompt: z.string({
required_error:
"No prompt found. Are you sending an OpenAI-formatted request to the Claude endpoint?",
}),
max_tokens_to_sample: z.coerce
.number()
.int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
stop_sequences: z.array(z.string().max(500)).optional(),
stream: z.boolean().optional().default(false),
temperature: z.coerce.number().optional().default(1),
top_k: z.coerce.number().optional(),
top_p: z.coerce.number().optional(),
metadata: z.object({ user_id: z.string().optional() }).optional(),
})
.strip();
export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
// https://docs.anthropic.com/claude/reference/complete_post [deprecated]
export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
z.object({
prompt: z.string(),
max_tokens_to_sample: z.coerce
.number()
.int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
})
);
// https://docs.anthropic.com/claude/reference/messages_post
export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
z.object({
messages: z
.array(
z.object({
role: z.enum(["user", "assistant"]),
content: z.union([
z.string(),
z.array(z.object({ type: z.string().max(100), text: z.string() })),
]),
})
)
.min(1)
.refine((v) => v[0].role === "user", {
message: `First message must have the 'user' role. Use the 'system' parameter to start with a system message.`,
}),
max_tokens: z
.number()
.int()
.transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
system: z.string().optional(),
})
);
export type AnthropicChatMessage = z.infer<
typeof AnthropicV1MessagesSchema
>["messages"][0];
export function openAIMessagesToClaudeTextPrompt(
messages: OpenAIChatMessage[]
) {
return (
messages
.map((m) => {
@ -51,13 +85,13 @@ export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
);
}
export function openAIToAnthropic(req: Request) {
export function openAIToAnthropicText(req: Request) {
const { body } = req;
const result = OpenAIV1ChatCompletionSchema.safeParse(body);
if (!result.success) {
req.log.warn(
{ issues: result.error.issues, body },
"Invalid OpenAI-to-Anthropic request"
"Invalid OpenAI-to-Anthropic Text request"
);
throw result.error;
}
@ -65,7 +99,7 @@ export function openAIToAnthropic(req: Request) {
req.headers["anthropic-version"] = "2023-06-01";
const { messages, ...rest } = result.data;
const prompt = openAIMessagesToClaudePrompt(messages);
const prompt = openAIMessagesToClaudeTextPrompt(messages);
let stops = rest.stop
? Array.isArray(rest.stop)
@ -90,3 +124,105 @@ export function openAIToAnthropic(req: Request) {
top_p: rest.top_p,
};
}
/**
* Converts an older Anthropic Text Completion prompt to the newer Messages API
* by splitting the flat text into messages.
*/
export function anthropicTextToAnthropicChat(req: Request) {
const { body } = req;
const result = AnthropicV1TextSchema.safeParse(body);
if (!result.success) {
req.log.warn(
{ issues: result.error.issues, body },
"Invalid Anthropic Text-to-Anthropic Chat request"
);
throw result.error;
}
req.headers["anthropic-version"] = "2023-06-01";
const { model, max_tokens_to_sample, prompt, ...rest } = result.data;
validateAnthropicTextPrompt(prompt);
// Iteratively slice the prompt into messages. Start from the beginning and
// look for the next `\n\nHuman:` or `\n\nAssistant:`. Anything before the
// first human message is a system message.
let index = prompt.indexOf("\n\nHuman:");
let remaining = prompt.slice(index);
const system = prompt.slice(0, index);
const messages: AnthropicChatMessage[] = [];
while (remaining) {
const isHuman = remaining.startsWith("\n\nHuman:");
// TODO: Are multiple consecutive human or assistant messages allowed?
// Currently we will enforce alternating turns.
const thisRole = isHuman ? "\n\nHuman:" : "\n\nAssistant:";
const nextRole = isHuman ? "\n\nAssistant:" : "\n\nHuman:";
const nextIndex = remaining.indexOf(nextRole);
// Collect text up to the next message, or the end of the prompt for the
// Assistant prefill if present.
const msg = remaining
.slice(0, nextIndex === -1 ? undefined : nextIndex)
.replace(thisRole, "")
.trimStart();
const role = isHuman ? "user" : "assistant";
messages.push({ role, content: msg });
remaining = remaining.slice(nextIndex);
if (nextIndex === -1) break;
}
// fix "messages: final assistant content cannot end with trailing whitespace"
const lastMessage = messages[messages.length - 1];
if (
lastMessage.role === "assistant" &&
typeof lastMessage.content === "string"
) {
messages[messages.length - 1].content = lastMessage.content.trimEnd();
}
return {
model,
system,
messages,
max_tokens: max_tokens_to_sample,
...rest,
};
}
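// Worked example (hypothetical prompt) of the split performed above: leading
// text becomes `system`, each \n\nHuman: / \n\nAssistant: turn becomes a
// message, and a trailing assistant prefill survives as the final
// (whitespace-trimmed) assistant message.
const exampleTextBody = {
  model: "claude-3-sonnet-20240229",
  max_tokens_to_sample: 256,
  prompt: "You are a terse bot.\n\nHuman: Say hi.\n\nAssistant: Hi",
};
// anthropicTextToAnthropicChat on a request with this body yields roughly:
// {
//   model: "claude-3-sonnet-20240229",
//   system: "You are a terse bot.",
//   messages: [
//     { role: "user", content: "Say hi." },
//     { role: "assistant", content: "Hi" },
//   ],
//   max_tokens: 256,
//   ...remaining sampling/stream params from the schema defaults
// }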
function validateAnthropicTextPrompt(prompt: string) {
if (!prompt.includes("\n\nHuman:") || !prompt.includes("\n\nAssistant:")) {
throw new Error(
"Prompt must contain at least one human and one assistant message."
);
}
// First human message must be before first assistant message
const firstHuman = prompt.indexOf("\n\nHuman:");
const firstAssistant = prompt.indexOf("\n\nAssistant:");
if (firstAssistant < firstHuman) {
throw new Error(
"First Assistant message must come after the first Human message."
);
}
}
export function flattenAnthropicMessages(
messages: AnthropicChatMessage[]
): string {
return messages
.map((msg) => {
const name = msg.role === "user" ? "\n\nHuman: " : "\n\nAssistant: ";
const parts = Array.isArray(msg.content)
? msg.content
: [{ type: "text", text: msg.content }];
return `${name}${parts
.map(({ text, type }) =>
type === "text" ? text : `[Unsupported content type: ${type}]`
)
.join("\n")}`;
})
.join("\n\n");
}

View File

@ -1,6 +1,6 @@
import { z } from "zod";
import { APIFormat } from "../key-management";
import { AnthropicV1CompleteSchema } from "./anthropic";
import { AnthropicV1TextSchema, AnthropicV1MessagesSchema } from "./anthropic";
import { OpenAIV1ChatCompletionSchema } from "./openai";
import { OpenAIV1TextCompletionSchema } from "./openai-text";
import { OpenAIV1ImagesGenerationSchema } from "./openai-image";
@ -8,11 +8,13 @@ import { GoogleAIV1GenerateContentSchema } from "./google-ai";
import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";
export { OpenAIChatMessage } from "./openai";
export { AnthropicChatMessage, flattenAnthropicMessages } from "./anthropic";
export { GoogleAIChatMessage } from "./google-ai";
export { MistralAIChatMessage } from "./mistral-ai";
export const API_SCHEMA_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
anthropic: AnthropicV1CompleteSchema,
"anthropic-chat": AnthropicV1MessagesSchema,
"anthropic-text": AnthropicV1TextSchema,
openai: OpenAIV1ChatCompletionSchema,
"openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,

View File

@ -12,7 +12,9 @@ export type AnthropicModel =
| "claude-v1"
| "claude-v1-100k"
| "claude-2"
| "claude-2.1";
| "claude-2.1"
| "claude-3-opus-20240229"
| "claude-3-sonnet-20240229";
export type AnthropicKeyUpdate = Omit<
Partial<AnthropicKey>,

View File

@ -9,11 +9,12 @@ import { KeyPool } from "./key-pool";
/** The request and response format used by a model's API. */
export type APIFormat =
| "openai"
| "anthropic"
| "google-ai"
| "mistral-ai"
| "openai-text"
| "openai-image";
| "openai-image"
| "anthropic-chat" // Anthropic's newer messages array format
| "anthropic-text" // Legacy flat string prompt format
| "google-ai"
| "mistral-ai";
export type Model =
| OpenAIModel
| AnthropicModel

View File

@ -192,7 +192,8 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
modelFamily = getAzureOpenAIModelFamily(model);
} else {
switch (req.outboundApi) {
case "anthropic":
case "anthropic-chat":
case "anthropic-text":
modelFamily = getClaudeModelFamily(model);
break;
case "openai":

View File

@ -84,7 +84,7 @@ export function makeCompletionSSE({
model,
};
break;
case "anthropic":
case "anthropic-text":
event = {
completion: content,
stop_reason: title,
@ -106,13 +106,14 @@ export function makeCompletionSSE({
},
],
});
case "anthropic-chat":
case "openai-image":
throw new Error(`SSE not supported for ${format} requests`);
default:
assertNever(format);
}
if (format === "anthropic") {
if (format === "anthropic-text") {
return (
["event: completion", `data: ${JSON.stringify(event)}`].join("\n") +
"\n\n"

View File

@ -1,23 +1,27 @@
import { getTokenizer } from "@anthropic-ai/tokenizer";
import { Tiktoken } from "tiktoken/lite";
import { AnthropicChatMessage } from "../api-schemas";
let encoder: Tiktoken;
let userRoleCount = 0;
let assistantRoleCount = 0;
export function init() {
// they export a `countTokens` function too but it instantiates a new
// tokenizer every single time and it is not fast...
encoder = getTokenizer();
userRoleCount = encoder.encode("\n\nHuman: ", "all").length;
assistantRoleCount = encoder.encode("\n\nAssistant: ", "all").length;
return true;
}
export function getTokenCount(prompt: string, _model: string) {
// Don't try tokenizing if the prompt is massive to prevent DoS.
// 500k characters should be sufficient for all supported models.
if (prompt.length > 500000) {
return {
tokenizer: "length fallback",
token_count: 100000,
};
export function getTokenCount(prompt: string | AnthropicChatMessage[]) {
if (typeof prompt !== "string") {
return getTokenCountForMessages(prompt);
}
if (prompt.length > 800000) {
throw new Error("Content is too large to tokenize.");
}
return {
@ -25,3 +29,36 @@ export function getTokenCount(prompt: string, _model: string) {
token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
};
}
function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
let numTokens = 0;
for (const message of messages) {
const { content, role } = message;
numTokens += role === "user" ? userRoleCount : assistantRoleCount;
const parts = Array.isArray(content)
? content
: [{ type: "text", text: content }];
for (const part of parts) {
// We don't allow other content types for now because we can't estimate
// cost for them.
if (part.type !== "text") {
throw new Error(`Unsupported Anthropic content type: ${part.type}`);
}
if (part.text.length > 800000 || numTokens > 200000) {
throw new Error("Content is too large to tokenize.");
}
numTokens += encoder.encode(part.text.normalize("NFKC"), "all").length;
}
}
if (messages[messages.length - 1].role !== "assistant") {
numTokens += assistantRoleCount;
}
return { tokenizer: "@anthropic-ai/tokenizer", token_count: numTokens };
}
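// Hypothetical usage of the message tokenizer above: each role prefix is
// counted alongside its text, and one extra "\n\nAssistant: " prefix is added
// because the last turn is not an assistant prefill.
const exampleMessages: AnthropicChatMessage[] = [
  { role: "user", content: "Hello" },
  { role: "assistant", content: [{ type: "text", text: "Hi there" }] },
  { role: "user", content: "How are you?" },
];
// getTokenCount(exampleMessages) =>
// { tokenizer: "@anthropic-ai/tokenizer", token_count: <prefixes + text tokens> }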

View File

@ -29,13 +29,8 @@ export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
}
function getTextTokenCount(prompt: string) {
// Don't try tokenizing if the prompt is massive to prevent DoS.
// 500k characters should be sufficient for all supported models.
if (prompt.length > 500000) {
return {
tokenizer: "length fallback",
token_count: 100000,
};
if (prompt.length > 800000) {
throw new Error("Content is too large to tokenize.");
}
return {

View File

@ -60,16 +60,8 @@ export async function getTokenCount(
textContent = value;
}
// Break if we get a huge message or exceed the token limit to prevent
// DoS.
// 200k tokens allows for future 200k GPT-4 models and 500k characters
// is just a sanity check
if (textContent.length > 500000 || numTokens > 200000) {
numTokens = 200000;
return {
tokenizer: "tiktoken (prompt length limit exceeded)",
token_count: numTokens,
};
if (textContent.length > 800000 || numTokens > 200000) {
throw new Error("Content is too large to tokenize.");
}
numTokens += encoder.encode(textContent).length;

View File

@ -16,6 +16,7 @@ import {
} from "./mistral";
import { APIFormat } from "../key-management";
import {
AnthropicChatMessage,
GoogleAIChatMessage,
MistralAIChatMessage,
OpenAIChatMessage,
@ -27,23 +28,60 @@ export async function init() {
initMistralAI();
}
/** Tagged union via `service` field of the different types of requests that can
* be made to the tokenization service, for both prompts and completions */
type OpenAIChatTokenCountRequest = {
prompt: OpenAIChatMessage[];
completion?: never;
service: "openai";
};
type AnthropicChatTokenCountRequest = {
prompt: AnthropicChatMessage[];
completion?: never;
service: "anthropic-chat";
};
type GoogleAIChatTokenCountRequest = {
prompt: GoogleAIChatMessage[];
completion?: never;
service: "google-ai";
};
type MistralAIChatTokenCountRequest = {
prompt: MistralAIChatMessage[];
completion?: never;
service: "mistral-ai";
};
type FlatPromptTokenCountRequest = {
prompt: string;
completion?: never;
service: "openai-text" | "anthropic-text" | "google-ai";
};
type StringCompletionTokenCountRequest = {
prompt?: never;
completion: string;
service: APIFormat;
};
type OpenAIImageCompletionTokenCountRequest = {
prompt?: never;
completion?: never;
service: "openai-image";
};
/**
* Tagged union via `service` field of the different types of requests that can
* be made to the tokenization service, for both prompts and completions
*/
type TokenCountRequest = { req: Request } & (
| { prompt: OpenAIChatMessage[]; completion?: never; service: "openai" }
| {
prompt: string;
completion?: never;
service: "openai-text" | "anthropic" | "google-ai";
}
| { prompt?: GoogleAIChatMessage[]; completion?: never; service: "google-ai" }
| {
prompt: MistralAIChatMessage[];
completion?: never;
service: "mistral-ai";
}
| { prompt?: never; completion: string; service: APIFormat }
| { prompt?: never; completion?: never; service: "openai-image" }
| OpenAIChatTokenCountRequest
| AnthropicChatTokenCountRequest
| GoogleAIChatTokenCountRequest
| MistralAIChatTokenCountRequest
| FlatPromptTokenCountRequest
| StringCompletionTokenCountRequest
| OpenAIImageCompletionTokenCountRequest
);
type TokenCountResult = {
@ -60,9 +98,14 @@ export async function countTokens({
}: TokenCountRequest): Promise<TokenCountResult> {
const time = process.hrtime();
switch (service) {
case "anthropic":
case "anthropic-chat":
return {
...getClaudeTokenCount(prompt ?? completion, req.body.model),
...getClaudeTokenCount(prompt ?? completion),
tokenization_duration_ms: getElapsedMs(time),
};
case "anthropic-text":
return {
...getClaudeTokenCount(prompt ?? completion),
tokenization_duration_ms: getElapsedMs(time),
};
case "openai":

View File

@ -400,7 +400,8 @@ function getModelFamilyForQuotaUsage(
case "openai-text":
case "openai-image":
return getOpenAIModelFamily(model);
case "anthropic":
case "anthropic-chat":
case "anthropic-text":
return getClaudeModelFamily(model);
case "google-ai":
return getGoogleAIModelFamily(model);