diff --git a/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts b/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts
index a621753..130bedf 100644
--- a/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts
+++ b/src/proxy/middleware/request/preprocessors/count-prompt-tokens.ts
@@ -31,7 +31,10 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
     }
     case "anthropic-chat": {
       req.outputTokens = req.body.max_tokens;
-      const prompt: AnthropicChatMessage[] = req.body.messages;
+      const prompt = {
+        system: req.body.system ?? "",
+        messages: req.body.messages,
+      };
       result = await countTokens({ req, prompt, service });
       break;
     }
diff --git a/src/proxy/middleware/request/preprocessors/validate-context-size.ts b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
index 127e60d..dd62212 100644
--- a/src/proxy/middleware/request/preprocessors/validate-context-size.ts
+++ b/src/proxy/middleware/request/preprocessors/validate-context-size.ts
@@ -46,6 +46,11 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
   }
   proxyMax ||= Number.MAX_SAFE_INTEGER;
 
+  if (req.user?.type === "special") {
+    req.log.debug("Special user, not enforcing proxy context limit.");
+    proxyMax = Number.MAX_SAFE_INTEGER;
+  }
+
   let modelMax: number;
   if (model.match(/gpt-3.5-turbo-16k/)) {
     modelMax = 16384;
diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index 83a2a68..667b89e 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -370,7 +370,7 @@ async function handleAnthropicBadRequestError(
       "Anthropic key has been disabled."
     );
     keyPool.disable(req.key!, "revoked");
-    errorPayload.proxy_note = `Assigned key has been disabled. ${error?.message}`;
+    errorPayload.proxy_note = `Assigned key has been disabled. (${error?.message})`;
     return;
   }
 
diff --git a/src/shared/tokenization/claude.ts b/src/shared/tokenization/claude.ts
index fa32eff..880db82 100644
--- a/src/shared/tokenization/claude.ts
+++ b/src/shared/tokenization/claude.ts
@@ -19,7 +19,9 @@ export function init() {
   return true;
 }
 
-export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
+export async function getTokenCount(
+  prompt: string | { system: string; messages: AnthropicChatMessage[] }
+) {
   if (typeof prompt !== "string") {
     return getTokenCountForMessages(prompt);
   }
@@ -34,9 +36,17 @@ export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
   };
 }
 
-async function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
+async function getTokenCountForMessages({
+  system,
+  messages,
+}: {
+  system: string;
+  messages: AnthropicChatMessage[];
+}) {
   let numTokens = 0;
 
+  numTokens += (await getTokenCount(system)).token_count;
+
   for (const message of messages) {
     const { content, role } = message;
     numTokens += role === "user" ? userRoleCount : assistantRoleCount;
diff --git a/src/shared/tokenization/tokenizer.ts b/src/shared/tokenization/tokenizer.ts
index 8c86b33..1b03f3d 100644
--- a/src/shared/tokenization/tokenizer.ts
+++ b/src/shared/tokenization/tokenizer.ts
@@ -35,7 +35,7 @@ type OpenAIChatTokenCountRequest = {
 };
 
 type AnthropicChatTokenCountRequest = {
-  prompt: AnthropicChatMessage[];
+  prompt: { system: string; messages: AnthropicChatMessage[] };
   completion?: never;
   service: "anthropic-chat";
 };
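
For context, a minimal, self-contained TypeScript sketch of the counting behavior these hunks produce: the system prompt is tokenized first, then each message contributes a role overhead plus its content tokens. The `countStringTokens` helper and the role-cost constants below are stand-ins for illustration only; the real claude.ts uses its own tokenizer and its own `userRoleCount`/`assistantRoleCount` values.

```ts
// Hypothetical sketch of the new { system, messages } prompt shape and the
// counting order introduced above. Helpers and constants are assumptions.
type AnthropicChatMessage = { role: "user" | "assistant"; content: string };

// Stand-in per-message role overheads (assumed values, not from the diff).
const userRoleCount = 4;
const assistantRoleCount = 4;

// Stand-in tokenizer: roughly one token per four characters.
function countStringTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

// Mirrors the updated getTokenCountForMessages: the system prompt is now
// counted alongside the messages instead of being dropped.
function countChatTokens(prompt: {
  system: string;
  messages: AnthropicChatMessage[];
}): number {
  let numTokens = countStringTokens(prompt.system);
  for (const { role, content } of prompt.messages) {
    numTokens += role === "user" ? userRoleCount : assistantRoleCount;
    numTokens += countStringTokens(content);
  }
  return numTokens;
}

// Usage with the shape built in count-prompt-tokens.ts:
const total = countChatTokens({
  system: "You are a helpful assistant.",
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(total);
```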