exempt 'special' token type from context size limits
This commit is contained in:
parent
e068edcf48
commit
3f9fd25004
|
@ -31,7 +31,10 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
|
|||
}
|
||||
case "anthropic-chat": {
|
||||
req.outputTokens = req.body.max_tokens;
|
||||
const prompt: AnthropicChatMessage[] = req.body.messages;
|
||||
const prompt = {
|
||||
system: req.body.system ?? "",
|
||||
messages: req.body.messages,
|
||||
};
|
||||
result = await countTokens({ req, prompt, service });
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,11 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
|||
}
|
||||
proxyMax ||= Number.MAX_SAFE_INTEGER;
|
||||
|
||||
if (req.user?.type === "special") {
|
||||
req.log.debug("Special user, not enforcing proxy context limit.");
|
||||
proxyMax = Number.MAX_SAFE_INTEGER;
|
||||
}
|
||||
|
||||
let modelMax: number;
|
||||
if (model.match(/gpt-3.5-turbo-16k/)) {
|
||||
modelMax = 16384;
|
||||
|
|
|
@ -370,7 +370,7 @@ async function handleAnthropicBadRequestError(
|
|||
"Anthropic key has been disabled."
|
||||
);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned key has been disabled. ${error?.message}`;
|
||||
errorPayload.proxy_note = `Assigned key has been disabled. (${error?.message})`;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,9 @@ export function init() {
|
|||
return true;
|
||||
}
|
||||
|
||||
export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
|
||||
export async function getTokenCount(
|
||||
prompt: string | { system: string; messages: AnthropicChatMessage[] }
|
||||
) {
|
||||
if (typeof prompt !== "string") {
|
||||
return getTokenCountForMessages(prompt);
|
||||
}
|
||||
|
@ -34,9 +36,17 @@ export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
|
|||
};
|
||||
}
|
||||
|
||||
async function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
|
||||
async function getTokenCountForMessages({
|
||||
system,
|
||||
messages,
|
||||
}: {
|
||||
system: string;
|
||||
messages: AnthropicChatMessage[];
|
||||
}) {
|
||||
let numTokens = 0;
|
||||
|
||||
numTokens += (await getTokenCount(system)).token_count;
|
||||
|
||||
for (const message of messages) {
|
||||
const { content, role } = message;
|
||||
numTokens += role === "user" ? userRoleCount : assistantRoleCount;
|
||||
|
|
|
@ -35,7 +35,7 @@ type OpenAIChatTokenCountRequest = {
|
|||
};
|
||||
|
||||
type AnthropicChatTokenCountRequest = {
|
||||
prompt: AnthropicChatMessage[];
|
||||
prompt: { system: string; messages: AnthropicChatMessage[] };
|
||||
completion?: never;
|
||||
service: "anthropic-chat";
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue