exempt 'special' token type from context size limits
This commit is contained in:
parent
e068edcf48
commit
3f9fd25004
|
@ -31,7 +31,10 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
|
||||||
}
|
}
|
||||||
case "anthropic-chat": {
|
case "anthropic-chat": {
|
||||||
req.outputTokens = req.body.max_tokens;
|
req.outputTokens = req.body.max_tokens;
|
||||||
const prompt: AnthropicChatMessage[] = req.body.messages;
|
const prompt = {
|
||||||
|
system: req.body.system ?? "",
|
||||||
|
messages: req.body.messages,
|
||||||
|
};
|
||||||
result = await countTokens({ req, prompt, service });
|
result = await countTokens({ req, prompt, service });
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,6 +46,11 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
|
||||||
}
|
}
|
||||||
proxyMax ||= Number.MAX_SAFE_INTEGER;
|
proxyMax ||= Number.MAX_SAFE_INTEGER;
|
||||||
|
|
||||||
|
if (req.user?.type === "special") {
|
||||||
|
req.log.debug("Special user, not enforcing proxy context limit.");
|
||||||
|
proxyMax = Number.MAX_SAFE_INTEGER;
|
||||||
|
}
|
||||||
|
|
||||||
let modelMax: number;
|
let modelMax: number;
|
||||||
if (model.match(/gpt-3.5-turbo-16k/)) {
|
if (model.match(/gpt-3.5-turbo-16k/)) {
|
||||||
modelMax = 16384;
|
modelMax = 16384;
|
||||||
|
|
|
@ -370,7 +370,7 @@ async function handleAnthropicBadRequestError(
|
||||||
"Anthropic key has been disabled."
|
"Anthropic key has been disabled."
|
||||||
);
|
);
|
||||||
keyPool.disable(req.key!, "revoked");
|
keyPool.disable(req.key!, "revoked");
|
||||||
errorPayload.proxy_note = `Assigned key has been disabled. ${error?.message}`;
|
errorPayload.proxy_note = `Assigned key has been disabled. (${error?.message})`;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,9 @@ export function init() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
|
export async function getTokenCount(
|
||||||
|
prompt: string | { system: string; messages: AnthropicChatMessage[] }
|
||||||
|
) {
|
||||||
if (typeof prompt !== "string") {
|
if (typeof prompt !== "string") {
|
||||||
return getTokenCountForMessages(prompt);
|
return getTokenCountForMessages(prompt);
|
||||||
}
|
}
|
||||||
|
@ -34,9 +36,17 @@ export async function getTokenCount(prompt: string | AnthropicChatMessage[]) {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
|
async function getTokenCountForMessages({
|
||||||
|
system,
|
||||||
|
messages,
|
||||||
|
}: {
|
||||||
|
system: string;
|
||||||
|
messages: AnthropicChatMessage[];
|
||||||
|
}) {
|
||||||
let numTokens = 0;
|
let numTokens = 0;
|
||||||
|
|
||||||
|
numTokens += (await getTokenCount(system)).token_count;
|
||||||
|
|
||||||
for (const message of messages) {
|
for (const message of messages) {
|
||||||
const { content, role } = message;
|
const { content, role } = message;
|
||||||
numTokens += role === "user" ? userRoleCount : assistantRoleCount;
|
numTokens += role === "user" ? userRoleCount : assistantRoleCount;
|
||||||
|
|
|
@ -35,7 +35,7 @@ type OpenAIChatTokenCountRequest = {
|
||||||
};
|
};
|
||||||
|
|
||||||
type AnthropicChatTokenCountRequest = {
|
type AnthropicChatTokenCountRequest = {
|
||||||
prompt: AnthropicChatMessage[];
|
prompt: { system: string; messages: AnthropicChatMessage[] };
|
||||||
completion?: never;
|
completion?: never;
|
||||||
service: "anthropic-chat";
|
service: "anthropic-chat";
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue