diff --git a/package-lock.json b/package-lock.json
index aefbf0c..269d7a2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,6 +9,7 @@
       "version": "1.0.0",
       "license": "MIT",
       "dependencies": {
+        "@anthropic-ai/tokenizer": "^0.0.4",
         "axios": "^1.3.5",
         "cookie-parser": "^1.4.6",
         "copyfiles": "^2.4.1",
@@ -47,6 +48,15 @@
         "node": ">=18.0.0"
       }
     },
+    "node_modules/@anthropic-ai/tokenizer": {
+      "version": "0.0.4",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
+      "integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
+      "dependencies": {
+        "@types/node": "^18.11.18",
+        "tiktoken": "^1.0.10"
+      }
+    },
     "node_modules/@babel/parser": {
       "version": "7.22.7",
       "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.22.7.tgz",
diff --git a/package.json b/package.json
index 8edee78..5b5136b 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,7 @@
   "author": "",
   "license": "MIT",
   "dependencies": {
+    "@anthropic-ai/tokenizer": "^0.0.4",
     "axios": "^1.3.5",
     "cookie-parser": "^1.4.6",
     "copyfiles": "^2.4.1",
diff --git a/src/proxy/middleware/request/check-context-size.ts b/src/proxy/middleware/request/check-context-size.ts
index 16e5db6..bfdc2fa 100644
--- a/src/proxy/middleware/request/check-context-size.ts
+++ b/src/proxy/middleware/request/check-context-size.ts
@@ -7,14 +7,6 @@ import { RequestPreprocessor } from ".";
 const CLAUDE_MAX_CONTEXT = config.maxContextTokensAnthropic;
 const OPENAI_MAX_CONTEXT = config.maxContextTokensOpenAI;
 
-/**
- * Claude models don't throw an error if you exceed the token limit and
- * instead just become extremely slow and provide schizo output. To be safe,
- * we will only allow 95% of the stated limit, which also accounts for our
- * tokenization being slightly different than Anthropic's.
- */
-const CLAUDE_TOKEN_LIMIT_ADJUSTMENT = 0.95;
-
 /**
  * Assigns `req.promptTokens` and `req.outputTokens` based on the request body
  * and outbound API format, which combined determine the size of the context.
@@ -71,11 +63,11 @@ function validateContextSize(req: Request) {
   } else if (model.match(/gpt-4/)) {
     modelMax = 8192;
   } else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?(?:-100k)/)) {
-    modelMax = 100000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
+    modelMax = 100000;
   } else if (model.match(/claude-(?:instant-)?v1(?:\.\d)?$/)) {
-    modelMax = 9000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
+    modelMax = 9000;
   } else if (model.match(/claude-2/)) {
-    modelMax = 100000 * CLAUDE_TOKEN_LIMIT_ADJUSTMENT;
+    modelMax = 100000;
   } else {
     // Don't really want to throw here because I don't want to have to update
     // this ASAP every time a new model is released.
diff --git a/src/tokenization/claude.ts b/src/tokenization/claude.ts
index b37eab6..1eeac93 100644
--- a/src/tokenization/claude.ts
+++ b/src/tokenization/claude.ts
@@ -1,21 +1,6 @@
-// For now this is just using the GPT vocabulary, even though Claude has a
-// different one. Token counts won't be perfect so this just provides
-// a rough estimate.
-//
-// TODO: use huggingface tokenizers instead of openai's tiktoken library since
-// that should support the vocabulary file Anthropic provides.
-
-import { Tiktoken } from "tiktoken/lite";
-import cl100k_base from "tiktoken/encoders/cl100k_base.json";
-
-let encoder: Tiktoken;
+import { countTokens } from "@anthropic-ai/tokenizer";
 
 export function init() {
-  encoder = new Tiktoken(
-    cl100k_base.bpe_ranks,
-    cl100k_base.special_tokens,
-    cl100k_base.pat_str
-  );
   return true;
 }
 
@@ -24,13 +9,13 @@ export function getTokenCount(prompt: string, _model: string) {
   // 500k characters should be sufficient for all supported models.
   if (prompt.length > 500000) {
     return {
-      tokenizer: "tiktoken (prompt length limit exceeded)",
+      tokenizer: "length fallback",
       token_count: 100000,
     };
   }
 
   return {
-    tokenizer: "tiktoken (cl100k_base)",
-    token_count: encoder.encode(prompt).length,
+    tokenizer: "@anthropic-ai/tokenizer",
+    token_count: countTokens(prompt),
   };
 }
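
For reference, a minimal usage sketch of the new tokenizer path (illustration only, not part of the diff). It assumes nothing beyond the `countTokens` export that src/tokenization/claude.ts now imports from @anthropic-ai/tokenizer; the prompt string is made up.

// Usage sketch: count Claude tokens with Anthropic's own vocabulary
// instead of the cl100k_base approximation the old code used.
import { countTokens } from "@anthropic-ai/tokenizer";

const prompt = "\n\nHuman: Hello, Claude\n\nAssistant:";
const tokens = countTokens(prompt); // number of tokens in the prompt
console.log(`Claude token count: ${tokens}`);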