increases tokenizer failsafe to 500000 characters

This commit is contained in:
nai-degen 2023-07-27 15:21:06 -05:00
parent addfa7c57b
commit d29c304d5a
2 changed files with 8 additions and 5 deletions

View File

@@ -20,7 +20,9 @@ export function init() {
}
export function getTokenCount(prompt: string, _model: string) {
if (prompt.length > 250000) {
// Don't try tokenizing if the prompt is massive to prevent DoS.
// 500k characters should be sufficient for all supported models.
if (prompt.length > 500000) {
return {
tokenizer: "tiktoken (prompt length limit exceeded)",
token_count: 100000,

View File

@@ -28,10 +28,11 @@ export function getTokenCount(messages: any[], model: string) {
for (const key of Object.keys(message)) {
{
const value = message[key];
// Break if we get a huge message or exceed the token limit to prevent DoS
// 100k tokens allows for future 100k GPT-4 models and 250k characters is
// just a sanity check
if (value.length > 250000 || numTokens > 100000) {
// Break if we get a huge message or exceed the token limit to prevent
// DoS.
// 100k tokens allows for future 100k GPT-4 models and 500k characters
// is just a sanity check
if (value.length > 500000 || numTokens > 100000) {
numTokens = 100000;
return {
tokenizer: "tiktoken (prompt length limit exceeded)",