increases tokenizer failsafe to 500000 characters
This commit is contained in:
parent
addfa7c57b
commit
d29c304d5a
|
@@ -20,7 +20,9 @@ export function init() {
|
|||
}
|
||||
|
||||
export function getTokenCount(prompt: string, _model: string) {
|
||||
if (prompt.length > 250000) {
|
||||
// Don't try tokenizing if the prompt is massive to prevent DoS.
|
||||
// 500k characters should be sufficient for all supported models.
|
||||
if (prompt.length > 500000) {
|
||||
return {
|
||||
tokenizer: "tiktoken (prompt length limit exceeded)",
|
||||
token_count: 100000,
|
||||
|
|
|
@@ -28,10 +28,11 @@ export function getTokenCount(messages: any[], model: string) {
|
|||
for (const key of Object.keys(message)) {
|
||||
{
|
||||
const value = message[key];
|
||||
// Break if we get a huge message or exceed the token limit to prevent DoS
|
||||
// 100k tokens allows for future 100k GPT-4 models and 250k characters is
|
||||
// just a sanity check
|
||||
if (value.length > 250000 || numTokens > 100000) {
|
||||
// Break if we get a huge message or exceed the token limit to prevent
|
||||
// DoS.
|
||||
// 100k tokens allows for future 100k GPT-4 models and 500k characters
|
||||
// is just a sanity check
|
||||
if (value.length > 500000 || numTokens > 100000) {
|
||||
numTokens = 100000;
|
||||
return {
|
||||
tokenizer: "tiktoken (prompt length limit exceeded)",
|
||||
|
|
Loading…
Reference in New Issue