// oai-reverse-proxy/src/shared/tokenization/mistral.ts

import * as tokenizer from "./mistral-tokenizer-js";
import { MistralAIChatMessage } from "../api-support";
/**
 * Eagerly loads the mistral-tokenizer-js vocabulary so that subsequent
 * calls to `getTokenCount` can encode without paying the setup cost.
 * @returns `true` once initialization has been kicked off.
 */
export function init() {
  tokenizer.initializemistralTokenizer();
  return true;
}
/**
 * Counts tokens for either a raw prompt string or a Mistral chat message
 * list. Chat messages are flattened into the instruct-style text format
 * ("[INST] … [/INST]" around user turns, "</s>" after assistant turns)
 * before being tokenized.
 * @param prompt - Plain text, or an ordered list of chat messages.
 * @returns The tokenizer name and token count (see `getTextTokenCount`).
 */
export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
  if (typeof prompt === "string") {
    return getTextTokenCount(prompt);
  }

  // Render each message as its instruct-format fragment; messages with an
  // unrecognized role contribute nothing (flatMap of [] drops them, matching
  // the original switch-without-default behavior).
  const fragments = prompt.flatMap((message) => {
    switch (message.role) {
      case "system":
        return [message.content];
      case "assistant":
        return [message.content + "</s>"];
      case "user":
        return ["[INST] " + message.content + " [/INST]"];
      default:
        return [];
    }
  });

  return getTextTokenCount(fragments.join(" "));
}
/**
 * Tokenizes a flat text string with mistral-tokenizer-js after NFKC
 * normalization.
 * @param prompt - The text to tokenize.
 * @returns An object naming the tokenizer and the resulting token count.
 * @throws Error when the input exceeds the size cap, to avoid tokenizing
 *   pathologically large content.
 */
function getTextTokenCount(prompt: string) {
  // Guard against oversized inputs before doing any encoding work.
  const MAX_TOKENIZABLE_CHARS = 800000;
  if (prompt.length > MAX_TOKENIZABLE_CHARS) {
    throw new Error("Content is too large to tokenize.");
  }

  const normalized = prompt.normalize("NFKC");
  return {
    tokenizer: "mistral-tokenizer-js",
    token_count: tokenizer.encode(normalized)!.length,
  };
}