adds token consumption stats to infopage

nai-degen 2023-08-30 20:40:40 -05:00
parent bed275a195
commit 2c0a659b2d
7 changed files with 90 additions and 20 deletions

View File

@@ -3,6 +3,7 @@ import { Request, Response } from "express";
import showdown from "showdown";
import { config, listConfig } from "./config";
import {
AnthropicKey,
ModelFamily,
OpenAIKey,
OpenAIModelFamily,
@@ -10,6 +11,7 @@ import {
} from "./key-management";
import { getUniqueIps } from "./proxy/rate-limit";
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
import { logger } from "./logger";
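// How long (in ms) the rendered info page HTML is cached before being rebuilt.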
const INFO_PAGE_TTL = 2000;
let infoPageHtml: string | undefined;
@@ -18,6 +20,8 @@ let infoPageLastUpdated = 0;
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
k.service === "openai";
const keyIsAnthropicKey = (k: KeyPoolKey): k is AnthropicKey =>
k.service === "anthropic";
type ModelAggregates = {
active: number;
@@ -26,6 +30,7 @@ type ModelAggregates = {
overQuota?: number;
queued: number;
queueTime: string;
tokens: number;
};
type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`;
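// e.g. "gpt4__tokens" or "claude__active"; used as keys into modelStats below.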
type ServiceAggregates = {
@@ -34,6 +39,8 @@ type ServiceAggregates = {
openaiOrgs?: number;
anthropicKeys?: number;
proompts: number;
tokens: number;
tokenCost: number;
uncheckedKeys?: number;
} & {
[modelFamily in ModelFamily]?: ModelAggregates;
@@ -42,6 +49,27 @@ type ServiceAggregates = {
const modelStats = new Map<ModelAggregateKey, number>();
const serviceStats = new Map<keyof ServiceAggregates, number>();
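// Populated by addKeyToAggregates below and read back when rendering the page.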
// technically slightly underestimates, because completion tokens cost more
// than prompt tokens but we don't track those separately right now
function getTokenCostUsd(model: ModelFamily, tokens: number) {
let cost = 0;
switch (model) {
case "gpt4-32k":
cost = 0.00006;
break;
case "gpt4":
cost = 0.00003;
break;
case "turbo":
cost = 0.0000015;
break;
case "claude":
cost = 0.00001102;
break;
}
return cost * tokens;
}
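// At these rates, 1M turbo tokens cost $1.50, 1M gpt4 tokens $30,
// 1M gpt4-32k tokens $60, and 1M claude tokens $11.02.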
export const handleInfoPage = (req: Request, res: Response) => {
if (infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) {
res.send(infoPageHtml);
@@ -66,6 +94,8 @@ function cacheInfoPageHtml(baseUrl: string) {
const openaiKeys = serviceStats.get("openaiKeys") || 0;
const anthropicKeys = serviceStats.get("anthropicKeys") || 0;
const proompts = serviceStats.get("proompts") || 0;
const tokens = serviceStats.get("tokens") || 0;
const info = {
uptime: Math.floor(process.uptime()),
@@ -73,7 +103,8 @@ function cacheInfoPageHtml(baseUrl: string) {
...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}),
...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
},
proompts: keys.reduce((acc, k) => acc + k.promptCount, 0),
proompts,
tookens: `${tokens} ($${(serviceStats.get("tokenCost") || 0).toFixed(2)})`,
...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}),
openaiKeys,
anthropicKeys,
@@ -127,13 +158,26 @@ function addKeyToAggregates(k: KeyPoolKey) {
increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0);
increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0);
let sumTokens = 0;
let sumCost = 0;
let family: ModelFamily;
const families = k.modelFamilies.filter((f) =>
config.allowedModelFamilies.includes(f)
);
if (keyIsOpenAIKey(k)) {
// Currently only OpenAI keys are checked
increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 0 : 1);
// Technically this would not account for keys that have tokens recorded
// on models they aren't provisioned for, but that would be strange
k.modelFamilies.forEach((f) => {
const tokens = k[`${f}Tokens`];
sumTokens += tokens;
sumCost += getTokenCostUsd(f, tokens);
increment(modelStats, `${f}__tokens`, tokens);
});
if (families.includes("gpt4-32k")) {
family = "gpt4-32k";
} else if (families.includes("gpt4")) {
@@ -141,10 +185,18 @@ function addKeyToAggregates(k: KeyPoolKey) {
} else {
family = "turbo";
}
} else {
} else if (keyIsAnthropicKey(k)) {
const tokens = k.claudeTokens;
family = "claude";
sumTokens += tokens;
sumCost += getTokenCostUsd(family, tokens);
increment(modelStats, `${family}__tokens`, tokens);
} else {
logger.error({ key: k.hash }, "Unknown key type when adding to aggregates");
return;
}
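// Fold this key's sums into the service-wide totals shown on the info page.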
increment(serviceStats, "tokens", sumTokens);
increment(serviceStats, "tokenCost", sumCost);
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0);
if ("isRevoked" in k) {
@@ -158,6 +210,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
function getOpenAIInfo() {
const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & {
[modelFamily in OpenAIModelFamily]?: {
usage?: string;
activeKeys: number;
trialKeys?: number;
revokedKeys?: number;
@@ -185,7 +238,11 @@ function getOpenAIInfo() {
info.openaiOrgs = getUniqueOpenAIOrgs(keys);
families.forEach((f) => {
const tokens = modelStats.get(`${f}__tokens`) || 0;
const cost = getTokenCostUsd(f, tokens);
info[f] = {
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
activeKeys: modelStats.get(`${f}__active`) || 0,
trialKeys: modelStats.get(`${f}__trial`) || 0,
revokedKeys: modelStats.get(`${f}__revoked`) || 0,
@@ -203,8 +260,8 @@ function getOpenAIInfo() {
}
families.forEach((f) => {
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
if (info[f]) {
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
info[f]!.proomptersInQueue = proomptersInQueue;
info[f]!.estimatedQueueTime = estimatedQueueTime;
}
@@ -220,8 +277,11 @@ function getAnthropicInfo() {
const queue = getQueueInformation("claude");
claudeInfo.queued = queue.proomptersInQueue;
claudeInfo.queueTime = queue.estimatedQueueTime;
const tokens = modelStats.get("claude__tokens") || 0;
const cost = getTokenCostUsd("claude", tokens);
return {
claude: {
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
activeKeys: claudeInfo.active,
proomptersInQueue: claudeInfo.queued,
estimatedQueueTime: claudeInfo.queueTime,

View File

@@ -24,7 +24,11 @@ export type AnthropicKeyUpdate = Omit<
| "rateLimitedUntil"
>;
export interface AnthropicKey extends Key {
type AnthropicKeyUsage = {
[K in AnthropicModelFamily as `${K}Tokens`]: number;
};
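// AnthropicModelFamily is currently just "claude", so this adds { claudeTokens: number }.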
export interface AnthropicKey extends Key, AnthropicKeyUsage {
readonly service: "anthropic";
readonly modelFamilies: AnthropicModelFamily[];
/** The time at which this key was last rate limited. */
@@ -87,6 +91,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
.digest("hex")
.slice(0, 8)}`,
lastChecked: 0,
claudeTokens: 0,
};
this.keys.push(newKey);
}
@@ -162,10 +167,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
return false;
}
public incrementPrompt(hash?: string) {
public incrementUsage(hash: string, _model: string, tokens: number) {
const key = this.keys.find((k) => k.hash === hash);
if (!key) return;
key.promptCount++;
key.claudeTokens += tokens;
}
public getLockoutPeriod(_model: AnthropicModel) {

View File

@@ -51,7 +51,7 @@ export interface KeyProvider<T extends Key = Key> {
update(hash: string, update: Partial<T>): void;
available(): number;
anyUnchecked(): boolean;
incrementPrompt(hash: string): void;
incrementUsage(hash: string, model: string, tokens: number): void;
getLockoutPeriod(model: Model): number;
markRateLimited(hash: string): void;
recheck(service: AIService): void;

View File

@@ -67,9 +67,9 @@ export class KeyPool {
return this.keyProviders.some((provider) => provider.anyUnchecked());
}
public incrementPrompt(key: Key): void {
public incrementUsage(key: Key, model: string, tokens: number): void {
const provider = this.getKeyProvider(key.service);
provider.incrementPrompt(key.hash);
provider.incrementUsage(key.hash, model, tokens);
}
public getLockoutPeriod(model: Model): number {

View File

@@ -17,7 +17,13 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
"gpt-4",
] as const;
export interface OpenAIKey extends Key {
// Flattening model families instead of using a nested object for easier
// cloning.
type OpenAIKeyUsage = {
[K in OpenAIModelFamily as `${K}Tokens`]: number;
};
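// Expands to { turboTokens: number; gpt4Tokens: number; "gpt4-32kTokens": number },
// matching the zero-initialized fields on new keys below.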
export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai";
modelFamilies: OpenAIModelFamily[];
/**
@@ -78,7 +84,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
bareKeys = keyString.split(",").map((k) => k.trim());
bareKeys = [...new Set(bareKeys)];
for (const k of bareKeys) {
const newKey = {
const newKey: OpenAIKey = {
key: k,
service: "openai" as const,
modelFamilies: ["turbo" as const, "gpt4" as const],
@@ -86,10 +92,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
isDisabled: false,
isRevoked: false,
isOverQuota: false,
softLimit: 0,
hardLimit: 0,
systemHardLimit: 0,
usage: 0,
lastUsed: 0,
lastChecked: 0,
promptCount: 0,
@@ -101,6 +103,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
rateLimitedAt: 0,
rateLimitRequestsReset: 0,
rateLimitTokensReset: 0,
turboTokens: 0,
gpt4Tokens: 0,
"gpt4-32kTokens": 0,
};
this.keys.push(newKey);
}
@@ -314,10 +319,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
key.rateLimitedAt = Date.now();
}
public incrementPrompt(keyHash?: string) {
public incrementUsage(keyHash: string, model: string, tokens: number) {
const key = this.keys.find((k) => k.hash === keyHash);
if (!key) return;
key.promptCount++;
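// e.g. a "gpt-4" request resolves to the "gpt4" family and bumps key.gpt4Tokens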
key[`${getOpenAIModelFamily(model)}Tokens`] += tokens;
}
public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) {

View File

@@ -411,11 +411,11 @@ function handleOpenAIRateLimitError(
const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
if (isCompletionRequest(req)) {
keyPool.incrementPrompt(req.key!);
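// Per-key and per-user counters both receive the same prompt + output total.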
const model = req.body.model;
const tokensUsed = req.promptTokens! + req.outputTokens!;
keyPool.incrementUsage(req.key!, model, tokensUsed);
if (req.user) {
incrementPromptCount(req.user.token);
const model = req.body.model;
const tokensUsed = req.promptTokens! + req.outputTokens!;
incrementTokenCount(req.user.token, model, tokensUsed);
}
}

View File

@@ -60,8 +60,6 @@ function getModelsResponse() {
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
console.log(available);
const models = knownModels
.map((id) => ({
id,