adds token consumption stats to infopage

This commit is contained in:
nai-degen 2023-08-30 20:40:40 -05:00
parent bed275a195
commit 2c0a659b2d
7 changed files with 90 additions and 20 deletions

View File

@ -3,6 +3,7 @@ import { Request, Response } from "express";
import showdown from "showdown"; import showdown from "showdown";
import { config, listConfig } from "./config"; import { config, listConfig } from "./config";
import { import {
AnthropicKey,
ModelFamily, ModelFamily,
OpenAIKey, OpenAIKey,
OpenAIModelFamily, OpenAIModelFamily,
@ -10,6 +11,7 @@ import {
} from "./key-management"; } from "./key-management";
import { getUniqueIps } from "./proxy/rate-limit"; import { getUniqueIps } from "./proxy/rate-limit";
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue"; import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
import { logger } from "./logger";
const INFO_PAGE_TTL = 2000; const INFO_PAGE_TTL = 2000;
let infoPageHtml: string | undefined; let infoPageHtml: string | undefined;
@ -18,6 +20,8 @@ let infoPageLastUpdated = 0;
type KeyPoolKey = ReturnType<typeof keyPool.list>[0]; type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey => const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
k.service === "openai"; k.service === "openai";
/**
 * Type guard that narrows a key pool entry to an `AnthropicKey` by checking
 * its `service` discriminator.
 *
 * NOTE(review): the identifier misspells "Anthropic"; it is left as-is here
 * because renaming it would also require updating its call sites.
 */
const keyIsAnthropciKey = (k: KeyPoolKey): k is AnthropicKey => {
  return k.service === "anthropic";
};
type ModelAggregates = { type ModelAggregates = {
active: number; active: number;
@ -26,6 +30,7 @@ type ModelAggregates = {
overQuota?: number; overQuota?: number;
queued: number; queued: number;
queueTime: string; queueTime: string;
tokens: number;
}; };
type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`; type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`;
type ServiceAggregates = { type ServiceAggregates = {
@ -34,6 +39,8 @@ type ServiceAggregates = {
openaiOrgs?: number; openaiOrgs?: number;
anthropicKeys?: number; anthropicKeys?: number;
proompts: number; proompts: number;
tokens: number;
tokenCost: number;
uncheckedKeys?: number; uncheckedKeys?: number;
} & { } & {
[modelFamily in ModelFamily]?: ModelAggregates; [modelFamily in ModelFamily]?: ModelAggregates;
@ -42,6 +49,27 @@ type ServiceAggregates = {
const modelStats = new Map<ModelAggregateKey, number>(); const modelStats = new Map<ModelAggregateKey, number>();
const serviceStats = new Map<keyof ServiceAggregates, number>(); const serviceStats = new Map<keyof ServiceAggregates, number>();
// Flat USD price per token for each model family. Prompt and completion
// tokens are not tracked separately right now, and completion tokens cost
// more than prompt tokens, so estimates built from these rates come out
// slightly low.
const USD_PER_TOKEN = new Map<string, number>([
  ["gpt4-32k", 0.00006],
  ["gpt4", 0.00003],
  ["turbo", 0.0000015],
  ["claude", 0.00001102],
]);

/**
 * Estimates the USD cost of consuming `tokens` tokens on the given model
 * family.
 *
 * @param model Model family whose per-token rate should be applied.
 * @param tokens Total number of tokens consumed.
 * @returns The per-token rate multiplied by `tokens`; families without a
 *   listed rate are priced at 0.
 */
function getTokenCostUsd(model: ModelFamily, tokens: number) {
  const rate = USD_PER_TOKEN.get(model) ?? 0;
  return rate * tokens;
}
export const handleInfoPage = (req: Request, res: Response) => { export const handleInfoPage = (req: Request, res: Response) => {
if (infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) { if (infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) {
res.send(infoPageHtml); res.send(infoPageHtml);
@ -66,6 +94,8 @@ function cacheInfoPageHtml(baseUrl: string) {
const openaiKeys = serviceStats.get("openaiKeys") || 0; const openaiKeys = serviceStats.get("openaiKeys") || 0;
const anthropicKeys = serviceStats.get("anthropicKeys") || 0; const anthropicKeys = serviceStats.get("anthropicKeys") || 0;
const proompts = serviceStats.get("proompts") || 0;
const tokens = serviceStats.get("tokens") || 0;
const info = { const info = {
uptime: Math.floor(process.uptime()), uptime: Math.floor(process.uptime()),
@ -73,7 +103,8 @@ function cacheInfoPageHtml(baseUrl: string) {
...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}), ...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}),
...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}), ...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
}, },
proompts: keys.reduce((acc, k) => acc + k.promptCount, 0), proompts,
tookens: `${tokens} ($${(serviceStats.get("tokenCost") || 0).toFixed(2)})`,
...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}), ...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}),
openaiKeys, openaiKeys,
anthropicKeys, anthropicKeys,
@ -127,13 +158,26 @@ function addKeyToAggregates(k: KeyPoolKey) {
increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0); increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0);
increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0); increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0);
let sumTokens = 0;
let sumCost = 0;
let family: ModelFamily; let family: ModelFamily;
const families = k.modelFamilies.filter((f) => const families = k.modelFamilies.filter((f) =>
config.allowedModelFamilies.includes(f) config.allowedModelFamilies.includes(f)
); );
if (keyIsOpenAIKey(k)) { if (keyIsOpenAIKey(k)) {
// Currently only OpenAI keys are checked // Currently only OpenAI keys are checked
increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 0 : 1); increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 0 : 1);
// Technically this would not account for keys that have tokens recorded
// on models they aren't provisioned for, but that would be strange
k.modelFamilies.forEach((f) => {
const tokens = k[`${f}Tokens`];
sumTokens += tokens;
sumCost += getTokenCostUsd(f, tokens);
increment(modelStats, `${f}__tokens`, tokens);
});
if (families.includes("gpt4-32k")) { if (families.includes("gpt4-32k")) {
family = "gpt4-32k"; family = "gpt4-32k";
} else if (families.includes("gpt4")) { } else if (families.includes("gpt4")) {
@ -141,10 +185,18 @@ function addKeyToAggregates(k: KeyPoolKey) {
} else { } else {
family = "turbo"; family = "turbo";
} }
} else { } else if (keyIsAnthropciKey(k)) {
const tokens = k.claudeTokens;
family = "claude"; family = "claude";
sumTokens += tokens;
increment(modelStats, `${family}__tokens`, tokens);
} else {
logger.error({ key: k.hash }, "Unknown key type when adding to aggregates");
return;
} }
increment(serviceStats, "tokens", sumTokens);
increment(serviceStats, "tokenCost", sumCost);
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1); increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0); increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0);
if ("isRevoked" in k) { if ("isRevoked" in k) {
@ -158,6 +210,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
function getOpenAIInfo() { function getOpenAIInfo() {
const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & { const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & {
[modelFamily in OpenAIModelFamily]?: { [modelFamily in OpenAIModelFamily]?: {
usage?: string;
activeKeys: number; activeKeys: number;
trialKeys?: number; trialKeys?: number;
revokedKeys?: number; revokedKeys?: number;
@ -185,7 +238,11 @@ function getOpenAIInfo() {
info.openaiOrgs = getUniqueOpenAIOrgs(keys); info.openaiOrgs = getUniqueOpenAIOrgs(keys);
families.forEach((f) => { families.forEach((f) => {
const tokens = modelStats.get(`${f}__tokens`) || 0;
const cost = getTokenCostUsd(f, tokens);
info[f] = { info[f] = {
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
activeKeys: modelStats.get(`${f}__active`) || 0, activeKeys: modelStats.get(`${f}__active`) || 0,
trialKeys: modelStats.get(`${f}__trial`) || 0, trialKeys: modelStats.get(`${f}__trial`) || 0,
revokedKeys: modelStats.get(`${f}__revoked`) || 0, revokedKeys: modelStats.get(`${f}__revoked`) || 0,
@ -203,8 +260,8 @@ function getOpenAIInfo() {
} }
families.forEach((f) => { families.forEach((f) => {
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
if (info[f]) { if (info[f]) {
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
info[f]!.proomptersInQueue = proomptersInQueue; info[f]!.proomptersInQueue = proomptersInQueue;
info[f]!.estimatedQueueTime = estimatedQueueTime; info[f]!.estimatedQueueTime = estimatedQueueTime;
} }
@ -220,8 +277,11 @@ function getAnthropicInfo() {
const queue = getQueueInformation("claude"); const queue = getQueueInformation("claude");
claudeInfo.queued = queue.proomptersInQueue; claudeInfo.queued = queue.proomptersInQueue;
claudeInfo.queueTime = queue.estimatedQueueTime; claudeInfo.queueTime = queue.estimatedQueueTime;
const tokens = modelStats.get("claude__tokens") || 0;
const cost = getTokenCostUsd("claude", tokens);
return { return {
claude: { claude: {
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
activeKeys: claudeInfo.active, activeKeys: claudeInfo.active,
proomptersInQueue: claudeInfo.queued, proomptersInQueue: claudeInfo.queued,
estimatedQueueTime: claudeInfo.queueTime, estimatedQueueTime: claudeInfo.queueTime,

View File

@ -24,7 +24,11 @@ export type AnthropicKeyUpdate = Omit<
| "rateLimitedUntil" | "rateLimitedUntil"
>; >;
export interface AnthropicKey extends Key { type AnthropicKeyUsage = {
[K in AnthropicModelFamily as `${K}Tokens`]: number;
};
export interface AnthropicKey extends Key, AnthropicKeyUsage {
readonly service: "anthropic"; readonly service: "anthropic";
readonly modelFamilies: AnthropicModelFamily[]; readonly modelFamilies: AnthropicModelFamily[];
/** The time at which this key was last rate limited. */ /** The time at which this key was last rate limited. */
@ -87,6 +91,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
.digest("hex") .digest("hex")
.slice(0, 8)}`, .slice(0, 8)}`,
lastChecked: 0, lastChecked: 0,
claudeTokens: 0,
}; };
this.keys.push(newKey); this.keys.push(newKey);
} }
@ -162,10 +167,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
return false; return false;
} }
public incrementPrompt(hash?: string) { public incrementUsage(hash: string, _model: string, tokens: number) {
const key = this.keys.find((k) => k.hash === hash); const key = this.keys.find((k) => k.hash === hash);
if (!key) return; if (!key) return;
key.promptCount++; key.promptCount++;
key.claudeTokens += tokens;
} }
public getLockoutPeriod(_model: AnthropicModel) { public getLockoutPeriod(_model: AnthropicModel) {

View File

@ -51,7 +51,7 @@ export interface KeyProvider<T extends Key = Key> {
update(hash: string, update: Partial<T>): void; update(hash: string, update: Partial<T>): void;
available(): number; available(): number;
anyUnchecked(): boolean; anyUnchecked(): boolean;
incrementPrompt(hash: string): void; incrementUsage(hash: string, model: string, tokens: number): void;
getLockoutPeriod(model: Model): number; getLockoutPeriod(model: Model): number;
markRateLimited(hash: string): void; markRateLimited(hash: string): void;
recheck(service: AIService): void; recheck(service: AIService): void;

View File

@ -67,9 +67,9 @@ export class KeyPool {
return this.keyProviders.some((provider) => provider.anyUnchecked()); return this.keyProviders.some((provider) => provider.anyUnchecked());
} }
public incrementPrompt(key: Key): void { public incrementUsage(key: Key, model: string, tokens: number): void {
const provider = this.getKeyProvider(key.service); const provider = this.getKeyProvider(key.service);
provider.incrementPrompt(key.hash); provider.incrementUsage(key.hash, model, tokens);
} }
public getLockoutPeriod(model: Model): number { public getLockoutPeriod(model: Model): number {

View File

@ -17,7 +17,13 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
"gpt-4", "gpt-4",
] as const; ] as const;
export interface OpenAIKey extends Key { // Flattening model families instead of using a nested object for easier
// cloning.
type OpenAIKeyUsage = {
[K in OpenAIModelFamily as `${K}Tokens`]: number;
};
export interface OpenAIKey extends Key, OpenAIKeyUsage {
readonly service: "openai"; readonly service: "openai";
modelFamilies: OpenAIModelFamily[]; modelFamilies: OpenAIModelFamily[];
/** /**
@ -78,7 +84,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
bareKeys = keyString.split(",").map((k) => k.trim()); bareKeys = keyString.split(",").map((k) => k.trim());
bareKeys = [...new Set(bareKeys)]; bareKeys = [...new Set(bareKeys)];
for (const k of bareKeys) { for (const k of bareKeys) {
const newKey = { const newKey: OpenAIKey = {
key: k, key: k,
service: "openai" as const, service: "openai" as const,
modelFamilies: ["turbo" as const, "gpt4" as const], modelFamilies: ["turbo" as const, "gpt4" as const],
@ -86,10 +92,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
isDisabled: false, isDisabled: false,
isRevoked: false, isRevoked: false,
isOverQuota: false, isOverQuota: false,
softLimit: 0,
hardLimit: 0,
systemHardLimit: 0,
usage: 0,
lastUsed: 0, lastUsed: 0,
lastChecked: 0, lastChecked: 0,
promptCount: 0, promptCount: 0,
@ -101,6 +103,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
rateLimitedAt: 0, rateLimitedAt: 0,
rateLimitRequestsReset: 0, rateLimitRequestsReset: 0,
rateLimitTokensReset: 0, rateLimitTokensReset: 0,
turboTokens: 0,
gpt4Tokens: 0,
"gpt4-32kTokens": 0,
}; };
this.keys.push(newKey); this.keys.push(newKey);
} }
@ -314,10 +319,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
key.rateLimitedAt = Date.now(); key.rateLimitedAt = Date.now();
} }
public incrementPrompt(keyHash?: string) { public incrementUsage(keyHash: string, model: string, tokens: number) {
const key = this.keys.find((k) => k.hash === keyHash); const key = this.keys.find((k) => k.hash === keyHash);
if (!key) return; if (!key) return;
key.promptCount++; key.promptCount++;
key[`${getOpenAIModelFamily(model)}Tokens`] += tokens;
} }
public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) { public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) {

View File

@ -411,11 +411,11 @@ function handleOpenAIRateLimitError(
const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => { const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
if (isCompletionRequest(req)) { if (isCompletionRequest(req)) {
keyPool.incrementPrompt(req.key!); const model = req.body.model;
const tokensUsed = req.promptTokens! + req.outputTokens!;
keyPool.incrementUsage(req.key!, model, tokensUsed);
if (req.user) { if (req.user) {
incrementPromptCount(req.user.token); incrementPromptCount(req.user.token);
const model = req.body.model;
const tokensUsed = req.promptTokens! + req.outputTokens!;
incrementTokenCount(req.user.token, model, tokensUsed); incrementTokenCount(req.user.token, model, tokensUsed);
} }
} }

View File

@ -60,8 +60,6 @@ function getModelsResponse() {
const allowed = new Set<ModelFamily>(config.allowedModelFamilies); const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x))); available = new Set([...available].filter((x) => allowed.has(x)));
console.log(available);
const models = knownModels const models = knownModels
.map((id) => ({ .map((id) => ({
id, id,