diff --git a/src/info-page.ts b/src/info-page.ts index 2ad8db0..3d60b34 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -3,6 +3,7 @@ import { Request, Response } from "express"; import showdown from "showdown"; import { config, listConfig } from "./config"; import { + AnthropicKey, ModelFamily, OpenAIKey, OpenAIModelFamily, @@ -10,6 +11,7 @@ } from "./key-management"; import { getUniqueIps } from "./proxy/rate-limit"; import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue"; +import { logger } from "./logger"; const INFO_PAGE_TTL = 2000; let infoPageHtml: string | undefined; @@ -18,6 +20,8 @@ let infoPageLastUpdated = 0; type KeyPoolKey = ReturnType[0]; const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey => k.service === "openai"; +const keyIsAnthropicKey = (k: KeyPoolKey): k is AnthropicKey => + k.service === "anthropic"; type ModelAggregates = { active: number; @@ -26,6 +30,7 @@ overQuota?: number; queued: number; queueTime: string; + tokens: number; }; type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`; type ServiceAggregates = { @@ -34,6 +39,8 @@ openaiOrgs?: number; anthropicKeys?: number; proompts: number; + tokens: number; + tokenCost: number; uncheckedKeys?: number; } & { [modelFamily in ModelFamily]?: ModelAggregates; @@ -42,6 +49,27 @@ const modelStats = new Map(); const serviceStats = new Map(); +// technically slightly underestimates, because completion tokens cost more +// than prompt tokens but we don't track those separately right now +function getTokenCostUsd(model: ModelFamily, tokens: number) { + let cost = 0; + switch (model) { + case "gpt4-32k": + cost = 0.00006; + break; + case "gpt4": + cost = 0.00003; + break; + case "turbo": + cost = 0.0000015; + break; + case "claude": + cost = 0.00001102; + break; + } + return cost * tokens; +} + export const handleInfoPage = (req: Request, res: Response) => { if 
(infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) { res.send(infoPageHtml); @@ -66,6 +94,8 @@ function cacheInfoPageHtml(baseUrl: string) { const openaiKeys = serviceStats.get("openaiKeys") || 0; const anthropicKeys = serviceStats.get("anthropicKeys") || 0; + const proompts = serviceStats.get("proompts") || 0; + const tokens = serviceStats.get("tokens") || 0; const info = { uptime: Math.floor(process.uptime()), @@ -73,7 +103,8 @@ ...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}), ...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}), }, - proompts: keys.reduce((acc, k) => acc + k.promptCount, 0), + proompts, + tookens: `${tokens} ($${(serviceStats.get("tokenCost") || 0).toFixed(2)})`, ...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}), openaiKeys, anthropicKeys, @@ -127,13 +158,26 @@ function addKeyToAggregates(k: KeyPoolKey) { increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0); increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0); + let sumTokens = 0; + let sumCost = 0; let family: ModelFamily; const families = k.modelFamilies.filter((f) => config.allowedModelFamilies.includes(f) ); + if (keyIsOpenAIKey(k)) { // Currently only OpenAI keys are checked increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 
0 : 1); + + // Technically this would not account for keys that have tokens recorded + // on models they aren't provisioned for, but that would be strange + k.modelFamilies.forEach((f) => { + const tokens = k[`${f}Tokens`]; + sumTokens += tokens; + sumCost += getTokenCostUsd(f, tokens); + increment(modelStats, `${f}__tokens`, tokens); + }); + if (families.includes("gpt4-32k")) { + family = "gpt4-32k"; + } else if (families.includes("gpt4")) { @@ -141,10 +185,18 @@ } else { family = "turbo"; } - } else { + } else if (keyIsAnthropicKey(k)) { + const tokens = k.claudeTokens; family = "claude"; + sumTokens += tokens; + increment(modelStats, `${family}__tokens`, tokens); + } else { + logger.error({ key: k.hash }, "Unknown key type when adding to aggregates"); + return; } + increment(serviceStats, "tokens", sumTokens); + increment(serviceStats, "tokenCost", sumCost); increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1); increment(modelStats, `${family}__trial`, k.isTrial ? 
1 : 0); if ("isRevoked" in k) { @@ -158,6 +210,7 @@ function addKeyToAggregates(k: KeyPoolKey) { function getOpenAIInfo() { const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & { [modelFamily in OpenAIModelFamily]?: { + usage?: string; activeKeys: number; trialKeys?: number; revokedKeys?: number; @@ -185,7 +238,11 @@ function getOpenAIInfo() { info.openaiOrgs = getUniqueOpenAIOrgs(keys); families.forEach((f) => { + const tokens = modelStats.get(`${f}__tokens`) || 0; + const cost = getTokenCostUsd(f, tokens); + info[f] = { + usage: `${tokens} tokens ($${cost.toFixed(2)})`, activeKeys: modelStats.get(`${f}__active`) || 0, trialKeys: modelStats.get(`${f}__trial`) || 0, revokedKeys: modelStats.get(`${f}__revoked`) || 0, @@ -203,8 +260,8 @@ function getOpenAIInfo() { } families.forEach((f) => { - const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f); if (info[f]) { + const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f); info[f]!.proomptersInQueue = proomptersInQueue; info[f]!.estimatedQueueTime = estimatedQueueTime; } @@ -220,8 +277,11 @@ function getAnthropicInfo() { const queue = getQueueInformation("claude"); claudeInfo.queued = queue.proomptersInQueue; claudeInfo.queueTime = queue.estimatedQueueTime; + const tokens = modelStats.get("claude__tokens") || 0; + const cost = getTokenCostUsd("claude", tokens); return { claude: { + usage: `${tokens} tokens ($${cost.toFixed(2)})`, activeKeys: claudeInfo.active, proomptersInQueue: claudeInfo.queued, estimatedQueueTime: claudeInfo.queueTime, diff --git a/src/key-management/anthropic/provider.ts b/src/key-management/anthropic/provider.ts index a9f723f..8cd146b 100644 --- a/src/key-management/anthropic/provider.ts +++ b/src/key-management/anthropic/provider.ts @@ -24,7 +24,11 @@ export type AnthropicKeyUpdate = Omit< | "rateLimitedUntil" >; -export interface AnthropicKey extends Key { +type AnthropicKeyUsage = { + [K in AnthropicModelFamily as `${K}Tokens`]: number; 
+}; + +export interface AnthropicKey extends Key, AnthropicKeyUsage { readonly service: "anthropic"; readonly modelFamilies: AnthropicModelFamily[]; /** The time at which this key was last rate limited. */ @@ -87,6 +91,7 @@ export class AnthropicKeyProvider implements KeyProvider { .digest("hex") .slice(0, 8)}`, lastChecked: 0, + claudeTokens: 0, }; this.keys.push(newKey); } @@ -162,10 +167,11 @@ export class AnthropicKeyProvider implements KeyProvider { return false; } - public incrementPrompt(hash?: string) { + public incrementUsage(hash: string, _model: string, tokens: number) { const key = this.keys.find((k) => k.hash === hash); if (!key) return; key.promptCount++; + key.claudeTokens += tokens; } public getLockoutPeriod(_model: AnthropicModel) { diff --git a/src/key-management/index.ts b/src/key-management/index.ts index d23dce2..eb0dc46 100644 --- a/src/key-management/index.ts +++ b/src/key-management/index.ts @@ -51,7 +51,7 @@ export interface KeyProvider { update(hash: string, update: Partial): void; available(): number; anyUnchecked(): boolean; - incrementPrompt(hash: string): void; + incrementUsage(hash: string, model: string, tokens: number): void; getLockoutPeriod(model: Model): number; markRateLimited(hash: string): void; recheck(service: AIService): void; diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts index e15a638..abe57ab 100644 --- a/src/key-management/key-pool.ts +++ b/src/key-management/key-pool.ts @@ -67,9 +67,9 @@ export class KeyPool { return this.keyProviders.some((provider) => provider.anyUnchecked()); } - public incrementPrompt(key: Key): void { + public incrementUsage(key: Key, model: string, tokens: number): void { const provider = this.getKeyProvider(key.service); - provider.incrementPrompt(key.hash); + provider.incrementUsage(key.hash, model, tokens); } public getLockoutPeriod(model: Model): number { diff --git a/src/key-management/openai/provider.ts b/src/key-management/openai/provider.ts index 
ad0a8de..9a8b5b1 100644 --- a/src/key-management/openai/provider.ts +++ b/src/key-management/openai/provider.ts @@ -17,7 +17,13 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [ "gpt-4", ] as const; -export interface OpenAIKey extends Key { +// Flattening model families instead of using a nested object for easier +// cloning. +type OpenAIKeyUsage = { + [K in OpenAIModelFamily as `${K}Tokens`]: number; +}; + +export interface OpenAIKey extends Key, OpenAIKeyUsage { readonly service: "openai"; modelFamilies: OpenAIModelFamily[]; /** @@ -78,7 +84,7 @@ export class OpenAIKeyProvider implements KeyProvider { bareKeys = keyString.split(",").map((k) => k.trim()); bareKeys = [...new Set(bareKeys)]; for (const k of bareKeys) { - const newKey = { + const newKey: OpenAIKey = { key: k, service: "openai" as const, modelFamilies: ["turbo" as const, "gpt4" as const], @@ -86,10 +92,6 @@ export class OpenAIKeyProvider implements KeyProvider { isDisabled: false, isRevoked: false, isOverQuota: false, - softLimit: 0, - hardLimit: 0, - systemHardLimit: 0, - usage: 0, lastUsed: 0, lastChecked: 0, promptCount: 0, @@ -101,6 +103,9 @@ export class OpenAIKeyProvider implements KeyProvider { rateLimitedAt: 0, rateLimitRequestsReset: 0, rateLimitTokensReset: 0, + turboTokens: 0, + gpt4Tokens: 0, + "gpt4-32kTokens": 0, }; this.keys.push(newKey); } @@ -314,10 +319,11 @@ export class OpenAIKeyProvider implements KeyProvider { key.rateLimitedAt = Date.now(); } - public incrementPrompt(keyHash?: string) { + public incrementUsage(keyHash: string, model: string, tokens: number) { const key = this.keys.find((k) => k.hash === keyHash); if (!key) return; key.promptCount++; + key[`${getOpenAIModelFamily(model)}Tokens`] += tokens; } public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) { diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index fe6ca81..e398481 100644 --- a/src/proxy/middleware/response/index.ts +++ 
b/src/proxy/middleware/response/index.ts @@ -411,11 +411,11 @@ function handleOpenAIRateLimitError( const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => { if (isCompletionRequest(req)) { - keyPool.incrementPrompt(req.key!); + const model = req.body.model; + const tokensUsed = req.promptTokens! + req.outputTokens!; + keyPool.incrementUsage(req.key!, model, tokensUsed); if (req.user) { incrementPromptCount(req.user.token); - const model = req.body.model; - const tokensUsed = req.promptTokens! + req.outputTokens!; incrementTokenCount(req.user.token, model, tokensUsed); } } diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts index 3f008d5..1e3e626 100644 --- a/src/proxy/openai.ts +++ b/src/proxy/openai.ts @@ -60,8 +60,6 @@ function getModelsResponse() { const allowed = new Set(config.allowedModelFamilies); available = new Set([...available].filter((x) => allowed.has(x))); - console.log(available); - const models = knownModels .map((id) => ({ id,