adds token consumption stats to infopage
This commit is contained in:
parent
bed275a195
commit
2c0a659b2d
|
@ -3,6 +3,7 @@ import { Request, Response } from "express";
|
||||||
import showdown from "showdown";
|
import showdown from "showdown";
|
||||||
import { config, listConfig } from "./config";
|
import { config, listConfig } from "./config";
|
||||||
import {
|
import {
|
||||||
|
AnthropicKey,
|
||||||
ModelFamily,
|
ModelFamily,
|
||||||
OpenAIKey,
|
OpenAIKey,
|
||||||
OpenAIModelFamily,
|
OpenAIModelFamily,
|
||||||
|
@ -10,6 +11,7 @@ import {
|
||||||
} from "./key-management";
|
} from "./key-management";
|
||||||
import { getUniqueIps } from "./proxy/rate-limit";
|
import { getUniqueIps } from "./proxy/rate-limit";
|
||||||
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
|
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
|
||||||
|
import { logger } from "./logger";
|
||||||
|
|
||||||
const INFO_PAGE_TTL = 2000;
|
const INFO_PAGE_TTL = 2000;
|
||||||
let infoPageHtml: string | undefined;
|
let infoPageHtml: string | undefined;
|
||||||
|
@ -18,6 +20,8 @@ let infoPageLastUpdated = 0;
|
||||||
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
|
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];
|
||||||
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
|
const keyIsOpenAIKey = (k: KeyPoolKey): k is OpenAIKey =>
|
||||||
k.service === "openai";
|
k.service === "openai";
|
||||||
|
/**
 * Type guard that narrows a key taken from the pool to `AnthropicKey` by
 * checking its `service` discriminant.
 */
const keyIsAnthropciKey = (k: KeyPoolKey): k is AnthropicKey => {
  return k.service === "anthropic";
};
|
||||||
|
|
||||||
type ModelAggregates = {
|
type ModelAggregates = {
|
||||||
active: number;
|
active: number;
|
||||||
|
@ -26,6 +30,7 @@ type ModelAggregates = {
|
||||||
overQuota?: number;
|
overQuota?: number;
|
||||||
queued: number;
|
queued: number;
|
||||||
queueTime: string;
|
queueTime: string;
|
||||||
|
tokens: number;
|
||||||
};
|
};
|
||||||
type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`;
|
type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`;
|
||||||
type ServiceAggregates = {
|
type ServiceAggregates = {
|
||||||
|
@ -34,6 +39,8 @@ type ServiceAggregates = {
|
||||||
openaiOrgs?: number;
|
openaiOrgs?: number;
|
||||||
anthropicKeys?: number;
|
anthropicKeys?: number;
|
||||||
proompts: number;
|
proompts: number;
|
||||||
|
tokens: number;
|
||||||
|
tokenCost: number;
|
||||||
uncheckedKeys?: number;
|
uncheckedKeys?: number;
|
||||||
} & {
|
} & {
|
||||||
[modelFamily in ModelFamily]?: ModelAggregates;
|
[modelFamily in ModelFamily]?: ModelAggregates;
|
||||||
|
@ -42,6 +49,27 @@ type ServiceAggregates = {
|
||||||
const modelStats = new Map<ModelAggregateKey, number>();
|
const modelStats = new Map<ModelAggregateKey, number>();
|
||||||
const serviceStats = new Map<keyof ServiceAggregates, number>();
|
const serviceStats = new Map<keyof ServiceAggregates, number>();
|
||||||
|
|
||||||
|
/**
 * Estimates the USD cost of a number of tokens consumed on one model family.
 *
 * Technically this slightly underestimates, because completion tokens cost
 * more than prompt tokens but those are not tracked separately right now, so
 * a single per-token rate is applied to the whole count.
 *
 * @param model - Model family whose per-token rate applies.
 * @param tokens - Token count to price.
 * @returns `tokens` multiplied by the family's rate; 0 when the family has no
 *   rate listed here or when `tokens` is not a finite number.
 */
function getTokenCostUsd(model: ModelFamily, tokens: number): number {
  // A NaN/undefined/Infinity count would otherwise poison every running sum
  // it is added to and render as "$NaN" once the result goes through toFixed().
  if (!Number.isFinite(tokens)) return 0;

  let cost = 0;
  switch (model) {
    case "gpt4-32k":
      cost = 0.00006;
      break;
    case "gpt4":
      cost = 0.00003;
      break;
    case "turbo":
      cost = 0.0000015;
      break;
    case "claude":
      cost = 0.00001102;
      break;
    default:
      // No rate is listed for this family, so it is priced at zero.
      break;
  }
  return cost * tokens;
}
|
||||||
|
|
||||||
export const handleInfoPage = (req: Request, res: Response) => {
|
export const handleInfoPage = (req: Request, res: Response) => {
|
||||||
if (infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) {
|
if (infoPageLastUpdated + INFO_PAGE_TTL > Date.now()) {
|
||||||
res.send(infoPageHtml);
|
res.send(infoPageHtml);
|
||||||
|
@ -66,6 +94,8 @@ function cacheInfoPageHtml(baseUrl: string) {
|
||||||
|
|
||||||
const openaiKeys = serviceStats.get("openaiKeys") || 0;
|
const openaiKeys = serviceStats.get("openaiKeys") || 0;
|
||||||
const anthropicKeys = serviceStats.get("anthropicKeys") || 0;
|
const anthropicKeys = serviceStats.get("anthropicKeys") || 0;
|
||||||
|
const proompts = serviceStats.get("proompts") || 0;
|
||||||
|
const tokens = serviceStats.get("tokens") || 0;
|
||||||
|
|
||||||
const info = {
|
const info = {
|
||||||
uptime: Math.floor(process.uptime()),
|
uptime: Math.floor(process.uptime()),
|
||||||
|
@ -73,7 +103,8 @@ function cacheInfoPageHtml(baseUrl: string) {
|
||||||
...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}),
|
...(openaiKeys ? { openai: baseUrl + "/proxy/openai" } : {}),
|
||||||
...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
|
...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
|
||||||
},
|
},
|
||||||
proompts: keys.reduce((acc, k) => acc + k.promptCount, 0),
|
proompts,
|
||||||
|
tookens: `${tokens} ($${(serviceStats.get("tokenCost") || 0).toFixed(2)})`,
|
||||||
...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}),
|
...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}),
|
||||||
openaiKeys,
|
openaiKeys,
|
||||||
anthropicKeys,
|
anthropicKeys,
|
||||||
|
@ -127,13 +158,26 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||||
increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0);
|
increment(serviceStats, "openaiKeys", k.service === "openai" ? 1 : 0);
|
||||||
increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0);
|
increment(serviceStats, "anthropicKeys", k.service === "anthropic" ? 1 : 0);
|
||||||
|
|
||||||
|
let sumTokens = 0;
|
||||||
|
let sumCost = 0;
|
||||||
let family: ModelFamily;
|
let family: ModelFamily;
|
||||||
const families = k.modelFamilies.filter((f) =>
|
const families = k.modelFamilies.filter((f) =>
|
||||||
config.allowedModelFamilies.includes(f)
|
config.allowedModelFamilies.includes(f)
|
||||||
);
|
);
|
||||||
|
|
||||||
if (keyIsOpenAIKey(k)) {
|
if (keyIsOpenAIKey(k)) {
|
||||||
// Currently only OpenAI keys are checked
|
// Currently only OpenAI keys are checked
|
||||||
increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 0 : 1);
|
increment(serviceStats, "uncheckedKeys", Boolean(k.lastChecked) ? 0 : 1);
|
||||||
|
|
||||||
|
// Technically this would not account for keys that have tokens recorded
|
||||||
|
// on models they aren't provisioned for, but that would be strange
|
||||||
|
k.modelFamilies.forEach((f) => {
|
||||||
|
const tokens = k[`${f}Tokens`];
|
||||||
|
sumTokens += tokens;
|
||||||
|
sumCost += getTokenCostUsd(f, tokens);
|
||||||
|
increment(modelStats, `${f}__tokens`, tokens);
|
||||||
|
});
|
||||||
|
|
||||||
if (families.includes("gpt4-32k")) {
|
if (families.includes("gpt4-32k")) {
|
||||||
family = "gpt4-32k";
|
family = "gpt4-32k";
|
||||||
} else if (families.includes("gpt4")) {
|
} else if (families.includes("gpt4")) {
|
||||||
|
@ -141,10 +185,18 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||||
} else {
|
} else {
|
||||||
family = "turbo";
|
family = "turbo";
|
||||||
}
|
}
|
||||||
} else {
|
} else if (keyIsAnthropciKey(k)) {
|
||||||
|
const tokens = k.claudeTokens;
|
||||||
family = "claude";
|
family = "claude";
|
||||||
|
sumTokens += tokens;
|
||||||
|
increment(modelStats, `${family}__tokens`, tokens);
|
||||||
|
} else {
|
||||||
|
logger.error({ key: k.hash }, "Unknown key type when adding to aggregates");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
increment(serviceStats, "tokens", sumTokens);
|
||||||
|
increment(serviceStats, "tokenCost", sumCost);
|
||||||
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
|
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
|
||||||
increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0);
|
increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0);
|
||||||
if ("isRevoked" in k) {
|
if ("isRevoked" in k) {
|
||||||
|
@ -158,6 +210,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||||
function getOpenAIInfo() {
|
function getOpenAIInfo() {
|
||||||
const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & {
|
const info: { status?: string; openaiKeys?: number; openaiOrgs?: number } & {
|
||||||
[modelFamily in OpenAIModelFamily]?: {
|
[modelFamily in OpenAIModelFamily]?: {
|
||||||
|
usage?: string;
|
||||||
activeKeys: number;
|
activeKeys: number;
|
||||||
trialKeys?: number;
|
trialKeys?: number;
|
||||||
revokedKeys?: number;
|
revokedKeys?: number;
|
||||||
|
@ -185,7 +238,11 @@ function getOpenAIInfo() {
|
||||||
info.openaiOrgs = getUniqueOpenAIOrgs(keys);
|
info.openaiOrgs = getUniqueOpenAIOrgs(keys);
|
||||||
|
|
||||||
families.forEach((f) => {
|
families.forEach((f) => {
|
||||||
|
const tokens = modelStats.get(`${f}__tokens`) || 0;
|
||||||
|
const cost = getTokenCostUsd(f, tokens);
|
||||||
|
|
||||||
info[f] = {
|
info[f] = {
|
||||||
|
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
|
||||||
activeKeys: modelStats.get(`${f}__active`) || 0,
|
activeKeys: modelStats.get(`${f}__active`) || 0,
|
||||||
trialKeys: modelStats.get(`${f}__trial`) || 0,
|
trialKeys: modelStats.get(`${f}__trial`) || 0,
|
||||||
revokedKeys: modelStats.get(`${f}__revoked`) || 0,
|
revokedKeys: modelStats.get(`${f}__revoked`) || 0,
|
||||||
|
@ -203,8 +260,8 @@ function getOpenAIInfo() {
|
||||||
}
|
}
|
||||||
|
|
||||||
families.forEach((f) => {
|
families.forEach((f) => {
|
||||||
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
|
|
||||||
if (info[f]) {
|
if (info[f]) {
|
||||||
|
const { estimatedQueueTime, proomptersInQueue } = getQueueInformation(f);
|
||||||
info[f]!.proomptersInQueue = proomptersInQueue;
|
info[f]!.proomptersInQueue = proomptersInQueue;
|
||||||
info[f]!.estimatedQueueTime = estimatedQueueTime;
|
info[f]!.estimatedQueueTime = estimatedQueueTime;
|
||||||
}
|
}
|
||||||
|
@ -220,8 +277,11 @@ function getAnthropicInfo() {
|
||||||
const queue = getQueueInformation("claude");
|
const queue = getQueueInformation("claude");
|
||||||
claudeInfo.queued = queue.proomptersInQueue;
|
claudeInfo.queued = queue.proomptersInQueue;
|
||||||
claudeInfo.queueTime = queue.estimatedQueueTime;
|
claudeInfo.queueTime = queue.estimatedQueueTime;
|
||||||
|
const tokens = modelStats.get("claude__tokens") || 0;
|
||||||
|
const cost = getTokenCostUsd("claude", tokens);
|
||||||
return {
|
return {
|
||||||
claude: {
|
claude: {
|
||||||
|
usage: `${tokens} tokens ($${cost.toFixed(2)})`,
|
||||||
activeKeys: claudeInfo.active,
|
activeKeys: claudeInfo.active,
|
||||||
proomptersInQueue: claudeInfo.queued,
|
proomptersInQueue: claudeInfo.queued,
|
||||||
estimatedQueueTime: claudeInfo.queueTime,
|
estimatedQueueTime: claudeInfo.queueTime,
|
||||||
|
|
|
@ -24,7 +24,11 @@ export type AnthropicKeyUpdate = Omit<
|
||||||
| "rateLimitedUntil"
|
| "rateLimitedUntil"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
export interface AnthropicKey extends Key {
|
/**
 * Per-family token counters: one numeric `<family>Tokens` field for each
 * member of `AnthropicModelFamily`.
 */
type AnthropicKeyUsage = Record<`${AnthropicModelFamily}Tokens`, number>;
|
||||||
|
|
||||||
|
export interface AnthropicKey extends Key, AnthropicKeyUsage {
|
||||||
readonly service: "anthropic";
|
readonly service: "anthropic";
|
||||||
readonly modelFamilies: AnthropicModelFamily[];
|
readonly modelFamilies: AnthropicModelFamily[];
|
||||||
/** The time at which this key was last rate limited. */
|
/** The time at which this key was last rate limited. */
|
||||||
|
@ -87,6 +91,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
.digest("hex")
|
.digest("hex")
|
||||||
.slice(0, 8)}`,
|
.slice(0, 8)}`,
|
||||||
lastChecked: 0,
|
lastChecked: 0,
|
||||||
|
claudeTokens: 0,
|
||||||
};
|
};
|
||||||
this.keys.push(newKey);
|
this.keys.push(newKey);
|
||||||
}
|
}
|
||||||
|
@ -162,10 +167,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public incrementPrompt(hash?: string) {
|
public incrementUsage(hash: string, _model: string, tokens: number) {
|
||||||
const key = this.keys.find((k) => k.hash === hash);
|
const key = this.keys.find((k) => k.hash === hash);
|
||||||
if (!key) return;
|
if (!key) return;
|
||||||
key.promptCount++;
|
key.promptCount++;
|
||||||
|
key.claudeTokens += tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
public getLockoutPeriod(_model: AnthropicModel) {
|
public getLockoutPeriod(_model: AnthropicModel) {
|
||||||
|
|
|
@ -51,7 +51,7 @@ export interface KeyProvider<T extends Key = Key> {
|
||||||
update(hash: string, update: Partial<T>): void;
|
update(hash: string, update: Partial<T>): void;
|
||||||
available(): number;
|
available(): number;
|
||||||
anyUnchecked(): boolean;
|
anyUnchecked(): boolean;
|
||||||
incrementPrompt(hash: string): void;
|
incrementUsage(hash: string, model: string, tokens: number): void;
|
||||||
getLockoutPeriod(model: Model): number;
|
getLockoutPeriod(model: Model): number;
|
||||||
markRateLimited(hash: string): void;
|
markRateLimited(hash: string): void;
|
||||||
recheck(service: AIService): void;
|
recheck(service: AIService): void;
|
||||||
|
|
|
@ -67,9 +67,9 @@ export class KeyPool {
|
||||||
return this.keyProviders.some((provider) => provider.anyUnchecked());
|
return this.keyProviders.some((provider) => provider.anyUnchecked());
|
||||||
}
|
}
|
||||||
|
|
||||||
public incrementPrompt(key: Key): void {
|
public incrementUsage(key: Key, model: string, tokens: number): void {
|
||||||
const provider = this.getKeyProvider(key.service);
|
const provider = this.getKeyProvider(key.service);
|
||||||
provider.incrementPrompt(key.hash);
|
provider.incrementUsage(key.hash, model, tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
public getLockoutPeriod(model: Model): number {
|
public getLockoutPeriod(model: Model): number {
|
||||||
|
|
|
@ -17,7 +17,13 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
|
||||||
"gpt-4",
|
"gpt-4",
|
||||||
] as const;
|
] as const;
|
||||||
|
|
||||||
export interface OpenAIKey extends Key {
|
/**
 * Per-family token counters: one numeric `<family>Tokens` field for each
 * member of `OpenAIModelFamily`. The families are flattened into top-level
 * fields instead of a nested object for easier cloning.
 */
type OpenAIKeyUsage = Record<`${OpenAIModelFamily}Tokens`, number>;
|
||||||
|
|
||||||
|
export interface OpenAIKey extends Key, OpenAIKeyUsage {
|
||||||
readonly service: "openai";
|
readonly service: "openai";
|
||||||
modelFamilies: OpenAIModelFamily[];
|
modelFamilies: OpenAIModelFamily[];
|
||||||
/**
|
/**
|
||||||
|
@ -78,7 +84,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
bareKeys = keyString.split(",").map((k) => k.trim());
|
bareKeys = keyString.split(",").map((k) => k.trim());
|
||||||
bareKeys = [...new Set(bareKeys)];
|
bareKeys = [...new Set(bareKeys)];
|
||||||
for (const k of bareKeys) {
|
for (const k of bareKeys) {
|
||||||
const newKey = {
|
const newKey: OpenAIKey = {
|
||||||
key: k,
|
key: k,
|
||||||
service: "openai" as const,
|
service: "openai" as const,
|
||||||
modelFamilies: ["turbo" as const, "gpt4" as const],
|
modelFamilies: ["turbo" as const, "gpt4" as const],
|
||||||
|
@ -86,10 +92,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
isDisabled: false,
|
isDisabled: false,
|
||||||
isRevoked: false,
|
isRevoked: false,
|
||||||
isOverQuota: false,
|
isOverQuota: false,
|
||||||
softLimit: 0,
|
|
||||||
hardLimit: 0,
|
|
||||||
systemHardLimit: 0,
|
|
||||||
usage: 0,
|
|
||||||
lastUsed: 0,
|
lastUsed: 0,
|
||||||
lastChecked: 0,
|
lastChecked: 0,
|
||||||
promptCount: 0,
|
promptCount: 0,
|
||||||
|
@ -101,6 +103,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
rateLimitedAt: 0,
|
rateLimitedAt: 0,
|
||||||
rateLimitRequestsReset: 0,
|
rateLimitRequestsReset: 0,
|
||||||
rateLimitTokensReset: 0,
|
rateLimitTokensReset: 0,
|
||||||
|
turboTokens: 0,
|
||||||
|
gpt4Tokens: 0,
|
||||||
|
"gpt4-32kTokens": 0,
|
||||||
};
|
};
|
||||||
this.keys.push(newKey);
|
this.keys.push(newKey);
|
||||||
}
|
}
|
||||||
|
@ -314,10 +319,11 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
key.rateLimitedAt = Date.now();
|
key.rateLimitedAt = Date.now();
|
||||||
}
|
}
|
||||||
|
|
||||||
public incrementPrompt(keyHash?: string) {
|
public incrementUsage(keyHash: string, model: string, tokens: number) {
|
||||||
const key = this.keys.find((k) => k.hash === keyHash);
|
const key = this.keys.find((k) => k.hash === keyHash);
|
||||||
if (!key) return;
|
if (!key) return;
|
||||||
key.promptCount++;
|
key.promptCount++;
|
||||||
|
key[`${getOpenAIModelFamily(model)}Tokens`] += tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) {
|
public updateRateLimits(keyHash: string, headers: http.IncomingHttpHeaders) {
|
||||||
|
|
|
@ -411,11 +411,11 @@ function handleOpenAIRateLimitError(
|
||||||
|
|
||||||
const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
|
const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
|
||||||
if (isCompletionRequest(req)) {
|
if (isCompletionRequest(req)) {
|
||||||
keyPool.incrementPrompt(req.key!);
|
const model = req.body.model;
|
||||||
|
const tokensUsed = req.promptTokens! + req.outputTokens!;
|
||||||
|
keyPool.incrementUsage(req.key!, model, tokensUsed);
|
||||||
if (req.user) {
|
if (req.user) {
|
||||||
incrementPromptCount(req.user.token);
|
incrementPromptCount(req.user.token);
|
||||||
const model = req.body.model;
|
|
||||||
const tokensUsed = req.promptTokens! + req.outputTokens!;
|
|
||||||
incrementTokenCount(req.user.token, model, tokensUsed);
|
incrementTokenCount(req.user.token, model, tokensUsed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,8 +60,6 @@ function getModelsResponse() {
|
||||||
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
|
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
|
||||||
available = new Set([...available].filter((x) => allowed.has(x)));
|
available = new Set([...available].filter((x) => allowed.has(x)));
|
||||||
|
|
||||||
console.log(available);
|
|
||||||
|
|
||||||
const models = knownModels
|
const models = knownModels
|
||||||
.map((id) => ({
|
.map((id) => ({
|
||||||
id,
|
id,
|
||||||
|
|
Loading…
Reference in New Issue