// oai-reverse-proxy/src/service-info.ts
// 418 lines · 13 KiB · TypeScript

/** Calculates and returns stats about the service. */
import { config, listConfig } from "./config";
import {
AnthropicKey,
AwsBedrockKey,
AzureOpenAIKey,
GoogleAIKey,
keyPool,
OpenAIKey,
} from "./shared/key-management";
import {
AnthropicModelFamily,
assertIsKnownModelFamily,
AwsBedrockModelFamily,
AzureOpenAIModelFamily,
GoogleAIModelFamily,
LLM_SERVICES,
LLMService,
MODEL_FAMILY_SERVICE,
ModelFamily,
OpenAIModelFamily,
} from "./shared/models";
import { getCostSuffix, getTokenCostUsd, prettyTokens } from "./shared/stats";
import { getUniqueIps } from "./proxy/rate-limit";
import { assertNever } from "./shared/utils";
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
/**
 * Maximum age, in milliseconds, that `buildInfo` compares against `Date.now()`
 * when deciding whether its cached result may be returned.
 */
const CACHE_TTL = 2000;
/** A single element of the array returned by `keyPool.list()`. */
type KeyPoolKey = ReturnType<typeof keyPool.list>[0];

/** Narrows a pooled key to `OpenAIKey` when its `service` is "openai". */
function keyIsOpenAIKey(k: KeyPoolKey): k is OpenAIKey {
  return k.service === "openai";
}

/** Narrows a pooled key to `AzureOpenAIKey` when its `service` is "azure". */
function keyIsAzureKey(k: KeyPoolKey): k is AzureOpenAIKey {
  return k.service === "azure";
}

/** Narrows a pooled key to `AnthropicKey` when its `service` is "anthropic". */
function keyIsAnthropicKey(k: KeyPoolKey): k is AnthropicKey {
  return k.service === "anthropic";
}

/** Narrows a pooled key to `GoogleAIKey` when its `service` is "google-ai". */
function keyIsGoogleAIKey(k: KeyPoolKey): k is GoogleAIKey {
  return k.service === "google-ai";
}

/** Narrows a pooled key to `AwsBedrockKey` when its `service` is "aws". */
function keyIsAwsKey(k: KeyPoolKey): k is AwsBedrockKey {
  return k.service === "aws";
}
/** Stats aggregated across all keys for a given service. */
type ServiceAggregate = "keys" | "uncheckedKeys" | "orgs";
/**
 * Stats aggregated across all keys for a given model family.
 * The counters are accumulated key by key in `addKeyToAggregates`.
 */
type ModelAggregates = {
/** Number of keys whose `isDisabled` flag is not set. */
active: number;
/** Number of keys flagged `isTrial`; only recorded for OpenAI keys. */
trial?: number;
/** Number of keys flagged `isRevoked`. */
revoked?: number;
/** Number of keys flagged `isOverQuota`; only recorded for OpenAI keys. */
overQuota?: number;
/** Number of keys flagged `isPozzed`; only recorded for Anthropic keys. */
pozzed?: number;
/**
 * Number of AWS keys that have been checked, are not disabled, and whose
 * `awsLoggingStatus` is anything other than "disabled".
 */
awsLogged?: number;
// NOTE(review): `queued` and `queueTime` are not written anywhere in the
// visible part of this file; queue figures are obtained separately through
// `getQueueInformation`. Confirm whether these two members are still needed.
queued: number;
queueTime: string;
/** Sum of the per-key token counters for the family. */
tokens: number;
};
/** All possible combinations of model family and aggregate type. */
type ModelAggregateKey = `${ModelFamily}__${keyof ModelAggregates}`;
/**
 * Union of everything that can be aggregated: service-wide totals, optional
 * per-family aggregates, and per-service counters keyed as
 * `<service>__<ServiceAggregate>`. Its keys are the key type of `serviceStats`.
 */
type AllStats = {
/** Sum of `promptCount` over all keys. */
proompts: number;
/** Sum of token counters over all keys and families. */
tokens: number;
/** Sum of `getTokenCostUsd` results over all keys and families. */
tokenCost: number;
} & { [modelFamily in ModelFamily]?: ModelAggregates } & {
[service in LLMService as `${service}__${ServiceAggregate}`]?: number;
};
/**
 * Per-model-family section of the published service info, as assembled by
 * `getInfoForFamily`. Every member except `activeKeys` may be absent:
 * `buildInfo` strips usage/queue members for non-admin callers when
 * `config.staticServiceInfo` is set, and `revokedKeys` is removed for
 * non-"turbo" OpenAI families when key checking is enabled.
 */
type BaseFamilyInfo = {
/** Human-readable token total followed by a cost suffix. */
usage?: string;
/** Count of keys for the family that are not disabled. */
activeKeys: number;
/** Count of keys for the family flagged as revoked. */
revokedKeys?: number;
/** Queue length reported by `getQueueLength` for the family. */
proomptersInQueue?: number;
/** Formatted wait estimate, or "no wait" for estimates of two seconds or less. */
estimatedQueueTime?: string;
};
/** OpenAI families additionally report trial and over-quota key counts (only when key checking is enabled). */
type OpenAIInfo = BaseFamilyInfo & {
trialKeys?: number;
overQuotaKeys?: number;
};
/** Anthropic families additionally report the number of keys flagged `isPozzed` (only when key checking is enabled). */
type AnthropicInfo = BaseFamilyInfo & { pozzedKeys?: number };
/** AWS families additionally carry a sentence describing how many active keys are potentially logged. */
type AwsInfo = BaseFamilyInfo & { privacy?: string };
/**
 * Shape of the object returned by `buildInfo`: general service fields plus one
 * optional section per model family, keyed by the family name.
 */
// prettier-ignore
export type ServiceInfo = {
/** `process.uptime()` rounded down to a whole number. */
uptime: number;
/** Endpoint name → URL, produced by `getEndpoints` from `SERVICE_ENDPOINTS`. */
endpoints: {
openai?: string;
openai2?: string;
"openai-image"?: string;
anthropic?: string;
"google-ai"?: string;
aws?: string;
azure?: string;
};
/** Total prompt count across all keys. */
proompts?: number;
/** Formatted token total with cost suffix. */
tookens?: string;
/** Result of `getUniqueIps()`; only present when `config.textModelRateLimit` is truthy. */
proomptersNow?: number;
/** Key-checker status message from `getStatus`; absent when nothing is pending. */
status?: string;
config: ReturnType<typeof listConfig>;
/** `process.env.BUILD_INFO`, or "dev" when that is unset or empty. */
build: string;
} & { [f in OpenAIModelFamily]?: OpenAIInfo }
& { [f in AnthropicModelFamily]?: AnthropicInfo; }
& { [f in AwsBedrockModelFamily]?: AwsInfo }
& { [f in AzureOpenAIModelFamily]?: BaseFamilyInfo; }
& { [f in GoogleAIModelFamily]?: BaseFamilyInfo };
// https://stackoverflow.com/a/66661477
// type DeepKeyOf<T> = (
// [T] extends [never]
// ? ""
// : T extends object
// ? {
// [K in Exclude<keyof T, symbol>]: `${K}${DotPrefix<DeepKeyOf<T[K]>>}`;
// }[Exclude<keyof T, symbol>]
// : ""
// ) extends infer D
// ? Extract<D, string>
// : never;
// type DotPrefix<T extends string> = T extends "" ? "" : `.${T}`;
// type ServiceInfoPath = `{${DeepKeyOf<ServiceInfo>}}`;
/**
 * Endpoint URL templates for each service, keyed by the endpoint name that is
 * published in `ServiceInfo.endpoints`. `getEndpoints` substitutes the
 * `%BASE%` placeholder with the caller-supplied base URL.
 */
const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
openai: {
openai: `%BASE%/openai`,
openai2: `%BASE%/openai/turbo-instruct`,
// Removed from the published list by `getEndpoints` when the "dall-e"
// family is not accessible.
"openai-image": `%BASE%/openai-image`,
},
anthropic: {
anthropic: `%BASE%/anthropic`,
},
"google-ai": {
"google-ai": `%BASE%/google-ai`,
},
aws: {
aws: `%BASE%/aws/claude`,
},
azure: {
azure: `%BASE%/azure/openai`,
},
};
/**
 * Per-model-family counters keyed as `<family>__<aggregate>`; written by
 * `addKeyToAggregates` and read by `getInfoForFamily`.
 */
const modelStats = new Map<ModelAggregateKey, number>();
/**
 * Service-wide counters (prompt/token totals and per-service key counts);
 * cleared by `buildInfo` before the keys are re-aggregated.
 */
const serviceStats = new Map<keyof AllStats, number>();
/** Most recently built info object, or undefined before the first build. */
let cachedInfo: ServiceInfo | undefined;
/** `Date.now()` timestamp taken when `cachedInfo` was built. */
let cacheTime = 0;
/** Identifies the (audience, baseUrl) pair that `cachedInfo` was built for. */
let cacheKey: string | undefined;

/**
 * Builds the service info object: uptime, endpoints, traffic totals, key
 * counts per service, key-checker status, one section per accessible model
 * family, the listed config and the build identifier.
 *
 * The result is cached for `CACHE_TTL` milliseconds. The cache is only reused
 * for a call with the same `baseUrl` and `forAdmin` values, because both
 * change the content (endpoint URLs, and whether stats are stripped).
 *
 * @param baseUrl Base URL substituted into the endpoint templates.
 * @param forAdmin When false and `config.staticServiceInfo` is set, traffic
 *   totals and per-family usage/queue members are removed from the result.
 * @returns The freshly built or cached `ServiceInfo`. The same object instance
 *   is returned on cache hits, so callers should treat it as read-only.
 */
export function buildInfo(baseUrl: string, forAdmin = false): ServiceInfo {
  const now = Date.now();
  const requestKey = `${forAdmin ? "admin" : "public"}|${baseUrl}`;
  if (
    cachedInfo !== undefined &&
    cacheKey === requestKey &&
    cacheTime + CACHE_TTL > now
  ) {
    return cachedInfo;
  }

  const keys = keyPool.list();
  // "turbo" is always treated as accessible, even when no key provides it.
  const accessibleFamilies = new Set(
    keys
      .flatMap((k) => k.modelFamilies)
      .filter((f) => config.allowedModelFamilies.includes(f))
      .concat("turbo")
  );

  // Both aggregate maps are rebuilt from scratch on every build; leaving
  // either one uncleared would add the same keys on top of the previous totals.
  modelStats.clear();
  serviceStats.clear();
  keys.forEach(addKeyToAggregates);

  const endpoints = getEndpoints(baseUrl, accessibleFamilies);
  const trafficStats = getTrafficStats();
  const { serviceInfo, modelFamilyInfo } =
    getServiceModelStats(accessibleFamilies);
  const status = getStatus();

  if (config.staticServiceInfo && !forAdmin) {
    // Non-admin callers only get the slowly-changing parts of the info.
    delete trafficStats.proompts;
    delete trafficStats.tookens;
    delete trafficStats.proomptersNow;
    for (const family of Object.keys(modelFamilyInfo)) {
      assertIsKnownModelFamily(family);
      delete modelFamilyInfo[family]?.proomptersInQueue;
      delete modelFamilyInfo[family]?.estimatedQueueTime;
      delete modelFamilyInfo[family]?.usage;
    }
  }

  const info: ServiceInfo = {
    uptime: Math.floor(process.uptime()),
    endpoints,
    ...trafficStats,
    ...serviceInfo,
    status,
    ...modelFamilyInfo,
    config: listConfig(),
    build: process.env.BUILD_INFO || "dev",
  };

  cachedInfo = info;
  cacheTime = now;
  cacheKey = requestKey;
  return info;
}
/**
 * Describes the state of key checking.
 *
 * @returns A fixed notice when `config.checkKeys` is off, a progress message
 *   when any service still has unchecked keys, and `undefined` otherwise.
 */
function getStatus() {
  if (!config.checkKeys) return "Key checking is disabled.";

  // Total the unchecked-key counters of every known service.
  const pending = LLM_SERVICES.reduce(
    (sum: number, service: LLMService) =>
      sum + (serviceStats.get(`${service}__uncheckedKeys`) || 0),
    0
  );

  if (pending) return `Checking ${pending} keys...`;
  return undefined;
}
/**
 * Resolves the endpoint templates of every service against `baseUrl`.
 *
 * @param baseUrl Value substituted for the `%BASE%` placeholder.
 * @param accessibleFamilies Model families currently accessible; the
 *   "openai-image" endpoint is omitted unless "dall-e" is among them.
 * @returns Endpoint name → URL.
 */
function getEndpoints(baseUrl: string, accessibleFamilies: Set<ModelFamily>) {
  const endpoints: Record<string, string> = {};
  for (const service of LLM_SERVICES) {
    for (const [name, template] of Object.entries(SERVICE_ENDPOINTS[service])) {
      // A replacer function inserts baseUrl verbatim. A plain string
      // replacement would interpret `$&`, `$'`, `$$` etc. inside baseUrl as
      // substitution patterns and corrupt the URL.
      endpoints[name] = template.replace("%BASE%", () => baseUrl);
    }
    if (service === "openai" && !accessibleFamilies.has("dall-e")) {
      delete endpoints["openai-image"];
    }
  }
  return endpoints;
}
/** The traffic-related members of `ServiceInfo`. */
type TrafficStats = Pick<ServiceInfo, "proompts" | "tookens" | "proomptersNow">;

/**
 * Reads the service-wide totals out of `serviceStats`.
 *
 * @returns Prompt count and formatted token total; `proomptersNow` is only
 *   included when `config.textModelRateLimit` is truthy.
 */
function getTrafficStats(): TrafficStats {
  const totalTokens = serviceStats.get("tokens") || 0;
  const totalCost = serviceStats.get("tokenCost") || 0;

  const stats: TrafficStats = {
    proompts: serviceStats.get("proompts") || 0,
    tookens: `${prettyTokens(totalTokens)}${getCostSuffix(totalCost)}`,
  };
  if (config.textModelRateLimit) {
    stats.proomptersNow = getUniqueIps();
  }
  return stats;
}
/**
 * Collects the per-service key counts and the per-family info sections.
 *
 * Services without any keys are skipped entirely. For every other service the
 * key count is published as `<service>Keys` and an info section is built for
 * each accessible family that belongs to it. The number of distinct OpenAI
 * organizations is added when key checking is enabled.
 *
 * @param accessibleFamilies Families for which a section may be produced.
 */
function getServiceModelStats(accessibleFamilies: Set<ModelFamily>) {
  const serviceInfo: {
    [s in LLMService as `${s}${"Keys" | "Orgs"}`]?: number;
  } = {};
  const modelFamilyInfo: { [f in ModelFamily]?: BaseFamilyInfo } = {};

  for (const service of LLM_SERVICES) {
    const keyCount = serviceStats.get(`${service}__keys`) || 0;
    if (keyCount) {
      serviceInfo[`${service}Keys`] = keyCount;

      accessibleFamilies.forEach((family) => {
        if (MODEL_FAMILY_SERVICE[family] !== service) return;
        modelFamilyInfo[family] = getInfoForFamily(family);
      });

      if (service === "openai" && config.checkKeys) {
        serviceInfo.openaiOrgs = getUniqueOpenAIOrgs(keyPool.list());
      }
    }
  }

  return { serviceInfo, modelFamilyInfo };
}
/**
 * Counts the distinct `organizationId` values among the OpenAI keys in `keys`.
 * Keys of other services are ignored. A missing id counts as one distinct
 * value (`undefined`).
 */
function getUniqueOpenAIOrgs(keys: KeyPoolKey[]) {
  const seenOrgs = new Set<unknown>();
  for (const key of keys) {
    if (key.service !== "openai") continue;
    seenOrgs.add((key as any).organizationId);
  }
  return seenOrgs.size;
}
/**
 * Adds `delta` (default 1) to the counter stored under `key`, treating a
 * missing entry as 0.
 */
function increment<T extends keyof AllStats | ModelAggregateKey>(
  map: Map<T, number>,
  key: T,
  delta = 1
) {
  const current = map.get(key) || 0;
  map.set(key, current + delta);
}
/**
 * Folds a single key into `serviceStats` and `modelStats`.
 *
 * Service-wide: adds the key's prompt count, touches the `<service>__keys`
 * counter of every service (adding 1 only for the key's own service), and
 * finally adds the key's token total and token cost.
 *
 * Per family: adds tokens plus active/revoked counts, and the service-specific
 * extras (trial/over-quota for OpenAI, pozzed for Anthropic, logging status
 * for AWS). Unchecked-key counters are only kept for OpenAI and Anthropic.
 *
 * @throws Error("Invalid key type") when a key's `service` tag does not match
 *   its type guard.
 */
function addKeyToAggregates(k: KeyPoolKey) {
  increment(serviceStats, "proompts", k.promptCount);

  // Each service's key counter is written for every key so that all five
  // entries exist in the map after the first key has been processed.
  const countedServices = [
    "openai",
    "anthropic",
    "google-ai",
    "aws",
    "azure",
  ] as const;
  for (const service of countedServices) {
    increment(serviceStats, `${service}__keys`, k.service === service ? 1 : 0);
  }

  let keyTokens = 0;
  let keyCost = 0;

  switch (k.service) {
    case "openai": {
      if (!keyIsOpenAIKey(k)) throw new Error("Invalid key type");
      increment(serviceStats, "openai__uncheckedKeys", k.lastChecked ? 0 : 1);
      for (const family of k.modelFamilies) {
        const used = k[`${family}Tokens`];
        keyTokens += used;
        keyCost += getTokenCostUsd(family, used);
        increment(modelStats, `${family}__tokens`, used);
        increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
        increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
        increment(modelStats, `${family}__trial`, k.isTrial ? 1 : 0);
        increment(modelStats, `${family}__overQuota`, k.isOverQuota ? 1 : 0);
      }
      break;
    }
    case "azure": {
      if (!keyIsAzureKey(k)) throw new Error("Invalid key type");
      for (const family of k.modelFamilies) {
        const used = k[`${family}Tokens`];
        keyTokens += used;
        keyCost += getTokenCostUsd(family, used);
        increment(modelStats, `${family}__tokens`, used);
        increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
        increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
      }
      break;
    }
    case "anthropic": {
      if (!keyIsAnthropicKey(k)) throw new Error("Invalid key type");
      const family = "claude";
      const used = k.claudeTokens;
      keyTokens += used;
      keyCost += getTokenCostUsd(family, used);
      increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
      increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
      increment(modelStats, `${family}__tokens`, used);
      increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0);
      increment(
        serviceStats,
        "anthropic__uncheckedKeys",
        k.lastChecked ? 0 : 1
      );
      break;
    }
    case "google-ai": {
      if (!keyIsGoogleAIKey(k)) throw new Error("Invalid key type");
      const family = "gemini-pro";
      const used = k["gemini-proTokens"];
      keyTokens += used;
      keyCost += getTokenCostUsd(family, used);
      increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
      increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
      increment(modelStats, `${family}__tokens`, used);
      break;
    }
    case "aws": {
      if (!keyIsAwsKey(k)) throw new Error("Invalid key type");
      const family = "aws-claude";
      const used = k["aws-claudeTokens"];
      keyTokens += used;
      keyCost += getTokenCostUsd(family, used);
      increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
      increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
      increment(modelStats, `${family}__tokens`, used);
      // Disabled and not-yet-checked keys are left out of the logging count;
      // keys whose logging status is unknown are still counted as logged.
      const isPotentiallyLogged =
        Boolean(k.lastChecked) &&
        !k.isDisabled &&
        k.awsLoggingStatus !== "disabled";
      increment(modelStats, `${family}__awsLogged`, isPotentiallyLogged ? 1 : 0);
      break;
    }
    default:
      assertNever(k.service);
  }

  increment(serviceStats, "tokens", keyTokens);
  increment(serviceStats, "tokenCost", keyCost);
}
/**
 * Assembles the published info section for one model family from the
 * aggregated `modelStats`.
 *
 * Always includes usage, active/revoked key counts and queue figures. When
 * key checking is enabled, service-specific members are added: over-quota and
 * (for "turbo" only) trial counts for OpenAI, pozzed count for Anthropic, and
 * a logging notice for AWS.
 */
function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
  const totalTokens = modelStats.get(`${family}__tokens`) || 0;
  const totalCost = getTokenCostUsd(family, totalTokens);

  const info: BaseFamilyInfo & OpenAIInfo & AnthropicInfo & AwsInfo = {
    usage: `${prettyTokens(totalTokens)} tokens${getCostSuffix(totalCost)}`,
    activeKeys: modelStats.get(`${family}__active`) || 0,
    revokedKeys: modelStats.get(`${family}__revoked`) || 0,
  };

  // Service-specific stats are only meaningful once keys have been checked.
  if (config.checkKeys) {
    const service = MODEL_FAMILY_SERVICE[family];
    if (service === "openai") {
      info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
      if (family === "turbo") {
        info.trialKeys = modelStats.get(`${family}__trial`) || 0;
      } else {
        // Trial and revoked counts are reported for turbo only: trials are
        // turbo 99% of the time, and for an invalid key we don't know which
        // models it might have had assigned to it.
        delete info.revokedKeys;
      }
    } else if (service === "anthropic") {
      info.pozzedKeys = modelStats.get(`${family}__pozzed`) || 0;
    } else if (service === "aws") {
      const logged = modelStats.get(`${family}__awsLogged`) || 0;
      info.privacy = config.allowAwsLogging
        ? `${logged} active keys are potentially logged.`
        : `${logged} active keys are potentially logged and can't be used. Set ALLOW_AWS_LOGGING=true to override.`;
    }
  }

  // Queue figures come last so they follow the key stats in the output.
  const { proomptersInQueue, estimatedQueueTime } = getQueueInformation(family);
  info.proomptersInQueue = proomptersInQueue;
  info.estimatedQueueTime = estimatedQueueTime;
  return info;
}
/**
 * Returns the queue length and a formatted wait estimate for a partition.
 *
 * The estimate is rounded once to whole seconds and then split into minutes
 * and seconds, so the parts always agree with each other (90 000 ms is
 * "1min, 30sec"; the seconds part never reaches 60). Estimates that round to
 * less than a minute are shown as seconds only, and estimates of 2000 ms or
 * less are reported as "no wait".
 */
function getQueueInformation(partition: ModelFamily) {
  const waitMs = getEstimatedWaitTime(partition);

  const totalSeconds = Math.round(waitMs / 1000);
  // Whole minutes must be floored: rounding would turn 1.5 minutes into
  // "2min" while the seconds part still says "30sec".
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  const waitTime =
    minutes > 0 ? `${minutes}min, ${seconds}sec` : `${seconds}sec`;

  return {
    proomptersInQueue: getQueueLength(partition),
    estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait",
  };
}