Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)

khanon 2023-07-21 04:00:12 +00:00
parent cbf9f16108
commit aa5380d2ef
10 changed files with 199 additions and 190 deletions

package-lock.json (generated, 18 lines changed)

@@ -16,7 +16,6 @@
         "firebase-admin": "^11.10.1",
         "googleapis": "^122.0.0",
         "http-proxy-middleware": "^3.0.0-beta.1",
-        "openai": "^3.2.1",
         "pino": "^8.11.0",
         "pino-http": "^8.3.3",
         "showdown": "^2.1.0",
@@ -3163,23 +3162,6 @@
         "wrappy": "1"
       }
     },
-    "node_modules/openai": {
-      "version": "3.2.1",
-      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
-      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
-      "dependencies": {
-        "axios": "^0.26.0",
-        "form-data": "^4.0.0"
-      }
-    },
-    "node_modules/openai/node_modules/axios": {
-      "version": "0.26.1",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
-      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
-      "dependencies": {
-        "follow-redirects": "^1.14.8"
-      }
-    },
     "node_modules/p-limit": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",

package.json

@@ -25,7 +25,6 @@
     "firebase-admin": "^11.10.1",
     "googleapis": "^122.0.0",
     "http-proxy-middleware": "^3.0.0-beta.1",
-    "openai": "^3.2.1",
     "pino": "^8.11.0",
     "pino-http": "^8.3.3",
     "showdown": "^2.1.0",

App config (type Config / export const config)

@@ -88,11 +88,11 @@ type Config = {
    *
    * `none`: Hide quota information
    *
-   * `partial`: Display quota information only as a percentage
+   * `partial`: (deprecated) Same as `full` because usage is no longer tracked
    *
-   * `full`: Display quota information as usage against total capacity
+   * `full`: Displays information about keys' quota limits
    */
-  quotaDisplayMode: "none" | "partial" | "full";
+  quotaDisplayMode: "none" | "full";
   /**
    * Which request queueing strategy to use when keys are over their rate limit.
    *
@@ -152,7 +152,7 @@ export const config: Config = {
   ),
   logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
   checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
-  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
+  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
  promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
  promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
  googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
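
The practical upshot of the config change: `partial` is removed from the type, the default becomes `full`, and (per the info-page change further down) any proxy still setting QUOTA_DISPLAY_MODE=partial is treated as `full`. A minimal TypeScript sketch of that fallback behavior; the helper name is hypothetical and not part of this commit:

type QuotaDisplayMode = "none" | "full";

// Hypothetical helper, for illustration only: any value other than "none"
// (including the now-deprecated "partial") is displayed the same as "full".
function normalizeQuotaDisplayMode(raw?: string): QuotaDisplayMode {
  return raw === "none" ? "none" : "full";
}

normalizeQuotaDisplayMode("partial");  // "full"
normalizeQuotaDisplayMode(undefined);  // "full" (the new default)
normalizeQuotaDisplayMode("none");     // "none"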

Info page (getOpenAIInfo)

@@ -2,7 +2,7 @@ import fs from "fs";
 import { Request, Response } from "express";
 import showdown from "showdown";
 import { config, listConfig } from "./config";
-import { keyPool } from "./key-management";
+import { OpenAIKey, keyPool } from "./key-management";
 import { getUniqueIps } from "./proxy/rate-limit";
 import {
   QueuePartition,
@@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
 type ServiceInfo = {
   activeKeys: number;
   trialKeys?: number;
-  quota: string;
+  activeLimit: string;
+  revokedKeys?: number;
+  overQuotaKeys?: number;
   proomptersInQueue: number;
   estimatedQueueTime: string;
 };
@@ -88,51 +90,55 @@ type ServiceInfo = {
 function getOpenAIInfo() {
   const info: { [model: string]: Partial<ServiceInfo> } = {};
-  const keys = keyPool.list().filter((k) => k.service === "openai");
+  const keys = keyPool
+    .list()
+    .filter((k) => k.service === "openai") as OpenAIKey[];
   const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
   if (keyPool.anyUnchecked()) {
     const uncheckedKeys = keys.filter((k) => !k.lastChecked);
-    info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
+    info.status =
+      `Performing startup key checks (${uncheckedKeys.length} left).` as any;
   } else {
     delete info.status;
   }
   if (config.checkKeys) {
-    const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
-    const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
+    const turboKeys = keys.filter((k) => !k.isGpt4);
+    const gpt4Keys = keys.filter((k) => k.isGpt4);
     const quota: Record<string, string> = { turbo: "", gpt4: "" };
-    const turboQuota = keyPool.remainingQuota("openai") * 100;
-    const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
+    const turboQuota = keyPool.activeLimitInUsd("openai");
+    const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
-    if (config.quotaDisplayMode === "full") {
-      const turboUsage = keyPool.usageInUsd("openai");
-      const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
-      quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
-      quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
-    } else {
-      quota.turbo = `${Math.round(turboQuota)}%`;
-      quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
+    // Don't invert this condition; some proxies may be using the now-deprecated
+    // 'partial' option which we want to treat as 'full' here.
+    if (config.quotaDisplayMode !== "none") {
+      quota.turbo = turboQuota;
+      quota.gpt4 = gpt4Quota;
     }
     info.turbo = {
       activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
       trialKeys: turboKeys.filter((k) => k.isTrial).length,
-      quota: quota.turbo,
+      activeLimit: quota.turbo,
+      revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
+      overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
     };
     if (hasGpt4) {
       info.gpt4 = {
         activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
         trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
-        quota: quota.gpt4,
+        activeLimit: quota.gpt4,
+        revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
+        overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
       };
     }
     if (config.quotaDisplayMode === "none") {
-      delete info.turbo?.quota;
-      delete info.gpt4?.quota;
+      delete info.turbo?.activeLimit;
+      delete info.gpt4?.activeLimit;
     }
   } else {
     info.status = "Key checking is disabled." as any;
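
The net effect on the info page: each model family now reports counts of revoked and over-quota keys plus an aggregate dollar limit, instead of a usage percentage. A sketch of one entry's shape after this change; the values are illustrative, not taken from the commit:

// Illustrative ServiceInfo entry (field names from this diff, values made up).
const exampleTurboInfo = {
  activeKeys: 10,           // keys not disabled
  trialKeys: 2,             // keys with no payment method attached
  activeLimit: "$1200.00",  // sum of hardLimit over active keys, from activeLimitInUsd
  revokedKeys: 1,           // disabled after a 401 / access_terminated / billing_not_active
  overQuotaKeys: 4,         // disabled after a non-transient 429 insufficient_quota
  proomptersInQueue: 0,
  estimatedQueueTime: "none",
};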

AnthropicKeyProvider

@@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
     key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
   }
-  public remainingQuota() {
-    const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
-    const allKeys = this.keys.length;
-    if (activeKeys === 0) return 0;
-    return Math.round((activeKeys / allKeys) * 100) / 100;
-  }
-  public usageInUsd() {
-    return "$0.00 / ∞";
+  public activeLimitInUsd() {
+    return "∞";
   }
 }

KeyProvider interface

@@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
   anyUnchecked(): boolean;
   incrementPrompt(hash: string): void;
   getLockoutPeriod(model: Model): number;
-  remainingQuota(options?: Record<string, unknown>): number;
-  usageInUsd(options?: Record<string, unknown>): string;
+  activeLimitInUsd(options?: Record<string, unknown>): string;
   markRateLimited(hash: string): void;
 }

KeyPool

@@ -32,9 +32,15 @@ export class KeyPool {
     return this.keyProviders.flatMap((provider) => provider.list());
   }
-  public disable(key: Key): void {
+  public disable(key: Key, reason: "quota" | "revoked"): void {
     const service = this.getKeyProvider(key.service);
     service.disable(key);
+    if (service instanceof OpenAIKeyProvider) {
+      service.update(key.hash, {
+        isRevoked: reason === "revoked",
+        isOverQuota: reason === "quota",
+      });
+    }
   }
   public update(key: Key, props: AllowedPartial): void {
@@ -75,18 +81,11 @@
     }
   }
-  public remainingQuota(
-    service: AIService,
-    options?: Record<string, unknown>
-  ): number {
-    return this.getKeyProvider(service).remainingQuota(options);
-  }
-  public usageInUsd(
+  public activeLimitInUsd(
     service: AIService,
     options?: Record<string, unknown>
   ): string {
-    return this.getKeyProvider(service).usageInUsd(options);
+    return this.getKeyProvider(service).activeLimitInUsd(options);
   }
   private getService(model: Model): AIService {
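
Callers of `KeyPool.disable` must now say why a key is being disabled so the pool can stamp `isRevoked`/`isOverQuota` onto OpenAI keys (other providers just disable the key). A short usage sketch, with the key object stubbed out for illustration:

// Sketch: the reason argument is mapped onto the OpenAI key flags by KeyPool.disable.
declare const keyPool: KeyPool;
declare const deadKey: Key;

keyPool.disable(deadKey, "quota");   // sets isOverQuota: true, isRevoked: false
keyPool.disable(deadKey, "revoked"); // sets isRevoked: true, isOverQuota: false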

OpenAIKeyChecker

@@ -1,14 +1,24 @@
 import axios, { AxiosError } from "axios";
-import { Configuration, OpenAIApi } from "openai";
 import { logger } from "../../logger";
 import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
+/** Minimum time in between any two key checks. */
 const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
-const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
+/**
+ * Minimum time in between checks for a given key. Because we can no longer
+ * read quota usage, there is little reason to check a single key more often
+ * than this.
+ **/
+const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
+const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
+const GET_MODELS_URL = "https://api.openai.com/v1/models";
 const GET_SUBSCRIPTION_URL =
   "https://api.openai.com/dashboard/billing/subscription";
-const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
+type GetModelsResponse = {
+  data: [{ id: string }];
+};
 type GetSubscriptionResponse = {
   plan: { title: string };
@@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
   system_hard_limit_usd: number;
 };
-type GetUsageResponse = {
-  total_usage: number;
-};
 type OpenAIError = {
   error: { type: string; code: string; param: unknown; message: string };
 };
@@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
   /**
    * Schedules the next check. If there are still keys yet to be checked, it
    * will schedule a check immediately for the next unchecked key. Otherwise,
-   * it will schedule a check in several minutes for the oldest key.
+   * it will schedule a check for the least recently checked key, respecting
+   * the minimum check interval.
    **/
   private scheduleNextCheck() {
     const enabledKeys = this.keys.filter((key) => !key.isDisabled);
@@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
       key.lastChecked < oldest.lastChecked ? key : oldest
     );
-    // Don't check any individual key more than once every 5 minutes.
-    // Also, don't check anything more often than once every 3 seconds.
+    // Don't check any individual key too often.
+    // Don't check anything at all at a rate faster than once per 3 seconds.
     const nextCheck = Math.max(
       oldestKey.lastChecked + KEY_CHECK_PERIOD,
       this.lastCheck + MIN_CHECK_INTERVAL
@@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
     this.log.debug({ key: key.hash }, "Checking key...");
     let isInitialCheck = !key.lastChecked;
     try {
-      // During the initial check we need to get the subscription first because
-      // trials have different behavior.
+      // We only need to check for provisioned models on the initial check.
       if (isInitialCheck) {
-        const subscription = await this.getSubscription(key);
-        this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
-        if (key.isTrial) {
-          this.log.debug(
-            { key: key.hash },
-            "Attempting generation on trial key."
-          );
-          await this.assertCanGenerate(key);
-        }
-        const [provisionedModels, usage] = await Promise.all([
-          this.getProvisionedModels(key),
-          this.getUsage(key),
-        ]);
+        const [subscription, provisionedModels, _livenessTest] =
+          await Promise.all([
+            this.getSubscription(key),
+            this.getProvisionedModels(key),
+            this.testLiveness(key),
+          ]);
         const updates = {
           isGpt4: provisionedModels.gpt4,
+          isTrial: !subscription.has_payment_method,
           softLimit: subscription.soft_limit_usd,
           hardLimit: subscription.hard_limit_usd,
           systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
         };
         this.updateKey(key.hash, updates);
       } else {
-        // Don't check provisioned models after the initial check because it's
-        // not likely to change.
-        const [subscription, usage] = await Promise.all([
+        // Provisioned models don't change, so we don't need to check them again
+        const [subscription, _livenessTest] = await Promise.all([
           this.getSubscription(key),
-          this.getUsage(key),
+          this.testLiveness(key),
         ]);
         const updates = {
           softLimit: subscription.soft_limit_usd,
           hardLimit: subscription.hard_limit_usd,
           systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
         };
         this.updateKey(key.hash, updates);
       }
       this.log.info(
-        { key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
+        { key: key.hash, hardLimit: key.hardLimit },
         "Key check complete."
       );
     } catch (error) {
@@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
   private async getProvisionedModels(
     key: OpenAIKey
   ): Promise<{ turbo: boolean; gpt4: boolean }> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    const models = (await openai.listModels()!).data.data;
+    const opts = { headers: { Authorization: `Bearer ${key.key}` } };
+    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
+    const models = data.data;
     const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
     const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
+    // We want to update the key's `isGpt4` flag here, but we don't want to
+    // update its `lastChecked` timestamp because we need to let the liveness
+    // check run before we can consider the key checked.
+    // Need to use `find` here because keys are cloned from the pool.
+    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
+    this.updateKey(key.hash, {
+      isGpt4: gpt4,
+      lastChecked: keyFromPool.lastChecked,
+    });
     return { turbo, gpt4 };
   }
@@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
     return data;
   }
-  private async getUsage(key: OpenAIKey) {
-    const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
-    const url = `${GET_USAGE_URL}?${querystring}`;
-    const { data } = await axios.get<GetUsageResponse>(url, {
-      headers: { Authorization: `Bearer ${key.key}` },
-    });
-    return parseFloat((data.total_usage / 100).toFixed(2));
-  }
   private handleAxiosError(key: OpenAIKey, error: AxiosError) {
-    if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
+    if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
       const { status, data } = error.response;
       if (status === 401) {
         this.log.warn(
           { key: key.hash, error: data },
           "Key is invalid or revoked. Disabling key."
         );
-        this.updateKey(key.hash, { isDisabled: true });
-      } else if (status === 429 && data.error.type === "insufficient_quota") {
-        this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key is out of quota. Disabling key."
-        );
-        this.updateKey(key.hash, { isDisabled: true });
-      } else if (status === 429 && data.error.type === "access_terminated") {
-        this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key has been terminated due to policy violations. Disabling key."
-        );
-        this.updateKey(key.hash, { isDisabled: true });
+        this.updateKey(key.hash, {
+          isDisabled: true,
+          isRevoked: true,
+          isGpt4: false,
+        });
+      } else if (status === 429) {
+        switch (data.error.type) {
+          case "insufficient_quota":
+          case "access_terminated":
+          case "billing_not_active":
+            const isOverQuota = data.error.type === "insufficient_quota";
+            const isRevoked = !isOverQuota;
+            const isGpt4 = isRevoked ? false : key.isGpt4;
+            this.log.warn(
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Key returned a non-transient 429 error. Disabling key."
+            );
+            this.updateKey(key.hash, {
+              isDisabled: true,
+              isRevoked,
+              isOverQuota,
+              isGpt4,
+            });
+            break;
+          case "requests":
+            // Trial keys have extremely low requests-per-minute limits and we
+            // can often hit them just while checking the key, so we need to
+            // retry the check later to know if the key has quota remaining.
+            this.log.warn(
+              { key: key.hash, error: data },
+              "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
+            );
+            // To trigger a shorter than usual delay before the next check, we
+            // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
+            // This will cause the usual key check scheduling logic to schedule
+            // the next check in 15 seconds. This also prevents the key from
+            // holding up startup checks for other keys.
+            const fifteenSeconds = 15 * 1000;
+            const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
+            this.updateKey(key.hash, { lastChecked: next });
+            break;
+          case "tokens":
+            // Hitting a token rate limit, even on a trial key, actually implies
+            // that the key is valid and can generate completions, so we will
+            // treat this as effectively a successful `testLiveness` call.
+            this.log.info(
+              { key: key.hash },
+              "Key is currently `tokens` rate limited; assuming it is operational."
+            );
+            this.updateKey(key.hash, { lastChecked: Date.now() });
+            break;
+          default:
+            this.log.error(
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
+            );
+            // We don't know what this error means, so we just let the key
+            // through and maybe it will fail when someone tries to use it.
+            this.updateKey(key.hash, { lastChecked: Date.now() });
+        }
       } else {
         this.log.error(
           { key: key.hash, status, error: data },
-          "Encountered API error while checking key."
+          "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
         );
+        this.updateKey(key.hash, { lastChecked: Date.now() });
       }
       return;
     }
     this.log.error(
-      { key: key.hash, error },
-      "Network error while checking key; trying again later."
+      { key: key.hash, error: error.message },
+      "Network error while checking key; trying this key again in a minute."
     );
+    const oneMinute = 60 * 1000;
+    const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
+    this.updateKey(key.hash, { lastChecked: next });
   }
   /**
-   * Trial key usage reporting is inaccurate, so we need to run an actual
-   * completion to test them for liveness.
+   * Tests whether the key is valid and has quota remaining. The request we send
+   * is actually not valid, but keys which are revoked or out of quota will fail
+   * with a 401 or 429 error instead of the expected 400 Bad Request error.
+   * This lets us test keys without spending any quota.
    */
-  private async assertCanGenerate(key: OpenAIKey): Promise<void> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    // This will throw an AxiosError if the key is invalid or out of quota.
-    await openai.createChatCompletion({
-      model: "gpt-3.5-turbo",
-      messages: [{ role: "user", content: "Hello" }],
-      max_tokens: 1,
-    });
-  }
+  private async testLiveness(key: OpenAIKey): Promise<void> {
+    const payload = {
+      model: "gpt-3.5-turbo",
+      max_tokens: -1,
+      messages: [{ role: "user", content: "" }],
+    };
+    const { data } = await axios.post<OpenAIError>(
+      POST_CHAT_COMPLETIONS_URL,
+      payload,
+      {
+        headers: { Authorization: `Bearer ${key.key}` },
+        validateStatus: (status) => status === 400,
+      }
+    );
+    if (data.error.type === "invalid_request_error") {
+      // This is the expected error type for our bad prompt, so key is valid.
+      return;
+    } else {
+      this.log.warn(
+        { key: key.hash, error: data },
+        "Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
+      );
+    }
+  }
-  static getUsageQuerystring(isTrial: boolean) {
-    // For paid keys, the limit resets every month, so we can use the first day
-    // of the current month.
-    // For trial keys, the limit does not reset and we don't know when the key
-    // was created, so we use 99 days ago because that's as far back as the API
-    // will let us go.
-    // End date needs to be set to the beginning of the next day so that we get
-    // usage for the current day.
-    const today = new Date();
-    const startDate = isTrial
-      ? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
-      : new Date(today.getFullYear(), today.getMonth(), 1);
-    const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
-    return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
-      endDate.toISOString().split("T")[0]
-    }`;
-  }
-  static errorIsOpenAiError(
+  static errorIsOpenAIError(
     error: AxiosError
   ): error is AxiosError<OpenAIError> {
     const data = error.response?.data as any;
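
Both retry branches above (the 15-second retry for `requests` rate limits and the one-minute retry after a network error) work by back-dating `lastChecked` rather than by scheduling a timer directly. A small worked example of the arithmetic the scheduler then performs; the two constants are copied from this file, the rest is illustrative:

// Worked example of the back-dating trick used for transient failures.
const MIN_CHECK_INTERVAL = 3 * 1000;     // 3 seconds
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour

const now = Date.now();
const lastCheckOfAnyKey = now;           // assume some check just finished

// Back-date the key so it looks like it was checked (1 hour - 15 s) ago...
const fifteenSeconds = 15 * 1000;
const backdated = now - (KEY_CHECK_PERIOD - fifteenSeconds);

// ...so the normal scheduling formula lands about 15 s in the future.
const nextCheck = Math.max(
  backdated + KEY_CHECK_PERIOD,          // = now + 15 s
  lastCheckOfAnyKey + MIN_CHECK_INTERVAL // = now + 3 s
);
console.log(nextCheck - now); // 15000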

OpenAIKeyProvider

@@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
 export interface OpenAIKey extends Key {
   readonly service: "openai";
-  /** The current usage of this key. */
-  usage: number;
+  /** Set when key check returns a 401. */
+  isRevoked: boolean;
+  /** Set when key check returns a non-transient 429. */
+  isOverQuota: boolean;
   /** Threshold at which a warning email will be sent by OpenAI. */
   softLimit: number;
   /** Threshold at which the key will be disabled because it has reached the user-defined limit. */
@@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
 export type OpenAIKeyUpdate = Omit<
   Partial<OpenAIKey>,
-  "key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
+  "key" | "hash" | "promptCount"
 >;
 export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
@@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
       isGpt4: true,
       isTrial: false,
       isDisabled: false,
+      isRevoked: false,
+      isOverQuota: false,
       softLimit: 0,
       hardLimit: 0,
       systemHardLimit: 0,
@@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   /** Called by the key checker to update key information. */
   public update(keyHash: string, update: OpenAIKeyUpdate) {
     const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
-    Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
+    Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
     // this.writeKeyStatus();
   }
@@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
     const keyFromPool = this.keys.find((k) => k.key === key.key);
     if (!keyFromPool || keyFromPool.isDisabled) return;
     keyFromPool.isDisabled = true;
-    // If it's disabled just set the usage to the hard limit so it doesn't
-    // mess with the aggregate usage.
-    keyFromPool.usage = keyFromPool.hardLimit;
     this.log.warn({ key: key.hash }, "Key disabled");
   }
@@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   }
   /**
-   * Returns the remaining aggregate quota for all keys as a percentage.
-   * Can go slightly negative because keys will typically go slightly over their
-   * limit before being disabled. Can sometimes go *really* negative if the
-   * cron job OpenAI uses to disable keys fails, as the key will essentially
-   * have unlimited quota.
-   **/
-  public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return 0;
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
-    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-    return 1 - totalUsage / totalLimit;
-  }
-  /** Returns used and available usage in USD. */
-  public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return "???";
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
-    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-    return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
+   * Returns the total quota limit of all keys in USD. Keys which are disabled
+   * are not included in the total.
+   */
+  public activeLimitInUsd(
+    { gpt4 }: { gpt4: boolean } = { gpt4: false }
+  ): string {
+    const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
+    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
+    return `$${totalLimit.toFixed(2)}`;
   }
   /** Writes key status to disk. */
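
With per-key usage gone, the provider's aggregate metric is just the sum of hard limits over keys that are still enabled. A standalone restatement of `activeLimitInUsd` for illustration (not the code as committed, which reads from the provider's own key list):

// Standalone sketch of the aggregation performed by activeLimitInUsd.
interface LimitedKey {
  isDisabled: boolean;
  isGpt4: boolean;
  hardLimit: number; // USD
}

function activeLimitInUsd(keys: LimitedKey[], gpt4 = false): string {
  const active = keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
  const totalLimit = active.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
  return `$${totalLimit.toFixed(2)}`;
}

console.log(
  activeLimitInUsd([
    { isDisabled: false, isGpt4: false, hardLimit: 120 },
    { isDisabled: true, isGpt4: false, hardLimit: 120 }, // excluded
  ])
); // "$120.00"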

Proxy response handler (handleUpstreamErrors / handleOpenAIRateLimitError)

@@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     }
   } else if (statusCode === 401) {
     // Key is invalid or was revoked
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
   } else if (statusCode === 429) {
     // OpenAI uses this for a bunch of different rate-limiting scenarios.
@@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
   const type = errorPayload.error?.type;
   if (type === "insufficient_quota") {
     // Billing quota exceeded (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "quota");
     errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
   } else if (type === "access_terminated") {
     // Account banned (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
   } else if (type === "billing_not_active") {
     // Billing is not active (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
   } else if (type === "requests" || type === "tokens") {
     // Per-minute request or token rate limit is exceeded, which we can retry