Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)

khanon 2023-07-21 04:00:12 +00:00
parent cbf9f16108
commit aa5380d2ef
10 changed files with 199 additions and 190 deletions

package-lock.json (generated, 18 lines changed)
View File

@ -16,7 +16,6 @@
"firebase-admin": "^11.10.1",
"googleapis": "^122.0.0",
"http-proxy-middleware": "^3.0.0-beta.1",
"openai": "^3.2.1",
"pino": "^8.11.0",
"pino-http": "^8.3.3",
"showdown": "^2.1.0",
@ -3163,23 +3162,6 @@
"wrappy": "1"
}
},
"node_modules/openai": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
"integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
"dependencies": {
"axios": "^0.26.0",
"form-data": "^4.0.0"
}
},
"node_modules/openai/node_modules/axios": {
"version": "0.26.1",
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
"dependencies": {
"follow-redirects": "^1.14.8"
}
},
"node_modules/p-limit": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",

View File

@ -25,7 +25,6 @@
"firebase-admin": "^11.10.1",
"googleapis": "^122.0.0",
"http-proxy-middleware": "^3.0.0-beta.1",
"openai": "^3.2.1",
"pino": "^8.11.0",
"pino-http": "^8.3.3",
"showdown": "^2.1.0",

View File

@ -88,11 +88,11 @@ type Config = {
*
* `none`: Hide quota information
*
* `partial`: Display quota information only as a percentage
* `partial`: (deprecated) Same as `full` because usage is no longer tracked
*
* `full`: Display quota information as usage against total capacity
* `full`: Displays information about keys' quota limits
*/
quotaDisplayMode: "none" | "partial" | "full";
quotaDisplayMode: "none" | "full";
/**
* Which request queueing strategy to use when keys are over their rate limit.
*
@ -152,7 +152,7 @@ export const config: Config = {
),
logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
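Since the deprecated `partial` mode now behaves the same as `full`, here is a minimal sketch of the effective behavior. The helper below is hypothetical; the real code keeps the raw value and the info page simply checks for anything other than `"none"`.

type EffectiveQuotaDisplayMode = "none" | "full";
// Hypothetical normalization illustrating the new semantics of QUOTA_DISPLAY_MODE.
function effectiveQuotaDisplayMode(raw: string | undefined): EffectiveQuotaDisplayMode {
  if (raw === "none") return "none";
  return "full"; // "partial", "full", and unset all display key limit information
}
effectiveQuotaDisplayMode(process.env.QUOTA_DISPLAY_MODE); // e.g. "partial" -> "full"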

View File

@ -2,7 +2,7 @@ import fs from "fs";
import { Request, Response } from "express";
import showdown from "showdown";
import { config, listConfig } from "./config";
import { keyPool } from "./key-management";
import { OpenAIKey, keyPool } from "./key-management";
import { getUniqueIps } from "./proxy/rate-limit";
import {
QueuePartition,
@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
type ServiceInfo = {
activeKeys: number;
trialKeys?: number;
quota: string;
activeLimit: string;
revokedKeys?: number;
overQuotaKeys?: number;
proomptersInQueue: number;
estimatedQueueTime: string;
};
@ -88,51 +90,55 @@ type ServiceInfo = {
function getOpenAIInfo() {
const info: { [model: string]: Partial<ServiceInfo> } = {};
const keys = keyPool.list().filter((k) => k.service === "openai");
const keys = keyPool
.list()
.filter((k) => k.service === "openai") as OpenAIKey[];
const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
if (keyPool.anyUnchecked()) {
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
info.status =
`Performing startup key checks (${uncheckedKeys.length} left).` as any;
} else {
delete info.status;
}
if (config.checkKeys) {
const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
const turboKeys = keys.filter((k) => !k.isGpt4);
const gpt4Keys = keys.filter((k) => k.isGpt4);
const quota: Record<string, string> = { turbo: "", gpt4: "" };
const turboQuota = keyPool.remainingQuota("openai") * 100;
const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
const turboQuota = keyPool.activeLimitInUsd("openai");
const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
if (config.quotaDisplayMode === "full") {
const turboUsage = keyPool.usageInUsd("openai");
const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
} else {
quota.turbo = `${Math.round(turboQuota)}%`;
quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
// Don't invert this condition; some proxies may be using the now-deprecated
// 'partial' option which we want to treat as 'full' here.
if (config.quotaDisplayMode !== "none") {
quota.turbo = turboQuota;
quota.gpt4 = gpt4Quota;
}
info.turbo = {
activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
trialKeys: turboKeys.filter((k) => k.isTrial).length,
quota: quota.turbo,
activeLimit: quota.turbo,
revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
};
if (hasGpt4) {
info.gpt4 = {
activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
quota: quota.gpt4,
activeLimit: quota.gpt4,
revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
};
}
if (config.quotaDisplayMode === "none") {
delete info.turbo?.quota;
delete info.gpt4?.quota;
delete info.turbo?.activeLimit;
delete info.gpt4?.activeLimit;
}
} else {
info.status = "Key checking is disabled." as any;
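Illustrative shape of the reworked per-model info object (field names from this diff; the counts, dollar figure, and queue values are hypothetical):

info.turbo = {
  activeKeys: 14,          // keys not disabled
  trialKeys: 3,            // keys with isTrial
  activeLimit: "$620.00",  // keyPool.activeLimitInUsd("openai"); removed when quotaDisplayMode is "none"
  revokedKeys: 2,          // keys with isRevoked
  overQuotaKeys: 5,        // keys with isOverQuota
  proomptersInQueue: 0,
  estimatedQueueTime: "no wait",
};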

View File

@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
}
public remainingQuota() {
const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
const allKeys = this.keys.length;
if (activeKeys === 0) return 0;
return Math.round((activeKeys / allKeys) * 100) / 100;
}
public usageInUsd() {
return "$0.00 / ∞";
public activeLimitInUsd() {
return "∞";
}
}

View File

@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
anyUnchecked(): boolean;
incrementPrompt(hash: string): void;
getLockoutPeriod(model: Model): number;
remainingQuota(options?: Record<string, unknown>): number;
usageInUsd(options?: Record<string, unknown>): string;
activeLimitInUsd(options?: Record<string, unknown>): string;
markRateLimited(hash: string): void;
}

View File

@ -32,9 +32,15 @@ export class KeyPool {
return this.keyProviders.flatMap((provider) => provider.list());
}
public disable(key: Key): void {
public disable(key: Key, reason: "quota" | "revoked"): void {
const service = this.getKeyProvider(key.service);
service.disable(key);
if (service instanceof OpenAIKeyProvider) {
service.update(key.hash, {
isRevoked: reason === "revoked",
isOverQuota: reason === "quota",
});
}
}
public update(key: Key, props: AllowedPartial): void {
@ -75,18 +81,11 @@ export class KeyPool {
}
}
public remainingQuota(
service: AIService,
options?: Record<string, unknown>
): number {
return this.getKeyProvider(service).remainingQuota(options);
}
public usageInUsd(
public activeLimitInUsd(
service: AIService,
options?: Record<string, unknown>
): string {
return this.getKeyProvider(service).usageInUsd(options);
return this.getKeyProvider(service).activeLimitInUsd(options);
}
private getService(model: Model): AIService {
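Usage sketch of the reworked two-argument `disable`, matching the proxy error-handler calls later in this diff; for OpenAI keys the reason is translated into the corresponding flag:

keyPool.disable(req.key!, "quota");   // insufficient_quota -> isOverQuota: true
keyPool.disable(req.key!, "revoked"); // 401, access_terminated, billing_not_active -> isRevoked: true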

View File

@ -1,14 +1,24 @@
import axios, { AxiosError } from "axios";
import { Configuration, OpenAIApi } from "openai";
import { logger } from "../../logger";
import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
/** Minimum time in between any two key checks. */
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
/**
* Minimum time in between checks for a given key. Because we can no longer
* read quota usage, there is little reason to check a single key more often
* than this.
**/
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
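// Worked example of how the two intervals combine in scheduleNextCheck() below
// (timestamps hypothetical): if the least recently checked key was last checked
// 70 minutes ago and the checker itself last ran 1 second ago, the next check
// fires at max(lastChecked + KEY_CHECK_PERIOD, lastCheck + MIN_CHECK_INTERVAL)
// = max(now - 10 min, now + 2 s), i.e. about 2 seconds from now.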
const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
const GET_MODELS_URL = "https://api.openai.com/v1/models";
const GET_SUBSCRIPTION_URL =
"https://api.openai.com/dashboard/billing/subscription";
const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
type GetModelsResponse = {
data: [{ id: string }];
};
type GetSubscriptionResponse = {
plan: { title: string };
@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
system_hard_limit_usd: number;
};
type GetUsageResponse = {
total_usage: number;
};
type OpenAIError = {
error: { type: string; code: string; param: unknown; message: string };
};
@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
/**
* Schedules the next check. If there are still keys yet to be checked, it
* will schedule a check immediately for the next unchecked key. Otherwise,
* it will schedule a check in several minutes for the oldest key.
* it will schedule a check for the least recently checked key, respecting
* the minimum check interval.
**/
private scheduleNextCheck() {
const enabledKeys = this.keys.filter((key) => !key.isDisabled);
@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
key.lastChecked < oldest.lastChecked ? key : oldest
);
// Don't check any individual key more than once every 5 minutes.
// Also, don't check anything more often than once every 3 seconds.
// Don't check any individual key too often.
// Don't check anything at all at a rate faster than once per 3 seconds.
const nextCheck = Math.max(
oldestKey.lastChecked + KEY_CHECK_PERIOD,
this.lastCheck + MIN_CHECK_INTERVAL
@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
this.log.debug({ key: key.hash }, "Checking key...");
let isInitialCheck = !key.lastChecked;
try {
// During the initial check we need to get the subscription first because
// trials have different behavior.
// We only need to check for provisioned models on the initial check.
if (isInitialCheck) {
const subscription = await this.getSubscription(key);
this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
if (key.isTrial) {
this.log.debug(
{ key: key.hash },
"Attempting generation on trial key."
);
await this.assertCanGenerate(key);
}
const [provisionedModels, usage] = await Promise.all([
this.getProvisionedModels(key),
this.getUsage(key),
]);
const [subscription, provisionedModels, _livenessTest] =
await Promise.all([
this.getSubscription(key),
this.getProvisionedModels(key),
this.testLiveness(key),
]);
const updates = {
isGpt4: provisionedModels.gpt4,
isTrial: !subscription.has_payment_method,
softLimit: subscription.soft_limit_usd,
hardLimit: subscription.hard_limit_usd,
systemHardLimit: subscription.system_hard_limit_usd,
usage,
};
this.updateKey(key.hash, updates);
} else {
// Don't check provisioned models after the initial check because it's
// not likely to change.
const [subscription, usage] = await Promise.all([
// Provisioned models don't change, so we don't need to check them again
const [subscription, _livenessTest] = await Promise.all([
this.getSubscription(key),
this.getUsage(key),
this.testLiveness(key),
]);
const updates = {
softLimit: subscription.soft_limit_usd,
hardLimit: subscription.hard_limit_usd,
systemHardLimit: subscription.system_hard_limit_usd,
usage,
};
this.updateKey(key.hash, updates);
}
this.log.info(
{ key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
{ key: key.hash, hardLimit: key.hardLimit },
"Key check complete."
);
} catch (error) {
@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
private async getProvisionedModels(
key: OpenAIKey
): Promise<{ turbo: boolean; gpt4: boolean }> {
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
const models = (await openai.listModels()!).data.data;
const opts = { headers: { Authorization: `Bearer ${key.key}` } };
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
const models = data.data;
const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
// We want to update the key's `isGpt4` flag here, but we don't want to
// update its `lastChecked` timestamp because we need to let the liveness
// check run before we can consider the key checked.
// Need to use `find` here because keys are cloned from the pool.
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, {
isGpt4: gpt4,
lastChecked: keyFromPool.lastChecked,
});
return { turbo, gpt4 };
}
@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
return data;
}
private async getUsage(key: OpenAIKey) {
const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
const url = `${GET_USAGE_URL}?${querystring}`;
const { data } = await axios.get<GetUsageResponse>(url, {
headers: { Authorization: `Bearer ${key.key}` },
});
return parseFloat((data.total_usage / 100).toFixed(2));
}
private handleAxiosError(key: OpenAIKey, error: AxiosError) {
if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
const { status, data } = error.response;
if (status === 401) {
this.log.warn(
{ key: key.hash, error: data },
"Key is invalid or revoked. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
} else if (status === 429 && data.error.type === "insufficient_quota") {
this.log.warn(
{ key: key.hash, isTrial: key.isTrial, error: data },
"Key is out of quota. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
}
else if (status === 429 && data.error.type === "access_terminated") {
this.log.warn(
{ key: key.hash, isTrial: key.isTrial, error: data },
"Key has been terminated due to policy violations. Disabling key."
);
this.updateKey(key.hash, { isDisabled: true });
this.updateKey(key.hash, {
isDisabled: true,
isRevoked: true,
isGpt4: false,
});
} else if (status === 429) {
switch (data.error.type) {
case "insufficient_quota":
case "access_terminated":
case "billing_not_active":
const isOverQuota = data.error.type === "insufficient_quota";
const isRevoked = !isOverQuota;
const isGpt4 = isRevoked ? false : key.isGpt4;
this.log.warn(
{ key: key.hash, rateLimitType: data.error.type, error: data },
"Key returned a non-transient 429 error. Disabling key."
);
this.updateKey(key.hash, {
isDisabled: true,
isRevoked,
isOverQuota,
isGpt4,
});
break;
case "requests":
// Trial keys have extremely low requests-per-minute limits and we
// can often hit them just while checking the key, so we need to
// retry the check later to know if the key has quota remaining.
this.log.warn(
{ key: key.hash, error: data },
"Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
);
// To trigger a shorter than usual delay before the next check, we
// will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
// This will cause the usual key check scheduling logic to schedule
// the next check in 15 seconds. This also prevents the key from
// holding up startup checks for other keys.
const fifteenSeconds = 15 * 1000;
const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
this.updateKey(key.hash, { lastChecked: next });
break;
case "tokens":
// Hitting a token rate limit, even on a trial key, actually implies
// that the key is valid and can generate completions, so we will
// treat this as effectively a successful `testLiveness` call.
this.log.info(
{ key: key.hash },
"Key is currently `tokens` rate limited; assuming it is operational."
);
this.updateKey(key.hash, { lastChecked: Date.now() });
break;
default:
this.log.error(
{ key: key.hash, rateLimitType: data.error.type, error: data },
"Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
);
// We don't know what this error means, so we just let the key
// through and maybe it will fail when someone tries to use it.
this.updateKey(key.hash, { lastChecked: Date.now() });
}
} else {
this.log.error(
{ key: key.hash, status, error: data },
"Encountered API error while checking key."
"Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
);
this.updateKey(key.hash, { lastChecked: Date.now() });
}
return;
}
this.log.error(
{ key: key.hash, error },
"Network error while checking key; trying again later."
{ key: key.hash, error: error.message },
"Network error while checking key; trying this key again in a minute."
);
const oneMinute = 60 * 1000;
const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
this.updateKey(key.hash, { lastChecked: next });
}
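// Both "retry sooner" branches above back-date `lastChecked`; a sketch of the shared
// arithmetic (hypothetical helper, not part of this commit):
// private recheckAfter(delayMs: number): number {
//   // scheduleNextCheck() fires at lastChecked + KEY_CHECK_PERIOD, so back-dating by
//   // (KEY_CHECK_PERIOD - delayMs) moves that target to roughly Date.now() + delayMs.
//   return Date.now() - (KEY_CHECK_PERIOD - delayMs);
// }
// recheckAfter(15 * 1000) reproduces the 15-second retry for `requests` rate limits;
// recheckAfter(60 * 1000) reproduces the one-minute retry after a network error.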
/**
* Trial key usage reporting is inaccurate, so we need to run an actual
* completion to test them for liveness.
* Tests whether the key is valid and has quota remaining. The request we send
* is actually not valid, but keys which are revoked or out of quota will fail
* with a 401 or 429 error instead of the expected 400 Bad Request error.
* This lets us test keys without spending any quota.
*/
private async assertCanGenerate(key: OpenAIKey): Promise<void> {
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
// This will throw an AxiosError if the key is invalid or out of quota.
await openai.createChatCompletion({
private async testLiveness(key: OpenAIKey): Promise<void> {
const payload = {
model: "gpt-3.5-turbo",
messages: [{ role: "user", content: "Hello" }],
max_tokens: 1,
});
max_tokens: -1,
messages: [{ role: "user", content: "" }],
};
const { data } = await axios.post<OpenAIError>(
POST_CHAT_COMPLETIONS_URL,
payload,
{
headers: { Authorization: `Bearer ${key.key}` },
validateStatus: (status) => status === 400,
}
);
if (data.error.type === "invalid_request_error") {
// This is the expected error type for our bad prompt, so key is valid.
return;
} else {
this.log.warn(
{ key: key.hash, error: data },
"Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
);
}
}
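// Because `validateStatus` only accepts 400, any other status rejects and is routed
// to handleAxiosError above. Summary of the expected outcomes (a sketch, not exhaustive):
//   400 invalid_request_error -> expected failure for the bogus prompt; key is live, no tokens spent
//   401                       -> key marked isRevoked and disabled
//   429 insufficient_quota    -> key marked isOverQuota and disabled
//   429 requests / tokens     -> treated as transient (retry shortly / assume operational)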
static getUsageQuerystring(isTrial: boolean) {
// For paid keys, the limit resets every month, so we can use the first day
// of the current month.
// For trial keys, the limit does not reset and we don't know when the key
// was created, so we use 99 days ago because that's as far back as the API
// will let us go.
// End date needs to be set to the beginning of the next day so that we get
// usage for the current day.
const today = new Date();
const startDate = isTrial
? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
: new Date(today.getFullYear(), today.getMonth(), 1);
const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
endDate.toISOString().split("T")[0]
}`;
}
static errorIsOpenAiError(
static errorIsOpenAIError(
error: AxiosError
): error is AxiosError<OpenAIError> {
const data = error.response?.data as any;

View File

@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
export interface OpenAIKey extends Key {
readonly service: "openai";
/** The current usage of this key. */
usage: number;
/** Set when key check returns a 401. */
isRevoked: boolean;
/** Set when key check returns a non-transient 429. */
isOverQuota: boolean;
/** Threshold at which a warning email will be sent by OpenAI. */
softLimit: number;
/** Threshold at which the key will be disabled because it has reached the user-defined limit. */
@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
export type OpenAIKeyUpdate = Omit<
Partial<OpenAIKey>,
"key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
"key" | "hash" | "promptCount"
>;
export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
isGpt4: true,
isTrial: false,
isDisabled: false,
isRevoked: false,
isOverQuota: false,
softLimit: 0,
hardLimit: 0,
systemHardLimit: 0,
@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
/** Called by the key checker to update key information. */
public update(keyHash: string, update: OpenAIKeyUpdate) {
const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
// this.writeKeyStatus();
}
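// Spreading `update` after the default timestamp means callers can override it
// (identifiers illustrative):
//   update(hash, { isRevoked: true, isDisabled: true });                  // lastChecked -> Date.now()
//   update(hash, { isGpt4: true, lastChecked: keyFromPool.lastChecked }); // old timestamp preserved, as in
//                                                                         // the checker's getProvisionedModels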
@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
const keyFromPool = this.keys.find((k) => k.key === key.key);
if (!keyFromPool || keyFromPool.isDisabled) return;
keyFromPool.isDisabled = true;
// If it's disabled just set the usage to the hard limit so it doesn't
// mess with the aggregate usage.
keyFromPool.usage = keyFromPool.hardLimit;
this.log.warn({ key: key.hash }, "Key disabled");
}
@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
}
/**
* Returns the remaining aggregate quota for all keys as a percentage.
* Can go slightly negative because keys will typically go slightly over their
* limit before being disabled. Can sometimes go *really* negative if the
* cron job OpenAI uses to disable keys fails, as the key will essentially
* have unlimited quota.
**/
public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
if (keys.length === 0) return 0;
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
* Returns the total quota limit of all keys in USD. Keys which are disabled
* are not included in the total.
*/
public activeLimitInUsd(
{ gpt4 }: { gpt4: boolean } = { gpt4: false }
): string {
const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
return 1 - totalUsage / totalLimit;
}
/** Returns used and available usage in USD. */
public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
if (keys.length === 0) return "???";
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
return `$${totalLimit.toFixed(2)}`;
}
/** Writes key status to disk. */
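Worked example of the new aggregate (keys hypothetical): disabled keys are excluded outright instead of having their usage folded into the total, which is what made the old usage bookkeeping removable.

// Two active turbo keys with hardLimit 120 and 500, one disabled turbo key with hardLimit 1000:
// activeLimitInUsd({ gpt4: false }) -> "$620.00"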

View File

@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
}
} else if (statusCode === 401) {
// Key is invalid or was revoked
keyPool.disable(req.key!);
keyPool.disable(req.key!, "revoked");
errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
} else if (statusCode === 429) {
// OpenAI uses this for a bunch of different rate-limiting scenarios.
@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
const type = errorPayload.error?.type;
if (type === "insufficient_quota") {
// Billing quota exceeded (key is dead, disable it)
keyPool.disable(req.key!);
keyPool.disable(req.key!, "quota");
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
} else if (type === "access_terminated") {
// Account banned (key is dead, disable it)
keyPool.disable(req.key!);
keyPool.disable(req.key!, "revoked");
errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
} else if (type === "billing_not_active") {
// Billing is not active (key is dead, disable it)
keyPool.disable(req.key!);
keyPool.disable(req.key!, "revoked");
errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
} else if (type === "requests" || type === "tokens") {
// Per-minute request or token rate limit is exceeded, which we can retry