From aa5380d2ef9d2a2034fd3eff747e877933a4e5c7 Mon Sep 17 00:00:00 2001 From: khanon Date: Fri, 21 Jul 2023 04:00:12 +0000 Subject: [PATCH] Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29) --- package-lock.json | 18 -- package.json | 1 - src/config.ts | 8 +- src/info-page.ts | 46 +++-- src/key-management/anthropic/provider.ts | 11 +- src/key-management/index.ts | 3 +- src/key-management/key-pool.ts | 19 +- src/key-management/openai/checker.ts | 228 ++++++++++++++--------- src/key-management/openai/provider.ts | 47 ++--- src/proxy/middleware/response/index.ts | 8 +- 10 files changed, 199 insertions(+), 190 deletions(-) diff --git a/package-lock.json b/package-lock.json index 78f88d9..93101b1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -16,7 +16,6 @@ "firebase-admin": "^11.10.1", "googleapis": "^122.0.0", "http-proxy-middleware": "^3.0.0-beta.1", - "openai": "^3.2.1", "pino": "^8.11.0", "pino-http": "^8.3.3", "showdown": "^2.1.0", @@ -3163,23 +3162,6 @@ "wrappy": "1" } }, - "node_modules/openai": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", - "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", - "dependencies": { - "axios": "^0.26.0", - "form-data": "^4.0.0" - } - }, - "node_modules/openai/node_modules/axios": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", - "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", - "dependencies": { - "follow-redirects": "^1.14.8" - } - }, "node_modules/p-limit": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", diff --git a/package.json b/package.json index a30e876..c866f8a 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,6 @@ "firebase-admin": "^11.10.1", "googleapis": "^122.0.0", "http-proxy-middleware": "^3.0.0-beta.1", - "openai": "^3.2.1", "pino": "^8.11.0", "pino-http": "^8.3.3", "showdown": "^2.1.0", diff --git a/src/config.ts b/src/config.ts index 9fa76a2..8d0ab06 100644 --- a/src/config.ts +++ b/src/config.ts @@ -88,11 +88,11 @@ type Config = { * * `none`: Hide quota information * - * `partial`: Display quota information only as a percentage + * `partial`: (deprecated) Same as `full` because usage is no longer tracked * - * `full`: Display quota information as usage against total capacity + * `full`: Displays information about keys' quota limits */ - quotaDisplayMode: "none" | "partial" | "full"; + quotaDisplayMode: "none" | "full"; /** * Which request queueing strategy to use when keys are over their rate limit. * @@ -152,7 +152,7 @@ export const config: Config = { ), logLevel: getEnvWithDefault("LOG_LEVEL", "info"), checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev), - quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"), + quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"), promptLogging: getEnvWithDefault("PROMPT_LOGGING", false), promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined), googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined), diff --git a/src/info-page.ts b/src/info-page.ts index 3c79729..51cdaa0 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -2,7 +2,7 @@ import fs from "fs"; import { Request, Response } from "express"; import showdown from "showdown"; import { config, listConfig } from "./config"; -import { keyPool } from "./key-management"; +import { OpenAIKey, keyPool } from "./key-management"; import { getUniqueIps } from "./proxy/rate-limit"; import { QueuePartition, @@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) { type ServiceInfo = { activeKeys: number; trialKeys?: number; - quota: string; + activeLimit: string; + revokedKeys?: number; + overQuotaKeys?: number; proomptersInQueue: number; estimatedQueueTime: string; }; @@ -88,51 +90,55 @@ type ServiceInfo = { function getOpenAIInfo() { const info: { [model: string]: Partial } = {}; - const keys = keyPool.list().filter((k) => k.service === "openai"); + const keys = keyPool + .list() + .filter((k) => k.service === "openai") as OpenAIKey[]; const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly; if (keyPool.anyUnchecked()) { const uncheckedKeys = keys.filter((k) => !k.lastChecked); - info.status = `Still checking ${uncheckedKeys.length} keys...` as any; + info.status = + `Performing startup key checks (${uncheckedKeys.length} left).` as any; } else { delete info.status; } if (config.checkKeys) { - const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled); - const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled); + const turboKeys = keys.filter((k) => !k.isGpt4); + const gpt4Keys = keys.filter((k) => k.isGpt4); const quota: Record = { turbo: "", gpt4: "" }; - const turboQuota = keyPool.remainingQuota("openai") * 100; - const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100; + const turboQuota = keyPool.activeLimitInUsd("openai"); + const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true }); - if (config.quotaDisplayMode === "full") { - const turboUsage = keyPool.usageInUsd("openai"); - const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true }); - quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`; - quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`; - } else { - quota.turbo = `${Math.round(turboQuota)}%`; - quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`; + // Don't invert this condition; some proxies may be using the now-deprecated + // 'partial' option which we want to treat as 'full' here. + if (config.quotaDisplayMode !== "none") { + quota.turbo = turboQuota; + quota.gpt4 = gpt4Quota; } info.turbo = { activeKeys: turboKeys.filter((k) => !k.isDisabled).length, trialKeys: turboKeys.filter((k) => k.isTrial).length, - quota: quota.turbo, + activeLimit: quota.turbo, + revokedKeys: turboKeys.filter((k) => k.isRevoked).length, + overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length, }; if (hasGpt4) { info.gpt4 = { activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length, trialKeys: gpt4Keys.filter((k) => k.isTrial).length, - quota: quota.gpt4, + activeLimit: quota.gpt4, + revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length, + overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length, }; } if (config.quotaDisplayMode === "none") { - delete info.turbo?.quota; - delete info.gpt4?.quota; + delete info.turbo?.activeLimit; + delete info.gpt4?.activeLimit; } } else { info.status = "Key checking is disabled." as any; diff --git a/src/key-management/anthropic/provider.ts b/src/key-management/anthropic/provider.ts index 870b485..cba5b4e 100644 --- a/src/key-management/anthropic/provider.ts +++ b/src/key-management/anthropic/provider.ts @@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider { key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT; } - public remainingQuota() { - const activeKeys = this.keys.filter((k) => !k.isDisabled).length; - const allKeys = this.keys.length; - if (activeKeys === 0) return 0; - return Math.round((activeKeys / allKeys) * 100) / 100; - } - - public usageInUsd() { - return "$0.00 / ∞"; + public activeLimitInUsd() { + return "∞"; } } diff --git a/src/key-management/index.ts b/src/key-management/index.ts index 13c73a5..a7b3c73 100644 --- a/src/key-management/index.ts +++ b/src/key-management/index.ts @@ -52,8 +52,7 @@ export interface KeyProvider { anyUnchecked(): boolean; incrementPrompt(hash: string): void; getLockoutPeriod(model: Model): number; - remainingQuota(options?: Record): number; - usageInUsd(options?: Record): string; + activeLimitInUsd(options?: Record): string; markRateLimited(hash: string): void; } diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts index 1f100a5..8a000eb 100644 --- a/src/key-management/key-pool.ts +++ b/src/key-management/key-pool.ts @@ -32,9 +32,15 @@ export class KeyPool { return this.keyProviders.flatMap((provider) => provider.list()); } - public disable(key: Key): void { + public disable(key: Key, reason: "quota" | "revoked"): void { const service = this.getKeyProvider(key.service); service.disable(key); + if (service instanceof OpenAIKeyProvider) { + service.update(key.hash, { + isRevoked: reason === "revoked", + isOverQuota: reason === "quota", + }); + } } public update(key: Key, props: AllowedPartial): void { @@ -75,18 +81,11 @@ export class KeyPool { } } - public remainingQuota( - service: AIService, - options?: Record - ): number { - return this.getKeyProvider(service).remainingQuota(options); - } - - public usageInUsd( + public activeLimitInUsd( service: AIService, options?: Record ): string { - return this.getKeyProvider(service).usageInUsd(options); + return this.getKeyProvider(service).activeLimitInUsd(options); } private getService(model: Model): AIService { diff --git a/src/key-management/openai/checker.ts b/src/key-management/openai/checker.ts index be29862..10922eb 100644 --- a/src/key-management/openai/checker.ts +++ b/src/key-management/openai/checker.ts @@ -1,14 +1,24 @@ import axios, { AxiosError } from "axios"; -import { Configuration, OpenAIApi } from "openai"; import { logger } from "../../logger"; import type { OpenAIKey, OpenAIKeyProvider } from "./provider"; +/** Minimum time in between any two key checks. */ const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds -const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes +/** + * Minimum time in between checks for a given key. Because we can no longer + * read quota usage, there is little reason to check a single key more often + * than this. + **/ +const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour +const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"; +const GET_MODELS_URL = "https://api.openai.com/v1/models"; const GET_SUBSCRIPTION_URL = "https://api.openai.com/dashboard/billing/subscription"; -const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage"; + +type GetModelsResponse = { + data: [{ id: string }]; +}; type GetSubscriptionResponse = { plan: { title: string }; @@ -18,10 +28,6 @@ type GetSubscriptionResponse = { system_hard_limit_usd: number; }; -type GetUsageResponse = { - total_usage: number; -}; - type OpenAIError = { error: { type: string; code: string; param: unknown; message: string }; }; @@ -54,7 +60,8 @@ export class OpenAIKeyChecker { /** * Schedules the next check. If there are still keys yet to be checked, it * will schedule a check immediately for the next unchecked key. Otherwise, - * it will schedule a check in several minutes for the oldest key. + * it will schedule a check for the least recently checked key, respecting + * the minimum check interval. **/ private scheduleNextCheck() { const enabledKeys = this.keys.filter((key) => !key.isDisabled); @@ -94,8 +101,8 @@ export class OpenAIKeyChecker { key.lastChecked < oldest.lastChecked ? key : oldest ); - // Don't check any individual key more than once every 5 minutes. - // Also, don't check anything more often than once every 3 seconds. + // Don't check any individual key too often. + // Don't check anything at all at a rate faster than once per 3 seconds. const nextCheck = Math.max( oldestKey.lastChecked + KEY_CHECK_PERIOD, this.lastCheck + MIN_CHECK_INTERVAL @@ -122,47 +129,37 @@ export class OpenAIKeyChecker { this.log.debug({ key: key.hash }, "Checking key..."); let isInitialCheck = !key.lastChecked; try { - // During the initial check we need to get the subscription first because - // trials have different behavior. + // We only need to check for provisioned models on the initial check. if (isInitialCheck) { - const subscription = await this.getSubscription(key); - this.updateKey(key.hash, { isTrial: !subscription.has_payment_method }); - if (key.isTrial) { - this.log.debug( - { key: key.hash }, - "Attempting generation on trial key." - ); - await this.assertCanGenerate(key); - } - const [provisionedModels, usage] = await Promise.all([ - this.getProvisionedModels(key), - this.getUsage(key), - ]); + const [subscription, provisionedModels, _livenessTest] = + await Promise.all([ + this.getSubscription(key), + this.getProvisionedModels(key), + this.testLiveness(key), + ]); const updates = { isGpt4: provisionedModels.gpt4, + isTrial: !subscription.has_payment_method, softLimit: subscription.soft_limit_usd, hardLimit: subscription.hard_limit_usd, systemHardLimit: subscription.system_hard_limit_usd, - usage, }; this.updateKey(key.hash, updates); } else { - // Don't check provisioned models after the initial check because it's - // not likely to change. - const [subscription, usage] = await Promise.all([ + // Provisioned models don't change, so we don't need to check them again + const [subscription, _livenessTest] = await Promise.all([ this.getSubscription(key), - this.getUsage(key), + this.testLiveness(key), ]); const updates = { softLimit: subscription.soft_limit_usd, hardLimit: subscription.hard_limit_usd, systemHardLimit: subscription.system_hard_limit_usd, - usage, }; this.updateKey(key.hash, updates); } this.log.info( - { key: key.hash, usage: key.usage, hardLimit: key.hardLimit }, + { key: key.hash, hardLimit: key.hardLimit }, "Key check complete." ); } catch (error) { @@ -182,10 +179,21 @@ export class OpenAIKeyChecker { private async getProvisionedModels( key: OpenAIKey ): Promise<{ turbo: boolean; gpt4: boolean }> { - const openai = new OpenAIApi(new Configuration({ apiKey: key.key })); - const models = (await openai.listModels()!).data.data; + const opts = { headers: { Authorization: `Bearer ${key.key}` } }; + const { data } = await axios.get(GET_MODELS_URL, opts); + const models = data.data; const turbo = models.some(({ id }) => id.startsWith("gpt-3.5")); const gpt4 = models.some(({ id }) => id.startsWith("gpt-4")); + // We want to update the key's `isGpt4` flag here, but we don't want to + // update its `lastChecked` timestamp because we need to let the liveness + // check run before we can consider the key checked. + + // Need to use `find` here because keys are cloned from the pool. + const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; + this.updateKey(key.hash, { + isGpt4: gpt4, + lastChecked: keyFromPool.lastChecked, + }); return { turbo, gpt4 }; } @@ -197,86 +205,124 @@ export class OpenAIKeyChecker { return data; } - private async getUsage(key: OpenAIKey) { - const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial); - const url = `${GET_USAGE_URL}?${querystring}`; - const { data } = await axios.get(url, { - headers: { Authorization: `Bearer ${key.key}` }, - }); - return parseFloat((data.total_usage / 100).toFixed(2)); - } - private handleAxiosError(key: OpenAIKey, error: AxiosError) { - if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) { + if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) { const { status, data } = error.response; if (status === 401) { this.log.warn( { key: key.hash, error: data }, "Key is invalid or revoked. Disabling key." ); - this.updateKey(key.hash, { isDisabled: true }); - } else if (status === 429 && data.error.type === "insufficient_quota") { - this.log.warn( - { key: key.hash, isTrial: key.isTrial, error: data }, - "Key is out of quota. Disabling key." - ); - this.updateKey(key.hash, { isDisabled: true }); - } - else if (status === 429 && data.error.type === "access_terminated") { - this.log.warn( - { key: key.hash, isTrial: key.isTrial, error: data }, - "Key has been terminated due to policy violations. Disabling key." - ); - this.updateKey(key.hash, { isDisabled: true }); + this.updateKey(key.hash, { + isDisabled: true, + isRevoked: true, + isGpt4: false, + }); + } else if (status === 429) { + switch (data.error.type) { + case "insufficient_quota": + case "access_terminated": + case "billing_not_active": + const isOverQuota = data.error.type === "insufficient_quota"; + const isRevoked = !isOverQuota; + const isGpt4 = isRevoked ? false : key.isGpt4; + this.log.warn( + { key: key.hash, rateLimitType: data.error.type, error: data }, + "Key returned a non-transient 429 error. Disabling key." + ); + this.updateKey(key.hash, { + isDisabled: true, + isRevoked, + isOverQuota, + isGpt4, + }); + break; + case "requests": + // Trial keys have extremely low requests-per-minute limits and we + // can often hit them just while checking the key, so we need to + // retry the check later to know if the key has quota remaining. + this.log.warn( + { key: key.hash, error: data }, + "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds." + ); + // To trigger a shorter than usual delay before the next check, we + // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)). + // This will cause the usual key check scheduling logic to schedule + // the next check in 15 seconds. This also prevents the key from + // holding up startup checks for other keys. + const fifteenSeconds = 15 * 1000; + const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds); + this.updateKey(key.hash, { lastChecked: next }); + break; + case "tokens": + // Hitting a token rate limit, even on a trial key, actually implies + // that the key is valid and can generate completions, so we will + // treat this as effectively a successful `testLiveness` call. + this.log.info( + { key: key.hash }, + "Key is currently `tokens` rate limited; assuming it is operational." + ); + this.updateKey(key.hash, { lastChecked: Date.now() }); + break; + default: + this.log.error( + { key: key.hash, rateLimitType: data.error.type, error: data }, + "Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this." + ); + // We don't know what this error means, so we just let the key + // through and maybe it will fail when someone tries to use it. + this.updateKey(key.hash, { lastChecked: Date.now() }); + } } else { this.log.error( { key: key.hash, status, error: data }, - "Encountered API error while checking key." + "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this." ); + this.updateKey(key.hash, { lastChecked: Date.now() }); } return; } this.log.error( - { key: key.hash, error }, - "Network error while checking key; trying again later." + { key: key.hash, error: error.message }, + "Network error while checking key; trying this key again in a minute." ); + const oneMinute = 60 * 1000; + const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute); + this.updateKey(key.hash, { lastChecked: next }); } /** - * Trial key usage reporting is inaccurate, so we need to run an actual - * completion to test them for liveness. + * Tests whether the key is valid and has quota remaining. The request we send + * is actually not valid, but keys which are revoked or out of quota will fail + * with a 401 or 429 error instead of the expected 400 Bad Request error. + * This lets us avoid test keys without spending any quota. */ - private async assertCanGenerate(key: OpenAIKey): Promise { - const openai = new OpenAIApi(new Configuration({ apiKey: key.key })); - // This will throw an AxiosError if the key is invalid or out of quota. - await openai.createChatCompletion({ + private async testLiveness(key: OpenAIKey): Promise { + const payload = { model: "gpt-3.5-turbo", - messages: [{ role: "user", content: "Hello" }], - max_tokens: 1, - }); + max_tokens: -1, + messages: [{ role: "user", content: "" }], + }; + const { data } = await axios.post( + POST_CHAT_COMPLETIONS_URL, + payload, + { + headers: { Authorization: `Bearer ${key.key}` }, + validateStatus: (status) => status === 400, + } + ); + if (data.error.type === "invalid_request_error") { + // This is the expected error type for our bad prompt, so key is valid. + return; + } else { + this.log.warn( + { key: key.hash, error: data }, + "Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API." + ); + } } - static getUsageQuerystring(isTrial: boolean) { - // For paid keys, the limit resets every month, so we can use the first day - // of the current month. - // For trial keys, the limit does not reset and we don't know when the key - // was created, so we use 99 days ago because that's as far back as the API - // will let us go. - - // End date needs to be set to the beginning of the next day so that we get - // usage for the current day. - - const today = new Date(); - const startDate = isTrial - ? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000) - : new Date(today.getFullYear(), today.getMonth(), 1); - const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000); - return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${ - endDate.toISOString().split("T")[0] - }`; - } - - static errorIsOpenAiError( + static errorIsOpenAIError( error: AxiosError ): error is AxiosError { const data = error.response?.data as any; diff --git a/src/key-management/openai/provider.ts b/src/key-management/openai/provider.ts index 712a4cc..082b642 100644 --- a/src/key-management/openai/provider.ts +++ b/src/key-management/openai/provider.ts @@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [ export interface OpenAIKey extends Key { readonly service: "openai"; - /** The current usage of this key. */ - usage: number; + /** Set when key check returns a 401. */ + isRevoked: boolean; + /** Set when key check returns a non-transient 429. */ + isOverQuota: boolean; /** Threshold at which a warning email will be sent by OpenAI. */ softLimit: number; /** Threshold at which the key will be disabled because it has reached the user-defined limit. */ @@ -54,7 +56,7 @@ export interface OpenAIKey extends Key { export type OpenAIKeyUpdate = Omit< Partial, - "key" | "hash" | "lastUsed" | "lastChecked" | "promptCount" + "key" | "hash" | "promptCount" >; export class OpenAIKeyProvider implements KeyProvider { @@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider { isGpt4: true, isTrial: false, isDisabled: false, + isRevoked: false, + isOverQuota: false, softLimit: 0, hardLimit: 0, systemHardLimit: 0, @@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider { /** Called by the key checker to update key information. */ public update(keyHash: string, update: OpenAIKeyUpdate) { const keyFromPool = this.keys.find((k) => k.hash === keyHash)!; - Object.assign(keyFromPool, { ...update, lastChecked: Date.now() }); + Object.assign(keyFromPool, { lastChecked: Date.now(), ...update }); // this.writeKeyStatus(); } @@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider { const keyFromPool = this.keys.find((k) => k.key === key.key); if (!keyFromPool || keyFromPool.isDisabled) return; keyFromPool.isDisabled = true; - // If it's disabled just set the usage to the hard limit so it doesn't - // mess with the aggregate usage. - keyFromPool.usage = keyFromPool.hardLimit; this.log.warn({ key: key.hash }, "Key disabled"); } @@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider { } /** - * Returns the remaining aggregate quota for all keys as a percentage. - * Can go slightly negative because keys will typically go slightly over their - * limit before being disabled. Can sometimes go *really* negative if the - * cron job OpenAI uses to disable keys fails, as the key will essentially - * have unlimited quota. - **/ - public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number { - const keys = this.keys.filter((k) => k.isGpt4 === gpt4); - if (keys.length === 0) return 0; - - const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0); + * Returns the total quota limit of all keys in USD. Keys which are disabled + * are not included in the total. + */ + public activeLimitInUsd( + { gpt4 }: { gpt4: boolean } = { gpt4: false } + ): string { + const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4); const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0); - - return 1 - totalUsage / totalLimit; - } - - /** Returns used and available usage in USD. */ - public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string { - const keys = this.keys.filter((k) => k.isGpt4 === gpt4); - if (keys.length === 0) return "???"; - - const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0); - const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0); - - return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`; + return `$${totalLimit.toFixed(2)}`; } /** Writes key status to disk. */ diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index 4f02a3c..6c9ca7f 100644 --- a/src/proxy/middleware/response/index.ts +++ b/src/proxy/middleware/response/index.ts @@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( } } else if (statusCode === 401) { // Key is invalid or was revoked - keyPool.disable(req.key!); + keyPool.disable(req.key!, "revoked"); errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`; } else if (statusCode === 429) { // OpenAI uses this for a bunch of different rate-limiting scenarios. @@ -375,15 +375,15 @@ function handleOpenAIRateLimitError( const type = errorPayload.error?.type; if (type === "insufficient_quota") { // Billing quota exceeded (key is dead, disable it) - keyPool.disable(req.key!); + keyPool.disable(req.key!, "quota"); errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`; } else if (type === "access_terminated") { // Account banned (key is dead, disable it) - keyPool.disable(req.key!); + keyPool.disable(req.key!, "revoked"); errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`; } else if (type === "billing_not_active") { // Billing is not active (key is dead, disable it) - keyPool.disable(req.key!); + keyPool.disable(req.key!, "revoked"); errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`; } else if (type === "requests" || type === "tokens") { // Per-minute request or token rate limit is exceeded, which we can retry