Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)

2023-07-21 04:00:12 +00:00 · 2023-07-21 04:00:12 +00:00 · aa5380d2ef
parent cbf9f16108
commit aa5380d2ef
10 changed files with 199 additions and 190 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -16,7 +16,6 @@
        "firebase-admin": "^11.10.1",
        "googleapis": "^122.0.0",
        "http-proxy-middleware": "^3.0.0-beta.1",
        "openai": "^3.2.1",
        "pino": "^8.11.0",
        "pino-http": "^8.3.3",
        "showdown": "^2.1.0",
@ -3163,23 +3162,6 @@
        "wrappy": "1"
      }
    },
    "node_modules/openai": {
      "version": "3.2.1",
      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
      "dependencies": {
        "axios": "^0.26.0",
        "form-data": "^4.0.0"
      }
    },
    "node_modules/openai/node_modules/axios": {
      "version": "0.26.1",
      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
      "dependencies": {
        "follow-redirects": "^1.14.8"
      }
    },
    "node_modules/p-limit": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
--- a/package.json
+++ b/package.json
@ -25,7 +25,6 @@
    "firebase-admin": "^11.10.1",
    "googleapis": "^122.0.0",
    "http-proxy-middleware": "^3.0.0-beta.1",
    "openai": "^3.2.1",
    "pino": "^8.11.0",
    "pino-http": "^8.3.3",
    "showdown": "^2.1.0",
--- a/src/config.ts
+++ b/src/config.ts
@ -88,11 +88,11 @@ type Config = {
   *
   * `none`: Hide quota information
   *
-   * `partial`: Display quota information only as a percentage
+   * `partial`: (deprecated) Same as `full` because usage is no longer tracked
   *
-   * `full`: Display quota information as usage against total capacity
+   * `full`: Displays information about keys' quota limits
   */
-  quotaDisplayMode: "none" | "partial" | "full";
+  quotaDisplayMode: "none" | "full";
  /**
   * Which request queueing strategy to use when keys are over their rate limit.
   *
@ -152,7 +152,7 @@ export const config: Config = {
  ),
  logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
  checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
-  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
+  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
  promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
  promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
  googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
--- a/src/info-page.ts
+++ b/src/info-page.ts
@ -2,7 +2,7 @@ import fs from "fs";
 import { Request, Response } from "express";
 import showdown from "showdown";
 import { config, listConfig } from "./config";
-import { keyPool } from "./key-management";
+import { OpenAIKey, keyPool } from "./key-management";
 import { getUniqueIps } from "./proxy/rate-limit";
 import {
  QueuePartition,
@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
 type ServiceInfo = {
  activeKeys: number;
  trialKeys?: number;
-  quota: string;
+  activeLimit: string;
  revokedKeys?: number;
  overQuotaKeys?: number;
  proomptersInQueue: number;
  estimatedQueueTime: string;
 };
@ -88,51 +90,55 @@ type ServiceInfo = {
 function getOpenAIInfo() {
  const info: { [model: string]: Partial<ServiceInfo> } = {};
-  const keys = keyPool.list().filter((k) => k.service === "openai");
+  const keys = keyPool
    .list()
    .filter((k) => k.service === "openai") as OpenAIKey[];
  const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
  if (keyPool.anyUnchecked()) {
    const uncheckedKeys = keys.filter((k) => !k.lastChecked);
-    info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
+    info.status =
      `Performing startup key checks (${uncheckedKeys.length} left).` as any;
  } else {
    delete info.status;
  }
  if (config.checkKeys) {
-    const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
+    const turboKeys = keys.filter((k) => !k.isGpt4);
-    const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
+    const gpt4Keys = keys.filter((k) => k.isGpt4);
    const quota: Record<string, string> = { turbo: "", gpt4: "" };
-    const turboQuota = keyPool.remainingQuota("openai") * 100;
+    const turboQuota = keyPool.activeLimitInUsd("openai");
-    const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
+    const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
-    if (config.quotaDisplayMode === "full") {
+    // Don't invert this condition; some proxies may be using the now-deprecated
-      const turboUsage = keyPool.usageInUsd("openai");
+    // 'partial' option which we want to treat as 'full' here.
-      const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
+    if (config.quotaDisplayMode !== "none") {
-      quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
+      quota.turbo = turboQuota;
-      quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
+      quota.gpt4 = gpt4Quota;
    } else {
      quota.turbo = `${Math.round(turboQuota)}%`;
      quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
    }
    info.turbo = {
      activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
      trialKeys: turboKeys.filter((k) => k.isTrial).length,
-      quota: quota.turbo,
+      activeLimit: quota.turbo,
      revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
      overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
    };
    if (hasGpt4) {
      info.gpt4 = {
        activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
        trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
-        quota: quota.gpt4,
+        activeLimit: quota.gpt4,
        revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
        overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
      };
    }
    if (config.quotaDisplayMode === "none") {
-      delete info.turbo?.quota;
+      delete info.turbo?.activeLimit;
-      delete info.gpt4?.quota;
+      delete info.gpt4?.activeLimit;
    }
  } else {
    info.status = "Key checking is disabled." as any;
--- a/src/key-management/anthropic/provider.ts
+++ b/src/key-management/anthropic/provider.ts
@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
  }
-  public remainingQuota() {
+  public activeLimitInUsd() {
-    const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
+    return "∞";
    const allKeys = this.keys.length;
    if (activeKeys === 0) return 0;
    return Math.round((activeKeys / allKeys) * 100) / 100;
  }
  public usageInUsd() {
    return "$0.00 / ∞";
  }
 }
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
  anyUnchecked(): boolean;
  incrementPrompt(hash: string): void;
  getLockoutPeriod(model: Model): number;
-  remainingQuota(options?: Record<string, unknown>): number;
+  activeLimitInUsd(options?: Record<string, unknown>): string;
  usageInUsd(options?: Record<string, unknown>): string;
  markRateLimited(hash: string): void;
 }
--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@ -32,9 +32,15 @@ export class KeyPool {
    return this.keyProviders.flatMap((provider) => provider.list());
  }
-  public disable(key: Key): void {
+  public disable(key: Key, reason: "quota" | "revoked"): void {
    const service = this.getKeyProvider(key.service);
    service.disable(key);
    if (service instanceof OpenAIKeyProvider) {
      service.update(key.hash, {
        isRevoked: reason === "revoked",
        isOverQuota: reason === "quota",
      });
    }
  }
  public update(key: Key, props: AllowedPartial): void {
@ -75,18 +81,11 @@ export class KeyPool {
    }
  }
-  public remainingQuota(
+  public activeLimitInUsd(
    service: AIService,
    options?: Record<string, unknown>
  ): number {
    return this.getKeyProvider(service).remainingQuota(options);
  }
  public usageInUsd(
    service: AIService,
    options?: Record<string, unknown>
  ): string {
-    return this.getKeyProvider(service).usageInUsd(options);
+    return this.getKeyProvider(service).activeLimitInUsd(options);
  }
  private getService(model: Model): AIService {
--- a/src/key-management/openai/checker.ts
+++ b/src/key-management/openai/checker.ts
@ -1,14 +1,24 @@
 import axios, { AxiosError } from "axios";
 import { Configuration, OpenAIApi } from "openai";
 import { logger } from "../../logger";
 import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
 /** Minimum time in between any two key checks. */
 const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
-const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
+/**
 * Minimum time in between checks for a given key. Because we can no longer
 * read quota usage, there is little reason to check a single key more often
 * than this.
 **/
 const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
 const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
 const GET_MODELS_URL = "https://api.openai.com/v1/models";
 const GET_SUBSCRIPTION_URL =
  "https://api.openai.com/dashboard/billing/subscription";
-const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
+
 type GetModelsResponse = {
  data: [{ id: string }];
 };
 type GetSubscriptionResponse = {
  plan: { title: string };
@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
  system_hard_limit_usd: number;
 };
 type GetUsageResponse = {
  total_usage: number;
 };
 type OpenAIError = {
  error: { type: string; code: string; param: unknown; message: string };
 };
@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
  /**
   * Schedules the next check. If there are still keys yet to be checked, it
   * will schedule a check immediately for the next unchecked key. Otherwise,
-   * it will schedule a check in several minutes for the oldest key.
+   * it will schedule a check for the least recently checked key, respecting
   * the minimum check interval.
   **/
  private scheduleNextCheck() {
    const enabledKeys = this.keys.filter((key) => !key.isDisabled);
@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
      key.lastChecked < oldest.lastChecked ? key : oldest
    );
-    // Don't check any individual key more than once every 5 minutes.
+    // Don't check any individual key too often.
-    // Also, don't check anything more often than once every 3 seconds.
+    // Don't check anything at all at a rate faster than once per 3 seconds.
    const nextCheck = Math.max(
      oldestKey.lastChecked + KEY_CHECK_PERIOD,
      this.lastCheck + MIN_CHECK_INTERVAL
@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
    this.log.debug({ key: key.hash }, "Checking key...");
    let isInitialCheck = !key.lastChecked;
    try {
-      // During the initial check we need to get the subscription first because
+      // We only need to check for provisioned models on the initial check.
      // trials have different behavior.
      if (isInitialCheck) {
-        const subscription = await this.getSubscription(key);
+        const [subscription, provisionedModels, _livenessTest] =
-        this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
+          await Promise.all([
-        if (key.isTrial) {
+            this.getSubscription(key),
-          this.log.debug(
+            this.getProvisionedModels(key),
-            { key: key.hash },
+            this.testLiveness(key),
-            "Attempting generation on trial key."
+          ]);
          );
          await this.assertCanGenerate(key);
        }
        const [provisionedModels, usage] = await Promise.all([
          this.getProvisionedModels(key),
          this.getUsage(key),
        ]);
        const updates = {
          isGpt4: provisionedModels.gpt4,
          isTrial: !subscription.has_payment_method,
          softLimit: subscription.soft_limit_usd,
          hardLimit: subscription.hard_limit_usd,
          systemHardLimit: subscription.system_hard_limit_usd,
          usage,
        };
        this.updateKey(key.hash, updates);
      } else {
-        // Don't check provisioned models after the initial check because it's
+        // Provisioned models don't change, so we don't need to check them again
-        // not likely to change.
+        const [subscription, _livenessTest] = await Promise.all([
        const [subscription, usage] = await Promise.all([
          this.getSubscription(key),
-          this.getUsage(key),
+          this.testLiveness(key),
        ]);
        const updates = {
          softLimit: subscription.soft_limit_usd,
          hardLimit: subscription.hard_limit_usd,
          systemHardLimit: subscription.system_hard_limit_usd,
          usage,
        };
        this.updateKey(key.hash, updates);
      }
      this.log.info(
-        { key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
+        { key: key.hash, hardLimit: key.hardLimit },
        "Key check complete."
      );
    } catch (error) {
@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
  private async getProvisionedModels(
    key: OpenAIKey
  ): Promise<{ turbo: boolean; gpt4: boolean }> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
+    const opts = { headers: { Authorization: `Bearer ${key.key}` } };
-    const models = (await openai.listModels()!).data.data;
+    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
    const models = data.data;
    const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
    const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
    // We want to update the key's `isGpt4` flag here, but we don't want to
    // update its `lastChecked` timestamp because we need to let the liveness
    // check run before we can consider the key checked.
    // Need to use `find` here because keys are cloned from the pool.
    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
    this.updateKey(key.hash, {
      isGpt4: gpt4,
      lastChecked: keyFromPool.lastChecked,
    });
    return { turbo, gpt4 };
  }
@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
    return data;
  }
  private async getUsage(key: OpenAIKey) {
    const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
    const url = `${GET_USAGE_URL}?${querystring}`;
    const { data } = await axios.get<GetUsageResponse>(url, {
      headers: { Authorization: `Bearer ${key.key}` },
    });
    return parseFloat((data.total_usage / 100).toFixed(2));
  }
  private handleAxiosError(key: OpenAIKey, error: AxiosError) {
-    if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
+    if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
      const { status, data } = error.response;
      if (status === 401) {
        this.log.warn(
          { key: key.hash, error: data },
          "Key is invalid or revoked. Disabling key."
        );
-        this.updateKey(key.hash, { isDisabled: true });
+        this.updateKey(key.hash, {
-      } else if (status === 429 && data.error.type === "insufficient_quota") {
+          isDisabled: true,
-        this.log.warn(
+          isRevoked: true,
-          { key: key.hash, isTrial: key.isTrial, error: data },
+          isGpt4: false,
-          "Key is out of quota. Disabling key."
+        });
-        );
+      } else if (status === 429) {
-        this.updateKey(key.hash, { isDisabled: true });
+        switch (data.error.type) {
-      }
+          case "insufficient_quota":
-      else if (status === 429 && data.error.type === "access_terminated") {
+          case "access_terminated":
-        this.log.warn(
+          case "billing_not_active":
-          { key: key.hash, isTrial: key.isTrial, error: data },
+            const isOverQuota = data.error.type === "insufficient_quota";
-          "Key has been terminated due to policy violations. Disabling key."
+            const isRevoked = !isOverQuota;
-        );
+            const isGpt4 = isRevoked ? false : key.isGpt4;
-        this.updateKey(key.hash, { isDisabled: true });
+            this.log.warn(
              { key: key.hash, rateLimitType: data.error.type, error: data },
              "Key returned a non-transient 429 error. Disabling key."
            );
            this.updateKey(key.hash, {
              isDisabled: true,
              isRevoked,
              isOverQuota,
              isGpt4,
            });
            break;
          case "requests":
            // Trial keys have extremely low requests-per-minute limits and we
            // can often hit them just while checking the key, so we need to
            // retry the check later to know if the key has quota remaining.
            this.log.warn(
              { key: key.hash, error: data },
              "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
            );
            // To trigger a shorter than usual delay before the next check, we
            // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
            // This will cause the usual key check scheduling logic to schedule
            // the next check in 15 seconds. This also prevents the key from
            // holding up startup checks for other keys.
            const fifteenSeconds = 15 * 1000;
            const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
            this.updateKey(key.hash, { lastChecked: next });
            break;
          case "tokens":
            // Hitting a token rate limit, even on a trial key, actually implies
            // that the key is valid and can generate completions, so we will
            // treat this as effectively a successful `testLiveness` call.
            this.log.info(
              { key: key.hash },
              "Key is currently `tokens` rate limited; assuming it is operational."
            );
            this.updateKey(key.hash, { lastChecked: Date.now() });
            break;
          default:
            this.log.error(
              { key: key.hash, rateLimitType: data.error.type, error: data },
              "Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
            );
            // We don't know what this error means, so we just let the key
            // through and maybe it will fail when someone tries to use it.
            this.updateKey(key.hash, { lastChecked: Date.now() });
        }
      } else {
        this.log.error(
          { key: key.hash, status, error: data },
-          "Encountered API error while checking key."
+          "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
        );
        this.updateKey(key.hash, { lastChecked: Date.now() });
      }
      return;
    }
    this.log.error(
-      { key: key.hash, error },
+      { key: key.hash, error: error.message },
-      "Network error while checking key; trying again later."
+      "Network error while checking key; trying this key again in a minute."
    );
    const oneMinute = 60 * 1000;
    const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
    this.updateKey(key.hash, { lastChecked: next });
  }
  /**
-   * Trial key usage reporting is inaccurate, so we need to run an actual
+   * Tests whether the key is valid and has quota remaining. The request we send
-   * completion to test them for liveness.
+   * is actually not valid, but keys which are revoked or out of quota will fail
   * with a 401 or 429 error instead of the expected 400 Bad Request error.
   * This lets us test keys without spending any quota.
   */
-  private async assertCanGenerate(key: OpenAIKey): Promise<void> {
+  private async testLiveness(key: OpenAIKey): Promise<void> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
+    const payload = {
    // This will throw an AxiosError if the key is invalid or out of quota.
    await openai.createChatCompletion({
      model: "gpt-3.5-turbo",
-      messages: [{ role: "user", content: "Hello" }],
+      max_tokens: -1,
-      max_tokens: 1,
+      messages: [{ role: "user", content: "" }],
-    });
+    };
    const { data } = await axios.post<OpenAIError>(
      POST_CHAT_COMPLETIONS_URL,
      payload,
      {
        headers: { Authorization: `Bearer ${key.key}` },
        validateStatus: (status) => status === 400,
      }
    );
    if (data.error.type === "invalid_request_error") {
      // This is the expected error type for our bad prompt, so key is valid.
      return;
    } else {
      this.log.warn(
        { key: key.hash, error: data },
        "Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
      );
    }
  }
-  static getUsageQuerystring(isTrial: boolean) {
+  static errorIsOpenAIError(
    // For paid keys, the limit resets every month, so we can use the first day
    // of the current month.
    // For trial keys, the limit does not reset and we don't know when the key
    // was created, so we use 99 days ago because that's as far back as the API
    // will let us go.
    // End date needs to be set to the beginning of the next day so that we get
    // usage for the current day.
    const today = new Date();
    const startDate = isTrial
      ? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
      : new Date(today.getFullYear(), today.getMonth(), 1);
    const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
    return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
      endDate.toISOString().split("T")[0]
    }`;
  }
  static errorIsOpenAiError(
    error: AxiosError
  ): error is AxiosError<OpenAIError> {
    const data = error.response?.data as any;
--- a/src/key-management/openai/provider.ts
+++ b/src/key-management/openai/provider.ts
@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
 export interface OpenAIKey extends Key {
  readonly service: "openai";
-  /** The current usage of this key. */
+  /** Set when the key is revoked: a 401, or a 429 of type `access_terminated` or `billing_not_active`. */
-  usage: number;
+  isRevoked: boolean;
  /** Set when the key is out of quota (a 429 of type `insufficient_quota`). */
  isOverQuota: boolean;
  /** Threshold at which a warning email will be sent by OpenAI. */
  softLimit: number;
  /** Threshold at which the key will be disabled because it has reached the user-defined limit. */
@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
 export type OpenAIKeyUpdate = Omit<
  Partial<OpenAIKey>,
-  "key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
+  "key" | "hash" | "promptCount"
 >;
 export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
        isGpt4: true,
        isTrial: false,
        isDisabled: false,
        isRevoked: false,
        isOverQuota: false,
        softLimit: 0,
        hardLimit: 0,
        systemHardLimit: 0,
@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
  /** Called by the key checker to update key information. */
  public update(keyHash: string, update: OpenAIKeyUpdate) {
    const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
-    Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
+    Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
    // this.writeKeyStatus();
  }
@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
    const keyFromPool = this.keys.find((k) => k.key === key.key);
    if (!keyFromPool || keyFromPool.isDisabled) return;
    keyFromPool.isDisabled = true;
    // If it's disabled just set the usage to the hard limit so it doesn't
    // mess with the aggregate usage.
    keyFromPool.usage = keyFromPool.hardLimit;
    this.log.warn({ key: key.hash }, "Key disabled");
  }
@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
  }
  /**
-   * Returns the remaining aggregate quota for all keys as a percentage.
+   * Returns the total quota limit of all keys in USD. Keys which are disabled
-   * Can go slightly negative because keys will typically go slightly over their
+   * are not included in the total.
-   * limit before being disabled.  Can sometimes go *really* negative if the
+   */
-   * cron job OpenAI uses to disable keys fails, as the key will essentially
+  public activeLimitInUsd(
-   * have unlimited quota.
+    { gpt4 }: { gpt4: boolean } = { gpt4: false }
-   **/
+  ): string {
-  public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
+    const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
    if (keys.length === 0) return 0;
    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-
+    return `$${totalLimit.toFixed(2)}`;
    return 1 - totalUsage / totalLimit;
  }
  /** Returns used and available usage in USD. */
  public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
    if (keys.length === 0) return "???";
    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
    return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
  }
  /** Writes key status to disk. */
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    }
  } else if (statusCode === 401) {
    // Key is invalid or was revoked
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
  } else if (statusCode === 429) {
    // OpenAI uses this for a bunch of different rate-limiting scenarios.
@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
  const type = errorPayload.error?.type;
  if (type === "insufficient_quota") {
    // Billing quota exceeded (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "quota");
    errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
  } else if (type === "access_terminated") {
    // Account banned (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
  } else if (type === "billing_not_active") {
    // Billing is not active (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
  } else if (type === "requests" || type === "tokens") {
    // Per-minute request or token rate limit is exceeded, which we can retry