Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)

2023-07-21 04:00:12 +00:00 · 2023-07-21 04:00:12 +00:00 · aa5380d2ef
parent cbf9f16108
commit aa5380d2ef
10 changed files with 199 additions and 190 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -16,7 +16,6 @@
        "firebase-admin": "^11.10.1",
        "googleapis": "^122.0.0",
        "http-proxy-middleware": "^3.0.0-beta.1",
-        "openai": "^3.2.1",
        "pino": "^8.11.0",
        "pino-http": "^8.3.3",
        "showdown": "^2.1.0",
@ -3163,23 +3162,6 @@
        "wrappy": "1"
      }
    },
-    "node_modules/openai": {
-      "version": "3.2.1",
-      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
-      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
-      "dependencies": {
-        "axios": "^0.26.0",
-        "form-data": "^4.0.0"
-      }
-    },
-    "node_modules/openai/node_modules/axios": {
-      "version": "0.26.1",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
-      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
-      "dependencies": {
-        "follow-redirects": "^1.14.8"
-      }
-    },
    "node_modules/p-limit": {
      "version": "3.1.0",
      "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
--- a/package.json
+++ b/package.json
@ -25,7 +25,6 @@
    "firebase-admin": "^11.10.1",
    "googleapis": "^122.0.0",
    "http-proxy-middleware": "^3.0.0-beta.1",
-    "openai": "^3.2.1",
    "pino": "^8.11.0",
    "pino-http": "^8.3.3",
    "showdown": "^2.1.0",
--- a/src/config.ts
+++ b/src/config.ts
@ -88,11 +88,11 @@ type Config = {
   *
   * `none`: Hide quota information
   *
-   * `partial`: Display quota information only as a percentage
+   * `partial`: (deprecated) Same as `full` because usage is no longer tracked
   *
-   * `full`: Display quota information as usage against total capacity
+   * `full`: Displays information about keys' quota limits
   */
-  quotaDisplayMode: "none" | "partial" | "full";
+  quotaDisplayMode: "none" | "full";
  /**
   * Which request queueing strategy to use when keys are over their rate limit.
   *
@ -152,7 +152,7 @@ export const config: Config = {
  ),
  logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
  checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
-  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
+  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
  promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
  promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
  googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
--- a/src/info-page.ts
+++ b/src/info-page.ts
@ -2,7 +2,7 @@ import fs from "fs";
 import { Request, Response } from "express";
 import showdown from "showdown";
 import { config, listConfig } from "./config";
-import { keyPool } from "./key-management";
+import { OpenAIKey, keyPool } from "./key-management";
 import { getUniqueIps } from "./proxy/rate-limit";
 import {
  QueuePartition,
@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
 type ServiceInfo = {
  activeKeys: number;
  trialKeys?: number;
-  quota: string;
+  activeLimit: string;
+  revokedKeys?: number;
+  overQuotaKeys?: number;
  proomptersInQueue: number;
  estimatedQueueTime: string;
 };
@ -88,51 +90,55 @@ type ServiceInfo = {

 function getOpenAIInfo() {
  const info: { [model: string]: Partial<ServiceInfo> } = {};
-  const keys = keyPool.list().filter((k) => k.service === "openai");
+  const keys = keyPool
+    .list()
+    .filter((k) => k.service === "openai") as OpenAIKey[];
  const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;

  if (keyPool.anyUnchecked()) {
    const uncheckedKeys = keys.filter((k) => !k.lastChecked);
-    info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
+    info.status =
+      `Performing startup key checks (${uncheckedKeys.length} left).` as any;
  } else {
    delete info.status;
  }

  if (config.checkKeys) {
-    const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
-    const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
+    const turboKeys = keys.filter((k) => !k.isGpt4);
+    const gpt4Keys = keys.filter((k) => k.isGpt4);

    const quota: Record<string, string> = { turbo: "", gpt4: "" };
-    const turboQuota = keyPool.remainingQuota("openai") * 100;
-    const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
+    const turboQuota = keyPool.activeLimitInUsd("openai");
+    const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });

-    if (config.quotaDisplayMode === "full") {
-      const turboUsage = keyPool.usageInUsd("openai");
-      const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
-      quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
-      quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
-    } else {
-      quota.turbo = `${Math.round(turboQuota)}%`;
-      quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
+    // Don't invert this condition; some proxies may be using the now-deprecated
+    // 'partial' option which we want to treat as 'full' here.
+    if (config.quotaDisplayMode !== "none") {
+      quota.turbo = turboQuota;
+      quota.gpt4 = gpt4Quota;
    }

    info.turbo = {
      activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
      trialKeys: turboKeys.filter((k) => k.isTrial).length,
-      quota: quota.turbo,
+      activeLimit: quota.turbo,
+      revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
+      overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
    };

    if (hasGpt4) {
      info.gpt4 = {
        activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
        trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
-        quota: quota.gpt4,
+        activeLimit: quota.gpt4,
+        revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
+        overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
      };
    }

    if (config.quotaDisplayMode === "none") {
-      delete info.turbo?.quota;
-      delete info.gpt4?.quota;
+      delete info.turbo?.activeLimit;
+      delete info.gpt4?.activeLimit;
    }
  } else {
    info.status = "Key checking is disabled." as any;
--- a/src/key-management/anthropic/provider.ts
+++ b/src/key-management/anthropic/provider.ts
@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
  }

-  public remainingQuota() {
-    const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
-    const allKeys = this.keys.length;
-    if (activeKeys === 0) return 0;
-    return Math.round((activeKeys / allKeys) * 100) / 100;
-  }
-
-  public usageInUsd() {
-    return "$0.00 / ∞";
+  public activeLimitInUsd() {
+    return "∞";
  }
 }
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
  anyUnchecked(): boolean;
  incrementPrompt(hash: string): void;
  getLockoutPeriod(model: Model): number;
-  remainingQuota(options?: Record<string, unknown>): number;
-  usageInUsd(options?: Record<string, unknown>): string;
+  activeLimitInUsd(options?: Record<string, unknown>): string;
  markRateLimited(hash: string): void;
 }

--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@ -32,9 +32,15 @@ export class KeyPool {
    return this.keyProviders.flatMap((provider) => provider.list());
  }

-  public disable(key: Key): void {
+  public disable(key: Key, reason: "quota" | "revoked"): void {
    const service = this.getKeyProvider(key.service);
    service.disable(key);
+    if (service instanceof OpenAIKeyProvider) {
+      service.update(key.hash, {
+        isRevoked: reason === "revoked",
+        isOverQuota: reason === "quota",
+      });
+    }
  }

  public update(key: Key, props: AllowedPartial): void {
@ -75,18 +81,11 @@ export class KeyPool {
    }
  }

-  public remainingQuota(
-    service: AIService,
-    options?: Record<string, unknown>
-  ): number {
-    return this.getKeyProvider(service).remainingQuota(options);
-  }
-
-  public usageInUsd(
+  public activeLimitInUsd(
    service: AIService,
    options?: Record<string, unknown>
  ): string {
-    return this.getKeyProvider(service).usageInUsd(options);
+    return this.getKeyProvider(service).activeLimitInUsd(options);
  }

  private getService(model: Model): AIService {
--- a/src/key-management/openai/checker.ts
+++ b/src/key-management/openai/checker.ts
@ -1,14 +1,24 @@
 import axios, { AxiosError } from "axios";
-import { Configuration, OpenAIApi } from "openai";
 import { logger } from "../../logger";
 import type { OpenAIKey, OpenAIKeyProvider } from "./provider";

+/** Minimum time in between any two key checks. */
 const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
-const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
+/**
+ * Minimum time in between checks for a given key. Because we can no longer
+ * read quota usage, there is little reason to check a single key more often
+ * than this.
+ **/
+const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour

+const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
+const GET_MODELS_URL = "https://api.openai.com/v1/models";
 const GET_SUBSCRIPTION_URL =
  "https://api.openai.com/dashboard/billing/subscription";
-const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
+
+type GetModelsResponse = {
+  data: [{ id: string }];
+};

 type GetSubscriptionResponse = {
  plan: { title: string };
@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
  system_hard_limit_usd: number;
 };

-type GetUsageResponse = {
-  total_usage: number;
-};
-
 type OpenAIError = {
  error: { type: string; code: string; param: unknown; message: string };
 };
@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
  /**
   * Schedules the next check. If there are still keys yet to be checked, it
   * will schedule a check immediately for the next unchecked key. Otherwise,
-   * it will schedule a check in several minutes for the oldest key.
+   * it will schedule a check for the least recently checked key, respecting
+   * the minimum check interval.
   **/
  private scheduleNextCheck() {
    const enabledKeys = this.keys.filter((key) => !key.isDisabled);
@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
      key.lastChecked < oldest.lastChecked ? key : oldest
    );

-    // Don't check any individual key more than once every 5 minutes.
-    // Also, don't check anything more often than once every 3 seconds.
+    // Don't check any individual key too often.
+    // Don't check anything at all at a rate faster than once per 3 seconds.
    const nextCheck = Math.max(
      oldestKey.lastChecked + KEY_CHECK_PERIOD,
      this.lastCheck + MIN_CHECK_INTERVAL
@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
    this.log.debug({ key: key.hash }, "Checking key...");
    let isInitialCheck = !key.lastChecked;
    try {
-      // During the initial check we need to get the subscription first because
-      // trials have different behavior.
+      // We only need to check for provisioned models on the initial check.
      if (isInitialCheck) {
-        const subscription = await this.getSubscription(key);
-        this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
-        if (key.isTrial) {
-          this.log.debug(
-            { key: key.hash },
-            "Attempting generation on trial key."
-          );
-          await this.assertCanGenerate(key);
-        }
-        const [provisionedModels, usage] = await Promise.all([
+        const [subscription, provisionedModels, _livenessTest] =
+          await Promise.all([
+            this.getSubscription(key),
            this.getProvisionedModels(key),
-          this.getUsage(key),
+            this.testLiveness(key),
          ]);
        const updates = {
          isGpt4: provisionedModels.gpt4,
+          isTrial: !subscription.has_payment_method,
          softLimit: subscription.soft_limit_usd,
          hardLimit: subscription.hard_limit_usd,
          systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
        };
        this.updateKey(key.hash, updates);
      } else {
-        // Don't check provisioned models after the initial check because it's
-        // not likely to change.
-        const [subscription, usage] = await Promise.all([
+        // Provisioned models don't change, so we don't need to check them again
+        const [subscription, _livenessTest] = await Promise.all([
          this.getSubscription(key),
-          this.getUsage(key),
+          this.testLiveness(key),
        ]);
        const updates = {
          softLimit: subscription.soft_limit_usd,
          hardLimit: subscription.hard_limit_usd,
          systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
        };
        this.updateKey(key.hash, updates);
      }
      this.log.info(
-        { key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
+        { key: key.hash, hardLimit: key.hardLimit },
        "Key check complete."
      );
    } catch (error) {
@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
  private async getProvisionedModels(
    key: OpenAIKey
  ): Promise<{ turbo: boolean; gpt4: boolean }> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    const models = (await openai.listModels()!).data.data;
+    const opts = { headers: { Authorization: `Bearer ${key.key}` } };
+    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
+    const models = data.data;
    const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
    const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
+    // We want to update the key's `isGpt4` flag here, but we don't want to
+    // update its `lastChecked` timestamp because we need to let the liveness
+    // check run before we can consider the key checked.
+
+    // Need to use `find` here because keys are cloned from the pool.
+    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
+    this.updateKey(key.hash, {
+      isGpt4: gpt4,
+      lastChecked: keyFromPool.lastChecked,
+    });
    return { turbo, gpt4 };
  }

@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
    return data;
  }

-  private async getUsage(key: OpenAIKey) {
-    const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
-    const url = `${GET_USAGE_URL}?${querystring}`;
-    const { data } = await axios.get<GetUsageResponse>(url, {
-      headers: { Authorization: `Bearer ${key.key}` },
-    });
-    return parseFloat((data.total_usage / 100).toFixed(2));
-  }
-
  private handleAxiosError(key: OpenAIKey, error: AxiosError) {
-    if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
+    if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
      const { status, data } = error.response;
      if (status === 401) {
        this.log.warn(
          { key: key.hash, error: data },
          "Key is invalid or revoked. Disabling key."
        );
-        this.updateKey(key.hash, { isDisabled: true });
-      } else if (status === 429 && data.error.type === "insufficient_quota") {
+        this.updateKey(key.hash, {
+          isDisabled: true,
+          isRevoked: true,
+          isGpt4: false,
+        });
+      } else if (status === 429) {
+        switch (data.error.type) {
+          case "insufficient_quota":
+          case "access_terminated":
+          case "billing_not_active":
+            const isOverQuota = data.error.type === "insufficient_quota";
+            const isRevoked = !isOverQuota;
+            const isGpt4 = isRevoked ? false : key.isGpt4;
            this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key is out of quota. Disabling key."
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Key returned a non-transient 429 error. Disabling key."
            );
-        this.updateKey(key.hash, { isDisabled: true });
+            this.updateKey(key.hash, {
+              isDisabled: true,
+              isRevoked,
+              isOverQuota,
+              isGpt4,
+            });
+            break;
+          case "requests":
+            // Trial keys have extremely low requests-per-minute limits and we
+            // can often hit them just while checking the key, so we need to
+            // retry the check later to know if the key has quota remaining.
+            this.log.warn(
+              { key: key.hash, error: data },
+              "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
+            );
+            // To trigger a shorter than usual delay before the next check, we
+            // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
+            // This will cause the usual key check scheduling logic to schedule
+            // the next check in 15 seconds. This also prevents the key from
+            // holding up startup checks for other keys.
+            const fifteenSeconds = 15 * 1000;
+            const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
+            this.updateKey(key.hash, { lastChecked: next });
+            break;
+          case "tokens":
+            // Hitting a token rate limit, even on a trial key, actually implies
+            // that the key is valid and can generate completions, so we will
+            // treat this as effectively a successful `testLiveness` call.
+            this.log.info(
+              { key: key.hash },
+              "Key is currently `tokens` rate limited; assuming it is operational."
+            );
+            this.updateKey(key.hash, { lastChecked: Date.now() });
+            break;
+          default:
+            this.log.error(
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
+            );
+            // We don't know what this error means, so we just let the key
+            // through and maybe it will fail when someone tries to use it.
+            this.updateKey(key.hash, { lastChecked: Date.now() });
        }
-      else if (status === 429 && data.error.type === "access_terminated") {
-        this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key has been terminated due to policy violations. Disabling key."
-        );
-        this.updateKey(key.hash, { isDisabled: true });
      } else {
        this.log.error(
          { key: key.hash, status, error: data },
-          "Encountered API error while checking key."
+          "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
        );
+        this.updateKey(key.hash, { lastChecked: Date.now() });
      }
      return;
    }
    this.log.error(
-      { key: key.hash, error },
-      "Network error while checking key; trying again later."
+      { key: key.hash, error: error.message },
+      "Network error while checking key; trying this key again in a minute."
    );
+    const oneMinute = 60 * 1000;
+    const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
+    this.updateKey(key.hash, { lastChecked: next });
  }

  /**
-   * Trial key usage reporting is inaccurate, so we need to run an actual
-   * completion to test them for liveness.
+   * Tests whether the key is valid and has quota remaining. The request we send
+   * is actually not valid, but keys which are revoked or out of quota will fail
+   * with a 401 or 429 error instead of the expected 400 Bad Request error.
+   * This lets us test keys without spending any quota.
   */
-  private async assertCanGenerate(key: OpenAIKey): Promise<void> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    // This will throw an AxiosError if the key is invalid or out of quota.
-    await openai.createChatCompletion({
+  private async testLiveness(key: OpenAIKey): Promise<void> {
+    const payload = {
      model: "gpt-3.5-turbo",
-      messages: [{ role: "user", content: "Hello" }],
-      max_tokens: 1,
-    });
+      max_tokens: -1,
+      messages: [{ role: "user", content: "" }],
+    };
+    const { data } = await axios.post<OpenAIError>(
+      POST_CHAT_COMPLETIONS_URL,
+      payload,
+      {
+        headers: { Authorization: `Bearer ${key.key}` },
+        validateStatus: (status) => status === 400,
+      }
+    );
+    if (data.error.type === "invalid_request_error") {
+      // This is the expected error type for our bad prompt, so key is valid.
+      return;
+    } else {
+      this.log.warn(
+        { key: key.hash, error: data },
+        "Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
+      );
+    }
  }

-  static getUsageQuerystring(isTrial: boolean) {
-    // For paid keys, the limit resets every month, so we can use the first day
-    // of the current month.
-    // For trial keys, the limit does not reset and we don't know when the key
-    // was created, so we use 99 days ago because that's as far back as the API
-    // will let us go.
-
-    // End date needs to be set to the beginning of the next day so that we get
-    // usage for the current day.
-
-    const today = new Date();
-    const startDate = isTrial
-      ? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
-      : new Date(today.getFullYear(), today.getMonth(), 1);
-    const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
-    return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
-      endDate.toISOString().split("T")[0]
-    }`;
-  }
-
-  static errorIsOpenAiError(
+  static errorIsOpenAIError(
    error: AxiosError
  ): error is AxiosError<OpenAIError> {
    const data = error.response?.data as any;
--- a/src/key-management/openai/provider.ts
+++ b/src/key-management/openai/provider.ts
@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [

 export interface OpenAIKey extends Key {
  readonly service: "openai";
-  /** The current usage of this key. */
-  usage: number;
+  /** Set when the key returns a 401, or a 429 of type `access_terminated` or `billing_not_active`. */
+  isRevoked: boolean;
+  /** Set when the key returns a 429 of type `insufficient_quota`. */
+  isOverQuota: boolean;
  /** Threshold at which a warning email will be sent by OpenAI. */
  softLimit: number;
  /** Threshold at which the key will be disabled because it has reached the user-defined limit. */
@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {

 export type OpenAIKeyUpdate = Omit<
  Partial<OpenAIKey>,
-  "key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
+  "key" | "hash" | "promptCount"
 >;

 export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
        isGpt4: true,
        isTrial: false,
        isDisabled: false,
+        isRevoked: false,
+        isOverQuota: false,
        softLimit: 0,
        hardLimit: 0,
        systemHardLimit: 0,
@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
  /** Called by the key checker to update key information. */
  public update(keyHash: string, update: OpenAIKeyUpdate) {
    const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
-    Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
+    Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
    // this.writeKeyStatus();
  }

@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
    const keyFromPool = this.keys.find((k) => k.key === key.key);
    if (!keyFromPool || keyFromPool.isDisabled) return;
    keyFromPool.isDisabled = true;
-    // If it's disabled just set the usage to the hard limit so it doesn't
-    // mess with the aggregate usage.
-    keyFromPool.usage = keyFromPool.hardLimit;
    this.log.warn({ key: key.hash }, "Key disabled");
  }

@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
  }

  /**
-   * Returns the remaining aggregate quota for all keys as a percentage.
-   * Can go slightly negative because keys will typically go slightly over their
-   * limit before being disabled.  Can sometimes go *really* negative if the
-   * cron job OpenAI uses to disable keys fails, as the key will essentially
-   * have unlimited quota.
-   **/
-  public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return 0;
-
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
+   * Returns the summed hard limit, in USD, of enabled keys whose `isGpt4` flag
+   * matches the `gpt4` option. Disabled keys are not included in the total.
+   */
+  public activeLimitInUsd(
+    { gpt4 }: { gpt4: boolean } = { gpt4: false }
+  ): string {
+    const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-
-    return 1 - totalUsage / totalLimit;
-  }
-
-  /** Returns used and available usage in USD. */
-  public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return "???";
-
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
-    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-
-    return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
+    return `$${totalLimit.toFixed(2)}`;
  }

  /** Writes key status to disk. */
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    }
  } else if (statusCode === 401) {
    // Key is invalid or was revoked
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
  } else if (statusCode === 429) {
    // OpenAI uses this for a bunch of different rate-limiting scenarios.
@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
  const type = errorPayload.error?.type;
  if (type === "insufficient_quota") {
    // Billing quota exceeded (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "quota");
    errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
  } else if (type === "access_terminated") {
    // Account banned (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
  } else if (type === "billing_not_active") {
    // Billing is not active (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
    errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
  } else if (type === "requests" || type === "tokens") {
    // Per-minute request or token rate limit is exceeded, which we can retry