From aa5380d2ef9d2a2034fd3eff747e877933a4e5c7 Mon Sep 17 00:00:00 2001
From: khanon <khoners@protonmail.com>
Date: Fri, 21 Jul 2023 04:00:12 +0000
Subject: [PATCH] Rework OpenAIKeyChecker to remove usage tracking and test all
 keys for liveness (khanon/oai-reverse-proxy!29)

---
 package-lock.json                        |  18 --
 package.json                             |   1 -
 src/config.ts                            |   8 +-
 src/info-page.ts                         |  46 +++--
 src/key-management/anthropic/provider.ts |  11 +-
 src/key-management/index.ts              |   3 +-
 src/key-management/key-pool.ts           |  19 +-
 src/key-management/openai/checker.ts     | 228 ++++++++++++++---------
 src/key-management/openai/provider.ts    |  47 ++---
 src/proxy/middleware/response/index.ts   |   8 +-
 10 files changed, 199 insertions(+), 190 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 78f88d9..93101b1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -16,7 +16,6 @@
         "firebase-admin": "^11.10.1",
         "googleapis": "^122.0.0",
         "http-proxy-middleware": "^3.0.0-beta.1",
-        "openai": "^3.2.1",
         "pino": "^8.11.0",
         "pino-http": "^8.3.3",
         "showdown": "^2.1.0",
@@ -3163,23 +3162,6 @@
         "wrappy": "1"
       }
     },
-    "node_modules/openai": {
-      "version": "3.2.1",
-      "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
-      "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
-      "dependencies": {
-        "axios": "^0.26.0",
-        "form-data": "^4.0.0"
-      }
-    },
-    "node_modules/openai/node_modules/axios": {
-      "version": "0.26.1",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
-      "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
-      "dependencies": {
-        "follow-redirects": "^1.14.8"
-      }
-    },
     "node_modules/p-limit": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
diff --git a/package.json b/package.json
index a30e876..c866f8a 100644
--- a/package.json
+++ b/package.json
@@ -25,7 +25,6 @@
     "firebase-admin": "^11.10.1",
     "googleapis": "^122.0.0",
     "http-proxy-middleware": "^3.0.0-beta.1",
-    "openai": "^3.2.1",
     "pino": "^8.11.0",
     "pino-http": "^8.3.3",
     "showdown": "^2.1.0",
diff --git a/src/config.ts b/src/config.ts
index 9fa76a2..8d0ab06 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -88,11 +88,11 @@ type Config = {
    *
    * `none`: Hide quota information
    *
-   * `partial`: Display quota information only as a percentage
+   * `partial`: (deprecated) Same as `full` because usage is no longer tracked
    *
-   * `full`: Display quota information as usage against total capacity
+   * `full`: Displays information about keys' quota limits
    */
-  quotaDisplayMode: "none" | "partial" | "full";
+  quotaDisplayMode: "none" | "full";
   /**
    * Which request queueing strategy to use when keys are over their rate limit.
    *
@@ -152,7 +152,7 @@ export const config: Config = {
   ),
   logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
   checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
-  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
+  quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
   promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
   promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
   googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
diff --git a/src/info-page.ts b/src/info-page.ts
index 3c79729..51cdaa0 100644
--- a/src/info-page.ts
+++ b/src/info-page.ts
@@ -2,7 +2,7 @@ import fs from "fs";
 import { Request, Response } from "express";
 import showdown from "showdown";
 import { config, listConfig } from "./config";
-import { keyPool } from "./key-management";
+import { OpenAIKey, keyPool } from "./key-management";
 import { getUniqueIps } from "./proxy/rate-limit";
 import {
   QueuePartition,
@@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
 type ServiceInfo = {
   activeKeys: number;
   trialKeys?: number;
-  quota: string;
+  activeLimit: string;
+  revokedKeys?: number;
+  overQuotaKeys?: number;
   proomptersInQueue: number;
   estimatedQueueTime: string;
 };
@@ -88,51 +90,55 @@ type ServiceInfo = {
 
 function getOpenAIInfo() {
   const info: { [model: string]: Partial<ServiceInfo> } = {};
-  const keys = keyPool.list().filter((k) => k.service === "openai");
+  const keys = keyPool
+    .list()
+    .filter((k) => k.service === "openai") as OpenAIKey[];
   const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
 
   if (keyPool.anyUnchecked()) {
     const uncheckedKeys = keys.filter((k) => !k.lastChecked);
-    info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
+    info.status =
+      `Performing startup key checks (${uncheckedKeys.length} left).` as any;
   } else {
     delete info.status;
   }
 
   if (config.checkKeys) {
-    const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
-    const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
+    const turboKeys = keys.filter((k) => !k.isGpt4);
+    const gpt4Keys = keys.filter((k) => k.isGpt4);
 
     const quota: Record<string, string> = { turbo: "", gpt4: "" };
-    const turboQuota = keyPool.remainingQuota("openai") * 100;
-    const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
+    const turboQuota = keyPool.activeLimitInUsd("openai");
+    const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
 
-    if (config.quotaDisplayMode === "full") {
-      const turboUsage = keyPool.usageInUsd("openai");
-      const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
-      quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
-      quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
-    } else {
-      quota.turbo = `${Math.round(turboQuota)}%`;
-      quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
+    // Don't invert this condition; some proxies may be using the now-deprecated
+    // 'partial' option which we want to treat as 'full' here.
+    if (config.quotaDisplayMode !== "none") {
+      quota.turbo = turboQuota;
+      quota.gpt4 = gpt4Quota;
     }
 
     info.turbo = {
       activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
       trialKeys: turboKeys.filter((k) => k.isTrial).length,
-      quota: quota.turbo,
+      activeLimit: quota.turbo,
+      revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
+      overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
     };
 
     if (hasGpt4) {
       info.gpt4 = {
         activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
         trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
-        quota: quota.gpt4,
+        activeLimit: quota.gpt4,
+        revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
+        overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
       };
     }
 
     if (config.quotaDisplayMode === "none") {
-      delete info.turbo?.quota;
-      delete info.gpt4?.quota;
+      delete info.turbo?.activeLimit;
+      delete info.gpt4?.activeLimit;
     }
   } else {
     info.status = "Key checking is disabled." as any;
diff --git a/src/key-management/anthropic/provider.ts b/src/key-management/anthropic/provider.ts
index 870b485..cba5b4e 100644
--- a/src/key-management/anthropic/provider.ts
+++ b/src/key-management/anthropic/provider.ts
@@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
     key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
   }
 
-  public remainingQuota() {
-    const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
-    const allKeys = this.keys.length;
-    if (activeKeys === 0) return 0;
-    return Math.round((activeKeys / allKeys) * 100) / 100;
-  }
-
-  public usageInUsd() {
-    return "$0.00 / ∞";
+  public activeLimitInUsd() {
+    return "∞";
   }
 }
diff --git a/src/key-management/index.ts b/src/key-management/index.ts
index 13c73a5..a7b3c73 100644
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
   anyUnchecked(): boolean;
   incrementPrompt(hash: string): void;
   getLockoutPeriod(model: Model): number;
-  remainingQuota(options?: Record<string, unknown>): number;
-  usageInUsd(options?: Record<string, unknown>): string;
+  activeLimitInUsd(options?: Record<string, unknown>): string;
   markRateLimited(hash: string): void;
 }
 
diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts
index 1f100a5..8a000eb 100644
--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@@ -32,9 +32,15 @@ export class KeyPool {
     return this.keyProviders.flatMap((provider) => provider.list());
   }
 
-  public disable(key: Key): void {
+  public disable(key: Key, reason: "quota" | "revoked"): void {
     const service = this.getKeyProvider(key.service);
     service.disable(key);
+    if (service instanceof OpenAIKeyProvider) {
+      service.update(key.hash, {
+        isRevoked: reason === "revoked",
+        isOverQuota: reason === "quota",
+      });
+    }
   }
 
   public update(key: Key, props: AllowedPartial): void {
@@ -75,18 +81,11 @@ export class KeyPool {
     }
   }
 
-  public remainingQuota(
-    service: AIService,
-    options?: Record<string, unknown>
-  ): number {
-    return this.getKeyProvider(service).remainingQuota(options);
-  }
-
-  public usageInUsd(
+  public activeLimitInUsd(
     service: AIService,
     options?: Record<string, unknown>
   ): string {
-    return this.getKeyProvider(service).usageInUsd(options);
+    return this.getKeyProvider(service).activeLimitInUsd(options);
   }
 
   private getService(model: Model): AIService {
diff --git a/src/key-management/openai/checker.ts b/src/key-management/openai/checker.ts
index be29862..10922eb 100644
--- a/src/key-management/openai/checker.ts
+++ b/src/key-management/openai/checker.ts
@@ -1,14 +1,24 @@
 import axios, { AxiosError } from "axios";
-import { Configuration, OpenAIApi } from "openai";
 import { logger } from "../../logger";
 import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
 
+/** Minimum time in between any two key checks. */
 const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
-const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
+/**
+ * Minimum time in between checks for a given key. Because we can no longer
+ * read quota usage, there is little reason to check a single key more often
+ * than this.
+ **/
+const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
 
+const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
+const GET_MODELS_URL = "https://api.openai.com/v1/models";
 const GET_SUBSCRIPTION_URL =
   "https://api.openai.com/dashboard/billing/subscription";
-const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
+
+type GetModelsResponse = {
+  data: [{ id: string }];
+};
 
 type GetSubscriptionResponse = {
   plan: { title: string };
@@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
   system_hard_limit_usd: number;
 };
 
-type GetUsageResponse = {
-  total_usage: number;
-};
-
 type OpenAIError = {
   error: { type: string; code: string; param: unknown; message: string };
 };
@@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
   /**
    * Schedules the next check. If there are still keys yet to be checked, it
    * will schedule a check immediately for the next unchecked key. Otherwise,
-   * it will schedule a check in several minutes for the oldest key.
+   * it will schedule a check for the least recently checked key, respecting
+   * the minimum check interval.
    **/
   private scheduleNextCheck() {
     const enabledKeys = this.keys.filter((key) => !key.isDisabled);
@@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
       key.lastChecked < oldest.lastChecked ? key : oldest
     );
 
-    // Don't check any individual key more than once every 5 minutes.
-    // Also, don't check anything more often than once every 3 seconds.
+    // Don't check any individual key too often.
+    // Don't check anything at all at a rate faster than once per 3 seconds.
     const nextCheck = Math.max(
       oldestKey.lastChecked + KEY_CHECK_PERIOD,
       this.lastCheck + MIN_CHECK_INTERVAL
@@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
     this.log.debug({ key: key.hash }, "Checking key...");
     let isInitialCheck = !key.lastChecked;
     try {
-      // During the initial check we need to get the subscription first because
-      // trials have different behavior.
+      // We only need to check for provisioned models on the initial check.
       if (isInitialCheck) {
-        const subscription = await this.getSubscription(key);
-        this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
-        if (key.isTrial) {
-          this.log.debug(
-            { key: key.hash },
-            "Attempting generation on trial key."
-          );
-          await this.assertCanGenerate(key);
-        }
-        const [provisionedModels, usage] = await Promise.all([
-          this.getProvisionedModels(key),
-          this.getUsage(key),
-        ]);
+        const [subscription, provisionedModels, _livenessTest] =
+          await Promise.all([
+            this.getSubscription(key),
+            this.getProvisionedModels(key),
+            this.testLiveness(key),
+          ]);
         const updates = {
           isGpt4: provisionedModels.gpt4,
+          isTrial: !subscription.has_payment_method,
           softLimit: subscription.soft_limit_usd,
           hardLimit: subscription.hard_limit_usd,
           systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
         };
         this.updateKey(key.hash, updates);
       } else {
-        // Don't check provisioned models after the initial check because it's
-        // not likely to change.
-        const [subscription, usage] = await Promise.all([
+        // Provisioned models don't change, so we don't need to check them again
+        const [subscription, _livenessTest] = await Promise.all([
           this.getSubscription(key),
-          this.getUsage(key),
+          this.testLiveness(key),
         ]);
         const updates = {
           softLimit: subscription.soft_limit_usd,
           hardLimit: subscription.hard_limit_usd,
           systemHardLimit: subscription.system_hard_limit_usd,
-          usage,
         };
         this.updateKey(key.hash, updates);
       }
       this.log.info(
-        { key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
+        { key: key.hash, hardLimit: key.hardLimit },
         "Key check complete."
       );
     } catch (error) {
@@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
   private async getProvisionedModels(
     key: OpenAIKey
   ): Promise<{ turbo: boolean; gpt4: boolean }> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    const models = (await openai.listModels()!).data.data;
+    const opts = { headers: { Authorization: `Bearer ${key.key}` } };
+    const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
+    const models = data.data;
     const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
     const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
+    // We want to update the key's `isGpt4` flag here, but we don't want to
+    // update its `lastChecked` timestamp because we need to let the liveness
+    // check run before we can consider the key checked.
+
+    // Need to use `find` here because keys are cloned from the pool.
+    const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
+    this.updateKey(key.hash, {
+      isGpt4: gpt4,
+      lastChecked: keyFromPool.lastChecked,
+    });
     return { turbo, gpt4 };
   }
 
@@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
     return data;
   }
 
-  private async getUsage(key: OpenAIKey) {
-    const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
-    const url = `${GET_USAGE_URL}?${querystring}`;
-    const { data } = await axios.get<GetUsageResponse>(url, {
-      headers: { Authorization: `Bearer ${key.key}` },
-    });
-    return parseFloat((data.total_usage / 100).toFixed(2));
-  }
-
   private handleAxiosError(key: OpenAIKey, error: AxiosError) {
-    if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
+    if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
       const { status, data } = error.response;
       if (status === 401) {
         this.log.warn(
           { key: key.hash, error: data },
           "Key is invalid or revoked. Disabling key."
         );
-        this.updateKey(key.hash, { isDisabled: true });
-      } else if (status === 429 && data.error.type === "insufficient_quota") {
-        this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key is out of quota. Disabling key."
-        );
-        this.updateKey(key.hash, { isDisabled: true });
-      }
-      else if (status === 429 && data.error.type === "access_terminated") {
-        this.log.warn(
-          { key: key.hash, isTrial: key.isTrial, error: data },
-          "Key has been terminated due to policy violations. Disabling key."
-        );
-        this.updateKey(key.hash, { isDisabled: true });
+        this.updateKey(key.hash, {
+          isDisabled: true,
+          isRevoked: true,
+          isGpt4: false,
+        });
+      } else if (status === 429) {
+        switch (data.error.type) {
+          case "insufficient_quota":
+          case "access_terminated":
+          case "billing_not_active":
+            const isOverQuota = data.error.type === "insufficient_quota";
+            const isRevoked = !isOverQuota;
+            const isGpt4 = isRevoked ? false : key.isGpt4;
+            this.log.warn(
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Key returned a non-transient 429 error. Disabling key."
+            );
+            this.updateKey(key.hash, {
+              isDisabled: true,
+              isRevoked,
+              isOverQuota,
+              isGpt4,
+            });
+            break;
+          case "requests":
+            // Trial keys have extremely low requests-per-minute limits and we
+            // can often hit them just while checking the key, so we need to
+            // retry the check later to know if the key has quota remaining.
+            this.log.warn(
+              { key: key.hash, error: data },
+              "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
+            );
+            // To trigger a shorter than usual delay before the next check, we
+            // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
+            // This will cause the usual key check scheduling logic to schedule
+            // the next check in 15 seconds. This also prevents the key from
+            // holding up startup checks for other keys.
+            const fifteenSeconds = 15 * 1000;
+            const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
+            this.updateKey(key.hash, { lastChecked: next });
+            break;
+          case "tokens":
+            // Hitting a token rate limit, even on a trial key, actually implies
+            // that the key is valid and can generate completions, so we will
+            // treat this as effectively a successful `testLiveness` call.
+            this.log.info(
+              { key: key.hash },
+              "Key is currently `tokens` rate limited; assuming it is operational."
+            );
+            this.updateKey(key.hash, { lastChecked: Date.now() });
+            break;
+          default:
+            this.log.error(
+              { key: key.hash, rateLimitType: data.error.type, error: data },
+              "Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
+            );
+            // We don't know what this error means, so we just let the key
+            // through and maybe it will fail when someone tries to use it.
+            this.updateKey(key.hash, { lastChecked: Date.now() });
+        }
       } else {
         this.log.error(
           { key: key.hash, status, error: data },
-          "Encountered API error while checking key."
+          "Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
         );
+        this.updateKey(key.hash, { lastChecked: Date.now() });
       }
       return;
     }
     this.log.error(
-      { key: key.hash, error },
-      "Network error while checking key; trying again later."
+      { key: key.hash, error: error.message },
+      "Network error while checking key; trying this key again in a minute."
     );
+    const oneMinute = 60 * 1000;
+    const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
+    this.updateKey(key.hash, { lastChecked: next });
   }
 
   /**
-   * Trial key usage reporting is inaccurate, so we need to run an actual
-   * completion to test them for liveness.
+   * Tests whether the key is valid and has quota remaining. The request we send
+   * is actually not valid, but keys which are revoked or out of quota will fail
+   * with a 401 or 429 error instead of the expected 400 Bad Request error.
+   * This lets us test keys without spending any quota.
    */
-  private async assertCanGenerate(key: OpenAIKey): Promise<void> {
-    const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
-    // This will throw an AxiosError if the key is invalid or out of quota.
-    await openai.createChatCompletion({
+  private async testLiveness(key: OpenAIKey): Promise<void> {
+    const payload = {
       model: "gpt-3.5-turbo",
-      messages: [{ role: "user", content: "Hello" }],
-      max_tokens: 1,
-    });
+      max_tokens: -1,
+      messages: [{ role: "user", content: "" }],
+    };
+    const { data } = await axios.post<OpenAIError>(
+      POST_CHAT_COMPLETIONS_URL,
+      payload,
+      {
+        headers: { Authorization: `Bearer ${key.key}` },
+        validateStatus: (status) => status === 400,
+      }
+    );
+    if (data.error.type === "invalid_request_error") {
+      // This is the expected error type for our bad prompt, so key is valid.
+      return;
+    } else {
+      this.log.warn(
+        { key: key.hash, error: data },
+        "Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
+      );
+    }
   }
 
-  static getUsageQuerystring(isTrial: boolean) {
-    // For paid keys, the limit resets every month, so we can use the first day
-    // of the current month.
-    // For trial keys, the limit does not reset and we don't know when the key
-    // was created, so we use 99 days ago because that's as far back as the API
-    // will let us go.
-
-    // End date needs to be set to the beginning of the next day so that we get
-    // usage for the current day.
-
-    const today = new Date();
-    const startDate = isTrial
-      ? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
-      : new Date(today.getFullYear(), today.getMonth(), 1);
-    const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
-    return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
-      endDate.toISOString().split("T")[0]
-    }`;
-  }
-
-  static errorIsOpenAiError(
+  static errorIsOpenAIError(
     error: AxiosError
   ): error is AxiosError<OpenAIError> {
     const data = error.response?.data as any;
diff --git a/src/key-management/openai/provider.ts b/src/key-management/openai/provider.ts
index 712a4cc..082b642 100644
--- a/src/key-management/openai/provider.ts
+++ b/src/key-management/openai/provider.ts
@@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
 
 export interface OpenAIKey extends Key {
   readonly service: "openai";
-  /** The current usage of this key. */
-  usage: number;
+  /** Set when the key is invalid or revoked (401, or a 429 of type `access_terminated` or `billing_not_active`). */
+  isRevoked: boolean;
+  /** Set when the key has exhausted its quota (a 429 of type `insufficient_quota`). */
+  isOverQuota: boolean;
   /** Threshold at which a warning email will be sent by OpenAI. */
   softLimit: number;
   /** Threshold at which the key will be disabled because it has reached the user-defined limit. */
@@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
 
 export type OpenAIKeyUpdate = Omit<
   Partial<OpenAIKey>,
-  "key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
+  "key" | "hash" | "promptCount"
 >;
 
 export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
@@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
         isGpt4: true,
         isTrial: false,
         isDisabled: false,
+        isRevoked: false,
+        isOverQuota: false,
         softLimit: 0,
         hardLimit: 0,
         systemHardLimit: 0,
@@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   /** Called by the key checker to update key information. */
   public update(keyHash: string, update: OpenAIKeyUpdate) {
     const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
-    Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
+    Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
     // this.writeKeyStatus();
   }
 
@@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
     const keyFromPool = this.keys.find((k) => k.key === key.key);
     if (!keyFromPool || keyFromPool.isDisabled) return;
     keyFromPool.isDisabled = true;
-    // If it's disabled just set the usage to the hard limit so it doesn't
-    // mess with the aggregate usage.
-    keyFromPool.usage = keyFromPool.hardLimit;
     this.log.warn({ key: key.hash }, "Key disabled");
   }
 
@@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   }
 
   /**
-   * Returns the remaining aggregate quota for all keys as a percentage.
-   * Can go slightly negative because keys will typically go slightly over their
-   * limit before being disabled.  Can sometimes go *really* negative if the
-   * cron job OpenAI uses to disable keys fails, as the key will essentially
-   * have unlimited quota.
-   **/
-  public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return 0;
-
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
+   * Returns the total quota limit of all keys in USD. Keys which are disabled
+   * are not included in the total.
+   */
+  public activeLimitInUsd(
+    { gpt4 }: { gpt4: boolean } = { gpt4: false }
+  ): string {
+    const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
     const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-
-    return 1 - totalUsage / totalLimit;
-  }
-
-  /** Returns used and available usage in USD. */
-  public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
-    const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
-    if (keys.length === 0) return "???";
-
-    const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
-    const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
-
-    return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
+    return `$${totalLimit.toFixed(2)}`;
   }
 
   /** Writes key status to disk. */
diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index 4f02a3c..6c9ca7f 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     }
   } else if (statusCode === 401) {
     // Key is invalid or was revoked
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
   } else if (statusCode === 429) {
     // OpenAI uses this for a bunch of different rate-limiting scenarios.
@@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
   const type = errorPayload.error?.type;
   if (type === "insufficient_quota") {
     // Billing quota exceeded (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "quota");
     errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
   } else if (type === "access_terminated") {
     // Account banned (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
   } else if (type === "billing_not_active") {
     // Billing is not active (key is dead, disable it)
-    keyPool.disable(req.key!);
+    keyPool.disable(req.key!, "revoked");
     errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
   } else if (type === "requests" || type === "tokens") {
     // Per-minute request or token rate limit is exceeded, which we can retry