improves keychecker stability with rate-limited trial keys
parent 4d781e1720
commit e462ad585e
@@ -139,19 +139,15 @@ export class OpenAIKeyChecker {
         const updates = {
           modelFamilies: provisionedModels,
           isTrial: livenessTest.rateLimit <= 250,
-          softLimit: 0,
-          hardLimit: 0,
-          systemHardLimit: 0,
         };
         this.updateKey(key.hash, updates);
       } else {
-        // Provisioned models don't change, so we don't need to check them again
+        // No updates needed as models and trial status generally don't change.
         const [_livenessTest] = await Promise.all([this.testLiveness(key)]);
-        const updates = { softLimit: 0, hardLimit: 0, systemHardLimit: 0 };
-        this.updateKey(key.hash, updates);
+        this.updateKey(key.hash, {});
       }
       this.log.info(
-        { key: key.hash, models: key.modelFamilies },
+        { key: key.hash, models: key.modelFamilies, trial: key.isTrial },
         "Key check complete."
       );
     } catch (error) {
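A note on the `isTrial` line kept above: a key whose reported request limit is at or below 250 is treated as a trial key. A minimal sketch of that heuristic, where only the threshold and the header name come from elsewhere in this diff and the function name and header-reading shape are illustrative assumptions:

```ts
// Sketch of the trial heuristic: trial keys report a very low request limit
// (the comments later in this diff mention ~200), so anything at or below 250
// is classified as a trial key.
function isTrialKey(headers: Record<string, string | undefined>): boolean {
  const limit = Number(headers["x-ratelimit-limit-requests"]);
  return Number.isFinite(limit) && limit <= 250;
}

// e.g. isTrialKey({ "x-ratelimit-limit-requests": "200" })  === true
// e.g. isTrialKey({ "x-ratelimit-limit-requests": "3500" }) === false
```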
@@ -164,6 +160,10 @@ export class OpenAIKeyChecker {
     // Only enqueue the next check if this wasn't a startup check, since those
     // are batched together elsewhere.
     if (!isInitialCheck) {
+      this.log.info(
+        { key: key.hash },
+        "Recurring keychecks are disabled, no-op."
+      );
       // this.scheduleNextCheck();
     }
   }
@@ -192,7 +192,6 @@ export class OpenAIKeyChecker {
     // update its `lastChecked` timestamp because we need to let the liveness
     // check run before we can consider the key checked.

-    // Need to use `find` here because keys are cloned from the pool.
     const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
     this.updateKey(key.hash, {
       modelFamilies: families,
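The context above explains that the key's `lastChecked` timestamp must not be bumped here so the liveness check still runs, and that the checker works on clones, so the authoritative entry is looked up in the pool by hash. A rough self-contained sketch of that pattern; the types, names, and the idea of carrying `lastChecked` forward are illustrative assumptions rather than the project's actual code:

```ts
// Illustrative sketch: keys handed to the checker are clones, so read the
// authoritative entry back out of the pool before building an update that
// should not disturb its lastChecked timestamp.
interface PooledKey {
  hash: string;
  lastChecked: number;
  modelFamilies: string[];
}

function buildModelUpdate(
  pool: PooledKey[],
  clone: PooledKey,
  families: string[]
) {
  const keyFromPool = pool.find((k) => k.hash === clone.hash)!;
  return {
    modelFamilies: families,
    // Assumed: carry the existing timestamp forward so the key still looks
    // unchecked until the liveness test completes.
    lastChecked: keyFromPool.lastChecked,
  };
}
```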
@@ -259,21 +258,32 @@ export class OpenAIKeyChecker {
         });
         break;
       case "requests":
-        // Trial keys have extremely low requests-per-minute limits and we
-        // can often hit them just while checking the key, so we need to
-        // retry the check later to know if the key has quota remaining.
-        this.log.warn(
-          { key: key.hash, error: data },
-          "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
-        );
-        // To trigger a shorter than usual delay before the next check, we
-        // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
-        // This will cause the usual key check scheduling logic to schedule
-        // the next check in 15 seconds. This also prevents the key from
-        // holding up startup checks for other keys.
-        const fifteenSeconds = 15 * 1000;
-        const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
-        this.updateKey(key.hash, { lastChecked: next });
+        // If we hit the text completion rate limit on a trial key, it is
+        // likely being used by many proxies. We will disable the key since
+        // it's just going to be constantly rate limited.
+        const isTrial =
+          Number(error.response.headers["x-ratelimit-limit-requests"]) <=
+          250;
+
+        if (isTrial) {
+          this.log.warn(
+            { key: key.hash, error: data },
+            "Trial key is rate limited on text completion endpoint. This indicates the key is being used by several proxies at once and is not likely to be usable. Disabling key."
+          );
+          this.updateKey(key.hash, {
+            isTrial,
+            isDisabled: true,
+            isOverQuota: true,
+            modelFamilies: ["turbo"],
+            lastChecked: Date.now(),
+          });
+        } else {
+          this.log.warn(
+            { key: key.hash, error: data },
+            "Non-trial key is rate limited on text completion endpoint. This is unusual and may indicate a bug. Assuming key is operational."
+          );
+          this.updateKey(key.hash, { lastChecked: Date.now() });
+        }
         break;
       case "tokens":
         // Hitting a token rate limit, even on a trial key, actually implies
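The retry path removed above relied on a scheduling trick: if the checker treats a key as due once `KEY_CHECK_PERIOD` has elapsed since `lastChecked`, then back-dating `lastChecked` to `now - (KEY_CHECK_PERIOD - 15s)` makes the key due again in roughly fifteen seconds. A worked sketch of that arithmetic, assuming a scheduler of that shape; the `KEY_CHECK_PERIOD` value is only a placeholder:

```ts
// Assumed scheduler contract: a key is due once now - lastChecked >= KEY_CHECK_PERIOD.
const KEY_CHECK_PERIOD = 5 * 60 * 1000; // placeholder period for illustration
const fifteenSeconds = 15 * 1000;

const now = Date.now();
const backdated = now - (KEY_CHECK_PERIOD - fifteenSeconds);

// Due at backdated + KEY_CHECK_PERIOD
//   = (now - KEY_CHECK_PERIOD + 15s) + KEY_CHECK_PERIOD
//   = now + 15s
const dueIn = backdated + KEY_CHECK_PERIOD - now;
console.log(dueIn === fifteenSeconds); // true
```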
@@ -321,8 +331,26 @@ export class OpenAIKeyChecker {
    * We use the rate limit header to determine whether it's a trial key.
    */
   private async testLiveness(key: OpenAIKey): Promise<{ rateLimit: number }> {
+    // What the hell this is doing:
+
+    // OpenAI enforces separate rate limits for chat and text completions. Trial
+    // keys have extremely low rate limits of 200 per day per API type. In order
+    // to avoid wasting more valuable chat quota, we send an (invalid) chat
+    // request to Babbage (a text completion model). Even though our request is
+    // to the chat endpoint, we get text rate limit headers back because the
+    // requested model determines the rate limit used, not the endpoint.
+
+    // Once we have headers, we can determine:
+    // 1. Is the key revoked? (401, OAI doesn't even validate the request)
+    // 2. Is the key out of quota? (400, OAI will still validate the request)
+    // 3. Is the key a trial key? (400, x-ratelimit-limit-requests: 200)
+
+    // This might still cause issues if too many proxies are running a train on
+    // the same trial key and even the text completion quota is exhausted, but
+    // it should work better than the alternative.
+
     const payload = {
-      model: "gpt-3.5-turbo",
+      model: "babbage-002",
       max_tokens: -1,
       messages: [{ role: "user", content: "" }],
     };
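The comment block added above describes the probe: send a deliberately invalid chat request for `babbage-002` so only the cheaper text-completion rate limit is consumed, then inspect the status code and the `x-ratelimit-limit-requests` header on the rejected response. A minimal standalone sketch of that idea, assuming axios and the standard chat completions URL; the function name and return shape are illustrative, not the project's API:

```ts
import axios from "axios";

// Sketch of the liveness probe described above. The request is intentionally
// invalid (max_tokens: -1, empty message) so OpenAI rejects it after
// authenticating the key, and only the text-completion limit is touched.
async function probeKey(apiKey: string) {
  const payload = {
    model: "babbage-002",
    max_tokens: -1,
    messages: [{ role: "user", content: "" }],
  };
  const res = await axios.post(
    "https://api.openai.com/v1/chat/completions",
    payload,
    {
      headers: { Authorization: `Bearer ${apiKey}` },
      validateStatus: () => true, // inspect 400/401 instead of throwing
    }
  );

  if (res.status === 401) return { revoked: true };
  // A 400 is the expected outcome here (per the comments above it can also
  // indicate an out-of-quota key); the rate limit header reveals trial status.
  const rateLimit = Number(res.headers["x-ratelimit-limit-requests"]);
  return { revoked: false, rateLimit, trial: rateLimit <= 250 };
}
```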
@@ -30,12 +30,6 @@ export interface OpenAIKey extends Key {
   isRevoked: boolean;
   /** Set when key check returns a non-transient 429. */
   isOverQuota: boolean;
-  /** Threshold at which a warning email will be sent by OpenAI. */
-  softLimit: number;
-  /** Threshold at which the key will be disabled because it has reached the user-defined limit. */
-  hardLimit: number;
-  /** The maximum quota allocated to this key by OpenAI. */
-  systemHardLimit: number;
   /** The time at which this key was last rate limited. */
   rateLimitedAt: number;
   /**
@@ -359,7 +353,14 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   }

   public recheck() {
-    this.keys.forEach((key) => (key.lastChecked = 0));
+    this.keys.forEach((key) => {
+      this.update(key.hash, {
+        isRevoked: false,
+        isOverQuota: false,
+        isDisabled: false,
+        lastChecked: 0,
+      });
+    });
     this.checker?.scheduleNextCheck();
   }
