From b6f8f15a1fa009daa8e5cabbb316c9303c057246 Mon Sep 17 00:00:00 2001
From: nai-degen <khoners@protonmail.com>
Date: Mon, 6 Nov 2023 21:16:36 -0600
Subject: [PATCH] tries to prevent per-day rate limited keys from bricking the
 queue

---
 src/proxy/middleware/response/index.ts       |  8 ++-
 src/shared/key-management/openai/provider.ts | 76 ++++++++++++--------
 2 files changed, 54 insertions(+), 30 deletions(-)
diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index 1f1c7b8..b398975 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -469,8 +469,14 @@ function handleOpenAIRateLimitError(
       break;
     case "requests":
     case "tokens":
-      // Per-minute request or token rate limit is exceeded, which we can retry
       keyPool.markRateLimited(req.key!);
+      if (errorPayload.error?.message?.match(/on requests per day/)) {
+        // The key hit its per-day request limit, so the request is not re-enqueued.
+        errorPayload.proxy_note = `Assigned key has reached its per-day request limit for this model. Try another model.`;
+        break;
+      }
+
+      // Per-minute request or token rate limit is exceeded, which we can retry
       reenqueueRequest(req);
       throw new RetryableError("Rate-limited request re-enqueued.");
     default:
diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts
index e0db07d..301f19a 100644
--- a/src/shared/key-management/openai/provider.ts
+++ b/src/shared/key-management/openai/provider.ts
@@ -65,6 +65,10 @@ export interface OpenAIKey extends Key, OpenAIKeyUsage {
    * tokens.
    */
   rateLimitTokensReset: number;
+  /**
+   * This key's maximum request rate for GPT-4, per minute.
+   */
+  gpt4Rpm: number;
 }
 
 export type OpenAIKeyUpdate = Omit<
@@ -123,6 +127,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
         gpt4Tokens: 0,
         "gpt4-32kTokens": 0,
         "gpt4-turboTokens": 0,
+        gpt4Rpm: 0,
       };
       this.keys.push(newKey);
     }
@@ -301,7 +306,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
         key.rateLimitRequestsReset,
         key.rateLimitTokensReset
       );
-      return now < key.rateLimitedAt + resetTime;
+      return now < key.rateLimitedAt + Math.min(10000, resetTime)
     }).length;
     const anyNotRateLimited = rateLimitedKeys < activeKeys.length;
 
@@ -310,14 +315,16 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
     }
 
     // If all keys are rate-limited, return the time until the first key is
-    // ready.
+    // ready. We don't want to wait longer than 10 seconds because rate limits
+    // are a rolling window and keys may become available sooner than the stated
+    // reset time.
     return Math.min(
       ...activeKeys.map((key) => {
         const resetTime = Math.max(
           key.rateLimitRequestsReset,
           key.rateLimitTokensReset
         );
-        return key.rateLimitedAt + resetTime - now;
+        return key.rateLimitedAt + Math.min(10000, resetTime) - now;
       })
     );
   }
@@ -340,30 +347,16 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
     const requestsReset = headers["x-ratelimit-reset-requests"];
     const tokensReset = headers["x-ratelimit-reset-tokens"];
 
-    // Sometimes OpenAI only sends one of the two rate limit headers, it's
-    // unclear why.
-
-    if (requestsReset && typeof requestsReset === "string") {
-      this.log.debug(
-        { key: key.hash, requestsReset },
-        `Updating rate limit requests reset time`
-      );
+    if (typeof requestsReset === "string") {
       key.rateLimitRequestsReset = getResetDurationMillis(requestsReset);
     }
 
-    if (tokensReset && typeof tokensReset === "string") {
-      this.log.debug(
-        { key: key.hash, tokensReset },
-        `Updating rate limit tokens reset time`
-      );
+    if (typeof tokensReset === "string") {
       key.rateLimitTokensReset = getResetDurationMillis(tokensReset);
     }
 
     if (!requestsReset && !tokensReset) {
-      this.log.warn(
-        { key: key.hash },
-        `No rate limit headers in OpenAI response; skipping update`
-      );
+      this.log.warn({ key: key.hash }, `No ratelimit headers; skipping update`);
       return;
     }
   }
@@ -402,19 +395,44 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
   }
 }
 
+// WIP (no visible caller): estimates requests/minute from headers; 0 if missing.
+function calculateRequestsPerMinute(headers: http.IncomingHttpHeaders) {
+  const requestsLimit = headers["x-ratelimit-limit-requests"];
+  const requestsReset = headers["x-ratelimit-reset-requests"];
+
+  if (typeof requestsLimit !== "string" || typeof requestsReset !== "string") {
+    return 0;
+  }
+
+  const limit = parseInt(requestsLimit, 10);
+  const reset = getResetDurationMillis(requestsReset);
+
+  // A `reset` under one minute is taken to mean `limit` is per minute; else
+  // `limit` is treated as per day and divided by 1440 (minutes in a day).
+  // NOTE(review): a non-numeric `limit` header makes parseInt return NaN.
+  const isPerMinute = reset < 60000;
+  if (isPerMinute) return limit;
+  return limit / 1440;
+}
+
 /**
- * Converts reset string ("21.0032s" or "21ms") to a number of milliseconds.
- * Result is clamped to 10s even though the API returns up to 60s, because the
- * API returns the time until the entire quota is reset, even if a key may be
- * able to fulfill requests before then due to partial resets.
+ * Converts a reset string ("14m25s", "21.0032s" or "21ms") to milliseconds.
+ * Returns 0 if nothing parses, e.g. an hours part ("1h2m3s") or "21.5ms".
  **/
 function getResetDurationMillis(resetDuration?: string): number {
-  const match = resetDuration?.match(/(\d+(\.\d+)?)(s|ms)/);
+  const match = resetDuration?.match(
+    /(?:(\d+)m(?!s))?(?:(\d+(?:\.\d+)?)s)?(?:(\d+)ms)?/
+  );
+  // Every group is optional, so this always matches (maybe the empty string).
   if (match) {
-    const [, time, , unit] = match;
-    const value = parseFloat(time);
-    const result = unit === "s" ? value * 1000 : value;
-    return Math.min(result, 10000);
+    const [, minutes, seconds, milliseconds] = match.map(Number);
+    // Unmatched groups are undefined -> NaN after Number(); `|| 0` zeroes them.
+    const minutesToMillis = (minutes || 0) * 60 * 1000;
+    const secondsToMillis = (seconds || 0) * 1000;
+    const millisecondsValue = milliseconds || 0;
+
+    return minutesToMillis + secondsToMillis + millisecondsValue;
   }
+
   return 0;
 }