From 55f7337ea419011b47de90b829602f7c8ff5e3b7 Mon Sep 17 00:00:00 2001
From: nai-degen <khoners@protonmail.com>
Date: Wed, 23 Oct 2024 15:35:32 -0500
Subject: [PATCH] adjusts AWS keychecker to treat rate limited models as
 available models

---
 src/shared/key-management/aws/checker.ts | 31 +++++++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts
index 2bdca91..1ce0950 100644
--- a/src/shared/key-management/aws/checker.ts
+++ b/src/shared/key-management/aws/checker.ts
@@ -108,6 +108,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
     }
 
     // Perform checks for all parent model IDs
+    // TODO: use Promise.allSettled so one failed check doesn't discard the rest
     const results = await Promise.all(
       KNOWN_MODEL_IDS.filter(([model]) =>
         // Skip checks for models that are disabled anyway
@@ -181,9 +182,9 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
           // not necessarily disabled. Retry in 10 seconds.
           this.log.warn(
             { key: key.hash, errorType, error: error.response.data },
-            "Key is rate limited. Rechecking in 10 seconds."
+            "Key is rate limited. Rechecking in 30 seconds."
           );
-          const next = Date.now() - (KEY_CHECK_PERIOD - 10 * 1000);
+          const next = Date.now() - (KEY_CHECK_PERIOD - 30 * 1000);
           return this.updateKey(key.hash, { lastChecked: next });
         case "ValidationException":
         default:
@@ -239,7 +240,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
         } catch (e) {
           this.log.error(
             { key: key.hash, model, profile, error: e.message },
-            "Error testing model with inference profile; trying model ID directly."
+            "InvokeModel via inference profile returned an error; trying model ID directly."
           );
           result = false;
         }
@@ -249,6 +250,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
         // profile will be used when the key is used for inference.
         if (result) return true;
       }
+      this.log.debug({ key: key.hash, model }, "Testing model via model ID.");
       return this.testClaudeModel(key, model);
     } else if (model.includes("mistral")) {
       return this.testMistralModel(key, model);
@@ -274,7 +276,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
       method: "POST",
       url: POST_INVOKE_MODEL_URL(creds.region, model),
       data: payload,
-      validateStatus: (status) => [400, 403, 404, 503].includes(status),
+      validateStatus: (status) => [400, 403, 404, 429, 503].includes(status),
     };
     config.headers = new AxiosHeaders({
       "content-type": "application/json",
@@ -298,6 +300,27 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
       return true;
     }
 
+    // 429 ThrottlingException can suggest the model is available but the key
+    // is being rate limited. I think if a key does not have access to the
+    // model, it cannot receive a 429 response, so this should be a success.
+    if (status === 429) {
+      if (errorType.match(/ThrottlingException/i)) {
+        this.log.debug(
+          { key: key.hash, model, errorType, data, status, headers },
+          "Model is available but key is rate limited."
+        );
+        return true;
+      } else {
+        throw new AxiosError(
+          `InvokeModel returned 429 of type ${errorType}`,
+          `AWS_INVOKE_MODEL_RATE_LIMITED`,
+          response.config,
+          response.request,
+          response
+        );
+      }
+    }
+
     // This message indicates the key is valid but this particular model is not
     // accessible. Other 403s may indicate the key is not usable.
     if (