From 55f7337ea419011b47de90b829602f7c8ff5e3b7 Mon Sep 17 00:00:00 2001 From: nai-degen Date: Wed, 23 Oct 2024 15:35:32 -0500 Subject: [PATCH] adjusts AWS keychecker to treat rate limited models as available models --- src/shared/key-management/aws/checker.ts | 31 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/shared/key-management/aws/checker.ts b/src/shared/key-management/aws/checker.ts index 2bdca91..1ce0950 100644 --- a/src/shared/key-management/aws/checker.ts +++ b/src/shared/key-management/aws/checker.ts @@ -108,6 +108,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- } // Perform checks for all parent model IDs + // TODO: use allsettled const results = await Promise.all( KNOWN_MODEL_IDS.filter(([model]) => // Skip checks for models that are disabled anyway @@ -181,9 +182,9 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- // not necessarily disabled. Retry in 10 seconds. this.log.warn( { key: key.hash, errorType, error: error.response.data }, - "Key is rate limited. Rechecking in 10 seconds." + "Key is rate limited. Rechecking in 30 seconds." ); - const next = Date.now() - (KEY_CHECK_PERIOD - 10 * 1000); + const next = Date.now() - (KEY_CHECK_PERIOD - 30 * 1000); return this.updateKey(key.hash, { lastChecked: next }); case "ValidationException": default: @@ -239,7 +240,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- } catch (e) { this.log.error( { key: key.hash, model, profile, error: e.message }, - "Error testing model with inference profile; trying model ID directly." + "InvokeModel via inference profile returned an error; trying model ID directly." ); result = false; } @@ -249,6 +250,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- // profile will be used when the key is used for inference. if (result) return true; } + this.log.debug({ key: key.hash, model }, "Testing model via model ID."); return this.testClaudeModel(key, model); } else if (model.includes("mistral")) { return this.testMistralModel(key, model); @@ -274,7 +276,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- method: "POST", url: POST_INVOKE_MODEL_URL(creds.region, model), data: payload, - validateStatus: (status) => [400, 403, 404, 503].includes(status), + validateStatus: (status) => [400, 403, 404, 429, 503].includes(status), }; config.headers = new AxiosHeaders({ "content-type": "application/json", @@ -298,6 +300,27 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference- return true; } + // 429 ThrottlingException can suggest the model is available but the key + // is being rate limited. I think if a key does not have access to the + // model, it cannot receive a 429 response, so this should be a success. + if (status === 429) { + if (errorType.match(/ThrosttlingException/i)) { + this.log.debug( + { key: key.hash, model, errorType, data, status, headers }, + "Model is available but key is rate limited." + ); + return true; + } else { + throw new AxiosError( + `InvokeModel returned 429 of type ${errorType}`, + `AWS_INVOKE_MODEL_RATE_LIMITED`, + response.config, + response.request, + response + ); + } + } + // This message indicates the key is valid but this particular model is not // accessible. Other 403s may indicate the key is not usable. if (