adjusts AWS keychecker to treat rate limited models as available models

This commit is contained in:
nai-degen 2024-10-23 15:35:32 -05:00
parent f3b876887e
commit 55f7337ea4
1 changed files with 27 additions and 4 deletions

View File

@ -108,6 +108,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
} }
// Perform checks for all parent model IDs // Perform checks for all parent model IDs
// TODO: use allsettled
const results = await Promise.all( const results = await Promise.all(
KNOWN_MODEL_IDS.filter(([model]) => KNOWN_MODEL_IDS.filter(([model]) =>
// Skip checks for models that are disabled anyway // Skip checks for models that are disabled anyway
@ -181,9 +182,9 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
// not necessarily disabled. Retry in 10 seconds. // not necessarily disabled. Retry in 10 seconds.
this.log.warn( this.log.warn(
{ key: key.hash, errorType, error: error.response.data }, { key: key.hash, errorType, error: error.response.data },
"Key is rate limited. Rechecking in 10 seconds." "Key is rate limited. Rechecking in 30 seconds."
); );
const next = Date.now() - (KEY_CHECK_PERIOD - 10 * 1000); const next = Date.now() - (KEY_CHECK_PERIOD - 30 * 1000);
return this.updateKey(key.hash, { lastChecked: next }); return this.updateKey(key.hash, { lastChecked: next });
case "ValidationException": case "ValidationException":
default: default:
@ -239,7 +240,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
} catch (e) { } catch (e) {
this.log.error( this.log.error(
{ key: key.hash, model, profile, error: e.message }, { key: key.hash, model, profile, error: e.message },
"Error testing model with inference profile; trying model ID directly." "InvokeModel via inference profile returned an error; trying model ID directly."
); );
result = false; result = false;
} }
@ -249,6 +250,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
// profile will be used when the key is used for inference. // profile will be used when the key is used for inference.
if (result) return true; if (result) return true;
} }
this.log.debug({ key: key.hash, model }, "Testing model via model ID.");
return this.testClaudeModel(key, model); return this.testClaudeModel(key, model);
} else if (model.includes("mistral")) { } else if (model.includes("mistral")) {
return this.testMistralModel(key, model); return this.testMistralModel(key, model);
@ -274,7 +276,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
method: "POST", method: "POST",
url: POST_INVOKE_MODEL_URL(creds.region, model), url: POST_INVOKE_MODEL_URL(creds.region, model),
data: payload, data: payload,
validateStatus: (status) => [400, 403, 404, 503].includes(status), validateStatus: (status) => [400, 403, 404, 429, 503].includes(status),
}; };
config.headers = new AxiosHeaders({ config.headers = new AxiosHeaders({
"content-type": "application/json", "content-type": "application/json",
@ -298,6 +300,27 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
return true; return true;
} }
// 429 ThrottlingException can suggest the model is available but the key
// is being rate limited. I think if a key does not have access to the
// model, it cannot receive a 429 response, so this should be a success.
if (status === 429) {
if (errorType.match(/ThrosttlingException/i)) {
this.log.debug(
{ key: key.hash, model, errorType, data, status, headers },
"Model is available but key is rate limited."
);
return true;
} else {
throw new AxiosError(
`InvokeModel returned 429 of type ${errorType}`,
`AWS_INVOKE_MODEL_RATE_LIMITED`,
response.config,
response.request,
response
);
}
}
// This message indicates the key is valid but this particular model is not // This message indicates the key is valid but this particular model is not
// accessible. Other 403s may indicate the key is not usable. // accessible. Other 403s may indicate the key is not usable.
if ( if (