adjusts AWS keychecker to treat rate limited models as available models
This commit is contained in:
parent
f3b876887e
commit
55f7337ea4
|
@ -108,6 +108,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
}
|
}
|
||||||
|
|
||||||
// Perform checks for all parent model IDs
|
// Perform checks for all parent model IDs
|
||||||
|
// TODO: use allsettled
|
||||||
const results = await Promise.all(
|
const results = await Promise.all(
|
||||||
KNOWN_MODEL_IDS.filter(([model]) =>
|
KNOWN_MODEL_IDS.filter(([model]) =>
|
||||||
// Skip checks for models that are disabled anyway
|
// Skip checks for models that are disabled anyway
|
||||||
|
@ -181,9 +182,9 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
// not necessarily disabled. Retry in 10 seconds.
|
// not necessarily disabled. Retry in 10 seconds.
|
||||||
this.log.warn(
|
this.log.warn(
|
||||||
{ key: key.hash, errorType, error: error.response.data },
|
{ key: key.hash, errorType, error: error.response.data },
|
||||||
"Key is rate limited. Rechecking in 10 seconds."
|
"Key is rate limited. Rechecking in 30 seconds."
|
||||||
);
|
);
|
||||||
const next = Date.now() - (KEY_CHECK_PERIOD - 10 * 1000);
|
const next = Date.now() - (KEY_CHECK_PERIOD - 30 * 1000);
|
||||||
return this.updateKey(key.hash, { lastChecked: next });
|
return this.updateKey(key.hash, { lastChecked: next });
|
||||||
case "ValidationException":
|
case "ValidationException":
|
||||||
default:
|
default:
|
||||||
|
@ -239,7 +240,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.log.error(
|
this.log.error(
|
||||||
{ key: key.hash, model, profile, error: e.message },
|
{ key: key.hash, model, profile, error: e.message },
|
||||||
"Error testing model with inference profile; trying model ID directly."
|
"InvokeModel via inference profile returned an error; trying model ID directly."
|
||||||
);
|
);
|
||||||
result = false;
|
result = false;
|
||||||
}
|
}
|
||||||
|
@ -249,6 +250,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
// profile will be used when the key is used for inference.
|
// profile will be used when the key is used for inference.
|
||||||
if (result) return true;
|
if (result) return true;
|
||||||
}
|
}
|
||||||
|
this.log.debug({ key: key.hash, model }, "Testing model via model ID.");
|
||||||
return this.testClaudeModel(key, model);
|
return this.testClaudeModel(key, model);
|
||||||
} else if (model.includes("mistral")) {
|
} else if (model.includes("mistral")) {
|
||||||
return this.testMistralModel(key, model);
|
return this.testMistralModel(key, model);
|
||||||
|
@ -274,7 +276,7 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
method: "POST",
|
method: "POST",
|
||||||
url: POST_INVOKE_MODEL_URL(creds.region, model),
|
url: POST_INVOKE_MODEL_URL(creds.region, model),
|
||||||
data: payload,
|
data: payload,
|
||||||
validateStatus: (status) => [400, 403, 404, 503].includes(status),
|
validateStatus: (status) => [400, 403, 404, 429, 503].includes(status),
|
||||||
};
|
};
|
||||||
config.headers = new AxiosHeaders({
|
config.headers = new AxiosHeaders({
|
||||||
"content-type": "application/json",
|
"content-type": "application/json",
|
||||||
|
@ -298,6 +300,27 @@ See https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference-
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 429 ThrottlingException can suggest the model is available but the key
|
||||||
|
// is being rate limited. I think if a key does not have access to the
|
||||||
|
// model, it cannot receive a 429 response, so this should be a success.
|
||||||
|
if (status === 429) {
|
||||||
|
if (errorType.match(/ThrosttlingException/i)) {
|
||||||
|
this.log.debug(
|
||||||
|
{ key: key.hash, model, errorType, data, status, headers },
|
||||||
|
"Model is available but key is rate limited."
|
||||||
|
);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
throw new AxiosError(
|
||||||
|
`InvokeModel returned 429 of type ${errorType}`,
|
||||||
|
`AWS_INVOKE_MODEL_RATE_LIMITED`,
|
||||||
|
response.config,
|
||||||
|
response.request,
|
||||||
|
response
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// This message indicates the key is valid but this particular model is not
|
// This message indicates the key is valid but this particular model is not
|
||||||
// accessible. Other 403s may indicate the key is not usable.
|
// accessible. Other 403s may indicate the key is not usable.
|
||||||
if (
|
if (
|
||||||
|
|
Loading…
Reference in New Issue