Handle AWS HTTP 503 ServiceUnavailableException ("too many connections") the same way as a 429 rate limit

This commit is contained in:
nai-degen 2024-09-09 08:07:08 -05:00
parent 1d5b8efa23
commit ffcaa23511
2 changed files with 23 additions and 5 deletions

View File

@ -212,11 +212,16 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
delete errorPayload.message; delete errorPayload.message;
} else if (service === "gcp") { } else if (service === "gcp") {
// Try to standardize the error format for GCP // Try to standardize the error format for GCP
if (errorPayload.error?.code) { // GCP Error if (errorPayload.error?.code) {
errorPayload.error = { message: errorPayload.error.message, type: errorPayload.error.status || errorPayload.error.code }; // GCP Error
errorPayload.error = {
message: errorPayload.error.message,
type: errorPayload.error.status || errorPayload.error.code,
};
} }
} }
// TODO: this error handling should branch on the service first, not on the HTTP status code
if (statusCode === 400) { if (statusCode === 400) {
switch (service) { switch (service) {
case "openai": case "openai":
@ -358,6 +363,18 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
default: default:
assertNever(service); assertNever(service);
} }
} else if (statusCode === 503) {
switch (service) {
case "aws":
if (
errorPayload.error?.type === "ServiceUnavailableException" &&
errorPayload.error?.message?.match(/too many connections/i)
) {
// This is effectively a 429 rate limit error under another name.
await handleAwsRateLimitError(req, errorPayload);
}
break;
}
} else { } else {
errorPayload.proxy_note = `Unrecognized error from upstream service.`; errorPayload.proxy_note = `Unrecognized error from upstream service.`;
} }
@ -446,6 +463,7 @@ async function handleAwsRateLimitError(
) { ) {
const errorType = errorPayload.error?.type; const errorType = errorPayload.error?.type;
switch (errorType) { switch (errorType) {
case "ServiceUnavailableException":
case "ThrottlingException": case "ThrottlingException":
keyPool.markRateLimited(req.key!); keyPool.markRateLimited(req.key!);
await reenqueueRequest(req); await reenqueueRequest(req);

View File

@ -29,7 +29,7 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
* Upon being rate limited, a key will be locked out for this many milliseconds * Upon being rate limited, a key will be locked out for this many milliseconds
* while we wait for other concurrent requests to finish. * while we wait for other concurrent requests to finish.
*/ */
const RATE_LIMIT_LOCKOUT = 4000; const RATE_LIMIT_LOCKOUT = 5000;
/** /**
* Upon assigning a key, we will wait this many milliseconds before allowing it * Upon assigning a key, we will wait this many milliseconds before allowing it
* to be used again. This is to prevent the queue from flooding a key with too * to be used again. This is to prevent the queue from flooding a key with too