Handle AWS HTTP 503 ServiceUnavailableException the same way as HTTP 429 rate-limit errors
This commit is contained in:
parent
1d5b8efa23
commit
ffcaa23511
|
@ -212,11 +212,16 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
delete errorPayload.message;
|
delete errorPayload.message;
|
||||||
} else if (service === "gcp") {
|
} else if (service === "gcp") {
|
||||||
// Try to standardize the error format for GCP
|
// Try to standardize the error format for GCP
|
||||||
if (errorPayload.error?.code) { // GCP Error
|
if (errorPayload.error?.code) {
|
||||||
errorPayload.error = { message: errorPayload.error.message, type: errorPayload.error.status || errorPayload.error.code };
|
// GCP Error
|
||||||
|
errorPayload.error = {
|
||||||
|
message: errorPayload.error.message,
|
||||||
|
type: errorPayload.error.status || errorPayload.error.code,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: this error handling should be switched on the service first, not on the HTTP status code
|
||||||
if (statusCode === 400) {
|
if (statusCode === 400) {
|
||||||
switch (service) {
|
switch (service) {
|
||||||
case "openai":
|
case "openai":
|
||||||
|
@ -358,6 +363,18 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
default:
|
default:
|
||||||
assertNever(service);
|
assertNever(service);
|
||||||
}
|
}
|
||||||
|
} else if (statusCode === 503) {
|
||||||
|
switch (service) {
|
||||||
|
case "aws":
|
||||||
|
if (
|
||||||
|
errorPayload.error?.type === "ServiceUnavailableException" &&
|
||||||
|
errorPayload.error?.message?.match(/too many connections/i)
|
||||||
|
) {
|
||||||
|
// This is effectively a 429 rate limit error under another name.
|
||||||
|
await handleAwsRateLimitError(req, errorPayload);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
errorPayload.proxy_note = `Unrecognized error from upstream service.`;
|
errorPayload.proxy_note = `Unrecognized error from upstream service.`;
|
||||||
}
|
}
|
||||||
|
@ -446,6 +463,7 @@ async function handleAwsRateLimitError(
|
||||||
) {
|
) {
|
||||||
const errorType = errorPayload.error?.type;
|
const errorType = errorPayload.error?.type;
|
||||||
switch (errorType) {
|
switch (errorType) {
|
||||||
|
case "ServiceUnavailableException":
|
||||||
case "ThrottlingException":
|
case "ThrottlingException":
|
||||||
keyPool.markRateLimited(req.key!);
|
keyPool.markRateLimited(req.key!);
|
||||||
await reenqueueRequest(req);
|
await reenqueueRequest(req);
|
||||||
|
|
|
@ -29,7 +29,7 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
|
||||||
* Upon being rate limited, a key will be locked out for this many milliseconds
|
* Upon being rate limited, a key will be locked out for this many milliseconds
|
||||||
* while we wait for other concurrent requests to finish.
|
* while we wait for other concurrent requests to finish.
|
||||||
*/
|
*/
|
||||||
const RATE_LIMIT_LOCKOUT = 4000;
|
const RATE_LIMIT_LOCKOUT = 5000;
|
||||||
/**
|
/**
|
||||||
* Upon assigning a key, we will wait this many milliseconds before allowing it
|
* Upon assigning a key, we will wait this many milliseconds before allowing it
|
||||||
* to be used again. This is to prevent the queue from flooding a key with too
|
* to be used again. This is to prevent the queue from flooding a key with too
|
||||||
|
|
Loading…
Reference in New Issue