From ffcaa23511e301f667aedb201e7ad7eadfde0ba7 Mon Sep 17 00:00:00 2001
From: nai-degen
Date: Mon, 9 Sep 2024 08:07:08 -0500
Subject: [PATCH] handles AWS HTTP 503 ServiceUnavailableException similarly to
 429s

---
 src/proxy/middleware/response/index.ts    | 26 +++++++++++++++++++----
 src/shared/key-management/aws/provider.ts |  2 +-
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index a67e1ea..968477d 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -212,11 +212,16 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     delete errorPayload.message;
   } else if (service === "gcp") {
     // Try to standardize the error format for GCP
-    if (errorPayload.error?.code) { // GCP Error
-      errorPayload.error = { message: errorPayload.error.message, type: errorPayload.error.status || errorPayload.error.code };
+    if (errorPayload.error?.code) {
+      // GCP Error
+      errorPayload.error = {
+        message: errorPayload.error.message,
+        type: errorPayload.error.status || errorPayload.error.code,
+      };
     }
   }
 
+  // TODO: this shitshow should be switched on the service, not the error code
   if (statusCode === 400) {
     switch (service) {
       case "openai":
@@ -293,8 +298,8 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
           errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
         }
         return;
-      case "mistral-ai":
-      case "gcp":
+      case "mistral-ai":
+      case "gcp":
         keyPool.disable(req.key!, "revoked");
         errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
         return;
@@ -358,6 +363,18 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
       default:
         assertNever(service);
     }
+  } else if (statusCode === 503) {
+    switch (service) {
+      case "aws":
+        if (
+          errorPayload.error?.type === "ServiceUnavailableException" &&
+          errorPayload.error?.message?.match(/too many connections/i)
+        ) {
+          // This is effectively a 429 rate limit error under another name.
+          await handleAwsRateLimitError(req, errorPayload);
+        }
+        break;
+    }
   } else {
     errorPayload.proxy_note = `Unrecognized error from upstream service.`;
   }
@@ -446,6 +463,7 @@ async function handleAwsRateLimitError(
 ) {
   const errorType = errorPayload.error?.type;
   switch (errorType) {
+    case "ServiceUnavailableException":
     case "ThrottlingException":
       keyPool.markRateLimited(req.key!);
       await reenqueueRequest(req);
diff --git a/src/shared/key-management/aws/provider.ts b/src/shared/key-management/aws/provider.ts
index 442eadd..74cd26f 100644
--- a/src/shared/key-management/aws/provider.ts
+++ b/src/shared/key-management/aws/provider.ts
@@ -29,7 +29,7 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
  * Upon being rate limited, a key will be locked out for this many milliseconds
  * while we wait for other concurrent requests to finish.
  */
-const RATE_LIMIT_LOCKOUT = 4000;
+const RATE_LIMIT_LOCKOUT = 5000;
 /**
  * Upon assigning a key, we will wait this many milliseconds before allowing it
  * to be used again. This is to prevent the queue from flooding a key with too
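
Reviewer note (not part of the patch): the TypeScript sketch below restates the retry rule this diff implements, namely that an AWS 503 ServiceUnavailableException whose message mentions "too many connections" is handled the same way as a 429 ThrottlingException: the key is briefly marked rate-limited and the request is re-enqueued. The ErrorPayload shape, RetryDecision type, and classifyAwsError helper here are simplified stand-ins for illustration only, not the project's actual types or API.

// Reviewer sketch only. ErrorPayload and RetryDecision are simplified
// stand-ins, not the project's real types.
interface ErrorPayload {
  error?: { type?: string; message?: string };
}

type RetryDecision = "reenqueue" | "fail";

// Mirrors the rule added by this patch: a 429 ThrottlingException, or a
// 503 ServiceUnavailableException caused by "too many connections", is a
// transient capacity problem and is retried rather than surfaced as an error.
function classifyAwsError(
  statusCode: number,
  payload: ErrorPayload
): RetryDecision {
  const type = payload.error?.type;
  const message = payload.error?.message ?? "";

  if (statusCode === 429 && type === "ThrottlingException") {
    return "reenqueue";
  }
  if (
    statusCode === 503 &&
    type === "ServiceUnavailableException" &&
    /too many connections/i.test(message)
  ) {
    return "reenqueue";
  }
  return "fail";
}

// The new 503 case is retried exactly like a 429:
console.log(
  classifyAwsError(503, {
    error: {
      type: "ServiceUnavailableException",
      message: "Too many connections, please wait before trying again.",
    },
  })
); // -> "reenqueue"

Checking both the exception type and the "too many connections" message, as the diff does, keeps other 503 conditions (genuine service outages) from being silently remapped into retries.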