Handle AWS HTTP 503 ServiceUnavailableException the same way as HTTP 429 rate-limit errors

2024-09-09 08:07:08 -05:00 · 2024-09-09 08:07:08 -05:00 · ffcaa23511
parent 1d5b8efa23
commit ffcaa23511
2 changed files with 23 additions and 5 deletions
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -212,11 +212,16 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    delete errorPayload.message;
  } else if (service === "gcp") {
    // Try to standardize the error format for GCP
-    if (errorPayload.error?.code) { // GCP Error
+    if (errorPayload.error?.code) {
-      errorPayload.error = { message: errorPayload.error.message, type: errorPayload.error.status || errorPayload.error.code };
+      // GCP Error
      errorPayload.error = {
        message: errorPayload.error.message,
        type: errorPayload.error.status || errorPayload.error.code,
      };
    }
  }
  // TODO: this shitshow should be switched on the service, not the error code
  if (statusCode === 400) {
    switch (service) {
      case "openai":
@@ -358,6 +363,18 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
      default:
        assertNever(service);
    }
  } else if (statusCode === 503) {
    switch (service) {
      case "aws":
        if (
          errorPayload.error?.type === "ServiceUnavailableException" &&
          errorPayload.error?.message?.match(/too many connections/i)
        ) {
          // This is effectively a 429 rate limit error under another name.
          await handleAwsRateLimitError(req, errorPayload);
        }
        break;
    }
  } else {
    errorPayload.proxy_note = `Unrecognized error from upstream service.`;
  }
@@ -446,6 +463,7 @@ async function handleAwsRateLimitError(
 ) {
  const errorType = errorPayload.error?.type;
  switch (errorType) {
    case "ServiceUnavailableException":
    case "ThrottlingException":
      keyPool.markRateLimited(req.key!);
      await reenqueueRequest(req);
--- a/src/shared/key-management/aws/provider.ts
+++ b/src/shared/key-management/aws/provider.ts
@@ -29,7 +29,7 @@ export interface AwsBedrockKey extends Key, AwsBedrockKeyUsage {
 * Upon being rate limited, a key will be locked out for this many milliseconds
 * while we wait for other concurrent requests to finish.
 */
-const RATE_LIMIT_LOCKOUT = 4000;
+const RATE_LIMIT_LOCKOUT = 5000;
 /**
 * Upon assigning a key, we will wait this many milliseconds before allowing it
 * to be used again. This is to prevent the queue from flooding a key with too