adds Claude over-quota detection

2024-02-17 15:56:22 -06:00 · 2024-02-17 15:56:22 -06:00 · 68d829bceb
parent 9c03290a3d
commit 68d829bceb
4 changed files with 40 additions and 32 deletions
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -310,7 +310,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
        break;
      case "anthropic":
      case "aws":
-        await maybeHandleMissingPreambleError(req, errorPayload);
+        await handleAnthropicBadRequestError(req, errorPayload);
        break;
      default:
        assertNever(service);
@@ -411,33 +411,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
  throw new HttpError(statusCode, errorPayload.error?.message);
 };

-/**
- * This is a workaround for a very strange issue where certain API keys seem to
- * enforce more strict input validation than others -- specifically, they will
- * require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from
- * being used as a generic text completion service and to enforce the use of
- * the chat RLHF.  This is not documented anywhere, and it's not clear why some
- * keys enforce this and others don't.
- * This middleware checks for that specific error and marks the key as being
- * one that requires the prefix, and then re-enqueues the request.
- * The exact error is:
- * ```
- * {
- *   "error": {
- *     "type": "invalid_request_error",
- *     "message": "prompt must start with \"\n\nHuman:\" turn"
- *   }
- * }
- * ```
- */
-async function maybeHandleMissingPreambleError(
+async function handleAnthropicBadRequestError(
  req: Request,
  errorPayload: ProxiedErrorPayload
 ) {
-  if (
-    errorPayload.error?.type === "invalid_request_error" &&
-    errorPayload.error?.message.startsWith('prompt must start with "\n\nHuman:" turn')
-  ) {
+  const { error } = errorPayload;
+  const isMissingPreamble = error?.message.startsWith(
+    `prompt must start with "\n\nHuman:" turn`
+  );
+
+  // Some keys mandate a \n\nHuman: preamble, which we can add and retry
+  if (isMissingPreamble) {
    req.log.warn(
      { key: req.key?.hash },
      "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
@@ -445,9 +429,22 @@ async function maybeHandleMissingPreambleError(
    keyPool.update(req.key!, { requiresPreamble: true });
    await reenqueueRequest(req);
    throw new RetryableError("Claude request re-enqueued to add preamble.");
-  } else {
-    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
  }
+
+  // Only affects Anthropic keys
+  // {"type":"error","error":{"type":"invalid_request_error","message":"Usage blocked until 2024-03-01T00:00:00+00:00 due to user specified spend limits."}}
+  const isOverQuota = error?.message?.match(/usage blocked until/i);
+  if (isOverQuota) {
+    req.log.warn(
+      { key: req.key?.hash, message: error?.message },
+      "Anthropic key has hit spending limit and will be disabled."
+    );
+    keyPool.disable(req.key!, "quota");
+    errorPayload.proxy_note = `Assigned key has hit its spending limit. ${error?.message}`;
+    return;
+  }
+
+  errorPayload.proxy_note = `Unrecognized 400 Bad Request error from the API.`;
 }

 async function handleAnthropicRateLimitError(
@@ -459,7 +456,7 @@ async function handleAnthropicRateLimitError(
    await reenqueueRequest(req);
    throw new RetryableError("Claude rate-limited request re-enqueued.");
  } else {
-    errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`;
+    errorPayload.proxy_note = `Unrecognized 429 Too Many Requests error from the API.`;
  }
 }

--- a/src/service-info.ts
+++ b/src/service-info.ts
@@ -1,4 +1,3 @@
-/** Calculates and returns stats about the service. */
 import { config, listConfig } from "./config";
 import {
  AnthropicKey,
@@ -78,7 +77,10 @@ type OpenAIInfo = BaseFamilyInfo & {
  trialKeys?: number;
  overQuotaKeys?: number;
 };
-type AnthropicInfo = BaseFamilyInfo & { prefilledKeys?: number };
+type AnthropicInfo = BaseFamilyInfo & {
+  prefilledKeys?: number;
+  overQuotaKeys?: number;
+};
 type AwsInfo = BaseFamilyInfo & { privacy?: string };

 // prettier-ignore
@@ -277,7 +279,11 @@ function addKeyToAggregates(k: KeyPoolKey) {
  increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0);
  increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0);
  increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0);
-  increment(serviceStats, "mistral-ai__keys", k.service === "mistral-ai" ? 1 : 0);
+  increment(
+    serviceStats,
+    "mistral-ai__keys",
+    k.service === "mistral-ai" ? 1 : 0
+  );
  increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0);
  increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0);

@@ -321,6 +327,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
      sumTokens += k.claudeTokens;
      sumCost += getTokenCostUsd(family, k.claudeTokens);
      increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
+      increment(modelStats, `${family}__overQuota`, k.isOverQuota ? 1 : 0);
      increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
      increment(modelStats, `${family}__tokens`, k.claudeTokens);
      increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0);
@@ -404,6 +411,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
        }
        break;
      case "anthropic":
+        info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
        info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
        break;
      case "aws":
--- a/src/shared/key-management/anthropic/provider.ts
+++ b/src/shared/key-management/anthropic/provider.ts
@@ -52,6 +52,7 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
   * outputting copyrighted material, which still interferes with outputs.
   */
  isPozzed: boolean;
+  isOverQuota: boolean;
 }

 /**
@@ -89,6 +90,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
        service: this.service,
        modelFamilies: ["claude"],
        isDisabled: false,
+        isOverQuota: false,
        isRevoked: false,
        isPozzed: false,
        promptCount: 0,
@@ -219,6 +221,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    this.keys.forEach((key) => {
      this.update(key.hash, {
        isPozzed: false,
+        isOverQuota: false,
        isDisabled: false,
        isRevoked: false,
        lastChecked: 0,
--- a/src/shared/key-management/key-pool.ts
+++ b/src/shared/key-management/key-pool.ts
@@ -59,7 +59,7 @@ export class KeyPool {
    const service = this.getKeyProvider(key.service);
    service.disable(key);
    service.update(key.hash, { isRevoked: reason === "revoked" });
-    if (service instanceof OpenAIKeyProvider) {
+    if (service instanceof OpenAIKeyProvider || service instanceof AnthropicKeyProvider) {
      service.update(key.hash, { isOverQuota: reason === "quota" });
    }
  }