From 68d829bcebf4af65a416eae7501b77536f46274a Mon Sep 17 00:00:00 2001 From: nai-degen Date: Sat, 17 Feb 2024 15:56:22 -0600 Subject: [PATCH] adds Claude over-quota detection --- src/proxy/middleware/response/index.ts | 53 +++++++++---------- src/service-info.ts | 14 +++-- .../key-management/anthropic/provider.ts | 3 ++ src/shared/key-management/key-pool.ts | 2 +- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index 3b850bd..4add890 100644 --- a/src/proxy/middleware/response/index.ts +++ b/src/proxy/middleware/response/index.ts @@ -310,7 +310,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( break; case "anthropic": case "aws": - await maybeHandleMissingPreambleError(req, errorPayload); + await handleAnthropicBadRequestError(req, errorPayload); break; default: assertNever(service); @@ -411,33 +411,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( throw new HttpError(statusCode, errorPayload.error?.message); }; -/** - * This is a workaround for a very strange issue where certain API keys seem to - * enforce more strict input validation than others -- specifically, they will - * require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from - * being used as a generic text completion service and to enforce the use of - * the chat RLHF. This is not documented anywhere, and it's not clear why some - * keys enforce this and others don't. - * This middleware checks for that specific error and marks the key as being - * one that requires the prefix, and then re-enqueues the request. - * The exact error is: - * ``` - * { - * "error": { - * "type": "invalid_request_error", - * "message": "prompt must start with \"\n\nHuman:\" turn" - * } - * } - * ``` - */ -async function maybeHandleMissingPreambleError( +async function handleAnthropicBadRequestError( req: Request, errorPayload: ProxiedErrorPayload ) { - if ( - errorPayload.error?.type === "invalid_request_error" && - errorPayload.error?.message.startsWith('prompt must start with "\n\nHuman:" turn') - ) { + const { error } = errorPayload; + const isMissingPreamble = error?.message.startsWith( + `prompt must start with "\n\nHuman:" turn` + ); + + // Some keys mandate a \n\nHuman: preamble, which we can add and retry + if (isMissingPreamble) { req.log.warn( { key: req.key?.hash }, "Request failed due to missing preamble. Key will be marked as such for subsequent requests." @@ -445,9 +429,22 @@ async function maybeHandleMissingPreambleError( keyPool.update(req.key!, { requiresPreamble: true }); await reenqueueRequest(req); throw new RetryableError("Claude request re-enqueued to add preamble."); - } else { - errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`; } + + // Only affects Anthropic keys + // {"type":"error","error":{"type":"invalid_request_error","message":"Usage blocked until 2024-03-01T00:00:00+00:00 due to user specified spend limits."}} + const isOverQuota = error?.message?.match(/usage blocked until/i); + if (isOverQuota) { + req.log.warn( + { key: req.key?.hash, message: error?.message }, + "Anthropic key has hit spending limit and will be disabled." + ); + keyPool.disable(req.key!, "quota"); + errorPayload.proxy_note = `Assigned key has hit its spending limit. ${error?.message}`; + return; + } + + errorPayload.proxy_note = `Unrecognized 400 Bad Request error from the API.`; } async function handleAnthropicRateLimitError( @@ -459,7 +456,7 @@ async function handleAnthropicRateLimitError( await reenqueueRequest(req); throw new RetryableError("Claude rate-limited request re-enqueued."); } else { - errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`; + errorPayload.proxy_note = `Unrecognized 429 Too Many Requests error from the API.`; } } diff --git a/src/service-info.ts b/src/service-info.ts index 72ec76b..152dc55 100644 --- a/src/service-info.ts +++ b/src/service-info.ts @@ -1,4 +1,3 @@ -/** Calculates and returns stats about the service. */ import { config, listConfig } from "./config"; import { AnthropicKey, @@ -78,7 +77,10 @@ type OpenAIInfo = BaseFamilyInfo & { trialKeys?: number; overQuotaKeys?: number; }; -type AnthropicInfo = BaseFamilyInfo & { prefilledKeys?: number }; +type AnthropicInfo = BaseFamilyInfo & { + prefilledKeys?: number; + overQuotaKeys?: number; +}; type AwsInfo = BaseFamilyInfo & { privacy?: string }; // prettier-ignore @@ -277,7 +279,11 @@ function addKeyToAggregates(k: KeyPoolKey) { increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0); increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0); increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0); - increment(serviceStats, "mistral-ai__keys", k.service === "mistral-ai" ? 1 : 0); + increment( + serviceStats, + "mistral-ai__keys", + k.service === "mistral-ai" ? 1 : 0 + ); increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0); increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0); @@ -321,6 +327,7 @@ function addKeyToAggregates(k: KeyPoolKey) { sumTokens += k.claudeTokens; sumCost += getTokenCostUsd(family, k.claudeTokens); increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1); + increment(modelStats, `${family}__overQuota`, k.isOverQuota ? 1 : 0); increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0); increment(modelStats, `${family}__tokens`, k.claudeTokens); increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0); @@ -404,6 +411,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo { } break; case "anthropic": + info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0; info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0; break; case "aws": diff --git a/src/shared/key-management/anthropic/provider.ts b/src/shared/key-management/anthropic/provider.ts index 80fff68..d1500a8 100644 --- a/src/shared/key-management/anthropic/provider.ts +++ b/src/shared/key-management/anthropic/provider.ts @@ -52,6 +52,7 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage { * outputting copyrighted material, which still interferes with outputs. */ isPozzed: boolean; + isOverQuota: boolean; } /** @@ -89,6 +90,7 @@ export class AnthropicKeyProvider implements KeyProvider { service: this.service, modelFamilies: ["claude"], isDisabled: false, + isOverQuota: false, isRevoked: false, isPozzed: false, promptCount: 0, @@ -219,6 +221,7 @@ export class AnthropicKeyProvider implements KeyProvider { this.keys.forEach((key) => { this.update(key.hash, { isPozzed: false, + isOverQuota: false, isDisabled: false, isRevoked: false, lastChecked: 0, diff --git a/src/shared/key-management/key-pool.ts b/src/shared/key-management/key-pool.ts index a185aa3..dc299c1 100644 --- a/src/shared/key-management/key-pool.ts +++ b/src/shared/key-management/key-pool.ts @@ -59,7 +59,7 @@ export class KeyPool { const service = this.getKeyProvider(key.service); service.disable(key); service.update(key.hash, { isRevoked: reason === "revoked" }); - if (service instanceof OpenAIKeyProvider) { + if (service instanceof OpenAIKeyProvider || service instanceof AnthropicKeyProvider) { service.update(key.hash, { isOverQuota: reason === "quota" }); } }