adds Claude over-quota detection

This commit is contained in:
nai-degen 2024-02-17 15:56:22 -06:00
parent 9c03290a3d
commit 68d829bceb
4 changed files with 40 additions and 32 deletions

View File

@ -310,7 +310,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
break; break;
case "anthropic": case "anthropic":
case "aws": case "aws":
await maybeHandleMissingPreambleError(req, errorPayload); await handleAnthropicBadRequestError(req, errorPayload);
break; break;
default: default:
assertNever(service); assertNever(service);
@ -411,33 +411,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
throw new HttpError(statusCode, errorPayload.error?.message); throw new HttpError(statusCode, errorPayload.error?.message);
}; };
/** async function handleAnthropicBadRequestError(
* This is a workaround for a very strange issue where certain API keys seem to
* enforce more strict input validation than others -- specifically, they will
* require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from
* being used as a generic text completion service and to enforce the use of
* the chat RLHF. This is not documented anywhere, and it's not clear why some
* keys enforce this and others don't.
* This middleware checks for that specific error and marks the key as being
* one that requires the prefix, and then re-enqueues the request.
* The exact error is:
* ```
* {
* "error": {
* "type": "invalid_request_error",
* "message": "prompt must start with \"\n\nHuman:\" turn"
* }
* }
* ```
*/
async function maybeHandleMissingPreambleError(
req: Request, req: Request,
errorPayload: ProxiedErrorPayload errorPayload: ProxiedErrorPayload
) { ) {
if ( const { error } = errorPayload;
errorPayload.error?.type === "invalid_request_error" && const isMissingPreamble = error?.message.startsWith(
errorPayload.error?.message.startsWith('prompt must start with "\n\nHuman:" turn') `prompt must start with "\n\nHuman:" turn`
) { );
// Some keys mandate a \n\nHuman: preamble, which we can add and retry
if (isMissingPreamble) {
req.log.warn( req.log.warn(
{ key: req.key?.hash }, { key: req.key?.hash },
"Request failed due to missing preamble. Key will be marked as such for subsequent requests." "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
@ -445,9 +429,22 @@ async function maybeHandleMissingPreambleError(
keyPool.update(req.key!, { requiresPreamble: true }); keyPool.update(req.key!, { requiresPreamble: true });
await reenqueueRequest(req); await reenqueueRequest(req);
throw new RetryableError("Claude request re-enqueued to add preamble."); throw new RetryableError("Claude request re-enqueued to add preamble.");
} else {
errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
} }
// Only affects Anthropic keys
// {"type":"error","error":{"type":"invalid_request_error","message":"Usage blocked until 2024-03-01T00:00:00+00:00 due to user specified spend limits."}}
const isOverQuota = error?.message?.match(/usage blocked until/i);
if (isOverQuota) {
req.log.warn(
{ key: req.key?.hash, message: error?.message },
"Anthropic key has hit spending limit and will be disabled."
);
keyPool.disable(req.key!, "quota");
errorPayload.proxy_note = `Assigned key has hit its spending limit. ${error?.message}`;
return;
}
errorPayload.proxy_note = `Unrecognized 400 Bad Request error from the API.`;
} }
async function handleAnthropicRateLimitError( async function handleAnthropicRateLimitError(
@ -459,7 +456,7 @@ async function handleAnthropicRateLimitError(
await reenqueueRequest(req); await reenqueueRequest(req);
throw new RetryableError("Claude rate-limited request re-enqueued."); throw new RetryableError("Claude rate-limited request re-enqueued.");
} else { } else {
errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`; errorPayload.proxy_note = `Unrecognized 429 Too Many Requests error from the API.`;
} }
} }

View File

@ -1,4 +1,3 @@
/** Calculates and returns stats about the service. */
import { config, listConfig } from "./config"; import { config, listConfig } from "./config";
import { import {
AnthropicKey, AnthropicKey,
@ -78,7 +77,10 @@ type OpenAIInfo = BaseFamilyInfo & {
trialKeys?: number; trialKeys?: number;
overQuotaKeys?: number; overQuotaKeys?: number;
}; };
type AnthropicInfo = BaseFamilyInfo & { prefilledKeys?: number }; type AnthropicInfo = BaseFamilyInfo & {
prefilledKeys?: number;
overQuotaKeys?: number;
};
type AwsInfo = BaseFamilyInfo & { privacy?: string }; type AwsInfo = BaseFamilyInfo & { privacy?: string };
// prettier-ignore // prettier-ignore
@ -277,7 +279,11 @@ function addKeyToAggregates(k: KeyPoolKey) {
increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0); increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0);
increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0); increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0);
increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0); increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0);
increment(serviceStats, "mistral-ai__keys", k.service === "mistral-ai" ? 1 : 0); increment(
serviceStats,
"mistral-ai__keys",
k.service === "mistral-ai" ? 1 : 0
);
increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0); increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0);
increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0); increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0);
@ -321,6 +327,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
sumTokens += k.claudeTokens; sumTokens += k.claudeTokens;
sumCost += getTokenCostUsd(family, k.claudeTokens); sumCost += getTokenCostUsd(family, k.claudeTokens);
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1); increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
increment(modelStats, `${family}__overQuota`, k.isOverQuota ? 1 : 0);
increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0); increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
increment(modelStats, `${family}__tokens`, k.claudeTokens); increment(modelStats, `${family}__tokens`, k.claudeTokens);
increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0); increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0);
@ -404,6 +411,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
} }
break; break;
case "anthropic": case "anthropic":
info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0; info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
break; break;
case "aws": case "aws":

View File

@ -52,6 +52,7 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
* outputting copyrighted material, which still interferes with outputs. * outputting copyrighted material, which still interferes with outputs.
*/ */
isPozzed: boolean; isPozzed: boolean;
isOverQuota: boolean;
} }
/** /**
@ -89,6 +90,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
service: this.service, service: this.service,
modelFamilies: ["claude"], modelFamilies: ["claude"],
isDisabled: false, isDisabled: false,
isOverQuota: false,
isRevoked: false, isRevoked: false,
isPozzed: false, isPozzed: false,
promptCount: 0, promptCount: 0,
@ -219,6 +221,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
this.keys.forEach((key) => { this.keys.forEach((key) => {
this.update(key.hash, { this.update(key.hash, {
isPozzed: false, isPozzed: false,
isOverQuota: false,
isDisabled: false, isDisabled: false,
isRevoked: false, isRevoked: false,
lastChecked: 0, lastChecked: 0,

View File

@ -59,7 +59,7 @@ export class KeyPool {
const service = this.getKeyProvider(key.service); const service = this.getKeyProvider(key.service);
service.disable(key); service.disable(key);
service.update(key.hash, { isRevoked: reason === "revoked" }); service.update(key.hash, { isRevoked: reason === "revoked" });
if (service instanceof OpenAIKeyProvider) { if (service instanceof OpenAIKeyProvider || service instanceof AnthropicKeyProvider) {
service.update(key.hash, { isOverQuota: reason === "quota" }); service.update(key.hash, { isOverQuota: reason === "quota" });
} }
} }