adds Claude over-quota detection
This commit is contained in:
parent
9c03290a3d
commit
68d829bceb
|
@ -310,7 +310,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
break;
|
break;
|
||||||
case "anthropic":
|
case "anthropic":
|
||||||
case "aws":
|
case "aws":
|
||||||
await maybeHandleMissingPreambleError(req, errorPayload);
|
await handleAnthropicBadRequestError(req, errorPayload);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assertNever(service);
|
assertNever(service);
|
||||||
|
@ -411,33 +411,17 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
throw new HttpError(statusCode, errorPayload.error?.message);
|
throw new HttpError(statusCode, errorPayload.error?.message);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
async function handleAnthropicBadRequestError(
|
||||||
* This is a workaround for a very strange issue where certain API keys seem to
|
|
||||||
* enforce more strict input validation than others -- specifically, they will
|
|
||||||
* require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from
|
|
||||||
* being used as a generic text completion service and to enforce the use of
|
|
||||||
* the chat RLHF. This is not documented anywhere, and it's not clear why some
|
|
||||||
* keys enforce this and others don't.
|
|
||||||
* This middleware checks for that specific error and marks the key as being
|
|
||||||
* one that requires the prefix, and then re-enqueues the request.
|
|
||||||
* The exact error is:
|
|
||||||
* ```
|
|
||||||
* {
|
|
||||||
* "error": {
|
|
||||||
* "type": "invalid_request_error",
|
|
||||||
* "message": "prompt must start with \"\n\nHuman:\" turn"
|
|
||||||
* }
|
|
||||||
* }
|
|
||||||
* ```
|
|
||||||
*/
|
|
||||||
async function maybeHandleMissingPreambleError(
|
|
||||||
req: Request,
|
req: Request,
|
||||||
errorPayload: ProxiedErrorPayload
|
errorPayload: ProxiedErrorPayload
|
||||||
) {
|
) {
|
||||||
if (
|
const { error } = errorPayload;
|
||||||
errorPayload.error?.type === "invalid_request_error" &&
|
const isMissingPreamble = error?.message.startsWith(
|
||||||
errorPayload.error?.message.startsWith('prompt must start with "\n\nHuman:" turn')
|
`prompt must start with "\n\nHuman:" turn`
|
||||||
) {
|
);
|
||||||
|
|
||||||
|
// Some keys mandate a \n\nHuman: preamble, which we can add and retry
|
||||||
|
if (isMissingPreamble) {
|
||||||
req.log.warn(
|
req.log.warn(
|
||||||
{ key: req.key?.hash },
|
{ key: req.key?.hash },
|
||||||
"Request failed due to missing preamble. Key will be marked as such for subsequent requests."
|
"Request failed due to missing preamble. Key will be marked as such for subsequent requests."
|
||||||
|
@ -445,9 +429,22 @@ async function maybeHandleMissingPreambleError(
|
||||||
keyPool.update(req.key!, { requiresPreamble: true });
|
keyPool.update(req.key!, { requiresPreamble: true });
|
||||||
await reenqueueRequest(req);
|
await reenqueueRequest(req);
|
||||||
throw new RetryableError("Claude request re-enqueued to add preamble.");
|
throw new RetryableError("Claude request re-enqueued to add preamble.");
|
||||||
} else {
|
|
||||||
errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only affects Anthropic keys
|
||||||
|
// {"type":"error","error":{"type":"invalid_request_error","message":"Usage blocked until 2024-03-01T00:00:00+00:00 due to user specified spend limits."}}
|
||||||
|
const isOverQuota = error?.message?.match(/usage blocked until/i);
|
||||||
|
if (isOverQuota) {
|
||||||
|
req.log.warn(
|
||||||
|
{ key: req.key?.hash, message: error?.message },
|
||||||
|
"Anthropic key has hit spending limit and will be disabled."
|
||||||
|
);
|
||||||
|
keyPool.disable(req.key!, "quota");
|
||||||
|
errorPayload.proxy_note = `Assigned key has hit its spending limit. ${error?.message}`;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
errorPayload.proxy_note = `Unrecognized 400 Bad Request error from the API.`;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function handleAnthropicRateLimitError(
|
async function handleAnthropicRateLimitError(
|
||||||
|
@ -459,7 +456,7 @@ async function handleAnthropicRateLimitError(
|
||||||
await reenqueueRequest(req);
|
await reenqueueRequest(req);
|
||||||
throw new RetryableError("Claude rate-limited request re-enqueued.");
|
throw new RetryableError("Claude rate-limited request re-enqueued.");
|
||||||
} else {
|
} else {
|
||||||
errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`;
|
errorPayload.proxy_note = `Unrecognized 429 Too Many Requests error from the API.`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
/** Calculates and returns stats about the service. */
|
|
||||||
import { config, listConfig } from "./config";
|
import { config, listConfig } from "./config";
|
||||||
import {
|
import {
|
||||||
AnthropicKey,
|
AnthropicKey,
|
||||||
|
@ -78,7 +77,10 @@ type OpenAIInfo = BaseFamilyInfo & {
|
||||||
trialKeys?: number;
|
trialKeys?: number;
|
||||||
overQuotaKeys?: number;
|
overQuotaKeys?: number;
|
||||||
};
|
};
|
||||||
type AnthropicInfo = BaseFamilyInfo & { prefilledKeys?: number };
|
type AnthropicInfo = BaseFamilyInfo & {
|
||||||
|
prefilledKeys?: number;
|
||||||
|
overQuotaKeys?: number;
|
||||||
|
};
|
||||||
type AwsInfo = BaseFamilyInfo & { privacy?: string };
|
type AwsInfo = BaseFamilyInfo & { privacy?: string };
|
||||||
|
|
||||||
// prettier-ignore
|
// prettier-ignore
|
||||||
|
@ -277,7 +279,11 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||||
increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0);
|
increment(serviceStats, "openai__keys", k.service === "openai" ? 1 : 0);
|
||||||
increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0);
|
increment(serviceStats, "anthropic__keys", k.service === "anthropic" ? 1 : 0);
|
||||||
increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0);
|
increment(serviceStats, "google-ai__keys", k.service === "google-ai" ? 1 : 0);
|
||||||
increment(serviceStats, "mistral-ai__keys", k.service === "mistral-ai" ? 1 : 0);
|
increment(
|
||||||
|
serviceStats,
|
||||||
|
"mistral-ai__keys",
|
||||||
|
k.service === "mistral-ai" ? 1 : 0
|
||||||
|
);
|
||||||
increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0);
|
increment(serviceStats, "aws__keys", k.service === "aws" ? 1 : 0);
|
||||||
increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0);
|
increment(serviceStats, "azure__keys", k.service === "azure" ? 1 : 0);
|
||||||
|
|
||||||
|
@ -321,6 +327,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
||||||
sumTokens += k.claudeTokens;
|
sumTokens += k.claudeTokens;
|
||||||
sumCost += getTokenCostUsd(family, k.claudeTokens);
|
sumCost += getTokenCostUsd(family, k.claudeTokens);
|
||||||
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
|
increment(modelStats, `${family}__active`, k.isDisabled ? 0 : 1);
|
||||||
|
increment(modelStats, `${family}__overQuota`, k.isOverQuota ? 1 : 0);
|
||||||
increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
|
increment(modelStats, `${family}__revoked`, k.isRevoked ? 1 : 0);
|
||||||
increment(modelStats, `${family}__tokens`, k.claudeTokens);
|
increment(modelStats, `${family}__tokens`, k.claudeTokens);
|
||||||
increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0);
|
increment(modelStats, `${family}__pozzed`, k.isPozzed ? 1 : 0);
|
||||||
|
@ -404,6 +411,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case "anthropic":
|
case "anthropic":
|
||||||
|
info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
|
||||||
info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
|
info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
|
||||||
break;
|
break;
|
||||||
case "aws":
|
case "aws":
|
||||||
|
|
|
@ -52,6 +52,7 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
|
||||||
* outputting copyrighted material, which still interferes with outputs.
|
* outputting copyrighted material, which still interferes with outputs.
|
||||||
*/
|
*/
|
||||||
isPozzed: boolean;
|
isPozzed: boolean;
|
||||||
|
isOverQuota: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -89,6 +90,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
service: this.service,
|
service: this.service,
|
||||||
modelFamilies: ["claude"],
|
modelFamilies: ["claude"],
|
||||||
isDisabled: false,
|
isDisabled: false,
|
||||||
|
isOverQuota: false,
|
||||||
isRevoked: false,
|
isRevoked: false,
|
||||||
isPozzed: false,
|
isPozzed: false,
|
||||||
promptCount: 0,
|
promptCount: 0,
|
||||||
|
@ -219,6 +221,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
this.keys.forEach((key) => {
|
this.keys.forEach((key) => {
|
||||||
this.update(key.hash, {
|
this.update(key.hash, {
|
||||||
isPozzed: false,
|
isPozzed: false,
|
||||||
|
isOverQuota: false,
|
||||||
isDisabled: false,
|
isDisabled: false,
|
||||||
isRevoked: false,
|
isRevoked: false,
|
||||||
lastChecked: 0,
|
lastChecked: 0,
|
||||||
|
|
|
@ -59,7 +59,7 @@ export class KeyPool {
|
||||||
const service = this.getKeyProvider(key.service);
|
const service = this.getKeyProvider(key.service);
|
||||||
service.disable(key);
|
service.disable(key);
|
||||||
service.update(key.hash, { isRevoked: reason === "revoked" });
|
service.update(key.hash, { isRevoked: reason === "revoked" });
|
||||||
if (service instanceof OpenAIKeyProvider) {
|
if (service instanceof OpenAIKeyProvider || service instanceof AnthropicKeyProvider) {
|
||||||
service.update(key.hash, { isOverQuota: reason === "quota" });
|
service.update(key.hash, { isOverQuota: reason === "quota" });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue