adds Anthropic key tier detection and trial key display
This commit is contained in:
parent
2098948b7a
commit
e068edcf48
|
@ -80,6 +80,7 @@ type OpenAIInfo = BaseFamilyInfo & {
|
|||
overQuotaKeys?: number;
|
||||
};
|
||||
type AnthropicInfo = BaseFamilyInfo & {
|
||||
trialKeys?: number;
|
||||
prefilledKeys?: number;
|
||||
overQuotaKeys?: number;
|
||||
};
|
||||
|
@ -349,6 +350,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
|
|||
sumTokens += tokens;
|
||||
sumCost += getTokenCostUsd(f, tokens);
|
||||
increment(modelStats, `${f}__tokens`, tokens);
|
||||
increment(modelStats, `${f}__trial`, k.tier === "free" ? 1 : 0);
|
||||
increment(modelStats, `${f}__revoked`, k.isRevoked ? 1 : 0);
|
||||
increment(modelStats, `${f}__active`, k.isDisabled ? 0 : 1);
|
||||
increment(modelStats, `${f}__overQuota`, k.isOverQuota ? 1 : 0);
|
||||
|
@ -437,6 +439,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
|
|||
break;
|
||||
case "anthropic":
|
||||
info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
|
||||
info.trialKeys = modelStats.get(`${family}__trial`) || 0;
|
||||
info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
|
||||
break;
|
||||
case "aws":
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import axios, { AxiosError } from "axios";
|
||||
import axios, { AxiosError, AxiosResponse } from "axios";
|
||||
import { KeyCheckerBase } from "../key-checker-base";
|
||||
import type { AnthropicKey, AnthropicKeyProvider } from "./provider";
|
||||
|
||||
|
@ -52,10 +52,13 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
|
|||
}
|
||||
|
||||
protected async testKeyOrFail(key: AnthropicKey) {
|
||||
const [{ pozzed }] = await Promise.all([this.testLiveness(key)]);
|
||||
const updates = { isPozzed: pozzed };
|
||||
const [{ pozzed, tier }] = await Promise.all([this.testLiveness(key)]);
|
||||
const updates = { isPozzed: pozzed, tier };
|
||||
this.updateKey(key.hash, updates);
|
||||
this.log.info({ key: key.hash, models: key.modelFamilies }, "Checked key.");
|
||||
this.log.info(
|
||||
{ key: key.hash, tier, models: key.modelFamilies },
|
||||
"Checked key."
|
||||
);
|
||||
}
|
||||
|
||||
protected handleAxiosError(key: AnthropicKey, error: AxiosError) {
|
||||
|
@ -124,7 +127,9 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
|
|||
this.updateKey(key.hash, { lastChecked: next });
|
||||
}
|
||||
|
||||
private async testLiveness(key: AnthropicKey): Promise<{ pozzed: boolean }> {
|
||||
private async testLiveness(
|
||||
key: AnthropicKey
|
||||
): Promise<{ pozzed: boolean; tier: AnthropicKey["tier"] }> {
|
||||
const payload = {
|
||||
model: TEST_MODEL,
|
||||
max_tokens: 40,
|
||||
|
@ -133,24 +138,27 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
|
|||
system: SYSTEM,
|
||||
messages: DETECTION_PROMPT,
|
||||
};
|
||||
const { data } = await axios.post<MessageResponse>(
|
||||
const { data, headers } = await axios.post<MessageResponse>(
|
||||
POST_MESSAGES_URL,
|
||||
payload,
|
||||
{ headers: AnthropicKeyChecker.getHeaders(key) }
|
||||
{ headers: AnthropicKeyChecker.getRequestHeaders(key) }
|
||||
);
|
||||
this.log.debug({ data }, "Response from Anthropic");
|
||||
|
||||
const tier = AnthropicKeyChecker.detectTier(headers);
|
||||
|
||||
const completion = data.content.map((part) => part.text).join("");
|
||||
if (POZZ_PROMPT.some((re) => re.test(completion))) {
|
||||
this.log.info({ key: key.hash, response: completion }, "Key is pozzed.");
|
||||
return { pozzed: true };
|
||||
return { pozzed: true, tier };
|
||||
} else if (COPYRIGHT_PROMPT.some((re) => re.test(completion))) {
|
||||
this.log.info(
|
||||
{ key: key.hash, response: completion },
|
||||
"Key has copyright CYA prompt."
|
||||
);
|
||||
return { pozzed: true };
|
||||
return { pozzed: true, tier };
|
||||
} else {
|
||||
return { pozzed: false };
|
||||
return { pozzed: false, tier };
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -161,7 +169,19 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
|
|||
return data?.error?.type;
|
||||
}
|
||||
|
||||
static getHeaders(key: AnthropicKey) {
|
||||
/**
 * Builds the HTTP request headers sent with a key check: the key itself in
 * `X-API-Key` and a pinned `anthropic-version`.
 */
static getRequestHeaders(key: AnthropicKey) {
  const apiKey = key.key;
  return {
    "X-API-Key": apiKey,
    "anthropic-version": "2023-06-01",
  };
}
|
||||
|
||||
/**
 * Derives a key's billing tier from the `anthropic-ratelimit-tokens-limit`
 * response header.
 *
 * @param headers Response headers from a request made with the key.
 * @returns "unknown" when the header is absent or does not parse as a
 *   base-10 integer; otherwise the first tier whose token ceiling is at or
 *   above the reported limit, or "scale" when the limit exceeds every
 *   listed ceiling.
 */
static detectTier(headers: AxiosResponse["headers"]) {
  const rawLimit = headers["anthropic-ratelimit-tokens-limit"];
  const limit = parseInt(rawLimit, 10);
  if (!rawLimit || isNaN(limit)) {
    return "unknown";
  }

  // Ordered from the smallest ceiling upwards; the first match wins.
  const ceilings = [
    [25000, "free"],
    [50000, "build_1"],
    [100000, "build_2"],
    [200000, "build_3"],
    [400000, "build_4"],
  ] as const;

  for (const [ceiling, tier] of ceilings) {
    if (limit <= ceiling) {
      return tier;
    }
  }
  return "scale";
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import { config } from "../../../config";
|
|||
import { logger } from "../../../logger";
|
||||
import { AnthropicModelFamily, getClaudeModelFamily } from "../../models";
|
||||
import { AnthropicKeyChecker } from "./checker";
|
||||
import { HttpError, PaymentRequiredError } from "../../errors";
|
||||
import { PaymentRequiredError } from "../../errors";
|
||||
|
||||
export type AnthropicKeyUpdate = Omit<
|
||||
Partial<AnthropicKey>,
|
||||
|
@ -45,13 +45,39 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
|
|||
*/
|
||||
isPozzed: boolean;
|
||||
isOverQuota: boolean;
|
||||
/**
|
||||
* Key billing tier (https://docs.anthropic.com/claude/reference/rate-limits)
|
||||
**/
|
||||
tier: typeof TIER_PRIORITY[number];
|
||||
}
|
||||
|
||||
/**
|
||||
* Upon being rate limited, a key will be locked out for this many milliseconds
|
||||
* while we wait for other concurrent requests to finish.
|
||||
* Selection priority for Anthropic keys. Aims to maximize throughput by
|
||||
* saturating concurrency-limited keys first, then trying keys with increasingly
|
||||
* strict rate limits. Free keys have very limited throughput and are used last.
|
||||
*/
|
||||
const RATE_LIMIT_LOCKOUT = 2000;
|
||||
const TIER_PRIORITY = [
|
||||
"unknown",
|
||||
"scale",
|
||||
"build_4",
|
||||
"build_3",
|
||||
"build_2",
|
||||
"build_1",
|
||||
"free",
|
||||
] as const;
|
||||
|
||||
/**
|
||||
* Upon being rate limited, a Scale-tier key will be locked out for this many
|
||||
* milliseconds while we wait for other concurrent requests to finish.
|
||||
*/
|
||||
const SCALE_RATE_LIMIT_LOCKOUT = 2000;
|
||||
/**
|
||||
* Upon being rate limited, a Build-tier key will be locked out for this many
|
||||
* milliseconds while we wait for the per-minute rate limit to reset. Because
|
||||
* the reset provided in the headers specifies the time for the full quota to
|
||||
* become available, the key may become available before that time.
|
||||
*/
|
||||
const BUILD_RATE_LIMIT_LOCKOUT = 10000;
|
||||
/**
|
||||
* Upon assigning a key, we will wait this many milliseconds before allowing it
|
||||
* to be used again. This is to prevent the queue from flooding a key with too
|
||||
|
@ -98,6 +124,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
lastChecked: 0,
|
||||
claudeTokens: 0,
|
||||
"claude-opusTokens": 0,
|
||||
tier: "unknown",
|
||||
};
|
||||
this.keys.push(newKey);
|
||||
}
|
||||
|
@ -123,25 +150,27 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
throw new PaymentRequiredError("No Anthropic keys available.");
|
||||
}
|
||||
|
||||
// (largely copied from the OpenAI provider, without trial key support)
|
||||
// Select a key, from highest priority to lowest priority:
|
||||
// 1. Keys which are not rate limited
|
||||
// a. If all keys were rate limited recently, select the least-recently
|
||||
// rate limited key.
|
||||
// 2. Keys which are not pozzed
|
||||
// 3. Keys which have not been used in the longest time
|
||||
// 1. Keys which are not rate limit locked
|
||||
// 2. Keys with the highest tier
|
||||
// 3. Keys which are not pozzed
|
||||
// 4. Keys which have not been used in the longest time
|
||||
|
||||
const now = Date.now();
|
||||
|
||||
const keysByPriority = availableKeys.sort((a, b) => {
|
||||
const aRateLimited = now - a.rateLimitedAt < RATE_LIMIT_LOCKOUT;
|
||||
const bRateLimited = now - b.rateLimitedAt < RATE_LIMIT_LOCKOUT;
|
||||
const aLockoutPeriod = getKeyLockout(a);
|
||||
const bLockoutPeriod = getKeyLockout(b);
|
||||
|
||||
const aRateLimited = now - a.rateLimitedAt < aLockoutPeriod;
|
||||
const bRateLimited = now - b.rateLimitedAt < bLockoutPeriod;
|
||||
|
||||
if (aRateLimited && !bRateLimited) return 1;
|
||||
if (!aRateLimited && bRateLimited) return -1;
|
||||
if (aRateLimited && bRateLimited) {
|
||||
return a.rateLimitedAt - b.rateLimitedAt;
|
||||
}
|
||||
|
||||
const aTierIndex = TIER_PRIORITY.indexOf(a.tier);
|
||||
const bTierIndex = TIER_PRIORITY.indexOf(b.tier);
|
||||
if (aTierIndex > bTierIndex) return -1;
|
||||
|
||||
if (a.isPozzed && !b.isPozzed) return 1;
|
||||
if (!a.isPozzed && b.isPozzed) return -1;
|
||||
|
@ -207,7 +236,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
const key = this.keys.find((k) => k.hash === keyHash)!;
|
||||
const now = Date.now();
|
||||
key.rateLimitedAt = now;
|
||||
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
|
||||
key.rateLimitedUntil = now + SCALE_RATE_LIMIT_LOCKOUT;
|
||||
}
|
||||
|
||||
public recheck() {
|
||||
|
@ -239,3 +268,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns how long, in milliseconds, a key is treated as rate-limit locked
 * when sorting keys for selection.
 *
 * Keys whose tier is "scale" or "unknown" get SCALE_RATE_LIMIT_LOCKOUT; every
 * other tier gets BUILD_RATE_LIMIT_LOCKOUT.
 */
function getKeyLockout(key: AnthropicKey) {
  const usesScaleLockout = key.tier === "scale" || key.tier === "unknown";
  if (usesScaleLockout) {
    return SCALE_RATE_LIMIT_LOCKOUT;
  }
  return BUILD_RATE_LIMIT_LOCKOUT;
}
|
||||
|
|
|
@ -13,15 +13,15 @@ type KeyCheckerOptions<TKey extends Key = Key> = {
|
|||
|
||||
export abstract class KeyCheckerBase<TKey extends Key> {
|
||||
protected readonly service: string;
|
||||
protected readonly RECURRING_CHECKS_ENABLED: boolean;
|
||||
protected readonly recurringChecksEnabled: boolean;
|
||||
/** Minimum time in between any two key checks. */
|
||||
protected readonly MIN_CHECK_INTERVAL: number;
|
||||
protected readonly minCheckInterval: number;
|
||||
/**
|
||||
* Minimum time in between checks for a given key. Because we can no longer
|
||||
* read quota usage, there is little reason to check a single key more often
|
||||
* than this.
|
||||
*/
|
||||
protected readonly KEY_CHECK_PERIOD: number;
|
||||
protected readonly keyCheckPeriod: number;
|
||||
protected readonly updateKey: (hash: string, props: Partial<TKey>) => void;
|
||||
protected readonly keys: TKey[] = [];
|
||||
protected log: pino.Logger;
|
||||
|
@ -29,14 +29,13 @@ export abstract class KeyCheckerBase<TKey extends Key> {
|
|||
protected lastCheck = 0;
|
||||
|
||||
protected constructor(keys: TKey[], opts: KeyCheckerOptions<TKey>) {
|
||||
const { service, keyCheckPeriod, minCheckInterval } = opts;
|
||||
this.keys = keys;
|
||||
this.KEY_CHECK_PERIOD = keyCheckPeriod;
|
||||
this.MIN_CHECK_INTERVAL = minCheckInterval;
|
||||
this.RECURRING_CHECKS_ENABLED = opts.recurringChecksEnabled ?? true;
|
||||
this.keyCheckPeriod = opts.keyCheckPeriod;
|
||||
this.minCheckInterval = opts.minCheckInterval;
|
||||
this.recurringChecksEnabled = opts.recurringChecksEnabled ?? true;
|
||||
this.updateKey = opts.updateKey;
|
||||
this.service = service;
|
||||
this.log = logger.child({ module: "key-checker", service });
|
||||
this.service = opts.service;
|
||||
this.log = logger.child({ module: "key-checker", service: opts.service });
|
||||
}
|
||||
|
||||
public start() {
|
||||
|
@ -102,7 +101,7 @@ export abstract class KeyCheckerBase<TKey extends Key> {
|
|||
return;
|
||||
}
|
||||
|
||||
if (!this.RECURRING_CHECKS_ENABLED) {
|
||||
if (!this.recurringChecksEnabled) {
|
||||
checkLog.info(
|
||||
"Initial checks complete and recurring checks are disabled for this service. Stopping."
|
||||
);
|
||||
|
@ -117,8 +116,8 @@ export abstract class KeyCheckerBase<TKey extends Key> {
|
|||
// Don't check any individual key too often.
|
||||
// Don't check anything at all at a rate faster than once per 3 seconds.
|
||||
const nextCheck = Math.max(
|
||||
oldestKey.lastChecked + this.KEY_CHECK_PERIOD,
|
||||
this.lastCheck + this.MIN_CHECK_INTERVAL
|
||||
oldestKey.lastChecked + this.keyCheckPeriod,
|
||||
this.lastCheck + this.minCheckInterval
|
||||
);
|
||||
|
||||
const delay = nextCheck - Date.now();
|
||||
|
|
Loading…
Reference in New Issue