adds Anthropic key tier detection and trial key display

This commit is contained in:
nai-degen 2024-03-18 15:20:34 -05:00
parent 2098948b7a
commit e068edcf48
4 changed files with 96 additions and 39 deletions

View File

@ -80,6 +80,7 @@ type OpenAIInfo = BaseFamilyInfo & {
overQuotaKeys?: number;
};
/**
* Per-family stats for Anthropic keys. The optional counters below are filled
* in by the "anthropic" case of getInfoForFamily from the `modelStats` map.
*/
type AnthropicInfo = BaseFamilyInfo & {
/** Number of keys whose `tier` is "free" (read from the `__trial` counter). */
trialKeys?: number;
/** Value of the `__pozzed` counter for this family. */
prefilledKeys?: number;
/** Number of keys flagged `isOverQuota` (read from the `__overQuota` counter). */
overQuotaKeys?: number;
};
@ -349,6 +350,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
sumTokens += tokens;
sumCost += getTokenCostUsd(f, tokens);
increment(modelStats, `${f}__tokens`, tokens);
increment(modelStats, `${f}__trial`, k.tier === "free" ? 1 : 0);
increment(modelStats, `${f}__revoked`, k.isRevoked ? 1 : 0);
increment(modelStats, `${f}__active`, k.isDisabled ? 0 : 1);
increment(modelStats, `${f}__overQuota`, k.isOverQuota ? 1 : 0);
@ -437,6 +439,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
break;
case "anthropic":
info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
info.trialKeys = modelStats.get(`${family}__trial`) || 0;
info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
break;
case "aws":

View File

@ -1,4 +1,4 @@
import axios, { AxiosError } from "axios";
import axios, { AxiosError, AxiosResponse } from "axios";
import { KeyCheckerBase } from "../key-checker-base";
import type { AnthropicKey, AnthropicKeyProvider } from "./provider";
@ -52,10 +52,13 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
}
protected async testKeyOrFail(key: AnthropicKey) {
const [{ pozzed }] = await Promise.all([this.testLiveness(key)]);
const updates = { isPozzed: pozzed };
const [{ pozzed, tier }] = await Promise.all([this.testLiveness(key)]);
const updates = { isPozzed: pozzed, tier };
this.updateKey(key.hash, updates);
this.log.info({ key: key.hash, models: key.modelFamilies }, "Checked key.");
this.log.info(
{ key: key.hash, tier, models: key.modelFamilies },
"Checked key."
);
}
protected handleAxiosError(key: AnthropicKey, error: AxiosError) {
@ -124,7 +127,9 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
this.updateKey(key.hash, { lastChecked: next });
}
private async testLiveness(key: AnthropicKey): Promise<{ pozzed: boolean }> {
private async testLiveness(
key: AnthropicKey
): Promise<{ pozzed: boolean; tier: AnthropicKey["tier"] }> {
const payload = {
model: TEST_MODEL,
max_tokens: 40,
@ -133,24 +138,27 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
system: SYSTEM,
messages: DETECTION_PROMPT,
};
const { data } = await axios.post<MessageResponse>(
const { data, headers } = await axios.post<MessageResponse>(
POST_MESSAGES_URL,
payload,
{ headers: AnthropicKeyChecker.getHeaders(key) }
{ headers: AnthropicKeyChecker.getRequestHeaders(key) }
);
this.log.debug({ data }, "Response from Anthropic");
const tier = AnthropicKeyChecker.detectTier(headers);
const completion = data.content.map((part) => part.text).join("");
if (POZZ_PROMPT.some((re) => re.test(completion))) {
this.log.info({ key: key.hash, response: completion }, "Key is pozzed.");
return { pozzed: true };
return { pozzed: true, tier };
} else if (COPYRIGHT_PROMPT.some((re) => re.test(completion))) {
this.log.info(
{ key: key.hash, response: completion },
"Key has copyright CYA prompt."
);
return { pozzed: true };
return { pozzed: true, tier };
} else {
return { pozzed: false };
return { pozzed: false, tier };
}
}
@ -161,7 +169,19 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
return data?.error?.type;
}
static getHeaders(key: AnthropicKey) {
/**
 * Builds the HTTP request headers used when calling Anthropic with the given
 * key: the key itself as `X-API-Key` plus a pinned `anthropic-version`.
 */
static getRequestHeaders(key: AnthropicKey) {
  const apiKey = key.key;
  return {
    "X-API-Key": apiKey,
    "anthropic-version": "2023-06-01",
  };
}
/**
 * Infers a key's billing tier from the `anthropic-ratelimit-tokens-limit`
 * response header.
 *
 * @param headers Response headers from an Anthropic API call.
 * @returns "unknown" when the header is missing/empty or not parseable as an
 * integer; otherwise the first tier whose token-limit ceiling is not
 * exceeded, or "scale" when the limit is above every ceiling.
 */
static detectTier(headers: AxiosResponse["headers"]) {
  const rawLimit = headers["anthropic-ratelimit-tokens-limit"];
  if (!rawLimit) return "unknown";

  const limit = parseInt(rawLimit, 10);
  if (isNaN(limit)) return "unknown";

  // Ascending, inclusive upper bounds; the first ceiling the limit fits
  // under decides the tier. `as const` keeps the tier names as literal types.
  const ceilings = [
    [25000, "free"],
    [50000, "build_1"],
    [100000, "build_2"],
    [200000, "build_3"],
    [400000, "build_4"],
  ] as const;

  for (const [ceiling, tier] of ceilings) {
    if (limit <= ceiling) return tier;
  }
  return "scale";
}
}

View File

@ -4,7 +4,7 @@ import { config } from "../../../config";
import { logger } from "../../../logger";
import { AnthropicModelFamily, getClaudeModelFamily } from "../../models";
import { AnthropicKeyChecker } from "./checker";
import { HttpError, PaymentRequiredError } from "../../errors";
import { PaymentRequiredError } from "../../errors";
export type AnthropicKeyUpdate = Omit<
Partial<AnthropicKey>,
@ -45,13 +45,39 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
*/
isPozzed: boolean;
isOverQuota: boolean;
/**
* Key billing tier (https://docs.anthropic.com/claude/reference/rate-limits)
**/
tier: typeof TIER_PRIORITY[number];
}
/**
* Upon being rate limited, a key will be locked out for this many milliseconds
* while we wait for other concurrent requests to finish.
* Selection priority for Anthropic keys. Aims to maximize throughput by
* saturating concurrency-limited keys first, then trying keys with increasingly
* strict rate limits. Free keys have very limited throughput and are used last.
*/
const RATE_LIMIT_LOCKOUT = 2000;
// The element type of this tuple is also the type of `AnthropicKey.tier`.
// The key-selection comparator ranks keys by `TIER_PRIORITY.indexOf(key.tier)`.
// NOTE(review): the visible comparator returns -1 when `a` has the GREATER
// index, which would sort later entries (e.g. "free") first — confirm that
// this matches the intended "free keys are used last" ordering.
const TIER_PRIORITY = [
"unknown", // initial tier of a newly added key; also returned by detectTier when the header is unusable
"scale",
"build_4",
"build_3",
"build_2",
"build_1",
"free", // counted as a "trial" key in the aggregate stats
] as const;
/**
* Upon being rate limited, a Scale-tier key will be locked out for this many
* milliseconds while we wait for other concurrent requests to finish.
* getKeyLockout also applies this value to keys whose tier is "unknown".
*/
const SCALE_RATE_LIMIT_LOCKOUT = 2000;
/**
* Upon being rate limited, a Build-tier key will be locked out for this many
* milliseconds while we wait for the per-minute rate limit to reset. Because
* the reset provided in the headers specifies the time for the full quota to
* become available, the key may become available before that time.
* getKeyLockout returns this value for every tier other than "scale" and
* "unknown" (so it also covers "free" keys).
*/
const BUILD_RATE_LIMIT_LOCKOUT = 10000;
/**
* Upon assigning a key, we will wait this many milliseconds before allowing it
* to be used again. This is to prevent the queue from flooding a key with too
@ -98,6 +124,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
lastChecked: 0,
claudeTokens: 0,
"claude-opusTokens": 0,
tier: "unknown",
};
this.keys.push(newKey);
}
@ -123,25 +150,27 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
throw new PaymentRequiredError("No Anthropic keys available.");
}
// (largely copied from the OpenAI provider, without trial key support)
// Select a key, from highest priority to lowest priority:
// 1. Keys which are not rate limited
// a. If all keys were rate limited recently, select the least-recently
// rate limited key.
// 2. Keys which are not pozzed
// 3. Keys which have not been used in the longest time
// 1. Keys which are not rate limit locked
// 2. Keys with the highest tier
// 3. Keys which are not pozzed
// 4. Keys which have not been used in the longest time
const now = Date.now();
const keysByPriority = availableKeys.sort((a, b) => {
const aRateLimited = now - a.rateLimitedAt < RATE_LIMIT_LOCKOUT;
const bRateLimited = now - b.rateLimitedAt < RATE_LIMIT_LOCKOUT;
const aLockoutPeriod = getKeyLockout(a);
const bLockoutPeriod = getKeyLockout(b);
const aRateLimited = now - a.rateLimitedAt < aLockoutPeriod;
const bRateLimited = now - b.rateLimitedAt < bLockoutPeriod;
if (aRateLimited && !bRateLimited) return 1;
if (!aRateLimited && bRateLimited) return -1;
if (aRateLimited && bRateLimited) {
return a.rateLimitedAt - b.rateLimitedAt;
}
const aTierIndex = TIER_PRIORITY.indexOf(a.tier);
const bTierIndex = TIER_PRIORITY.indexOf(b.tier);
if (aTierIndex > bTierIndex) return -1;
if (a.isPozzed && !b.isPozzed) return 1;
if (!a.isPozzed && b.isPozzed) return -1;
@ -207,7 +236,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
const key = this.keys.find((k) => k.hash === keyHash)!;
const now = Date.now();
key.rateLimitedAt = now;
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
key.rateLimitedUntil = now + SCALE_RATE_LIMIT_LOCKOUT;
}
public recheck() {
@ -239,3 +268,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
}
}
/**
 * Picks the rate-limit lockout window (in milliseconds) for a key based on
 * its tier: "scale" and "unknown" keys get the short Scale lockout, every
 * other tier gets the longer Build lockout.
 */
function getKeyLockout(key: AnthropicKey) {
  const usesScaleLockout = key.tier === "scale" || key.tier === "unknown";
  if (usesScaleLockout) {
    return SCALE_RATE_LIMIT_LOCKOUT;
  }
  return BUILD_RATE_LIMIT_LOCKOUT;
}

View File

@ -13,15 +13,15 @@ type KeyCheckerOptions<TKey extends Key = Key> = {
export abstract class KeyCheckerBase<TKey extends Key> {
protected readonly service: string;
protected readonly RECURRING_CHECKS_ENABLED: boolean;
protected readonly recurringChecksEnabled: boolean;
/** Minimum time in between any two key checks. */
protected readonly MIN_CHECK_INTERVAL: number;
protected readonly minCheckInterval: number;
/**
* Minimum time in between checks for a given key. Because we can no longer
* read quota usage, there is little reason to check a single key more often
* than this.
*/
protected readonly KEY_CHECK_PERIOD: number;
protected readonly keyCheckPeriod: number;
protected readonly updateKey: (hash: string, props: Partial<TKey>) => void;
protected readonly keys: TKey[] = [];
protected log: pino.Logger;
@ -29,14 +29,13 @@ export abstract class KeyCheckerBase<TKey extends Key> {
protected lastCheck = 0;
protected constructor(keys: TKey[], opts: KeyCheckerOptions<TKey>) {
const { service, keyCheckPeriod, minCheckInterval } = opts;
this.keys = keys;
this.KEY_CHECK_PERIOD = keyCheckPeriod;
this.MIN_CHECK_INTERVAL = minCheckInterval;
this.RECURRING_CHECKS_ENABLED = opts.recurringChecksEnabled ?? true;
this.keyCheckPeriod = opts.keyCheckPeriod;
this.minCheckInterval = opts.minCheckInterval;
this.recurringChecksEnabled = opts.recurringChecksEnabled ?? true;
this.updateKey = opts.updateKey;
this.service = service;
this.log = logger.child({ module: "key-checker", service });
this.service = opts.service;
this.log = logger.child({ module: "key-checker", service: opts.service });
}
public start() {
@ -102,7 +101,7 @@ export abstract class KeyCheckerBase<TKey extends Key> {
return;
}
if (!this.RECURRING_CHECKS_ENABLED) {
if (!this.recurringChecksEnabled) {
checkLog.info(
"Initial checks complete and recurring checks are disabled for this service. Stopping."
);
@ -117,8 +116,8 @@ export abstract class KeyCheckerBase<TKey extends Key> {
// Don't check any individual key too often.
// Don't check anything at all at a rate faster than once per 3 seconds.
const nextCheck = Math.max(
oldestKey.lastChecked + this.KEY_CHECK_PERIOD,
this.lastCheck + this.MIN_CHECK_INTERVAL
oldestKey.lastChecked + this.keyCheckPeriod,
this.lastCheck + this.minCheckInterval
);
const delay = nextCheck - Date.now();