adds Anthropic key tier detection and trial key display

2024-03-18 15:20:34 -05:00 · 2024-03-18 15:20:34 -05:00 · e068edcf48
parent 2098948b7a
commit e068edcf48
4 changed files with 96 additions and 39 deletions
--- a/src/service-info.ts
+++ b/src/service-info.ts
@ -80,6 +80,7 @@ type OpenAIInfo = BaseFamilyInfo & {
  overQuotaKeys?: number;
 };
 type AnthropicInfo = BaseFamilyInfo & {
+  // Number of keys whose detected tier is "free" (aggregated under `__trial`).
+  trialKeys?: number;
  prefilledKeys?: number;
  overQuotaKeys?: number;
 };
@ -349,6 +350,7 @@ function addKeyToAggregates(k: KeyPoolKey) {
        sumTokens += tokens;
        sumCost += getTokenCostUsd(f, tokens);
        increment(modelStats, `${f}__tokens`, tokens);
+        increment(modelStats, `${f}__trial`, k.tier === "free" ? 1 : 0);
        increment(modelStats, `${f}__revoked`, k.isRevoked ? 1 : 0);
        increment(modelStats, `${f}__active`, k.isDisabled ? 0 : 1);
        increment(modelStats, `${f}__overQuota`, k.isOverQuota ? 1 : 0);
@ -437,6 +439,7 @@ function getInfoForFamily(family: ModelFamily): BaseFamilyInfo {
        break;
      case "anthropic":
        info.overQuotaKeys = modelStats.get(`${family}__overQuota`) || 0;
+        info.trialKeys = modelStats.get(`${family}__trial`) || 0;
        info.prefilledKeys = modelStats.get(`${family}__pozzed`) || 0;
        break;
      case "aws":
--- a/src/shared/key-management/anthropic/checker.ts
+++ b/src/shared/key-management/anthropic/checker.ts
@ -1,4 +1,4 @@
-import axios, { AxiosError } from "axios";
+import axios, { AxiosError, AxiosResponse } from "axios";
 import { KeyCheckerBase } from "../key-checker-base";
 import type { AnthropicKey, AnthropicKeyProvider } from "./provider";

@ -52,10 +52,13 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
  }

  protected async testKeyOrFail(key: AnthropicKey) {
+    // One liveness request yields both the pozzed flag and the key's tier.
-    const [{ pozzed }] = await Promise.all([this.testLiveness(key)]);
-    const updates = { isPozzed: pozzed };
+    const [{ pozzed, tier }] = await Promise.all([this.testLiveness(key)]);
+    const updates = { isPozzed: pozzed, tier };
+    // Store both results on the key, addressed by its hash.
    this.updateKey(key.hash, updates);
-    this.log.info({ key: key.hash, models: key.modelFamilies }, "Checked key.");
+    this.log.info(
+      { key: key.hash, tier, models: key.modelFamilies },
+      "Checked key."
+    );
  }

  protected handleAxiosError(key: AnthropicKey, error: AxiosError) {
@ -124,7 +127,9 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
    this.updateKey(key.hash, { lastChecked: next });
  }

-  private async testLiveness(key: AnthropicKey): Promise<{ pozzed: boolean }> {
+  private async testLiveness(
+    key: AnthropicKey
+  ): Promise<{ pozzed: boolean; tier: AnthropicKey["tier"] }> {
    const payload = {
      model: TEST_MODEL,
      max_tokens: 40,
@ -133,24 +138,27 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
      system: SYSTEM,
      messages: DETECTION_PROMPT,
    };
-    const { data } = await axios.post<MessageResponse>(
+    const { data, headers } = await axios.post<MessageResponse>(
      POST_MESSAGES_URL,
      payload,
-      { headers: AnthropicKeyChecker.getHeaders(key) }
+      { headers: AnthropicKeyChecker.getRequestHeaders(key) }
    );
    this.log.debug({ data }, "Response from Anthropic");
+
+    const tier = AnthropicKeyChecker.detectTier(headers);
+
    const completion = data.content.map((part) => part.text).join("");
    if (POZZ_PROMPT.some((re) => re.test(completion))) {
      this.log.info({ key: key.hash, response: completion }, "Key is pozzed.");
-      return { pozzed: true };
+      return { pozzed: true, tier };
    } else if (COPYRIGHT_PROMPT.some((re) => re.test(completion))) {
      this.log.info(
        { key: key.hash, response: completion },
        "Key has copyright CYA prompt."
      );
-      return { pozzed: true };
+      return { pozzed: true, tier };
    } else {
-      return { pozzed: false };
+      return { pozzed: false, tier };
    }
  }

@ -161,7 +169,19 @@ export class AnthropicKeyChecker extends KeyCheckerBase<AnthropicKey> {
    return data?.error?.type;
  }

-  static getHeaders(key: AnthropicKey) {
+  static getRequestHeaders(key: AnthropicKey) {
+    // Request headers: the raw key value plus a pinned "anthropic-version".
    return { "X-API-Key": key.key, "anthropic-version": "2023-06-01" };
  }
+
+  /**
+   * Infers a key's billing tier from the `anthropic-ratelimit-tokens-limit`
+   * response header. Returns "unknown" when the header is missing or does not
+   * parse as an integer; otherwise returns the first tier whose ceiling is at
+   * or above the reported limit, or "scale" when the limit exceeds them all.
+   */
+  static detectTier(headers: AxiosResponse["headers"]) {
+    const rawLimit = headers["anthropic-ratelimit-tokens-limit"];
+    const limit = parseInt(rawLimit, 10);
+    if (!rawLimit || isNaN(limit)) return "unknown";
+
+    // Ceilings in ascending order; the first one that fits decides the tier.
+    const ceilings = [
+      [25000, "free"],
+      [50000, "build_1"],
+      [100000, "build_2"],
+      [200000, "build_3"],
+      [400000, "build_4"],
+    ] as const;
+    for (const [ceiling, tier] of ceilings) {
+      if (limit <= ceiling) return tier;
+    }
+    return "scale";
+  }
 }
--- a/src/shared/key-management/anthropic/provider.ts
+++ b/src/shared/key-management/anthropic/provider.ts
@ -4,7 +4,7 @@ import { config } from "../../../config";
 import { logger } from "../../../logger";
 import { AnthropicModelFamily, getClaudeModelFamily } from "../../models";
 import { AnthropicKeyChecker } from "./checker";
-import { HttpError, PaymentRequiredError } from "../../errors";
+import { PaymentRequiredError } from "../../errors";

 export type AnthropicKeyUpdate = Omit<
  Partial<AnthropicKey>,
@ -45,13 +45,39 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
   */
  isPozzed: boolean;
  isOverQuota: boolean;
+  /**
+   * Key billing tier (https://docs.anthropic.com/claude/reference/rate-limits)
+   **/
+  tier: typeof TIER_PRIORITY[number];
 }

 /**
- * Upon being rate limited, a key will be locked out for this many milliseconds
- * while we wait for other concurrent requests to finish.
+ * Selection priority for Anthropic keys. Aims to maximize throughput by
+ * saturating concurrency-limited keys first, then trying keys with increasingly
+ * strict rate limits. Free keys have very limited throughput and are used last.
 */
-const RATE_LIMIT_LOCKOUT = 2000;
+const TIER_PRIORITY = [
+  // The element union of this tuple is also the type of `AnthropicKey.tier`.
+  // NOTE(review): the key-selection sort in this file returns -1 when a key's
+  // index here is greater than the other key's, which places later entries
+  // first and has no mirrored `return 1`. That looks at odds with "free keys
+  // are used last" -- confirm the intended direction.
+  "unknown",
+  "scale",
+  "build_4",
+  "build_3",
+  "build_2",
+  "build_1",
+  "free",
+] as const;
+
+/**
+ * Upon being rate limited, a Scale-tier key will be locked out for this many
+ * milliseconds while we wait for other concurrent requests to finish.
+ */
+const SCALE_RATE_LIMIT_LOCKOUT = 2000;
+/**
+ * Upon being rate limited, a Build-tier key will be locked out for this many
+ * milliseconds while we wait for the per-minute rate limit to reset. Because
+ * the reset provided in the headers specifies the time for the full quota to
+ * become available, the key may become available before that time.
+ */
+const BUILD_RATE_LIMIT_LOCKOUT = 10000;
 /**
 * Upon assigning a key, we will wait this many milliseconds before allowing it
 * to be used again. This is to prevent the queue from flooding a key with too
@ -98,6 +124,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
        lastChecked: 0,
        claudeTokens: 0,
        "claude-opusTokens": 0,
+        tier: "unknown",
      };
      this.keys.push(newKey);
    }
@ -123,25 +150,27 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
      throw new PaymentRequiredError("No Anthropic keys available.");
    }

-    // (largely copied from the OpenAI provider, without trial key support)
    // Select a key, from highest priority to lowest priority:
-    // 1. Keys which are not rate limited
-    //    a. If all keys were rate limited recently, select the least-recently
-    //       rate limited key.
-    // 2. Keys which are not pozzed
-    // 3. Keys which have not been used in the longest time
+    // 1. Keys which are not rate limit locked
+    // 2. Keys with the highest tier
+    // 3. Keys which are not pozzed
+    // 4. Keys which have not been used in the longest time

    const now = Date.now();

    const keysByPriority = availableKeys.sort((a, b) => {
-      const aRateLimited = now - a.rateLimitedAt < RATE_LIMIT_LOCKOUT;
-      const bRateLimited = now - b.rateLimitedAt < RATE_LIMIT_LOCKOUT;
+      const aLockoutPeriod = getKeyLockout(a);
+      const bLockoutPeriod = getKeyLockout(b);
+
+      const aRateLimited = now - a.rateLimitedAt < aLockoutPeriod;
+      const bRateLimited = now - b.rateLimitedAt < bLockoutPeriod;

      if (aRateLimited && !bRateLimited) return 1;
      if (!aRateLimited && bRateLimited) return -1;
-      if (aRateLimited && bRateLimited) {
-        return a.rateLimitedAt - b.rateLimitedAt;
-      }
+
+      const aTierIndex = TIER_PRIORITY.indexOf(a.tier);
+      const bTierIndex = TIER_PRIORITY.indexOf(b.tier);
+      if (aTierIndex > bTierIndex) return -1;

      if (a.isPozzed && !b.isPozzed) return 1;
      if (!a.isPozzed && b.isPozzed) return -1;
@ -207,7 +236,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    const key = this.keys.find((k) => k.hash === keyHash)!;
    const now = Date.now();
    key.rateLimitedAt = now;
-    key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
+    key.rateLimitedUntil = now + SCALE_RATE_LIMIT_LOCKOUT;
  }

  public recheck() {
@ -239,3 +268,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    key.rateLimitedUntil = Math.max(currentRateLimit, nextRateLimit);
  }
 }
+
+/**
+ * Picks the rate-limit lockout duration (in milliseconds) for a key from its
+ * tier: "scale" and "unknown" keys get the shorter Scale lockout, and every
+ * other tier gets the longer Build lockout.
+ */
+function getKeyLockout(key: AnthropicKey) {
+  switch (key.tier) {
+    case "scale":
+    case "unknown":
+      return SCALE_RATE_LIMIT_LOCKOUT;
+    default:
+      return BUILD_RATE_LIMIT_LOCKOUT;
+  }
+}
--- a/src/shared/key-management/key-checker-base.ts
+++ b/src/shared/key-management/key-checker-base.ts
@ -13,15 +13,15 @@ type KeyCheckerOptions<TKey extends Key = Key> = {

 export abstract class KeyCheckerBase<TKey extends Key> {
  protected readonly service: string;
-  protected readonly RECURRING_CHECKS_ENABLED: boolean;
+  protected readonly recurringChecksEnabled: boolean;
  /** Minimum time in between any two key checks. */
-  protected readonly MIN_CHECK_INTERVAL: number;
+  protected readonly minCheckInterval: number;
  /**
   * Minimum time in between checks for a given key. Because we can no longer
   * read quota usage, there is little reason to check a single key more often
   * than this.
   */
-  protected readonly KEY_CHECK_PERIOD: number;
+  protected readonly keyCheckPeriod: number;
  protected readonly updateKey: (hash: string, props: Partial<TKey>) => void;
  protected readonly keys: TKey[] = [];
  protected log: pino.Logger;
@ -29,14 +29,13 @@ export abstract class KeyCheckerBase<TKey extends Key> {
  protected lastCheck = 0;

  protected constructor(keys: TKey[], opts: KeyCheckerOptions<TKey>) {
+    // Keep the supplied key list and copy the checker settings from `opts`.
-    const { service, keyCheckPeriod, minCheckInterval } = opts;
    this.keys = keys;
-    this.KEY_CHECK_PERIOD = keyCheckPeriod;
-    this.MIN_CHECK_INTERVAL = minCheckInterval;
-    this.RECURRING_CHECKS_ENABLED = opts.recurringChecksEnabled ?? true;
+    this.keyCheckPeriod = opts.keyCheckPeriod;
+    this.minCheckInterval = opts.minCheckInterval;
+    // Recurring checks default to enabled when the option is omitted.
+    this.recurringChecksEnabled = opts.recurringChecksEnabled ?? true;
    this.updateKey = opts.updateKey;
-    this.service = service;
-    this.log = logger.child({ module: "key-checker", service });
+    this.service = opts.service;
+    // Child logger tagged with the "key-checker" module and this service.
+    this.log = logger.child({ module: "key-checker", service: opts.service });
  }

  public start() {
@ -102,7 +101,7 @@ export abstract class KeyCheckerBase<TKey extends Key> {
      return;
    }

-    if (!this.RECURRING_CHECKS_ENABLED) {
+    if (!this.recurringChecksEnabled) {
      checkLog.info(
        "Initial checks complete and recurring checks are disabled for this service. Stopping."
      );
@ -117,8 +116,8 @@ export abstract class KeyCheckerBase<TKey extends Key> {
    // Don't check any individual key too often.
    // Don't check anything at all at a rate faster than once per 3 seconds.
    const nextCheck = Math.max(
-      oldestKey.lastChecked + this.KEY_CHECK_PERIOD,
-      this.lastCheck + this.MIN_CHECK_INTERVAL
+      oldestKey.lastChecked + this.keyCheckPeriod,
+      this.lastCheck + this.minCheckInterval
    );

    const delay = nextCheck - Date.now();