adds claude-opus model family

nai-degen 2024-03-04 14:08:59 -06:00
parent f6c124c1d3
commit 1edc93dc72
8 changed files with 30 additions and 14 deletions

View File

@@ -40,9 +40,10 @@ NODE_ENV=production
# Which model types users are allowed to access.
# The following model families are recognized:
-# turbo | gpt4 | gpt4-32k | gpt4-turbo | dall-e | claude | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo
-# By default, all models are allowed except for 'dall-e'. To allow DALL-E image
-# generation, uncomment the line below and add 'dall-e' to the list.
+# turbo | gpt4 | gpt4-32k | gpt4-turbo | dall-e | claude | claude-opus | gemini-pro | mistral-tiny | mistral-small | mistral-medium | mistral-large | aws-claude | azure-turbo | azure-gpt4 | azure-gpt4-32k | azure-gpt4-turbo
+# By default, all models are allowed except for 'dall-e' and 'claude-opus'. To
+# allow DALL-E image generation, uncomment the line below and add 'dall-e' to
+# the list.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo
# URLs from which requests will be blocked.
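Since claude-opus is now excluded by default, an operator who wants to expose Opus has to opt in explicitly. A minimal sketch of the uncommented setting with the new family added (otherwise mirroring the commented example above):

ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo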

View File

@@ -17,7 +17,8 @@ const MODEL_FAMILY_FRIENDLY_NAME: { [f in ModelFamily]: string } = {
"gpt4-32k": "GPT-4 32k",
"gpt4-turbo": "GPT-4 Turbo",
"dall-e": "DALL-E",
-"claude": "Claude",
+"claude": "Claude (Sonnet)",
+"claude-opus": "Claude (Opus)",
"gemini-pro": "Gemini Pro",
"mistral-tiny": "Mistral 7B",
"mistral-small": "Mixtral Small", // Originally 8x7B, but that now refers to the older open-weight version. Mixtral Small is a newer closed-weight update to the 8x7B model.

View File

@@ -7,11 +7,12 @@ import { HPMRequestCallback } from "../index";
* know this without trying to send the request and seeing if it fails. If a
* key is marked as requiring a preamble, it will be added here.
*/
-export const addAnthropicPreamble: HPMRequestCallback = (
-_proxyReq,
-req
-) => {
-if (!isTextGenerationRequest(req) || req.key?.service !== "anthropic") {
+export const addAnthropicPreamble: HPMRequestCallback = (_proxyReq, req) => {
+if (
+!isTextGenerationRequest(req) ||
+req.key?.service !== "anthropic" ||
+req.outboundApi !== "anthropic-text"
+) {
return;
}

View File

@@ -69,10 +69,14 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
modelMax = 100000;
} else if (model.match(/^claude-2/)) {
modelMax = 200000;
+} else if (model.match(/^claude-3/)) {
+modelMax = 200000;
} else if (model.match(/^gemini-\d{3}$/)) {
modelMax = GOOGLE_AI_MAX_CONTEXT;
} else if (model.match(/^mistral-(tiny|small|medium)$/)) {
modelMax = MISTRAL_AI_MAX_CONTENT;
+} else if (model.match(/^anthropic\.claude-3-sonnet/)) {
+modelMax = 200000;
} else if (model.match(/^anthropic\.claude-v2:\d/)) {
modelMax = 200000;
} else if (model.match(/^anthropic\.claude/)) {

View File

@@ -13,8 +13,8 @@ export type AnthropicModel =
| "claude-v1-100k"
| "claude-2"
| "claude-2.1"
-| "claude-3-opus-20240229"
-| "claude-3-sonnet-20240229";
+| "claude-3-opus-20240229" // new expensive model
+| "claude-3-sonnet-20240229" // new cheap claude2 sidegrade
export type AnthropicKeyUpdate = Omit<
Partial<AnthropicKey>,
@@ -90,7 +90,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
const newKey: AnthropicKey = {
key,
service: this.service,
-modelFamilies: ["claude"],
+modelFamilies: ["claude", "claude-opus"],
isDisabled: false,
isOverQuota: false,
isRevoked: false,
@@ -107,6 +107,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
.slice(0, 8)}`,
lastChecked: 0,
claudeTokens: 0,
+"claude-opusTokens": 0,
};
this.keys.push(newKey);
}
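The quoted "claude-opusTokens" property mirrors the existing claudeTokens counter; presumably the per-family usage counters are keyed by appending "Tokens" to the family name, which the hyphen in the new family forces into quotes. A rough sketch of that assumed convention (not taken from this commit):

// Hypothetical illustration of the counter-key naming implied by claudeTokens / "claude-opusTokens".
type AnthropicModelFamily = "claude" | "claude-opus";
type AnthropicTokenCounterKey = `${AnthropicModelFamily}Tokens`; // "claudeTokens" | "claude-opusTokens"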

View File

@@ -22,7 +22,7 @@ export type OpenAIModelFamily =
| "gpt4-32k"
| "gpt4-turbo"
| "dall-e";
-export type AnthropicModelFamily = "claude";
+export type AnthropicModelFamily = "claude" | "claude-opus";
export type GoogleAIModelFamily = "gemini-pro";
export type MistralAIModelFamily =
| "mistral-tiny"
@@ -51,6 +51,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
"gpt4-turbo",
"dall-e",
"claude",
+"claude-opus",
"gemini-pro",
"mistral-tiny",
"mistral-small",
@@ -96,6 +97,7 @@ export const MODEL_FAMILY_SERVICE: {
"gpt4-32k": "openai",
"dall-e": "openai",
claude: "anthropic",
+"claude-opus": "anthropic",
"aws-claude": "aws",
"azure-turbo": "azure",
"azure-gpt4": "azure",
@@ -122,6 +124,7 @@ export function getOpenAIModelFamily(
export function getClaudeModelFamily(model: string): ModelFamily {
if (model.startsWith("anthropic.")) return getAwsBedrockModelFamily(model);
+if (model.includes("opus")) return "claude-opus";
return "claude";
}
@@ -146,7 +149,8 @@ export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
}
}
-export function getAwsBedrockModelFamily(_model: string): ModelFamily {
+export function getAwsBedrockModelFamily(model: string): ModelFamily {
+if (model.includes("opus")) return "claude-opus";
return "aws-claude";
}
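For illustration, a sketch of how the updated routing is expected to behave. The import path is assumed; the Anthropic model IDs are the ones listed in the AnthropicModel type above, and the Bedrock ID matches the anthropic.claude-v2 pattern used elsewhere in the diff:

// Assumed import path; the functions are the ones modified above.
import { getClaudeModelFamily, getAwsBedrockModelFamily } from "./models";

getClaudeModelFamily("claude-3-opus-20240229");    // "claude-opus"
getClaudeModelFamily("claude-3-sonnet-20240229");  // "claude"
getAwsBedrockModelFamily("anthropic.claude-v2:1"); // "aws-claude" (no "opus" in the ID)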

View File

@@ -29,6 +29,9 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
case "claude":
cost = 0.000008;
break;
+case "claude-opus":
+cost = 0.000015;
+break;
case "mistral-tiny":
cost = 0.00000031;
break;
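At these per-token rates, claude-opus is priced at roughly twice the claude family. Assuming getTokenCostUsd multiplies the rate selected above by the token count, a million tokens works out as follows:

// Assumed import path; the function is the one modified above.
import { getTokenCostUsd } from "./stats";

getTokenCostUsd("claude-opus", 1_000_000); // 1,000,000 * 0.000015 = 15 USD
getTokenCostUsd("claude", 1_000_000);      // 1,000,000 * 0.000008 = 8 USD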

View File

@@ -34,6 +34,7 @@ const INITIAL_TOKENS: Required<UserTokenCounts> = {
"gpt4-turbo": 0,
"dall-e": 0,
claude: 0,
+"claude-opus": 0,
"gemini-pro": 0,
"mistral-tiny": 0,
"mistral-small": 0,