Implement Anthropic Chat Completions endpoint and Claude 3 (khanon/oai-reverse-proxy!64)
This commit is contained in: parent b90abbda88, commit db318ec237
@@ -42,6 +42,8 @@ const getModelsResponse = () => {
     "claude-2",
     "claude-2.0",
     "claude-2.1",
+    "claude-3-opus-20240229",
+    "claude-3-sonnet-20240229",
   ];

   const models = claudeVariants.map((id) => ({
@@ -81,8 +83,16 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
   }

   if (req.inboundApi === "openai") {
-    req.log.info("Transforming Anthropic response to OpenAI format");
-    body = transformAnthropicResponse(body, req);
+    req.log.info("Transforming Anthropic text to OpenAI format");
+    body = transformAnthropicTextResponseToOpenAI(body, req);
   }

+  if (
+    req.inboundApi === "anthropic-text" &&
+    req.outboundApi === "anthropic-chat"
+  ) {
+    req.log.info("Transforming Anthropic text to Anthropic chat format");
+    body = transformAnthropicChatResponseToAnthropicText(body, req);
+  }
+
   if (req.tokenizerInfo) {
@@ -92,13 +102,32 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
   res.status(200).json(body);
 };

+function transformAnthropicChatResponseToAnthropicText(
+  anthropicBody: Record<string, any>,
+  req: Request
+): Record<string, any> {
+  return {
+    type: "completion",
+    id: "trans-" + anthropicBody.id,
+    completion: anthropicBody.content
+      .map((part: { type: string; text: string }) =>
+        part.type === "text" ? part.text : ""
+      )
+      .join(""),
+    stop_reason: anthropicBody.stop_reason,
+    stop: anthropicBody.stop_sequence,
+    model: anthropicBody.model,
+    usage: anthropicBody.usage,
+  };
+}
+
 /**
  * Transforms a model response from the Anthropic API to match those from the
  * OpenAI API, for users using Claude via the OpenAI-compatible endpoint. This
  * is only used for non-streaming requests as streaming requests are handled
  * on-the-fly.
  */
-function transformAnthropicResponse(
+function transformAnthropicTextResponseToOpenAI(
   anthropicBody: Record<string, any>,
   req: Request
 ): Record<string, any> {
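Note: `transformAnthropicChatResponseToAnthropicText` above is a pure shape mapping. A standalone sketch of what it does to a Messages API response (the real helper also receives the Express `req` for logging; the sample values are illustrative):

```ts
const chatResponse = {
  id: "msg_0123",
  content: [{ type: "text", text: "Hello!" }],
  stop_reason: "end_turn",
  stop_sequence: null,
  model: "claude-3-sonnet-20240229",
  usage: { input_tokens: 10, output_tokens: 3 },
};

// Equivalent of the helper's output: only text blocks survive into the
// flattened `completion` string; other block types are dropped as "".
const textResponse = {
  type: "completion",
  id: "trans-" + chatResponse.id,
  completion: chatResponse.content
    .map((part) => (part.type === "text" ? part.text : ""))
    .join(""),
  stop_reason: chatResponse.stop_reason,
  stop: chatResponse.stop_sequence,
  model: chatResponse.model,
  usage: chatResponse.usage,
};
// => { type: "completion", id: "trans-msg_0123", completion: "Hello!", ... }
```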
@@ -139,36 +168,96 @@ const anthropicProxy = createQueueMiddleware({
     proxyRes: createOnProxyResHandler([anthropicResponseHandler]),
     error: handleProxyError,
   },
-  pathRewrite: {
-    // Send OpenAI-compat requests to the real Anthropic endpoint.
-    "^/v1/chat/completions": "/v1/complete",
+  // Abusing pathFilter to rewrite the paths dynamically.
+  pathFilter: (pathname, req) => {
+    const isText = req.outboundApi === "anthropic-text";
+    const isChat = req.outboundApi === "anthropic-chat";
+    if (isChat && pathname === "/v1/complete") {
+      req.url = "/v1/messages";
+    }
+    if (isText && pathname === "/v1/chat/completions") {
+      req.url = "/v1/complete";
+    }
+    if (isChat && pathname === "/v1/claude-3/complete") {
+      req.url = "/v1/messages";
+    }
+    return true;
   },
 }),
 });

+const nativeTextPreprocessor = createPreprocessorMiddleware({
+  inApi: "anthropic-text",
+  outApi: "anthropic-text",
+  service: "anthropic",
+});
+
+const textToChatPreprocessor = createPreprocessorMiddleware({
+  inApi: "anthropic-text",
+  outApi: "anthropic-chat",
+  service: "anthropic",
+});
+
+/**
+ * Routes text completion prompts to anthropic-chat if they need translation
+ * (claude-3 based models do not support the old text completion endpoint).
+ */
+const claudeTextCompletionRouter: RequestHandler = (req, res, next) => {
+  if (req.body.model?.startsWith("claude-3")) {
+    textToChatPreprocessor(req, res, next);
+  } else {
+    nativeTextPreprocessor(req, res, next);
+  }
+};
+
 const anthropicRouter = Router();
 anthropicRouter.get("/v1/models", handleModelRequest);
-// Native Anthropic chat completion endpoint.
+// Anthropic text completion endpoint. Dynamic routing based on model.
 anthropicRouter.post(
   "/v1/complete",
   ipLimiter,
+  claudeTextCompletionRouter,
+  anthropicProxy
+);
+// Native Anthropic chat completion endpoint.
+anthropicRouter.post(
+  "/v1/messages",
+  ipLimiter,
   createPreprocessorMiddleware({
-    inApi: "anthropic",
-    outApi: "anthropic",
+    inApi: "anthropic-chat",
+    outApi: "anthropic-chat",
     service: "anthropic",
   }),
   anthropicProxy
 );
-// OpenAI-to-Anthropic compatibility endpoint.
+// OpenAI-to-Anthropic Text compatibility endpoint.
 anthropicRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "openai", outApi: "anthropic", service: "anthropic" },
+    { inApi: "openai", outApi: "anthropic-text", service: "anthropic" },
     { afterTransform: [maybeReassignModel] }
   ),
   anthropicProxy
 );
+// Temporary force Anthropic Text to Anthropic Chat for frontends which do not
+// yet support the new model. Forces claude-3. Will be removed once common
+// frontends have been updated.
+anthropicRouter.post(
+  "/v1/claude-3/complete",
+  ipLimiter,
+  createPreprocessorMiddleware(
+    { inApi: "anthropic-text", outApi: "anthropic-chat", service: "anthropic" },
+    {
+      beforeTransform: [
+        (req) => {
+          req.body.model = "claude-3-sonnet-20240229";
+        },
+      ],
+    }
+  ),
+  anthropicProxy
+);

 function maybeReassignModel(req: Request) {
   const model = req.body.model;
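Note: a rough sketch of the dynamic routing above. claude-3 models reject the legacy text completion API, so they get the text-to-chat preprocessor and the `pathFilter` later rewrites the upstream path; the function and names below are illustrative only:

```ts
// Illustration of claudeTextCompletionRouter + pathFilter combined behavior.
function routesForModel(model?: string) {
  if (model?.startsWith("claude-3")) {
    return { preprocess: "anthropic-text -> anthropic-chat", upstream: "/v1/messages" };
  }
  return { preprocess: "anthropic-text -> anthropic-text", upstream: "/v1/complete" };
}

routesForModel("claude-3-opus-20240229"); // translated, proxied to /v1/messages
routesForModel("claude-2.1");             // passthrough, proxied to /v1/complete
```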
@@ -32,6 +32,7 @@ const getModelsResponse = () => {
   const variants = [
     "anthropic.claude-v2",
     "anthropic.claude-v2:1",
+    "anthropic.claude-3-sonnet-20240229-v1:0"
   ];

   const models = variants.map((id) => ({

@@ -145,7 +146,7 @@ awsRouter.post(
   "/v1/complete",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "anthropic", outApi: "anthropic", service: "aws" },
+    { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
     { afterTransform: [maybeReassignModel] }
   ),
   awsProxy

@@ -155,7 +156,7 @@ awsRouter.post(
   "/v1/chat/completions",
   ipLimiter,
   createPreprocessorMiddleware(
-    { inApi: "openai", outApi: "anthropic", service: "aws" },
+    { inApi: "openai", outApi: "anthropic-text", service: "aws" },
     { afterTransform: [maybeReassignModel] }
   ),
   awsProxy
@@ -11,6 +11,8 @@ const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
 const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
 const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
 const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
+const ANTHROPIC_MESSAGES_ENDPOINT = "/v1/messages";
+const ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT = "/v1/claude-3/complete";

 export function isTextGenerationRequest(req: Request) {
   return (

@@ -19,6 +21,8 @@ export function isTextGenerationRequest(req: Request) {
       OPENAI_CHAT_COMPLETION_ENDPOINT,
       OPENAI_TEXT_COMPLETION_ENDPOINT,
       ANTHROPIC_COMPLETION_ENDPOINT,
+      ANTHROPIC_MESSAGES_ENDPOINT,
+      ANTHROPIC_CLAUDE3_COMPAT_ENDPOINT,
     ].some((endpoint) => req.path.startsWith(endpoint))
   );
 }

@@ -91,6 +95,7 @@ export const classifyErrorAndSend = (
     });
   } catch (error) {
     req.log.error(error, `Error writing error response headers, giving up.`);
     res.end();
   }
 };
@@ -199,11 +204,24 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
       return body.choices[0].message.content || "";
     case "openai-text":
       return body.choices[0].text;
-    case "anthropic":
+    case "anthropic-chat":
+      if (!body.content) {
+        req.log.error(
+          { body: JSON.stringify(body) },
+          "Received empty Anthropic chat completion"
+        );
+        return "";
+      }
+      return body.content
+        .map(({ text, type }: { type: string; text: string }) =>
+          type === "text" ? text : `[Unsupported content type: ${type}]`
+        )
+        .join("\n");
+    case "anthropic-text":
       if (!body.completion) {
         req.log.error(
           { body: JSON.stringify(body) },
-          "Received empty Anthropic completion"
+          "Received empty Anthropic text completion"
         );
         return "";
       }
@@ -229,7 +247,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
       return body.model;
     case "openai-image":
       return req.body.model;
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       // Anthropic confirms the model in the response, but AWS Claude doesn't.
       return body.model || req.body.model;
     case "google-ai":

@@ -28,7 +28,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
   switch (req.outboundApi) {
     // If we are translating between API formats we may need to select a model
     // for the user, because the provided model is for the inbound API.
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       assignedKey = keyPool.get("claude-v1", req.service);
       break;
     case "openai-text":

@@ -71,6 +72,8 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
       if (key.organizationId) {
         proxyReq.setHeader("OpenAI-Organization", key.organizationId);
       }
       proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
       break;
+    case "mistral-ai":
+      proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
+      break;
@@ -8,6 +8,10 @@ export const finalizeBody: HPMRequestCallback = (proxyReq, req) => {
   if (req.outboundApi === "openai-image") {
     delete req.body.stream;
   }
+  // For anthropic text to chat requests, remove undefined prompt.
+  if (req.outboundApi === "anthropic-chat") {
+    delete req.body.prompt;
+  }

   const updatedBody = JSON.stringify(req.body);
   proxyReq.setHeader("Content-Length", Buffer.byteLength(updatedBody));

@@ -114,7 +114,7 @@ const handleTestMessage: RequestHandler = (req, res) => {
   if (method !== "POST") {
     return;
   }

   if (isTestMessage(body)) {
     req.log.info({ body }, "Received test message. Skipping API call.");
     res.json({

@@ -145,6 +145,9 @@ function isTestMessage(body: any) {
       messages[0].content === "Hi"
     );
   } else {
-    return prompt?.trim() === "Human: Hi\n\nAssistant:";
+    return (
+      prompt?.trim() === "Human: Hi\n\nAssistant:" ||
+      prompt?.startsWith("Hi\n\n")
+    );
   }
 }
@@ -2,6 +2,7 @@ import { RequestPreprocessor } from "../index";
 import { countTokens } from "../../../../shared/tokenization";
 import { assertNever } from "../../../../shared/utils";
 import {
+  AnthropicChatMessage,
   GoogleAIChatMessage,
   MistralAIChatMessage,
   OpenAIChatMessage,

@@ -28,7 +29,13 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
       result = await countTokens({ req, prompt, service });
       break;
     }
-    case "anthropic": {
+    case "anthropic-chat": {
+      req.outputTokens = req.body.max_tokens;
+      const prompt: AnthropicChatMessage[] = req.body.messages;
+      result = await countTokens({ req, prompt, service });
+      break;
+    }
+    case "anthropic-text": {
       req.outputTokens = req.body.max_tokens_to_sample;
       const prompt: string = req.body.prompt;
       result = await countTokens({ req, prompt, service });

@@ -6,6 +6,7 @@ import { UserInputError } from "../../../../shared/errors";
 import {
   MistralAIChatMessage,
   OpenAIChatMessage,
+  flattenAnthropicMessages,
 } from "../../../../shared/api-schemas";

 const rejectedClients = new Map<string, number>();

@@ -53,7 +54,9 @@ function getPromptFromRequest(req: Request) {
   const service = req.outboundApi;
   const body = req.body;
   switch (service) {
-    case "anthropic":
+    case "anthropic-chat":
+      return flattenAnthropicMessages(body.messages);
+    case "anthropic-text":
       return body.prompt;
     case "openai":
     case "mistral-ai":
@@ -2,7 +2,10 @@ import express from "express";
 import { Sha256 } from "@aws-crypto/sha256-js";
 import { SignatureV4 } from "@smithy/signature-v4";
 import { HttpRequest } from "@smithy/protocol-http";
-import { AnthropicV1CompleteSchema } from "../../../../shared/api-schemas/anthropic";
+import {
+  AnthropicV1TextSchema,
+  AnthropicV1MessagesSchema,
+} from "../../../../shared/api-schemas/anthropic";
 import { keyPool } from "../../../../shared/key-management";
 import { RequestPreprocessor } from "../index";

@@ -22,19 +25,37 @@ export const signAwsRequest: RequestPreprocessor = async (req) => {
   let preamble = req.body.prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
   req.body.prompt = preamble + req.body.prompt;

-  // AWS supports only a subset of Anthropic's parameters and is more strict
-  // about unknown parameters.
+  // AWS uses mostly the same parameters as Anthropic, with a few removed params
+  // and much stricter validation on unused parameters. Rather than treating it
+  // as a separate schema we will use the anthropic ones and strip the unused
+  // parameters.
   // TODO: This should happen in transform-outbound-payload.ts
-  const strippedParams = AnthropicV1CompleteSchema.pick({
-    prompt: true,
-    max_tokens_to_sample: true,
-    stop_sequences: true,
-    temperature: true,
-    top_k: true,
-    top_p: true,
-  })
-    .strip()
-    .parse(req.body);
+  let strippedParams: Record<string, unknown>;
+  if (req.inboundApi === "anthropic-chat") {
+    strippedParams = AnthropicV1MessagesSchema
+      .pick({
+        messages: true,
+        max_tokens: true,
+        stop_sequences: true,
+        temperature: true,
+        top_k: true,
+        top_p: true,
+      })
+      .strip()
+      .parse(req.body);
+  } else {
+    strippedParams = AnthropicV1TextSchema
+      .pick({
+        prompt: true,
+        max_tokens_to_sample: true,
+        stop_sequences: true,
+        temperature: true,
+        top_k: true,
+        top_p: true,
+      })
+      .strip()
+      .parse(req.body);
+  }

   const credential = getCredentialParts(req);
   const host = AMZ_HOST.replace("%REGION%", credential.region);
@@ -1,4 +1,7 @@
-import { openAIToAnthropic } from "../../../../shared/api-schemas/anthropic";
+import {
+  anthropicTextToAnthropicChat,
+  openAIToAnthropicText,
+} from "../../../../shared/api-schemas/anthropic";
 import { openAIToOpenAIText } from "../../../../shared/api-schemas/openai-text";
 import { openAIToOpenAIImage } from "../../../../shared/api-schemas/openai-image";
 import { openAIToGoogleAI } from "../../../../shared/api-schemas/google-ai";

@@ -41,8 +44,16 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
     return;
   }

-  if (req.inboundApi === "openai" && req.outboundApi === "anthropic") {
-    req.body = openAIToAnthropic(req);
+  if (
+    req.inboundApi === "anthropic-text" &&
+    req.outboundApi === "anthropic-chat"
+  ) {
+    req.body = anthropicTextToAnthropicChat(req);
     return;
   }

+  if (req.inboundApi === "openai" && req.outboundApi === "anthropic-text") {
+    req.body = openAIToAnthropicText(req);
+    return;
+  }
+
@@ -29,7 +29,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
     case "openai-text":
       proxyMax = OPENAI_MAX_CONTEXT;
       break;
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       proxyMax = CLAUDE_MAX_CONTEXT;
       break;
     case "google-ai":
@@ -10,9 +10,11 @@ import {
 import { ProxyResHandlerWithBody } from ".";
 import { assertNever } from "../../../shared/utils";
 import {
+  AnthropicChatMessage, flattenAnthropicMessages,
   MistralAIChatMessage,
   OpenAIChatMessage,
 } from "../../../shared/api-schemas";
+import { APIFormat } from "../../../shared/key-management";

 /** If prompt logging is enabled, enqueues the prompt for logging. */
 export const logPrompt: ProxyResHandlerWithBody = async (

@@ -33,7 +35,7 @@ export const logPrompt: ProxyResHandlerWithBody = async (
   if (!loggable) return;

   const promptPayload = getPromptForRequest(req, responseBody);
-  const promptFlattened = flattenMessages(promptPayload);
+  const promptFlattened = flattenMessages(promptPayload, req.outboundApi);
   const response = getCompletionFromBody(req, responseBody);
   const model = getModelFromBody(req, responseBody);

@@ -57,13 +59,19 @@ type OaiImageResult = {
 const getPromptForRequest = (
   req: Request,
   responseBody: Record<string, any>
-): string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult => {
+):
+  | string
+  | OpenAIChatMessage[]
+  | AnthropicChatMessage[]
+  | MistralAIChatMessage[]
+  | OaiImageResult => {
   // Since the prompt logger only runs after the request has been proxied, we
   // can assume the body has already been transformed to the target API's
   // format.
   switch (req.outboundApi) {
     case "openai":
     case "mistral-ai":
+    case "anthropic-chat":
       return req.body.messages;
     case "openai-text":
       return req.body.prompt;

@@ -75,7 +83,7 @@ const getPromptForRequest = (
         quality: req.body.quality,
         revisedPrompt: responseBody.data[0].revised_prompt,
       };
-    case "anthropic":
+    case "anthropic-text":
       return req.body.prompt;
     case "google-ai":
       return req.body.prompt.text;

@@ -85,11 +93,20 @@ const getPromptForRequest = (
 };

 const flattenMessages = (
-  val: string | OpenAIChatMessage[] | MistralAIChatMessage[] | OaiImageResult
+  val:
+    | string
+    | OpenAIChatMessage[]
+    | MistralAIChatMessage[]
+    | OaiImageResult
+    | AnthropicChatMessage[],
+  format: APIFormat,
 ): string => {
   if (typeof val === "string") {
     return val.trim();
   }
+  if (format === "anthropic-chat") {
+    return flattenAnthropicMessages(val as AnthropicChatMessage[]);
+  }
   if (Array.isArray(val)) {
     return val
       .map(({ content, role }) => {
@@ -0,0 +1,49 @@
+import { OpenAIChatCompletionStreamEvent } from "../index";
+
+export type AnthropicChatCompletionResponse = {
+  id: string;
+  type: "message";
+  role: "assistant";
+  content: { type: "text"; text: string }[];
+  model: string;
+  stop_reason: string | null;
+  stop_sequence: string | null;
+  usage: { input_tokens: number; output_tokens: number };
+};
+
+/**
+ * Given a list of OpenAI chat completion events, compiles them into a single
+ * finalized Anthropic chat completion response so that non-streaming middleware
+ * can operate on it as if it were a blocking response.
+ */
+export function mergeEventsForAnthropicChat(
+  events: OpenAIChatCompletionStreamEvent[]
+): AnthropicChatCompletionResponse {
+  let merged: AnthropicChatCompletionResponse = {
+    id: "",
+    type: "message",
+    role: "assistant",
+    content: [],
+    model: "",
+    stop_reason: null,
+    stop_sequence: null,
+    usage: { input_tokens: 0, output_tokens: 0 },
+  };
+  merged = events.reduce((acc, event, i) => {
+    // The first event will only contain role assignment and response metadata
+    if (i === 0) {
+      acc.id = event.id;
+      acc.model = event.model;
+      acc.content = [{ type: "text", text: "" }];
+      return acc;
+    }
+
+    acc.stop_reason = event.choices[0].finish_reason ?? "";
+    if (event.choices[0].delta.content) {
+      acc.content[0].text += event.choices[0].delta.content;
+    }
+
+    return acc;
+  }, merged);
+  return merged;
+}
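Note: a usage sketch for the new aggregator (import path and sample values assumed for illustration). The first chunk seeds id/model and an empty text block; later chunks append delta content and record the finish reason:

```ts
import { mergeEventsForAnthropicChat } from "./anthropic-chat";

const merged = mergeEventsForAnthropicChat([
  {
    id: "chatcmpl-1", object: "chat.completion.chunk", created: 0,
    model: "claude-3-sonnet-20240229",
    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }],
  },
  {
    id: "chatcmpl-1", object: "chat.completion.chunk", created: 0,
    model: "claude-3-sonnet-20240229",
    choices: [{ index: 0, delta: { content: "Hi there" }, finish_reason: "stop" }],
  },
]);
// merged.content => [{ type: "text", text: "Hi there" }]
// merged.stop_reason => "stop"
```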
@@ -1,6 +1,6 @@
 import { OpenAIChatCompletionStreamEvent } from "../index";

-export type AnthropicCompletionResponse = {
+export type AnthropicTextCompletionResponse = {
   completion: string;
   stop_reason: string;
   truncated: boolean;

@@ -15,10 +15,10 @@ export type AnthropicCompletionResponse = {
  * finalized Anthropic completion response so that non-streaming middleware
  * can operate on it as if it were a blocking response.
  */
-export function mergeEventsForAnthropic(
+export function mergeEventsForAnthropicText(
   events: OpenAIChatCompletionStreamEvent[]
-): AnthropicCompletionResponse {
-  let merged: AnthropicCompletionResponse = {
+): AnthropicTextCompletionResponse {
+  let merged: AnthropicTextCompletionResponse = {
     log_id: "",
     exception: null,
     model: "",
@@ -1,9 +1,12 @@
 import { APIFormat } from "../../../../shared/key-management";
 import { assertNever } from "../../../../shared/utils";
 import {
-  mergeEventsForAnthropic,
+  anthropicV2ToOpenAI,
+  mergeEventsForAnthropicChat,
+  mergeEventsForAnthropicText,
   mergeEventsForOpenAIChat,
   mergeEventsForOpenAIText,
+  AnthropicV2StreamEvent,
   OpenAIChatCompletionStreamEvent,
 } from "./index";

@@ -20,8 +23,30 @@ export class EventAggregator {
     this.format = format;
   }

-  addEvent(event: OpenAIChatCompletionStreamEvent) {
-    this.events.push(event);
+  addEvent(event: OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent) {
+    if (eventIsOpenAIEvent(event)) {
+      this.events.push(event);
+    } else {
+      // horrible special case. previously all transformers' target format was
+      // openai, so the event aggregator could conveniently assume all incoming
+      // events were in openai format.
+      // now we have added anthropic-chat-to-text, so aggregator needs to know
+      // how to collapse events from two formats.
+      // because that is annoying, we will simply transform anthropic events to
+      // openai (even if the client didn't ask for openai) so we don't have to
+      // write aggregation logic for anthropic chat (which is also a troublesome
+      // stateful format).
+      const openAIEvent = anthropicV2ToOpenAI({
+        data: `event: completion\ndata: ${JSON.stringify(event)}\n\n`,
+        lastPosition: -1,
+        index: 0,
+        fallbackId: event.log_id || "event-aggregator-fallback",
+        fallbackModel: event.model || "claude-3-fallback",
+      });
+      if (openAIEvent.event) {
+        this.events.push(openAIEvent.event);
+      }
+    }
   }

   getFinalResponse() {

@@ -32,8 +57,10 @@ export class EventAggregator {
         return mergeEventsForOpenAIChat(this.events);
       case "openai-text":
         return mergeEventsForOpenAIText(this.events);
-      case "anthropic":
-        return mergeEventsForAnthropic(this.events);
+      case "anthropic-text":
+        return mergeEventsForAnthropicText(this.events);
+      case "anthropic-chat":
+        return mergeEventsForAnthropicChat(this.events);
       case "openai-image":
         throw new Error(`SSE aggregation not supported for ${this.format}`);
       default:

@@ -41,3 +68,9 @@ export class EventAggregator {
     }
   }
 }

+function eventIsOpenAIEvent(
+  event: any
+): event is OpenAIChatCompletionStreamEvent {
+  return event?.object === "chat.completion.chunk";
+}
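Note: roughly what the `addEvent` special case above does to a raw Anthropic V2 text event (a sketch; `anthropicV2ToOpenAI` is the transformer imported at the top of the file, sample values assumed):

```ts
// The V2 event is re-wrapped as SSE text and run through anthropicV2ToOpenAI,
// so this.events only ever holds OpenAI-shaped chunks.
const v2Event = { log_id: "evt-1", completion: "Hello", stop_reason: null };
const wrapped = `event: completion\ndata: ${JSON.stringify(v2Event)}\n\n`;
// anthropicV2ToOpenAI({ data: wrapped, lastPosition: -1, index: 0,
//   fallbackId: v2Event.log_id, fallbackModel: "claude-3-fallback" })
// => { position, event: { object: "chat.completion.chunk",
//      choices: [{ delta: { content: "Hello" }, ... }] } }
```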
@@ -1,9 +1,17 @@
-export type SSEResponseTransformArgs = {
+export type SSEResponseTransformArgs<S = Record<string, any>> = {
   data: string;
   lastPosition: number;
   index: number;
   fallbackId: string;
   fallbackModel: string;
+  state?: S;
 };

+export type AnthropicV2StreamEvent = {
+  log_id?: string;
+  model?: string;
+  completion: string;
+  stop_reason: string | null;
+};
+
 export type OpenAIChatCompletionStreamEvent = {

@@ -16,17 +24,24 @@ export type OpenAIChatCompletionStreamEvent = {
     delta: { role?: string; content?: string };
     finish_reason: string | null;
   }[];
-}
+};

-export type StreamingCompletionTransformer = (
-  params: SSEResponseTransformArgs
-) => { position: number; event?: OpenAIChatCompletionStreamEvent };
+export type StreamingCompletionTransformer<
+  T = OpenAIChatCompletionStreamEvent,
+  S = any,
+> = (params: SSEResponseTransformArgs<S>) => {
+  position: number;
+  event?: T;
+  state?: S;
+};

 export { openAITextToOpenAIChat } from "./transformers/openai-text-to-openai";
 export { anthropicV1ToOpenAI } from "./transformers/anthropic-v1-to-openai";
 export { anthropicV2ToOpenAI } from "./transformers/anthropic-v2-to-openai";
+export { anthropicChatToAnthropicV2 } from "./transformers/anthropic-chat-to-anthropic-v2";
 export { googleAIToOpenAI } from "./transformers/google-ai-to-openai";
 export { passthroughToOpenAI } from "./transformers/passthrough-to-openai";
 export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
 export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
-export { mergeEventsForAnthropic } from "./aggregators/anthropic";
+export { mergeEventsForAnthropicText } from "./aggregators/anthropic-text";
+export { mergeEventsForAnthropicChat } from "./aggregators/anthropic-chat";
@@ -3,27 +3,27 @@ export type ServerSentEvent = { id?: string; type?: string; data: string };
 /** Given a string of SSE data, parse it into a `ServerSentEvent` object. */
 export function parseEvent(event: string) {
   const buffer: ServerSentEvent = { data: "" };
-  return event.split(/\r?\n/).reduce(parseLine, buffer)
+  return event.split(/\r?\n/).reduce(parseLine, buffer);
 }

 function parseLine(event: ServerSentEvent, line: string) {
   const separator = line.indexOf(":");
-  const field = separator === -1 ? line : line.slice(0,separator);
+  const field = separator === -1 ? line : line.slice(0, separator);
   const value = separator === -1 ? "" : line.slice(separator + 1);

   switch (field) {
-    case 'id':
-      event.id = value.trim()
-      break
-    case 'event':
-      event.type = value.trim()
-      break
-    case 'data':
-      event.data += value.trimStart()
-      break
+    case "id":
+      event.id = value.trim();
+      break;
+    case "event":
+      event.type = value.trim();
+      break;
+    case "data":
+      event.data += value.trimStart();
+      break;
     default:
-      break
+      break;
   }

-  return event
-}
+  return event;
+}
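Note: quick check of the tidied parser above — a multi-line SSE frame reduces into a single ServerSentEvent (sample frame assumed):

```ts
import { parseEvent } from "./parse-sse";

const evt = parseEvent(
  'event: content_block_delta\ndata: {"type":"content_block_delta"}'
);
// => { data: '{"type":"content_block_delta"}', type: "content_block_delta" }
```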
@@ -3,11 +3,13 @@ import { logger } from "../../../../logger";
 import { APIFormat } from "../../../../shared/key-management";
 import { assertNever } from "../../../../shared/utils";
 import {
+  anthropicChatToAnthropicV2,
   anthropicV1ToOpenAI,
+  AnthropicV2StreamEvent,
   anthropicV2ToOpenAI,
-  googleAIToOpenAI,
   OpenAIChatCompletionStreamEvent,
   openAITextToOpenAIChat,
+  googleAIToOpenAI,
   passthroughToOpenAI,
   StreamingCompletionTransformer,
 } from "./index";

@@ -26,9 +28,13 @@ type SSEMessageTransformerOptions = TransformOptions & {
  */
 export class SSEMessageTransformer extends Transform {
   private lastPosition: number;
+  private transformState: any;
   private msgCount: number;
   private readonly inputFormat: APIFormat;
-  private readonly transformFn: StreamingCompletionTransformer;
+  private readonly transformFn: StreamingCompletionTransformer<
+    // TODO: Refactor transformers to not assume only OpenAI events as output
+    OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+  >;
   private readonly log;
   private readonly fallbackId: string;
   private readonly fallbackModel: string;

@@ -58,15 +64,20 @@ export class SSEMessageTransformer extends Transform {
   _transform(chunk: Buffer, _encoding: BufferEncoding, callback: Function) {
     try {
       const originalMessage = chunk.toString();
-      const { event: transformedMessage, position: newPosition } =
-        this.transformFn({
-          data: originalMessage,
-          lastPosition: this.lastPosition,
-          index: this.msgCount++,
-          fallbackId: this.fallbackId,
-          fallbackModel: this.fallbackModel,
-        });
+      const {
+        event: transformedMessage,
+        position: newPosition,
+        state,
+      } = this.transformFn({
+        data: originalMessage,
+        lastPosition: this.lastPosition,
+        index: this.msgCount++,
+        fallbackId: this.fallbackId,
+        fallbackModel: this.fallbackModel,
+        state: this.transformState,
+      });
       this.lastPosition = newPosition;
+      this.transformState = state;

       // Special case for Azure OpenAI, which is 99% the same as OpenAI but
       // sometimes emits an extra event at the beginning of the stream with the

@@ -84,7 +95,7 @@ export class SSEMessageTransformer extends Transform {
       // Some events may not be transformed, e.g. ping events
       if (!transformedMessage) return callback();

-      if (this.msgCount === 1) {
+      if (this.msgCount === 1 && eventIsOpenAIEvent(transformedMessage)) {
        // TODO: does this need to be skipped for passthroughToOpenAI?
        this.push(createInitialMessage(transformedMessage));
      }

@@ -98,20 +109,30 @@ export class SSEMessageTransformer extends Transform {
   }
 }

+function eventIsOpenAIEvent(
+  event: any
+): event is OpenAIChatCompletionStreamEvent {
+  return event?.object === "chat.completion.chunk";
+}
+
 function getTransformer(
   responseApi: APIFormat,
   version?: string
-): StreamingCompletionTransformer {
+): StreamingCompletionTransformer<
+  OpenAIChatCompletionStreamEvent | AnthropicV2StreamEvent
+> {
   switch (responseApi) {
     case "openai":
     case "mistral-ai":
       return passthroughToOpenAI;
     case "openai-text":
       return openAITextToOpenAIChat;
-    case "anthropic":
+    case "anthropic-text":
       return version === "2023-01-01"
         ? anthropicV1ToOpenAI
         : anthropicV2ToOpenAI;
+    case "anthropic-chat":
+      return anthropicChatToAnthropicV2;
     case "google-ai":
       return googleAIToOpenAI;
     case "openai-image":

@@ -67,7 +67,7 @@ export class SSEStreamAdapter extends Transform {
       default:
         this.log.error({ message, type }, "Received bad AWS stream event");
         return makeCompletionSSE({
-          format: "anthropic",
+          format: "anthropic-text",
           title: "Proxy stream error",
           message:
             "The proxy received an unrecognized error from AWS while streaming.",
@@ -0,0 +1,129 @@
+import {
+  AnthropicV2StreamEvent,
+  StreamingCompletionTransformer,
+} from "../index";
+import { parseEvent, ServerSentEvent } from "../parse-sse";
+import { logger } from "../../../../../logger";
+
+const log = logger.child({
+  module: "sse-transformer",
+  transformer: "anthropic-chat-to-anthropic-v2",
+});
+
+export type AnthropicChatEventType =
+  | "message_start"
+  | "content_block_start"
+  | "content_block_delta"
+  | "content_block_stop"
+  | "message_delta"
+  | "message_stop";
+
+type AnthropicChatStartEvent = {
+  type: "message_start";
+  message: {
+    id: string;
+    type: "message";
+    role: "assistant";
+    content: [];
+    model: string;
+    stop_reason: null;
+    stop_sequence: null;
+    usage: { input_tokens: number; output_tokens: number };
+  };
+};
+
+type AnthropicChatContentBlockStartEvent = {
+  type: "content_block_start";
+  index: number;
+  content_block: { type: "text"; text: string };
+};
+
+export type AnthropicChatContentBlockDeltaEvent = {
+  type: "content_block_delta";
+  index: number;
+  delta: { type: "text_delta"; text: string };
+};
+
+type AnthropicChatContentBlockStopEvent = {
+  type: "content_block_stop";
+  index: number;
+};
+
+type AnthropicChatMessageDeltaEvent = {
+  type: "message_delta";
+  delta: {
+    stop_reason: string;
+    stop_sequence: null;
+    usage: { output_tokens: number };
+  };
+};
+
+type AnthropicChatMessageStopEvent = {
+  type: "message_stop";
+};
+
+type AnthropicChatTransformerState = { content: string };
+
+/**
+ * Transforms an incoming Anthropic Chat SSE to an equivalent Anthropic V2
+ * Text SSE.
+ * For now we assume there is only one content block and message delta. In the
+ * future Anthropic may add multi-turn responses or multiple content blocks
+ * (probably for multimodal responses, image generation, etc) but as far as I
+ * can tell this is not yet implemented.
+ */
+export const anthropicChatToAnthropicV2: StreamingCompletionTransformer<
+  AnthropicV2StreamEvent,
+  AnthropicChatTransformerState
+> = (params) => {
+  const { data } = params;
+
+  const rawEvent = parseEvent(data);
+  if (!rawEvent.data || !rawEvent.type) {
+    return { position: -1 };
+  }
+
+  const deltaEvent = asAnthropicChatDelta(rawEvent);
+  if (!deltaEvent) {
+    return { position: -1 };
+  }
+
+  const newEvent = {
+    log_id: params.fallbackId,
+    model: params.fallbackModel,
+    completion: deltaEvent.delta.text,
+    stop_reason: null,
+  };
+
+  return { position: -1, event: newEvent };
+};
+
+export function asAnthropicChatDelta(
+  event: ServerSentEvent
+): AnthropicChatContentBlockDeltaEvent | null {
+  if (
+    !event.type ||
+    !["content_block_start", "content_block_delta"].includes(event.type)
+  ) {
+    return null;
+  }
+
+  try {
+    const parsed = JSON.parse(event.data);
+    if (parsed.type === "content_block_delta") {
+      return parsed;
+    } else if (parsed.type === "content_block_start") {
+      return {
+        type: "content_block_delta",
+        index: parsed.index,
+        delta: { type: "text_delta", text: parsed.content_block?.text ?? "" },
+      };
+    } else {
+      // noinspection ExceptionCaughtLocallyJS
+      throw new Error("Invalid event type");
+    }
+  } catch (error) {
+    log.warn({ error: error.stack, event }, "Received invalid event");
+  }
+  return null;
+}
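Note: a sketch of the new transformer applied to a single Messages API delta frame. The `fallbackId`/`fallbackModel` values stand in for per-request values the proxy tracks, and the import path is assumed:

```ts
import { anthropicChatToAnthropicV2 } from "../index";

const result = anthropicChatToAnthropicV2({
  data:
    "event: content_block_delta\n" +
    'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"}}',
  lastPosition: -1,
  index: 0,
  fallbackId: "req-1",
  fallbackModel: "claude-3-sonnet-20240229",
});
// result.event =>
//   { log_id: "req-1", model: "claude-3-sonnet-20240229",
//     completion: "Hello", stop_reason: null }
```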
@@ -0,0 +1,45 @@
+import { StreamingCompletionTransformer } from "../index";
+import { parseEvent } from "../parse-sse";
+import { logger } from "../../../../../logger";
+import { asAnthropicChatDelta } from "./anthropic-chat-to-anthropic-v2";
+
+const log = logger.child({
+  module: "sse-transformer",
+  transformer: "anthropic-chat-to-openai",
+});
+
+/**
+ * Transforms an incoming Anthropic Chat SSE to an equivalent OpenAI
+ * chat.completion.chunks SSE.
+ */
+export const anthropicChatToOpenAI: StreamingCompletionTransformer = (
+  params
+) => {
+  const { data } = params;
+
+  const rawEvent = parseEvent(data);
+  if (!rawEvent.data || !rawEvent.type) {
+    return { position: -1 };
+  }
+
+  const deltaEvent = asAnthropicChatDelta(rawEvent);
+  if (!deltaEvent) {
+    return { position: -1 };
+  }
+
+  const newEvent = {
+    id: params.fallbackId,
+    object: "chat.completion.chunk" as const,
+    created: Date.now(),
+    model: params.fallbackModel,
+    choices: [
+      {
+        index: params.index,
+        delta: { content: deltaEvent.delta.text },
+        finish_reason: null,
+      },
+    ],
+  };
+
+  return { position: -1, event: newEvent };
+};
@@ -1,4 +1,7 @@
-import { StreamingCompletionTransformer } from "../index";
+import {
+  AnthropicV2StreamEvent,
+  StreamingCompletionTransformer,
+} from "../index";
 import { parseEvent, ServerSentEvent } from "../parse-sse";
 import { logger } from "../../../../../logger";

@@ -7,13 +10,6 @@ const log = logger.child({
   transformer: "anthropic-v2-to-openai",
 });

-type AnthropicV2StreamEvent = {
-  log_id?: string;
-  model?: string;
-  completion: string;
-  stop_reason: string;
-};
-
 /**
  * Transforms an incoming Anthropic SSE (2023-06-01 API) to an equivalent
  * OpenAI chat.completion.chunk SSE.
@@ -91,6 +91,7 @@ export type ServiceInfo = {
   openai2?: string;
   "openai-image"?: string;
   anthropic?: string;
+  "anthropic-claude-3"?: string;
   "google-ai"?: string;
   "mistral-ai"?: string;
   aws?: string;

@@ -132,6 +133,7 @@ const SERVICE_ENDPOINTS: { [s in LLMService]: Record<string, string> } = {
   },
   anthropic: {
     anthropic: `%BASE%/anthropic`,
+    "anthropic-claude-3 (temporary compatibility endpoint)": `%BASE%/anthropic/claude-3`,
   },
   "google-ai": {
     "google-ai": `%BASE%/google-ai`,
@@ -6,30 +6,64 @@ import {
   OpenAIChatMessage,
   OpenAIV1ChatCompletionSchema,
 } from "./openai";
 import { logger } from "../../logger";

 const CLAUDE_OUTPUT_MAX = config.maxOutputTokensAnthropic;

-// https://console.anthropic.com/docs/api/reference#-v1-complete
-export const AnthropicV1CompleteSchema = z
+const AnthropicV1BaseSchema = z
   .object({
     model: z.string().max(100),
-    prompt: z.string({
-      required_error:
-        "No prompt found. Are you sending an OpenAI-formatted request to the Claude endpoint?",
-    }),
-    max_tokens_to_sample: z.coerce
-      .number()
-      .int()
-      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
     stop_sequences: z.array(z.string().max(500)).optional(),
     stream: z.boolean().optional().default(false),
     temperature: z.coerce.number().optional().default(1),
     top_k: z.coerce.number().optional(),
     top_p: z.coerce.number().optional(),
     metadata: z.object({ user_id: z.string().optional() }).optional(),
   })
   .strip();

-export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
+// https://docs.anthropic.com/claude/reference/complete_post [deprecated]
+export const AnthropicV1TextSchema = AnthropicV1BaseSchema.merge(
+  z.object({
+    prompt: z.string(),
+    max_tokens_to_sample: z.coerce
+      .number()
+      .int()
+      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
+  })
+);
+
+// https://docs.anthropic.com/claude/reference/messages_post
+export const AnthropicV1MessagesSchema = AnthropicV1BaseSchema.merge(
+  z.object({
+    messages: z
+      .array(
+        z.object({
+          role: z.enum(["user", "assistant"]),
+          content: z.union([
+            z.string(),
+            z.array(z.object({ type: z.string().max(100), text: z.string() })),
+          ]),
+        })
+      )
+      .min(1)
+      .refine((v) => v[0].role === "user", {
+        message: `First message must be have 'user' role. Use 'system' parameter to start with a system message.`,
+      }),
+    max_tokens: z
+      .number()
+      .int()
+      .transform((v) => Math.min(v, CLAUDE_OUTPUT_MAX)),
+    system: z.string().optional(),
+  })
+);
+export type AnthropicChatMessage = z.infer<
+  typeof AnthropicV1MessagesSchema
+>["messages"][0];
+
+export function openAIMessagesToClaudeTextPrompt(
+  messages: OpenAIChatMessage[]
+) {
   return (
     messages
       .map((m) => {

@@ -51,13 +85,13 @@ export function openAIMessagesToClaudePrompt(messages: OpenAIChatMessage[]) {
   );
 }

-export function openAIToAnthropic(req: Request) {
+export function openAIToAnthropicText(req: Request) {
   const { body } = req;
   const result = OpenAIV1ChatCompletionSchema.safeParse(body);
   if (!result.success) {
     req.log.warn(
       { issues: result.error.issues, body },
-      "Invalid OpenAI-to-Anthropic request"
+      "Invalid OpenAI-to-Anthropic Text request"
     );
     throw result.error;
   }

@@ -65,7 +99,7 @@ export function openAIToAnthropic(req: Request) {
   req.headers["anthropic-version"] = "2023-06-01";

   const { messages, ...rest } = result.data;
-  const prompt = openAIMessagesToClaudePrompt(messages);
+  const prompt = openAIMessagesToClaudeTextPrompt(messages);

   let stops = rest.stop
     ? Array.isArray(rest.stop)

@@ -90,3 +124,105 @@ export function openAIToAnthropic(req: Request) {
     top_p: rest.top_p,
   };
 }

+/**
+ * Converts an older Anthropic Text Completion prompt to the newer Messages API
+ * by splitting the flat text into messages.
+ */
+export function anthropicTextToAnthropicChat(req: Request) {
+  const { body } = req;
+  const result = AnthropicV1TextSchema.safeParse(body);
+  if (!result.success) {
+    req.log.warn(
+      { issues: result.error.issues, body },
+      "Invalid Anthropic Text-to-Anthropic Chat request"
+    );
+    throw result.error;
+  }
+
+  req.headers["anthropic-version"] = "2023-06-01";
+
+  const { model, max_tokens_to_sample, prompt, ...rest } = result.data;
+  validateAnthropicTextPrompt(prompt);
+
+  // Iteratively slice the prompt into messages. Start from the beginning and
+  // look for the next `\n\nHuman:` or `\n\nAssistant:`. Anything before the
+  // first human message is a system message.
+  let index = prompt.indexOf("\n\nHuman:");
+  let remaining = prompt.slice(index);
+  const system = prompt.slice(0, index);
+  const messages: AnthropicChatMessage[] = [];
+  while (remaining) {
+    const isHuman = remaining.startsWith("\n\nHuman:");
+
+    // TODO: Are multiple consecutive human or assistant messages allowed?
+    // Currently we will enforce alternating turns.
+    const thisRole = isHuman ? "\n\nHuman:" : "\n\nAssistant:";
+    const nextRole = isHuman ? "\n\nAssistant:" : "\n\nHuman:";
+    const nextIndex = remaining.indexOf(nextRole);
+
+    // Collect text up to the next message, or the end of the prompt for the
+    // Assistant prefill if present.
+    const msg = remaining
+      .slice(0, nextIndex === -1 ? undefined : nextIndex)
+      .replace(thisRole, "")
+      .trimStart();
+
+    const role = isHuman ? "user" : "assistant";
+    messages.push({ role, content: msg });
+    remaining = remaining.slice(nextIndex);
+
+    if (nextIndex === -1) break;
+  }
+
+  // fix "messages: final assistant content cannot end with trailing whitespace"
+  const lastMessage = messages[messages.length - 1];
+  if (
+    lastMessage.role === "assistant" &&
+    typeof lastMessage.content === "string"
+  ) {
+    messages[messages.length - 1].content = lastMessage.content.trimEnd();
+  }
+
+  return {
+    model,
+    system,
+    messages,
+    max_tokens: max_tokens_to_sample,
+    ...rest,
+  };
+}
+
+function validateAnthropicTextPrompt(prompt: string) {
+  if (!prompt.includes("\n\nHuman:") || !prompt.includes("\n\nAssistant:")) {
+    throw new Error(
+      "Prompt must contain at least one human and one assistant message."
+    );
+  }
+  // First human message must be before first assistant message
+  const firstHuman = prompt.indexOf("\n\nHuman:");
+  const firstAssistant = prompt.indexOf("\n\nAssistant:");
+  if (firstAssistant < firstHuman) {
+    throw new Error(
+      "First Assistant message must come after the first Human message."
+    );
+  }
+}
+
+export function flattenAnthropicMessages(
+  messages: AnthropicChatMessage[]
+): string {
+  return messages
+    .map((msg) => {
+      const name = msg.role === "user" ? "\n\nHuman: " : "\n\nAssistant: ";
+      const parts = Array.isArray(msg.content)
+        ? msg.content
+        : [{ type: "text", text: msg.content }];
+      return `${name}: ${parts
+        .map(({ text, type }) =>
+          type === "text" ? text : `[Unsupported content type: ${type}]`
+        )
+        .join("\n")}`;
+    })
+    .join("\n\n");
}
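Note: a worked example of the prompt splitting performed by `anthropicTextToAnthropicChat` above (shapes only; the real function validates with `AnthropicV1TextSchema` and carries the remaining sampling params through, and the `max_tokens` value here is just illustrative):

```ts
const prompt = "You are a poet.\n\nHuman: Write a haiku.\n\nAssistant: Sure:";

// Approximate result: text before the first Human turn becomes `system`,
// turns alternate user/assistant, and a trailing assistant prefill survives
// with its trailing whitespace trimmed.
const converted = {
  system: "You are a poet.",
  messages: [
    { role: "user", content: "Write a haiku." },
    { role: "assistant", content: "Sure:" },
  ],
  max_tokens: 256, // renamed from max_tokens_to_sample
};
```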
@@ -1,6 +1,6 @@
 import { z } from "zod";
 import { APIFormat } from "../key-management";
-import { AnthropicV1CompleteSchema } from "./anthropic";
+import { AnthropicV1TextSchema, AnthropicV1MessagesSchema } from "./anthropic";
 import { OpenAIV1ChatCompletionSchema } from "./openai";
 import { OpenAIV1TextCompletionSchema } from "./openai-text";
 import { OpenAIV1ImagesGenerationSchema } from "./openai-image";

@@ -8,11 +8,13 @@ import { GoogleAIV1GenerateContentSchema } from "./google-ai";
 import { MistralAIV1ChatCompletionsSchema } from "./mistral-ai";

 export { OpenAIChatMessage } from "./openai";
+export { AnthropicChatMessage, flattenAnthropicMessages } from "./anthropic";
 export { GoogleAIChatMessage } from "./google-ai";
 export { MistralAIChatMessage } from "./mistral-ai";

 export const API_SCHEMA_VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
-  anthropic: AnthropicV1CompleteSchema,
+  "anthropic-chat": AnthropicV1MessagesSchema,
+  "anthropic-text": AnthropicV1TextSchema,
   openai: OpenAIV1ChatCompletionSchema,
   "openai-text": OpenAIV1TextCompletionSchema,
   "openai-image": OpenAIV1ImagesGenerationSchema,
@@ -12,7 +12,9 @@ export type AnthropicModel =
   | "claude-v1"
   | "claude-v1-100k"
   | "claude-2"
-  | "claude-2.1";
+  | "claude-2.1"
+  | "claude-3-opus-20240229"
+  | "claude-3-sonnet-20240229";

 export type AnthropicKeyUpdate = Omit<
   Partial<AnthropicKey>,

@@ -9,11 +9,12 @@ import { KeyPool } from "./key-pool";
 /** The request and response format used by a model's API. */
 export type APIFormat =
   | "openai"
-  | "anthropic"
-  | "google-ai"
-  | "mistral-ai"
   | "openai-text"
-  | "openai-image";
+  | "openai-image"
+  | "anthropic-chat" // Anthropic's newer messages array format
+  | "anthropic-text" // Legacy flat string prompt format
+  | "google-ai"
+  | "mistral-ai";
 export type Model =
   | OpenAIModel
   | AnthropicModel
@@ -192,7 +192,8 @@ export function getModelFamilyForRequest(req: Request): ModelFamily {
     modelFamily = getAzureOpenAIModelFamily(model);
   } else {
     switch (req.outboundApi) {
-      case "anthropic":
+      case "anthropic-chat":
+      case "anthropic-text":
         modelFamily = getClaudeModelFamily(model);
         break;
       case "openai":
@@ -84,7 +84,7 @@ export function makeCompletionSSE({
         model,
       };
       break;
-    case "anthropic":
+    case "anthropic-text":
       event = {
         completion: content,
         stop_reason: title,

@@ -106,13 +106,14 @@ export function makeCompletionSSE({
         },
       ],
     });
+    case "anthropic-chat":
     case "openai-image":
       throw new Error(`SSE not supported for ${format} requests`);
     default:
       assertNever(format);
   }

-  if (format === "anthropic") {
+  if (format === "anthropic-text") {
     return (
       ["event: completion", `data: ${JSON.stringify(event)}`].join("\n") +
       "\n\n"
@@ -1,23 +1,27 @@
 import { getTokenizer } from "@anthropic-ai/tokenizer";
 import { Tiktoken } from "tiktoken/lite";
+import { AnthropicChatMessage } from "../api-schemas";

 let encoder: Tiktoken;
+let userRoleCount = 0;
+let assistantRoleCount = 0;

 export function init() {
   // they export a `countTokens` function too but it instantiates a new
   // tokenizer every single time and it is not fast...
   encoder = getTokenizer();
+  userRoleCount = encoder.encode("\n\nHuman: ", "all").length;
+  assistantRoleCount = encoder.encode("\n\nAssistant: ", "all").length;
   return true;
 }

-export function getTokenCount(prompt: string, _model: string) {
-  // Don't try tokenizing if the prompt is massive to prevent DoS.
-  // 500k characters should be sufficient for all supported models.
-  if (prompt.length > 500000) {
-    return {
-      tokenizer: "length fallback",
-      token_count: 100000,
-    };
+export function getTokenCount(prompt: string | AnthropicChatMessage[]) {
+  if (typeof prompt !== "string") {
+    return getTokenCountForMessages(prompt);
   }

+  if (prompt.length > 800000) {
+    throw new Error("Content is too large to tokenize.");
+  }
+
   return {

@@ -25,3 +29,36 @@ export function getTokenCount(prompt: string, _model: string) {
     token_count: encoder.encode(prompt.normalize("NFKC"), "all").length,
   };
 }

+function getTokenCountForMessages(messages: AnthropicChatMessage[]) {
+  let numTokens = 0;
+
+  for (const message of messages) {
+    const { content, role } = message;
+    numTokens += role === "user" ? userRoleCount : assistantRoleCount;
+
+    const parts = Array.isArray(content)
+      ? content
+      : [{ type: "text", text: content }];
+
+    for (const part of parts) {
+      // We don't allow other content types for now because we can't estimate
+      // cost for them.
+      if (part.type !== "text") {
+        throw new Error(`Unsupported Anthropic content type: ${part.type}`);
+      }
+
+      if (part.text.length > 800000 || numTokens > 200000) {
+        throw new Error("Content is too large to tokenize.");
+      }
+
+      numTokens += encoder.encode(part.text.normalize("NFKC"), "all").length;
+    }
+  }
+
+  if (messages[messages.length - 1].role !== "assistant") {
+    numTokens += assistantRoleCount;
+  }
+
+  return { tokenizer: "@anthropic-ai/tokenizer", token_count: numTokens };
+}
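Note: a rough reading of the new message-based counting above (assuming `init()` has run so the role-prefix token counts are cached; import path omitted):

```ts
// Each turn costs its text tokens plus a fixed "\n\nHuman: "/"\n\nAssistant: "
// prefix; one trailing assistant prefix is added when the last turn is a user
// message, mirroring how the prompt would be flattened for the model.
const messages: AnthropicChatMessage[] = [
  { role: "user", content: "Hi" },
  { role: "assistant", content: "Hello!" },
  { role: "user", content: "Write a haiku." },
];
const { token_count, tokenizer } = getTokenCount(messages);
// tokenizer === "@anthropic-ai/tokenizer"; token_count includes 4 role prefixes
```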
@@ -29,13 +29,8 @@ export function getTokenCount(prompt: MistralAIChatMessage[] | string) {
 }

 function getTextTokenCount(prompt: string) {
-  // Don't try tokenizing if the prompt is massive to prevent DoS.
-  // 500k characters should be sufficient for all supported models.
-  if (prompt.length > 500000) {
-    return {
-      tokenizer: "length fallback",
-      token_count: 100000,
-    };
+  if (prompt.length > 800000) {
+    throw new Error("Content is too large to tokenize.");
   }

   return {
@@ -60,16 +60,8 @@ export async function getTokenCount(
       textContent = value;
     }

-    // Break if we get a huge message or exceed the token limit to prevent
-    // DoS.
-    // 200k tokens allows for future 200k GPT-4 models and 500k characters
-    // is just a sanity check
-    if (textContent.length > 500000 || numTokens > 200000) {
-      numTokens = 200000;
-      return {
-        tokenizer: "tiktoken (prompt length limit exceeded)",
-        token_count: numTokens,
-      };
+    if (textContent.length > 800000 || numTokens > 200000) {
+      throw new Error("Content is too large to tokenize.");
     }

     numTokens += encoder.encode(textContent).length;
@@ -16,6 +16,7 @@ import {
 } from "./mistral";
 import { APIFormat } from "../key-management";
 import {
+  AnthropicChatMessage,
   GoogleAIChatMessage,
   MistralAIChatMessage,
   OpenAIChatMessage,

@@ -27,23 +28,60 @@ export async function init() {
   initMistralAI();
 }

-/** Tagged union via `service` field of the different types of requests that can
- * be made to the tokenization service, for both prompts and completions */
+type OpenAIChatTokenCountRequest = {
+  prompt: OpenAIChatMessage[];
+  completion?: never;
+  service: "openai";
+};
+
+type AnthropicChatTokenCountRequest = {
+  prompt: AnthropicChatMessage[];
+  completion?: never;
+  service: "anthropic-chat";
+};
+
+type GoogleAIChatTokenCountRequest = {
+  prompt: GoogleAIChatMessage[];
+  completion?: never;
+  service: "google-ai";
+};
+
+type MistralAIChatTokenCountRequest = {
+  prompt: MistralAIChatMessage[];
+  completion?: never;
+  service: "mistral-ai";
+};
+
+type FlatPromptTokenCountRequest = {
+  prompt: string;
+  completion?: never;
+  service: "openai-text" | "anthropic-text" | "google-ai";
+};
+
+type StringCompletionTokenCountRequest = {
+  prompt?: never;
+  completion: string;
+  service: APIFormat;
+};
+
+type OpenAIImageCompletionTokenCountRequest = {
+  prompt?: never;
+  completion?: never;
+  service: "openai-image";
+};
+
+/**
+ * Tagged union via `service` field of the different types of requests that can
+ * be made to the tokenization service, for both prompts and completions
+ */
 type TokenCountRequest = { req: Request } & (
-  | { prompt: OpenAIChatMessage[]; completion?: never; service: "openai" }
-  | {
-      prompt: string;
-      completion?: never;
-      service: "openai-text" | "anthropic" | "google-ai";
-    }
-  | { prompt?: GoogleAIChatMessage[]; completion?: never; service: "google-ai" }
-  | {
-      prompt: MistralAIChatMessage[];
-      completion?: never;
-      service: "mistral-ai";
-    }
-  | { prompt?: never; completion: string; service: APIFormat }
-  | { prompt?: never; completion?: never; service: "openai-image" }
+  | OpenAIChatTokenCountRequest
+  | AnthropicChatTokenCountRequest
+  | GoogleAIChatTokenCountRequest
+  | MistralAIChatTokenCountRequest
+  | FlatPromptTokenCountRequest
+  | StringCompletionTokenCountRequest
+  | OpenAIImageCompletionTokenCountRequest
 );

 type TokenCountResult = {
@@ -60,9 +98,14 @@ export async function countTokens({
 }: TokenCountRequest): Promise<TokenCountResult> {
   const time = process.hrtime();
   switch (service) {
-    case "anthropic":
+    case "anthropic-chat":
       return {
-        ...getClaudeTokenCount(prompt ?? completion, req.body.model),
+        ...getClaudeTokenCount(prompt ?? completion),
         tokenization_duration_ms: getElapsedMs(time),
       };
+    case "anthropic-text":
+      return {
+        ...getClaudeTokenCount(prompt ?? completion),
+        tokenization_duration_ms: getElapsedMs(time),
+      };
     case "openai":
@@ -400,7 +400,8 @@ function getModelFamilyForQuotaUsage(
     case "openai-text":
     case "openai-image":
       return getOpenAIModelFamily(model);
-    case "anthropic":
+    case "anthropic-chat":
+    case "anthropic-text":
       return getClaudeModelFamily(model);
     case "google-ai":
       return getGoogleAIModelFamily(model);