// oai-reverse-proxy/src/proxy/aws.ts

import { Request, RequestHandler, Response, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { v4 } from "uuid";
import { config } from "../config";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
  createPreprocessorMiddleware,
  signAwsRequest,
  finalizeSignedRequest,
  createOnProxyReqHandler,
} from "./middleware/request";
import {
  ProxyResHandlerWithBody,
  createOnProxyResHandler,
} from "./middleware/response";
import { transformAnthropicChatResponseToAnthropicText } from "./anthropic";
import { sendErrorToClient } from "./middleware/response/error-generator";

/**
 * Minor-version suffix appended to `anthropic.claude-v2:` whenever a request
 * is mapped to the "latest" AWS Claude v2 model ID.
 */
const LATEST_AWS_V2_MINOR_VERSION = "1";

/** Last model list that was built; served again while it is under a minute old. */
let modelsCache: any = null;
/** Epoch milliseconds at which `modelsCache` was last rebuilt (0 means never). */
let modelsCacheTime = 0;

/**
 * Produces the payload for the model-listing endpoint.
 *
 * A list built less than 60 seconds ago is returned as-is. Otherwise, when no
 * AWS credentials are configured an empty list is returned (and not cached);
 * when credentials exist the list is rebuilt, cached and returned.
 */
const getModelsResponse = () => {
  const cacheAgeMs = Date.now() - modelsCacheTime;
  if (cacheAgeMs < 1000 * 60) return modelsCache;

  if (!config.awsCredentials) {
    return { object: "list", data: [] };
  }

  // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html
  const modelIds = [
    "anthropic.claude-v2",
    "anthropic.claude-v2:1",
    "anthropic.claude-3-haiku-20240307-v1:0",
    "anthropic.claude-3-sonnet-20240229-v1:0",
  ];

  const data = [];
  for (const id of modelIds) {
    data.push({
      id,
      object: "model",
      created: Date.now(),
      owned_by: "anthropic",
      permission: [],
      root: "claude",
      parent: null,
    });
  }

  modelsCache = { object: "list", data };
  modelsCacheTime = Date.now();
  return modelsCache;
};

/** Responds with HTTP 200 and the (possibly cached) model list as JSON. */
const handleModelRequest: RequestHandler = (_req, res) => {
  const okResponse = res.status(200);
  okResponse.json(getModelsResponse());
};

/**
 * Final handler for buffered (non-streaming) AWS responses.
 *
 * Steps, in order:
 *  1. reject a body that is not an object;
 *  2. attach `proxy_note` when prompt logging is enabled;
 *  3. convert the body for OpenAI clients, or for text-completion clients
 *     whose request was sent to the chat API;
 *  4. attach `proxy_tokenizer` when tokenizer info is present on the request;
 *  5. set `model` from the request body and send everything as JSON with
 *     status 200.
 *
 * @throws Error if `body` is not an object.
 */
const awsResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  if (typeof body !== "object") {
    throw new Error("Expected body to be an object");
  }

  if (config.promptLogging) {
    body.proxy_note = `Prompts are logged on this proxy instance. See ${req.get(
      "host"
    )} for more information.`;
  }

  const { inboundApi, outboundApi } = req;
  const clientSpeaksOpenAI = inboundApi === "openai";
  const textClientOnChatApi =
    inboundApi === "anthropic-text" && outboundApi === "anthropic-chat";

  if (clientSpeaksOpenAI) {
    req.log.info("Transforming AWS Claude response to OpenAI format");
    body = transformAwsTextResponseToOpenAI(body, req);
  }

  if (textClientOnChatApi) {
    req.log.info("Transforming AWS Claude chat response to Text format");
    body = transformAnthropicChatResponseToAnthropicText(body);
  }

  if (req.tokenizerInfo) {
    body.proxy_tokenizer = req.tokenizerInfo;
  }

  // AWS does not confirm the model in the response, so we have to add it
  body.model = req.body.model;

  res.status(200).json(body);
};

/**
 * Reshapes a buffered AWS Claude text-completion body into an OpenAI
 * `chat.completion` object for clients using the OpenAI-compatible endpoint.
 * Streaming responses do not pass through here; they are converted on the fly.
 *
 * @param awsBody Response body; `completion` and `stop_reason` are read.
 * @param req Request supplying the token counts and the model name.
 * @returns A new object; `awsBody` is not modified.
 */
function transformAwsTextResponseToOpenAI(
  awsBody: Record<string, any>,
  req: Request
): Record<string, any> {
  const { promptTokens, outputTokens } = req;

  const usage = {
    prompt_tokens: promptTokens,
    completion_tokens: outputTokens,
    // Missing counts contribute zero to the total.
    total_tokens: (promptTokens ?? 0) + (outputTokens ?? 0),
  };

  const onlyChoice = {
    message: {
      role: "assistant",
      content: awsBody.completion?.trim(),
    },
    finish_reason: awsBody.stop_reason,
    index: 0,
  };

  return {
    id: `aws-${v4()}`,
    object: "chat.completion",
    created: Date.now(),
    model: req.body.model,
    usage,
    choices: [onlyChoice],
  };
}

/**
 * Queue-wrapped proxy middleware used as the last handler on every POST route
 * of this router.
 */
const awsProxy = createQueueMiddleware({
  // Signing hook. The `router` callback below throws unless `signedRequest`
  // is present on the request, so this is presumably where it gets attached.
  beforeProxy: signAwsRequest,
  proxyMiddleware: createProxyMiddleware({
    // Placeholder only: the actual target is chosen per request by `router`.
    target: "bad-target-will-be-rewritten",
    // Builds the upstream origin from the signed request's protocol and
    // hostname; refuses to proxy a request that has not been signed.
    router: ({ signedRequest }) => {
      if (!signedRequest) throw new Error("Must sign request before proxying");
      return `${signedRequest.protocol}//${signedRequest.hostname}`;
    },
    changeOrigin: true,
    // The response is not piped through automatically; the `proxyRes` handler
    // chain below (ending in awsResponseHandler) writes the client response.
    selfHandleResponse: true,
    logger,
    on: {
      proxyReq: createOnProxyReqHandler({ pipeline: [finalizeSignedRequest] }),
      proxyRes: createOnProxyResHandler([awsResponseHandler]),
      error: handleProxyError,
    },
  }),
});

// Preprocessor for text-completion requests that stay on the text-completion
// API (inApi and outApi are both "anthropic-text"). maybeReassignModel runs as
// an afterTransform hook to normalise the model name.
const nativeTextPreprocessor = createPreprocessorMiddleware(
  { inApi: "anthropic-text", outApi: "anthropic-text", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

// Preprocessor for text-completion requests that must be sent to the chat API
// (inApi "anthropic-text", outApi "anthropic-chat"). Uses the same
// maybeReassignModel afterTransform hook.
const textToChatPreprocessor = createPreprocessorMiddleware(
  { inApi: "anthropic-text", outApi: "anthropic-chat", service: "aws" },
  { afterTransform: [maybeReassignModel] }
);

/**
 * Picks the preprocessor for a text-completion request. Claude 3 models have
 * no text-completion endpoint, so any request whose model name contains
 * "claude-3" goes through the text-to-chat preprocessor; everything else
 * (including a request with no model) uses the native text preprocessor.
 */
const awsTextCompletionRouter: RequestHandler = (req, res, next) => {
  const needsChatTranslation = req.body.model?.includes("claude-3");
  const preprocessor = needsChatTranslation
    ? textToChatPreprocessor
    : nativeTextPreprocessor;
  preprocessor(req, res, next);
};

// Route table for the AWS Claude proxy. Paths are relative to wherever this
// router is mounted. Every POST route is rate-limited by ipLimiter, runs a
// preprocessor, and ends in awsProxy.
const awsRouter = Router();
// Model listing.
awsRouter.get("/v1/models", handleModelRequest);
// Native(ish) Anthropic text completion endpoint. awsTextCompletionRouter
// sends requests naming a claude-3 model through the text-to-chat preprocessor.
awsRouter.post("/v1/complete", ipLimiter, awsTextCompletionRouter, awsProxy);
// Native Anthropic chat completion endpoint.
awsRouter.post(
  "/v1/messages",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "anthropic-chat", outApi: "anthropic-chat", service: "aws" },
    { afterTransform: [maybeReassignModel] }
  ),
  awsProxy
);
// Temporary force-Claude3 endpoint. handleCompatibilityRequest rejects
// requests that are already in chat format and otherwise overwrites the model;
// unlike the other routes, no maybeReassignModel hook is attached here.
awsRouter.post(
  "/v1/sonnet/:action(complete|messages)",
  ipLimiter,
  handleCompatibilityRequest,
  createPreprocessorMiddleware({
    inApi: "anthropic-text",
    outApi: "anthropic-chat",
    service: "aws",
  }),
  awsProxy
);

// OpenAI-to-AWS Anthropic compatibility endpoint.
awsRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "anthropic-text", service: "aws" },
    { afterTransform: [maybeReassignModel] }
  ),
  awsProxy
);

/**
 * Rewrites `req.body.model` in place to an AWS Claude model ID.
 *
 * Tries to deal with:
 * - frontends sending AWS model names even when they want to use the OpenAI-
 *   compatible endpoint
 * - frontends sending Anthropic model names that AWS doesn't recognize
 * - frontends sending OpenAI model names because they expect the proxy to
 *   translate them
 *
 * Mapping:
 * - a name containing `anthropic.claude` is left untouched;
 * - `claude-instant-*`          -> `anthropic.claude-instant-v1`;
 * - major version 1             -> `anthropic.claude-v1`;
 * - `claude-2.0`                -> `anthropic.claude-v2`;
 * - any other major version 2   -> latest AWS v2 minor version;
 * - `claude-3-haiku-*`          -> AWS Claude 3 Haiku;
 * - any other major version 3   -> AWS Claude 3 Sonnet;
 * - anything else (e.g. `gpt-4`) -> latest AWS v2 minor version.
 *
 * @param req Request whose `body.model` is a string; it is mutated.
 */
function maybeReassignModel(req: Request) {
  const model = req.body.model;

  // If client already specified an AWS Claude model ID, use it
  if (model.includes("anthropic.claude")) {
    return;
  }

  const latestV2 = `anthropic.claude-v2:${LATEST_AWS_V2_MINOR_VERSION}`;

  // Groups: 2 = "instant-", 4 = major, 6 = minor, 8 = claude-3 family suffix.
  // The family suffix has to be optional: names such as "claude-2.1" or
  // "claude-instant-1.2" carry none, and a mandatory suffix would make them
  // fail to match and silently fall through to the default model.
  const pattern =
    /^(claude-)?(instant-)?(v)?(\d+)(\.(\d+))?(-\d+k)?(-sonnet-?|-opus-?|-haiku-?)?(\d*)/i;
  const match = model.match(pattern);

  // If there's no match, return the latest v2 model
  if (!match) {
    req.body.model = latestV2;
    return;
  }

  const instant = match[2];
  const major = match[4];
  const minor = match[6];
  const family = match[8]?.toLowerCase() ?? "";

  if (instant) {
    req.body.model = "anthropic.claude-instant-v1";
    return;
  }

  // There's only one v1 model
  if (major === "1") {
    req.body.model = "anthropic.claude-v1";
    return;
  }

  // Try to map Anthropic API v2 models to AWS v2 models
  if (major === "2") {
    req.body.model = minor === "0" ? "anthropic.claude-v2" : latestV2;
    return;
  }

  // Haiku maps to its own AWS model; every other v3 name (Sonnet, Opus, or no
  // family given) is served by Sonnet.
  if (major === "3") {
    req.body.model = family.includes("haiku")
      ? "anthropic.claude-3-haiku-20240307-v1:0"
      : "anthropic.claude-3-sonnet-20240229-v1:0";
    return;
  }

  // Fallback to latest v2 model
  req.body.model = latestV2;
}

/**
 * Middleware for the compatibility endpoint, which lets clients that only
 * speak the text-completion format use a Claude 3 model.
 *
 * The request is logged first. If it targets the `messages` action or already
 * carries a `messages` body, a 400 error is sent and the chain stops.
 * Otherwise the model is overwritten with the fixed Claude 3 Sonnet ID and
 * `next()` is called.
 *
 * @param req Request; `params.action` and `body` are read, `body.model` may
 *   be overwritten.
 * @param res Response handed to the error sender on rejection.
 * @param next Continuation invoked when the request is accepted.
 */
export function handleCompatibilityRequest(
  req: Request,
  res: Response,
  next: any
) {
  const { action } = req.params;
  const alreadyInChatFormat = Boolean(req.body.messages);
  const compatModel = "anthropic.claude-3-sonnet-20240229-v1:0";

  req.log.info(
    { inputModel: req.body.model, compatModel, alreadyInChatFormat },
    "Handling AWS compatibility request"
  );

  const needsCompatibility = action !== "messages" && !alreadyInChatFormat;
  if (needsCompatibility) {
    req.body.model = compatModel;
    next();
    return;
  }

  // The client already speaks the chat format; point it at the normal endpoint.
  const options = {
    title: "Unnecessary usage of compatibility endpoint",
    message: `Your client seems to already support the new Claude API format. This endpoint is intended for clients that do not yet support the new format.\nUse the normal \`/aws/claude\` proxy endpoint instead.`,
    format: "unknown",
    statusCode: 400,
    reqId: req.id,
    obj: {
      requested_endpoint: "/aws/claude/sonnet",
      correct_endpoint: "/aws/claude",
    },
  };
  return sendErrorToClient({ req, res, options });
}

/** The configured AWS Claude router, exported for mounting by the application. */
export const aws = awsRouter;