oai-reverse-proxy/src/proxy/middleware/common.ts

import { Request, Response } from "express";
import httpProxy from "http-proxy";
import { ZodError } from "zod";
import { assertNever } from "../../shared/utils";
import { QuotaExceededError } from "./request/apply-quota-limits";

const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";

/** Returns true if we're making a request to a completion endpoint. */
export function isCompletionRequest(req: Request) {
  // 99% sure this function is not needed anymore
  return (
    req.method === "POST" &&
    [
      OPENAI_CHAT_COMPLETION_ENDPOINT,
      OPENAI_TEXT_COMPLETION_ENDPOINT,
      ANTHROPIC_COMPLETION_ENDPOINT,
    ].some((endpoint) => req.path.startsWith(endpoint))
  );
}

export function isEmbeddingsRequest(req: Request) {
  return (
    req.method === "POST" && req.path.startsWith(OPENAI_EMBEDDINGS_ENDPOINT)
  );
}

export function writeErrorResponse(
  req: Request,
  res: Response,
  statusCode: number,
  errorPayload: Record<string, any>
) {
  const errorSource = errorPayload.error?.type?.startsWith("proxy")
    ? "proxy"
    : "upstream";

  // If we're mid-SSE stream, send a data event with the error payload and end
  // the stream. Otherwise just send a normal error response.
  if (
    res.headersSent ||
    res.getHeader("content-type") === "text/event-stream"
  ) {
    const errorContent =
      statusCode === 403
        ? JSON.stringify(errorPayload)
        : JSON.stringify(errorPayload, null, 2);

    const msg = buildFakeSseMessage(
      `${errorSource} error (${statusCode})`,
      errorContent,
      req
    );
    res.write(msg);
    res.write(`data: [DONE]\n\n`);
    res.end();
  } else {
    if (req.debug && errorPayload.error) {
      errorPayload.error.proxy_tokenizer_debug_info = req.debug;
    }
    res.status(statusCode).json(errorPayload);
  }
}

export const handleProxyError: httpProxy.ErrorCallback = (err, req, res) => {
  req.log.error({ err }, `Error during proxy request middleware`);
  handleInternalError(err, req as Request, res as Response);
};

export const handleInternalError = (
  err: Error,
  req: Request,
  res: Response
) => {
  try {
    if (err instanceof ZodError) {
      writeErrorResponse(req, res, 400, {
        error: {
          type: "proxy_validation_error",
          proxy_note: `Reverse proxy couldn't validate your request when trying to transform it. Your client may be sending invalid data.`,
          issues: err.issues,
          stack: err.stack,
          message: err.message,
        },
      });
    } else if (err.name === "ForbiddenError") {
      // Spoofs a vaguely threatening OpenAI error message. Only invoked by the
      // block-zoomers rewriter to scare off tiktokers.
      writeErrorResponse(req, res, 403, {
        error: {
          type: "organization_account_disabled",
          code: "policy_violation",
          param: null,
          message: err.message,
        },
      });
    } else if (err instanceof QuotaExceededError) {
      writeErrorResponse(req, res, 429, {
        error: {
          type: "proxy_quota_exceeded",
          code: "quota_exceeded",
          message: `You've exceeded your token quota for this model type.`,
          info: err.quotaInfo,
          stack: err.stack,
        },
      });
    } else {
      writeErrorResponse(req, res, 500, {
        error: {
          type: "proxy_internal_error",
          proxy_note: `Reverse proxy encountered an error before it could reach the upstream API.`,
          message: err.message,
          stack: err.stack,
        },
      });
    }
  } catch (e) {
    req.log.error(
      { error: e },
      `Error writing error response headers, giving up.`
    );
  }
};

export function buildFakeSseMessage(
  type: string,
  string: string,
  req: Request
) {
  let fakeEvent;
  const content = `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`;

  switch (req.inboundApi) {
    case "openai":
      fakeEvent = {
        id: "chatcmpl-" + req.id,
        object: "chat.completion.chunk",
        created: Date.now(),
        model: req.body?.model,
        choices: [{ delta: { content }, index: 0, finish_reason: type }],
      };
      break;
    case "openai-text":
      fakeEvent = {
        id: "cmpl-" + req.id,
        object: "text_completion",
        created: Date.now(),
        choices: [
          { text: content, index: 0, logprobs: null, finish_reason: type },
        ],
        model: req.body?.model,
      };
      break;
    case "anthropic":
      fakeEvent = {
        completion: content,
        stop_reason: type,
        truncated: false, // I've never seen this be true
        stop: null,
        model: req.body?.model,
        log_id: "proxy-req-" + req.id,
      };
      break;
    case "google-palm":
      throw new Error("PaLM not supported as an inbound API format");
    default:
      assertNever(req.inboundApi);
  }
  return `data: ${JSON.stringify(fakeEvent)}\n\n`;
}

export function getCompletionFromBody(req: Request, body: Record<string, any>) {
  const format = req.outboundApi;
  switch (format) {
    case "openai":
      return body.choices[0].message.content;
    case "openai-text":
      return body.choices[0].text;
    case "anthropic":
      if (!body.completion) {
        req.log.error(
          { body: JSON.stringify(body) },
          "Received empty Anthropic completion"
        );
        return "";
      }
      return body.completion.trim();
    case "google-palm":
      return body.candidates[0].output;
    default:
      assertNever(format);
  }
}

export function getModelFromBody(req: Request, body: Record<string, any>) {
  const format = req.outboundApi;
  switch (format) {
    case "openai":
    case "openai-text":
      return body.model;
    case "anthropic":
      // Anthropic confirms the model in the response, but AWS Claude doesn't.
      return body.model || req.body.model;
    case "google-palm":
      // Google doesn't confirm the model in the response.
      return req.body.model;
    default:
      assertNever(format);
  }
}