oai-reverse-proxy/src/proxy/openai.ts

import { RequestHandler, Request, Router } from "express";
import * as http from "http";
import { createProxyMiddleware } from "http-proxy-middleware";
import { config } from "../config";
import { keyPool } from "../shared/key-management";
import {
  ModelFamily,
  OpenAIModelFamily,
  getOpenAIModelFamily,
} from "../shared/models";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
  RequestPreprocessor,
  addKey,
  addKeyForEmbeddingsRequest,
  applyQuotaLimits,
  blockZoomerOrigins,
  createEmbeddingsPreprocessorMiddleware,
  createPreprocessorMiddleware,
  finalizeBody,
  forceModel,
  languageFilter,
  limitCompletions,
  stripHeaders,
} from "./middleware/request";
import {
  createOnProxyResHandler,
  ProxyResHandlerWithBody,
} from "./middleware/response";
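
// Cached /v1/models response and the time it was built; see
// getModelsResponse for the one-minute refresh logic.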
let modelsCache: any = null;
let modelsCacheTime = 0;
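
/**
 * Builds the OpenAI-compatible /v1/models response, limited to model families
 * that are available on at least one active key in the pool and permitted by
 * `config.allowedModelFamilies`. Results are cached for one minute.
 */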
function getModelsResponse() {
  if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
    return modelsCache;
  }

  // https://platform.openai.com/docs/models/overview
  const knownModels = [
    "gpt-4",
    "gpt-4-0613",
    "gpt-4-0314", // EOL 2024-06-13
    "gpt-4-32k",
    "gpt-4-32k-0613",
    "gpt-4-32k-0314", // EOL 2024-06-13
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-0301", // EOL 2024-06-13
    "gpt-3.5-turbo-0613",
    "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-16k-0613",
    "gpt-3.5-turbo-instruct",
    "gpt-3.5-turbo-instruct-0914",
    "text-embedding-ada-002",
  ];

  let available = new Set<OpenAIModelFamily>();
  for (const key of keyPool.list()) {
    if (key.isDisabled || key.service !== "openai") continue;
    key.modelFamilies.forEach((family) =>
      available.add(family as OpenAIModelFamily)
    );
  }
  const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
  available = new Set([...available].filter((x) => allowed.has(x)));

  const models = knownModels
    .map((id) => ({
      id,
      object: "model",
      created: new Date().getTime(),
      owned_by: "openai",
      permission: [
        {
          id: "modelperm-" + id,
          object: "model_permission",
          created: new Date().getTime(),
          organization: "*",
          group: null,
          is_blocking: false,
        },
      ],
      root: id,
      parent: null,
    }))
    .filter((model) => available.has(getOpenAIModelFamily(model.id)));

  modelsCache = { object: "list", data: models };
  modelsCacheTime = new Date().getTime();
  return modelsCache;
}
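
/** Serves the (possibly cached) OpenAI-compatible model list. */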
const handleModelRequest: RequestHandler = (_req, res) => {
  res.status(200).json(getModelsResponse());
};

/** Handles some turbo-instruct special cases. */
const rewriteForTurboInstruct: RequestPreprocessor = (req) => {
  // /v1/turbo-instruct/v1/chat/completions accepts either prompt or messages.
  // Depending on which one is provided, we set the inbound API format so the
  // request is transformed correctly later.
  if (req.body.prompt && !req.body.messages) {
    req.inboundApi = "openai-text";
  } else if (req.body.messages && !req.body.prompt) {
    req.inboundApi = "openai";
    // Set the model for the user since they're using a client which is not
    // aware of turbo-instruct.
    req.body.model = "gpt-3.5-turbo-instruct";
  } else {
    throw new Error("Exactly one of `prompt` or `messages` must be provided");
  }
  req.url = "/v1/completions";
};
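
/**
 * Applies the rewriter pipeline to an outgoing completion request: enforces
 * quota limits, attaches an API key from the pool, applies request filters,
 * strips client headers, and finalizes the body.
 */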
const rewriteRequest = (
  proxyReq: http.ClientRequest,
  req: Request,
  res: http.ServerResponse
) => {
  const rewriterPipeline = [
    applyQuotaLimits,
    addKey,
    languageFilter,
    limitCompletions,
    blockZoomerOrigins,
    stripHeaders,
    finalizeBody,
  ];

  try {
    for (const rewriter of rewriterPipeline) {
      rewriter(proxyReq, req, res, {});
    }
  } catch (error) {
    req.log.error(error, "Error while executing proxy rewriter");
    proxyReq.destroy(error as Error);
  }
};
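
/**
 * Rewriter pipeline for embeddings requests. The body is replaced so that
 * only `input` is forwarded and the model is always text-embedding-ada-002.
 */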
const rewriteEmbeddingsRequest = (
  proxyReq: http.ClientRequest,
  req: Request,
  res: http.ServerResponse
) => {
  const rewriterPipeline = [
    addKeyForEmbeddingsRequest,
    stripHeaders,
    finalizeBody,
  ];
  req.body = { input: req.body.input, model: "text-embedding-ada-002" };

  try {
    for (const rewriter of rewriterPipeline) {
      rewriter(proxyReq, req, res, {});
    }
  } catch (error) {
    req.log.error(error, "Error while executing proxy embeddings rewriter");
    proxyReq.destroy(error as Error);
  }
};
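
/**
 * Postprocesses buffered (non-streaming) completion responses: appends the
 * prompt-logging notice when enabled, converts turbo-instruct completions
 * back to chat format for chat clients, attaches tokenizer debug info, and
 * sends the final body to the client.
 */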
const openaiResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  if (typeof body !== "object") {
    throw new Error("Expected body to be an object");
  }

  if (config.promptLogging) {
    const host = req.get("host");
    body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
  }

  if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
    req.log.info("Transforming Turbo-Instruct response to Chat format");
    body = transformTurboInstructResponse(body);
  }

  // TODO: Remove once tokenization is stable
  if (req.debug) {
    body.proxy_tokenizer_debug_info = req.debug;
  }

  res.status(200).json(body);
};

/** Only used for non-streaming responses. */
function transformTurboInstructResponse(
  turboInstructBody: Record<string, any>
): Record<string, any> {
  const transformed = { ...turboInstructBody };
  transformed.choices = [
    {
      ...turboInstructBody.choices[0],
      message: {
        role: "assistant",
        content: turboInstructBody.choices[0].text.trim(),
      },
    },
  ];
  delete transformed.choices[0].text;
  return transformed;
}
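
// Queued proxy for completion requests. Responses are buffered so that
// openaiResponseHandler can postprocess them before they reach the client.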
const openaiProxy = createQueueMiddleware(
  createProxyMiddleware({
    target: "https://api.openai.com",
    changeOrigin: true,
    on: {
      proxyReq: rewriteRequest,
      proxyRes: createOnProxyResHandler([openaiResponseHandler]),
      error: handleProxyError,
    },
    selfHandleResponse: true,
    logger,
  })
);
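
// The embeddings proxy bypasses the queue and pipes the upstream response
// through unmodified.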
const openaiEmbeddingsProxy = createProxyMiddleware({
  target: "https://api.openai.com",
  changeOrigin: true,
  on: { proxyReq: rewriteEmbeddingsRequest, error: handleProxyError },
  selfHandleResponse: false,
  logger,
});

const openaiRouter = Router();

// Fix paths because clients don't consistently use the /v1 prefix.
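// e.g. a request to "/chat/completions" is rewritten to "/v1/chat/completions".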
openaiRouter.use((req, _res, next) => {
  if (!req.path.startsWith("/v1/")) {
    req.url = `/v1${req.url}`;
  }
  next();
});

openaiRouter.get("/v1/models", handleModelRequest);

// Native text completion endpoint, only for turbo-instruct.
openaiRouter.post(
  "/v1/completions",
  ipLimiter,
  createPreprocessorMiddleware({
    inApi: "openai-text",
    outApi: "openai-text",
    service: "openai",
  }),
  openaiProxy
);

// Turbo-instruct compatibility endpoint; accepts either `prompt` or `messages`.
openaiRouter.post(
  /\/v1\/turbo-instruct\/(v1\/)?chat\/completions/,
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai-text", service: "openai" },
    {
      beforeTransform: [rewriteForTurboInstruct],
      afterTransform: [forceModel("gpt-3.5-turbo-instruct")],
    }
  ),
  openaiProxy
);

// General chat completion endpoint. Turbo-instruct is not supported here.
openaiRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  createPreprocessorMiddleware({
    inApi: "openai",
    outApi: "openai",
    service: "openai",
  }),
  openaiProxy
);

// Embeddings endpoint.
openaiRouter.post(
  "/v1/embeddings",
  ipLimiter,
  createEmbeddingsPreprocessorMiddleware(),
  openaiEmbeddingsProxy
);

export const openai = openaiRouter;
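
// A minimal usage sketch (assuming a hypothetical server.ts with an Express
// app; the mount path is illustrative):
//
//   import express from "express";
//   import { openai } from "./proxy/openai";
//
//   const app = express();
//   app.use("/proxy/openai", openai);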