implements kobold endpoint
This commit is contained in:
parent
1ba72582da
commit
fd75f5c0c6
17
src/keys.ts
17
src/keys.ts
|
@ -128,4 +128,19 @@ function incrementPrompt(keyHash?: string) {
|
|||
key.promptCount++;
|
||||
}
|
||||
|
||||
export const keys = { init, list, get, anyAvailable, disable, incrementPrompt };
|
||||
/**
 * Clears the GPT-4 flag on the pooled key identified by `keyHash`.
 *
 * Does nothing when no hash is given. When the hash does not match any key in
 * the pool, a warning is logged and nothing is changed (previously this case
 * threw a TypeError on the unguarded lookup result).
 *
 * @param keyHash Hash of the key to downgrade.
 */
function downgradeKey(keyHash?: string) {
  if (!keyHash) return;

  const key = keyPool.find((k) => k.hash === keyHash);
  if (!key) {
    logger.warn({ key: keyHash }, "Cannot downgrade key: hash not found.");
    return;
  }

  logger.warn({ key: keyHash }, "Downgrading key to GPT-3.5.");
  key.isGpt4 = false;
}
|
||||
|
||||
export const keys = {
|
||||
init,
|
||||
list,
|
||||
get,
|
||||
anyAvailable,
|
||||
disable,
|
||||
incrementPrompt,
|
||||
downgradeKey,
|
||||
};
|
||||
|
|
|
@ -4,17 +4,22 @@ import * as httpProxy from "http-proxy";
|
|||
import { logger } from "../logger";
|
||||
import { keys } from "../keys";
|
||||
|
||||
const MODEL_ROUTES = ["/v1/chat/completions"];
|
||||
export const QUOTA_ROUTES = ["/v1/chat/completions"];
|
||||
|
||||
/** Handle and rewrite response to proxied requests to OpenAI */
|
||||
// TODO: This is a mess, fix it
|
||||
export const handleResponse = (
|
||||
/** Check for errors in the response from OpenAI and handle them. */
|
||||
// This is a mess of promises, callbacks and event listeners because none of
|
||||
// this low-level nodejs http shit is async/await friendly.
|
||||
export const handleDownstreamErrors = (
|
||||
proxyRes: http.IncomingMessage,
|
||||
req: Request,
|
||||
res: Response
|
||||
) => {
|
||||
const statusCode = proxyRes.statusCode || 500;
|
||||
if (statusCode >= 400) {
|
||||
const promise = new Promise<void>((resolve, reject) => {
|
||||
const statusCode = proxyRes.statusCode || 500;
|
||||
if (statusCode < 400) {
|
||||
return resolve();
|
||||
}
|
||||
|
||||
let body = "";
|
||||
proxyRes.on("data", (chunk) => (body += chunk));
|
||||
proxyRes.on("end", () => {
|
||||
|
@ -26,10 +31,15 @@ export const handleResponse = (
|
|||
: "There are no more keys available.";
|
||||
try {
|
||||
errorPayload = JSON.parse(body);
|
||||
} catch (err) {
|
||||
logger.error({ error: err }, errorPayload.error);
|
||||
res.json(errorPayload);
|
||||
return;
|
||||
} catch (parseError) {
|
||||
const error = parseError as Error;
|
||||
logger.error({ error }, "Problem parsing error from OpenAI");
|
||||
res.json({
|
||||
error: "Problem parsing error from OpenAI",
|
||||
body: body,
|
||||
trace: error.stack,
|
||||
});
|
||||
return reject(error);
|
||||
}
|
||||
|
||||
if (statusCode === 401) {
|
||||
|
@ -57,24 +67,34 @@ export const handleResponse = (
|
|||
`OpenAI rate limit exceeded or model overloaded. Keyhash ${req.key?.hash}`
|
||||
);
|
||||
}
|
||||
} else if (statusCode === 404) {
|
||||
// Most likely model not found
|
||||
if (errorPayload.error?.code === "model_not_found") {
|
||||
if (req.key!.isGpt4) {
|
||||
keys.downgradeKey(req.key?.hash);
|
||||
}
|
||||
errorPayload.proxy_note =
|
||||
"This key may have been incorrectly flagged as gpt-4 enabled.";
|
||||
}
|
||||
} else {
|
||||
logger.error(
|
||||
{ error: errorPayload },
|
||||
`Unexpected error from OpenAI. Keyhash ${req.key?.hash}`
|
||||
);
|
||||
}
|
||||
|
||||
res.status(statusCode).json(errorPayload);
|
||||
reject(errorPayload);
|
||||
});
|
||||
} else {
|
||||
// Increment key's usage count if request was to a quota'd route
|
||||
if (MODEL_ROUTES.includes(req.path)) {
|
||||
keys.incrementPrompt(req.key?.hash);
|
||||
}
|
||||
|
||||
Object.keys(proxyRes.headers).forEach((key) => {
|
||||
res.setHeader(key, proxyRes.headers[key] as string);
|
||||
});
|
||||
proxyRes.pipe(res);
|
||||
}
|
||||
});
|
||||
return promise;
|
||||
};
|
||||
|
||||
export const onError: httpProxy.ErrorCallback = (err, _req, res) => {
|
||||
/** Handles errors in the request rewrite pipeline before proxying to OpenAI. */
|
||||
export const handleInternalError: httpProxy.ErrorCallback = (
|
||||
err,
|
||||
_req,
|
||||
res
|
||||
) => {
|
||||
logger.error({ error: err }, "Error proxying to OpenAI");
|
||||
|
||||
(res as http.ServerResponse).writeHead(500, {
|
||||
|
@ -91,3 +111,18 @@ export const onError: httpProxy.ErrorCallback = (err, _req, res) => {
|
|||
})
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Records one prompt against the request's key, but only when the request
 * path is one of the `QUOTA_ROUTES`.
 */
export const incrementKeyUsage = (req: Request) => {
  const countsAgainstQuota = QUOTA_ROUTES.includes(req.path);
  if (!countsAgainstQuota) {
    return;
  }
  keys.incrementPrompt(req.key?.hash);
};
|
||||
|
||||
/**
 * Copies every header of the proxied response onto the outgoing response.
 *
 * Header values may be a single string or an array of strings (for example
 * `set-cookie`); both are passed to `setHeader` unchanged. Entries whose value
 * is `undefined` are skipped instead of being force-cast to a string.
 *
 * @param proxyRes Response received from the proxied server.
 * @param res Response being sent back to the client.
 */
export const copyHttpHeaders = (
  proxyRes: http.IncomingMessage,
  res: Response
) => {
  for (const [name, value] of Object.entries(proxyRes.headers)) {
    if (value === undefined) continue;
    res.setHeader(name, value);
  }
};
|
||||
|
|
|
@ -1,6 +1,106 @@
|
|||
import { Request, Response, NextFunction } from "express";
|
||||
/* Pretends to be a KoboldAI API endpoint and translates incoming Kobold
|
||||
requests to OpenAI API equivalents. */
|
||||
|
||||
export const kobold = (req: Request, res: Response, next: NextFunction) => {
|
||||
// TODO: Implement kobold
|
||||
res.status(501).json({ error: "Not implemented" });
|
||||
import { Request, Response, Router } from "express";
|
||||
import http from "http";
|
||||
import { createProxyMiddleware } from "http-proxy-middleware";
|
||||
import { logger } from "../logger";
|
||||
import {
|
||||
copyHttpHeaders,
|
||||
handleDownstreamErrors,
|
||||
handleInternalError,
|
||||
incrementKeyUsage,
|
||||
} from "./common";
|
||||
import {
|
||||
addKey,
|
||||
disableStream,
|
||||
finalizeBody,
|
||||
languageFilter,
|
||||
limitOutputTokens,
|
||||
} from "./rewriters";
|
||||
import { transformKoboldPayload } from "./rewriters/transform-kobold-payload";
|
||||
|
||||
/** Answers with HTTP 200 and a fixed `result` string; the request is not inspected. */
export const handleModelRequest = (_req: Request, res: Response) => {
  const payload = { result: "Connected to OpenAI reverse proxy" };
  res.status(200).json(payload);
};
|
||||
|
||||
/** Answers with HTTP 200 and an always-empty `soft_prompts_list`. */
export const handleSoftPromptsRequest = (_req: Request, res: Response) => {
  const payload = { soft_prompts_list: [] };
  res.status(200).json(payload);
};
|
||||
|
||||
/**
 * Runs every request rewriter, in order, against the outgoing proxy request.
 * If any rewriter throws, the error is logged and the proxy request is
 * destroyed with that error; the remaining rewriters are not run.
 */
const rewriteRequest = (
  proxyReq: http.ClientRequest,
  req: Request,
  res: Response
) => {
  // Order matters: each step sees the changes made by the steps before it.
  const rewriterPipeline = [
    addKey,
    transformKoboldPayload,
    languageFilter,
    disableStream,
    limitOutputTokens,
    finalizeBody,
  ];

  try {
    rewriterPipeline.forEach((applyRewriter) => {
      applyRewriter(proxyReq, req, res, {});
    });
  } catch (rewriteError) {
    logger.error(rewriteError, "Error while executing proxy rewriter");
    proxyReq.destroy(rewriteError as Error);
  }
};
|
||||
|
||||
/**
 * Handles a response proxied back from OpenAI for a Kobold request.
 *
 * If `handleDownstreamErrors` rejects, it has already written the response and
 * this function returns immediately. Otherwise key usage is recorded, upstream
 * headers are copied, and the buffered response body is converted into a
 * KoboldAI payload of the form `{ results: [{ text }] }`.
 *
 * A body that is not valid JSON, or that has no string at
 * `choices[0].message.content`, is logged and answered with HTTP 500 instead
 * of throwing out of the stream's `end` listener.
 */
const handleProxiedResponse = async (
  proxyRes: http.IncomingMessage,
  req: Request,
  res: Response
) => {
  try {
    await handleDownstreamErrors(proxyRes, req, res);
  } catch (error) {
    // Handler takes over the response, we're done here.
    return;
  }
  incrementKeyUsage(req);
  // NOTE(review): this copies all upstream headers, including ones describing
  // the upstream body (e.g. content-encoding), although the body is replaced
  // below — confirm that is safe for Kobold clients.
  copyHttpHeaders(proxyRes, res);

  // For Kobold we need to consume the response body to turn it into a KoboldAI
  // response payload.
  let body = "";
  proxyRes.on("data", (chunk) => (body += chunk));
  proxyRes.on("end", () => {
    let text: unknown;
    try {
      text = JSON.parse(body)?.choices?.[0]?.message?.content;
      if (typeof text !== "string") {
        throw new Error("OpenAI response contains no message content");
      }
    } catch (error) {
      logger.error(
        { error, body },
        "Problem translating OpenAI response to Kobold format"
      );
      res.status(500).json({
        error: "Problem translating OpenAI response to Kobold format",
      });
      return;
    }

    res.status(200).json({ results: [{ text }] });
  });
};
|
||||
|
||||
const koboldOaiProxy = createProxyMiddleware({
|
||||
target: "https://api.openai.com",
|
||||
changeOrigin: true,
|
||||
pathRewrite: {
|
||||
"^/api/v1/generate": "/v1/chat/completions",
|
||||
},
|
||||
on: {
|
||||
proxyReq: rewriteRequest,
|
||||
proxyRes: handleProxiedResponse,
|
||||
error: handleInternalError,
|
||||
},
|
||||
selfHandleResponse: true,
|
||||
logger,
|
||||
});
|
||||
|
||||
const koboldRouter = Router();
|
||||
koboldRouter.get("/api/v1/model", handleModelRequest);
|
||||
koboldRouter.get("/api/v1/config/soft_prompts_list", handleSoftPromptsRequest);
|
||||
koboldRouter.post("/api/v1/generate", koboldOaiProxy);
|
||||
koboldRouter.use((req, res) => {
|
||||
logger.warn(`Unhandled kobold request: ${req.method} ${req.path}`);
|
||||
res.status(404).json({ error: "Not found" });
|
||||
});
|
||||
|
||||
export const kobold = koboldRouter;
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
import { Request, Router } from "express";
|
||||
import { Request, Response, Router } from "express";
|
||||
import * as http from "http";
|
||||
import { createProxyMiddleware } from "http-proxy-middleware";
|
||||
import { logger } from "../logger";
|
||||
import { handleResponse, onError } from "./common";
|
||||
import {
|
||||
handleDownstreamErrors,
|
||||
handleInternalError,
|
||||
incrementKeyUsage,
|
||||
copyHttpHeaders,
|
||||
} from "./common";
|
||||
import { ipLimiter } from "./rate-limit";
|
||||
import {
|
||||
addKey,
|
||||
|
@ -35,13 +40,29 @@ const rewriteRequest = (
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Relays a proxied response to the client. When `handleDownstreamErrors`
 * rejects, nothing further is done here; otherwise key usage is recorded,
 * upstream headers are copied and the upstream body is piped through.
 */
const handleProxiedResponse = async (
  proxyRes: http.IncomingMessage,
  req: Request,
  res: Response
) => {
  let handledByErrorHandler = false;
  try {
    await handleDownstreamErrors(proxyRes, req, res);
  } catch (error) {
    // Handler takes over the response, we're done here.
    handledByErrorHandler = true;
  }

  if (!handledByErrorHandler) {
    incrementKeyUsage(req);
    copyHttpHeaders(proxyRes, res);
    proxyRes.pipe(res);
  }
};
|
||||
|
||||
const openaiProxy = createProxyMiddleware({
|
||||
target: "https://api.openai.com",
|
||||
changeOrigin: true,
|
||||
on: {
|
||||
proxyReq: rewriteRequest,
|
||||
proxyRes: handleResponse,
|
||||
error: onError,
|
||||
proxyRes: handleProxiedResponse,
|
||||
error: handleInternalError,
|
||||
},
|
||||
selfHandleResponse: true,
|
||||
logger,
|
||||
|
@ -56,5 +77,4 @@ openaiRouter.use((req, res) => {
|
|||
res.status(404).json({ error: "Not found" });
|
||||
});
|
||||
|
||||
|
||||
export const openai = openaiRouter;
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
import { config } from "../../config";
|
||||
import type { ExpressHttpProxyReqCallback } from ".";
|
||||
import { logger } from "../../logger";
|
||||
|
||||
// Kobold requests look like this:
|
||||
// body:
|
||||
// {
|
||||
// prompt: "Aqua is character from Konosuba anime. Aqua is a goddess, before life in the Fantasy World, she was a goddess of water who guided humans to the afterlife. Aqua looks like young woman with beauty no human could match. Aqua has light blue hair, blue eyes, slim figure, long legs, wide hips, blue waist-long hair that is partially tied into a loop with a spherical clip. Aqua's measurements are 83-56-83 cm. Aqua's height 157cm. Aqua wears sleeveless dark-blue dress with white trimmings, extremely short dark blue miniskirt, green bow around her chest with a blue gem in the middle, detached white sleeves with blue and golden trimmings, thigh-high blue heeled boots over white stockings with blue trimmings. Aqua is very strong in water magic, but a little stupid, so she does not always use it to the place. Aqua is high-spirited, cheerful, carefree. Aqua rarely thinks about the consequences of her actions and always acts or speaks on her whims. Because very easy to taunt Aqua with jeers or lure her with praises.\n" +
|
||||
// "Aqua's personality: high-spirited, likes to party, carefree, cheerful.\n" +
|
||||
// 'Circumstances and context of the dialogue: Aqua is standing in the city square and is looking for new followers\n' +
|
||||
// 'This is how Aqua should talk\n' +
|
||||
// 'You: Hi Aqua, I heard you like to spend time in the pub.\n' +
|
||||
// "Aqua: *excitedly* Oh my goodness, yes! I just love spending time at the pub! It's so much fun to talk to all the adventurers and hear about their exciting adventures! And you are?\n" +
|
||||
// "You: I'm a new here and I wanted to ask for your advice.\n" +
|
||||
// 'Aqua: *giggles* Oh, advice! I love giving advice! And in gratitude for that, treat me to a drink! *gives signals to the bartender*\n' +
|
||||
// 'This is how Aqua should talk\n' +
|
||||
// 'You: Hello\n' +
|
||||
// "Aqua: *excitedly* Hello there, dear! Are you new to Axel? Don't worry, I, Aqua the goddess of water, am here to help you! Do you need any assistance? And may I say, I look simply radiant today! *strikes a pose and looks at you with puppy eyes*\n" +
|
||||
// '\n' +
|
||||
// 'Then the roleplay chat between You and Aqua begins.\n' +
|
||||
// "Aqua: *She is in the town square of a city named Axel. It's morning on a Saturday and she suddenly notices a person who looks like they don't know what they're doing. She approaches him and speaks* \n" +
|
||||
// '\n' +
|
||||
// `"Are you new here? Do you need help? Don't worry! I, Aqua the Goddess of Water, shall help you! Do I look beautiful?" \n` +
|
||||
// '\n' +
|
||||
// '*She strikes a pose and looks at him with puppy eyes.*\n' +
|
||||
// 'You: test\n' +
|
||||
// 'You: test\n' +
|
||||
// 'You: t\n' +
|
||||
// 'You: test\n',
|
||||
// use_story: false,
|
||||
// use_memory: false,
|
||||
// use_authors_note: false,
|
||||
// use_world_info: false,
|
||||
// max_context_length: 2048,
|
||||
// max_length: 180,
|
||||
// rep_pen: 1.1,
|
||||
// rep_pen_range: 1024,
|
||||
// rep_pen_slope: 0.9,
|
||||
// temperature: 0.65,
|
||||
// tfs: 0.9,
|
||||
// top_a: 0,
|
||||
// top_k: 0,
|
||||
// top_p: 0.9,
|
||||
// typical: 1,
|
||||
// sampler_order: [
|
||||
// 6, 0, 1, 2,
|
||||
// 3, 4, 5
|
||||
// ],
|
||||
// singleline: false
|
||||
// }
|
||||
|
||||
// OpenAI expects this body:
|
||||
// { model: 'gpt-3.5-turbo', temperature: 0.65, top_p: 0.9, max_tokens: 180, messages }
|
||||
// there's also a frequency_penalty but it's not clear how that maps to kobold's
|
||||
// rep_pen.
|
||||
|
||||
// messages is an array of { role: "system" | "assistant" | "user", content: ""}
|
||||
// kobold only sends us the entire prompt. we can try to split the last line and
|
||||
// use that as the user message and put the rest in the system message
|
||||
// ideally we'd split the history into user and assistant messages, but that's
|
||||
// too much work for now
|
||||
|
||||
/**
 * Transforms a KoboldAI payload into an OpenAI payload, replacing `req.body`.
 *
 * Kobold sends the whole conversation as one `prompt` string. The last line of
 * that prompt (ignoring trailing line breaks) becomes the user message and
 * everything before it becomes the system message.
 *
 * @throws Error when the request body has no string `prompt`.
 */
export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
  _proxyReq,
  req
) => {
  const { prompt, max_length, rep_pen, top_p, temperature } = req.body ?? {};
  if (typeof prompt !== "string") {
    throw new Error("Kobold request body is missing a string `prompt`.");
  }

  // Kobold prompts normally end with "\n". Without stripping it, the element
  // popped below would be the empty string after the final line break and the
  // user message would always be empty.
  const promptLines = prompt.replace(/[\r\n]+$/, "").split("\n");
  const lastLine = promptLines.pop() ?? "";
  const messages = [
    { role: "system", content: promptLines.join("\n") },
    { role: "user", content: lastLine },
  ];

  // Kobold doesn't select a model. If we were assigned a key that supports
  // gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
  // assigned, we'll get an error from OpenAI but the key will be downgraded
  // for the next request.
  const model = req.key?.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";

  // OpenAI documents frequency_penalty as a number between -2.0 and 2.0, so
  // clamp Kobold's rep_pen into that range. Non-numeric values pass through.
  const frequencyPenalty =
    typeof rep_pen === "number" ? Math.min(2, Math.max(-2, rep_pen)) : rep_pen;

  req.body = {
    model,
    temperature,
    top_p,
    frequency_penalty: frequencyPenalty, // remove this if model turns schizo
    max_tokens: max_length,
    messages,
  };
};
|
|
@ -15,4 +15,19 @@ router.use(auth);
|
|||
router.use("/kobold", kobold);
|
||||
router.use("/openai", openai);
|
||||
|
||||
/**
 * SillyTavern disregards the path in whatever URL users input, so its requests
 * come in at /api/v1. This middleware rewrites them to /proxy/kobold/api/v1 so
 * the request is routed to the correct handler, then always calls `next()`.
 *
 * Only `/api/v1` itself and paths beneath it are rewritten; look-alike
 * prefixes such as `/api/v10` are left untouched.
 */
function rewriteTavernRequests(
  req: express.Request,
  _res: express.Response,
  next: express.NextFunction
) {
  const tavernPrefix = "/api/v1";
  const { path } = req;
  // Require a path-segment boundary after the prefix.
  if (path === tavernPrefix || path.startsWith(`${tavernPrefix}/`)) {
    req.url = req.url.replace(tavernPrefix, "/proxy/kobold/api/v1");
  }
  next();
}
|
||||
|
||||
export { rewriteTavernRequests };
|
||||
export { router as proxyRouter };
|
||||
|
|
|
@ -4,14 +4,21 @@ import cors from "cors";
|
|||
import pinoHttp from "pino-http";
|
||||
import { logger } from "./logger";
|
||||
import { keys } from "./keys";
|
||||
import { proxyRouter } from "./proxy/routes";
|
||||
import { proxyRouter, rewriteTavernRequests } from "./proxy/routes";
|
||||
import { handleInfoPage } from "./info-page";
|
||||
|
||||
const PORT = config.port;
|
||||
|
||||
const app = express();
|
||||
// middleware
|
||||
app.use(pinoHttp({ logger }));
|
||||
app.use("/", rewriteTavernRequests);
|
||||
app.use(
|
||||
pinoHttp({
|
||||
logger,
|
||||
// SillyTavern spams the hell out of this endpoint so don't log it
|
||||
autoLogging: { ignore: (req) => req.url === "/proxy/kobold/api/v1/model" },
|
||||
})
|
||||
);
|
||||
app.use(cors());
|
||||
app.use(
|
||||
express.json({ limit: "10mb" }),
|
||||
|
|
Loading…
Reference in New Issue