fixes kobold key selection, I think
This commit is contained in:
parent
f37a2fa7fa
commit
885487a61d
|
@ -1,4 +1,5 @@
|
|||
import { KeyPool } from "./key-pool";
|
||||
|
||||
export type { Key } from "./key-pool";
|
||||
export type { Key, Model } from "./key-pool";
|
||||
export const keyPool = new KeyPool();
|
||||
export { SUPPORTED_MODELS } from "./key-pool";
|
||||
|
|
|
@ -6,6 +6,27 @@ import { config } from "../config";
|
|||
import { logger } from "../logger";
|
||||
import { KeyChecker } from "./key-checker";
|
||||
|
||||
// I made too many assumptions about OpenAI being the only provider and now this
|
||||
// is a mess with the addition of Anthropic. Server will have to be restricted
|
||||
// to operating on one provider at a time until I can refactor this to use
|
||||
// some KeyProvider interface.
|
||||
|
||||
// TODO: Move this stuff somewhere else, it's not key management.
|
||||
/** Union of every OpenAI and Anthropic model identifier declared below. */
export type Model = OpenAIModel | AnthropicModel;
|
||||
/** OpenAI model identifiers, written without a release-version suffix. */
export type OpenAIModel =
|
||||
| "gpt-3.5-turbo"
|
||||
| "gpt-4"
|
||||
/** Anthropic model identifiers, written without a release-version suffix. */
export type AnthropicModel =
|
||||
| "claude-v1"
|
||||
| "claude-instant-v1"
|
||||
/**
 * Runtime list of the model identifiers in the `Model` union.
 * Keep it in sync with the `OpenAIModel` and `AnthropicModel` unions: the type
 * annotation checks that each entry is a `Model`, but not that every `Model`
 * is listed.
 */
export const SUPPORTED_MODELS: readonly Model[] = [
|
||||
"gpt-3.5-turbo",
|
||||
"gpt-4",
|
||||
"claude-v1",
|
||||
"claude-instant-v1",
|
||||
] as const;
|
||||
|
||||
|
||||
export type Key = {
|
||||
/** The OpenAI API key itself. */
|
||||
key: string;
|
||||
|
@ -91,32 +112,29 @@ export class KeyPool {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Selects a key from the pool for a request to `model`.
 *
 * Disabled keys are skipped, and when `model` starts with "gpt-4" only keys
 * flagged `isGpt4` are eligible. Eligible keys are ordered by ascending
 * `lastUsed`; the first trial key in that order is preferred, otherwise the
 * first key overall is used. The chosen key's `lastUsed` is set to
 * `Date.now()` before it is returned.
 *
 * @param model Name of the requested model.
 * @returns The selected key (see the review note on the trial-key return).
 * @throws Error when no eligible key exists; the message is also logged.
 *
 * NOTE(review): this span appears to be a rendered diff hunk, so superseded
 * and replacement lines (signature, filter, messages) sit back to back.
 */
public get(model: string) {
|
||||
const needsGpt4Key = model.startsWith("gpt-4");
|
||||
public get(model: Model) {
|
||||
const needGpt4 = model.startsWith("gpt-4");
|
||||
const availableKeys = this.keys
|
||||
.filter((key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4))
|
||||
.filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4))
|
||||
// Least recently used first.
.sort((a, b) => a.lastUsed - b.lastUsed);
|
||||
if (availableKeys.length === 0) {
|
||||
let message = "No keys available. Please add more keys.";
|
||||
if (needsGpt4Key) {
|
||||
if (needGpt4) {
|
||||
message =
|
||||
"No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
|
||||
"No GPT-4 keys available. Please add more keys or select a non-GPT-4 model.";
|
||||
}
|
||||
this.log.error(message);
|
||||
throw new Error(message);
|
||||
}
|
||||
|
||||
// Prioritize trial keys
|
||||
const trialKeys = availableKeys.filter((key) => key.isTrial);
|
||||
if (trialKeys.length > 0) {
|
||||
this.log.info({ key: trialKeys[0].hash }, "Using trial key");
|
||||
trialKeys[0].lastUsed = Date.now();
|
||||
// NOTE(review): this returns the pool's own object, whereas the fallback
// below returns a shallow copy. Confirm which of the two is intended.
return trialKeys[0];
|
||||
}
|
||||
|
||||
// Otherwise, return the oldest key
|
||||
const oldestKey = availableKeys[0];
|
||||
this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
|
||||
oldestKey.lastUsed = Date.now();
|
||||
return { ...oldestKey };
|
||||
}
|
||||
|
|
|
@ -1,10 +1,45 @@
|
|||
import type { ExpressHttpProxyReqCallback } from ".";
|
||||
import { Key, keyPool } from "../../key-management";
|
||||
import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management";
|
||||
|
||||
/**
 * Add an OpenAI key from the pool to the request.
 *
 * Picks a key via `keyPool.get`, stores it on `req.key`, logs the assignment
 * (key hash, requested model, `isGpt4`) and sends the key upstream as a Bearer
 * token in the `Authorization` header. Errors thrown by `keyPool.get` are only
 * caught for the GPT-4 attempt made when the body names no supported model.
 */
|
||||
export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
|
||||
let assignedKey: Key;
|
||||
assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!;
|
||||
|
||||
// Not all clients request a particular model.
|
||||
// If they request a model, just use that.
|
||||
// If they don't request a model, use a GPT-4 key if there is an active one,
|
||||
// otherwise use a GPT-3.5 key.
|
||||
|
||||
// TODO: Anthropic mode should prioritize Claude over Claude Instant.
|
||||
// Each provider needs to define some priority order for their models.
|
||||
|
||||
if (bodyHasModel(req.body)) {
|
||||
assignedKey = keyPool.get(req.body.model);
|
||||
} else {
|
||||
try {
|
||||
assignedKey = keyPool.get("gpt-4");
|
||||
} catch {
|
||||
// The GPT-4 lookup threw; retry with GPT-3.5-turbo. An error from this
// second lookup is not caught here and propagates to the caller.
assignedKey = keyPool.get("gpt-3.5-turbo");
|
||||
}
|
||||
}
|
||||
req.key = assignedKey;
|
||||
req.log.info(
|
||||
{
|
||||
key: assignedKey.hash,
|
||||
// The model as sent by the client; undefined when the body names none.
model: req.body?.model,
|
||||
isGpt4: assignedKey.isGpt4,
|
||||
},
|
||||
"Assigned key to request"
|
||||
);
|
||||
|
||||
// TODO: Requests to Anthropic models use `X-API-Key`.
|
||||
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
|
||||
};
|
||||
|
||||
/**
 * Type guard: true when `body.model` is a string that starts with one of the
 * `SUPPORTED_MODELS` entries.
 *
 * NOTE(review): matching is by prefix, so a suffixed model name passes and is
 * then narrowed to `Model` although it is not literally a member of that
 * union. Callers should not rely on exact equality with a `Model` literal.
 */
function bodyHasModel(body: any): body is { model: Model } {
|
||||
// Model names can have suffixes indicating the frozen release version but
|
||||
// OpenAI and Anthropic will use the latest version if you omit the suffix.
|
||||
const isSupportedModel = (model: string) =>
|
||||
SUPPORTED_MODELS.some((supported) => model.startsWith(supported));
|
||||
// The `typeof` check also rejects a missing body or a missing `model` field.
return typeof body?.model === "string" && isSupportedModel(body.model);
|
||||
}
|
||||
|
|
|
@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
|
|||
{ role: "user", content: lastLine },
|
||||
];
|
||||
|
||||
// Kobold doesn't select a model. If we were assigned a key that supports
|
||||
// gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
|
||||
// assigned, we'll get an error from OpenAI but the key will be downgraded
|
||||
// for the next request.
|
||||
// Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4
|
||||
// key, use that. Otherwise, use GPT-3.5-turbo.
|
||||
|
||||
// const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; //TODO: this is fucked, fix it later
|
||||
|
||||
const model = "gpt-3.5-turbo";
|
||||
const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";
|
||||
const newBody = {
|
||||
model,
|
||||
temperature,
|
||||
|
|
Loading…
Reference in New Issue