fixes kobold key selection, I think
This commit is contained in:
parent
f37a2fa7fa
commit
885487a61d
|
@ -1,4 +1,5 @@
|
||||||
import { KeyPool } from "./key-pool";
|
import { KeyPool } from "./key-pool";
|
||||||
|
|
||||||
export type { Key } from "./key-pool";
|
export type { Key, Model } from "./key-pool";
|
||||||
export const keyPool = new KeyPool();
|
export const keyPool = new KeyPool();
|
||||||
|
export { SUPPORTED_MODELS } from "./key-pool";
|
||||||
|
|
|
@ -6,6 +6,27 @@ import { config } from "../config";
|
||||||
import { logger } from "../logger";
|
import { logger } from "../logger";
|
||||||
import { KeyChecker } from "./key-checker";
|
import { KeyChecker } from "./key-checker";
|
||||||
|
|
||||||
|
// I made too many assumptions about OpenAI being the only provider, and now this
|
||||||
|
// is a mess with the addition of Anthropic. The server will have to be restricted
|
||||||
|
// to operating on one provider at a time until I can refactor this to use
|
||||||
|
// a KeyProvider interface.
|
||||||
|
|
||||||
|
// TODO: Move this stuff somewhere else, it's not key management.
|
||||||
|
export type Model = OpenAIModel | AnthropicModel;
|
||||||
|
export type OpenAIModel =
|
||||||
|
| "gpt-3.5-turbo"
|
||||||
|
| "gpt-4"
|
||||||
|
export type AnthropicModel =
|
||||||
|
| "claude-v1"
|
||||||
|
| "claude-instant-v1"
|
||||||
|
export const SUPPORTED_MODELS: readonly Model[] = [
|
||||||
|
"gpt-3.5-turbo",
|
||||||
|
"gpt-4",
|
||||||
|
"claude-v1",
|
||||||
|
"claude-instant-v1",
|
||||||
|
] as const;
|
||||||
|
|
||||||
|
|
||||||
export type Key = {
|
export type Key = {
|
||||||
/** The OpenAI API key itself. */
|
/** The OpenAI API key itself. */
|
||||||
key: string;
|
key: string;
|
||||||
|
@ -91,32 +112,29 @@ export class KeyPool {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public get(model: string) {
|
public get(model: Model) {
|
||||||
const needsGpt4Key = model.startsWith("gpt-4");
|
const needGpt4 = model.startsWith("gpt-4");
|
||||||
const availableKeys = this.keys
|
const availableKeys = this.keys
|
||||||
.filter((key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4))
|
.filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4))
|
||||||
.sort((a, b) => a.lastUsed - b.lastUsed);
|
.sort((a, b) => a.lastUsed - b.lastUsed);
|
||||||
if (availableKeys.length === 0) {
|
if (availableKeys.length === 0) {
|
||||||
let message = "No keys available. Please add more keys.";
|
let message = "No keys available. Please add more keys.";
|
||||||
if (needsGpt4Key) {
|
if (needGpt4) {
|
||||||
message =
|
message =
|
||||||
"No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
|
"No GPT-4 keys available. Please add more keys or select a non-GPT-4 model.";
|
||||||
}
|
}
|
||||||
this.log.error(message);
|
|
||||||
throw new Error(message);
|
throw new Error(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prioritize trial keys
|
// Prioritize trial keys
|
||||||
const trialKeys = availableKeys.filter((key) => key.isTrial);
|
const trialKeys = availableKeys.filter((key) => key.isTrial);
|
||||||
if (trialKeys.length > 0) {
|
if (trialKeys.length > 0) {
|
||||||
this.log.info({ key: trialKeys[0].hash }, "Using trial key");
|
|
||||||
trialKeys[0].lastUsed = Date.now();
|
trialKeys[0].lastUsed = Date.now();
|
||||||
return trialKeys[0];
|
return trialKeys[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, return the oldest key
|
// Otherwise, return the oldest key
|
||||||
const oldestKey = availableKeys[0];
|
const oldestKey = availableKeys[0];
|
||||||
this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
|
|
||||||
oldestKey.lastUsed = Date.now();
|
oldestKey.lastUsed = Date.now();
|
||||||
return { ...oldestKey };
|
return { ...oldestKey };
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,45 @@
|
||||||
import type { ExpressHttpProxyReqCallback } from ".";
|
import type { ExpressHttpProxyReqCallback } from ".";
|
||||||
import { Key, keyPool } from "../../key-management";
|
import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management";
|
||||||
|
|
||||||
/** Add an OpenAI key from the pool to the request. */
|
/** Add an OpenAI key from the pool to the request. */
|
||||||
export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
|
export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
|
||||||
let assignedKey: Key;
|
let assignedKey: Key;
|
||||||
assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!;
|
|
||||||
|
// Not all clients request a particular model.
|
||||||
|
// If they request a model, just use that.
|
||||||
|
// If they don't request a model, use a GPT-4 key if there is an active one,
|
||||||
|
// otherwise use a GPT-3.5 key.
|
||||||
|
|
||||||
|
// TODO: Anthropic mode should prioritize Claude over Claude Instant.
|
||||||
|
// Each provider needs to define a priority order for its models.
|
||||||
|
|
||||||
|
if (bodyHasModel(req.body)) {
|
||||||
|
assignedKey = keyPool.get(req.body.model);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
assignedKey = keyPool.get("gpt-4");
|
||||||
|
} catch {
|
||||||
|
assignedKey = keyPool.get("gpt-3.5-turbo");
|
||||||
|
}
|
||||||
|
}
|
||||||
req.key = assignedKey;
|
req.key = assignedKey;
|
||||||
|
req.log.info(
|
||||||
|
{
|
||||||
|
key: assignedKey.hash,
|
||||||
|
model: req.body?.model,
|
||||||
|
isGpt4: assignedKey.isGpt4,
|
||||||
|
},
|
||||||
|
"Assigned key to request"
|
||||||
|
);
|
||||||
|
|
||||||
|
// TODO: Requests to Anthropic models use `X-API-Key`.
|
||||||
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
|
proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function bodyHasModel(body: any): body is { model: Model } {
|
||||||
|
// Model names can have suffixes indicating the frozen release version, but
|
||||||
|
// OpenAI and Anthropic will use the latest version if you omit the suffix.
|
||||||
|
const isSupportedModel = (model: string) =>
|
||||||
|
SUPPORTED_MODELS.some((supported) => model.startsWith(supported));
|
||||||
|
return typeof body?.model === "string" && isSupportedModel(body.model);
|
||||||
|
}
|
||||||
|
|
|
@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
|
||||||
{ role: "user", content: lastLine },
|
{ role: "user", content: lastLine },
|
||||||
];
|
];
|
||||||
|
|
||||||
// Kobold doesn't select a model. If we were assigned a key that supports
|
// Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4
|
||||||
// gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
|
// key, use that. Otherwise, use GPT-3.5-turbo.
|
||||||
// assigned, we'll get an error from OpenAI but the key will be downgraded
|
|
||||||
// for the next request.
|
|
||||||
|
|
||||||
// const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; // TODO: this is broken, fix it later
|
const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";
|
||||||
|
|
||||||
const model = "gpt-3.5-turbo";
|
|
||||||
const newBody = {
|
const newBody = {
|
||||||
model,
|
model,
|
||||||
temperature,
|
temperature,
|
||||||
|
|
Loading…
Reference in New Issue