fixes kobold key selection, I think

This commit is contained in:
nai-degen 2023-04-10 19:50:42 -07:00 committed by nai-degen
parent f37a2fa7fa
commit 885487a61d
4 changed files with 68 additions and 18 deletions

View File

@ -1,4 +1,5 @@
import { KeyPool } from "./key-pool";
export type { Key } from "./key-pool";
export type { Key, Model } from "./key-pool";
export const keyPool = new KeyPool();
export { SUPPORTED_MODELS } from "./key-pool";

View File

@ -6,6 +6,27 @@ import { config } from "../config";
import { logger } from "../logger";
import { KeyChecker } from "./key-checker";
// I made too many assumptions about OpenAI being the only provider and now this
// is a mess with the addition of Anthropic. Server will have to be restricted
// to operating on one provider at a time until I can refactor this to use
// some KeyProvider interface.
// TODO: Move the model definitions below somewhere else; they are not key management.
/** Model names listed for OpenAI. */
export type OpenAIModel = "gpt-3.5-turbo" | "gpt-4";

/** Model names listed for Anthropic. */
export type AnthropicModel = "claude-v1" | "claude-instant-v1";

/** Any model name from either provider. */
export type Model = OpenAIModel | AnthropicModel;

/**
 * Every model name in `Model`, OpenAI entries first, then Anthropic.
 * The annotation already makes the list read-only to consumers.
 */
export const SUPPORTED_MODELS: readonly Model[] = [
  "gpt-3.5-turbo",
  "gpt-4",
  "claude-v1",
  "claude-instant-v1",
];
export type Key = {
/** The OpenAI API key itself. */
key: string;
@ -91,32 +112,29 @@ export class KeyPool {
});
}
/**
 * Selects the key that should serve a request for `model` and stamps it as
 * used now.
 *
 * Candidates are the keys that are not disabled and, when `model` starts with
 * "gpt-4", are also flagged `isGpt4`. Candidates are ordered by ascending
 * `lastUsed`; the first trial key in that order wins, otherwise the least
 * recently used key is chosen.
 *
 * @param model Model the request is for.
 * @returns A shallow copy of the selected key, so changes the caller makes to
 *   the returned object do not reach the pooled entry.
 * @throws Error when no candidate key exists for `model`.
 */
public get(model: Model) {
  const needGpt4 = model.startsWith("gpt-4");
  const availableKeys = this.keys
    .filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4))
    .sort((a, b) => a.lastUsed - b.lastUsed);

  if (availableKeys.length === 0) {
    const message = needGpt4
      ? "No GPT-4 keys available. Please add more keys or select a non-GPT-4 model."
      : "No keys available. Please add more keys.";
    this.log.error(message);
    throw new Error(message);
  }

  // Prioritize trial keys.
  const trialKey = availableKeys.find((key) => key.isTrial);
  if (trialKey) {
    this.log.info({ key: trialKey.hash }, "Using trial key");
    // Stamp the pooled entry itself so the rotation order advances.
    trialKey.lastUsed = Date.now();
    // Hand out a copy, matching the fallback branch below.
    return { ...trialKey };
  }

  // Otherwise, return the oldest key.
  const oldestKey = availableKeys[0];
  this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
  oldestKey.lastUsed = Date.now();
  return { ...oldestKey };
}

View File

@ -1,10 +1,45 @@
import type { ExpressHttpProxyReqCallback } from ".";
import { Key, keyPool } from "../../key-management";
import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management";
/**
 * Request rewriter that takes a key from the pool and attaches it to the
 * outgoing proxy request.
 *
 * The selected key is stored on `req.key`, logged by hash, and sent upstream
 * in an `Authorization: Bearer` header. The pool is queried only after the
 * body's model has been validated, so each request touches at most the keys
 * it may actually use.
 *
 * An error thrown by the final `keyPool.get` call is not caught here.
 */
export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
  let assignedKey: Key;

  // Not all clients request a particular model.
  // If they request a supported model, just use that.
  // If they don't, use a GPT-4 key if there is an active one, otherwise use
  // a GPT-3.5 key.
  // TODO: Anthropic mode should prioritize Claude over Claude Instant.
  // Each provider needs to define some priority order for their models.
  if (bodyHasModel(req.body)) {
    assignedKey = keyPool.get(req.body.model);
  } else {
    try {
      assignedKey = keyPool.get("gpt-4");
    } catch {
      // No usable GPT-4 key; fall back to GPT-3.5. A failure here propagates.
      assignedKey = keyPool.get("gpt-3.5-turbo");
    }
  }

  req.key = assignedKey;
  req.log.info(
    {
      key: assignedKey.hash,
      model: req.body?.model,
      isGpt4: assignedKey.isGpt4,
    },
    "Assigned key to request"
  );

  // TODO: Requests to Anthropic models use `X-API-Key`.
  proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
};
/**
 * Type guard: reports whether `body.model` is a string that begins with one
 * of the names in `SUPPORTED_MODELS`.
 *
 * Matching is by prefix because model names can have suffixes indicating the
 * frozen release version, while OpenAI and Anthropic will use the latest
 * version if the suffix is omitted. As a consequence the accepted string may
 * be longer than any literal in `Model`.
 */
function bodyHasModel(body: any): body is { model: Model } {
  const requested = body?.model;
  if (typeof requested !== "string") {
    return false;
  }
  for (const known of SUPPORTED_MODELS) {
    if (requested.startsWith(known)) {
      return true;
    }
  }
  return false;
}

View File

@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
{ role: "user", content: lastLine },
];
// Kobold doesn't select a model. If we were assigned a key that supports
// gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
// assigned, we'll get an error from OpenAI but the key will be downgraded
// for the next request.
// Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4
// key, use that. Otherwise, use GPT-3.5-turbo.
// const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; //TODO: this is fucked, fix it later
const model = "gpt-3.5-turbo";
const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";
const newBody = {
model,
temperature,