diff --git a/src/key-management/index.ts b/src/key-management/index.ts index e7e8c6c..2214bb1 100644 --- a/src/key-management/index.ts +++ b/src/key-management/index.ts @@ -1,4 +1,5 @@ import { KeyPool } from "./key-pool"; -export type { Key } from "./key-pool"; +export type { Key, Model } from "./key-pool"; export const keyPool = new KeyPool(); +export { SUPPORTED_MODELS } from "./key-pool"; diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts index bbaf4df..9838bf6 100644 --- a/src/key-management/key-pool.ts +++ b/src/key-management/key-pool.ts @@ -6,6 +6,27 @@ import { config } from "../config"; import { logger } from "../logger"; import { KeyChecker } from "./key-checker"; +// I made too many assumptions about OpenAI being the only provider and now this +// is a mess with the addition of Anthropic. Server will have to be restricted +// to operating on one provider at a time until I can refactor this to use +// some KeyProvider interface. + +// TODO: Move this stuff somewhere else, it's not key management. +export type Model = OpenAIModel | AnthropicModel; +export type OpenAIModel = +| "gpt-3.5-turbo" +| "gpt-4" +export type AnthropicModel = +| "claude-v1" +| "claude-instant-v1" +export const SUPPORTED_MODELS: readonly Model[] = [ + "gpt-3.5-turbo", + "gpt-4", + "claude-v1", + "claude-instant-v1", +] as const; + + export type Key = { /** The OpenAI API key itself. */ key: string; @@ -91,32 +112,29 @@ export class KeyPool { }); } - public get(model: string) { - const needsGpt4Key = model.startsWith("gpt-4"); + public get(model: Model) { + const needGpt4 = model.startsWith("gpt-4"); const availableKeys = this.keys - .filter((key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4)) + .filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4)) .sort((a, b) => a.lastUsed - b.lastUsed); if (availableKeys.length === 0) { let message = "No keys available. Please add more keys."; - if (needsGpt4Key) { + if (needGpt4) { message = - "No GPT-4 keys available. Please add more keys or use a non-GPT-4 model."; + "No GPT-4 keys available. Please add more keys or select a non-GPT-4 model."; } - this.log.error(message); throw new Error(message); } // Prioritize trial keys const trialKeys = availableKeys.filter((key) => key.isTrial); if (trialKeys.length > 0) { - this.log.info({ key: trialKeys[0].hash }, "Using trial key"); trialKeys[0].lastUsed = Date.now(); return trialKeys[0]; } // Otherwise, return the oldest key const oldestKey = availableKeys[0]; - this.log.info({ key: oldestKey.hash }, "Assigning key to request."); oldestKey.lastUsed = Date.now(); return { ...oldestKey }; } diff --git a/src/proxy/rewriters/add-key.ts b/src/proxy/rewriters/add-key.ts index 5d5ec25..3b8e801 100644 --- a/src/proxy/rewriters/add-key.ts +++ b/src/proxy/rewriters/add-key.ts @@ -1,10 +1,45 @@ import type { ExpressHttpProxyReqCallback } from "."; -import { Key, keyPool } from "../../key-management"; +import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management"; /** Add an OpenAI key from the pool to the request. */ export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => { let assignedKey: Key; - assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!; + + // Not all clients request a particular model. + // If they request a model, just use that. + // If they don't request a model, use a GPT-4 key if there is an active one, + // otherwise use a GPT-3.5 key. + + // TODO: Anthropic mode should prioritize Claude over Claude Instant. + // Each provider needs to define some priority order for their models. + + if (bodyHasModel(req.body)) { + assignedKey = keyPool.get(req.body.model); + } else { + try { + assignedKey = keyPool.get("gpt-4"); + } catch { + assignedKey = keyPool.get("gpt-3.5-turbo"); + } + } req.key = assignedKey; + req.log.info( + { + key: assignedKey.hash, + model: req.body?.model, + isGpt4: assignedKey.isGpt4, + }, + "Assigned key to request" + ); + + // TODO: Requests to Anthropic models use `X-API-Key`. proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`); }; + +function bodyHasModel(body: any): body is { model: Model } { + // Model names can have suffixes indicating the frozen release version but + // OpenAI and Anthropic will use the latest version if you omit the suffix. + const isSupportedModel = (model: string) => + SUPPORTED_MODELS.some((supported) => model.startsWith(supported)); + return typeof body?.model === "string" && isSupportedModel(body.model); +} diff --git a/src/proxy/rewriters/transform-kobold-payload.ts b/src/proxy/rewriters/transform-kobold-payload.ts index 15aef3a..fd26af0 100644 --- a/src/proxy/rewriters/transform-kobold-payload.ts +++ b/src/proxy/rewriters/transform-kobold-payload.ts @@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = ( { role: "user", content: lastLine }, ]; - // Kobold doesn't select a model. If we were assigned a key that supports - // gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly - // assigned, we'll get an error from OpenAI but the key will be downgraded - // for the next request. + // Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4 + // key, use that. Otherwise, use GPT-3.5-turbo. - // const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; //TODO: this is fucked, fix it later - - const model = "gpt-3.5-turbo"; + const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; const newBody = { model, temperature,