diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts index 0b42a54..d6b6bbc 100644 --- a/src/proxy/openai.ts +++ b/src/proxy/openai.ts @@ -1,7 +1,7 @@ import { RequestHandler, Router } from "express"; import { createProxyMiddleware } from "http-proxy-middleware"; import { config } from "../config"; -import { keyPool } from "../shared/key-management"; +import { keyPool, OpenAIKey } from "../shared/key-management"; import { getOpenAIModelFamily, ModelFamily, @@ -36,8 +36,8 @@ export const KNOWN_OPENAI_MODELS = [ "gpt-4-0613", "gpt-4-0314", // EOL 2024-06-13 "gpt-4-32k", + "gpt-4-32k-0314", // EOL 2024-06-13 "gpt-4-32k-0613", - // "gpt-4-32k-0314", // EOL 2024-06-13 "gpt-3.5-turbo", "gpt-3.5-turbo-0301", // EOL 2024-06-13 "gpt-3.5-turbo-0613", @@ -52,15 +52,21 @@ let modelsCache: any = null; let modelsCacheTime = 0; export function generateModelList(models = KNOWN_OPENAI_MODELS) { - let available = new Set(); + // Get available families and snapshots + let availableFamilies = new Set(); + const availableSnapshots = new Set(); for (const key of keyPool.list()) { if (key.isDisabled || key.service !== "openai") continue; - key.modelFamilies.forEach((family) => - available.add(family as OpenAIModelFamily) - ); + const asOpenAIKey = key as OpenAIKey; + asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f)); + asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s)); } + + // Remove disabled families const allowed = new Set(config.allowedModelFamilies); - available = new Set([...available].filter((x) => allowed.has(x))); + availableFamilies = new Set( + [...availableFamilies].filter((x) => allowed.has(x)) + ); return models .map((id) => ({ @@ -81,7 +87,16 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) { root: id, parent: null, })) - .filter((model) => available.has(getOpenAIModelFamily(model.id))); + .filter((model) => { + // First check if the family is available + const hasFamily = 
availableFamilies.has(getOpenAIModelFamily(model.id)); + if (!hasFamily) return false; + + // Then for snapshots, ensure the specific snapshot is available + const isSnapshot = model.id.match(/-\d{4}(-preview)?$/); + if (!isSnapshot) return true; + return availableSnapshots.has(model.id); + }); } const handleModelRequest: RequestHandler = (_req, res) => { @@ -165,7 +180,7 @@ const openaiProxy = createQueueMiddleware({ selfHandleResponse: true, logger, on: { - proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }), + proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody] }), proxyRes: createOnProxyResHandler([openaiResponseHandler]), error: handleProxyError, }, diff --git a/src/shared/key-management/openai/checker.ts b/src/shared/key-management/openai/checker.ts index 71990fd..d49b347 100644 --- a/src/shared/key-management/openai/checker.ts +++ b/src/shared/key-management/openai/checker.ts @@ -59,7 +59,12 @@ export class OpenAIKeyChecker extends KeyCheckerBase { this.updateKey(key.hash, {}); } this.log.info( - { key: key.hash, models: key.modelFamilies, trial: key.isTrial }, + { + key: key.hash, + models: key.modelFamilies, + trial: key.isTrial, + snapshots: key.modelSnapshots, + }, "Checked key." ); } @@ -69,10 +74,11 @@ export class OpenAIKeyChecker extends KeyCheckerBase { ): Promise { const opts = { headers: OpenAIKeyChecker.getHeaders(key) }; const { data } = await axios.get(GET_MODELS_URL, opts); - const models = data.data; - const families = new Set(); - models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo"))); + const models = data.data.map(({ id }) => { + families.add(getOpenAIModelFamily(id, "turbo")); + return id; + }); // disable dall-e for trial keys due to very low per-day quota that tends to // render the key unusable. 
@@ -86,13 +92,16 @@ export class OpenAIKeyChecker extends KeyCheckerBase { // families.delete("dall-e"); // } - // as of 2024-01-10, the models endpoint has a bug and sometimes returns the - // gpt-4-32k-0314 snapshot even though the key doesn't have access to - // base gpt-4-32k. we will ignore this model if the snapshot is returned - // without the base model. - const has32k = models.find(({ id }) => id === "gpt-4-32k"); - if (families.has("gpt4-32k") && !has32k) { - families.delete("gpt4-32k"); + // as of January 2024, 0314 model snapshots are only available on keys which + // have used them in the past. these keys also seem to have 32k-0314 even + // though they don't have the base gpt-4-32k model alias listed. if a key + // has access to both 0314 models we will flag it as such and force add + // gpt4-32k to its model families. + if ( + ["gpt-4-0314", "gpt-4-32k-0314"].every((m) => models.find((n) => n === m)) + ) { + this.log.info({ key: key.hash }, "Added gpt4-32k to -0314 key."); + families.add("gpt4-32k"); } // We want to update the key's model families here, but we don't want to @@ -102,6 +111,7 @@ export class OpenAIKeyChecker extends KeyCheckerBase { const familiesArray = [...families]; const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; this.updateKey(key.hash, { + modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)), modelFamilies: familiesArray, lastChecked: keyFromPool.lastChecked, }); diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts index ecca3d6..0d06cb0 100644 --- a/src/shared/key-management/openai/provider.ts +++ b/src/shared/key-management/openai/provider.ts @@ -18,7 +18,8 @@ export type OpenAIModel = | "gpt-4-1106" | "text-embedding-ada-002" | "dall-e-2" - | "dall-e-3"; + | "dall-e-3" + | string; // Flattening model families instead of using a nested object for easier // cloning. 
@@ -67,6 +68,10 @@ export interface OpenAIKey extends Key, OpenAIKeyUsage { * This key's maximum request rate for GPT-4, per minute. */ gpt4Rpm: number; + /** + * Model snapshots available. + */ + modelSnapshots: string[]; } export type OpenAIKeyUpdate = Omit< @@ -127,6 +132,7 @@ export class OpenAIKeyProvider implements KeyProvider { "gpt4-turboTokens": 0, "dall-eTokens": 0, gpt4Rpm: 0, + modelSnapshots: [], }; this.keys.push(newKey); } @@ -155,23 +161,31 @@ export class OpenAIKeyProvider implements KeyProvider { }); } - public get(model: Model) { + public get(requestModel: Model) { + let model = requestModel; + + // Special case for GPT-4-32k. Some keys have access to only gpt-4-32k-0314 + // but not gpt-4-32k-0613, or its alias gpt-4-32k. Because we add a model + // family if a key has any snapshot, we need to dealias gpt-4-32k here so + // we can look for the specific snapshot. + // gpt-4-32k is superseded by gpt4-turbo so this shouldn't ever change. + if (model === "gpt-4-32k") model = "gpt-4-32k-0613"; + const neededFamily = getOpenAIModelFamily(model); const excludeTrials = model === "text-embedding-ada-002"; + const needsSnapshot = model.match(/-\d{4}(-preview)?$/); const availableKeys = this.keys.filter( // Allow keys which (key) => !key.isDisabled && // are not disabled - key.modelFamilies.includes(neededFamily) && // have access to the model - (!excludeTrials || !key.isTrial) // and are not trials (if applicable) + key.modelFamilies.includes(neededFamily) && // have access to the model family we need + (!excludeTrials || !key.isTrial) && // and are not trials if we don't want them + (!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need ); if (availableKeys.length === 0) { - throw new HttpError( - 402, - `No keys available for model family '${neededFamily}'.` - ); + throw new HttpError(402, `No keys can fulfill request for ${model}`); } // Select a key, from highest priority to lowest priority: