adds separate model detection for gpt-4-32k-0314

This commit is contained in:
nai-degen 2024-03-10 19:16:11 -05:00
parent 7f431de98e
commit 4b86802eb2
3 changed files with 67 additions and 28 deletions

View File

@ -1,7 +1,7 @@
import { RequestHandler, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { config } from "../config";
import { keyPool } from "../shared/key-management";
import { keyPool, OpenAIKey } from "../shared/key-management";
import {
getOpenAIModelFamily,
ModelFamily,
@ -36,8 +36,8 @@ export const KNOWN_OPENAI_MODELS = [
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-4-32k-0613",
// "gpt-4-32k-0314", // EOL 2024-06-13
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613",
@ -52,15 +52,21 @@ let modelsCache: any = null;
let modelsCacheTime = 0;
export function generateModelList(models = KNOWN_OPENAI_MODELS) {
let available = new Set<OpenAIModelFamily>();
// Get available families and snapshots
let availableFamilies = new Set<OpenAIModelFamily>();
const availableSnapshots = new Set<string>();
for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue;
key.modelFamilies.forEach((family) =>
available.add(family as OpenAIModelFamily)
);
const asOpenAIKey = key as OpenAIKey;
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
}
// Remove disabled families
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x)));
availableFamilies = new Set(
[...availableFamilies].filter((x) => allowed.has(x))
);
return models
.map((id) => ({
@ -81,7 +87,16 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
root: id,
parent: null,
}))
.filter((model) => available.has(getOpenAIModelFamily(model.id)));
.filter((model) => {
// First check if the family is available
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
if (!hasFamily) return false;
// Then for snapshots, ensure the specific snapshot is available
const isSnapshot = model.id.match(/-\d{4}(-preview)?$/);
if (!isSnapshot) return true;
return availableSnapshots.has(model.id);
});
}
const handleModelRequest: RequestHandler = (_req, res) => {
@ -165,7 +180,7 @@ const openaiProxy = createQueueMiddleware({
selfHandleResponse: true,
logger,
on: {
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody] }),
proxyRes: createOnProxyResHandler([openaiResponseHandler]),
error: handleProxyError,
},

View File

@ -59,7 +59,12 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
this.updateKey(key.hash, {});
}
this.log.info(
{ key: key.hash, models: key.modelFamilies, trial: key.isTrial },
{
key: key.hash,
models: key.modelFamilies,
trial: key.isTrial,
snapshots: key.modelSnapshots,
},
"Checked key."
);
}
@ -69,10 +74,11 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
): Promise<OpenAIModelFamily[]> {
const opts = { headers: OpenAIKeyChecker.getHeaders(key) };
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
const models = data.data;
const families = new Set<OpenAIModelFamily>();
models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo")));
const models = data.data.map(({ id }) => {
families.add(getOpenAIModelFamily(id, "turbo"));
return id;
});
// disable dall-e for trial keys due to very low per-day quota that tends to
// render the key unusable.
@ -86,13 +92,16 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
// families.delete("dall-e");
// }
// as of 2024-01-10, the models endpoint has a bug and sometimes returns the
// gpt-4-32k-0314 snapshot even though the key doesn't have access to
// base gpt-4-32k. we will ignore this model if the snapshot is returned
// without the base model.
const has32k = models.find(({ id }) => id === "gpt-4-32k");
if (families.has("gpt4-32k") && !has32k) {
families.delete("gpt4-32k");
// as of January 2024, 0314 model snapshots are only available on keys which
// have used them in the past. these keys also seem to have 32k-0314 even
// though they don't have the base gpt-4-32k model alias listed. if a key
// has access to both 0314 models we will flag it as such and force add
// gpt4-32k to its model families.
if (
["gpt-4-0314", "gpt-4-32k-0314"].every((m) => models.find((n) => n === m))
) {
this.log.info({ key: key.hash }, "Added gpt4-32k to -0314 key.");
families.add("gpt4-32k");
}
// We want to update the key's model families here, but we don't want to
@ -102,6 +111,7 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const familiesArray = [...families];
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
this.updateKey(key.hash, {
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
modelFamilies: familiesArray,
lastChecked: keyFromPool.lastChecked,
});

View File

@ -18,7 +18,8 @@ export type OpenAIModel =
| "gpt-4-1106"
| "text-embedding-ada-002"
| "dall-e-2"
| "dall-e-3";
| "dall-e-3"
| string;
// Flattening model families instead of using a nested object for easier
// cloning.
@ -67,6 +68,10 @@ export interface OpenAIKey extends Key, OpenAIKeyUsage {
* This key's maximum request rate for GPT-4, per minute.
*/
gpt4Rpm: number;
/**
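* Model snapshots available to this key, i.e. the model IDs reported for the
* key that end in a four-digit suffix such as `-0314`, optionally followed by
* `-preview`.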
*/
modelSnapshots: string[];
}
export type OpenAIKeyUpdate = Omit<
@ -127,6 +132,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
"gpt4-turboTokens": 0,
"dall-eTokens": 0,
gpt4Rpm: 0,
modelSnapshots: [],
};
this.keys.push(newKey);
}
@ -155,23 +161,31 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
});
}
public get(model: Model) {
public get(requestModel: Model) {
let model = requestModel;
// Special case for GPT-4-32k. Some keys have access to only gpt-4-32k-0314
// but not gpt-4-32k-0613, or its alias gpt-4-32k. Because we add a model
// family if a key has any snapshot, we need to dealias gpt-4-32k here so
// we can look for the specific snapshot.
// gpt-4-32k is superseded by gpt4-turbo, so the alias target shouldn't ever
// change.
if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
const neededFamily = getOpenAIModelFamily(model);
const excludeTrials = model === "text-embedding-ada-002";
const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
const availableKeys = this.keys.filter(
// Allow keys which
(key) =>
!key.isDisabled && // are not disabled
key.modelFamilies.includes(neededFamily) && // have access to the model
(!excludeTrials || !key.isTrial) // and are not trials (if applicable)
key.modelFamilies.includes(neededFamily) && // have access to the model family we need
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
);
if (availableKeys.length === 0) {
throw new HttpError(
402,
`No keys available for model family '${neededFamily}'.`
);
throw new HttpError(402, `No keys can fulfill request for ${model}`);
}
// Select a key, from highest priority to lowest priority: