adds separate model detection for gpt-4-32k-0314
This commit is contained in:
parent
7f431de98e
commit
4b86802eb2
|
@ -1,7 +1,7 @@
|
|||
import { RequestHandler, Router } from "express";
|
||||
import { createProxyMiddleware } from "http-proxy-middleware";
|
||||
import { config } from "../config";
|
||||
import { keyPool } from "../shared/key-management";
|
||||
import { keyPool, OpenAIKey } from "../shared/key-management";
|
||||
import {
|
||||
getOpenAIModelFamily,
|
||||
ModelFamily,
|
||||
|
@ -36,8 +36,8 @@ export const KNOWN_OPENAI_MODELS = [
|
|||
"gpt-4-0613",
|
||||
"gpt-4-0314", // EOL 2024-06-13
|
||||
"gpt-4-32k",
|
||||
"gpt-4-32k-0314", // EOL 2024-06-13
|
||||
"gpt-4-32k-0613",
|
||||
// "gpt-4-32k-0314", // EOL 2024-06-13
|
||||
"gpt-3.5-turbo",
|
||||
"gpt-3.5-turbo-0301", // EOL 2024-06-13
|
||||
"gpt-3.5-turbo-0613",
|
||||
|
@ -52,15 +52,21 @@ let modelsCache: any = null;
|
|||
let modelsCacheTime = 0;
|
||||
|
||||
export function generateModelList(models = KNOWN_OPENAI_MODELS) {
|
||||
let available = new Set<OpenAIModelFamily>();
|
||||
// Get available families and snapshots
|
||||
let availableFamilies = new Set<OpenAIModelFamily>();
|
||||
const availableSnapshots = new Set<string>();
|
||||
for (const key of keyPool.list()) {
|
||||
if (key.isDisabled || key.service !== "openai") continue;
|
||||
key.modelFamilies.forEach((family) =>
|
||||
available.add(family as OpenAIModelFamily)
|
||||
);
|
||||
const asOpenAIKey = key as OpenAIKey;
|
||||
asOpenAIKey.modelFamilies.forEach((f) => availableFamilies.add(f));
|
||||
asOpenAIKey.modelSnapshots.forEach((s) => availableSnapshots.add(s));
|
||||
}
|
||||
|
||||
// Remove disabled families
|
||||
const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
|
||||
available = new Set([...available].filter((x) => allowed.has(x)));
|
||||
availableFamilies = new Set(
|
||||
[...availableFamilies].filter((x) => allowed.has(x))
|
||||
);
|
||||
|
||||
return models
|
||||
.map((id) => ({
|
||||
|
@ -81,7 +87,16 @@ export function generateModelList(models = KNOWN_OPENAI_MODELS) {
|
|||
root: id,
|
||||
parent: null,
|
||||
}))
|
||||
.filter((model) => available.has(getOpenAIModelFamily(model.id)));
|
||||
.filter((model) => {
|
||||
// First check if the family is available
|
||||
const hasFamily = availableFamilies.has(getOpenAIModelFamily(model.id));
|
||||
if (!hasFamily) return false;
|
||||
|
||||
// Then for snapshots, ensure the specific snapshot is available
|
||||
const isSnapshot = model.id.match(/-\d{4}(-preview)?$/);
|
||||
if (!isSnapshot) return true;
|
||||
return availableSnapshots.has(model.id);
|
||||
});
|
||||
}
|
||||
|
||||
const handleModelRequest: RequestHandler = (_req, res) => {
|
||||
|
@ -165,7 +180,7 @@ const openaiProxy = createQueueMiddleware({
|
|||
selfHandleResponse: true,
|
||||
logger,
|
||||
on: {
|
||||
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody], }),
|
||||
proxyReq: createOnProxyReqHandler({ pipeline: [addKey, finalizeBody] }),
|
||||
proxyRes: createOnProxyResHandler([openaiResponseHandler]),
|
||||
error: handleProxyError,
|
||||
},
|
||||
|
|
|
@ -59,7 +59,12 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
|
|||
this.updateKey(key.hash, {});
|
||||
}
|
||||
this.log.info(
|
||||
{ key: key.hash, models: key.modelFamilies, trial: key.isTrial },
|
||||
{
|
||||
key: key.hash,
|
||||
models: key.modelFamilies,
|
||||
trial: key.isTrial,
|
||||
snapshots: key.modelSnapshots,
|
||||
},
|
||||
"Checked key."
|
||||
);
|
||||
}
|
||||
|
@ -69,10 +74,11 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
|
|||
): Promise<OpenAIModelFamily[]> {
|
||||
const opts = { headers: OpenAIKeyChecker.getHeaders(key) };
|
||||
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
|
||||
const models = data.data;
|
||||
|
||||
const families = new Set<OpenAIModelFamily>();
|
||||
models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo")));
|
||||
const models = data.data.map(({ id }) => {
|
||||
families.add(getOpenAIModelFamily(id, "turbo"));
|
||||
return id;
|
||||
});
|
||||
|
||||
// disable dall-e for trial keys due to very low per-day quota that tends to
|
||||
// render the key unusable.
|
||||
|
@ -86,13 +92,16 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
|
|||
// families.delete("dall-e");
|
||||
// }
|
||||
|
||||
// as of 2024-01-10, the models endpoint has a bug and sometimes returns the
|
||||
// gpt-4-32k-0314 snapshot even though the key doesn't have access to
|
||||
// base gpt-4-32k. we will ignore this model if the snapshot is returned
|
||||
// without the base model.
|
||||
const has32k = models.find(({ id }) => id === "gpt-4-32k");
|
||||
if (families.has("gpt4-32k") && !has32k) {
|
||||
families.delete("gpt4-32k");
|
||||
// as of January 2024, 0314 model snapshots are only available on keys which
|
||||
// have used them in the past. these keys also seem to have 32k-0314 even
|
||||
// though they don't have the base gpt-4-32k model alias listed. if a key
|
||||
// has access to both 0314 models we will flag it as such and force add
|
||||
// gpt4-32k to its model families.
|
||||
if (
|
||||
["gpt-4-0314", "gpt-4-32k-0314"].every((m) => models.find((n) => n === m))
|
||||
) {
|
||||
this.log.info({ key: key.hash }, "Added gpt4-32k to -0314 key.");
|
||||
families.add("gpt4-32k");
|
||||
}
|
||||
|
||||
// We want to update the key's model families here, but we don't want to
|
||||
|
@ -102,6 +111,7 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
|
|||
const familiesArray = [...families];
|
||||
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
|
||||
this.updateKey(key.hash, {
|
||||
modelSnapshots: models.filter((m) => m.match(/-\d{4}(-preview)?$/)),
|
||||
modelFamilies: familiesArray,
|
||||
lastChecked: keyFromPool.lastChecked,
|
||||
});
|
||||
|
|
|
@ -18,7 +18,8 @@ export type OpenAIModel =
|
|||
| "gpt-4-1106"
|
||||
| "text-embedding-ada-002"
|
||||
| "dall-e-2"
|
||||
| "dall-e-3";
|
||||
| "dall-e-3"
|
||||
| string;
|
||||
|
||||
// Flattening model families instead of using a nested object for easier
|
||||
// cloning.
|
||||
|
@ -67,6 +68,10 @@ export interface OpenAIKey extends Key, OpenAIKeyUsage {
|
|||
* This key's maximum request rate for GPT-4, per minute.
|
||||
*/
|
||||
gpt4Rpm: number;
|
||||
/**
|
||||
* Model snapshots available.
|
||||
*/
|
||||
modelSnapshots: string[];
|
||||
}
|
||||
|
||||
export type OpenAIKeyUpdate = Omit<
|
||||
|
@ -127,6 +132,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
"gpt4-turboTokens": 0,
|
||||
"dall-eTokens": 0,
|
||||
gpt4Rpm: 0,
|
||||
modelSnapshots: [],
|
||||
};
|
||||
this.keys.push(newKey);
|
||||
}
|
||||
|
@ -155,23 +161,31 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
});
|
||||
}
|
||||
|
||||
public get(model: Model) {
|
||||
public get(requestModel: Model) {
|
||||
let model = requestModel;
|
||||
|
||||
// Special case for GPT-4-32k. Some keys have access to only gpt-4-32k-0314
|
||||
// but not gpt-4-32k-0613, or its alias gpt-4-32k. Because we add a model
|
||||
// family if a key has any snapshot, we need to dealias gpt-4-32k here so
|
||||
// we can look for the specific snapshot.
|
||||
// gpt-4-32k is superseded by gpt4-turbo so this shouldn't ever change.
|
||||
if (model === "gpt-4-32k") model = "gpt-4-32k-0613";
|
||||
|
||||
const neededFamily = getOpenAIModelFamily(model);
|
||||
const excludeTrials = model === "text-embedding-ada-002";
|
||||
const needsSnapshot = model.match(/-\d{4}(-preview)?$/);
|
||||
|
||||
const availableKeys = this.keys.filter(
|
||||
// Allow keys which
|
||||
(key) =>
|
||||
!key.isDisabled && // are not disabled
|
||||
key.modelFamilies.includes(neededFamily) && // have access to the model
|
||||
(!excludeTrials || !key.isTrial) // and are not trials (if applicable)
|
||||
key.modelFamilies.includes(neededFamily) && // have access to the model family we need
|
||||
(!excludeTrials || !key.isTrial) && // and are not trials if we don't want them
|
||||
(!needsSnapshot || key.modelSnapshots.includes(model)) // and have the specific snapshot we need
|
||||
);
|
||||
|
||||
if (availableKeys.length === 0) {
|
||||
throw new HttpError(
|
||||
402,
|
||||
`No keys available for model family '${neededFamily}'.`
|
||||
);
|
||||
throw new HttpError(402, `No keys can fulfill request for ${model}`);
|
||||
}
|
||||
|
||||
// Select a key, from highest priority to lowest priority:
|
||||
|
|
Loading…
Reference in New Issue