Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)
This commit is contained in:
parent
cbf9f16108
commit
aa5380d2ef
|
@ -16,7 +16,6 @@
|
|||
"firebase-admin": "^11.10.1",
|
||||
"googleapis": "^122.0.0",
|
||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||
"openai": "^3.2.1",
|
||||
"pino": "^8.11.0",
|
||||
"pino-http": "^8.3.3",
|
||||
"showdown": "^2.1.0",
|
||||
|
@ -3163,23 +3162,6 @@
|
|||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openai": {
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
|
||||
"integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
|
||||
"dependencies": {
|
||||
"axios": "^0.26.0",
|
||||
"form-data": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/openai/node_modules/axios": {
|
||||
"version": "0.26.1",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.14.8"
|
||||
}
|
||||
},
|
||||
"node_modules/p-limit": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
"firebase-admin": "^11.10.1",
|
||||
"googleapis": "^122.0.0",
|
||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||
"openai": "^3.2.1",
|
||||
"pino": "^8.11.0",
|
||||
"pino-http": "^8.3.3",
|
||||
"showdown": "^2.1.0",
|
||||
|
|
|
@ -88,11 +88,11 @@ type Config = {
|
|||
*
|
||||
* `none`: Hide quota information
|
||||
*
|
||||
* `partial`: Display quota information only as a percentage
|
||||
* `partial`: (deprecated) Same as `full` because usage is no longer tracked
|
||||
*
|
||||
* `full`: Display quota information as usage against total capacity
|
||||
* `full`: Displays information about keys' quota limits
|
||||
*/
|
||||
quotaDisplayMode: "none" | "partial" | "full";
|
||||
quotaDisplayMode: "none" | "full";
|
||||
/**
|
||||
* Which request queueing strategy to use when keys are over their rate limit.
|
||||
*
|
||||
|
@ -152,7 +152,7 @@ export const config: Config = {
|
|||
),
|
||||
logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
|
||||
checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
|
||||
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
|
||||
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
|
||||
promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
|
||||
promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
|
||||
googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
|
||||
|
|
|
@ -2,7 +2,7 @@ import fs from "fs";
|
|||
import { Request, Response } from "express";
|
||||
import showdown from "showdown";
|
||||
import { config, listConfig } from "./config";
|
||||
import { keyPool } from "./key-management";
|
||||
import { OpenAIKey, keyPool } from "./key-management";
|
||||
import { getUniqueIps } from "./proxy/rate-limit";
|
||||
import {
|
||||
QueuePartition,
|
||||
|
@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
|
|||
type ServiceInfo = {
|
||||
activeKeys: number;
|
||||
trialKeys?: number;
|
||||
quota: string;
|
||||
activeLimit: string;
|
||||
revokedKeys?: number;
|
||||
overQuotaKeys?: number;
|
||||
proomptersInQueue: number;
|
||||
estimatedQueueTime: string;
|
||||
};
|
||||
|
@ -88,51 +90,55 @@ type ServiceInfo = {
|
|||
|
||||
function getOpenAIInfo() {
|
||||
const info: { [model: string]: Partial<ServiceInfo> } = {};
|
||||
const keys = keyPool.list().filter((k) => k.service === "openai");
|
||||
const keys = keyPool
|
||||
.list()
|
||||
.filter((k) => k.service === "openai") as OpenAIKey[];
|
||||
const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
|
||||
|
||||
if (keyPool.anyUnchecked()) {
|
||||
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
|
||||
info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
|
||||
info.status =
|
||||
`Performing startup key checks (${uncheckedKeys.length} left).` as any;
|
||||
} else {
|
||||
delete info.status;
|
||||
}
|
||||
|
||||
if (config.checkKeys) {
|
||||
const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
|
||||
const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
|
||||
const turboKeys = keys.filter((k) => !k.isGpt4);
|
||||
const gpt4Keys = keys.filter((k) => k.isGpt4);
|
||||
|
||||
const quota: Record<string, string> = { turbo: "", gpt4: "" };
|
||||
const turboQuota = keyPool.remainingQuota("openai") * 100;
|
||||
const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
|
||||
const turboQuota = keyPool.activeLimitInUsd("openai");
|
||||
const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
|
||||
|
||||
if (config.quotaDisplayMode === "full") {
|
||||
const turboUsage = keyPool.usageInUsd("openai");
|
||||
const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
|
||||
quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
|
||||
quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
|
||||
} else {
|
||||
quota.turbo = `${Math.round(turboQuota)}%`;
|
||||
quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
|
||||
// Don't invert this condition; some proxies may be using the now-deprecated
|
||||
// 'partial' option which we want to treat as 'full' here.
|
||||
if (config.quotaDisplayMode !== "none") {
|
||||
quota.turbo = turboQuota;
|
||||
quota.gpt4 = gpt4Quota;
|
||||
}
|
||||
|
||||
info.turbo = {
|
||||
activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
|
||||
trialKeys: turboKeys.filter((k) => k.isTrial).length,
|
||||
quota: quota.turbo,
|
||||
activeLimit: quota.turbo,
|
||||
revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
|
||||
overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
|
||||
};
|
||||
|
||||
if (hasGpt4) {
|
||||
info.gpt4 = {
|
||||
activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
|
||||
trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
|
||||
quota: quota.gpt4,
|
||||
activeLimit: quota.gpt4,
|
||||
revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
|
||||
overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
|
||||
};
|
||||
}
|
||||
|
||||
if (config.quotaDisplayMode === "none") {
|
||||
delete info.turbo?.quota;
|
||||
delete info.gpt4?.quota;
|
||||
delete info.turbo?.activeLimit;
|
||||
delete info.gpt4?.activeLimit;
|
||||
}
|
||||
} else {
|
||||
info.status = "Key checking is disabled." as any;
|
||||
|
|
|
@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
|
||||
}
|
||||
|
||||
public remainingQuota() {
|
||||
const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
|
||||
const allKeys = this.keys.length;
|
||||
if (activeKeys === 0) return 0;
|
||||
return Math.round((activeKeys / allKeys) * 100) / 100;
|
||||
}
|
||||
|
||||
public usageInUsd() {
|
||||
return "$0.00 / ∞";
|
||||
public activeLimitInUsd() {
|
||||
return "∞";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
|
|||
anyUnchecked(): boolean;
|
||||
incrementPrompt(hash: string): void;
|
||||
getLockoutPeriod(model: Model): number;
|
||||
remainingQuota(options?: Record<string, unknown>): number;
|
||||
usageInUsd(options?: Record<string, unknown>): string;
|
||||
activeLimitInUsd(options?: Record<string, unknown>): string;
|
||||
markRateLimited(hash: string): void;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,9 +32,15 @@ export class KeyPool {
|
|||
return this.keyProviders.flatMap((provider) => provider.list());
|
||||
}
|
||||
|
||||
public disable(key: Key): void {
|
||||
public disable(key: Key, reason: "quota" | "revoked"): void {
|
||||
const service = this.getKeyProvider(key.service);
|
||||
service.disable(key);
|
||||
if (service instanceof OpenAIKeyProvider) {
|
||||
service.update(key.hash, {
|
||||
isRevoked: reason === "revoked",
|
||||
isOverQuota: reason === "quota",
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public update(key: Key, props: AllowedPartial): void {
|
||||
|
@ -75,18 +81,11 @@ export class KeyPool {
|
|||
}
|
||||
}
|
||||
|
||||
public remainingQuota(
|
||||
service: AIService,
|
||||
options?: Record<string, unknown>
|
||||
): number {
|
||||
return this.getKeyProvider(service).remainingQuota(options);
|
||||
}
|
||||
|
||||
public usageInUsd(
|
||||
public activeLimitInUsd(
|
||||
service: AIService,
|
||||
options?: Record<string, unknown>
|
||||
): string {
|
||||
return this.getKeyProvider(service).usageInUsd(options);
|
||||
return this.getKeyProvider(service).activeLimitInUsd(options);
|
||||
}
|
||||
|
||||
private getService(model: Model): AIService {
|
||||
|
|
|
@ -1,14 +1,24 @@
|
|||
import axios, { AxiosError } from "axios";
|
||||
import { Configuration, OpenAIApi } from "openai";
|
||||
import { logger } from "../../logger";
|
||||
import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
|
||||
|
||||
/** Minimum time in between any two key checks. */
|
||||
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
|
||||
const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
|
||||
/**
|
||||
* Minimum time in between checks for a given key. Because we can no longer
|
||||
* read quota usage, there is little reason to check a single key more often
|
||||
* than this.
|
||||
**/
|
||||
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
|
||||
|
||||
const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
|
||||
const GET_MODELS_URL = "https://api.openai.com/v1/models";
|
||||
const GET_SUBSCRIPTION_URL =
|
||||
"https://api.openai.com/dashboard/billing/subscription";
|
||||
const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
|
||||
|
||||
type GetModelsResponse = {
|
||||
data: [{ id: string }];
|
||||
};
|
||||
|
||||
type GetSubscriptionResponse = {
|
||||
plan: { title: string };
|
||||
|
@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
|
|||
system_hard_limit_usd: number;
|
||||
};
|
||||
|
||||
type GetUsageResponse = {
|
||||
total_usage: number;
|
||||
};
|
||||
|
||||
type OpenAIError = {
|
||||
error: { type: string; code: string; param: unknown; message: string };
|
||||
};
|
||||
|
@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
|
|||
/**
|
||||
* Schedules the next check. If there are still keys yet to be checked, it
|
||||
* will schedule a check immediately for the next unchecked key. Otherwise,
|
||||
* it will schedule a check in several minutes for the oldest key.
|
||||
* it will schedule a check for the least recently checked key, respecting
|
||||
* the minimum check interval.
|
||||
**/
|
||||
private scheduleNextCheck() {
|
||||
const enabledKeys = this.keys.filter((key) => !key.isDisabled);
|
||||
|
@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
|
|||
key.lastChecked < oldest.lastChecked ? key : oldest
|
||||
);
|
||||
|
||||
// Don't check any individual key more than once every 5 minutes.
|
||||
// Also, don't check anything more often than once every 3 seconds.
|
||||
// Don't check any individual key too often.
|
||||
// Don't check anything at all at a rate faster than once per 3 seconds.
|
||||
const nextCheck = Math.max(
|
||||
oldestKey.lastChecked + KEY_CHECK_PERIOD,
|
||||
this.lastCheck + MIN_CHECK_INTERVAL
|
||||
|
@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
|
|||
this.log.debug({ key: key.hash }, "Checking key...");
|
||||
let isInitialCheck = !key.lastChecked;
|
||||
try {
|
||||
// During the initial check we need to get the subscription first because
|
||||
// trials have different behavior.
|
||||
// We only need to check for provisioned models on the initial check.
|
||||
if (isInitialCheck) {
|
||||
const subscription = await this.getSubscription(key);
|
||||
this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
|
||||
if (key.isTrial) {
|
||||
this.log.debug(
|
||||
{ key: key.hash },
|
||||
"Attempting generation on trial key."
|
||||
);
|
||||
await this.assertCanGenerate(key);
|
||||
}
|
||||
const [provisionedModels, usage] = await Promise.all([
|
||||
this.getProvisionedModels(key),
|
||||
this.getUsage(key),
|
||||
]);
|
||||
const [subscription, provisionedModels, _livenessTest] =
|
||||
await Promise.all([
|
||||
this.getSubscription(key),
|
||||
this.getProvisionedModels(key),
|
||||
this.testLiveness(key),
|
||||
]);
|
||||
const updates = {
|
||||
isGpt4: provisionedModels.gpt4,
|
||||
isTrial: !subscription.has_payment_method,
|
||||
softLimit: subscription.soft_limit_usd,
|
||||
hardLimit: subscription.hard_limit_usd,
|
||||
systemHardLimit: subscription.system_hard_limit_usd,
|
||||
usage,
|
||||
};
|
||||
this.updateKey(key.hash, updates);
|
||||
} else {
|
||||
// Don't check provisioned models after the initial check because it's
|
||||
// not likely to change.
|
||||
const [subscription, usage] = await Promise.all([
|
||||
// Provisioned models don't change, so we don't need to check them again
|
||||
const [subscription, _livenessTest] = await Promise.all([
|
||||
this.getSubscription(key),
|
||||
this.getUsage(key),
|
||||
this.testLiveness(key),
|
||||
]);
|
||||
const updates = {
|
||||
softLimit: subscription.soft_limit_usd,
|
||||
hardLimit: subscription.hard_limit_usd,
|
||||
systemHardLimit: subscription.system_hard_limit_usd,
|
||||
usage,
|
||||
};
|
||||
this.updateKey(key.hash, updates);
|
||||
}
|
||||
this.log.info(
|
||||
{ key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
|
||||
{ key: key.hash, hardLimit: key.hardLimit },
|
||||
"Key check complete."
|
||||
);
|
||||
} catch (error) {
|
||||
|
@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
|
|||
private async getProvisionedModels(
|
||||
key: OpenAIKey
|
||||
): Promise<{ turbo: boolean; gpt4: boolean }> {
|
||||
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
|
||||
const models = (await openai.listModels()!).data.data;
|
||||
const opts = { headers: { Authorization: `Bearer ${key.key}` } };
|
||||
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
|
||||
const models = data.data;
|
||||
const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
|
||||
const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
|
||||
// We want to update the key's `isGpt4` flag here, but we don't want to
|
||||
// update its `lastChecked` timestamp because we need to let the liveness
|
||||
// check run before we can consider the key checked.
|
||||
|
||||
// Need to use `find` here because keys are cloned from the pool.
|
||||
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
|
||||
this.updateKey(key.hash, {
|
||||
isGpt4: gpt4,
|
||||
lastChecked: keyFromPool.lastChecked,
|
||||
});
|
||||
return { turbo, gpt4 };
|
||||
}
|
||||
|
||||
|
@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
|
|||
return data;
|
||||
}
|
||||
|
||||
private async getUsage(key: OpenAIKey) {
|
||||
const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
|
||||
const url = `${GET_USAGE_URL}?${querystring}`;
|
||||
const { data } = await axios.get<GetUsageResponse>(url, {
|
||||
headers: { Authorization: `Bearer ${key.key}` },
|
||||
});
|
||||
return parseFloat((data.total_usage / 100).toFixed(2));
|
||||
}
|
||||
|
||||
private handleAxiosError(key: OpenAIKey, error: AxiosError) {
|
||||
if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
|
||||
if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
|
||||
const { status, data } = error.response;
|
||||
if (status === 401) {
|
||||
this.log.warn(
|
||||
{ key: key.hash, error: data },
|
||||
"Key is invalid or revoked. Disabling key."
|
||||
);
|
||||
this.updateKey(key.hash, { isDisabled: true });
|
||||
} else if (status === 429 && data.error.type === "insufficient_quota") {
|
||||
this.log.warn(
|
||||
{ key: key.hash, isTrial: key.isTrial, error: data },
|
||||
"Key is out of quota. Disabling key."
|
||||
);
|
||||
this.updateKey(key.hash, { isDisabled: true });
|
||||
}
|
||||
else if (status === 429 && data.error.type === "access_terminated") {
|
||||
this.log.warn(
|
||||
{ key: key.hash, isTrial: key.isTrial, error: data },
|
||||
"Key has been terminated due to policy violations. Disabling key."
|
||||
);
|
||||
this.updateKey(key.hash, { isDisabled: true });
|
||||
this.updateKey(key.hash, {
|
||||
isDisabled: true,
|
||||
isRevoked: true,
|
||||
isGpt4: false,
|
||||
});
|
||||
} else if (status === 429) {
|
||||
switch (data.error.type) {
|
||||
case "insufficient_quota":
|
||||
case "access_terminated":
|
||||
case "billing_not_active":
|
||||
const isOverQuota = data.error.type === "insufficient_quota";
|
||||
const isRevoked = !isOverQuota;
|
||||
const isGpt4 = isRevoked ? false : key.isGpt4;
|
||||
this.log.warn(
|
||||
{ key: key.hash, rateLimitType: data.error.type, error: data },
|
||||
"Key returned a non-transient 429 error. Disabling key."
|
||||
);
|
||||
this.updateKey(key.hash, {
|
||||
isDisabled: true,
|
||||
isRevoked,
|
||||
isOverQuota,
|
||||
isGpt4,
|
||||
});
|
||||
break;
|
||||
case "requests":
|
||||
// Trial keys have extremely low requests-per-minute limits and we
|
||||
// can often hit them just while checking the key, so we need to
|
||||
// retry the check later to know if the key has quota remaining.
|
||||
this.log.warn(
|
||||
{ key: key.hash, error: data },
|
||||
"Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
|
||||
);
|
||||
// To trigger a shorter than usual delay before the next check, we
|
||||
// will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
|
||||
// This will cause the usual key check scheduling logic to schedule
|
||||
// the next check in 15 seconds. This also prevents the key from
|
||||
// holding up startup checks for other keys.
|
||||
const fifteenSeconds = 15 * 1000;
|
||||
const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
|
||||
this.updateKey(key.hash, { lastChecked: next });
|
||||
break;
|
||||
case "tokens":
|
||||
// Hitting a token rate limit, even on a trial key, actually implies
|
||||
// that the key is valid and can generate completions, so we will
|
||||
// treat this as effectively a successful `testLiveness` call.
|
||||
this.log.info(
|
||||
{ key: key.hash },
|
||||
"Key is currently `tokens` rate limited; assuming it is operational."
|
||||
);
|
||||
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||
break;
|
||||
default:
|
||||
this.log.error(
|
||||
{ key: key.hash, rateLimitType: data.error.type, error: data },
|
||||
"Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
|
||||
);
|
||||
// We don't know what this error means, so we just let the key
|
||||
// through and maybe it will fail when someone tries to use it.
|
||||
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||
}
|
||||
} else {
|
||||
this.log.error(
|
||||
{ key: key.hash, status, error: data },
|
||||
"Encountered API error while checking key."
|
||||
"Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
|
||||
);
|
||||
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||
}
|
||||
return;
|
||||
}
|
||||
this.log.error(
|
||||
{ key: key.hash, error },
|
||||
"Network error while checking key; trying again later."
|
||||
{ key: key.hash, error: error.message },
|
||||
"Network error while checking key; trying this key again in a minute."
|
||||
);
|
||||
const oneMinute = 60 * 1000;
|
||||
const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
|
||||
this.updateKey(key.hash, { lastChecked: next });
|
||||
}
|
||||
|
||||
/**
|
||||
* Trial key usage reporting is inaccurate, so we need to run an actual
|
||||
* completion to test them for liveness.
|
||||
* Tests whether the key is valid and has quota remaining. The request we send
|
||||
* is actually not valid, but keys which are revoked or out of quota will fail
|
||||
* with a 401 or 429 error instead of the expected 400 Bad Request error.
|
||||
* This lets us test keys without spending any quota.
|
||||
*/
|
||||
private async assertCanGenerate(key: OpenAIKey): Promise<void> {
|
||||
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
|
||||
// This will throw an AxiosError if the key is invalid or out of quota.
|
||||
await openai.createChatCompletion({
|
||||
private async testLiveness(key: OpenAIKey): Promise<void> {
|
||||
const payload = {
|
||||
model: "gpt-3.5-turbo",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
max_tokens: 1,
|
||||
});
|
||||
max_tokens: -1,
|
||||
messages: [{ role: "user", content: "" }],
|
||||
};
|
||||
const { data } = await axios.post<OpenAIError>(
|
||||
POST_CHAT_COMPLETIONS_URL,
|
||||
payload,
|
||||
{
|
||||
headers: { Authorization: `Bearer ${key.key}` },
|
||||
validateStatus: (status) => status === 400,
|
||||
}
|
||||
);
|
||||
if (data.error.type === "invalid_request_error") {
|
||||
// This is the expected error type for our bad prompt, so key is valid.
|
||||
return;
|
||||
} else {
|
||||
this.log.warn(
|
||||
{ key: key.hash, error: data },
|
||||
"Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
static getUsageQuerystring(isTrial: boolean) {
|
||||
// For paid keys, the limit resets every month, so we can use the first day
|
||||
// of the current month.
|
||||
// For trial keys, the limit does not reset and we don't know when the key
|
||||
// was created, so we use 99 days ago because that's as far back as the API
|
||||
// will let us go.
|
||||
|
||||
// End date needs to be set to the beginning of the next day so that we get
|
||||
// usage for the current day.
|
||||
|
||||
const today = new Date();
|
||||
const startDate = isTrial
|
||||
? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
|
||||
: new Date(today.getFullYear(), today.getMonth(), 1);
|
||||
const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
|
||||
return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
|
||||
endDate.toISOString().split("T")[0]
|
||||
}`;
|
||||
}
|
||||
|
||||
static errorIsOpenAiError(
|
||||
static errorIsOpenAIError(
|
||||
error: AxiosError
|
||||
): error is AxiosError<OpenAIError> {
|
||||
const data = error.response?.data as any;
|
||||
|
|
|
@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
|
|||
|
||||
export interface OpenAIKey extends Key {
|
||||
readonly service: "openai";
|
||||
/** The current usage of this key. */
|
||||
usage: number;
|
||||
/** Set when a key check or request returns a 401, or a 429 of type `access_terminated` or `billing_not_active`. */
|
||||
isRevoked: boolean;
|
||||
/** Set when a key check or request returns a 429 of type `insufficient_quota`. */
|
||||
isOverQuota: boolean;
|
||||
/** Threshold at which a warning email will be sent by OpenAI. */
|
||||
softLimit: number;
|
||||
/** Threshold at which the key will be disabled because it has reached the user-defined limit. */
|
||||
|
@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
|
|||
|
||||
export type OpenAIKeyUpdate = Omit<
|
||||
Partial<OpenAIKey>,
|
||||
"key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
|
||||
"key" | "hash" | "promptCount"
|
||||
>;
|
||||
|
||||
export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||
|
@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
isGpt4: true,
|
||||
isTrial: false,
|
||||
isDisabled: false,
|
||||
isRevoked: false,
|
||||
isOverQuota: false,
|
||||
softLimit: 0,
|
||||
hardLimit: 0,
|
||||
systemHardLimit: 0,
|
||||
|
@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
/** Called by the key checker to update key information. */
|
||||
public update(keyHash: string, update: OpenAIKeyUpdate) {
|
||||
const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
|
||||
Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
|
||||
Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
|
||||
// this.writeKeyStatus();
|
||||
}
|
||||
|
||||
|
@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
const keyFromPool = this.keys.find((k) => k.key === key.key);
|
||||
if (!keyFromPool || keyFromPool.isDisabled) return;
|
||||
keyFromPool.isDisabled = true;
|
||||
// If it's disabled just set the usage to the hard limit so it doesn't
|
||||
// mess with the aggregate usage.
|
||||
keyFromPool.usage = keyFromPool.hardLimit;
|
||||
this.log.warn({ key: key.hash }, "Key disabled");
|
||||
}
|
||||
|
||||
|
@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the remaining aggregate quota for all keys as a percentage.
|
||||
* Can go slightly negative because keys will typically go slightly over their
|
||||
* limit before being disabled. Can sometimes go *really* negative if the
|
||||
* cron job OpenAI uses to disable keys fails, as the key will essentially
|
||||
* have unlimited quota.
|
||||
**/
|
||||
public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
|
||||
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
|
||||
if (keys.length === 0) return 0;
|
||||
|
||||
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
|
||||
* Returns the total quota limit of all keys in USD. Keys which are disabled
|
||||
* are not included in the total.
|
||||
*/
|
||||
public activeLimitInUsd(
|
||||
{ gpt4 }: { gpt4: boolean } = { gpt4: false }
|
||||
): string {
|
||||
const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
|
||||
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
|
||||
|
||||
return 1 - totalUsage / totalLimit;
|
||||
}
|
||||
|
||||
/** Returns used and available usage in USD. */
|
||||
public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
|
||||
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
|
||||
if (keys.length === 0) return "???";
|
||||
|
||||
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
|
||||
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
|
||||
|
||||
return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
|
||||
return `$${totalLimit.toFixed(2)}`;
|
||||
}
|
||||
|
||||
/** Writes key status to disk. */
|
||||
|
|
|
@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
|||
}
|
||||
} else if (statusCode === 401) {
|
||||
// Key is invalid or was revoked
|
||||
keyPool.disable(req.key!);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
|
||||
} else if (statusCode === 429) {
|
||||
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
||||
|
@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
|
|||
const type = errorPayload.error?.type;
|
||||
if (type === "insufficient_quota") {
|
||||
// Billing quota exceeded (key is dead, disable it)
|
||||
keyPool.disable(req.key!);
|
||||
keyPool.disable(req.key!, "quota");
|
||||
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
|
||||
} else if (type === "access_terminated") {
|
||||
// Account banned (key is dead, disable it)
|
||||
keyPool.disable(req.key!);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
|
||||
} else if (type === "billing_not_active") {
|
||||
// Billing is not active (key is dead, disable it)
|
||||
keyPool.disable(req.key!);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
|
||||
} else if (type === "requests" || type === "tokens") {
|
||||
// Per-minute request or token rate limit is exceeded, which we can retry
|
||||
|
|
Loading…
Reference in New Issue