Rework OpenAIKeyChecker to remove usage tracking and test all keys for liveness (khanon/oai-reverse-proxy!29)
This commit is contained in:
parent
cbf9f16108
commit
aa5380d2ef
|
@ -16,7 +16,6 @@
|
||||||
"firebase-admin": "^11.10.1",
|
"firebase-admin": "^11.10.1",
|
||||||
"googleapis": "^122.0.0",
|
"googleapis": "^122.0.0",
|
||||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||||
"openai": "^3.2.1",
|
|
||||||
"pino": "^8.11.0",
|
"pino": "^8.11.0",
|
||||||
"pino-http": "^8.3.3",
|
"pino-http": "^8.3.3",
|
||||||
"showdown": "^2.1.0",
|
"showdown": "^2.1.0",
|
||||||
|
@ -3163,23 +3162,6 @@
|
||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/openai": {
|
|
||||||
"version": "3.2.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
|
|
||||||
"integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
|
|
||||||
"dependencies": {
|
|
||||||
"axios": "^0.26.0",
|
|
||||||
"form-data": "^4.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/openai/node_modules/axios": {
|
|
||||||
"version": "0.26.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
|
|
||||||
"integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
|
|
||||||
"dependencies": {
|
|
||||||
"follow-redirects": "^1.14.8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/p-limit": {
|
"node_modules/p-limit": {
|
||||||
"version": "3.1.0",
|
"version": "3.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz",
|
||||||
|
|
|
@ -25,7 +25,6 @@
|
||||||
"firebase-admin": "^11.10.1",
|
"firebase-admin": "^11.10.1",
|
||||||
"googleapis": "^122.0.0",
|
"googleapis": "^122.0.0",
|
||||||
"http-proxy-middleware": "^3.0.0-beta.1",
|
"http-proxy-middleware": "^3.0.0-beta.1",
|
||||||
"openai": "^3.2.1",
|
|
||||||
"pino": "^8.11.0",
|
"pino": "^8.11.0",
|
||||||
"pino-http": "^8.3.3",
|
"pino-http": "^8.3.3",
|
||||||
"showdown": "^2.1.0",
|
"showdown": "^2.1.0",
|
||||||
|
|
|
@ -88,11 +88,11 @@ type Config = {
|
||||||
*
|
*
|
||||||
* `none`: Hide quota information
|
* `none`: Hide quota information
|
||||||
*
|
*
|
||||||
* `partial`: Display quota information only as a percentage
|
* `partial`: (deprecated) Same as `full` because usage is no longer tracked
|
||||||
*
|
*
|
||||||
* `full`: Display quota information as usage against total capacity
|
* `full`: Displays information about keys' quota limits
|
||||||
*/
|
*/
|
||||||
quotaDisplayMode: "none" | "partial" | "full";
|
quotaDisplayMode: "none" | "full";
|
||||||
/**
|
/**
|
||||||
* Which request queueing strategy to use when keys are over their rate limit.
|
* Which request queueing strategy to use when keys are over their rate limit.
|
||||||
*
|
*
|
||||||
|
@ -152,7 +152,7 @@ export const config: Config = {
|
||||||
),
|
),
|
||||||
logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
|
logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
|
||||||
checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
|
checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
|
||||||
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "partial"),
|
quotaDisplayMode: getEnvWithDefault("QUOTA_DISPLAY_MODE", "full"),
|
||||||
promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
|
promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
|
||||||
promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
|
promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
|
||||||
googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
|
googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
|
||||||
|
|
|
@ -2,7 +2,7 @@ import fs from "fs";
|
||||||
import { Request, Response } from "express";
|
import { Request, Response } from "express";
|
||||||
import showdown from "showdown";
|
import showdown from "showdown";
|
||||||
import { config, listConfig } from "./config";
|
import { config, listConfig } from "./config";
|
||||||
import { keyPool } from "./key-management";
|
import { OpenAIKey, keyPool } from "./key-management";
|
||||||
import { getUniqueIps } from "./proxy/rate-limit";
|
import { getUniqueIps } from "./proxy/rate-limit";
|
||||||
import {
|
import {
|
||||||
QueuePartition,
|
QueuePartition,
|
||||||
|
@ -78,7 +78,9 @@ function cacheInfoPageHtml(baseUrl: string) {
|
||||||
type ServiceInfo = {
|
type ServiceInfo = {
|
||||||
activeKeys: number;
|
activeKeys: number;
|
||||||
trialKeys?: number;
|
trialKeys?: number;
|
||||||
quota: string;
|
activeLimit: string;
|
||||||
|
revokedKeys?: number;
|
||||||
|
overQuotaKeys?: number;
|
||||||
proomptersInQueue: number;
|
proomptersInQueue: number;
|
||||||
estimatedQueueTime: string;
|
estimatedQueueTime: string;
|
||||||
};
|
};
|
||||||
|
@ -88,51 +90,55 @@ type ServiceInfo = {
|
||||||
|
|
||||||
function getOpenAIInfo() {
|
function getOpenAIInfo() {
|
||||||
const info: { [model: string]: Partial<ServiceInfo> } = {};
|
const info: { [model: string]: Partial<ServiceInfo> } = {};
|
||||||
const keys = keyPool.list().filter((k) => k.service === "openai");
|
const keys = keyPool
|
||||||
|
.list()
|
||||||
|
.filter((k) => k.service === "openai") as OpenAIKey[];
|
||||||
const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
|
const hasGpt4 = keys.some((k) => k.isGpt4) && !config.turboOnly;
|
||||||
|
|
||||||
if (keyPool.anyUnchecked()) {
|
if (keyPool.anyUnchecked()) {
|
||||||
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
|
const uncheckedKeys = keys.filter((k) => !k.lastChecked);
|
||||||
info.status = `Still checking ${uncheckedKeys.length} keys...` as any;
|
info.status =
|
||||||
|
`Performing startup key checks (${uncheckedKeys.length} left).` as any;
|
||||||
} else {
|
} else {
|
||||||
delete info.status;
|
delete info.status;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.checkKeys) {
|
if (config.checkKeys) {
|
||||||
const turboKeys = keys.filter((k) => !k.isGpt4 && !k.isDisabled);
|
const turboKeys = keys.filter((k) => !k.isGpt4);
|
||||||
const gpt4Keys = keys.filter((k) => k.isGpt4 && !k.isDisabled);
|
const gpt4Keys = keys.filter((k) => k.isGpt4);
|
||||||
|
|
||||||
const quota: Record<string, string> = { turbo: "", gpt4: "" };
|
const quota: Record<string, string> = { turbo: "", gpt4: "" };
|
||||||
const turboQuota = keyPool.remainingQuota("openai") * 100;
|
const turboQuota = keyPool.activeLimitInUsd("openai");
|
||||||
const gpt4Quota = keyPool.remainingQuota("openai", { gpt4: true }) * 100;
|
const gpt4Quota = keyPool.activeLimitInUsd("openai", { gpt4: true });
|
||||||
|
|
||||||
if (config.quotaDisplayMode === "full") {
|
// Don't invert this condition; some proxies may be using the now-deprecated
|
||||||
const turboUsage = keyPool.usageInUsd("openai");
|
// 'partial' option which we want to treat as 'full' here.
|
||||||
const gpt4Usage = keyPool.usageInUsd("openai", { gpt4: true });
|
if (config.quotaDisplayMode !== "none") {
|
||||||
quota.turbo = `${turboUsage} (${Math.round(turboQuota)}% remaining)`;
|
quota.turbo = turboQuota;
|
||||||
quota.gpt4 = `${gpt4Usage} (${Math.round(gpt4Quota)}% remaining)`;
|
quota.gpt4 = gpt4Quota;
|
||||||
} else {
|
|
||||||
quota.turbo = `${Math.round(turboQuota)}%`;
|
|
||||||
quota.gpt4 = `${Math.round(gpt4Quota * 100)}%`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
info.turbo = {
|
info.turbo = {
|
||||||
activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
|
activeKeys: turboKeys.filter((k) => !k.isDisabled).length,
|
||||||
trialKeys: turboKeys.filter((k) => k.isTrial).length,
|
trialKeys: turboKeys.filter((k) => k.isTrial).length,
|
||||||
quota: quota.turbo,
|
activeLimit: quota.turbo,
|
||||||
|
revokedKeys: turboKeys.filter((k) => k.isRevoked).length,
|
||||||
|
overQuotaKeys: turboKeys.filter((k) => k.isOverQuota).length,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (hasGpt4) {
|
if (hasGpt4) {
|
||||||
info.gpt4 = {
|
info.gpt4 = {
|
||||||
activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
|
activeKeys: gpt4Keys.filter((k) => !k.isDisabled).length,
|
||||||
trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
|
trialKeys: gpt4Keys.filter((k) => k.isTrial).length,
|
||||||
quota: quota.gpt4,
|
activeLimit: quota.gpt4,
|
||||||
|
revokedKeys: gpt4Keys.filter((k) => k.isRevoked).length,
|
||||||
|
overQuotaKeys: gpt4Keys.filter((k) => k.isOverQuota).length,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config.quotaDisplayMode === "none") {
|
if (config.quotaDisplayMode === "none") {
|
||||||
delete info.turbo?.quota;
|
delete info.turbo?.activeLimit;
|
||||||
delete info.gpt4?.quota;
|
delete info.gpt4?.activeLimit;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
info.status = "Key checking is disabled." as any;
|
info.status = "Key checking is disabled." as any;
|
||||||
|
|
|
@ -201,14 +201,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
|
key.rateLimitedUntil = now + RATE_LIMIT_LOCKOUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
public remainingQuota() {
|
public activeLimitInUsd() {
|
||||||
const activeKeys = this.keys.filter((k) => !k.isDisabled).length;
|
return "∞";
|
||||||
const allKeys = this.keys.length;
|
|
||||||
if (activeKeys === 0) return 0;
|
|
||||||
return Math.round((activeKeys / allKeys) * 100) / 100;
|
|
||||||
}
|
|
||||||
|
|
||||||
public usageInUsd() {
|
|
||||||
return "$0.00 / ∞";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,8 +52,7 @@ export interface KeyProvider<T extends Key = Key> {
|
||||||
anyUnchecked(): boolean;
|
anyUnchecked(): boolean;
|
||||||
incrementPrompt(hash: string): void;
|
incrementPrompt(hash: string): void;
|
||||||
getLockoutPeriod(model: Model): number;
|
getLockoutPeriod(model: Model): number;
|
||||||
remainingQuota(options?: Record<string, unknown>): number;
|
activeLimitInUsd(options?: Record<string, unknown>): string;
|
||||||
usageInUsd(options?: Record<string, unknown>): string;
|
|
||||||
markRateLimited(hash: string): void;
|
markRateLimited(hash: string): void;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,9 +32,15 @@ export class KeyPool {
|
||||||
return this.keyProviders.flatMap((provider) => provider.list());
|
return this.keyProviders.flatMap((provider) => provider.list());
|
||||||
}
|
}
|
||||||
|
|
||||||
public disable(key: Key): void {
|
public disable(key: Key, reason: "quota" | "revoked"): void {
|
||||||
const service = this.getKeyProvider(key.service);
|
const service = this.getKeyProvider(key.service);
|
||||||
service.disable(key);
|
service.disable(key);
|
||||||
|
if (service instanceof OpenAIKeyProvider) {
|
||||||
|
service.update(key.hash, {
|
||||||
|
isRevoked: reason === "revoked",
|
||||||
|
isOverQuota: reason === "quota",
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public update(key: Key, props: AllowedPartial): void {
|
public update(key: Key, props: AllowedPartial): void {
|
||||||
|
@ -75,18 +81,11 @@ export class KeyPool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public remainingQuota(
|
public activeLimitInUsd(
|
||||||
service: AIService,
|
|
||||||
options?: Record<string, unknown>
|
|
||||||
): number {
|
|
||||||
return this.getKeyProvider(service).remainingQuota(options);
|
|
||||||
}
|
|
||||||
|
|
||||||
public usageInUsd(
|
|
||||||
service: AIService,
|
service: AIService,
|
||||||
options?: Record<string, unknown>
|
options?: Record<string, unknown>
|
||||||
): string {
|
): string {
|
||||||
return this.getKeyProvider(service).usageInUsd(options);
|
return this.getKeyProvider(service).activeLimitInUsd(options);
|
||||||
}
|
}
|
||||||
|
|
||||||
private getService(model: Model): AIService {
|
private getService(model: Model): AIService {
|
||||||
|
|
|
@ -1,14 +1,24 @@
|
||||||
import axios, { AxiosError } from "axios";
|
import axios, { AxiosError } from "axios";
|
||||||
import { Configuration, OpenAIApi } from "openai";
|
|
||||||
import { logger } from "../../logger";
|
import { logger } from "../../logger";
|
||||||
import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
|
import type { OpenAIKey, OpenAIKeyProvider } from "./provider";
|
||||||
|
|
||||||
|
/** Minimum time in between any two key checks. */
|
||||||
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
|
const MIN_CHECK_INTERVAL = 3 * 1000; // 3 seconds
|
||||||
const KEY_CHECK_PERIOD = 5 * 60 * 1000; // 5 minutes
|
/**
|
||||||
|
* Minimum time in between checks for a given key. Because we can no longer
|
||||||
|
* read quota usage, there is little reason to check a single key more often
|
||||||
|
* than this.
|
||||||
|
**/
|
||||||
|
const KEY_CHECK_PERIOD = 60 * 60 * 1000; // 1 hour
|
||||||
|
|
||||||
|
const POST_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions";
|
||||||
|
const GET_MODELS_URL = "https://api.openai.com/v1/models";
|
||||||
const GET_SUBSCRIPTION_URL =
|
const GET_SUBSCRIPTION_URL =
|
||||||
"https://api.openai.com/dashboard/billing/subscription";
|
"https://api.openai.com/dashboard/billing/subscription";
|
||||||
const GET_USAGE_URL = "https://api.openai.com/dashboard/billing/usage";
|
|
||||||
|
/**
 * Shape of the response from the models listing endpoint (`GET_MODELS_URL`),
 * restricted to the one field the key checker reads: each model's `id`.
 *
 * `data` is typed as a variable-length array rather than a one-element tuple
 * (`[{ id: string }]`), because a tuple type would claim the response always
 * contains exactly one model.
 */
type GetModelsResponse = {
  data: { id: string }[];
};
|
||||||
|
|
||||||
type GetSubscriptionResponse = {
|
type GetSubscriptionResponse = {
|
||||||
plan: { title: string };
|
plan: { title: string };
|
||||||
|
@ -18,10 +28,6 @@ type GetSubscriptionResponse = {
|
||||||
system_hard_limit_usd: number;
|
system_hard_limit_usd: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
type GetUsageResponse = {
|
|
||||||
total_usage: number;
|
|
||||||
};
|
|
||||||
|
|
||||||
type OpenAIError = {
|
type OpenAIError = {
|
||||||
error: { type: string; code: string; param: unknown; message: string };
|
error: { type: string; code: string; param: unknown; message: string };
|
||||||
};
|
};
|
||||||
|
@ -54,7 +60,8 @@ export class OpenAIKeyChecker {
|
||||||
/**
|
/**
|
||||||
* Schedules the next check. If there are still keys yet to be checked, it
|
* Schedules the next check. If there are still keys yet to be checked, it
|
||||||
* will schedule a check immediately for the next unchecked key. Otherwise,
|
* will schedule a check immediately for the next unchecked key. Otherwise,
|
||||||
* it will schedule a check in several minutes for the oldest key.
|
* it will schedule a check for the least recently checked key, respecting
|
||||||
|
* the minimum check interval.
|
||||||
**/
|
**/
|
||||||
private scheduleNextCheck() {
|
private scheduleNextCheck() {
|
||||||
const enabledKeys = this.keys.filter((key) => !key.isDisabled);
|
const enabledKeys = this.keys.filter((key) => !key.isDisabled);
|
||||||
|
@ -94,8 +101,8 @@ export class OpenAIKeyChecker {
|
||||||
key.lastChecked < oldest.lastChecked ? key : oldest
|
key.lastChecked < oldest.lastChecked ? key : oldest
|
||||||
);
|
);
|
||||||
|
|
||||||
// Don't check any individual key more than once every 5 minutes.
|
// Don't check any individual key too often.
|
||||||
// Also, don't check anything more often than once every 3 seconds.
|
// Don't check anything at all at a rate faster than once per 3 seconds.
|
||||||
const nextCheck = Math.max(
|
const nextCheck = Math.max(
|
||||||
oldestKey.lastChecked + KEY_CHECK_PERIOD,
|
oldestKey.lastChecked + KEY_CHECK_PERIOD,
|
||||||
this.lastCheck + MIN_CHECK_INTERVAL
|
this.lastCheck + MIN_CHECK_INTERVAL
|
||||||
|
@ -122,47 +129,37 @@ export class OpenAIKeyChecker {
|
||||||
this.log.debug({ key: key.hash }, "Checking key...");
|
this.log.debug({ key: key.hash }, "Checking key...");
|
||||||
let isInitialCheck = !key.lastChecked;
|
let isInitialCheck = !key.lastChecked;
|
||||||
try {
|
try {
|
||||||
// During the initial check we need to get the subscription first because
|
// We only need to check for provisioned models on the initial check.
|
||||||
// trials have different behavior.
|
|
||||||
if (isInitialCheck) {
|
if (isInitialCheck) {
|
||||||
const subscription = await this.getSubscription(key);
|
const [subscription, provisionedModels, _livenessTest] =
|
||||||
this.updateKey(key.hash, { isTrial: !subscription.has_payment_method });
|
await Promise.all([
|
||||||
if (key.isTrial) {
|
this.getSubscription(key),
|
||||||
this.log.debug(
|
this.getProvisionedModels(key),
|
||||||
{ key: key.hash },
|
this.testLiveness(key),
|
||||||
"Attempting generation on trial key."
|
]);
|
||||||
);
|
|
||||||
await this.assertCanGenerate(key);
|
|
||||||
}
|
|
||||||
const [provisionedModels, usage] = await Promise.all([
|
|
||||||
this.getProvisionedModels(key),
|
|
||||||
this.getUsage(key),
|
|
||||||
]);
|
|
||||||
const updates = {
|
const updates = {
|
||||||
isGpt4: provisionedModels.gpt4,
|
isGpt4: provisionedModels.gpt4,
|
||||||
|
isTrial: !subscription.has_payment_method,
|
||||||
softLimit: subscription.soft_limit_usd,
|
softLimit: subscription.soft_limit_usd,
|
||||||
hardLimit: subscription.hard_limit_usd,
|
hardLimit: subscription.hard_limit_usd,
|
||||||
systemHardLimit: subscription.system_hard_limit_usd,
|
systemHardLimit: subscription.system_hard_limit_usd,
|
||||||
usage,
|
|
||||||
};
|
};
|
||||||
this.updateKey(key.hash, updates);
|
this.updateKey(key.hash, updates);
|
||||||
} else {
|
} else {
|
||||||
// Don't check provisioned models after the initial check because it's
|
// Provisioned models don't change, so we don't need to check them again
|
||||||
// not likely to change.
|
const [subscription, _livenessTest] = await Promise.all([
|
||||||
const [subscription, usage] = await Promise.all([
|
|
||||||
this.getSubscription(key),
|
this.getSubscription(key),
|
||||||
this.getUsage(key),
|
this.testLiveness(key),
|
||||||
]);
|
]);
|
||||||
const updates = {
|
const updates = {
|
||||||
softLimit: subscription.soft_limit_usd,
|
softLimit: subscription.soft_limit_usd,
|
||||||
hardLimit: subscription.hard_limit_usd,
|
hardLimit: subscription.hard_limit_usd,
|
||||||
systemHardLimit: subscription.system_hard_limit_usd,
|
systemHardLimit: subscription.system_hard_limit_usd,
|
||||||
usage,
|
|
||||||
};
|
};
|
||||||
this.updateKey(key.hash, updates);
|
this.updateKey(key.hash, updates);
|
||||||
}
|
}
|
||||||
this.log.info(
|
this.log.info(
|
||||||
{ key: key.hash, usage: key.usage, hardLimit: key.hardLimit },
|
{ key: key.hash, hardLimit: key.hardLimit },
|
||||||
"Key check complete."
|
"Key check complete."
|
||||||
);
|
);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -182,10 +179,21 @@ export class OpenAIKeyChecker {
|
||||||
private async getProvisionedModels(
|
private async getProvisionedModels(
|
||||||
key: OpenAIKey
|
key: OpenAIKey
|
||||||
): Promise<{ turbo: boolean; gpt4: boolean }> {
|
): Promise<{ turbo: boolean; gpt4: boolean }> {
|
||||||
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
|
const opts = { headers: { Authorization: `Bearer ${key.key}` } };
|
||||||
const models = (await openai.listModels()!).data.data;
|
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
|
||||||
|
const models = data.data;
|
||||||
const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
|
const turbo = models.some(({ id }) => id.startsWith("gpt-3.5"));
|
||||||
const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
|
const gpt4 = models.some(({ id }) => id.startsWith("gpt-4"));
|
||||||
|
// We want to update the key's `isGpt4` flag here, but we don't want to
|
||||||
|
// update its `lastChecked` timestamp because we need to let the liveness
|
||||||
|
// check run before we can consider the key checked.
|
||||||
|
|
||||||
|
// Need to use `find` here because keys are cloned from the pool.
|
||||||
|
const keyFromPool = this.keys.find((k) => k.hash === key.hash)!;
|
||||||
|
this.updateKey(key.hash, {
|
||||||
|
isGpt4: gpt4,
|
||||||
|
lastChecked: keyFromPool.lastChecked,
|
||||||
|
});
|
||||||
return { turbo, gpt4 };
|
return { turbo, gpt4 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -197,86 +205,124 @@ export class OpenAIKeyChecker {
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async getUsage(key: OpenAIKey) {
|
|
||||||
const querystring = OpenAIKeyChecker.getUsageQuerystring(key.isTrial);
|
|
||||||
const url = `${GET_USAGE_URL}?${querystring}`;
|
|
||||||
const { data } = await axios.get<GetUsageResponse>(url, {
|
|
||||||
headers: { Authorization: `Bearer ${key.key}` },
|
|
||||||
});
|
|
||||||
return parseFloat((data.total_usage / 100).toFixed(2));
|
|
||||||
}
|
|
||||||
|
|
||||||
private handleAxiosError(key: OpenAIKey, error: AxiosError) {
|
private handleAxiosError(key: OpenAIKey, error: AxiosError) {
|
||||||
if (error.response && OpenAIKeyChecker.errorIsOpenAiError(error)) {
|
if (error.response && OpenAIKeyChecker.errorIsOpenAIError(error)) {
|
||||||
const { status, data } = error.response;
|
const { status, data } = error.response;
|
||||||
if (status === 401) {
|
if (status === 401) {
|
||||||
this.log.warn(
|
this.log.warn(
|
||||||
{ key: key.hash, error: data },
|
{ key: key.hash, error: data },
|
||||||
"Key is invalid or revoked. Disabling key."
|
"Key is invalid or revoked. Disabling key."
|
||||||
);
|
);
|
||||||
this.updateKey(key.hash, { isDisabled: true });
|
this.updateKey(key.hash, {
|
||||||
} else if (status === 429 && data.error.type === "insufficient_quota") {
|
isDisabled: true,
|
||||||
this.log.warn(
|
isRevoked: true,
|
||||||
{ key: key.hash, isTrial: key.isTrial, error: data },
|
isGpt4: false,
|
||||||
"Key is out of quota. Disabling key."
|
});
|
||||||
);
|
} else if (status === 429) {
|
||||||
this.updateKey(key.hash, { isDisabled: true });
|
switch (data.error.type) {
|
||||||
}
|
case "insufficient_quota":
|
||||||
else if (status === 429 && data.error.type === "access_terminated") {
|
case "access_terminated":
|
||||||
this.log.warn(
|
case "billing_not_active":
|
||||||
{ key: key.hash, isTrial: key.isTrial, error: data },
|
const isOverQuota = data.error.type === "insufficient_quota";
|
||||||
"Key has been terminated due to policy violations. Disabling key."
|
const isRevoked = !isOverQuota;
|
||||||
);
|
const isGpt4 = isRevoked ? false : key.isGpt4;
|
||||||
this.updateKey(key.hash, { isDisabled: true });
|
this.log.warn(
|
||||||
|
{ key: key.hash, rateLimitType: data.error.type, error: data },
|
||||||
|
"Key returned a non-transient 429 error. Disabling key."
|
||||||
|
);
|
||||||
|
this.updateKey(key.hash, {
|
||||||
|
isDisabled: true,
|
||||||
|
isRevoked,
|
||||||
|
isOverQuota,
|
||||||
|
isGpt4,
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
case "requests":
|
||||||
|
// Trial keys have extremely low requests-per-minute limits and we
|
||||||
|
// can often hit them just while checking the key, so we need to
|
||||||
|
// retry the check later to know if the key has quota remaining.
|
||||||
|
this.log.warn(
|
||||||
|
{ key: key.hash, error: data },
|
||||||
|
"Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds."
|
||||||
|
);
|
||||||
|
// To trigger a shorter than usual delay before the next check, we
|
||||||
|
// will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)).
|
||||||
|
// This will cause the usual key check scheduling logic to schedule
|
||||||
|
// the next check in 15 seconds. This also prevents the key from
|
||||||
|
// holding up startup checks for other keys.
|
||||||
|
const fifteenSeconds = 15 * 1000;
|
||||||
|
const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds);
|
||||||
|
this.updateKey(key.hash, { lastChecked: next });
|
||||||
|
break;
|
||||||
|
case "tokens":
|
||||||
|
// Hitting a token rate limit, even on a trial key, actually implies
|
||||||
|
// that the key is valid and can generate completions, so we will
|
||||||
|
// treat this as effectively a successful `testLiveness` call.
|
||||||
|
this.log.info(
|
||||||
|
{ key: key.hash },
|
||||||
|
"Key is currently `tokens` rate limited; assuming it is operational."
|
||||||
|
);
|
||||||
|
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
this.log.error(
|
||||||
|
{ key: key.hash, rateLimitType: data.error.type, error: data },
|
||||||
|
"Encountered unexpected rate limit error class while checking key. This may indicate a change in the API; please report this."
|
||||||
|
);
|
||||||
|
// We don't know what this error means, so we just let the key
|
||||||
|
// through and maybe it will fail when someone tries to use it.
|
||||||
|
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
this.log.error(
|
this.log.error(
|
||||||
{ key: key.hash, status, error: data },
|
{ key: key.hash, status, error: data },
|
||||||
"Encountered API error while checking key."
|
"Encountered unexpected error status while checking key. This may indicate a change in the API; please report this."
|
||||||
);
|
);
|
||||||
|
this.updateKey(key.hash, { lastChecked: Date.now() });
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this.log.error(
|
this.log.error(
|
||||||
{ key: key.hash, error },
|
{ key: key.hash, error: error.message },
|
||||||
"Network error while checking key; trying again later."
|
"Network error while checking key; trying this key again in a minute."
|
||||||
);
|
);
|
||||||
|
const oneMinute = 60 * 1000;
|
||||||
|
const next = Date.now() - (KEY_CHECK_PERIOD - oneMinute);
|
||||||
|
this.updateKey(key.hash, { lastChecked: next });
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trial key usage reporting is inaccurate, so we need to run an actual
|
* Tests whether the key is valid and has quota remaining. The request we send
|
||||||
* completion to test them for liveness.
|
* is actually not valid, but keys which are revoked or out of quota will fail
|
||||||
|
* with a 401 or 429 error instead of the expected 400 Bad Request error.
|
||||||
|
* This lets us test keys without spending any quota.
|
||||||
*/
|
*/
|
||||||
private async assertCanGenerate(key: OpenAIKey): Promise<void> {
|
private async testLiveness(key: OpenAIKey): Promise<void> {
|
||||||
const openai = new OpenAIApi(new Configuration({ apiKey: key.key }));
|
const payload = {
|
||||||
// This will throw an AxiosError if the key is invalid or out of quota.
|
|
||||||
await openai.createChatCompletion({
|
|
||||||
model: "gpt-3.5-turbo",
|
model: "gpt-3.5-turbo",
|
||||||
messages: [{ role: "user", content: "Hello" }],
|
max_tokens: -1,
|
||||||
max_tokens: 1,
|
messages: [{ role: "user", content: "" }],
|
||||||
});
|
};
|
||||||
|
const { data } = await axios.post<OpenAIError>(
|
||||||
|
POST_CHAT_COMPLETIONS_URL,
|
||||||
|
payload,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `Bearer ${key.key}` },
|
||||||
|
validateStatus: (status) => status === 400,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
if (data.error.type === "invalid_request_error") {
|
||||||
|
// This is the expected error type for our bad prompt, so key is valid.
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
this.log.warn(
|
||||||
|
{ key: key.hash, error: data },
|
||||||
|
"Unexpected 400 error class while checking key; assuming key is valid, but this may indicate a change in the API."
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static getUsageQuerystring(isTrial: boolean) {
|
static errorIsOpenAIError(
|
||||||
// For paid keys, the limit resets every month, so we can use the first day
|
|
||||||
// of the current month.
|
|
||||||
// For trial keys, the limit does not reset and we don't know when the key
|
|
||||||
// was created, so we use 99 days ago because that's as far back as the API
|
|
||||||
// will let us go.
|
|
||||||
|
|
||||||
// End date needs to be set to the beginning of the next day so that we get
|
|
||||||
// usage for the current day.
|
|
||||||
|
|
||||||
const today = new Date();
|
|
||||||
const startDate = isTrial
|
|
||||||
? new Date(today.getTime() - 99 * 24 * 60 * 60 * 1000)
|
|
||||||
: new Date(today.getFullYear(), today.getMonth(), 1);
|
|
||||||
const endDate = new Date(today.getTime() + 24 * 60 * 60 * 1000);
|
|
||||||
return `start_date=${startDate.toISOString().split("T")[0]}&end_date=${
|
|
||||||
endDate.toISOString().split("T")[0]
|
|
||||||
}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
static errorIsOpenAiError(
|
|
||||||
error: AxiosError
|
error: AxiosError
|
||||||
): error is AxiosError<OpenAIError> {
|
): error is AxiosError<OpenAIError> {
|
||||||
const data = error.response?.data as any;
|
const data = error.response?.data as any;
|
||||||
|
|
|
@ -18,8 +18,10 @@ export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [
|
||||||
|
|
||||||
export interface OpenAIKey extends Key {
|
export interface OpenAIKey extends Key {
|
||||||
readonly service: "openai";
|
readonly service: "openai";
|
||||||
/** The current usage of this key. */
|
/** Set when key check returns a 401. */
|
||||||
usage: number;
|
isRevoked: boolean;
|
||||||
|
/** Set when key check returns a non-transient 429. */
|
||||||
|
isOverQuota: boolean;
|
||||||
/** Threshold at which a warning email will be sent by OpenAI. */
|
/** Threshold at which a warning email will be sent by OpenAI. */
|
||||||
softLimit: number;
|
softLimit: number;
|
||||||
/** Threshold at which the key will be disabled because it has reached the user-defined limit. */
|
/** Threshold at which the key will be disabled because it has reached the user-defined limit. */
|
||||||
|
@ -54,7 +56,7 @@ export interface OpenAIKey extends Key {
|
||||||
|
|
||||||
export type OpenAIKeyUpdate = Omit<
|
export type OpenAIKeyUpdate = Omit<
|
||||||
Partial<OpenAIKey>,
|
Partial<OpenAIKey>,
|
||||||
"key" | "hash" | "lastUsed" | "lastChecked" | "promptCount"
|
"key" | "hash" | "promptCount"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
|
@ -80,6 +82,8 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
isGpt4: true,
|
isGpt4: true,
|
||||||
isTrial: false,
|
isTrial: false,
|
||||||
isDisabled: false,
|
isDisabled: false,
|
||||||
|
isRevoked: false,
|
||||||
|
isOverQuota: false,
|
||||||
softLimit: 0,
|
softLimit: 0,
|
||||||
hardLimit: 0,
|
hardLimit: 0,
|
||||||
systemHardLimit: 0,
|
systemHardLimit: 0,
|
||||||
|
@ -183,7 +187,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
/** Called by the key checker to update key information. */
|
/** Called by the key checker to update key information. */
|
||||||
public update(keyHash: string, update: OpenAIKeyUpdate) {
|
public update(keyHash: string, update: OpenAIKeyUpdate) {
|
||||||
const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
|
const keyFromPool = this.keys.find((k) => k.hash === keyHash)!;
|
||||||
Object.assign(keyFromPool, { ...update, lastChecked: Date.now() });
|
Object.assign(keyFromPool, { lastChecked: Date.now(), ...update });
|
||||||
// this.writeKeyStatus();
|
// this.writeKeyStatus();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,9 +196,6 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
const keyFromPool = this.keys.find((k) => k.key === key.key);
|
const keyFromPool = this.keys.find((k) => k.key === key.key);
|
||||||
if (!keyFromPool || keyFromPool.isDisabled) return;
|
if (!keyFromPool || keyFromPool.isDisabled) return;
|
||||||
keyFromPool.isDisabled = true;
|
keyFromPool.isDisabled = true;
|
||||||
// If it's disabled just set the usage to the hard limit so it doesn't
|
|
||||||
// mess with the aggregate usage.
|
|
||||||
keyFromPool.usage = keyFromPool.hardLimit;
|
|
||||||
this.log.warn({ key: key.hash }, "Key disabled");
|
this.log.warn({ key: key.hash }, "Key disabled");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -302,31 +303,15 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the remaining aggregate quota for all keys as a percentage.
|
* Returns the total quota limit of all keys in USD. Keys which are disabled
|
||||||
* Can go slightly negative because keys will typically go slightly over their
|
* are not included in the total.
|
||||||
* limit before being disabled. Can sometimes go *really* negative if the
|
*/
|
||||||
* cron job OpenAI uses to disable keys fails, as the key will essentially
|
public activeLimitInUsd(
|
||||||
* have unlimited quota.
|
{ gpt4 }: { gpt4: boolean } = { gpt4: false }
|
||||||
**/
|
): string {
|
||||||
public remainingQuota({ gpt4 }: { gpt4: boolean } = { gpt4: false }): number {
|
const keys = this.keys.filter((k) => !k.isDisabled && k.isGpt4 === gpt4);
|
||||||
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
|
|
||||||
if (keys.length === 0) return 0;
|
|
||||||
|
|
||||||
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
|
|
||||||
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
|
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
|
||||||
|
return `$${totalLimit.toFixed(2)}`;
|
||||||
return 1 - totalUsage / totalLimit;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns used and available usage in USD. */
|
|
||||||
public usageInUsd({ gpt4 }: { gpt4: boolean } = { gpt4: false }): string {
|
|
||||||
const keys = this.keys.filter((k) => k.isGpt4 === gpt4);
|
|
||||||
if (keys.length === 0) return "???";
|
|
||||||
|
|
||||||
const totalUsage = keys.reduce((acc, key) => acc + key.usage, 0);
|
|
||||||
const totalLimit = keys.reduce((acc, { hardLimit }) => acc + hardLimit, 0);
|
|
||||||
|
|
||||||
return `$${totalUsage.toFixed(2)} / $${totalLimit.toFixed(2)}`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Writes key status to disk. */
|
/** Writes key status to disk. */
|
||||||
|
|
|
@ -269,7 +269,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
}
|
}
|
||||||
} else if (statusCode === 401) {
|
} else if (statusCode === 401) {
|
||||||
// Key is invalid or was revoked
|
// Key is invalid or was revoked
|
||||||
keyPool.disable(req.key!);
|
keyPool.disable(req.key!, "revoked");
|
||||||
errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
|
errorPayload.proxy_note = `API key is invalid or revoked. ${tryAgainMessage}`;
|
||||||
} else if (statusCode === 429) {
|
} else if (statusCode === 429) {
|
||||||
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
||||||
|
@ -375,15 +375,15 @@ function handleOpenAIRateLimitError(
|
||||||
const type = errorPayload.error?.type;
|
const type = errorPayload.error?.type;
|
||||||
if (type === "insufficient_quota") {
|
if (type === "insufficient_quota") {
|
||||||
// Billing quota exceeded (key is dead, disable it)
|
// Billing quota exceeded (key is dead, disable it)
|
||||||
keyPool.disable(req.key!);
|
keyPool.disable(req.key!, "quota");
|
||||||
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
|
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
|
||||||
} else if (type === "access_terminated") {
|
} else if (type === "access_terminated") {
|
||||||
// Account banned (key is dead, disable it)
|
// Account banned (key is dead, disable it)
|
||||||
keyPool.disable(req.key!);
|
keyPool.disable(req.key!, "revoked");
|
||||||
errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
|
errorPayload.proxy_note = `Assigned key has been banned by OpenAI for policy violations. ${tryAgainMessage}`;
|
||||||
} else if (type === "billing_not_active") {
|
} else if (type === "billing_not_active") {
|
||||||
// Billing is not active (key is dead, disable it)
|
// Billing is not active (key is dead, disable it)
|
||||||
keyPool.disable(req.key!);
|
keyPool.disable(req.key!, "revoked");
|
||||||
errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
|
errorPayload.proxy_note = `Assigned key was deactivated by OpenAI. ${tryAgainMessage}`;
|
||||||
} else if (type === "requests" || type === "tokens") {
|
} else if (type === "requests" || type === "tokens") {
|
||||||
// Per-minute request or token rate limit is exceeded, which we can retry
|
// Per-minute request or token rate limit is exceeded, which we can retry
|
||||||
|
|
Loading…
Reference in New Issue