From e462ad585e1c2e892ef67bb3b0af08419747519f Mon Sep 17 00:00:00 2001 From: nai-degen Date: Wed, 30 Aug 2023 08:33:00 -0500 Subject: [PATCH] improves keychecker stability with rate-limited trial keys --- src/key-management/openai/checker.ts | 76 ++++++++++++++++++--------- src/key-management/openai/provider.ts | 15 +++--- 2 files changed, 60 insertions(+), 31 deletions(-) diff --git a/src/key-management/openai/checker.ts b/src/key-management/openai/checker.ts index b9a9d1c..18e9298 100644 --- a/src/key-management/openai/checker.ts +++ b/src/key-management/openai/checker.ts @@ -139,19 +139,15 @@ export class OpenAIKeyChecker { const updates = { modelFamilies: provisionedModels, isTrial: livenessTest.rateLimit <= 250, - softLimit: 0, - hardLimit: 0, - systemHardLimit: 0, }; this.updateKey(key.hash, updates); } else { - // Provisioned models don't change, so we don't need to check them again + // No updates needed as models and trial status generally don't change. const [_livenessTest] = await Promise.all([this.testLiveness(key)]); - const updates = { softLimit: 0, hardLimit: 0, systemHardLimit: 0 }; - this.updateKey(key.hash, updates); + this.updateKey(key.hash, {}); } this.log.info( - { key: key.hash, models: key.modelFamilies }, + { key: key.hash, models: key.modelFamilies, trial: key.isTrial }, "Key check complete." ); } catch (error) { @@ -164,6 +160,10 @@ export class OpenAIKeyChecker { // Only enqueue the next check if this wasn't a startup check, since those // are batched together elsewhere. if (!isInitialCheck) { + this.log.info( + { key: key.hash }, + "Recurring keychecks are disabled, no-op." + ); // this.scheduleNextCheck(); } } @@ -192,7 +192,6 @@ export class OpenAIKeyChecker { // update its `lastChecked` timestamp because we need to let the liveness // check run before we can consider the key checked. - // Need to use `find` here because keys are cloned from the pool. const keyFromPool = this.keys.find((k) => k.hash === key.hash)!; this.updateKey(key.hash, { modelFamilies: families, @@ -259,21 +258,32 @@ export class OpenAIKeyChecker { }); break; case "requests": - // Trial keys have extremely low requests-per-minute limits and we - // can often hit them just while checking the key, so we need to - // retry the check later to know if the key has quota remaining. - this.log.warn( - { key: key.hash, error: data }, - "Key is currently rate limited, so its liveness cannot be checked. Retrying in fifteen seconds." - ); - // To trigger a shorter than usual delay before the next check, we - // will set its `lastChecked` to (NOW - (KEY_CHECK_PERIOD - 15s)). - // This will cause the usual key check scheduling logic to schedule - // the next check in 15 seconds. This also prevents the key from - // holding up startup checks for other keys. - const fifteenSeconds = 15 * 1000; - const next = Date.now() - (KEY_CHECK_PERIOD - fifteenSeconds); - this.updateKey(key.hash, { lastChecked: next }); + // If we hit the text completion rate limit on a trial key, it is + // likely being used by many proxies. We will disable the key since + // it's just going to be constantly rate limited. + const isTrial = + Number(error.response.headers["x-ratelimit-limit-requests"]) <= + 250; + + if (isTrial) { + this.log.warn( + { key: key.hash, error: data }, + "Trial key is rate limited on text completion endpoint. This indicates the key is being used by several proxies at once and is not likely to be usable. Disabling key." + ); + this.updateKey(key.hash, { + isTrial, + isDisabled: true, + isOverQuota: true, + modelFamilies: ["turbo"], + lastChecked: Date.now(), + }); + } else { + this.log.warn( + { key: key.hash, error: data }, + "Non-trial key is rate limited on text completion endpoint. This is unusual and may indicate a bug. Assuming key is operational." + ); + this.updateKey(key.hash, { lastChecked: Date.now() }); + } break; case "tokens": // Hitting a token rate limit, even on a trial key, actually implies @@ -321,8 +331,26 @@ export class OpenAIKeyChecker { * We use the rate limit header to determine whether it's a trial key. */ private async testLiveness(key: OpenAIKey): Promise<{ rateLimit: number }> { + // What the hell this is doing: + + // OpenAI enforces separate rate limits for chat and text completions. Trial + // keys have extremely low rate limits of 200 per day per API type. In order + // to avoid wasting more valuable chat quota, we send an (invalid) chat + // request to Babbage (a text completion model). Even though our request is + // to the chat endpoint, we get text rate limit headers back because the + // requested model determines the rate limit used, not the endpoint. + + // Once we have headers, we can determine: + // 1. Is the key revoked? (401, OAI doesn't even validate the request) + // 2. Is the key out of quota? (400, OAI will still validate the request) + // 3. Is the key a trial key? (400, x-ratelimit-limit-requests: 200) + + // This might still cause issues if too many proxies are running a train on + // the same trial key and even the text completion quota is exhausted, but + // it should work better than the alternative. + const payload = { - model: "gpt-3.5-turbo", + model: "babbage-002", max_tokens: -1, messages: [{ role: "user", content: "" }], }; diff --git a/src/key-management/openai/provider.ts b/src/key-management/openai/provider.ts index 9fb4030..d28b503 100644 --- a/src/key-management/openai/provider.ts +++ b/src/key-management/openai/provider.ts @@ -30,12 +30,6 @@ export interface OpenAIKey extends Key { isRevoked: boolean; /** Set when key check returns a non-transient 429. */ isOverQuota: boolean; - /** Threshold at which a warning email will be sent by OpenAI. */ - softLimit: number; - /** Threshold at which the key will be disabled because it has reached the user-defined limit. */ - hardLimit: number; - /** The maximum quota allocated to this key by OpenAI. */ - systemHardLimit: number; /** The time at which this key was last rate limited. */ rateLimitedAt: number; /** @@ -359,7 +353,14 @@ export class OpenAIKeyProvider implements KeyProvider { } public recheck() { - this.keys.forEach((key) => (key.lastChecked = 0)); + this.keys.forEach((key) => { + this.update(key.hash, { + isRevoked: false, + isOverQuota: false, + isDisabled: false, + lastChecked: 0, + }); + }); this.checker?.scheduleNextCheck(); }