diff --git a/src/config.ts b/src/config.ts index 9d168b0..76a7d8f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -18,6 +18,8 @@ type Config = { openaiKey?: string; /** Comma-delimited list of Anthropic API keys. */ anthropicKey?: string; + scaleKey?: string; + scaleMinDeployments: number; /** * The proxy key to require for requests. Only applicable if the user * management mode is set to 'proxy_key', and required if so. @@ -26,7 +28,7 @@ type Config = { /** * The admin key used to access the /admin API. Required if the user * management mode is set to 'user_token'. - **/ + */ adminKey?: string; /** * Which user management mode to use. @@ -49,7 +51,7 @@ type Config = { * * `firebase_rtdb`: Users are stored in a Firebase Realtime Database; requires * `firebaseKey` and `firebaseRtdbUrl` to be set. - **/ + */ gatekeeperStore: "memory" | "firebase_rtdb"; /** URL of the Firebase Realtime Database if using the Firebase RTDB store. */ firebaseRtdbUrl?: string; @@ -127,6 +129,8 @@ export const config: Config = { port: getEnvWithDefault("PORT", 7860), openaiKey: getEnvWithDefault("OPENAI_KEY", ""), anthropicKey: getEnvWithDefault("ANTHROPIC_KEY", ""), + scaleKey: getEnvWithDefault("SCALE_KEY", ""), + scaleMinDeployments: getEnvWithDefault("SCALE_MIN_DEPLOYMENTS", 0), proxyKey: getEnvWithDefault("PROXY_KEY", ""), adminKey: getEnvWithDefault("ADMIN_KEY", ""), gatekeeper: getEnvWithDefault("GATEKEEPER", "none"), @@ -262,6 +266,7 @@ export const OMITTED_KEYS: (keyof Config)[] = [ "logLevel", "openaiKey", "anthropicKey", + "scaleKey", "proxyKey", "adminKey", "checkKeys", diff --git a/src/key-management/index.ts b/src/key-management/index.ts index 13c73a5..0686461 100644 --- a/src/key-management/index.ts +++ b/src/key-management/index.ts @@ -5,7 +5,7 @@ import { } from "./anthropic/provider"; import { KeyPool } from "./key-pool"; -export type AIService = "openai" | "anthropic"; +export type AIService = "openai" | "anthropic" | "scale"; export type Model = OpenAIModel | AnthropicModel; export interface Key { diff --git a/src/key-management/scale/provider.ts b/src/key-management/scale/provider.ts new file mode 100644 index 0000000..08f0f06 --- /dev/null +++ b/src/key-management/scale/provider.ts @@ -0,0 +1,155 @@ +import crypto from "crypto"; +import { Key, KeyProvider } from ".."; +import { config } from "../../config"; +import { logger } from "../../logger"; + +export interface ScaleDeployment extends Key { + readonly service: "scale"; + deploymentUrl: string; + createdAt: number; +} + +/* +Scale is a bit different from the other providers. It doesn't have set API keys; +instead there are "deployments", which are created in the Scale dashboard and +are accessible via a URL and API key together. + +The operator can provide these accounts via the SCALE_KEY environment variable, +but more likely they will want the proxy to just automatically create new +accounts and deployments as older ones reach their usage limits. +*/ + +export class ScaleKeyProvider implements KeyProvider { + readonly service = "scale"; + + private deployments: ScaleDeployment[] = []; + private log = logger.child({ module: "key-provider", service: this.service }); + private churnerEnabled = false; + + constructor() { + const keyConfig = config.scaleKey?.trim(); + if (!keyConfig) return; + let initialKeys: string[]; + initialKeys = [...new Set(keyConfig.split(",").map((k) => k.trim()))]; + for (const keyStr of initialKeys) { + const [key, deploymentUrl] = keyStr.split("$"); + const newDeployment: ScaleDeployment = { + key, + deploymentUrl, + service: this.service, + isGpt4: false, + isTrial: false, + isDisabled: false, + promptCount: 0, + lastUsed: 0, + createdAt: Date.now(), + hash: `sca-${crypto + .createHash("sha256") + .update(keyStr) + .digest("hex") + .slice(0, 8)}`, + lastChecked: 0, + }; + this.deployments.push(newDeployment); + } + this.log.info( + { keyCount: this.deployments.length }, + "Loaded initial Scale deployments" + ); + } + + public init() { + // TODO: Start account churner + this.churnerEnabled = true; + } + + public list() { + return this.deployments.map((k) => Object.freeze({ ...k, key: undefined })); + } + + public get(_model: unknown) { + // Scale doesn't support changing models on the fly + const availableDeployments = this.deployments.filter((a) => !a.isDisabled); + const canCreateNewAccounts = config.scaleMinDeployments > 0; + if (availableDeployments.length === 0) { + if (canCreateNewAccounts) { + this.log.warn( + "Ran out of Scale deployments and the churner is not creating new ones fast enough." + ); + throw new Error( + "No Scale deployments available. Try again in a few minutes when the churner has created new deployments." + ); + } else { + throw new Error( + "No Scale deployments available and account churner is disabled (possible IP ban or signup rate limit)." + ); + } + } + + // Unlike other providers, Scale doesn't want to rotate keys. Instead, we + // want to use the same key for as long as possible while building up a + // reserve of new accounts. Once an account dies there should be a fresh + // one ready to go. + + const now = Date.now(); + + const deploymentsByPriority = availableDeployments.sort((a, b) => { + return a.createdAt - b.createdAt; + }); + + const selectedKey = deploymentsByPriority[0]; + selectedKey.lastUsed = now; + return { ...selectedKey }; + } + + public disable(deployment: ScaleDeployment) { + const deploymentFromPool = this.deployments.find( + (d) => d.hash === deployment.hash + ); + if (!deploymentFromPool || deploymentFromPool.isDisabled) return; + deploymentFromPool.isDisabled = true; + this.log.warn({ key: deployment.hash }, "Scale deployment disabled"); + } + + public update(hash: string, update: Partial) { + const deploymentFromPool = this.deployments.find((d) => d.hash === hash)!; + Object.assign(deploymentFromPool, update); + } + + public available() { + return this.deployments.filter((k) => !k.isDisabled).length; + } + + // Normally this would return the number of unchecked keys but we will + // repurpose it to return the number of pending accounts the churner is + // creating. + public anyUnchecked() { + return config.scaleMinDeployments - this.available() > 0; + } + + public incrementPrompt(hash?: string) { + const deployment = this.deployments.find((d) => d.hash === hash); + if (!deployment) return; + deployment.promptCount++; + } + + public getLockoutPeriod(_model: unknown) { + // TODO: Scale doesn't have rate limits but this may need to be repurposed + // to lock out the request queue if the account churner enabled but falling + // behind. + return 0; + } + + public markRateLimited(keyHash: string) { + // Do nothing + } + + /** Doesn't really mean anything for Scale */ + public remainingQuota() { + return 1; + } + + public usageInUsd() { + return "$0.00 / ∞"; + } +}