From 120040c0285c116aec64c225b3f40242827efb9b Mon Sep 17 00:00:00 2001
From: khanon
Date: Thu, 8 Jun 2023 00:58:57 +0000
Subject: [PATCH] Separate `MAX_OUTPUT_TOKENS` config for OpenAI/Claude
 (khanon/oai-reverse-proxy!21)
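
This deprecates `MAX_OUTPUT_TOKENS`. If it is still set, its value is used as
the fallback for whichever of the new variables is unset, and a warning is
logged at startup. A sketch of the migration, with illustrative values:

    # Before (deprecated, still honored as a fallback for both APIs)
    MAX_OUTPUT_TOKENS=300

    # After
    MAX_OUTPUT_TOKENS_OPENAI=300
    MAX_OUTPUT_TOKENS_ANTHROPIC=600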
---
 .env.example                                  |  3 +-
 .gitignore                                    |  2 +-
 docs/deploy-huggingface.md                    | 17 +++---
 src/config.ts                                 | 43 +++++++++++++++++--
 .../middleware/request/limit-output-tokens.ts | 30 ++++++-------
 5 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/.env.example b/.env.example
index 5f78bef..a3f746a 100644
--- a/.env.example
+++ b/.env.example
@@ -4,7 +4,8 @@
 # PORT=7860
 # SERVER_TITLE=Coom Tunnel
 # MODEL_RATE_LIMIT=4
-# MAX_OUTPUT_TOKENS=300
+# MAX_OUTPUT_TOKENS_OPENAI=300
+# MAX_OUTPUT_TOKENS_ANTHROPIC=900
 # LOG_LEVEL=info
 # REJECT_DISALLOWED=false
 # REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
diff --git a/.gitignore b/.gitignore
index 675f293..9f34fcb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 .env
+.venv
 .vscode
 build
 greeting.md
 node_modules
-venv
diff --git a/docs/deploy-huggingface.md b/docs/deploy-huggingface.md
index e9766a9..9060399 100644
--- a/docs/deploy-huggingface.md
+++ b/docs/deploy-huggingface.md
@@ -4,6 +4,7 @@ This repository can be deployed to a [Huggingface Space](https://huggingface.co/
 
 ### 1. Get an API key
 - Go to [OpenAI](https://openai.com/) and sign up for an account. You can use a free trial key for this as long as you provide SMS verification.
+  - Claude is not publicly available yet, but if you have access to it via the [Anthropic](https://www.anthropic.com/) closed beta, you can also use that key with the proxy.
 
 ### 2. Create an empty Huggingface Space
 - Go to [Huggingface](https://huggingface.co/) and sign up for an account.
@@ -35,13 +36,15 @@ CMD [ "npm", "start" ]
 ```
 
 ![Commit](huggingface-savedockerfile.png)
 
-### 4. Set your OpenAI API key as a secret
+### 4. Set your API key as a secret
 - Click the Settings button in the top right corner of your repository.
 - Scroll down to the `Repository Secrets` section and click `New Secret`.
 
 ![Secrets](https://files.catbox.moe/irrp2p.png)
 
 - Enter `OPENAI_KEY` as the name and your OpenAI API key as the value.
+  - For Claude, set `ANTHROPIC_KEY` instead.
+  - You can use both types of keys at the same time if you want.
 
 ![New Secret](https://files.catbox.moe/ka6s1a.png)
@@ -49,8 +52,8 @@
 - Your server should automatically deploy when you add the secret, but if not you can select `Factory Reboot` from that same Settings menu.
 
 ### 6. Share the link
-- The Service Info section below should show the URL for your server. You can share this with anyone to safely give them access to your OpenAI API key.
-- Your friend doesn't need any OpenAI API key of their own, they just need your link.
+- The Service Info section below should show the URL for your server. You can share this with anyone to safely give them access to your API key.
+- Your friend doesn't need any API key of their own; they just need your link.
 
 # Optional
 
@@ -71,12 +74,16 @@ The server will be started with some default configuration, but you can override
 Here are some example settings:
 ```shell
 # Requests per minute per IP address
-MODEL_RATE_LIMIT=2
+MODEL_RATE_LIMIT=4
 # Max tokens to request from OpenAI
-MAX_OUTPUT_TOKENS=256
+MAX_OUTPUT_TOKENS_OPENAI=256
+# Max tokens to request from Anthropic (Claude)
+MAX_OUTPUT_TOKENS_ANTHROPIC=512
 # Block prompts containing disallowed characters
 REJECT_DISALLOWED=false
 REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
+# Show exact quota usage on the Server Info page
+QUOTA_DISPLAY_MODE=full
 ```
 
 See `.env.example` for a full list of available settings, or check `config.ts` for details on what each setting does.
diff --git a/src/config.ts b/src/config.ts
index 860aa75..9d168b0 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -1,7 +1,11 @@
 import dotenv from "dotenv";
 import type firebase from "firebase-admin";
+import pino from "pino";
 dotenv.config();
 
+// Can't import the usual logger here because it itself needs the config.
+const startupLogger = pino({ level: "debug" }).child({ module: "startup" });
+
 const isDev = process.env.NODE_ENV !== "production";
 
 type PromptLoggingBackend = "google_sheets";
@@ -59,8 +63,10 @@ type Config = {
   maxIpsPerUser: number;
   /** Per-IP limit for requests per minute to OpenAI's completions endpoint. */
   modelRateLimit: number;
-  /** Max number of tokens to generate. Requests which specify a higher value will be rewritten to use this value. */
-  maxOutputTokens: number;
+  /** For OpenAI, the maximum number of sampled tokens a user can request. */
+  maxOutputTokensOpenAI: number;
+  /** For Anthropic, the maximum number of sampled tokens a user can request. */
+  maxOutputTokensAnthropic: number;
   /** Whether requests containing disallowed characters should be rejected. */
   rejectDisallowed?: boolean;
   /** Message to return when rejecting requests. */
@@ -129,7 +135,11 @@ export const config: Config = {
   firebaseRtdbUrl: getEnvWithDefault("FIREBASE_RTDB_URL", undefined),
   firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
   modelRateLimit: getEnvWithDefault("MODEL_RATE_LIMIT", 4),
-  maxOutputTokens: getEnvWithDefault("MAX_OUTPUT_TOKENS", 300),
+  maxOutputTokensOpenAI: getEnvWithDefault("MAX_OUTPUT_TOKENS_OPENAI", 300),
+  maxOutputTokensAnthropic: getEnvWithDefault(
+    "MAX_OUTPUT_TOKENS_ANTHROPIC",
+    600
+  ),
   rejectDisallowed: getEnvWithDefault("REJECT_DISALLOWED", false),
   rejectMessage: getEnvWithDefault(
     "REJECT_MESSAGE",
@@ -154,8 +164,35 @@
   blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"),
 } as const;
 
+function migrateConfigs() {
+  let migrated = false;
+  const deprecatedMax = process.env.MAX_OUTPUT_TOKENS;
+
+  if (!process.env.MAX_OUTPUT_TOKENS_OPENAI && deprecatedMax) {
+    migrated = true;
+    config.maxOutputTokensOpenAI = parseInt(deprecatedMax, 10);
+  }
+  if (!process.env.MAX_OUTPUT_TOKENS_ANTHROPIC && deprecatedMax) {
+    migrated = true;
+    config.maxOutputTokensAnthropic = parseInt(deprecatedMax, 10);
+  }
+
+  if (migrated) {
+    startupLogger.warn(
+      {
+        MAX_OUTPUT_TOKENS: deprecatedMax,
+        MAX_OUTPUT_TOKENS_OPENAI: config.maxOutputTokensOpenAI,
+        MAX_OUTPUT_TOKENS_ANTHROPIC: config.maxOutputTokensAnthropic,
+      },
+      "`MAX_OUTPUT_TOKENS` has been replaced by separate `MAX_OUTPUT_TOKENS_OPENAI` and `MAX_OUTPUT_TOKENS_ANTHROPIC` configs. Update your .env file to remove `MAX_OUTPUT_TOKENS` and set the new configs."
+    );
+  }
+}
+
 /** Prevents the server from starting if config state is invalid. */
 export async function assertConfigIsValid() {
+  migrateConfigs();
+
   // Ensure gatekeeper mode is valid.
if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) { throw new Error( diff --git a/src/proxy/middleware/request/limit-output-tokens.ts b/src/proxy/middleware/request/limit-output-tokens.ts index acfbdc8..09e9475 100644 --- a/src/proxy/middleware/request/limit-output-tokens.ts +++ b/src/proxy/middleware/request/limit-output-tokens.ts @@ -3,34 +3,32 @@ import { config } from "../../../config"; import { isCompletionRequest } from "../common"; import { ProxyRequestMiddleware } from "."; -const MAX_TOKENS = config.maxOutputTokens; - /** Enforce a maximum number of tokens requested from the model. */ export const limitOutputTokens: ProxyRequestMiddleware = (_proxyReq, req) => { + // TODO: do all of this shit in the zod validator if (isCompletionRequest(req)) { - const requestedMaxTokens = Number.parseInt(getMaxTokensFromRequest(req)); - let maxTokens = requestedMaxTokens; + const requestedMax = Number.parseInt(getMaxTokensFromRequest(req)); + const apiMax = + req.outboundApi === "openai" + ? config.maxOutputTokensOpenAI + : config.maxOutputTokensAnthropic; + let maxTokens = requestedMax; - if (typeof requestedMaxTokens !== "number") { - req.log.warn( - { requestedMaxTokens, clampedMaxTokens: MAX_TOKENS }, - "Invalid max tokens value. Using default value." - ); - maxTokens = MAX_TOKENS; + if (typeof requestedMax !== "number") { + maxTokens = apiMax; } - // TODO: this is not going to scale well, need to implement a better way - // of translating request parameters from one API to another. - maxTokens = Math.min(maxTokens, MAX_TOKENS); + maxTokens = Math.min(maxTokens, apiMax); if (req.outboundApi === "openai") { req.body.max_tokens = maxTokens; } else if (req.outboundApi === "anthropic") { req.body.max_tokens_to_sample = maxTokens; } - if (requestedMaxTokens !== maxTokens) { - req.log.warn( - `Limiting max tokens from ${requestedMaxTokens} to ${maxTokens}` + if (requestedMax !== maxTokens) { + req.log.info( + { requestedMax, configMax: apiMax, final: maxTokens }, + "Limiting user's requested max output tokens" ); } }