From fc3043dad0d035374c2610f86a8ac8bf193dbaff Mon Sep 17 00:00:00 2001 From: nai-degen <44111-khanon@users.noreply.gitgud.io> Date: Sat, 15 Apr 2023 01:21:04 +0000 Subject: [PATCH] Implements prompt logging via Google Sheets (khanon/oai-reverse-proxy!1) --- .env.example | 31 +- .gitignore | 1 + README.md | 31 +- docs/huggingface.md | 8 +- docs/logging-sheets.md | 52 +++ info-page.md | 2 - package-lock.json | 208 +++++++++ package.json | 1 + src/config.ts | 31 +- src/info-page.ts | 36 +- src/key-management/key-checker.ts | 2 +- src/key-management/key-pool.ts | 2 +- src/prompt-logging/backends/index.ts | 1 + src/prompt-logging/backends/sheets.ts | 422 ++++++++++++++++++ src/prompt-logging/index.ts | 20 + src/prompt-logging/log-queue.ts | 108 +++++ src/proxy/kobold.ts | 21 +- .../request}/add-key.ts | 2 +- .../request}/disable-stream.ts | 0 .../request}/finalize-body.ts | 0 .../request}/index.ts | 5 +- .../request}/language-filter.ts | 10 +- .../middleware/request/limit-completions.ts | 17 + .../request}/limit-output-tokens.ts | 4 +- .../request}/transform-kobold-payload.ts | 26 +- .../response/index.ts} | 20 +- src/proxy/middleware/response/log-prompt.ts | 54 +++ src/proxy/openai.ts | 36 +- src/server.ts | 6 + src/types/custom.d.ts | 1 + 30 files changed, 1078 insertions(+), 80 deletions(-) create mode 100644 docs/logging-sheets.md create mode 100644 src/prompt-logging/backends/index.ts create mode 100644 src/prompt-logging/backends/sheets.ts create mode 100644 src/prompt-logging/index.ts create mode 100644 src/prompt-logging/log-queue.ts rename src/proxy/{rewriters => middleware/request}/add-key.ts (94%) rename src/proxy/{rewriters => middleware/request}/disable-stream.ts (100%) rename src/proxy/{rewriters => middleware/request}/finalize-body.ts (100%) rename src/proxy/{rewriters => middleware/request}/index.ts (90%) rename src/proxy/{rewriters => middleware/request}/language-filter.ts (79%) create mode 100644 src/proxy/middleware/request/limit-completions.ts rename src/proxy/{rewriters => middleware/request}/limit-output-tokens.ts (91%) rename src/proxy/{rewriters => middleware/request}/transform-kobold-payload.ts (88%) rename src/proxy/{common.ts => middleware/response/index.ts} (93%) create mode 100644 src/proxy/middleware/response/log-prompt.ts diff --git a/.env.example b/.env.example index ba00bcd..1b6cdf7 100644 --- a/.env.example +++ b/.env.example @@ -1,16 +1,33 @@ -# Copy this file to .env and fill in the values. +# Copy this file to .env and fill in the values you wish to change. Most already +# have sensible defaults. See config.ts for more details. -# Set your OpenAI API key(s) below, separated by commas if you have multiple keys. -OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx - -# Optional settings (please see config.ts for more details) # PORT=7860 -# PROXY_KEY=your-secret-key # MODEL_RATE_LIMIT=4 # MAX_OUTPUT_TOKENS=300 # LOG_LEVEL=info -# LOG_PROMPTS=false # REJECT_DISALLOWED=false # REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy." # REJECT_SAMPLE_RATE=0.2 # CHECK_KEYS=false + +# Note: CHECK_KEYS is disabled by default in local development mode, but enabled +# by default in production mode. + +# Optional settings for prompt logging to Google Sheets +# PROMPT_LOGGING=false + +# ------------------------------------------------------------------------------ +# !!! EVERYTHING BELOW IS FOR LOCAL DEVELOPMENT ONLY !!! 
+# If you are deploying this to Huggingface Spaces, do not set any keys here as
+# the .env file is public and your keys will be exposed.
+# Sensitive keys should be set in the Secrets UI instead.
+
+# You can add multiple keys by separating them with a comma.
+OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# You can require a bearer token to access the proxy by setting the key below.
+# PROXY_KEY=your-secret-key
+
+# This is only relevant if you want to use the prompt logging feature.
+# GOOGLE_SHEETS_SPREADSHEET_ID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+# GOOGLE_SHEETS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
diff --git a/.gitignore b/.gitignore
index ed520f2..c02ddf4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .env
 build
 node_modules
+greeting.md
diff --git a/README.md b/README.md
index 5254437..535ecb7 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,3 @@
----
-title: oai-reverse-proxy
-emoji: 🔁
-colorFrom: green
-colorTo: purple
-sdk: docker
-pinned: false
----
 # OAI Reverse Proxy
 Reverse proxy server for the OpenAI (and soon Anthropic) APIs. Forwards text generation requests while rejecting administrative/billing requests. Includes optional rate limiting and prompt filtering to prevent abuse.
 
@@ -13,9 +5,10 @@ Reverse proxy server for the OpenAI (and soon Anthropic) APIs. Forwards text gen
 ### Table of Contents
 - [What is this?](#what-is-this)
 - [Why?](#why)
-- [Setup Instructions](#setup-instructions)
+- [Usage Instructions](#usage-instructions)
 - [Deploy to Huggingface (Recommended)](#deploy-to-huggingface-recommended)
 - [Deploy to Repl.it (WIP)](#deploy-to-replit-wip)
+- [Local Development](#local-development)
 
 ## What is this?
 If you would like to provide a friend access to an API via keys you own, you can use this to keep your keys safe while still allowing them to generate text with the API. You can also use this if you'd like to build a client-side application which uses the OpenAI or Anthropic APIs, but don't want to build your own backend. You should never embed your real API keys in a client-side application. Instead, you can have your frontend connect to this reverse proxy and forward requests to the downstream service.
@@ -29,8 +22,8 @@ This proxy only forwards text generation requests to the downstream service and
 
 ---
 
-## Setup Instructions
-Since this is a server, you'll need to deploy it somewhere. A few options are available:
+## Usage Instructions
+If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like.
 
 ### Deploy to Huggingface (Recommended)
 [See here for instructions on how to deploy to a Huggingface Space.](./docs/huggingface.md)
@@ -40,4 +33,18 @@ Still working on this. It's a bit more technical than the Huggingface option; yo
 
 [![Run on Repl.it](https://replit.com/badge/github/nai-degen/oai-reverse-proxy)](https://replit.com/new/github/nai-degen/oai-reverse-proxy)
 
-You'll need to set your secrets in Replit similar to the Huggingface instructions above. Currently .env files don't work properly so it only uses the default configuration.
+You'll need to set your secrets in Repl.it as described in the Huggingface instructions above. Currently, .env files don't work properly, so only the default configuration is used.
+
+## Local Development
+To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.
+
+1. 
Clone the repo +2. Install dependencies with `npm install` +3. Create a `.env` file in the root of the project and add your API keys. See the [.env.example](./.env.example) file for an example. +4. Start the server in development mode with `npm run start:dev`. + +You can also use `npm run start:dev:tsc` to enable project-wide type checking at the cost of slower startup times. `npm run type-check` can be used to run type checking without starting the server. + +## Prompt Logging Setup (Optional) + +See the [Google Sheets logging](./docs/logging-sheets.md) documentation for instructions on how to set up logging to a Google Sheet. diff --git a/docs/huggingface.md b/docs/huggingface.md index 9ddd81c..e9766a9 100644 --- a/docs/huggingface.md +++ b/docs/huggingface.md @@ -25,7 +25,7 @@ RUN apt-get update && \ RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app WORKDIR /app RUN npm install -COPY Dockerfile .env* ./ +COPY Dockerfile greeting.md* .env* ./ RUN npm run build EXPOSE 7860 ENV NODE_ENV=production @@ -60,6 +60,10 @@ To update your server, go to the Settings menu and select `Factory Reboot`. Thi Note that if you just perform a regular Restart, the server will be restarted with the same code that was running before. +## Adding a greeting message + +You can create a Markdown file called `greeting.md` to display a message on the Server Info page. This is a good place to put instructions for how to use the server. + ## Customizing the server The server will be started with some default configuration, but you can override it by adding a `.env` file to your Space. You can use Huggingface's web editor to create a new `.env` file alongside your Dockerfile. Huggingface will restart your server automatically when you save the file. @@ -70,8 +74,6 @@ Here are some example settings: MODEL_RATE_LIMIT=2 # Max tokens to request from OpenAI MAX_OUTPUT_TOKENS=256 -LOG_LEVEL=info -LOG_PROMPTS=false # Block prompts containing disallowed characters REJECT_DISALLOWED=false REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy." diff --git a/docs/logging-sheets.md b/docs/logging-sheets.md new file mode 100644 index 0000000..a880bb4 --- /dev/null +++ b/docs/logging-sheets.md @@ -0,0 +1,52 @@ +# Configuring Google Sheets Prompt Logging +This proxy can log incoming prompts and model responses to Google Sheets. Some configuration on the Google side is required to enable this feature. The APIs used are free, but you will need a Google account and a Google Cloud Platform project. + +NOTE: Concurrency is not supported. Don't connect two instances of the server to the same spreadsheet or bad things will happen. + +## Prerequisites +- A Google account +- A Google Cloud Platform project + +Note that this process grants the proxy software access to your Google Sheets data for the spreadsheet ID you provide. Use a throwaway spreadsheet/Google Account if you're not comfortable with this. + +### 0. Create a Google Cloud Platform Project +_A Google Cloud Platform project is required to enable programmatic access to Google Sheets. If you already have a project, skip to the next step. You can also see the [Google Cloud Platform documentation](https://developers.google.com/workspace/guides/create-project) for more information._ + +- Go to the Google Cloud Platform Console and [create a new project](https://console.cloud.google.com/projectcreate). + +### 1. Enable the Google Sheets API +_The Google Sheets API must be enabled for your project. 
You can also see the [Google Sheets API documentation](https://developers.google.com/sheets/api/quickstart/nodejs) for more information._
+
+- Go to the [Google Sheets API page](https://console.cloud.google.com/apis/library/sheets.googleapis.com) and click **Enable**, then fill in the form to enable the Google Sheets API for your project.
+
+
+### 2. Create a Service Account
+_A service account is required to authenticate the proxy to Google Sheets._
+
+- Once the Google Sheets API is enabled, click the **Credentials** tab on the Google Sheets API page.
+- Click **Create credentials** and select **Service account**.
+- Provide a name for the service account and click **Done** (the second and third steps can be skipped).
+
+### 3. Download the Service Account Key
+_Once your account is created, you'll need to download the key file and include it in the proxy's secrets configuration._
+
+- Click the Service Account you just created in the list of service accounts for the API.
+- Click the **Keys** tab and click **Add key**, then select **Create new key**.
+- Select **JSON** as the key type and click **Create**.
+
+The JSON file will be downloaded to your computer.
+
+### 4. Set the Service Account key as a Secret
+_The JSON key file must be set as a secret in the proxy's configuration. Because files cannot be included in the secrets configuration, you'll need to base64 encode the file's contents and paste the encoded string as the value of the `GOOGLE_SHEETS_KEY` secret._
+
+- Open the JSON key file in a text editor and copy the contents.
+- Visit the [base64 encode/decode tool](https://www.base64encode.org/) and paste the contents into the box, then click **Encode**.
+- Copy the encoded string and paste it as the value of the `GOOGLE_SHEETS_KEY` secret in the deployment's secrets configuration.
+  - **WARNING:** Don't reveal this string publicly. The `.env` file is NOT private -- unless you're running the proxy locally, you should not use it to store secrets!
+
+### 5. Create a new spreadsheet and share it with the service account
+_The service account must be given permission to access the logging spreadsheet. Each service account has a unique email address, which can be found in the JSON key file; share the spreadsheet with that email address just as you would share it with another user._
+
+- Open the JSON key file in a text editor and copy the value of the `client_email` field.
+- Open the spreadsheet you want to log to, or create a new one, and click **File > Share**.
+- Paste the service account's email address into the **Add people or groups** field. Ensure the service account has **Editor** permissions, then click **Done**.
diff --git a/info-page.md b/info-page.md
index 4fd0c22..ed1d22f 100644
--- a/info-page.md
+++ b/info-page.md
@@ -1,5 +1,3 @@
 # OAI Reverse Proxy
-
-This is a simple reverse proxy for the OpenAI API. To use it, refer to the `endpoints` listed in the Service Info section below and paste them into your preferred front-end.
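If you'd rather not paste the key into a web tool, the same encoding from step 4 can be done locally with a few lines of Node.js/TypeScript (a minimal sketch; `service-account.json` is a placeholder for wherever you saved the downloaded key file):

```ts
// encode-key.ts -- prints the base64-encoded service account key.
// Run with: npx ts-node encode-key.ts
import fs from "fs";

const key = fs.readFileSync("service-account.json", "utf8");
// Paste the printed string into the GOOGLE_SHEETS_KEY secret.
console.log(Buffer.from(key, "utf8").toString("base64"));
```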
diff --git a/package-lock.json b/package-lock.json index f705e91..c991bcc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "cors": "^2.8.5", "dotenv": "^16.0.3", "express": "^4.18.2", + "googleapis": "^117.0.0", "http-proxy-middleware": "^3.0.0-beta.1", "pino": "^8.11.0", "pino-http": "^8.3.3", @@ -711,6 +712,14 @@ "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==" }, + "node_modules/arrify": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/arrify/-/arrify-2.0.1.tgz", + "integrity": "sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug==", + "engines": { + "node": ">=8" + } + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -759,6 +768,14 @@ } ] }, + "node_modules/bignumber.js": { + "version": "9.1.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.1.1.tgz", + "integrity": "sha512-pHm4LsMJ6lzgNGVfZHjMoO8sdoRhOzOH4MLmY65Jg70bpxCKu5iOHNJyfF6OyvYw7t8Fpf35RuzUyqnQsj8Vig==", + "engines": { + "node": "*" + } + }, "node_modules/binary-extensions": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", @@ -1151,6 +1168,14 @@ "node": ">=12" } }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/ee-first": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", @@ -1326,6 +1351,11 @@ "node": ">= 0.10.0" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, "node_modules/fast-redact": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/fast-redact/-/fast-redact-3.1.2.tgz", @@ -1334,6 +1364,11 @@ "node": ">=6" } }, + "node_modules/fast-text-encoding": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/fast-text-encoding/-/fast-text-encoding-1.0.6.tgz", + "integrity": "sha512-VhXlQgj9ioXCqGstD37E/HBeqEGV/qOD/kmbVG8h5xKBYvM1L3lR1Zn4555cQ8GkYbJa8aJSipLPndE1k6zK2w==" + }, "node_modules/fill-range": { "version": "7.0.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", @@ -1429,6 +1464,32 @@ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" }, + "node_modules/gaxios": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-5.1.0.tgz", + "integrity": "sha512-aezGIjb+/VfsJtIcHGcBSerNEDdfdHeMros+RbYbGpmonKWQCOVOes0LVZhn1lDtIgq55qq0HaxymIoae3Fl/A==", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^5.0.0", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.7" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/gcp-metadata": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-5.2.0.tgz", + "integrity": 
"sha512-aFhhvvNycky2QyhG+dcfEdHBF0FRbYcf39s6WNHUDysKSrbJ5vuFbjydxBcmewtXeV248GP8dWT3ByPNxsyHCw==", + "dependencies": { + "gaxios": "^5.0.0", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=12" + } + }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -1462,6 +1523,80 @@ "node": ">= 6" } }, + "node_modules/google-auth-library": { + "version": "8.7.0", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-8.7.0.tgz", + "integrity": "sha512-1M0NG5VDIvJZEnstHbRdckLZESoJwguinwN8Dhae0j2ZKIQFIV63zxm6Fo6nM4xkgqUr2bbMtV5Dgo+Hy6oo0Q==", + "dependencies": { + "arrify": "^2.0.0", + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "fast-text-encoding": "^1.0.0", + "gaxios": "^5.0.0", + "gcp-metadata": "^5.0.0", + "gtoken": "^6.1.0", + "jws": "^4.0.0", + "lru-cache": "^6.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/google-p12-pem": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/google-p12-pem/-/google-p12-pem-4.0.1.tgz", + "integrity": "sha512-WPkN4yGtz05WZ5EhtlxNDWPhC4JIic6G8ePitwUWy4l+XPVYec+a0j0Ts47PDtW59y3RwAhUd9/h9ZZ63px6RQ==", + "dependencies": { + "node-forge": "^1.3.1" + }, + "bin": { + "gp12-pem": "build/src/bin/gp12-pem.js" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/googleapis": { + "version": "117.0.0", + "resolved": "https://registry.npmjs.org/googleapis/-/googleapis-117.0.0.tgz", + "integrity": "sha512-F6l7uK5BpPuMoWZQJ07yPgd1o42R5ke1CbxfejPJtCffd9UyWdSvsr7Ah97u9co9Qk1HkNSoCX749rxQmpVj8g==", + "dependencies": { + "google-auth-library": "^8.0.2", + "googleapis-common": "^6.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/googleapis-common": { + "version": "6.0.4", + "resolved": "https://registry.npmjs.org/googleapis-common/-/googleapis-common-6.0.4.tgz", + "integrity": "sha512-m4ErxGE8unR1z0VajT6AYk3s6a9gIMM6EkDZfkPnES8joeOlEtFEJeF8IyZkb0tjPXkktUfYrE4b3Li1DNyOwA==", + "dependencies": { + "extend": "^3.0.2", + "gaxios": "^5.0.1", + "google-auth-library": "^8.0.2", + "qs": "^6.7.0", + "url-template": "^2.0.8", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/gtoken": { + "version": "6.1.2", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-6.1.2.tgz", + "integrity": "sha512-4ccGpzz7YAr7lxrT2neugmXQ3hP9ho2gcaityLVkiUecAiwiy60Ii8gRbZeOsXV19fYaRjgBSshs8kXw+NKCPQ==", + "dependencies": { + "gaxios": "^5.0.1", + "google-p12-pem": "^4.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=12.0.0" + } + }, "node_modules/has": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", @@ -1558,6 +1693,39 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/https-proxy-agent/node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + 
"dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/https-proxy-agent/node_modules/ms": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -1815,6 +1983,33 @@ "node": ">= 0.6" } }, + "node_modules/node-fetch": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.9.tgz", + "integrity": "sha512-DJm/CJkZkRjKKj4Zi4BsKVZh3ValV5IR5s7LVZnW+6YMh0W1BfNA8XSs6DLMGYlId5F3KnA70uu2qepcR08Qqg==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-forge": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.3.1.tgz", + "integrity": "sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA==", + "engines": { + "node": ">= 6.13.0" + } + }, "node_modules/nodemon": { "version": "2.0.22", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-2.0.22.tgz", @@ -2527,6 +2722,11 @@ "node": ">= 0.8" } }, + "node_modules/url-template": { + "version": "2.0.8", + "resolved": "https://registry.npmjs.org/url-template/-/url-template-2.0.8.tgz", + "integrity": "sha512-XdVKMF4SJ0nP/O7XIPB0JwAEuT9lDIYnNsK8yGVe43y0AWoKeJNdv3ZNWh7ksJ6KqQFjOO6ox/VEitLnaVNufw==" + }, "node_modules/utils-merge": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", @@ -2535,6 +2735,14 @@ "node": ">= 0.4.0" } }, + "node_modules/uuid": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", + "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", diff --git a/package.json b/package.json index 6aed518..d027f1f 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "cors": "^2.8.5", "dotenv": "^16.0.3", "express": "^4.18.2", + "googleapis": "^117.0.0", "http-proxy-middleware": "^3.0.0-beta.1", "pino": "^8.11.0", "pino-http": "^8.3.3", diff --git a/src/config.ts b/src/config.ts index 3f9d9ab..4c6cc69 100644 --- a/src/config.ts +++ b/src/config.ts @@ -3,10 +3,12 @@ dotenv.config(); const isDev = process.env.NODE_ENV !== "production"; +type PROMPT_LOGGING_BACKEND = "google_sheets"; + type Config = { /** The port the proxy server will listen on. */ port: number; - /** OpenAI API key, either a single key or a base64-encoded JSON array of key configs. */ + /** OpenAI API key, either a single key or a comma-delimeted list of keys. */ openaiKey?: string; /** Proxy key. If set, requests must provide this key in the Authorization header to use the proxy. */ proxyKey?: string; @@ -20,10 +22,16 @@ type Config = { rejectSampleRate?: number; /** Message to return when rejecting requests. */ rejectMessage?: string; - /** Logging threshold. */ + /** Pino log level. 
*/
   logLevel?: "debug" | "info" | "warn" | "error";
-  /** Whether prompts and responses should be logged. */
-  logPrompts?: boolean; // TODO: Implement prompt logging once we have persistent storage.
+  /** Whether prompts and responses should be logged to persistent storage. */
+  promptLogging?: boolean;
+  /** Which prompt logging backend to use. */
+  promptLoggingBackend?: PROMPT_LOGGING_BACKEND;
+  /** Base64-encoded Google Sheets API key. */
+  googleSheetsKey?: string;
+  /** Google Sheets spreadsheet ID. */
+  googleSheetsSpreadsheetId?: string;
   /** Whether to periodically check keys for usage and validity. */
   checkKeys?: boolean;
 };
@@ -43,11 +51,22 @@
     "This content violates /aicg/'s acceptable use policy."
   ),
   logLevel: getEnvWithDefault("LOG_LEVEL", "info"),
-  logPrompts: getEnvWithDefault("LOG_PROMPTS", false), // Not yet implemented
   checkKeys: getEnvWithDefault("CHECK_KEYS", !isDev),
+  promptLogging: getEnvWithDefault("PROMPT_LOGGING", false),
+  promptLoggingBackend: getEnvWithDefault("PROMPT_LOGGING_BACKEND", undefined),
+  googleSheetsKey: getEnvWithDefault("GOOGLE_SHEETS_KEY", undefined),
+  googleSheetsSpreadsheetId: getEnvWithDefault(
+    "GOOGLE_SHEETS_SPREADSHEET_ID",
+    undefined
+  ),
 } as const;
 
-export const SENSITIVE_KEYS: (keyof Config)[] = ["proxyKey", "openaiKey"];
+export const SENSITIVE_KEYS: (keyof Config)[] = [
+  "proxyKey",
+  "openaiKey",
+  "googleSheetsKey",
+  "googleSheetsSpreadsheetId",
+];
 const getKeys = Object.keys as <T extends object>(obj: T) => Array<keyof T>;
 export function listConfig(): Record<string, string> {
   const result: Record<string, string> = {};
diff --git a/src/info-page.ts b/src/info-page.ts
index d18d578..6c59680 100644
--- a/src/info-page.ts
+++ b/src/info-page.ts
@@ -1,3 +1,4 @@
+import fs from "fs";
 import { Request, Response } from "express";
 import showdown from "showdown";
 import { config, listConfig } from "./config";
@@ -54,11 +55,6 @@ function getInfoPageHtml(host: string) {
     sha: process.env.COMMIT_SHA?.slice(0, 7) || "dev",
   };
 
-  const readme = require("fs").readFileSync("info-page.md", "utf8");
-  const readmeBody = readme.split("---")[2] || readme;
-  const converter = new showdown.Converter();
-  const html = converter.makeHtml(readmeBody);
-
   const pageBody = `<!DOCTYPE html>
 <html lang="en">
   <head>
@@ -66,7 +62,7 @@ function getInfoPageHtml(host: string) {
     <title>OpenAI Reverse Proxy</title>
   </head>
   <body>
-    ${html}
+    ${infoPageHeaderHtml}
     <hr />
     <h2>Service Info</h2>
     <pre>${JSON.stringify(info, null, 2)}</pre>
@@ -75,3 +71,31 @@ function getInfoPageHtml(host: string) { return pageBody; } + +const infoPageHeaderHtml = buildInfoPageHeader(new showdown.Converter()); + +/** + * If the server operator provides a `greeting.md` file, it will be included in + * the rendered info page. + **/ +function buildInfoPageHeader(converter: showdown.Converter) { + const genericInfoPage = fs.readFileSync("info-page.md", "utf8"); + const customGreeting = fs.existsSync("greeting.md") + ? fs.readFileSync("greeting.md", "utf8") + : null; + + let infoBody = genericInfoPage; + if (config.promptLogging) { + infoBody += `\n## Prompt logging is enabled! +The server operator has enabled prompt logging. The prompts you send and the AI responses you receive may be saved. + +Logs are anonymous and do not contain IP addresses or timestamps. + +**If you are uncomfortable with the above, don't send prompts to this proxy!**`; + } + if (customGreeting) { + infoBody += `\n## Server greeting\n +${customGreeting}`; + } + return converter.makeHtml(infoBody); +} diff --git a/src/key-management/key-checker.ts b/src/key-management/key-checker.ts index c844d74..9e59aef 100644 --- a/src/key-management/key-checker.ts +++ b/src/key-management/key-checker.ts @@ -30,7 +30,7 @@ type UpdateFn = typeof KeyPool.prototype.update; export class KeyChecker { private readonly keys: Key[]; - private log = logger.child({ module: "KeyChecker" }); + private log = logger.child({ module: "key-checker" }); private timeout?: NodeJS.Timeout; private updateKey: UpdateFn; private lastCheck = 0; diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts index 304a745..79d2bc3 100644 --- a/src/key-management/key-pool.ts +++ b/src/key-management/key-pool.ts @@ -60,7 +60,7 @@ export type KeyUpdate = Omit< export class KeyPool { private keys: Key[] = []; private checker?: KeyChecker; - private log = logger.child({ module: "KeyPool" }); + private log = logger.child({ module: "key-pool" }); constructor() { const keyString = config.openaiKey; diff --git a/src/prompt-logging/backends/index.ts b/src/prompt-logging/backends/index.ts new file mode 100644 index 0000000..606b148 --- /dev/null +++ b/src/prompt-logging/backends/index.ts @@ -0,0 +1 @@ +export * as sheets from "./sheets"; diff --git a/src/prompt-logging/backends/sheets.ts b/src/prompt-logging/backends/sheets.ts new file mode 100644 index 0000000..796bc3e --- /dev/null +++ b/src/prompt-logging/backends/sheets.ts @@ -0,0 +1,422 @@ +/* Google Sheets backend for prompt logger. Upon every flush, this backend +writes the batch to a Sheets spreadsheet. If the sheet becomes too large, it +will create a new sheet and continue writing there. + +This is essentially a really shitty ORM for Sheets. Absolutely no concurrency +support because it relies on local state to match up with the remote state. */ + +import { google, sheets_v4 } from "googleapis"; +import type { CredentialBody } from "google-auth-library"; +import type { GaxiosResponse } from "googleapis-common"; +import { config } from "../../config"; +import { logger } from "../../logger"; +import { PromptLogEntry } from ".."; + +// There is always a sheet called __index__ which contains a list of all the +// other sheets. We use this rather than iterating over all the sheets in case +// the user needs to manually work with the spreadsheet. +// If no __index__ sheet exists, we will assume that the spreadsheet is empty +// and create one. + +type IndexSheetModel = { + /** + * Stored in cell B2. 
Set on startup; if it changes, we assume that another + * instance of the proxy is writing to the spreadsheet and stop. + */ + lockId: string; + /** + * Data starts at row 4. Row 1-3 are headers + */ + rows: { logSheetName: string; createdAt: string; rowCount: number }[]; +}; + +type LogSheetModel = { + sheetName: string; + rows: { + model: string; + endpoint: string; + promptRaw: string; + promptFlattened: string; + response: string; + }[]; +}; + +const MAX_ROWS_PER_SHEET = 2000; +const log = logger.child({ module: "sheets" }); + +let sheetsClient: sheets_v4.Sheets | null = null; +/** Called when log backend aborts to tell the log queue to stop. */ +let stopCallback: (() => void) | null = null; +/** Lock/synchronization ID for this session. */ +let lockId = Math.random().toString(36).substring(2, 15); +/** In-memory cache of the index sheet. */ +let indexSheet: IndexSheetModel | null = null; +/** In-memory cache of the active log sheet. */ +let activeLogSheet: LogSheetModel | null = null; + +/** + * Loads the __index__ sheet into memory. By default, asserts that the lock ID + * has not changed since the start of the session. + */ +const loadIndexSheet = async (assertLockId = true) => { + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + log.info({ assertLockId }, "Loading __index__ sheet."); + const res = await client.spreadsheets.values.get({ + spreadsheetId: spreadsheetId, + range: "__index__!A1:D", + majorDimension: "ROWS", + }); + const data = assertData(res); + if (!data.values || data.values[2][0] !== "logSheetName") { + log.error({ values: data.values }, "Unexpected format for __index__ sheet"); + throw new Error("Unexpected format for __index__ sheet"); + } + + if (assertLockId) { + const lockIdCell = data.values[1][1]; + if (lockIdCell !== lockId) { + log.error( + { receivedLock: lockIdCell, expectedLock: lockId }, + "Another instance of the proxy is writing to the spreadsheet; stopping." + ); + stop(); + throw new Error(`Lock ID assertion failed`); + } + } + + const rows = data.values.slice(3).map((row) => { + return { + logSheetName: row[0], + createdAt: row[1], + rowCount: row[2], + }; + }); + indexSheet = { lockId, rows }; +}; + +/** Creates empty __index__ sheet for a new spreadsheet. */ +const createIndexSheet = async () => { + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + log.info("Creating empty __index__ sheet."); + const res = await client.spreadsheets.batchUpdate({ + spreadsheetId: spreadsheetId, + requestBody: { + requests: [ + { + addSheet: { + properties: { + title: "__index__", + gridProperties: { rowCount: 1, columnCount: 3 }, + }, + }, + }, + ], + }, + }); + assertData(res); + indexSheet = { lockId, rows: [] }; + await writeIndexSheet(); +}; + +/** Writes contents of in-memory indexSheet to the remote __index__ sheet. 
*/ +const writeIndexSheet = async () => { + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + const headerRows = [ + ["Don't edit this sheet while the server is running.", "", ""], + ["Lock ID", lockId, ""], + ["logSheetName", "createdAt", "rowCount"], + ]; + const contentRows = indexSheet!.rows.map((row) => { + return [row.logSheetName, row.createdAt, row.rowCount]; + }); + log.info("Persisting __index__ sheet."); + await client.spreadsheets.values.batchUpdate({ + spreadsheetId: spreadsheetId, + requestBody: { + valueInputOption: "RAW", + data: [ + { range: "__index__!A1:D", values: [...headerRows, ...contentRows] }, + ], + }, + }); +}; + +/** Creates a new log sheet, adds it to the index, and sets it as active. */ +const createLogSheet = async () => { + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + // Sheet name format is Log_YYYYMMDD_HHMMSS + const sheetName = `Log_${new Date() + .toISOString() + // YYYY-MM-DDTHH:MM:SS.sssZ -> YYYYMMDD_HHMMSS + .replace(/[-:.]/g, "") + .replace(/T/, "_") + .substring(0, 15)}`; + + log.info({ sheetName }, "Creating new log sheet."); + const res = await client.spreadsheets.batchUpdate({ + spreadsheetId: spreadsheetId, + requestBody: { + requests: [ + { + addSheet: { + properties: { + title: sheetName, + gridProperties: { rowCount: MAX_ROWS_PER_SHEET, columnCount: 5 }, + }, + }, + }, + ], + }, + }); + assertData(res); + // Increase row/column size and wrap text for readability. + const sheetId = res.data.replies![0].addSheet!.properties!.sheetId; + await client.spreadsheets.batchUpdate({ + spreadsheetId: spreadsheetId, + requestBody: { + requests: [ + { + repeatCell: { + range: { sheetId }, + cell: { + userEnteredFormat: { + wrapStrategy: "WRAP", + verticalAlignment: "TOP", + }, + }, + fields: "*", + }, + }, + { + updateDimensionProperties: { + range: { + sheetId, + dimension: "COLUMNS", + startIndex: 3, + endIndex: 5, + }, + properties: { pixelSize: 500 }, + fields: "pixelSize", + }, + }, + { + updateDimensionProperties: { + range: { + sheetId, + dimension: "ROWS", + startIndex: 1, + }, + properties: { pixelSize: 200 }, + fields: "pixelSize", + }, + }, + ], + }, + }); + await client.spreadsheets.values.batchUpdate({ + spreadsheetId: spreadsheetId, + requestBody: { + valueInputOption: "RAW", + data: [ + { + range: `${sheetName}!A1:E`, + values: [ + ["model", "endpoint", "prompt json", "prompt string", "response"], + ], + }, + ], + }, + }); + indexSheet!.rows.push({ + logSheetName: sheetName, + createdAt: new Date().toISOString(), + rowCount: 0, + }); + await writeIndexSheet(); + activeLogSheet = { sheetName, rows: [] }; +}; + +export const appendBatch = async (batch: PromptLogEntry[]) => { + if (!activeLogSheet) { + // Create a new log sheet if we don't have one yet. + await createLogSheet(); + } else { + // Check lock to ensure we're the only instance writing to the spreadsheet. 
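+    // loadIndexSheet(true) asserts that cell B2 still holds this session's
+    // lock ID and shuts the backend down if another instance has claimed it.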
+ await loadIndexSheet(true); + } + + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + const sheetName = activeLogSheet!.sheetName; + const newRows = batch.map((entry) => { + return [ + entry.model, + entry.endpoint, + entry.promptRaw, + entry.promptFlattened, + entry.response, + ]; + }); + log.info({ sheetName, rowCount: newRows.length }, "Appending log batch."); + const data = await client.spreadsheets.values.append({ + spreadsheetId: spreadsheetId, + range: `${sheetName}!A1:D`, + valueInputOption: "RAW", + requestBody: { values: newRows, majorDimension: "ROWS" }, + }); + assertData(data); + if (data.data.updates && data.data.updates.updatedRows) { + const newRowCount = data.data.updates.updatedRows; + log.info({ sheetName, rowCount: newRowCount }, "Successfully appended."); + activeLogSheet!.rows = activeLogSheet!.rows.concat( + newRows.map((row) => ({ + model: row[0], + endpoint: row[1], + promptRaw: row[2], + promptFlattened: row[3], + response: row[4], + })) + ); + } else { + // We didn't receive an error but we didn't get any updates either. + // We may need to create a new sheet and throw to make the queue retry the + // batch. + log.warn( + { sheetName, rowCount: newRows.length }, + "No updates received from append. Creating new sheet and retrying." + ); + await createLogSheet(); + throw new Error("No updates received from append."); + } + await finalizeBatch(); +}; + +const finalizeBatch = async () => { + const sheetName = activeLogSheet!.sheetName; + const rowCount = activeLogSheet!.rows.length; + const indexRow = indexSheet!.rows.find( + ({ logSheetName }) => logSheetName === sheetName + )!; + indexRow.rowCount = rowCount; + if (rowCount >= MAX_ROWS_PER_SHEET) { + await createLogSheet(); // Also updates index sheet + } else { + await writeIndexSheet(); + } + log.info({ sheetName, rowCount }, "Batch finalized."); +}; + +type LoadLogSheetArgs = { + sheetName: string; + /** The starting row to load. If omitted, loads all rows (expensive). */ + fromRow?: number; +}; + +/** Not currently used. */ +export const loadLogSheet = async ({ + sheetName, + fromRow = 2, // omit header row +}: LoadLogSheetArgs) => { + const client = sheetsClient!; + const spreadsheetId = config.googleSheetsSpreadsheetId!; + + const range = `${sheetName}!A${fromRow}:E`; + const res = await client.spreadsheets.values.get({ + spreadsheetId: spreadsheetId, + range, + }); + const data = assertData(res); + const values = data.values || []; + const rows = values.slice(1).map((row) => { + return { + model: row[0], + endpoint: row[1], + promptRaw: row[2], + promptFlattened: row[3], + response: row[4], + }; + }); + activeLogSheet = { sheetName, rows }; +}; + +export const init = async (onStop: () => void) => { + if (sheetsClient) { + return; + } + if (!config.googleSheetsKey || !config.googleSheetsSpreadsheetId) { + throw new Error( + "Missing required Google Sheets config. Refer to documentation for setup instructions." + ); + } + + log.info("Initializing Google Sheets backend."); + const encodedCreds = config.googleSheetsKey; + // encodedCreds is a base64-encoded JSON key from the GCP console. 
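+  // See docs/logging-sheets.md for how to create and base64-encode this key.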
+ const creds: CredentialBody = JSON.parse( + Buffer.from(encodedCreds, "base64").toString("utf8").trim() + ); + const auth = new google.auth.GoogleAuth({ + scopes: ["https://www.googleapis.com/auth/spreadsheets"], + credentials: creds, + }); + sheetsClient = google.sheets({ version: "v4", auth }); + stopCallback = onStop; + + const sheetId = config.googleSheetsSpreadsheetId; + const res = await sheetsClient.spreadsheets.get({ + spreadsheetId: sheetId, + }); + if (!res.data) { + const { status, statusText, headers } = res; + log.error( + { + res: { status, statusText, headers }, + creds: { + client_email: creds.client_email?.slice(0, 5) + "********", + private_key: creds.private_key?.slice(0, 5) + "********", + }, + sheetId: config.googleSheetsSpreadsheetId, + }, + "Could not connect to Google Sheets." + ); + stop(); + throw new Error("Could not connect to Google Sheets."); + } else { + const sheetTitle = res.data.properties?.title; + log.info({ sheetId, sheetTitle }, "Connected to Google Sheets."); + } + + // Load or create the index sheet and write the lockId to it. + try { + log.info("Loading index sheet."); + await loadIndexSheet(false); + await writeIndexSheet(); + } catch (e) { + log.info("Creating new index sheet."); + await createIndexSheet(); + } +}; + +/** Called during some unrecoverable error to tell the log queue to stop. */ +function stop() { + log.warn("Stopping Google Sheets backend."); + if (stopCallback) { + stopCallback(); + } + sheetsClient = null; +} + +function assertData(res: GaxiosResponse) { + if (!res.data) { + const { status, statusText, headers } = res; + log.error( + { res: { status, statusText, headers } }, + "Unexpected response from Google Sheets API." + ); + } + return res.data!; +} diff --git a/src/prompt-logging/index.ts b/src/prompt-logging/index.ts new file mode 100644 index 0000000..d93b6fd --- /dev/null +++ b/src/prompt-logging/index.ts @@ -0,0 +1,20 @@ +/* Logs prompts and model responses to a persistent storage backend, if enabled. +Since the proxy is generally deployed to free-tier services, our options for +persistent storage are pretty limited. We'll use Google Sheets as a makeshift +database for now. + +Due to the limitations of Google Sheets, we'll queue up log entries and flush +them to the API periodically. */ + +export interface PromptLogEntry { + model: string; + endpoint: string; + /** JSON prompt passed to the model */ + promptRaw: string; + /** Prompt with user and assistant messages flattened into a single string */ + promptFlattened: string; + response: string; + // TODO: temperature, top_p, top_k, etc. +} + +export * as logQueue from "./log-queue"; diff --git a/src/prompt-logging/log-queue.ts b/src/prompt-logging/log-queue.ts new file mode 100644 index 0000000..ebf377a --- /dev/null +++ b/src/prompt-logging/log-queue.ts @@ -0,0 +1,108 @@ +/* Queues incoming prompts/responses and periodically flushes them to configured + * logging backend. 
*/ + +import { logger } from "../logger"; +import { PromptLogEntry } from "."; +import { sheets } from "./backends"; + +const FLUSH_INTERVAL = 1000 * 20; // 20 seconds +const MAX_BATCH_SIZE = 100; + +const queue: PromptLogEntry[] = []; +const log = logger.child({ module: "log-queue" }); + +let started = false; +let timeoutId: NodeJS.Timeout | null = null; +let retrying = false; +let failedBatchCount = 0; + +export const enqueue = (payload: PromptLogEntry) => { + if (!started) { + log.warn("Log queue not started, discarding incoming log entry."); + return; + } + queue.push(payload); +}; + +export const flush = async () => { + if (!started) { + return; + } + + if (queue.length > 0) { + const batchSize = Math.min(MAX_BATCH_SIZE, queue.length); + const nextBatch = queue.splice(0, batchSize); + log.info({ size: nextBatch.length }, "Submitting new batch."); + try { + await sheets.appendBatch(nextBatch); + retrying = false; + } catch (e: any) { + if (retrying) { + log.error( + { message: e.message, stack: e.stack }, + "Failed twice to flush batch, discarding." + ); + retrying = false; + failedBatchCount++; + } else { + // Put the batch back at the front of the queue and try again + log.warn( + { message: e.message, stack: e.stack }, + "Failed to flush batch. Retrying." + ); + queue.unshift(...nextBatch); + retrying = true; + setImmediate(() => flush()); + return; + } + } + } + + const useHalfInterval = queue.length > MAX_BATCH_SIZE / 2; + scheduleFlush(useHalfInterval); +}; + +export const start = async () => { + try { + await sheets.init(() => stop()); + log.info("Logging backend initialized."); + started = true; + } catch (e) { + log.error(e, "Could not initialize logging backend."); + return; + } + scheduleFlush(); +}; + +export const stop = () => { + if (timeoutId) { + clearTimeout(timeoutId); + } + log.info("Stopping log queue."); + started = false; +}; + +const scheduleFlush = (halfInterval = false) => { + if (failedBatchCount > 5) { + log.error( + { failedBatchCount }, + "Too many failed batches. Stopping prompt logging." + ); + stop(); + return; + } + + if (halfInterval) { + log.warn( + { queueSize: queue.length }, + "Queue is falling behind, switching to faster flush interval." + ); + } + + timeoutId = setTimeout( + () => { + flush(); + }, + halfInterval ? FLUSH_INTERVAL / 2 : FLUSH_INTERVAL + ); +}; diff --git a/src/proxy/kobold.ts b/src/proxy/kobold.ts index d377d93..0a9e743 100644 --- a/src/proxy/kobold.ts +++ b/src/proxy/kobold.ts @@ -4,12 +4,8 @@ requests to OpenAI API equivalents. 
*/ import { Request, Response, Router } from "express"; import http from "http"; import { createProxyMiddleware } from "http-proxy-middleware"; +import { config } from "../config"; import { logger } from "../logger"; -import { - createOnProxyResHandler, - handleInternalError, - ProxyResHandlerWithBody, -} from "./common"; import { ipLimiter } from "./rate-limit"; import { addKey, @@ -18,7 +14,12 @@ import { languageFilter, limitOutputTokens, transformKoboldPayload, -} from "./rewriters"; +} from "./middleware/request"; +import { + createOnProxyResHandler, + handleInternalError, + ProxyResHandlerWithBody, +} from "./middleware/response"; export const handleModelRequest = (_req: Request, res: Response) => { res.status(200).json({ result: "Connected to OpenAI reverse proxy" }); @@ -33,6 +34,7 @@ const rewriteRequest = ( req: Request, res: Response ) => { + req.api = "kobold"; const rewriterPipeline = [ addKey, transformKoboldPayload, @@ -54,7 +56,7 @@ const rewriteRequest = ( const koboldResponseHandler: ProxyResHandlerWithBody = async ( _proxyRes, - _req, + req, res, body ) => { @@ -65,6 +67,11 @@ const koboldResponseHandler: ProxyResHandlerWithBody = async ( const koboldResponse = { results: [{ text: body.choices[0].message.content }], model: body.model, + ...(config.promptLogging && { + proxy_note: `Prompt logging is enabled on this proxy instance. See ${req.get( + "host" + )} for more information.`, + }), }; res.send(JSON.stringify(koboldResponse)); diff --git a/src/proxy/rewriters/add-key.ts b/src/proxy/middleware/request/add-key.ts similarity index 94% rename from src/proxy/rewriters/add-key.ts rename to src/proxy/middleware/request/add-key.ts index 3b8e801..88079e4 100644 --- a/src/proxy/rewriters/add-key.ts +++ b/src/proxy/middleware/request/add-key.ts @@ -1,5 +1,5 @@ +import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../../key-management"; import type { ExpressHttpProxyReqCallback } from "."; -import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management"; /** Add an OpenAI key from the pool to the request. 
*/ export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => { diff --git a/src/proxy/rewriters/disable-stream.ts b/src/proxy/middleware/request/disable-stream.ts similarity index 100% rename from src/proxy/rewriters/disable-stream.ts rename to src/proxy/middleware/request/disable-stream.ts diff --git a/src/proxy/rewriters/finalize-body.ts b/src/proxy/middleware/request/finalize-body.ts similarity index 100% rename from src/proxy/rewriters/finalize-body.ts rename to src/proxy/middleware/request/finalize-body.ts diff --git a/src/proxy/rewriters/index.ts b/src/proxy/middleware/request/index.ts similarity index 90% rename from src/proxy/rewriters/index.ts rename to src/proxy/middleware/request/index.ts index 806cfeb..ffd01b5 100644 --- a/src/proxy/rewriters/index.ts +++ b/src/proxy/middleware/request/index.ts @@ -3,10 +3,11 @@ import type { ClientRequest } from "http"; import type { ProxyReqCallback } from "http-proxy"; export { addKey } from "./add-key"; -export { languageFilter } from "./language-filter"; export { disableStream } from "./disable-stream"; -export { limitOutputTokens } from "./limit-output-tokens"; export { finalizeBody } from "./finalize-body"; +export { languageFilter } from "./language-filter"; +export { limitCompletions } from "./limit-completions"; +export { limitOutputTokens } from "./limit-output-tokens"; export { transformKoboldPayload } from "./transform-kobold-payload"; export type ExpressHttpProxyReqCallback = ProxyReqCallback< diff --git a/src/proxy/rewriters/language-filter.ts b/src/proxy/middleware/request/language-filter.ts similarity index 79% rename from src/proxy/rewriters/language-filter.ts rename to src/proxy/middleware/request/language-filter.ts index 1289a5b..8d3a7e6 100644 --- a/src/proxy/rewriters/language-filter.ts +++ b/src/proxy/middleware/request/language-filter.ts @@ -1,13 +1,13 @@ -import { config } from "../../config"; +import { config } from "../../../config"; +import { logger } from "../../../logger"; import type { ExpressHttpProxyReqCallback } from "."; -import { logger } from "../../logger"; const DISALLOWED_REGEX = /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DB5\u4E00-\u9FD5\uF900-\uFA6D\uFA70-\uFAD9]/; -// Our shitty free-tier will fall over if we test every single character in each -// 15k character request ten times a second. So we'll just sample 20% of the -// characters and hope that's enough. +// Our shitty free-tier VMs will fall over if we test every single character in +// each 15k character request ten times a second. So we'll just sample 20% of +// the characters and hope that's enough. const containsDisallowedCharacters = (text: string) => { const sampleSize = Math.ceil(text.length * (config.rejectSampleRate || 0.2)); const sample = text diff --git a/src/proxy/middleware/request/limit-completions.ts b/src/proxy/middleware/request/limit-completions.ts new file mode 100644 index 0000000..32883b4 --- /dev/null +++ b/src/proxy/middleware/request/limit-completions.ts @@ -0,0 +1,17 @@ +import type { ExpressHttpProxyReqCallback } from "."; + +const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions"; + +/** Don't allow multiple completions to be requested to prevent abuse. 
*/ +export const limitCompletions: ExpressHttpProxyReqCallback = ( + _proxyReq, + req +) => { + if (req.method === "POST" && req.path === OPENAI_CHAT_COMPLETION_ENDPOINT) { + const originalN = req.body?.n || 1; + req.body.n = 1; + if (originalN !== req.body.n) { + req.log.warn(`Limiting completion choices from ${originalN} to 1`); + } + } +}; diff --git a/src/proxy/rewriters/limit-output-tokens.ts b/src/proxy/middleware/request/limit-output-tokens.ts similarity index 91% rename from src/proxy/rewriters/limit-output-tokens.ts rename to src/proxy/middleware/request/limit-output-tokens.ts index aa408d2..8329ac2 100644 --- a/src/proxy/rewriters/limit-output-tokens.ts +++ b/src/proxy/middleware/request/limit-output-tokens.ts @@ -1,6 +1,6 @@ -import { config } from "../../config"; +import { config } from "../../../config"; +import { logger } from "../../../logger"; import type { ExpressHttpProxyReqCallback } from "."; -import { logger } from "../../logger"; const MAX_TOKENS = config.maxOutputTokens; diff --git a/src/proxy/rewriters/transform-kobold-payload.ts b/src/proxy/middleware/request/transform-kobold-payload.ts similarity index 88% rename from src/proxy/rewriters/transform-kobold-payload.ts rename to src/proxy/middleware/request/transform-kobold-payload.ts index 46cdf72..9e37f9b 100644 --- a/src/proxy/rewriters/transform-kobold-payload.ts +++ b/src/proxy/middleware/request/transform-kobold-payload.ts @@ -1,6 +1,5 @@ -import { config } from "../../config"; +import { logger } from "../../../logger"; import type { ExpressHttpProxyReqCallback } from "."; -import { logger } from "../../logger"; // Kobold requests look like this: // body: @@ -55,10 +54,9 @@ import { logger } from "../../logger"; // rep_pen. // messages is an array of { role: "system" | "assistant" | "user", content: ""} -// kobold only sends us the entire prompt. we can try to split the last line and -// use that as the user message and put the rest in the system message -// ideally we'd split the history into user and assistant messages, but that's -// too much work for now +// kobold only sends us the entire prompt. we can try to split the last two +// lines into user and assistant messages, but that's not always correct. For +// now it will have to do. /** Transforms a KoboldAI payload into an OpenAI payload. */ export const transformKoboldPayload: ExpressHttpProxyReqCallback = ( @@ -67,20 +65,22 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = ( ) => { const { body } = req; const { prompt, max_length, rep_pen, top_p, temperature } = body; - + if (!max_length) { - logger.error("KoboldAI request missing max_length"); + logger.error("KoboldAI request missing max_length."); throw new Error("You must specify a max_length parameter."); } const promptLines = prompt.split("\n"); - const lastLine = promptLines.pop(); + // The very last line is the contentless "Assistant: " hint to the AI. + // Tavern just leaves an empty line, Agnai includes the AI's name. + const assistantHint = promptLines.pop(); + // The second-to-last line is the user's prompt, generally. + const userPrompt = promptLines.pop(); const messages = [ { role: "system", content: promptLines.join("\n") }, - // TODO: technically the last line could be another assistant prompt if the - // user requested a multi-turn response. Need to see how Tavern and Agnai - // submit such requests. - { role: "user", content: lastLine }, + { role: "user", content: userPrompt }, + { role: "assistant", content: assistantHint }, ]; // Kobold doesn't select a model. 
If the addKey rewriter assigned us a GPT-4 diff --git a/src/proxy/common.ts b/src/proxy/middleware/response/index.ts similarity index 93% rename from src/proxy/common.ts rename to src/proxy/middleware/response/index.ts index 08f72da..a97b0a3 100644 --- a/src/proxy/common.ts +++ b/src/proxy/middleware/response/index.ts @@ -3,8 +3,9 @@ import * as http from "http"; import util from "util"; import zlib from "zlib"; import * as httpProxy from "http-proxy"; -import { logger } from "../logger"; -import { keyPool } from "../key-management"; +import { logger } from "../../../logger"; +import { keyPool } from "../../../key-management"; +import { logPrompt } from "./log-prompt"; export const QUOTA_ROUTES = ["/v1/chat/completions"]; const DECODER_MAP = { @@ -57,6 +58,7 @@ export const createOnProxyResHandler = (middleware: ProxyResMiddleware) => { handleDownstreamErrors, incrementKeyUsage, copyHttpHeaders, + logPrompt, ...middleware, ]; @@ -72,7 +74,11 @@ export const createOnProxyResHandler = (middleware: ProxyResMiddleware) => { const message = `Error while executing proxy response middleware: ${lastMiddlewareName} (${error.message})`; logger.error( - { error, thrownBy: lastMiddlewareName, key: req.key?.hash }, + { + error: error.stack, + thrownBy: lastMiddlewareName, + key: req.key?.hash, + }, message ); res @@ -137,7 +143,7 @@ const decodeResponseBody: DecodeResponseBodyHandler = async ( * and throw an error to stop the middleware stack. * @throws {Error} HTTP error status code from downstream service */ -export const handleDownstreamErrors: ProxyResHandlerWithBody = async ( +const handleDownstreamErrors: ProxyResHandlerWithBody = async ( proxyRes, req, res, @@ -266,6 +272,12 @@ const copyHttpHeaders: ProxyResHandlerWithBody = async ( if (key === "content-encoding") { return; } + // We're usually using res.json() to send the response, which causes express + // to set content-length. That's not valid for chunked responses and some + // clients will reject it so we need to omit it. + if (key === "transfer-encoding") { + return; + } res.setHeader(key, proxyRes.headers[key] as string); }); }; diff --git a/src/proxy/middleware/response/log-prompt.ts b/src/proxy/middleware/response/log-prompt.ts new file mode 100644 index 0000000..847381d --- /dev/null +++ b/src/proxy/middleware/response/log-prompt.ts @@ -0,0 +1,54 @@ +import { config } from "../../../config"; +import { logQueue } from "../../../prompt-logging"; +import { ProxyResHandlerWithBody } from "."; + +/** If prompt logging is enabled, enqueues the prompt for logging. 
*/
+export const logPrompt: ProxyResHandlerWithBody = async (
+  _proxyRes,
+  req,
+  _res,
+  responseBody
+) => {
+  if (!config.promptLogging) {
+    return;
+  }
+  if (typeof responseBody !== "object") {
+    throw new Error("Expected body to be an object");
+  }
+
+  const model = req.body.model;
+  const promptFlattened = flattenMessages(req.body.messages);
+  const response = getResponseForModel({ model, body: responseBody });
+
+  logQueue.enqueue({
+    model,
+    endpoint: req.api,
+    promptRaw: JSON.stringify(req.body.messages),
+    promptFlattened,
+    response,
+  });
+};
+
+type OaiMessage = {
+  role: "user" | "assistant" | "system";
+  content: string;
+};
+
+const flattenMessages = (messages: OaiMessage[]): string => {
+  return messages.map((m) => `${m.role}: ${m.content}`).join("\n");
+};
+
+const getResponseForModel = ({
+  model,
+  body,
+}: {
+  model: string;
+  body: Record<string, any>;
+}) => {
+  if (model.startsWith("claude")) {
+    // TODO: confirm if there is supposed to be a leading space
+    return body.completion.trim();
+  } else {
+    return body.choices[0].message.content;
+  }
+};
diff --git a/src/proxy/openai.ts b/src/proxy/openai.ts
index b44fe79..346550d 100644
--- a/src/proxy/openai.ts
+++ b/src/proxy/openai.ts
@@ -1,12 +1,8 @@
-import { Request, Response, Router } from "express";
+import { Request, Router } from "express";
 import * as http from "http";
 import { createProxyMiddleware } from "http-proxy-middleware";
+import { config } from "../config";
 import { logger } from "../logger";
-import {
-  createOnProxyResHandler,
-  handleInternalError,
-  ProxyResHandlerWithBody,
-} from "./common";
 import { ipLimiter } from "./rate-limit";
 import {
   addKey,
@@ -14,18 +10,26 @@ import {
   disableStream,
   finalizeBody,
   limitOutputTokens,
-} from "./rewriters";
+  limitCompletions,
+} from "./middleware/request";
+import {
+  createOnProxyResHandler,
+  handleInternalError,
+  ProxyResHandlerWithBody,
+} from "./middleware/response";
 
 const rewriteRequest = (
   proxyReq: http.ClientRequest,
   req: Request,
   res: http.ServerResponse
 ) => {
+  req.api = "openai";
   const rewriterPipeline = [
     addKey,
     languageFilter,
     disableStream,
     limitOutputTokens,
+    limitCompletions,
     finalizeBody,
   ];
 
@@ -41,13 +45,19 @@ const rewriteRequest = (
 
 const openaiResponseHandler: ProxyResHandlerWithBody = async (
   _proxyRes,
-  _req,
+  req,
   res,
   body
 ) => {
   if (typeof body !== "object") {
     throw new Error("Expected body to be an object");
   }
+
+  if (config.promptLogging) {
+    const host = req.get("host");
+    body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
+  }
+
   res.status(200).json(body);
 };
 
@@ -75,6 +85,16 @@ openaiRouter.use((req, _res, next) => {
 });
 openaiRouter.get("/v1/models", openaiProxy);
 openaiRouter.post("/v1/chat/completions", ipLimiter, openaiProxy);
+// If a browser tries to visit a route that doesn't exist, redirect to the info
+// page to help them find the right URL.
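+// Non-browser clients fall through to the JSON 404 handler below.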
+openaiRouter.get("*", (req, res, next) => { + const isBrowser = req.headers["user-agent"]?.includes("Mozilla"); + if (isBrowser) { + res.redirect("/"); + } else { + next(); + } +}); openaiRouter.use((req, res) => { logger.warn(`Blocked openai proxy request: ${req.method} ${req.path}`); res.status(404).json({ error: "Not found" }); diff --git a/src/server.ts b/src/server.ts index 7a4f629..1ca892c 100644 --- a/src/server.ts +++ b/src/server.ts @@ -8,6 +8,7 @@ import { logger } from "./logger"; import { keyPool } from "./key-management"; import { proxyRouter, rewriteTavernRequests } from "./proxy/routes"; import { handleInfoPage } from "./info-page"; +import { logQueue } from "./prompt-logging"; const PORT = config.port; @@ -69,4 +70,9 @@ app.listen(PORT, async () => { `Server listening on port ${PORT}` ); keyPool.init(); + + if (config.promptLogging) { + logger.info("Starting prompt logging..."); + logQueue.start(); + } }); diff --git a/src/types/custom.d.ts b/src/types/custom.d.ts index a384c73..4be7b28 100644 --- a/src/types/custom.d.ts +++ b/src/types/custom.d.ts @@ -5,6 +5,7 @@ declare global { namespace Express { interface Request { key?: Key; + api: "kobold" | "openai" | "anthropic"; } } }
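
For reference, a minimal sketch (not part of the patch) of how the prompt-logging pieces introduced above fit together when exercised outside the proxy; it assumes `PROMPT_LOGGING` and the Google Sheets secrets are configured as described in docs/logging-sheets.md, and the entry values are made up:

```ts
// smoke-test.ts -- exercises the log queue directly, without the proxy.
import { logQueue } from "./src/prompt-logging";

async function main() {
  // Initializes the Google Sheets backend and schedules the periodic flush.
  await logQueue.start();
  logQueue.enqueue({
    model: "gpt-3.5-turbo",
    endpoint: "openai",
    promptRaw: JSON.stringify([{ role: "user", content: "Hello" }]),
    promptFlattened: "user: Hello",
    response: "Hi! How can I help you today?",
  });
  // Force an immediate flush rather than waiting for the 20-second timer.
  await logQueue.flush();
  logQueue.stop();
}

main().catch(console.error);
```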