diff --git a/README.md b/README.md
index 024cce8..59b8be5 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,20 @@
 # OAI Reverse Proxy
-
 Reverse proxy server for various LLM APIs.
 
 ### Table of Contents
-- [What is this?](#what-is-this)
-- [Features](#features)
-- [Usage Instructions](#usage-instructions)
-  - [Self-hosting](#self-hosting)
-  - [Huggingface (outdated, not advised)](#huggingface-outdated-not-advised)
-- [Local Development](#local-development)
+
+* [OAI Reverse Proxy](#oai-reverse-proxy)
+  * [Table of Contents](#table-of-contents)
+  * [What is this?](#what-is-this)
+  * [Features](#features)
+  * [Usage Instructions](#usage-instructions)
+    * [Personal Use (single-user)](#personal-use-single-user)
+      * [Updating](#updating)
+      * [Local Development](#local-development)
+    * [Self-hosting](#self-hosting)
+  * [Building](#building)
+  * [Forking](#forking)
+
 
 ## What is this?
 This project allows you to run a reverse proxy server for various LLM APIs.
@@ -27,37 +33,42 @@ This project allows you to run a reverse proxy server for various LLM APIs.
 - [x] Simple role-based permissions
 - [x] Per-model token quotas
 - [x] Temporary user accounts
-- [x] Prompt and completion logging
+- [x] Event audit logging
+- [x] Optional full logging of prompts and completions
 - [x] Abuse detection and prevention
+  - [x] IP address and user token model invocation rate limits
+  - [x] IP blacklists
+  - [x] Proof-of-work challenge for access by anonymous users
 
 ---
 
 ## Usage Instructions
 
 If you'd like to run your own instance of this server, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like if you know what you're doing and the service supports Node.js.
 
+### Personal Use (single-user)
+If you just want to run the proxy server to use yourself without hosting it for others:
+1. Install [Node.js](https://nodejs.org/en/download/) >= 18.0.0
+2. Clone this repository
+3. Create a `.env` file in the root of the project and add your API keys. See the [.env.example](./.env.example) file for an example.
+4. Install dependencies with `npm install`
+5. Run `npm run build`
+6. Run `npm start`
+
+#### Updating
+You must re-run `npm install` and `npm run build` whenever you pull new changes from the repository.
+
+#### Local Development
+Use `npm run start:dev` to run the proxy in development mode with watch mode enabled. Use `npm run type-check` to run the type checker across the project.
+
 ### Self-hosting
-[See here for instructions on how to self-host the application on your own VPS or local machine.](./docs/self-hosting.md)
+[See here for instructions on how to self-host the application on your own VPS or local machine and expose it to the internet for others to use.](./docs/self-hosting.md)
 
 **Ensure you set the `TRUSTED_PROXIES` environment variable according to your deployment.** Refer to [.env.example](./.env.example) and [config.ts](./src/config.ts) for more information.
 
-### Huggingface (outdated, not advised)
-[See here for instructions on how to deploy to a Huggingface Space.](./docs/deploy-huggingface.md)
-
-## Local Development
-To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.
-
-1. Clone the repo
-2. Install dependencies with `npm install`
-3. Create a `.env` file in the root of the project and add your API keys. See the [.env.example](./.env.example) file for an example.
-4. Start the server in development mode with `npm run start:dev`.
-
-You can also use `npm run start:dev:tsc` to enable project-wide type checking at the cost of slower startup times. `npm run type-check` can be used to run type checking without starting the server.
-
 ## Building
 
-To build the project, run `npm run build`. This will compile the TypeScript code to JavaScript and output it to the `build` directory.
+To build the project, run `npm run build`. This will compile the TypeScript code to JavaScript and output it to the `build` directory. You should run this whenever you pull new changes from the repository. Note that if you are trying to build the server on a very memory-constrained (<= 1GB) VPS, you may need to run the build with `NODE_OPTIONS=--max_old_space_size=2048 npm run build` to avoid running out of memory during the build process, assuming you have swap enabled. The application itself should run fine on a 512MB VPS for most reasonable traffic levels.
 
 ## Forking
-
 If you are forking the repository on GitGud, you may wish to disable GitLab CI/CD or you will be spammed with emails about failed builds due to not having any CI runners. You can do this by going to *Settings > General > Visibility, project features, permissions* and then disabling the "CI/CD" feature.
diff --git a/package.json b/package.json
index 7b9cc48..2718e0e 100644
--- a/package.json
+++ b/package.json
@@ -10,7 +10,6 @@
     "start": "node --trace-deprecation --trace-warnings build/server.js",
     "start:dev": "nodemon --watch src --exec ts-node --transpile-only src/server.ts",
     "start:debug": "ts-node --inspect --transpile-only src/server.ts",
-    "start:replit": "tsc && node build/server.js",
     "start:watch": "nodemon --require source-map-support/register build/server.js",
     "type-check": "tsc --noEmit"
   },
diff --git a/src/proxy/middleware/request/preprocessors/language-filter.ts b/src/proxy/middleware/request/preprocessors/language-filter.ts
index 345aa75..05cb20e 100644
--- a/src/proxy/middleware/request/preprocessors/language-filter.ts
+++ b/src/proxy/middleware/request/preprocessors/language-filter.ts
@@ -1,4 +1,5 @@
 import { Request } from "express";
+import { z } from "zod";
 import { config } from "../../../../config";
 import { assertNever } from "../../../../shared/utils";
 import { RequestPreprocessor } from "../index";
@@ -8,6 +9,7 @@ import {
   OpenAIChatMessage,
   flattenAnthropicMessages,
 } from "../../../../shared/api-schemas";
+import { GoogleAIV1GenerateContentSchema } from "../../../../shared/api-schemas/google-ai";
 
 const rejectedClients = new Map();
 
@@ -50,6 +52,10 @@ export const languageFilter: RequestPreprocessor = async (req) => {
   }
 };
 
+/*
+TODO: this is not type safe and does not raise errors if request body zod schema
+is changed.
+*/
 function getPromptFromRequest(req: Request) {
   const service = req.outboundApi;
   const body = req.body;
@@ -75,8 +81,13 @@ function getPromptFromRequest(req: Request) {
     case "openai-image":
     case "mistral-text":
       return body.prompt;
-    case "google-ai":
-      return body.prompt.text;
+    case "google-ai": {
+      const b = body as z.infer<typeof GoogleAIV1GenerateContentSchema>;
+      return [
+        b.systemInstruction?.parts.map((p) => p.text),
+        ...b.contents.flatMap((c) => c.parts.map((p) => p.text)),
+      ].join("\n");
+    }
     default:
       assertNever(service);
   }
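
For context on the `google-ai` branch added above: a Gemini `generateContent` request spreads the prompt across an optional `systemInstruction` and an array of `contents`, each holding `parts` with `text` fields. The sketch below is a minimal, self-contained rendering of that extraction; the interfaces and the `getGoogleAIPromptText` helper are illustrative stand-ins rather than the project's actual types (the real shape is defined by the `GoogleAIV1GenerateContentSchema` zod schema, and real Gemini parts can also carry non-text payloads that this sketch ignores). It flattens the parts before joining, which is written slightly differently from the inline version in the diff but has the same effect for filtering purposes.

```ts
// Illustrative stand-ins for the body shape that GoogleAIV1GenerateContentSchema
// validates (the real definitions live in src/shared/api-schemas/google-ai.ts).
interface GoogleAIPart { text: string }
interface GoogleAIContent { role?: string; parts: GoogleAIPart[] }
interface GoogleAIGenerateContentBody {
  systemInstruction?: GoogleAIContent;
  contents: GoogleAIContent[];
}

// Collect the system instruction (if any) and every message part, then join
// the text into one newline-separated string for the language filter to scan.
function getGoogleAIPromptText(body: GoogleAIGenerateContentBody): string {
  const parts = [
    ...(body.systemInstruction?.parts ?? []),
    ...body.contents.flatMap((c) => c.parts),
  ];
  return parts.map((p) => p.text).join("\n");
}

// Example: a two-turn Gemini-style request flattened for filtering.
const sample: GoogleAIGenerateContentBody = {
  systemInstruction: { parts: [{ text: "You are a helpful assistant." }] },
  contents: [
    { role: "user", parts: [{ text: "Bonjour !" }] },
    { role: "model", parts: [{ text: "Hello! How can I help?" }] },
  ],
};
console.log(getGoogleAIPromptText(sample));
```

Flattening each API's request into a single string keeps the downstream language check identical regardless of which upstream API the request targets; only the per-API extraction in `getPromptFromRequest` differs.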