Add docs and support for Render.com deployments (khanon/oai-reverse-proxy!9)

This commit is contained in:
nai-degen 2023-05-15 21:47:30 +00:00
parent 7f202dc9ef
commit ec4e7a3671
13 changed files with 152 additions and 72 deletions

View File

@ -2,12 +2,13 @@
# have sensible defaults. See config.ts for more details. # have sensible defaults. See config.ts for more details.
# PORT=7860 # PORT=7860
# SERVER_TITLE=Coom Tunnel
# MODEL_RATE_LIMIT=4 # MODEL_RATE_LIMIT=4
# MAX_OUTPUT_TOKENS=300 # MAX_OUTPUT_TOKENS=300
# LOG_LEVEL=info # LOG_LEVEL=info
# REJECT_DISALLOWED=false # REJECT_DISALLOWED=false
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy." # REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
# CHECK_KEYS=false # CHECK_KEYS=true
# QUOTA_DISPLAY_MODE=full # QUOTA_DISPLAY_MODE=full
# QUEUE_MODE=fair # QUEUE_MODE=fair
@ -23,10 +24,9 @@
# PROMPT_LOGGING=false # PROMPT_LOGGING=false
# ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------
# !!! EVERYTHING BELOW IS FOR LOCAL DEVELOPMENT ONLY !!! # The values below are secret -- make sure they are set securely.
# If you are deploying this to Huggingface Spaces, do not set any keys here as # For Huggingface, set them via the Secrets section in your Space's config UI.
# the .env file is public and your keys will be exposed. # For Render, create a "secret file" called .env using the Environment tab.
# Sensitive keys should be set in the Secrets UI instead.
# You can add multiple keys by separating them with a comma. # You can add multiple keys by separating them with a comma.
OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

27
.replit
View File

@ -1,27 +0,0 @@
run = "npm run start:replit"
hidden = [".build", ".config"]
[packager]
language = "nodejs"
[packager.features]
enabledForHosting = false
packageSearch = true
guessImports = true
[nix]
channel = "stable-22_11"
[gitHubImport]
requiredFiles = [".replit", "replit.nix", ".config"]
[languages]
[languages.typescript]
pattern = "**/{*.ts,*.js,*.tsx,*.jsx}"
[languages.typescript.languageServer]
start = "typescript-language-server --stdio"
[deployment]
run = ["sh", "-c", "npm run start:replit"]

View File

@ -26,14 +26,10 @@ This proxy only forwards text generation requests to the downstream service and
If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like. If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like.
### Deploy to Huggingface (Recommended) ### Deploy to Huggingface (Recommended)
[See here for instructions on how to deploy to a Huggingface Space.](./docs/huggingface.md) [See here for instructions on how to deploy to a Huggingface Space.](./docs/deploy-huggingface.md)
### Deploy to Repl.it (WIP) ### Deploy to Render
Still working on this. It's a bit more technical than the Huggingface option; you can give it a shot by clicking on the button below. [See here for instructions on how to deploy to Render.com.](./docs/deploy-render.md)
[![Run on Repl.it](https://replit.com/badge/github/nai-degen/oai-reverse-proxy)](https://replit.com/new/github/nai-degen/oai-reverse-proxy)
You'll need to set your secrets in Repl.it similar to the Huggingface instructions above. Currently .env files don't work properly so it only uses the default configuration.
## Local Development ## Local Development
To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below. To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.

View File

@ -1,13 +0,0 @@
FROM node:18-bullseye-slim
WORKDIR /app
COPY package*.json ./
RUN npm install
COPY . .
RUN npm run build
EXPOSE 7860
CMD [ "npm", "start" ]

View File

@ -0,0 +1,11 @@
# Builds the proxy from a fresh clone of the upstream repository instead of
# from the local build context, then overlays a few optional local files.
FROM node:18-bullseye-slim
# git is needed for the clone step below.
RUN apt-get update && \
apt-get install -y git
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app
RUN npm install
# Copy optional greeting/.env files from the build context over the clone.
# NOTE(review): Dockerfile is presumably listed so COPY always has at least one
# matching source when neither glob matches anything -- confirm.
COPY Dockerfile greeting.md* .env* ./
RUN npm run build
# NOTE(review): 7860 presumably matches the app's default PORT -- confirm.
EXPOSE 7860
ENV NODE_ENV=production
CMD [ "npm", "start" ]

27
docker/render/Dockerfile Normal file
View File

@ -0,0 +1,27 @@
# syntax = docker/dockerfile:1.2
# Image for Render.com deployments: optionally downloads a greeting file,
# installs dependencies, builds the app from the build context, and appends
# the mounted secret file to .env.
FROM node:18-bullseye-slim
# curl is needed to download the optional greeting file below.
RUN apt-get update && \
apt-get install -y curl
# Unlike Huggingface, Render can only deploy straight from a git repo and
# doesn't allow you to create or modify arbitrary files via the web UI.
# To use a greeting file, set `GREETING_URL` to a URL that points to a raw
# text file containing your greeting, such as a GitHub Gist.
# You may need to clear the build cache if you change the greeting, otherwise
# Render will use the cached layer from the previous build.
WORKDIR /app
ARG GREETING_URL
# Download only when GREETING_URL is non-empty; otherwise this step creates no
# greeting.md.
RUN if [ -n "$GREETING_URL" ]; then \
curl -sL "$GREETING_URL" > greeting.md; \
fi
# Copy the package manifests (and a greeting.md from the build context, if one
# exists) before installing dependencies. A greeting.md in the build context
# overwrites the one downloaded above.
COPY package*.json greeting.md* ./
RUN npm install
COPY . .
RUN npm run build
# Mount the build secret with id `_env` at /etc/secrets/.env for this step and
# append its contents to .env in /app, so the values end up in the image
# filesystem.
# NOTE(review): `_env` is presumably how Render exposes the ".env" secret file
# as a build secret -- confirm against Render's documentation.
RUN --mount=type=secret,id=_env,dst=/etc/secrets/.env cat /etc/secrets/.env >> .env
# NOTE(review): 10000 is presumably the port Render routes traffic to; the app
# must listen on it -- confirm.
EXPOSE 10000
ENV NODE_ENV=production
CMD [ "npm", "start" ]

49
docs/deploy-render.md Normal file
View File

@ -0,0 +1,49 @@
# Deploy to Render.com
Render.com offers a free tier that includes 750 hours of compute time per month. This is enough to run a single proxy instance 24/7. Instances shut down after 15 minutes without traffic but start up again automatically when a request is received.
### 1. Create account
- [Sign up for Render.com](https://render.com/) to create an account and access the dashboard.
### 2. Create a service using a Blueprint
Render allows you to deploy and automatically configure a repository containing a [render.yaml](../render.yaml) file using its Blueprints feature. This is the easiest way to get started.
- Click the **Blueprints** tab at the top of the dashboard.
- Click **New Blueprint Instance**.
- Under **Public Git repository**, enter `https://gitlab.com/khanon/oai-proxy`.
- Note that this is not the GitGud repository, but a mirror on GitLab.
- Click **Continue**.
- Under **Blueprint Name**, enter a name.
- Under **Branch**, enter `main`.
- Click **Create New Resources**.
The service will be created according to the instructions in the `render.yaml` file. This will take a few minutes. It may show that the build failed, which is normal as you have not yet set the required environment variables.
### 3. Set environment variables
- Return to the **Dashboard** tab.
- Click the name of the service you just created, which may show as "Deploy failed".
- Click the **Environment** tab.
- Click **Add Secret File**.
- Under **Filename**, enter `.env`.
- Under **Contents**, enter all of your environment variables, one per line, in the format `NAME=value`.
- For example, `OPENAI_KEY=sk-abc123`.
- Click **Save Changes**.
The service will automatically rebuild and deploy with the new environment variables. This will take a few minutes. The link to your deployed proxy will appear at the top of the page.
# Optional
## Updating the server
To update your server, go to the page for your Web Service and click **Manual Deploy** > **Deploy latest commit**. This will pull the latest version of the code and redeploy the server.
_If you have trouble with this, you can also try selecting **Clear build cache & deploy** instead from the same menu._
## Adding a greeting message
To show a greeting message on the Server Info page, set the `GREETING_URL` environment variable within Render to the URL of a Markdown file. This URL should point to a raw text file, not an HTML page. You can use a public GitHub Gist or GitLab Snippet for this. For example: `GREETING_URL=https://gitlab.com/-/snippets/2542011/raw/main/greeting.md`. You can change the title of the page by setting the `SERVER_TITLE` environment variable.
Don't set `GREETING_URL` in the `.env` secret file you created earlier; it must be set in Render's environment variables section for it to work correctly.
## Customizing the server
You can customize the server by editing the `.env` configuration you created earlier. Refer to [.env.example](../.env.example) for a list of all available configuration options. Further information can be found in the [config.ts](../src/config.ts) file.

10
render.yaml Normal file
View File

@ -0,0 +1,10 @@
# Render Blueprint: defines a single Docker-based web service built from the
# GitLab mirror of the repository.
services:
- type: web
name: oai-proxy
env: docker
repo: https://gitlab.com/khanon/oai-proxy.git
region: oregon
plan: free
branch: main
# Served by the app's GET /health route, which replies with status 200.
healthCheckPath: /health
# Dockerfile used to build the service image.
dockerfilePath: ./docker/render/Dockerfile

View File

@ -1,9 +0,0 @@
{ pkgs }: {
deps = [
pkgs.nodejs-18_x
pkgs.yarn
pkgs.esbuild
pkgs.nodePackages.typescript
pkgs.nodePackages.typescript-language-server
];
}

View File

@ -16,7 +16,8 @@ export const handleInfoPage = (req: Request, res: Response) => {
return; return;
} }
// Huggingface puts spaces behind some cloudflare ssl proxy, so `req.protocol` is `http` but the correct URL is actually `https` // Some load balancers/reverse proxies don't give us the right protocol in
// the host header. Huggingface works this way, Cloudflare does not.
const host = req.get("host"); const host = req.get("host");
const isHuggingface = host?.includes("hf.space"); const isHuggingface = host?.includes("hf.space");
const protocol = isHuggingface ? "https" : req.protocol; const protocol = isHuggingface ? "https" : req.protocol;
@ -80,12 +81,10 @@ function cacheInfoPageHtml(host: string) {
...getQueueInformation(), ...getQueueInformation(),
keys: keyInfo, keys: keyInfo,
config: listConfig(), config: listConfig(),
commitSha: process.env.COMMIT_SHA || "dev", build: process.env.COMMIT_SHA || "dev",
}; };
const title = process.env.SPACE_ID const title = getServerTitle();
? `${process.env.SPACE_AUTHOR_NAME} / ${process.env.SPACE_TITLE}`
: "OAI Reverse Proxy";
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title); const headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
const pageBody = `<!DOCTYPE html> const pageBody = `<!DOCTYPE html>
@ -160,3 +159,22 @@ function getQueueInformation() {
estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait", estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait",
}; };
} }
/**
 * Picks the title to display for this server from environment variables.
 *
 * Precedence:
 *  1. `SERVER_TITLE`, when set to a non-empty value.
 *  2. `"<SPACE_AUTHOR_NAME> / <SPACE_TITLE>"` when `SPACE_ID` is set
 *     (Huggingface).
 *  3. `"Render / <RENDER_SERVICE_NAME>"` when `RENDER` is set.
 *  4. The generic fallback `"OAI Reverse Proxy"`.
 *
 * Name variables that are unset are interpolated as the text `undefined`.
 *
 * @returns The resolved title string.
 */
function getServerTitle() {
  const {
    SERVER_TITLE: customTitle,
    SPACE_ID: spaceId,
    SPACE_AUTHOR_NAME: spaceAuthor,
    SPACE_TITLE: spaceTitle,
    RENDER: onRender,
    RENDER_SERVICE_NAME: renderService,
  } = process.env;

  // An explicitly configured title always wins.
  if (customTitle) return customTitle;

  // Huggingface
  if (spaceId) return `${spaceAuthor} / ${spaceTitle}`;

  // Render
  if (onRender) return `Render / ${renderService}`;

  return "OAI Reverse Proxy";
}

View File

@ -22,8 +22,12 @@ app.use(
pinoHttp({ pinoHttp({
quietReqLogger: true, quietReqLogger: true,
logger, logger,
// SillyTavern spams the hell out of this endpoint so don't log it autoLogging: {
autoLogging: { ignore: (req) => req.url === "/proxy/kobold/api/v1/model" }, ignore: (req) => {
const ignored = ["/proxy/kobold/api/v1/model", "/health"];
return ignored.includes(req.url as string);
},
},
redact: { redact: {
paths: [ paths: [
"req.headers.cookie", "req.headers.cookie",
@ -36,6 +40,8 @@ app.use(
}, },
}) })
); );
app.get("/health", (_req, res) => res.sendStatus(200));
app.use((req, _res, next) => { app.use((req, _res, next) => {
req.startTime = Date.now(); req.startTime = Date.now();
req.retryCount = 0; req.retryCount = 0;
@ -46,9 +52,10 @@ app.use(
express.json({ limit: "10mb" }), express.json({ limit: "10mb" }),
express.urlencoded({ extended: true, limit: "10mb" }) express.urlencoded({ extended: true, limit: "10mb" })
); );
// TODO: this works if we're always being deployed to Huggingface but if users
// deploy this somewhere without a load balancer then incoming requests can // TODO: Detect (or support manual configuration of) whether the app is behind
// spoof the X-Forwarded-For header and bypass the rate limiting. // a load balancer/reverse proxy, which is necessary to determine request IP
// addresses correctly.
app.set("trust proxy", true); app.set("trust proxy", true);
// routes // routes
@ -126,6 +133,17 @@ function registerUncaughtExceptionHandler() {
} }
function setGitSha() { function setGitSha() {
// On Render, the .git directory isn't available in the docker build context
// so we can't get the SHA directly, but they expose it as an env variable.
if (process.env.RENDER) {
const shaString = `${process.env.RENDER_GIT_COMMIT?.slice(0, 7)} (${
process.env.RENDER_GIT_REPO_SLUG
})`;
process.env.COMMIT_SHA = shaString;
logger.info({ sha: shaString }, "Got commit SHA via Render config.");
return;
}
try { try {
// Huggingface seems to have changed something about how they deploy Spaces // Huggingface seems to have changed something about how they deploy Spaces
// and git commands fail because of some ownership issue with the .git // and git commands fail because of some ownership issue with the .git

View File

@ -7,7 +7,7 @@ declare global {
interface Request { interface Request {
key?: Key; key?: Key;
api: "kobold" | "openai" | "anthropic"; api: "kobold" | "openai" | "anthropic";
user: User; user?: User;
isStreaming?: boolean; isStreaming?: boolean;
startTime: number; startTime: number;
retryCount: number; retryCount: number;