Add docs and support for Render.com deployments (khanon/oai-reverse-proxy!9)
This commit is contained in:
parent
7f202dc9ef
commit
ec4e7a3671
10
.env.example
10
.env.example
|
@ -2,12 +2,13 @@
|
||||||
# have sensible defaults. See config.ts for more details.
|
# have sensible defaults. See config.ts for more details.
|
||||||
|
|
||||||
# PORT=7860
|
# PORT=7860
|
||||||
|
# SERVER_TITLE=Coom Tunnel
|
||||||
# MODEL_RATE_LIMIT=4
|
# MODEL_RATE_LIMIT=4
|
||||||
# MAX_OUTPUT_TOKENS=300
|
# MAX_OUTPUT_TOKENS=300
|
||||||
# LOG_LEVEL=info
|
# LOG_LEVEL=info
|
||||||
# REJECT_DISALLOWED=false
|
# REJECT_DISALLOWED=false
|
||||||
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
|
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
|
||||||
# CHECK_KEYS=false
|
# CHECK_KEYS=true
|
||||||
# QUOTA_DISPLAY_MODE=full
|
# QUOTA_DISPLAY_MODE=full
|
||||||
# QUEUE_MODE=fair
|
# QUEUE_MODE=fair
|
||||||
|
|
||||||
|
@ -23,10 +24,9 @@
|
||||||
# PROMPT_LOGGING=false
|
# PROMPT_LOGGING=false
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# !!! EVERYTHING BELOW IS FOR LOCAL DEVELOPMENT ONLY !!!
|
# The values below are secret -- make sure they are set securely.
|
||||||
# If you are deploying this to Huggingface Spaces, do not set any keys here as
|
# For Huggingface, set them via the Secrets section in your Space's config UI.
|
||||||
# the .env file is public and your keys will be exposed.
|
# For Render, create a "secret file" called .env using the Environment tab.
|
||||||
# Sensitive keys should be set in the Secrets UI instead.
|
|
||||||
|
|
||||||
# You can add multiple keys by separating them with a comma.
|
# You can add multiple keys by separating them with a comma.
|
||||||
OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||||
|
|
27
.replit
27
.replit
|
@ -1,27 +0,0 @@
|
||||||
run = "npm run start:replit"
|
|
||||||
hidden = [".build", ".config"]
|
|
||||||
|
|
||||||
[packager]
|
|
||||||
language = "nodejs"
|
|
||||||
|
|
||||||
[packager.features]
|
|
||||||
enabledForHosting = false
|
|
||||||
packageSearch = true
|
|
||||||
guessImports = true
|
|
||||||
|
|
||||||
[nix]
|
|
||||||
channel = "stable-22_11"
|
|
||||||
|
|
||||||
[gitHubImport]
|
|
||||||
requiredFiles = [".replit", "replit.nix", ".config"]
|
|
||||||
|
|
||||||
[languages]
|
|
||||||
|
|
||||||
[languages.typescript]
|
|
||||||
pattern = "**/{*.ts,*.js,*.tsx,*.jsx}"
|
|
||||||
|
|
||||||
[languages.typescript.languageServer]
|
|
||||||
start = "typescript-language-server --stdio"
|
|
||||||
|
|
||||||
[deployment]
|
|
||||||
run = ["sh", "-c", "npm run start:replit"]
|
|
10
README.md
10
README.md
|
@ -26,14 +26,10 @@ This proxy only forwards text generation requests to the downstream service and
|
||||||
If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like.
|
If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like.
|
||||||
|
|
||||||
### Deploy to Huggingface (Recommended)
|
### Deploy to Huggingface (Recommended)
|
||||||
[See here for instructions on how to deploy to a Huggingface Space.](./docs/huggingface.md)
|
[See here for instructions on how to deploy to a Huggingface Space.](./docs/deploy-huggingface.md)
|
||||||
|
|
||||||
### Deploy to Repl.it (WIP)
|
### Deploy to Render
|
||||||
Still working on this. It's a bit more technical than the Huggingface option; you can give it a shot by clicking on the button below.
|
[See here for instructions on how to deploy to Render.com.](./docs/deploy-render.md)
|
||||||
|
|
||||||
[![Run on Repl.it](https://replit.com/badge/github/nai-degen/oai-reverse-proxy)](https://replit.com/new/github/nai-degen/oai-reverse-proxy)
|
|
||||||
|
|
||||||
You'll need to set your secrets in Repl.it similar to the Huggingface instructions above. Currently .env files don't work properly so it only uses the default configuration.
|
|
||||||
|
|
||||||
## Local Development
|
## Local Development
|
||||||
To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.
|
To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
FROM node:18-bullseye-slim
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
COPY package*.json ./
|
|
||||||
RUN npm install
|
|
||||||
|
|
||||||
COPY . .
|
|
||||||
RUN npm run build
|
|
||||||
|
|
||||||
EXPOSE 7860
|
|
||||||
|
|
||||||
CMD [ "npm", "start" ]
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
FROM node:18-bullseye-slim
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y git
|
||||||
|
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
|
||||||
|
WORKDIR /app
|
||||||
|
RUN npm install
|
||||||
|
COPY Dockerfile greeting.md* .env* ./
|
||||||
|
RUN npm run build
|
||||||
|
EXPOSE 7860
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
CMD [ "npm", "start" ]
|
|
@ -0,0 +1,27 @@
|
||||||
|
# syntax = docker/dockerfile:1.2
|
||||||
|
|
||||||
|
FROM node:18-bullseye-slim
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y curl
|
||||||
|
|
||||||
|
# Unlike Huggingface, Render can only deploy straight from a git repo and
|
||||||
|
# doesn't allow you to create or modify arbitrary files via the web UI.
|
||||||
|
# To use a greeting file, set `GREETING_URL` to a URL that points to a raw
|
||||||
|
# text file containing your greeting, such as a GitHub Gist.
|
||||||
|
|
||||||
|
# You may need to clear the build cache if you change the greeting, otherwise
|
||||||
|
# Render will use the cached layer from the previous build.
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
ARG GREETING_URL
|
||||||
|
RUN if [ -n "$GREETING_URL" ]; then \
|
||||||
|
curl -sL "$GREETING_URL" > greeting.md; \
|
||||||
|
fi
|
||||||
|
COPY package*.json greeting.md* ./
|
||||||
|
RUN npm install
|
||||||
|
COPY . .
|
||||||
|
RUN npm run build
|
||||||
|
RUN --mount=type=secret,id=_env,dst=/etc/secrets/.env cat /etc/secrets/.env >> .env
|
||||||
|
EXPOSE 10000
|
||||||
|
ENV NODE_ENV=production
|
||||||
|
CMD [ "npm", "start" ]
|
|
@ -0,0 +1,49 @@
|
||||||
|
# Deploy to Render.com
|
||||||
|
Render.com offers a free tier that includes 750 hours of compute time per month. This is enough to run a single proxy instance 24/7. Instances shut down after 15 minutes without traffic but start up again automatically when a request is received.
|
||||||
|
|
||||||
|
### 1. Create account
|
||||||
|
- [Sign up for Render.com](https://render.com/) to create an account and access the dashboard.
|
||||||
|
|
||||||
|
### 2. Create a service using a Blueprint
|
||||||
|
Render allows you to deploy and automatically configure a repository containing a [render.yaml](../render.yaml) file using its Blueprints feature. This is the easiest way to get started.
|
||||||
|
|
||||||
|
- Click the **Blueprints** tab at the top of the dashboard.
|
||||||
|
- Click **New Blueprint Instance**.
|
||||||
|
- Under **Public Git repository**, enter `https://gitlab.com/khanon/oai-proxy`.
|
||||||
|
- Note that this is not the GitGud repository, but a mirror on GitLab.
|
||||||
|
- Click **Continue**.
|
||||||
|
- Under **Blueprint Name**, enter a name.
|
||||||
|
- Under **Branch**, enter `main`.
|
||||||
|
- Click **Create New Resources**.
|
||||||
|
|
||||||
|
The service will be created according to the instructions in the `render.yaml` file. This will take a few minutes. It may show that the build failed, which is normal as you have not yet set the required environment variables.
|
||||||
|
|
||||||
|
### 3. Set environment variables
|
||||||
|
- Return to the **Dashboard** tab.
|
||||||
|
- Click the name of the service you just created, which may show as "Deploy failed".
|
||||||
|
- Click the **Environment** tab.
|
||||||
|
- Click **Add Secret File**.
|
||||||
|
- Under **Filename**, enter `.env`.
|
||||||
|
- Under **Contents**, enter all of your environment variables, one per line, in the format `NAME=value`.
|
||||||
|
- For example, `OPENAI_KEY=sk-abc123`.
|
||||||
|
- Click **Save Changes**.
|
||||||
|
|
||||||
|
The service will automatically rebuild and deploy with the new environment variables. This will take a few minutes. The link to your deployed proxy will appear at the top of the page.
|
||||||
|
|
||||||
|
# Optional
|
||||||
|
|
||||||
|
## Updating the server
|
||||||
|
|
||||||
|
To update your server, go to the page for your Web Service and click **Manual Deploy** > **Deploy latest commit**. This will pull the latest version of the code and redeploy the server.
|
||||||
|
|
||||||
|
_If you have trouble with this, you can also try selecting **Clear build cache & deploy** instead from the same menu._
|
||||||
|
|
||||||
|
## Adding a greeting message
|
||||||
|
|
||||||
|
To show a greeting message on the Server Info page, set the `GREETING_URL` environment variable within Render to the URL of a Markdown file. This URL should point to a raw text file, not an HTML page. You can use a public GitHub Gist or GitLab Snippet for this. For example: `GREETING_URL=https://gitlab.com/-/snippets/2542011/raw/main/greeting.md`. You can change the title of the page by setting the `SERVER_TITLE` environment variable.
|
||||||
|
|
||||||
|
Don't set `GREETING_URL` in the `.env` secret file you created earlier; it must be set in Render's environment variables section for it to work correctly.
|
||||||
|
|
||||||
|
## Customizing the server
|
||||||
|
|
||||||
|
You can customize the server by editing the `.env` configuration you created earlier. Refer to [.env.example](../.env.example) for a list of all available configuration options. Further information can be found in the [config.ts](../src/config.ts) file.
|
|
@ -0,0 +1,10 @@
|
||||||
|
services:
|
||||||
|
- type: web
|
||||||
|
name: oai-proxy
|
||||||
|
env: docker
|
||||||
|
repo: https://gitlab.com/khanon/oai-proxy.git
|
||||||
|
region: oregon
|
||||||
|
plan: free
|
||||||
|
branch: main
|
||||||
|
healthCheckPath: /health
|
||||||
|
dockerfilePath: ./docker/render/Dockerfile
|
|
@ -1,9 +0,0 @@
|
||||||
{ pkgs }: {
|
|
||||||
deps = [
|
|
||||||
pkgs.nodejs-18_x
|
|
||||||
pkgs.yarn
|
|
||||||
pkgs.esbuild
|
|
||||||
pkgs.nodePackages.typescript
|
|
||||||
pkgs.nodePackages.typescript-language-server
|
|
||||||
];
|
|
||||||
}
|
|
|
@ -16,7 +16,8 @@ export const handleInfoPage = (req: Request, res: Response) => {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Huggingface puts spaces behind some cloudflare ssl proxy, so `req.protocol` is `http` but the correct URL is actually `https`
|
// Some load balancers/reverse proxies don't give us the right protocol in
|
||||||
|
// the host header. Huggingface works this way, Cloudflare does not.
|
||||||
const host = req.get("host");
|
const host = req.get("host");
|
||||||
const isHuggingface = host?.includes("hf.space");
|
const isHuggingface = host?.includes("hf.space");
|
||||||
const protocol = isHuggingface ? "https" : req.protocol;
|
const protocol = isHuggingface ? "https" : req.protocol;
|
||||||
|
@ -80,12 +81,10 @@ function cacheInfoPageHtml(host: string) {
|
||||||
...getQueueInformation(),
|
...getQueueInformation(),
|
||||||
keys: keyInfo,
|
keys: keyInfo,
|
||||||
config: listConfig(),
|
config: listConfig(),
|
||||||
commitSha: process.env.COMMIT_SHA || "dev",
|
build: process.env.COMMIT_SHA || "dev",
|
||||||
};
|
};
|
||||||
|
|
||||||
const title = process.env.SPACE_ID
|
const title = getServerTitle();
|
||||||
? `${process.env.SPACE_AUTHOR_NAME} / ${process.env.SPACE_TITLE}`
|
|
||||||
: "OAI Reverse Proxy";
|
|
||||||
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
|
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
|
||||||
|
|
||||||
const pageBody = `<!DOCTYPE html>
|
const pageBody = `<!DOCTYPE html>
|
||||||
|
@ -160,3 +159,22 @@ function getQueueInformation() {
|
||||||
estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait",
|
estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getServerTitle() {
|
||||||
|
// Use manually set title if available
|
||||||
|
if (process.env.SERVER_TITLE) {
|
||||||
|
return process.env.SERVER_TITLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Huggingface
|
||||||
|
if (process.env.SPACE_ID) {
|
||||||
|
return `${process.env.SPACE_AUTHOR_NAME} / ${process.env.SPACE_TITLE}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Render
|
||||||
|
if (process.env.RENDER) {
|
||||||
|
return `Render / ${process.env.RENDER_SERVICE_NAME}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
return "OAI Reverse Proxy";
|
||||||
|
}
|
||||||
|
|
|
@ -22,8 +22,12 @@ app.use(
|
||||||
pinoHttp({
|
pinoHttp({
|
||||||
quietReqLogger: true,
|
quietReqLogger: true,
|
||||||
logger,
|
logger,
|
||||||
// SillyTavern spams the hell out of this endpoint so don't log it
|
autoLogging: {
|
||||||
autoLogging: { ignore: (req) => req.url === "/proxy/kobold/api/v1/model" },
|
ignore: (req) => {
|
||||||
|
const ignored = ["/proxy/kobold/api/v1/model", "/health"];
|
||||||
|
return ignored.includes(req.url as string);
|
||||||
|
},
|
||||||
|
},
|
||||||
redact: {
|
redact: {
|
||||||
paths: [
|
paths: [
|
||||||
"req.headers.cookie",
|
"req.headers.cookie",
|
||||||
|
@ -36,6 +40,8 @@ app.use(
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
app.get("/health", (_req, res) => res.sendStatus(200));
|
||||||
app.use((req, _res, next) => {
|
app.use((req, _res, next) => {
|
||||||
req.startTime = Date.now();
|
req.startTime = Date.now();
|
||||||
req.retryCount = 0;
|
req.retryCount = 0;
|
||||||
|
@ -46,9 +52,10 @@ app.use(
|
||||||
express.json({ limit: "10mb" }),
|
express.json({ limit: "10mb" }),
|
||||||
express.urlencoded({ extended: true, limit: "10mb" })
|
express.urlencoded({ extended: true, limit: "10mb" })
|
||||||
);
|
);
|
||||||
// TODO: this works if we're always being deployed to Huggingface but if users
|
|
||||||
// deploy this somewhere without a load balancer then incoming requests can
|
// TODO: Detect (or support manual configuration of) whether the app is behind
|
||||||
// spoof the X-Forwarded-For header and bypass the rate limiting.
|
// a load balancer/reverse proxy, which is necessary to determine request IP
|
||||||
|
// addresses correctly.
|
||||||
app.set("trust proxy", true);
|
app.set("trust proxy", true);
|
||||||
|
|
||||||
// routes
|
// routes
|
||||||
|
@ -126,6 +133,17 @@ function registerUncaughtExceptionHandler() {
|
||||||
}
|
}
|
||||||
|
|
||||||
function setGitSha() {
|
function setGitSha() {
|
||||||
|
// On Render, the .git directory isn't available in the docker build context
|
||||||
|
// so we can't get the SHA directly, but they expose it as an env variable.
|
||||||
|
if (process.env.RENDER) {
|
||||||
|
const shaString = `${process.env.RENDER_GIT_COMMIT?.slice(0, 7)} (${
|
||||||
|
process.env.RENDER_GIT_REPO_SLUG
|
||||||
|
})`;
|
||||||
|
process.env.COMMIT_SHA = shaString;
|
||||||
|
logger.info({ sha: shaString }, "Got commit SHA via Render config.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Huggingface seems to have changed something about how they deploy Spaces
|
// Huggingface seems to have changed something about how they deploy Spaces
|
||||||
// and git commands fail because of some ownership issue with the .git
|
// and git commands fail because of some ownership issue with the .git
|
||||||
|
|
|
@ -7,7 +7,7 @@ declare global {
|
||||||
interface Request {
|
interface Request {
|
||||||
key?: Key;
|
key?: Key;
|
||||||
api: "kobold" | "openai" | "anthropic";
|
api: "kobold" | "openai" | "anthropic";
|
||||||
user: User;
|
user?: User;
|
||||||
isStreaming?: boolean;
|
isStreaming?: boolean;
|
||||||
startTime: number;
|
startTime: number;
|
||||||
retryCount: number;
|
retryCount: number;
|
||||||
|
|
Loading…
Reference in New Issue