Add docs and support for Render.com deployments (khanon/oai-reverse-proxy!9)
This commit is contained in:
parent
7f202dc9ef
commit
ec4e7a3671
10
.env.example
10
.env.example
|
@ -2,12 +2,13 @@
|
|||
# have sensible defaults. See config.ts for more details.
|
||||
|
||||
# PORT=7860
|
||||
# SERVER_TITLE=Coom Tunnel
|
||||
# MODEL_RATE_LIMIT=4
|
||||
# MAX_OUTPUT_TOKENS=300
|
||||
# LOG_LEVEL=info
|
||||
# REJECT_DISALLOWED=false
|
||||
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
|
||||
# CHECK_KEYS=false
|
||||
# CHECK_KEYS=true
|
||||
# QUOTA_DISPLAY_MODE=full
|
||||
# QUEUE_MODE=fair
|
||||
|
||||
|
@ -23,10 +24,9 @@
|
|||
# PROMPT_LOGGING=false
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# !!! EVERYTHING BELOW IS FOR LOCAL DEVELOPMENT ONLY !!!
|
||||
# If you are deploying this to Huggingface Spaces, do not set any keys here as
|
||||
# the .env file is public and your keys will be exposed.
|
||||
# Sensitive keys should be set in the Secrets UI instead.
|
||||
# The values below are secret -- make sure they are set securely.
|
||||
# For Huggingface, set them via the Secrets section in your Space's config UI.
|
||||
# For Render, create a "secret file" called .env using the Environment tab.
|
||||
|
||||
# You can add multiple keys by separating them with a comma.
|
||||
OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
|
|
27
.replit
27
.replit
|
@ -1,27 +0,0 @@
|
|||
run = "npm run start:replit"
|
||||
hidden = [".build", ".config"]
|
||||
|
||||
[packager]
|
||||
language = "nodejs"
|
||||
|
||||
[packager.features]
|
||||
enabledForHosting = false
|
||||
packageSearch = true
|
||||
guessImports = true
|
||||
|
||||
[nix]
|
||||
channel = "stable-22_11"
|
||||
|
||||
[gitHubImport]
|
||||
requiredFiles = [".replit", "replit.nix", ".config"]
|
||||
|
||||
[languages]
|
||||
|
||||
[languages.typescript]
|
||||
pattern = "**/{*.ts,*.js,*.tsx,*.jsx}"
|
||||
|
||||
[languages.typescript.languageServer]
|
||||
start = "typescript-language-server --stdio"
|
||||
|
||||
[deployment]
|
||||
run = ["sh", "-c", "npm run start:replit"]
|
10
README.md
10
README.md
|
@ -26,14 +26,10 @@ This proxy only forwards text generation requests to the downstream service and
|
|||
If you'd like to run your own instance of this proxy, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like.
|
||||
|
||||
### Deploy to Huggingface (Recommended)
|
||||
[See here for instructions on how to deploy to a Huggingface Space.](./docs/huggingface.md)
|
||||
[See here for instructions on how to deploy to a Huggingface Space.](./docs/deploy-huggingface.md)
|
||||
|
||||
### Deploy to Repl.it (WIP)
|
||||
Still working on this. It's a bit more technical than the Huggingface option; you can give it a shot by clicking on the button below.
|
||||
|
||||
[![Run on Repl.it](https://replit.com/badge/github/nai-degen/oai-reverse-proxy)](https://replit.com/new/github/nai-degen/oai-reverse-proxy)
|
||||
|
||||
You'll need to set your secrets in Repl.it similar to the Huggingface instructions above. Currently .env files don't work properly so it only uses the default configuration.
|
||||
### Deploy to Render
|
||||
[See here for instructions on how to deploy to Render.com.](./docs/deploy-render.md)
|
||||
|
||||
## Local Development
|
||||
To run the proxy locally for development or testing, install Node.js >= 18.0.0 and follow the steps below.
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
FROM node:18-bullseye-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm install
|
||||
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
EXPOSE 7860
|
||||
|
||||
CMD [ "npm", "start" ]
|
|
@ -0,0 +1,11 @@
|
|||
FROM node:18-bullseye-slim
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git
|
||||
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
|
||||
WORKDIR /app
|
||||
RUN npm install
|
||||
COPY Dockerfile greeting.md* .env* ./
|
||||
RUN npm run build
|
||||
EXPOSE 7860
|
||||
ENV NODE_ENV=production
|
||||
CMD [ "npm", "start" ]
|
|
@ -0,0 +1,27 @@
|
|||
# syntax = docker/dockerfile:1.2
|
||||
|
||||
FROM node:18-bullseye-slim
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl
|
||||
|
||||
# Unlike Huggingface, Render can only deploy straight from a git repo and
|
||||
# doesn't allow you to create or modify arbitrary files via the web UI.
|
||||
# To use a greeting file, set `GREETING_URL` to a URL that points to a raw
|
||||
# text file containing your greeting, such as a GitHub Gist.
|
||||
|
||||
# You may need to clear the build cache if you change the greeting, otherwise
|
||||
# Render will use the cached layer from the previous build.
|
||||
|
||||
WORKDIR /app
|
||||
ARG GREETING_URL
|
||||
RUN if [ -n "$GREETING_URL" ]; then \
|
||||
curl -sL "$GREETING_URL" > greeting.md; \
|
||||
fi
|
||||
COPY package*.json greeting.md* ./
|
||||
RUN npm install
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
RUN --mount=type=secret,id=_env,dst=/etc/secrets/.env cat /etc/secrets/.env >> .env
|
||||
EXPOSE 10000
|
||||
ENV NODE_ENV=production
|
||||
CMD [ "npm", "start" ]
|
|
@ -0,0 +1,49 @@
|
|||
# Deploy to Render.com
|
||||
Render.com offers a free tier that includes 750 hours of compute time per month. This is enough to run a single proxy instance 24/7. Instances shut down after 15 minutes without traffic but start up again automatically when a request is received.
|
||||
|
||||
### 1. Create account
|
||||
- [Sign up for Render.com](https://render.com/) to create an account and access the dashboard.
|
||||
|
||||
### 2. Create a service using a Blueprint
|
||||
Render allows you to deploy and automatically configure a repository containing a [render.yaml](../render.yaml) file using its Blueprints feature. This is the easiest way to get started.
|
||||
|
||||
- Click the **Blueprints** tab at the top of the dashboard.
|
||||
- Click **New Blueprint Instance**.
|
||||
- Under **Public Git repository**, enter `https://gitlab.com/khanon/oai-proxy`.
|
||||
- Note that this is not the GitGud repository, but a mirror on GitLab.
|
||||
- Click **Continue**.
|
||||
- Under **Blueprint Name**, enter a name.
|
||||
- Under **Branch**, enter `main`.
|
||||
- Click **Create New Resources**.
|
||||
|
||||
The service will be created according to the instructions in the `render.yaml` file. This will take a few minutes. It may show that the build failed, which is normal as you have not yet set the required environment variables.
|
||||
|
||||
### 3. Set environment variables
|
||||
- Return to the **Dashboard** tab.
|
||||
- Click the name of the service you just created, which may show as "Deploy failed".
|
||||
- Click the **Environment** tab.
|
||||
- Click **Add Secret File**.
|
||||
- Under **Filename**, enter `.env`.
|
||||
- Under **Contents**, enter all of your environment variables, one per line, in the format `NAME=value`.
|
||||
- For example, `OPENAI_KEY=sk-abc123`.
|
||||
- Click **Save Changes**.
|
||||
|
||||
The service will automatically rebuild and deploy with the new environment variables. This will take a few minutes. The link to your deployed proxy will appear at the top of the page.
|
||||
|
||||
# Optional
|
||||
|
||||
## Updating the server
|
||||
|
||||
To update your server, go to the page for your Web Service and click **Manual Deploy** > **Deploy latest commit**. This will pull the latest version of the code and redeploy the server.
|
||||
|
||||
_If you have trouble with this, you can also try selecting **Clear build cache & deploy** instead from the same menu._
|
||||
|
||||
## Adding a greeting message
|
||||
|
||||
To show a greeting message on the Server Info page, set the `GREETING_URL` environment variable within Render to the URL of a Markdown file. This URL should point to a raw text file, not an HTML page. You can use a public GitHub Gist or GitLab Snippet for this. For example: `GREETING_URL=https://gitlab.com/-/snippets/2542011/raw/main/greeting.md`. You can change the title of the page by setting the `SERVER_TITLE` environment variable.
|
||||
|
||||
Don't set `GREETING_URL` in the `.env` secret file you created earlier; it must be set in Render's environment variables section for it to work correctly.
|
||||
|
||||
## Customizing the server
|
||||
|
||||
You can customize the server by editing the `.env` configuration you created earlier. Refer to [.env.example](../.env.example) for a list of all available configuration options. Further information can be found in the [config.ts](../src/config.ts) file.
|
|
@ -0,0 +1,10 @@
|
|||
services:
|
||||
- type: web
|
||||
name: oai-proxy
|
||||
env: docker
|
||||
repo: https://gitlab.com/khanon/oai-proxy.git
|
||||
region: oregon
|
||||
plan: free
|
||||
branch: main
|
||||
healthCheckPath: /health
|
||||
dockerfilePath: ./docker/render/Dockerfile
|
|
@ -1,9 +0,0 @@
|
|||
{ pkgs }: {
|
||||
deps = [
|
||||
pkgs.nodejs-18_x
|
||||
pkgs.yarn
|
||||
pkgs.esbuild
|
||||
pkgs.nodePackages.typescript
|
||||
pkgs.nodePackages.typescript-language-server
|
||||
];
|
||||
}
|
|
@ -16,7 +16,8 @@ export const handleInfoPage = (req: Request, res: Response) => {
|
|||
return;
|
||||
}
|
||||
|
||||
// Huggingface puts spaces behind some cloudflare ssl proxy, so `req.protocol` is `http` but the correct URL is actually `https`
|
||||
// Some load balancers/reverse proxies don't give us the right protocol in
|
||||
// the host header. Huggingface works this way, Cloudflare does not.
|
||||
const host = req.get("host");
|
||||
const isHuggingface = host?.includes("hf.space");
|
||||
const protocol = isHuggingface ? "https" : req.protocol;
|
||||
|
@ -80,12 +81,10 @@ function cacheInfoPageHtml(host: string) {
|
|||
...getQueueInformation(),
|
||||
keys: keyInfo,
|
||||
config: listConfig(),
|
||||
commitSha: process.env.COMMIT_SHA || "dev",
|
||||
build: process.env.COMMIT_SHA || "dev",
|
||||
};
|
||||
|
||||
const title = process.env.SPACE_ID
|
||||
? `${process.env.SPACE_AUTHOR_NAME} / ${process.env.SPACE_TITLE}`
|
||||
: "OAI Reverse Proxy";
|
||||
const title = getServerTitle();
|
||||
const headerHtml = buildInfoPageHeader(new showdown.Converter(), title);
|
||||
|
||||
const pageBody = `<!DOCTYPE html>
|
||||
|
@ -160,3 +159,22 @@ function getQueueInformation() {
|
|||
estimatedQueueTime: waitMs > 2000 ? waitTime : "no wait",
|
||||
};
|
||||
}
|
||||
|
||||
function getServerTitle() {
|
||||
// Use manually set title if available
|
||||
if (process.env.SERVER_TITLE) {
|
||||
return process.env.SERVER_TITLE;
|
||||
}
|
||||
|
||||
// Huggingface
|
||||
if (process.env.SPACE_ID) {
|
||||
return `${process.env.SPACE_AUTHOR_NAME} / ${process.env.SPACE_TITLE}`;
|
||||
}
|
||||
|
||||
// Render
|
||||
if (process.env.RENDER) {
|
||||
return `Render / ${process.env.RENDER_SERVICE_NAME}`;
|
||||
}
|
||||
|
||||
return "OAI Reverse Proxy";
|
||||
}
|
||||
|
|
|
@ -22,8 +22,12 @@ app.use(
|
|||
pinoHttp({
|
||||
quietReqLogger: true,
|
||||
logger,
|
||||
// SillyTavern spams the hell out of this endpoint so don't log it
|
||||
autoLogging: { ignore: (req) => req.url === "/proxy/kobold/api/v1/model" },
|
||||
autoLogging: {
|
||||
ignore: (req) => {
|
||||
const ignored = ["/proxy/kobold/api/v1/model", "/health"];
|
||||
return ignored.includes(req.url as string);
|
||||
},
|
||||
},
|
||||
redact: {
|
||||
paths: [
|
||||
"req.headers.cookie",
|
||||
|
@ -36,6 +40,8 @@ app.use(
|
|||
},
|
||||
})
|
||||
);
|
||||
|
||||
app.get("/health", (_req, res) => res.sendStatus(200));
|
||||
app.use((req, _res, next) => {
|
||||
req.startTime = Date.now();
|
||||
req.retryCount = 0;
|
||||
|
@ -46,9 +52,10 @@ app.use(
|
|||
express.json({ limit: "10mb" }),
|
||||
express.urlencoded({ extended: true, limit: "10mb" })
|
||||
);
|
||||
// TODO: this works if we're always being deployed to Huggingface but if users
|
||||
// deploy this somewhere without a load balancer then incoming requests can
|
||||
// spoof the X-Forwarded-For header and bypass the rate limiting.
|
||||
|
||||
// TODO: Detect (or support manual configuration of) whether the app is behind
|
||||
// a load balancer/reverse proxy, which is necessary to determine request IP
|
||||
// addresses correctly.
|
||||
app.set("trust proxy", true);
|
||||
|
||||
// routes
|
||||
|
@ -126,6 +133,17 @@ function registerUncaughtExceptionHandler() {
|
|||
}
|
||||
|
||||
function setGitSha() {
|
||||
// On Render, the .git directory isn't available in the docker build context
|
||||
// so we can't get the SHA directly, but they expose it as an env variable.
|
||||
if (process.env.RENDER) {
|
||||
const shaString = `${process.env.RENDER_GIT_COMMIT?.slice(0, 7)} (${
|
||||
process.env.RENDER_GIT_REPO_SLUG
|
||||
})`;
|
||||
process.env.COMMIT_SHA = shaString;
|
||||
logger.info({ sha: shaString }, "Got commit SHA via Render config.");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Huggingface seems to have changed something about how they deploy Spaces
|
||||
// and git commands fail because of some ownership issue with the .git
|
||||
|
|
|
@ -7,7 +7,7 @@ declare global {
|
|||
interface Request {
|
||||
key?: Key;
|
||||
api: "kobold" | "openai" | "anthropic";
|
||||
user: User;
|
||||
user?: User;
|
||||
isStreaming?: boolean;
|
||||
startTime: number;
|
||||
retryCount: number;
|
||||
|
|
Loading…
Reference in New Issue