OpenAI DALL-E Image Generation (khanon/oai-reverse-proxy!52)

This commit is contained in:
khanon 2023-11-14 05:41:19 +00:00
parent 3ea23760c3
commit 20c064394a
56 changed files with 1401 additions and 305 deletions

View File

@ -11,8 +11,10 @@
# The title displayed on the info page. # The title displayed on the info page.
# SERVER_TITLE=Coom Tunnel # SERVER_TITLE=Coom Tunnel
# Model requests allowed per minute per user. # Text model requests allowed per minute per user.
# MODEL_RATE_LIMIT=4 # TEXT_MODEL_RATE_LIMIT=4
# Image model requests allowed per minute per user.
# IMAGE_MODEL_RATE_LIMIT=2
# Max number of context tokens a user can request at once. # Max number of context tokens a user can request at once.
# Increase this if your proxy allow GPT 32k or 128k context # Increase this if your proxy allow GPT 32k or 128k context
@ -31,10 +33,11 @@
# CHECK_KEYS=true # CHECK_KEYS=true
# Which model types users are allowed to access. # Which model types users are allowed to access.
# If you want to restrict access to certain models, uncomment the line below and list only the models you want to allow, # The following model families are recognized:
# separated by commas. By default, all models are allowed. The following model families are recognized: # turbo | gpt4 | gpt4-32k | gpt4-turbo | dall-e | claude | bison | aws-claude
# turbo | gpt4 | gpt4-32k | gpt4-turbo | claude | bison | aws-claude # By default, all models are allowed except for 'dall-e'. To allow DALL-E image
# ALLOWED_MODEL_FAMILIES=turbo,gpt4-turbo,aws-claude # generation, uncomment the line below and add 'dall-e' to the list.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,bison,aws-claude
# URLs from which requests will be blocked. # URLs from which requests will be blocked.
# BLOCKED_ORIGINS=reddit.com,9gag.com # BLOCKED_ORIGINS=reddit.com,9gag.com
@ -82,10 +85,18 @@
# ALLOW_NICKNAME_CHANGES=true # ALLOW_NICKNAME_CHANGES=true
# Default token quotas for each model family. (0 for unlimited) # Default token quotas for each model family. (0 for unlimited)
# DALL-E "tokens" are counted at a rate of 100000 tokens per US$1.00 generated,
# which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
# See `docs/dall-e-configuration.md` for more information.
# TOKEN_QUOTA_TURBO=0 # TOKEN_QUOTA_TURBO=0
# TOKEN_QUOTA_GPT4=0 # TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0 # TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_DALL_E=0
# TOKEN_QUOTA_CLAUDE=0 # TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_BISON=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# How often to refresh token quotas. (hourly | daily) # How often to refresh token quotas. (hourly | daily)
# Leave unset to never automatically refresh quotas. # Leave unset to never automatically refresh quotas.

2
data/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitkeep

0
data/user-files/.gitkeep Normal file
View File

View File

@ -3,6 +3,8 @@ RUN apt-get update && \
apt-get install -y git apt-get install -y git
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app WORKDIR /app
RUN chown -R 1000:1000 /app
USER 1000
RUN npm install RUN npm install
COPY Dockerfile greeting.md* .env* ./ COPY Dockerfile greeting.md* .env* ./
RUN npm run build RUN npm run build

View File

@ -17,6 +17,8 @@ ARG GREETING_URL
RUN if [ -n "$GREETING_URL" ]; then \ RUN if [ -n "$GREETING_URL" ]; then \
curl -sL "$GREETING_URL" > greeting.md; \ curl -sL "$GREETING_URL" > greeting.md; \
fi fi
RUN chown -R 1000:1000 /app
USER 1000
COPY package*.json greeting.md* ./ COPY package*.json greeting.md* ./
RUN npm install RUN npm install
COPY . . COPY . .

View File

@ -0,0 +1,71 @@
# Configuring the proxy for DALL-E
The proxy supports DALL-E 2 and DALL-E 3 image generation via the `/proxy/openai-images` endpoint. By default it is disabled as it is somewhat expensive and potentially more open to abuse than text generation.
- [Updating your Dockerfile](#updating-your-dockerfile)
- [Enabling DALL-E](#enabling-dall-e)
- [Setting quotas](#setting-quotas)
- [Rate limiting](#rate-limiting)
## Updating your Dockerfile
If you are using a previous version of the Dockerfile supplied with the proxy, it doesn't have the necessary permissions to let the proxy save temporary files.
You can replace the entire thing with the new Dockerfile at [./docker/huggingface/Dockerfile](../docker/huggingface/Dockerfile) (or the equivalent for Render deployments).
You can also modify your existing Dockerfile; just add the following lines after the `WORKDIR` line:
```Dockerfile
# Existing
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app
# Take ownership of the app directory and switch to the non-root user
RUN chown -R 1000:1000 /app
USER 1000
# Existing
RUN npm install
```
## Enabling DALL-E
Add `dall-e` to the `ALLOWED_MODEL_FAMILIES` environment variable to enable DALL-E. For example:
```
# GPT3.5 Turbo, GPT-4, GPT-4 Turbo, and DALL-E
ALLOWED_MODEL_FAMILIES=turbo,gpt-4,gpt-4turbo,dall-e
# All models as of this writing
ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,bison,aws-claude,dall-e
```
Refer to [.env.example](../.env.example) for a full list of supported model families. You can add `dall-e` to that list to enable all models.
## Setting quotas
DALL-E doesn't bill by token like text generation models. Instead there is a fixed cost per image generated, depending on the model, image size, and selected quality.
The proxy still uses tokens to set quotas for users. The cost for each generated image will be converted to "tokens" at a rate of 100000 tokens per US$1.00. This works out to a similar cost-per-token as GPT-4 Turbo, so you can use similar token quotas for both.
Use `TOKEN_QUOTA_DALL_E` to set the default quota for image generation. Otherwise it works the same as token quotas for other models.
```
# ~50 standard DALL-E images per refresh period, or US$2.00
TOKEN_QUOTA_DALL_E=200000
```
Refer to [https://openai.com/pricing](https://openai.com/pricing) for the latest pricing information. As of this writing, the cheapest DALL-E 3 image costs $0.04 per generation, which works out to 4000 tokens. Higher resolution and quality settings can cost up to $0.12 per image, or 12000 tokens.
## Rate limiting
The old `MODEL_RATE_LIMIT` setting has been split into `TEXT_MODEL_RATE_LIMIT` and `IMAGE_MODEL_RATE_LIMIT`. Whatever value you previously set for `MODEL_RATE_LIMIT` will be used for text models.
If you don't specify a `IMAGE_MODEL_RATE_LIMIT`, it defaults to half of the `TEXT_MODEL_RATE_LIMIT`, to a minimum of 1 image per minute.
```
# 4 text generations per minute, 2 images per minute
TEXT_MODEL_RATE_LIMIT=4
IMAGE_MODEL_RATE_LIMIT=2
```
If a prompt is filtered by OpenAI's content filter, it won't count towards the rate limit.
## Hiding recent images
By default, the proxy shows the last 12 recently generated images by users. You can hide this section by setting `SHOW_RECENT_IMAGES` to `false`.

View File

@ -25,6 +25,8 @@ RUN apt-get update && \
apt-get install -y git apt-get install -y git
RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
WORKDIR /app WORKDIR /app
RUN chown -R 1000:1000 /app
USER 1000
RUN npm install RUN npm install
COPY Dockerfile greeting.md* .env* ./ COPY Dockerfile greeting.md* .env* ./
RUN npm run build RUN npm run build

406
package-lock.json generated
View File

@ -15,6 +15,7 @@
"@smithy/signature-v4": "^2.0.10", "@smithy/signature-v4": "^2.0.10",
"@smithy/types": "^2.3.4", "@smithy/types": "^2.3.4",
"axios": "^1.3.5", "axios": "^1.3.5",
"check-disk-space": "^3.4.0",
"cookie-parser": "^1.4.6", "cookie-parser": "^1.4.6",
"copyfiles": "^2.4.1", "copyfiles": "^2.4.1",
"cors": "^2.8.5", "cors": "^2.8.5",
@ -33,6 +34,7 @@
"pino": "^8.11.0", "pino": "^8.11.0",
"pino-http": "^8.3.3", "pino-http": "^8.3.3",
"sanitize-html": "^2.11.0", "sanitize-html": "^2.11.0",
"sharp": "^0.32.6",
"showdown": "^2.1.0", "showdown": "^2.1.0",
"tiktoken": "^1.0.10", "tiktoken": "^1.0.10",
"uuid": "^9.0.0", "uuid": "^9.0.0",
@ -1373,15 +1375,20 @@
} }
}, },
"node_modules/axios": { "node_modules/axios": {
"version": "1.3.5", "version": "1.6.1",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.3.5.tgz", "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.1.tgz",
"integrity": "sha512-glL/PvG/E+xCWwV8S6nCHcrfg1exGx7vxyUIivIA1iL7BIh6bePylCfVHwp6k13ao7SATxB6imau2kqY+I67kw==", "integrity": "sha512-vfBmhDpKafglh0EldBEbVuoe7DyAavGSLWhuSm5ZSEKQnHhBf0xAAwybbNH1IkrJNGnS/VG4I5yxig1pCEXE4g==",
"dependencies": { "dependencies": {
"follow-redirects": "^1.15.0", "follow-redirects": "^1.15.0",
"form-data": "^4.0.0", "form-data": "^4.0.0",
"proxy-from-env": "^1.1.0" "proxy-from-env": "^1.1.0"
} }
}, },
"node_modules/b4a": {
"version": "1.6.4",
"resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz",
"integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw=="
},
"node_modules/balanced-match": { "node_modules/balanced-match": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
@ -1423,6 +1430,52 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/bl": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
"integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
"dependencies": {
"buffer": "^5.5.0",
"inherits": "^2.0.4",
"readable-stream": "^3.4.0"
}
},
"node_modules/bl/node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/bl/node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/bluebird": { "node_modules/bluebird": {
"version": "3.7.2", "version": "3.7.2",
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz",
@ -1582,6 +1635,14 @@
"node": ">=8" "node": ">=8"
} }
}, },
"node_modules/check-disk-space": {
"version": "3.4.0",
"resolved": "https://registry.npmjs.org/check-disk-space/-/check-disk-space-3.4.0.tgz",
"integrity": "sha512-drVkSqfwA+TvuEhFipiR1OC9boEGZL5RrWvVsOthdcvQNXyCCuKkEiTOTXZ7qxSf/GLwq4GvzfrQD/Wz325hgw==",
"engines": {
"node": ">=16"
}
},
"node_modules/chokidar": { "node_modules/chokidar": {
"version": "3.5.3", "version": "3.5.3",
"resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz",
@ -1609,6 +1670,11 @@
"fsevents": "~2.3.2" "fsevents": "~2.3.2"
} }
}, },
"node_modules/chownr": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
},
"node_modules/cliui": { "node_modules/cliui": {
"version": "8.0.1", "version": "8.0.1",
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz",
@ -1623,6 +1689,18 @@
"node": ">=12" "node": ">=12"
} }
}, },
"node_modules/color": {
"version": "4.2.3",
"resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz",
"integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==",
"dependencies": {
"color-convert": "^2.0.1",
"color-string": "^1.9.0"
},
"engines": {
"node": ">=12.5.0"
}
},
"node_modules/color-convert": { "node_modules/color-convert": {
"version": "2.0.1", "version": "2.0.1",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
@ -1639,6 +1717,15 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
"integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
}, },
"node_modules/color-string": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz",
"integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==",
"dependencies": {
"color-name": "^1.0.0",
"simple-swizzle": "^0.2.2"
}
},
"node_modules/colorette": { "node_modules/colorette": {
"version": "2.0.20", "version": "2.0.20",
"resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz",
@ -2000,6 +2087,28 @@
"ms": "2.0.0" "ms": "2.0.0"
} }
}, },
"node_modules/decompress-response": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
"integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
"dependencies": {
"mimic-response": "^3.1.0"
},
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/deep-extend": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
"integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
"engines": {
"node": ">=4.0.0"
}
},
"node_modules/deep-is": { "node_modules/deep-is": {
"version": "0.1.4", "version": "0.1.4",
"resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
@ -2039,6 +2148,14 @@
"npm": "1.2.8000 || >= 1.4.16" "npm": "1.2.8000 || >= 1.4.16"
} }
}, },
"node_modules/detect-libc": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.2.tgz",
"integrity": "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==",
"engines": {
"node": ">=8"
}
},
"node_modules/diff": { "node_modules/diff": {
"version": "4.0.2", "version": "4.0.2",
"resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz",
@ -2188,7 +2305,6 @@
"version": "1.4.4", "version": "1.4.4",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
"integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
"devOptional": true,
"dependencies": { "dependencies": {
"once": "^1.4.0" "once": "^1.4.0"
} }
@ -2473,6 +2589,14 @@
"node": ">=0.8.x" "node": ">=0.8.x"
} }
}, },
"node_modules/expand-template": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
"integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
"engines": {
"node": ">=6"
}
},
"node_modules/express": { "node_modules/express": {
"version": "4.18.2", "version": "4.18.2",
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
@ -2557,6 +2681,11 @@
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
"optional": true "optional": true
}, },
"node_modules/fast-fifo": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
"integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
},
"node_modules/fast-levenshtein": { "node_modules/fast-levenshtein": {
"version": "2.0.6", "version": "2.0.6",
"resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz",
@ -2718,6 +2847,11 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
"integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
},
"node_modules/fs.realpath": { "node_modules/fs.realpath": {
"version": "1.0.0", "version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@ -2795,6 +2929,11 @@
"url": "https://github.com/sponsors/ljharb" "url": "https://github.com/sponsors/ljharb"
} }
}, },
"node_modules/github-from-package": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
"integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
},
"node_modules/glob": { "node_modules/glob": {
"version": "8.1.0", "version": "8.1.0",
"resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz", "resolved": "https://registry.npmjs.org/glob/-/glob-8.1.0.tgz",
@ -3246,6 +3385,11 @@
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
}, },
"node_modules/ini": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
},
"node_modules/ipaddr.js": { "node_modules/ipaddr.js": {
"version": "1.9.1", "version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
@ -3254,6 +3398,11 @@
"node": ">= 0.10" "node": ">= 0.10"
} }
}, },
"node_modules/is-arrayish": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
"integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ=="
},
"node_modules/is-binary-path": { "node_modules/is-binary-path": {
"version": "2.1.0", "version": "2.1.0",
"resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz",
@ -3780,6 +3929,17 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/mimic-response": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
"integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/minimatch": { "node_modules/minimatch": {
"version": "3.1.2", "version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@ -3810,6 +3970,11 @@
"node": ">=10" "node": ">=10"
} }
}, },
"node_modules/mkdirp-classic": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
"integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
},
"node_modules/ms": { "node_modules/ms": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
@ -3860,6 +4025,11 @@
"node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
} }
}, },
"node_modules/napi-build-utils": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz",
"integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg=="
},
"node_modules/negotiator": { "node_modules/negotiator": {
"version": "0.6.3", "version": "0.6.3",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
@ -3868,6 +4038,22 @@
"node": ">= 0.6" "node": ">= 0.6"
} }
}, },
"node_modules/node-abi": {
"version": "3.51.0",
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.51.0.tgz",
"integrity": "sha512-SQkEP4hmNWjlniS5zdnfIXTk1x7Ome85RDzHlTbBtzE97Gfwz/Ipw4v/Ryk20DWIy3yCNVLVlGKApCnmvYoJbA==",
"dependencies": {
"semver": "^7.3.5"
},
"engines": {
"node": ">=10"
}
},
"node_modules/node-addon-api": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz",
"integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="
},
"node_modules/node-fetch": { "node_modules/node-fetch": {
"version": "2.6.9", "version": "2.6.9",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.9.tgz", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.9.tgz",
@ -4217,6 +4403,70 @@
"node": "^10 || ^12 || >=14" "node": "^10 || ^12 || >=14"
} }
}, },
"node_modules/prebuild-install": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.1.tgz",
"integrity": "sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==",
"dependencies": {
"detect-libc": "^2.0.0",
"expand-template": "^2.0.3",
"github-from-package": "0.0.0",
"minimist": "^1.2.3",
"mkdirp-classic": "^0.5.3",
"napi-build-utils": "^1.0.1",
"node-abi": "^3.3.0",
"pump": "^3.0.0",
"rc": "^1.2.7",
"simple-get": "^4.0.0",
"tar-fs": "^2.0.0",
"tunnel-agent": "^0.6.0"
},
"bin": {
"prebuild-install": "bin.js"
},
"engines": {
"node": ">=10"
}
},
"node_modules/prebuild-install/node_modules/readable-stream": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
"dependencies": {
"inherits": "^2.0.3",
"string_decoder": "^1.1.1",
"util-deprecate": "^1.0.1"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/prebuild-install/node_modules/tar-fs": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz",
"integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==",
"dependencies": {
"chownr": "^1.1.1",
"mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
"tar-stream": "^2.1.4"
}
},
"node_modules/prebuild-install/node_modules/tar-stream": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
"integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
"dependencies": {
"bl": "^4.0.3",
"end-of-stream": "^1.4.1",
"fs-constants": "^1.0.0",
"inherits": "^2.0.3",
"readable-stream": "^3.1.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/prettier": { "node_modules/prettier": {
"version": "3.0.3", "version": "3.0.3",
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.3.tgz", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.3.tgz",
@ -4352,7 +4602,6 @@
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
"integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
"dev": true,
"dependencies": { "dependencies": {
"end-of-stream": "^1.1.0", "end-of-stream": "^1.1.0",
"once": "^1.3.1" "once": "^1.3.1"
@ -4372,6 +4621,11 @@
"url": "https://github.com/sponsors/ljharb" "url": "https://github.com/sponsors/ljharb"
} }
}, },
"node_modules/queue-tick": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz",
"integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag=="
},
"node_modules/quick-format-unescaped": { "node_modules/quick-format-unescaped": {
"version": "4.0.4", "version": "4.0.4",
"resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz",
@ -4407,6 +4661,28 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/rc": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
"dependencies": {
"deep-extend": "^0.6.0",
"ini": "~1.3.0",
"minimist": "^1.2.0",
"strip-json-comments": "~2.0.1"
},
"bin": {
"rc": "cli.js"
}
},
"node_modules/rc/node_modules/strip-json-comments": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
"integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/readable-stream": { "node_modules/readable-stream": {
"version": "4.3.0", "version": "4.3.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.3.0.tgz", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-4.3.0.tgz",
@ -4615,9 +4891,9 @@
"dev": true "dev": true
}, },
"node_modules/semver": { "node_modules/semver": {
"version": "7.5.3", "version": "7.5.4",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.5.3.tgz", "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
"integrity": "sha512-QBlUtyVk/5EeHbi7X0fw6liDZc7BBmEaSYn01fMU1OUYbf6GPsbTtd8WmnqbI20SeycoHSeiybkE/q1Q+qlThQ==", "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
"dependencies": { "dependencies": {
"lru-cache": "^6.0.0" "lru-cache": "^6.0.0"
}, },
@ -4675,6 +4951,28 @@
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
}, },
"node_modules/sharp": {
"version": "0.32.6",
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz",
"integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==",
"hasInstallScript": true,
"dependencies": {
"color": "^4.2.3",
"detect-libc": "^2.0.2",
"node-addon-api": "^6.1.0",
"prebuild-install": "^7.1.1",
"semver": "^7.5.4",
"simple-get": "^4.0.1",
"tar-fs": "^3.0.4",
"tunnel-agent": "^0.6.0"
},
"engines": {
"node": ">=14.15.0"
},
"funding": {
"url": "https://opencollective.com/libvips"
}
},
"node_modules/shell-quote": { "node_modules/shell-quote": {
"version": "1.8.1", "version": "1.8.1",
"resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.1.tgz", "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.1.tgz",
@ -4712,6 +5010,57 @@
"url": "https://github.com/sponsors/ljharb" "url": "https://github.com/sponsors/ljharb"
} }
}, },
"node_modules/simple-concat": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
"integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/simple-get": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
"integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"dependencies": {
"decompress-response": "^6.0.0",
"once": "^1.3.1",
"simple-concat": "^1.0.0"
}
},
"node_modules/simple-swizzle": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
"integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
"dependencies": {
"is-arrayish": "^0.3.1"
}
},
"node_modules/simple-update-notifier": { "node_modules/simple-update-notifier": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-2.0.0.tgz", "resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-2.0.0.tgz",
@ -4809,11 +5158,19 @@
"node": ">=10.0.0" "node": ">=10.0.0"
} }
}, },
"node_modules/streamx": {
"version": "2.15.4",
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.4.tgz",
"integrity": "sha512-uSXKl88bibiUCQ1eMpItRljCzDENcDx18rsfDmV79r0e/ThfrAwxG4Y2FarQZ2G4/21xcOKmFFd1Hue+ZIDwHw==",
"dependencies": {
"fast-fifo": "^1.1.0",
"queue-tick": "^1.0.1"
}
},
"node_modules/string_decoder": { "node_modules/string_decoder": {
"version": "1.3.0", "version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
"integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
"devOptional": true,
"dependencies": { "dependencies": {
"safe-buffer": "~5.2.0" "safe-buffer": "~5.2.0"
} }
@ -4872,6 +5229,26 @@
"node": ">=4" "node": ">=4"
} }
}, },
"node_modules/tar-fs": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz",
"integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==",
"dependencies": {
"mkdirp-classic": "^0.5.2",
"pump": "^3.0.0",
"tar-stream": "^3.1.5"
}
},
"node_modules/tar-stream": {
"version": "3.1.6",
"resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz",
"integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==",
"dependencies": {
"b4a": "^1.6.4",
"fast-fifo": "^1.2.0",
"streamx": "^2.15.0"
}
},
"node_modules/teeny-request": { "node_modules/teeny-request": {
"version": "8.0.3", "version": "8.0.3",
"resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-8.0.3.tgz", "resolved": "https://registry.npmjs.org/teeny-request/-/teeny-request-8.0.3.tgz",
@ -5047,6 +5424,17 @@
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz",
"integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q=="
}, },
"node_modules/tunnel-agent": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
"integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
"dependencies": {
"safe-buffer": "^5.0.1"
},
"engines": {
"node": "*"
}
},
"node_modules/type-is": { "node_modules/type-is": {
"version": "1.6.18", "version": "1.6.18",
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",

View File

@ -23,6 +23,7 @@
"@smithy/signature-v4": "^2.0.10", "@smithy/signature-v4": "^2.0.10",
"@smithy/types": "^2.3.4", "@smithy/types": "^2.3.4",
"axios": "^1.3.5", "axios": "^1.3.5",
"check-disk-space": "^3.4.0",
"cookie-parser": "^1.4.6", "cookie-parser": "^1.4.6",
"copyfiles": "^2.4.1", "copyfiles": "^2.4.1",
"cors": "^2.8.5", "cors": "^2.8.5",
@ -41,6 +42,7 @@
"pino": "^8.11.0", "pino": "^8.11.0",
"pino-http": "^8.3.3", "pino-http": "^8.3.3",
"sanitize-html": "^2.11.0", "sanitize-html": "^2.11.0",
"sharp": "^0.32.6",
"showdown": "^2.1.0", "showdown": "^2.1.0",
"tiktoken": "^1.0.10", "tiktoken": "^1.0.10",
"uuid": "^9.0.0", "uuid": "^9.0.0",

View File

@ -1,12 +1,17 @@
import dotenv from "dotenv"; import dotenv from "dotenv";
import type firebase from "firebase-admin"; import type firebase from "firebase-admin";
import path from "path";
import pino from "pino"; import pino from "pino";
import type { ModelFamily } from "./shared/models"; import type { ModelFamily } from "./shared/models";
import { MODEL_FAMILIES } from "./shared/models";
dotenv.config(); dotenv.config();
const startupLogger = pino({ level: "debug" }).child({ module: "startup" }); const startupLogger = pino({ level: "debug" }).child({ module: "startup" });
const isDev = process.env.NODE_ENV !== "production"; const isDev = process.env.NODE_ENV !== "production";
export const DATA_DIR = path.join(__dirname, "..", "data");
export const USER_ASSETS_DIR = path.join(DATA_DIR, "user-files");
type Config = { type Config = {
/** The port the proxy server will listen on. */ /** The port the proxy server will listen on. */
port: number; port: number;
@ -75,8 +80,10 @@ type Config = {
* `maxIpsPerUser` limit, or if only connections from new IPs are be rejected. * `maxIpsPerUser` limit, or if only connections from new IPs are be rejected.
*/ */
maxIpsAutoBan: boolean; maxIpsAutoBan: boolean;
/** Per-IP limit for requests per minute to OpenAI's completions endpoint. */ /** Per-IP limit for requests per minute to text and chat models. */
modelRateLimit: number; textModelRateLimit: number;
/** Per-IP limit for requests per minute to image generation models. */
imageModelRateLimit: number;
/** /**
* For OpenAI, the maximum number of context tokens (prompt + max output) a * For OpenAI, the maximum number of context tokens (prompt + max output) a
* user can request before their request is rejected. * user can request before their request is rejected.
@ -157,6 +164,8 @@ type Config = {
quotaRefreshPeriod?: "hourly" | "daily" | string; quotaRefreshPeriod?: "hourly" | "daily" | string;
/** Whether to allow users to change their own nicknames via the UI. */ /** Whether to allow users to change their own nicknames via the UI. */
allowNicknameChanges: boolean; allowNicknameChanges: boolean;
/** Whether to show recent DALL-E image generations on the homepage. */
showRecentImages: boolean;
/** /**
* If true, cookies will be set without the `Secure` attribute, allowing * If true, cookies will be set without the `Secure` attribute, allowing
* the admin UI to used over HTTP. * the admin UI to used over HTTP.
@ -180,7 +189,8 @@ export const config: Config = {
maxIpsAutoBan: getEnvWithDefault("MAX_IPS_AUTO_BAN", true), maxIpsAutoBan: getEnvWithDefault("MAX_IPS_AUTO_BAN", true),
firebaseRtdbUrl: getEnvWithDefault("FIREBASE_RTDB_URL", undefined), firebaseRtdbUrl: getEnvWithDefault("FIREBASE_RTDB_URL", undefined),
firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined), firebaseKey: getEnvWithDefault("FIREBASE_KEY", undefined),
modelRateLimit: getEnvWithDefault("MODEL_RATE_LIMIT", 4), textModelRateLimit: getEnvWithDefault("TEXT_MODEL_RATE_LIMIT", 4),
imageModelRateLimit: getEnvWithDefault("IMAGE_MODEL_RATE_LIMIT", 4),
maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384), maxContextTokensOpenAI: getEnvWithDefault("MAX_CONTEXT_TOKENS_OPENAI", 16384),
maxContextTokensAnthropic: getEnvWithDefault( maxContextTokensAnthropic: getEnvWithDefault(
"MAX_CONTEXT_TOKENS_ANTHROPIC", "MAX_CONTEXT_TOKENS_ANTHROPIC",
@ -225,17 +235,19 @@ export const config: Config = {
"You must be over the age of majority in your country to use this service." "You must be over the age of majority in your country to use this service."
), ),
blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"), blockRedirect: getEnvWithDefault("BLOCK_REDIRECT", "https://www.9gag.com"),
tokenQuota: { tokenQuota: MODEL_FAMILIES.reduce(
turbo: getEnvWithDefault("TOKEN_QUOTA_TURBO", 0), (acc, family: ModelFamily) => {
gpt4: getEnvWithDefault("TOKEN_QUOTA_GPT4", 0), acc[family] = getEnvWithDefault(
"gpt4-32k": getEnvWithDefault("TOKEN_QUOTA_GPT4_32K", 0), `TOKEN_QUOTA_${family.toUpperCase().replace(/-/g, "_")}`,
"gpt4-turbo": getEnvWithDefault("TOKEN_QUOTA_GPT4_TURBO", 0), 0
claude: getEnvWithDefault("TOKEN_QUOTA_CLAUDE", 0), ) as number;
bison: getEnvWithDefault("TOKEN_QUOTA_BISON", 0), return acc;
"aws-claude": getEnvWithDefault("TOKEN_QUOTA_AWS_CLAUDE", 0), },
}, {} as { [key in ModelFamily]: number }
),
quotaRefreshPeriod: getEnvWithDefault("QUOTA_REFRESH_PERIOD", undefined), quotaRefreshPeriod: getEnvWithDefault("QUOTA_REFRESH_PERIOD", undefined),
allowNicknameChanges: getEnvWithDefault("ALLOW_NICKNAME_CHANGES", true), allowNicknameChanges: getEnvWithDefault("ALLOW_NICKNAME_CHANGES", true),
showRecentImages: getEnvWithDefault("SHOW_RECENT_IMAGES", true),
useInsecureCookies: getEnvWithDefault("USE_INSECURE_COOKIES", isDev), useInsecureCookies: getEnvWithDefault("USE_INSECURE_COOKIES", isDev),
} as const; } as const;
@ -252,6 +264,19 @@ function generateCookieSecret() {
export const COOKIE_SECRET = generateCookieSecret(); export const COOKIE_SECRET = generateCookieSecret();
export async function assertConfigIsValid() { export async function assertConfigIsValid() {
if (process.env.MODEL_RATE_LIMIT !== undefined) {
const limit =
parseInt(process.env.MODEL_RATE_LIMIT, 10) || config.textModelRateLimit;
config.textModelRateLimit = limit;
config.imageModelRateLimit = Math.max(Math.floor(limit / 2), 1);
startupLogger.warn(
{ textLimit: limit, imageLimit: config.imageModelRateLimit },
"MODEL_RATE_LIMIT is deprecated. Use TEXT_MODEL_RATE_LIMIT and IMAGE_MODEL_RATE_LIMIT instead."
);
}
if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) { if (!["none", "proxy_key", "user_token"].includes(config.gatekeeper)) {
throw new Error( throw new Error(
`Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.` `Invalid gatekeeper mode: ${config.gatekeeper}. Must be one of: none, proxy_key, user_token.`
@ -332,6 +357,7 @@ export const OMITTED_KEYS: (keyof Config)[] = [
"blockMessage", "blockMessage",
"blockRedirect", "blockRedirect",
"allowNicknameChanges", "allowNicknameChanges",
"showRecentImages",
"useInsecureCookies", "useInsecureCookies",
]; ];
@ -428,5 +454,5 @@ function parseCsv(val: string): string[] {
const regex = /(".*?"|[^",]+)(?=\s*,|\s*$)/g; const regex = /(".*?"|[^",]+)(?=\s*,|\s*$)/g;
const matches = val.match(regex) || []; const matches = val.match(regex) || [];
return matches.map(item => item.replace(/^"|"$/g, '').trim()); return matches.map((item) => item.replace(/^"|"$/g, "").trim());
} }

View File

@ -14,6 +14,7 @@ import { getUniqueIps } from "./proxy/rate-limit";
import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue"; import { getEstimatedWaitTime, getQueueLength } from "./proxy/queue";
import { getTokenCostUsd, prettyTokens } from "./shared/stats"; import { getTokenCostUsd, prettyTokens } from "./shared/stats";
import { assertNever } from "./shared/utils"; import { assertNever } from "./shared/utils";
import { getLastNImages } from "./shared/file-storage/image-history";
const INFO_PAGE_TTL = 2000; const INFO_PAGE_TTL = 2000;
let infoPageHtml: string | undefined; let infoPageHtml: string | undefined;
@ -94,6 +95,8 @@ function cacheInfoPageHtml(baseUrl: string) {
const tokens = serviceStats.get("tokens") || 0; const tokens = serviceStats.get("tokens") || 0;
const tokenCost = serviceStats.get("tokenCost") || 0; const tokenCost = serviceStats.get("tokenCost") || 0;
const allowDalle = config.allowedModelFamilies.includes("dall-e");
const info = { const info = {
uptime: Math.floor(process.uptime()), uptime: Math.floor(process.uptime()),
endpoints: { endpoints: {
@ -101,13 +104,16 @@ function cacheInfoPageHtml(baseUrl: string) {
...(openaiKeys ...(openaiKeys
? { ["openai2"]: baseUrl + "/proxy/openai/turbo-instruct" } ? { ["openai2"]: baseUrl + "/proxy/openai/turbo-instruct" }
: {}), : {}),
...(openaiKeys && allowDalle
? { ["openai-image"]: baseUrl + "/proxy/openai-image" }
: {}),
...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}), ...(anthropicKeys ? { anthropic: baseUrl + "/proxy/anthropic" } : {}),
...(palmKeys ? { "google-palm": baseUrl + "/proxy/google-palm" } : {}), ...(palmKeys ? { "google-palm": baseUrl + "/proxy/google-palm" } : {}),
...(awsKeys ? { aws: baseUrl + "/proxy/aws/claude" } : {}), ...(awsKeys ? { aws: baseUrl + "/proxy/aws/claude" } : {}),
}, },
proompts, proompts,
tookens: `${prettyTokens(tokens)}${getCostString(tokenCost)}`, tookens: `${prettyTokens(tokens)}${getCostString(tokenCost)}`,
...(config.modelRateLimit ? { proomptersNow: getUniqueIps() } : {}), ...(config.textModelRateLimit ? { proomptersNow: getUniqueIps() } : {}),
openaiKeys, openaiKeys,
anthropicKeys, anthropicKeys,
palmKeys, palmKeys,
@ -287,7 +293,6 @@ function getOpenAIInfo() {
// Don't show trial/revoked keys for non-turbo families. // Don't show trial/revoked keys for non-turbo families.
// Generally those stats only make sense for the lowest-tier model. // Generally those stats only make sense for the lowest-tier model.
if (f !== "turbo") { if (f !== "turbo") {
console.log("deleting", f);
delete info[f]!.trialKeys; delete info[f]!.trialKeys;
delete info[f]!.revokedKeys; delete info[f]!.revokedKeys;
} }
@ -457,6 +462,9 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
if (customGreeting) { if (customGreeting) {
infoBody += `\n## Server Greeting\n${customGreeting}`; infoBody += `\n## Server Greeting\n${customGreeting}`;
} }
infoBody += buildRecentImageSection();
return converter.makeHtml(infoBody); return converter.makeHtml(infoBody);
} }
@ -499,6 +507,43 @@ function getServerTitle() {
return "OAI Reverse Proxy"; return "OAI Reverse Proxy";
} }
function buildRecentImageSection() {
if (
!config.allowedModelFamilies.includes("dall-e") ||
!config.showRecentImages
) {
return "";
}
let html = `<h2>Recent DALL-E Generations</h2>`;
const recentImages = getLastNImages(12).reverse();
if (recentImages.length === 0) {
html += `<p>No images yet.</p>`;
return html;
}
html += `<div style="display: flex; flex-wrap: wrap;" id="recent-images">`;
for (const { url, prompt } of recentImages) {
const thumbUrl = url.replace(/\.png$/, "_t.jpg");
const escapedPrompt = escapeHtml(prompt);
html += `<div style="margin: 0.5em;" class="recent-image">
<a href="${url}" target="_blank"><img src="${thumbUrl}" title="${escapedPrompt}" alt="${escapedPrompt}" style="max-width: 150px; max-height: 150px;" /></a>
</div>`;
}
html += `</div>`;
return html;
}
function escapeHtml(unsafe: string) {
return unsafe
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
function getExternalUrlForHuggingfaceSpaceId(spaceId: string) { function getExternalUrlForHuggingfaceSpaceId(spaceId: string) {
// Huggingface broke their amazon elb config and no longer sends the // Huggingface broke their amazon elb config and no longer sends the
// x-forwarded-host header. This is a workaround. // x-forwarded-host header. This is a workaround.

View File

@ -87,9 +87,8 @@ const anthropicResponseHandler: ProxyResHandlerWithBody = async (
body = transformAnthropicResponse(body, req); body = transformAnthropicResponse(body, req);
} }
// TODO: Remove once tokenization is stable if (req.tokenizerInfo) {
if (req.debug) { body.proxy_tokenizer = req.tokenizerInfo;
body.proxy_tokenizer_debug_info = req.debug;
} }
res.status(200).json(body); res.status(200).json(body);

View File

@ -73,9 +73,8 @@ const awsResponseHandler: ProxyResHandlerWithBody = async (
body = transformAwsResponse(body, req); body = transformAwsResponse(body, req);
} }
// TODO: Remove once tokenization is stable if (req.tokenizerInfo) {
if (req.debug) { body.proxy_tokenizer = req.tokenizerInfo;
body.proxy_tokenizer_debug_info = req.debug;
} }
// AWS does not confirm the model in the response, so we have to add it // AWS does not confirm the model in the response, so we have to add it

View File

@ -9,11 +9,10 @@ import { QuotaExceededError } from "./request/apply-quota-limits";
const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions"; const OPENAI_CHAT_COMPLETION_ENDPOINT = "/v1/chat/completions";
const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions"; const OPENAI_TEXT_COMPLETION_ENDPOINT = "/v1/completions";
const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings"; const OPENAI_EMBEDDINGS_ENDPOINT = "/v1/embeddings";
const OPENAI_IMAGE_COMPLETION_ENDPOINT = "/v1/images/generations";
const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete"; const ANTHROPIC_COMPLETION_ENDPOINT = "/v1/complete";
/** Returns true if we're making a request to a completion endpoint. */ export function isTextGenerationRequest(req: Request) {
export function isCompletionRequest(req: Request) {
// 99% sure this function is not needed anymore
return ( return (
req.method === "POST" && req.method === "POST" &&
[ [
@ -24,6 +23,13 @@ export function isCompletionRequest(req: Request) {
); );
} }
export function isImageGenerationRequest(req: Request) {
return (
req.method === "POST" &&
req.path.startsWith(OPENAI_IMAGE_COMPLETION_ENDPOINT)
);
}
export function isEmbeddingsRequest(req: Request) { export function isEmbeddingsRequest(req: Request) {
return ( return (
req.method === "POST" && req.path.startsWith(OPENAI_EMBEDDINGS_ENDPOINT) req.method === "POST" && req.path.startsWith(OPENAI_EMBEDDINGS_ENDPOINT)
@ -53,8 +59,8 @@ export function writeErrorResponse(
res.write(`data: [DONE]\n\n`); res.write(`data: [DONE]\n\n`);
res.end(); res.end();
} else { } else {
if (req.debug && errorPayload.error) { if (req.tokenizerInfo && errorPayload.error) {
errorPayload.error.proxy_tokenizer_debug_info = req.debug; errorPayload.error.proxy_tokenizer = req.tokenizerInfo;
} }
res.status(statusCode).json(errorPayload); res.status(statusCode).json(errorPayload);
} }
@ -103,7 +109,7 @@ function classifyError(err: Error): {
code: { enabled: false }, code: { enabled: false },
maxErrors: 3, maxErrors: 3,
transform: ({ issue, ...rest }) => { transform: ({ issue, ...rest }) => {
return `At '${rest.pathComponent}', ${issue.message}`; return `At '${rest.pathComponent}': ${issue.message}`;
}, },
}); });
return { status: 400, userMessage, type: "proxy_validation_error" }; return { status: 400, userMessage, type: "proxy_validation_error" };
@ -173,6 +179,8 @@ export function getCompletionFromBody(req: Request, body: Record<string, any>) {
return body.completion.trim(); return body.completion.trim();
case "google-palm": case "google-palm":
return body.candidates[0].output; return body.candidates[0].output;
case "openai-image":
return body.data?.map((item: any) => item.url).join("\n");
default: default:
assertNever(format); assertNever(format);
} }
@ -184,6 +192,8 @@ export function getModelFromBody(req: Request, body: Record<string, any>) {
case "openai": case "openai":
case "openai-text": case "openai-text":
return body.model; return body.model;
case "openai-image":
return req.body.model;
case "anthropic": case "anthropic":
// Anthropic confirms the model in the response, but AWS Claude doesn't. // Anthropic confirms the model in the response, but AWS Claude doesn't.
return body.model || req.body.model; return body.model || req.body.model;

View File

@ -1,5 +1,5 @@
import { AnthropicKey, Key } from "../../../shared/key-management"; import { AnthropicKey, Key } from "../../../shared/key-management";
import { isCompletionRequest } from "../common"; import { isTextGenerationRequest } from "../common";
import { ProxyRequestMiddleware } from "."; import { ProxyRequestMiddleware } from ".";
/** /**
@ -11,7 +11,7 @@ export const addAnthropicPreamble: ProxyRequestMiddleware = (
_proxyReq, _proxyReq,
req req
) => { ) => {
if (!isCompletionRequest(req) || req.key?.service !== "anthropic") { if (!isTextGenerationRequest(req) || req.key?.service !== "anthropic") {
return; return;
} }

View File

@ -1,5 +1,5 @@
import { Key, OpenAIKey, keyPool } from "../../../shared/key-management"; import { Key, OpenAIKey, keyPool } from "../../../shared/key-management";
import { isCompletionRequest, isEmbeddingsRequest } from "../common"; import { isEmbeddingsRequest } from "../common";
import { ProxyRequestMiddleware } from "."; import { ProxyRequestMiddleware } from ".";
import { assertNever } from "../../../shared/utils"; import { assertNever } from "../../../shared/utils";
@ -7,18 +7,6 @@ import { assertNever } from "../../../shared/utils";
export const addKey: ProxyRequestMiddleware = (proxyReq, req) => { export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
let assignedKey: Key; let assignedKey: Key;
if (!isCompletionRequest(req)) {
// Horrible, horrible hack to stop the proxy from complaining about clients
// not sending a model when they are requesting the list of models (which
// requires a key, but obviously not a model).
// I don't think this is needed anymore since models requests are no longer
// proxied to the upstream API. Everything going through this is either a
// completion request or a special case like OpenAI embeddings.
req.log.warn({ path: req.path }, "addKey called on non-completion request");
req.body.model = "gpt-3.5-turbo";
}
if (!req.inboundApi || !req.outboundApi) { if (!req.inboundApi || !req.outboundApi) {
const err = new Error( const err = new Error(
"Request API format missing. Did you forget to add the request preprocessor to your router?" "Request API format missing. Did you forget to add the request preprocessor to your router?"
@ -54,6 +42,9 @@ export const addKey: ProxyRequestMiddleware = (proxyReq, req) => {
throw new Error( throw new Error(
"OpenAI Chat as an API translation target is not supported" "OpenAI Chat as an API translation target is not supported"
); );
case "openai-image":
assignedKey = keyPool.get("dall-e-3");
break;
default: default:
assertNever(req.outboundApi); assertNever(req.outboundApi);
} }

View File

@ -1,5 +1,5 @@
import { hasAvailableQuota } from "../../../shared/users/user-store"; import { hasAvailableQuota } from "../../../shared/users/user-store";
import { isCompletionRequest } from "../common"; import { isImageGenerationRequest, isTextGenerationRequest } from "../common";
import { ProxyRequestMiddleware } from "."; import { ProxyRequestMiddleware } from ".";
export class QuotaExceededError extends Error { export class QuotaExceededError extends Error {
@ -12,12 +12,19 @@ export class QuotaExceededError extends Error {
} }
export const applyQuotaLimits: ProxyRequestMiddleware = (_proxyReq, req) => { export const applyQuotaLimits: ProxyRequestMiddleware = (_proxyReq, req) => {
if (!isCompletionRequest(req) || !req.user) { const subjectToQuota =
return; isTextGenerationRequest(req) || isImageGenerationRequest(req);
} if (!subjectToQuota || !req.user) return;
const requestedTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0); const requestedTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);
if (!hasAvailableQuota(req.user.token, req.body.model, requestedTokens)) { if (
!hasAvailableQuota({
userToken: req.user.token,
model: req.body.model,
api: req.outboundApi,
requested: requestedTokens,
})
) {
throw new QuotaExceededError( throw new QuotaExceededError(
"You have exceeded your proxy token quota for this model.", "You have exceeded your proxy token quota for this model.",
{ {

View File

@ -1,4 +1,3 @@
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from "."; import { ProxyRequestMiddleware } from ".";
const DISALLOWED_ORIGIN_SUBSTRINGS = "janitorai.com,janitor.ai".split(","); const DISALLOWED_ORIGIN_SUBSTRINGS = "janitorai.com,janitor.ai".split(",");
@ -15,10 +14,6 @@ class ForbiddenError extends Error {
* stop getting emails asking for tech support. * stop getting emails asking for tech support.
*/ */
export const blockZoomerOrigins: ProxyRequestMiddleware = (_proxyReq, req) => { export const blockZoomerOrigins: ProxyRequestMiddleware = (_proxyReq, req) => {
if (!isCompletionRequest(req)) {
return;
}
const origin = req.headers.origin || req.headers.referer; const origin = req.headers.origin || req.headers.referer;
if (origin && DISALLOWED_ORIGIN_SUBSTRINGS.some((s) => origin.includes(s))) { if (origin && DISALLOWED_ORIGIN_SUBSTRINGS.some((s) => origin.includes(s))) {
// Venus-derivatives send a test prompt to check if the proxy is working. // Venus-derivatives send a test prompt to check if the proxy is working.

View File

@ -35,14 +35,18 @@ export const countPromptTokens: RequestPreprocessor = async (req) => {
result = await countTokens({ req, prompt, service }); result = await countTokens({ req, prompt, service });
break; break;
} }
case "openai-image": {
req.outputTokens = 1;
result = await countTokens({ req, service });
break;
}
default: default:
assertNever(service); assertNever(service);
} }
req.promptTokens = result.token_count; req.promptTokens = result.token_count;
// TODO: Remove once token counting is stable
req.log.debug({ result: result }, "Counted prompt tokens."); req.log.debug({ result: result }, "Counted prompt tokens.");
req.debug = req.debug ?? {}; req.tokenizerInfo = req.tokenizerInfo ?? {};
req.debug = { ...req.debug, ...result }; req.tokenizerInfo = { ...req.tokenizerInfo, ...result };
}; };

View File

@ -4,6 +4,11 @@ import type { ProxyRequestMiddleware } from ".";
/** Finalize the rewritten request body. Must be the last rewriter. */ /** Finalize the rewritten request body. Must be the last rewriter. */
export const finalizeBody: ProxyRequestMiddleware = (proxyReq, req) => { export const finalizeBody: ProxyRequestMiddleware = (proxyReq, req) => {
if (["POST", "PUT", "PATCH"].includes(req.method ?? "") && req.body) { if (["POST", "PUT", "PATCH"].includes(req.method ?? "") && req.body) {
// For image generation requests, remove stream flag.
if (req.outboundApi === "openai-image") {
delete req.body.stream;
}
const updatedBody = JSON.stringify(req.body); const updatedBody = JSON.stringify(req.body);
proxyReq.setHeader("Content-Length", Buffer.byteLength(updatedBody)); proxyReq.setHeader("Content-Length", Buffer.byteLength(updatedBody));
(req as any).rawBody = Buffer.from(updatedBody); (req as any).rawBody = Buffer.from(updatedBody);

View File

@ -58,6 +58,7 @@ function getPromptFromRequest(req: Request) {
) )
.join("\n\n"); .join("\n\n");
case "openai-text": case "openai-text":
case "openai-image":
return body.prompt; return body.prompt;
case "google-palm": case "google-palm":
return body.prompt.text; return body.prompt.text;

View File

@ -1,12 +1,12 @@
import { isCompletionRequest } from "../common"; import { isTextGenerationRequest } from "../common";
import { ProxyRequestMiddleware } from "."; import { ProxyRequestMiddleware } from ".";
/** /**
* Don't allow multiple completions to be requested to prevent abuse. * Don't allow multiple text completions to be requested to prevent abuse.
* OpenAI-only, Anthropic provides no such parameter. * OpenAI-only, Anthropic provides no such parameter.
**/ **/
export const limitCompletions: ProxyRequestMiddleware = (_proxyReq, req) => { export const limitCompletions: ProxyRequestMiddleware = (_proxyReq, req) => {
if (isCompletionRequest(req) && req.outboundApi === "openai") { if (isTextGenerationRequest(req) && req.outboundApi === "openai") {
const originalN = req.body?.n || 1; const originalN = req.body?.n || 1;
req.body.n = 1; req.body.n = 1;
if (originalN !== req.body.n) { if (originalN !== req.body.n) {

View File

@ -17,6 +17,7 @@ export const createOnProxyReqHandler = ({
// The streaming flag must be set before any other middleware runs, because // The streaming flag must be set before any other middleware runs, because
// it may influence which other middleware a particular API pipeline wants // it may influence which other middleware a particular API pipeline wants
// to run. // to run.
// Image generation requests can't be streamed.
req.isStreaming = req.body.stream === true || req.body.stream === "true"; req.isStreaming = req.body.stream === true || req.body.stream === "true";
req.body.stream = req.isStreaming; req.body.stream = req.isStreaming;

View File

@ -2,7 +2,7 @@ import { Request } from "express";
import { z } from "zod"; import { z } from "zod";
import { config } from "../../../config"; import { config } from "../../../config";
import { OpenAIPromptMessage } from "../../../shared/tokenization"; import { OpenAIPromptMessage } from "../../../shared/tokenization";
import { isCompletionRequest } from "../common"; import { isTextGenerationRequest, isImageGenerationRequest } from "../common";
import { RequestPreprocessor } from "."; import { RequestPreprocessor } from ".";
import { APIFormat } from "../../../shared/key-management"; import { APIFormat } from "../../../shared/key-management";
@ -88,6 +88,21 @@ const OpenAIV1TextCompletionSchema = z
}) })
.merge(OpenAIV1ChatCompletionSchema.omit({ messages: true })); .merge(OpenAIV1ChatCompletionSchema.omit({ messages: true }));
// https://platform.openai.com/docs/api-reference/images/create
const OpenAIV1ImagesGenerationSchema = z.object({
prompt: z.string().max(4000),
model: z.string().optional(),
quality: z.enum(["standard", "hd"]).optional().default("standard"),
n: z.number().int().min(1).max(4).optional().default(1),
response_format: z.enum(["url", "b64_json"]).optional(),
size: z
.enum(["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"])
.optional()
.default("1024x1024"),
style: z.enum(["vivid", "natural"]).optional().default("vivid"),
user: z.string().optional(),
});
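// A minimal sketch of how the schema above behaves: only `prompt` is
// required, and Zod supplies the documented defaults for the rest.
// (Illustrative annotation, not part of the module.)
// const parsed = OpenAIV1ImagesGenerationSchema.parse({ prompt: "a watercolor fox" });
// // => { prompt: "a watercolor fox", quality: "standard", n: 1,
// //      size: "1024x1024", style: "vivid" }
// // Prompts over 4000 characters or n > 4 throw a ZodError instead.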
// https://developers.generativeai.google/api/rest/generativelanguage/models/generateText // https://developers.generativeai.google/api/rest/generativelanguage/models/generateText
const PalmV1GenerateTextSchema = z.object({ const PalmV1GenerateTextSchema = z.object({
model: z.string(), model: z.string(),
@ -110,6 +125,7 @@ const VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
anthropic: AnthropicV1CompleteSchema, anthropic: AnthropicV1CompleteSchema,
openai: OpenAIV1ChatCompletionSchema, openai: OpenAIV1ChatCompletionSchema,
"openai-text": OpenAIV1TextCompletionSchema, "openai-text": OpenAIV1TextCompletionSchema,
"openai-image": OpenAIV1ImagesGenerationSchema,
"google-palm": PalmV1GenerateTextSchema, "google-palm": PalmV1GenerateTextSchema,
}; };
@ -117,11 +133,10 @@ const VALIDATORS: Record<APIFormat, z.ZodSchema<any>> = {
export const transformOutboundPayload: RequestPreprocessor = async (req) => { export const transformOutboundPayload: RequestPreprocessor = async (req) => {
const sameService = req.inboundApi === req.outboundApi; const sameService = req.inboundApi === req.outboundApi;
const alreadyTransformed = req.retryCount > 0; const alreadyTransformed = req.retryCount > 0;
const notTransformable = !isCompletionRequest(req); const notTransformable =
!isTextGenerationRequest(req) && !isImageGenerationRequest(req);
if (alreadyTransformed || notTransformable) { if (alreadyTransformed || notTransformable) return;
return;
}
if (sameService) { if (sameService) {
const result = VALIDATORS[req.inboundApi].safeParse(req.body); const result = VALIDATORS[req.inboundApi].safeParse(req.body);
@ -151,6 +166,11 @@ export const transformOutboundPayload: RequestPreprocessor = async (req) => {
return; return;
} }
if (req.inboundApi === "openai" && req.outboundApi === "openai-image") {
req.body = openaiToOpenaiImage(req);
return;
}
throw new Error( throw new Error(
`'${req.inboundApi}' -> '${req.outboundApi}' request proxying is not supported. Make sure your client is configured to use the correct API.` `'${req.inboundApi}' -> '${req.outboundApi}' request proxying is not supported. Make sure your client is configured to use the correct API.`
); );
@ -226,6 +246,49 @@ function openaiToOpenaiText(req: Request) {
return OpenAIV1TextCompletionSchema.parse(transformed); return OpenAIV1TextCompletionSchema.parse(transformed);
} }
// Takes the last user message and uses the text after the "Image:" marker as the image prompt.
function openaiToOpenaiImage(req: Request) {
const { body } = req;
const result = OpenAIV1ChatCompletionSchema.safeParse(body);
if (!result.success) {
req.log.warn(
{ issues: result.error.issues, body },
"Invalid OpenAI-to-OpenAI-image request"
);
throw result.error;
}
const { messages } = result.data;
const prompt = messages.filter((m) => m.role === "user").pop()?.content;
if (body.stream) {
throw new Error(
"Streaming is not supported for image generation requests."
);
}
// Some frontends do weird things with the prompt, like prefixing it with a
// character name or wrapping the entire thing in quotes. We will look for
// the index of "Image:" and use everything after that as the prompt.
const index = prompt?.toLowerCase().indexOf("image:");
if (index === -1 || !prompt) {
throw new Error(
`Start your prompt with 'Image:' followed by a description of the image you want to generate (received: ${prompt}).`
);
}
// TODO: Add some way to specify parameters via chat message
const transformed = {
model: body.model.includes("dall-e") ? body.model : "dall-e-3",
quality: "standard",
size: "1024x1024",
response_format: "url",
prompt: prompt.slice(index! + 6).trim(),
};
return OpenAIV1ImagesGenerationSchema.parse(transformed);
}
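// A sketch of the transform's effect (values illustrative). Given this
// inbound chat body:
//   { model: "gpt-4", stream: false, messages: [
//       { role: "user", content: "Luna: Image: a red fox in the snow" } ] }
// the function above slices everything after "Image:" and produces:
//   { model: "dall-e-3", quality: "standard", size: "1024x1024",
//     response_format: "url", prompt: "a red fox in the snow" }
// with n: 1 and style: "vivid" then filled in by the schema's defaults.
// "gpt-4" falls back to "dall-e-3" because it does not contain "dall-e";
// a request naming "dall-e-2" passes through unchanged.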
function openaiToPalm(req: Request): z.infer<typeof PalmV1GenerateTextSchema> { function openaiToPalm(req: Request): z.infer<typeof PalmV1GenerateTextSchema> {
const { body } = req; const { body } = req;
const result = OpenAIV1ChatCompletionSchema.safeParse({ const result = OpenAIV1ChatCompletionSchema.safeParse({

View File

@ -34,6 +34,8 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
case "google-palm": case "google-palm":
proxyMax = BISON_MAX_CONTEXT; proxyMax = BISON_MAX_CONTEXT;
break; break;
case "openai-image":
return;
default: default:
assertNever(req.outboundApi); assertNever(req.outboundApi);
} }
@ -81,10 +83,10 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
"Prompt size validated" "Prompt size validated"
); );
req.debug.prompt_tokens = promptTokens; req.tokenizerInfo.prompt_tokens = promptTokens;
req.debug.completion_tokens = outputTokens; req.tokenizerInfo.completion_tokens = outputTokens;
req.debug.max_model_tokens = modelMax; req.tokenizerInfo.max_model_tokens = modelMax;
req.debug.max_proxy_tokens = proxyMax; req.tokenizerInfo.max_proxy_tokens = proxyMax;
}; };
function assertRequestHasTokenCounts( function assertRequestHasTokenCounts(

View File

@ -13,13 +13,16 @@ import {
incrementTokenCount, incrementTokenCount,
} from "../../../shared/users/user-store"; } from "../../../shared/users/user-store";
import { assertNever } from "../../../shared/utils"; import { assertNever } from "../../../shared/utils";
import { refundLastAttempt } from "../../rate-limit";
import { import {
getCompletionFromBody, getCompletionFromBody,
isCompletionRequest, isImageGenerationRequest,
isTextGenerationRequest,
writeErrorResponse, writeErrorResponse,
} from "../common"; } from "../common";
import { handleStreamedResponse } from "./handle-streamed-response"; import { handleStreamedResponse } from "./handle-streamed-response";
import { logPrompt } from "./log-prompt"; import { logPrompt } from "./log-prompt";
import { saveImage } from "./save-image";
const DECODER_MAP = { const DECODER_MAP = {
gzip: util.promisify(zlib.gunzip), gzip: util.promisify(zlib.gunzip),
@ -106,6 +109,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
countResponseTokens, countResponseTokens,
incrementUsage, incrementUsage,
copyHttpHeaders, copyHttpHeaders,
saveImage,
logPrompt, logPrompt,
...apiMiddleware ...apiMiddleware
); );
@ -285,7 +289,16 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
switch (service) { switch (service) {
case "openai": case "openai":
case "google-palm": case "google-palm":
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`; if (errorPayload.error?.code === "content_policy_violation") {
errorPayload.proxy_note = `Request was filtered by OpenAI's content moderation system. Try another prompt.`;
refundLastAttempt(req);
} else if (errorPayload.error?.code === "billing_hard_limit_reached") {
// For some reason, some models return this 400 error instead of the
// same 429 billing error that other models return.
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
} else {
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
}
break; break;
case "anthropic": case "anthropic":
case "aws": case "aws":
@ -453,6 +466,7 @@ function handleOpenAIRateLimitError(
const type = errorPayload.error?.type; const type = errorPayload.error?.type;
switch (type) { switch (type) {
case "insufficient_quota": case "insufficient_quota":
case "invalid_request_error": // this is the billing_hard_limit_reached error seen in some cases
// Billing quota exceeded (key is dead, disable it) // Billing quota exceeded (key is dead, disable it)
keyPool.disable(req.key!, "quota"); keyPool.disable(req.key!, "quota");
errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`; errorPayload.proxy_note = `Assigned key's quota has been exceeded. ${tryAgainMessage}`;
@ -487,13 +501,22 @@ function handleOpenAIRateLimitError(
} }
const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => { const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
if (isCompletionRequest(req)) { if (isTextGenerationRequest(req) || isImageGenerationRequest(req)) {
const model = req.body.model; const model = req.body.model;
const tokensUsed = req.promptTokens! + req.outputTokens!; const tokensUsed = req.promptTokens! + req.outputTokens!;
req.log.debug(
{
model,
tokensUsed,
promptTokens: req.promptTokens,
outputTokens: req.outputTokens,
},
`Incrementing usage for model`
);
keyPool.incrementUsage(req.key!, model, tokensUsed); keyPool.incrementUsage(req.key!, model, tokensUsed);
if (req.user) { if (req.user) {
incrementPromptCount(req.user.token); incrementPromptCount(req.user.token);
incrementTokenCount(req.user.token, model, tokensUsed); incrementTokenCount(req.user.token, model, req.outboundApi, tokensUsed);
} }
} }
}; };
@ -504,6 +527,12 @@ const countResponseTokens: ProxyResHandlerWithBody = async (
_res, _res,
body body
) => { ) => {
if (req.outboundApi === "openai-image") {
// The image's fixed cost was precomputed into promptTokens during
// preprocessing; reassign it to outputTokens so usage accounting bills
// the generation as output rather than as prompt.
req.outputTokens = req.promptTokens;
req.promptTokens = 0;
return;
}
// This function is prone to breaking if the upstream API makes even minor // This function is prone to breaking if the upstream API makes even minor
// changes to the response format, especially for SSE responses. If you're // changes to the response format, especially for SSE responses. If you're
// seeing errors in this function, check the reassembled response body from // seeing errors in this function, check the reassembled response body from
@ -518,8 +547,8 @@ const countResponseTokens: ProxyResHandlerWithBody = async (
{ service, tokens, prevOutputTokens: req.outputTokens }, { service, tokens, prevOutputTokens: req.outputTokens },
`Counted tokens for completion` `Counted tokens for completion`
); );
if (req.debug) { if (req.tokenizerInfo) {
req.debug.completion_tokens = tokens; req.tokenizerInfo.completion_tokens = tokens;
} }
req.outputTokens = tokens.token_count; req.outputTokens = tokens.token_count;

View File

@ -4,7 +4,8 @@ import { logQueue } from "../../../shared/prompt-logging";
import { import {
getCompletionFromBody, getCompletionFromBody,
getModelFromBody, getModelFromBody,
isCompletionRequest, isImageGenerationRequest,
isTextGenerationRequest,
} from "../common"; } from "../common";
import { ProxyResHandlerWithBody } from "."; import { ProxyResHandlerWithBody } from ".";
import { assertNever } from "../../../shared/utils"; import { assertNever } from "../../../shared/utils";
@ -23,11 +24,11 @@ export const logPrompt: ProxyResHandlerWithBody = async (
throw new Error("Expected body to be an object"); throw new Error("Expected body to be an object");
} }
if (!isCompletionRequest(req)) { const loggable =
return; isTextGenerationRequest(req) || isImageGenerationRequest(req);
} if (!loggable) return;
const promptPayload = getPromptForRequest(req); const promptPayload = getPromptForRequest(req, responseBody);
const promptFlattened = flattenMessages(promptPayload); const promptFlattened = flattenMessages(promptPayload);
const response = getCompletionFromBody(req, responseBody); const response = getCompletionFromBody(req, responseBody);
const model = getModelFromBody(req, responseBody); const model = getModelFromBody(req, responseBody);
@ -46,7 +47,18 @@ type OaiMessage = {
content: string; content: string;
}; };
const getPromptForRequest = (req: Request): string | OaiMessage[] => { type OaiImageResult = {
prompt: string;
size: string;
style: string;
quality: string;
revisedPrompt?: string;
};
const getPromptForRequest = (
req: Request,
responseBody: Record<string, any>
): string | OaiMessage[] | OaiImageResult => {
// Since the prompt logger only runs after the request has been proxied, we // Since the prompt logger only runs after the request has been proxied, we
// can assume the body has already been transformed to the target API's // can assume the body has already been transformed to the target API's
// format. // format.
@ -55,6 +67,14 @@ const getPromptForRequest = (req: Request): string | OaiMessage[] => {
return req.body.messages; return req.body.messages;
case "openai-text": case "openai-text":
return req.body.prompt; return req.body.prompt;
case "openai-image":
return {
prompt: req.body.prompt,
size: req.body.size,
style: req.body.style,
quality: req.body.quality,
revisedPrompt: responseBody.data[0].revised_prompt,
};
case "anthropic": case "anthropic":
return req.body.prompt; return req.body.prompt;
case "google-palm": case "google-palm":
@ -64,9 +84,14 @@ const getPromptForRequest = (req: Request): string | OaiMessage[] => {
} }
}; };
const flattenMessages = (messages: string | OaiMessage[]): string => { const flattenMessages = (
if (typeof messages === "string") { val: string | OaiMessage[] | OaiImageResult
return messages.trim(); ): string => {
if (typeof val === "string") {
return val.trim();
} }
return messages.map((m) => `${m.role}: ${m.content}`).join("\n"); if (Array.isArray(val)) {
return val.map((m) => `${m.role}: ${m.content}`).join("\n");
}
return val.prompt.trim();
}; };

View File

@ -0,0 +1,27 @@
import { ProxyResHandlerWithBody } from "./index";
import { mirrorGeneratedImage, OpenAIImageGenerationResult } from "../../../shared/file-storage/mirror-generated-image";
export const saveImage: ProxyResHandlerWithBody = async (
_proxyRes,
req,
_res,
body,
) => {
if (req.outboundApi !== "openai-image") {
return;
}
if (typeof body !== "object") {
throw new Error("Expected body to be an object");
}
if (body.data) {
const baseUrl = req.protocol + "://" + req.get("host");
const prompt = body.data[0].revised_prompt ?? req.body.prompt;
await mirrorGeneratedImage(
baseUrl,
prompt,
body as OpenAIImageGenerationResult
);
}
};

View File

@ -33,9 +33,10 @@ export class EventAggregator {
case "anthropic": case "anthropic":
return mergeEventsForAnthropic(this.events); return mergeEventsForAnthropic(this.events);
case "google-palm": case "google-palm":
throw new Error("Google PaLM API does not support streaming responses"); case "openai-image":
throw new Error(`SSE aggregation not supported for ${this.format}`);
default: default:
assertNever(this.format); assertNever(this.format);
} }
} }
} }

View File

@ -99,7 +99,8 @@ function getTransformer(
? anthropicV1ToOpenAI ? anthropicV1ToOpenAI
: anthropicV2ToOpenAI; : anthropicV2ToOpenAI;
case "google-palm": case "google-palm":
throw new Error("Google PaLM does not support streaming responses"); case "openai-image":
throw new Error(`SSE transformation not supported for ${responseApi}`);
default: default:
assertNever(responseApi); assertNever(responseApi);
} }

153
src/proxy/openai-image.ts Normal file
View File

@ -0,0 +1,153 @@
import { RequestHandler, Router, Request } from "express";
import { createProxyMiddleware } from "http-proxy-middleware";
import { config } from "../config";
import { logger } from "../logger";
import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common";
import {
addKey,
applyQuotaLimits,
blockZoomerOrigins,
createPreprocessorMiddleware,
finalizeBody,
stripHeaders,
createOnProxyReqHandler,
} from "./middleware/request";
import {
createOnProxyResHandler,
ProxyResHandlerWithBody,
} from "./middleware/response";
import { generateModelList } from "./openai";
import {
mirrorGeneratedImage,
OpenAIImageGenerationResult,
} from "../shared/file-storage/mirror-generated-image";
const KNOWN_MODELS = ["dall-e-2", "dall-e-3"];
let modelListCache: any = null;
let modelListValid = 0;
const handleModelRequest: RequestHandler = (_req, res) => {
if (new Date().getTime() - modelListValid < 1000 * 60) {
return res.status(200).json(modelListCache);
}
const result = generateModelList(KNOWN_MODELS);
modelListCache = { object: "list", data: result };
modelListValid = new Date().getTime();
res.status(200).json(modelListCache);
};
const openaiImagesResponseHandler: ProxyResHandlerWithBody = async (
_proxyRes,
req,
res,
body
) => {
if (typeof body !== "object") {
throw new Error("Expected body to be an object");
}
if (config.promptLogging) {
const host = req.get("host");
body.proxy_note = `Prompts are logged on this proxy instance. See ${host} for more information.`;
}
if (req.inboundApi === "openai") {
req.log.info("Transforming OpenAI image response to OpenAI chat format");
body = transformResponseForChat(body as OpenAIImageGenerationResult, req);
}
if (req.tokenizerInfo) {
body.proxy_tokenizer = req.tokenizerInfo;
}
res.status(200).json(body);
};
/**
* Transforms a DALL-E image generation response into a chat response, simply
* embedding the image URL into the chat message as a Markdown image.
*/
function transformResponseForChat(
imageBody: OpenAIImageGenerationResult,
req: Request
): Record<string, any> {
const prompt = imageBody.data[0].revised_prompt ?? req.body.prompt;
const content = imageBody.data
.map((item) => {
const { url, b64_json } = item;
if (b64_json) {
return `![${prompt}](data:image/png;base64,${b64_json})`;
} else {
return `![${prompt}](${url})`;
}
})
.join("\n\n");
return {
id: "dalle-" + req.id,
object: "chat.completion",
created: Date.now(),
model: req.body.model,
usage: {
prompt_tokens: 0,
completion_tokens: req.outputTokens,
total_tokens: req.outputTokens,
},
choices: [
{
message: { role: "assistant", content },
finish_reason: "stop",
index: 0,
},
],
};
}
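// Sketch of the resulting client-visible payload for one image (URL
// hypothetical; it points at the proxy's mirrored copy):
//   { id: "dalle-<req id>", object: "chat.completion", model: "dall-e-3",
//     usage: { prompt_tokens: 0, completion_tokens: <image cost>, ... },
//     choices: [{ index: 0, finish_reason: "stop",
//       message: { role: "assistant",
//         content: "![<prompt>](https://myproxy.example/user_content/<uuid>.png)" } }] }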
const openaiImagesProxy = createQueueMiddleware({
proxyMiddleware: createProxyMiddleware({
target: "https://api.openai.com",
changeOrigin: true,
selfHandleResponse: true,
logger,
pathRewrite: {
"^/v1/chat/completions": "/v1/images/generations",
},
on: {
proxyReq: createOnProxyReqHandler({
pipeline: [
applyQuotaLimits,
addKey,
blockZoomerOrigins,
stripHeaders,
finalizeBody,
],
}),
proxyRes: createOnProxyResHandler([openaiImagesResponseHandler]),
error: handleProxyError,
},
}),
});
const openaiImagesRouter = Router();
openaiImagesRouter.get("/v1/models", handleModelRequest);
openaiImagesRouter.post(
"/v1/images/generations",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai-image",
outApi: "openai-image",
service: "openai",
}),
openaiImagesProxy
);
openaiImagesRouter.post(
"/v1/chat/completions",
ipLimiter,
createPreprocessorMiddleware({
inApi: "openai",
outApi: "openai-image",
service: "openai",
}),
openaiImagesProxy
);
export const openaiImage = openaiImagesRouter;
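// Usage sketch (hypothetical host and user token; the path assumes the
// proxy router is mounted at /proxy, as it is elsewhere in this project):
async function generateImageViaChat() {
  const res = await fetch(
    "https://myproxy.example/proxy/openai-image/v1/chat/completions",
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "Bearer <user token>",
      },
      body: JSON.stringify({
        model: "dall-e-3",
        messages: [{ role: "user", content: "Image: a lighthouse at dusk" }],
      }),
    }
  );
  return res.json(); // chat-shaped body with Markdown image links
}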

View File

@ -2,61 +2,50 @@ import { RequestHandler, Router } from "express";
import { createProxyMiddleware } from "http-proxy-middleware"; import { createProxyMiddleware } from "http-proxy-middleware";
import { config } from "../config"; import { config } from "../config";
import { keyPool } from "../shared/key-management"; import { keyPool } from "../shared/key-management";
import { import { getOpenAIModelFamily, ModelFamily, OpenAIModelFamily } from "../shared/models";
ModelFamily,
OpenAIModelFamily,
getOpenAIModelFamily,
} from "../shared/models";
import { logger } from "../logger"; import { logger } from "../logger";
import { createQueueMiddleware } from "./queue"; import { createQueueMiddleware } from "./queue";
import { ipLimiter } from "./rate-limit"; import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common"; import { handleProxyError } from "./middleware/common";
import { import {
RequestPreprocessor,
addKey, addKey,
addKeyForEmbeddingsRequest, addKeyForEmbeddingsRequest,
applyQuotaLimits, applyQuotaLimits,
blockZoomerOrigins, blockZoomerOrigins,
createEmbeddingsPreprocessorMiddleware, createEmbeddingsPreprocessorMiddleware,
createOnProxyReqHandler,
createPreprocessorMiddleware, createPreprocessorMiddleware,
finalizeBody, finalizeBody,
forceModel, forceModel,
limitCompletions, limitCompletions,
RequestPreprocessor,
stripHeaders, stripHeaders,
createOnProxyReqHandler,
} from "./middleware/request"; } from "./middleware/request";
import { import { createOnProxyResHandler, ProxyResHandlerWithBody } from "./middleware/response";
createOnProxyResHandler,
ProxyResHandlerWithBody, // https://platform.openai.com/docs/models/overview
} from "./middleware/response"; const KNOWN_MODELS = [
"gpt-4-1106-preview",
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k",
"gpt-4-32k-0613",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914",
"text-embedding-ada-002",
];
let modelsCache: any = null; let modelsCache: any = null;
let modelsCacheTime = 0; let modelsCacheTime = 0;
function getModelsResponse() { export function generateModelList(models = KNOWN_MODELS) {
if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
return modelsCache;
}
// https://platform.openai.com/docs/models/overview
const knownModels = [
"gpt-4-1106-preview",
"gpt-4",
"gpt-4-0613",
"gpt-4-0314", // EOL 2024-06-13
"gpt-4-32k",
"gpt-4-32k-0613",
"gpt-4-32k-0314", // EOL 2024-06-13
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301", // EOL 2024-06-13
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-16k-0613",
"gpt-3.5-turbo-instruct",
"gpt-3.5-turbo-instruct-0914",
"text-embedding-ada-002",
];
let available = new Set<OpenAIModelFamily>(); let available = new Set<OpenAIModelFamily>();
for (const key of keyPool.list()) { for (const key of keyPool.list()) {
if (key.isDisabled || key.service !== "openai") continue; if (key.isDisabled || key.service !== "openai") continue;
@ -67,7 +56,7 @@ function getModelsResponse() {
const allowed = new Set<ModelFamily>(config.allowedModelFamilies); const allowed = new Set<ModelFamily>(config.allowedModelFamilies);
available = new Set([...available].filter((x) => allowed.has(x))); available = new Set([...available].filter((x) => allowed.has(x)));
const models = knownModels return models
.map((id) => ({ .map((id) => ({
id, id,
object: "model", object: "model",
@ -87,15 +76,14 @@ function getModelsResponse() {
parent: null, parent: null,
})) }))
.filter((model) => available.has(getOpenAIModelFamily(model.id))); .filter((model) => available.has(getOpenAIModelFamily(model.id)));
modelsCache = { object: "list", data: models };
modelsCacheTime = new Date().getTime();
return modelsCache;
} }
const handleModelRequest: RequestHandler = (_req, res) => { const handleModelRequest: RequestHandler = (_req, res) => {
res.status(200).json(getModelsResponse()); if (new Date().getTime() - modelsCacheTime < 1000 * 60) return res.status(200).json(modelsCache);
const result = generateModelList();
modelsCache = { object: "list", data: result };
modelsCacheTime = new Date().getTime();
res.status(200).json(modelsCache);
}; };
/** Handles some turbo-instruct special cases. */ /** Handles some turbo-instruct special cases. */
@ -137,9 +125,8 @@ const openaiResponseHandler: ProxyResHandlerWithBody = async (
body = transformTurboInstructResponse(body); body = transformTurboInstructResponse(body);
} }
// TODO: Remove once tokenization is stable if (req.tokenizerInfo) {
if (req.debug) { body.proxy_tokenizer = req.tokenizerInfo;
body.proxy_tokenizer_debug_info = req.debug;
} }
res.status(200).json(body); res.status(200).json(body);

View File

@ -75,9 +75,8 @@ const palmResponseHandler: ProxyResHandlerWithBody = async (
body = transformPalmResponse(body, req); body = transformPalmResponse(body, req);
} }
// TODO: Remove once tokenization is stable if (req.tokenizerInfo) {
if (req.debug) { body.proxy_tokenizer = req.tokenizerInfo;
body.proxy_tokenizer_debug_info = req.debug;
} }
// TODO: PaLM has no streaming capability which will pose a problem here if // TODO: PaLM has no streaming capability which will pose a problem here if

View File

@ -12,11 +12,11 @@
*/ */
import type { Handler, Request } from "express"; import type { Handler, Request } from "express";
import { keyPool, SupportedModel } from "../shared/key-management"; import { keyPool } from "../shared/key-management";
import { import {
getClaudeModelFamily, getClaudeModelFamily,
getGooglePalmModelFamily, getGooglePalmModelFamily,
getOpenAIModelFamily, getOpenAIModelFamily, MODEL_FAMILIES,
ModelFamily, ModelFamily,
} from "../shared/models"; } from "../shared/models";
import { buildFakeSse, initializeSseStream } from "../shared/streaming"; import { buildFakeSse, initializeSseStream } from "../shared/streaming";
@ -132,7 +132,7 @@ function getPartitionForRequest(req: Request): ModelFamily {
// There is a single request queue, but it is partitioned by model family. // There is a single request queue, but it is partitioned by model family.
// Model families are typically separated on cost/rate limit boundaries so // Model families are typically separated on cost/rate limit boundaries so
// they should be treated as separate queues. // they should be treated as separate queues.
const model = (req.body.model as SupportedModel) ?? "gpt-3.5-turbo"; const model = req.body.model ?? "gpt-3.5-turbo";
// Weird special case for AWS because they serve multiple models from // Weird special case for AWS because they serve multiple models from
// different vendors, even if currently only one is supported. // different vendors, even if currently only one is supported.
@ -145,6 +145,7 @@ function getPartitionForRequest(req: Request): ModelFamily {
return getClaudeModelFamily(model); return getClaudeModelFamily(model);
case "openai": case "openai":
case "openai-text": case "openai-text":
case "openai-image":
return getOpenAIModelFamily(model); return getOpenAIModelFamily(model);
case "google-palm": case "google-palm":
return getGooglePalmModelFamily(model); return getGooglePalmModelFamily(model);
@ -207,40 +208,15 @@ export function dequeue(partition: ModelFamily): Request | undefined {
function processQueue() { function processQueue() {
// This isn't completely correct, because a key can service multiple models. // This isn't completely correct, because a key can service multiple models.
// Currently if a key is locked out on one model it will also stop servicing // Currently if a key is locked out on one model it will also stop servicing
// the others, because we only track one rate limit per key. // the others, because we only track rate limits for the key as a whole.
// TODO: `getLockoutPeriod` uses model names instead of model families
// TODO: genericize this it's really ugly
const gpt4TurboLockout = keyPool.getLockoutPeriod("gpt-4-1106");
const gpt432kLockout = keyPool.getLockoutPeriod("gpt-4-32k");
const gpt4Lockout = keyPool.getLockoutPeriod("gpt-4");
const turboLockout = keyPool.getLockoutPeriod("gpt-3.5-turbo");
const claudeLockout = keyPool.getLockoutPeriod("claude-v1");
const palmLockout = keyPool.getLockoutPeriod("text-bison-001");
const awsClaudeLockout = keyPool.getLockoutPeriod("anthropic.claude-v2");
const reqs: (Request | undefined)[] = []; const reqs: (Request | undefined)[] = [];
if (gpt4TurboLockout === 0) { MODEL_FAMILIES.forEach((modelFamily) => {
reqs.push(dequeue("gpt4-turbo")); const lockout = keyPool.getLockoutPeriod(modelFamily);
} if (lockout === 0) {
if (gpt432kLockout === 0) { reqs.push(dequeue(modelFamily));
reqs.push(dequeue("gpt4-32k")); }
} });
if (gpt4Lockout === 0) {
reqs.push(dequeue("gpt4"));
}
if (turboLockout === 0) {
reqs.push(dequeue("turbo"));
}
if (claudeLockout === 0) {
reqs.push(dequeue("claude"));
}
if (palmLockout === 0) {
reqs.push(dequeue("bison"));
}
if (awsClaudeLockout === 0) {
reqs.push(dequeue("aws-claude"));
}
reqs.filter(Boolean).forEach((req) => { reqs.filter(Boolean).forEach((req) => {
if (req?.proceed) { if (req?.proceed) {

View File

@ -9,8 +9,6 @@ export const SHARED_IP_ADDRESSES = new Set([
"209.97.162.44", "209.97.162.44",
]); ]);
const RATE_LIMIT_ENABLED = Boolean(config.modelRateLimit);
const RATE_LIMIT = Math.max(1, config.modelRateLimit);
const ONE_MINUTE_MS = 60 * 1000; const ONE_MINUTE_MS = 60 * 1000;
type Timestamp = number; type Timestamp = number;
@ -22,12 +20,15 @@ const exemptedRequests: Timestamp[] = [];
const isRecentAttempt = (now: Timestamp) => (attempt: Timestamp) => const isRecentAttempt = (now: Timestamp) => (attempt: Timestamp) =>
attempt > now - ONE_MINUTE_MS; attempt > now - ONE_MINUTE_MS;
const getTryAgainInMs = (ip: string) => { const getTryAgainInMs = (ip: string, type: "text" | "image") => {
const now = Date.now(); const now = Date.now();
const attempts = lastAttempts.get(ip) || []; const attempts = lastAttempts.get(ip) || [];
const validAttempts = attempts.filter(isRecentAttempt(now)); const validAttempts = attempts.filter(isRecentAttempt(now));
if (validAttempts.length >= RATE_LIMIT) { const limit =
type === "text" ? config.textModelRateLimit : config.imageModelRateLimit;
if (validAttempts.length >= limit) {
return validAttempts[0] - now + ONE_MINUTE_MS; return validAttempts[0] - now + ONE_MINUTE_MS;
} else { } else {
lastAttempts.set(ip, [...validAttempts, now]); lastAttempts.set(ip, [...validAttempts, now]);
@ -35,12 +36,16 @@ const getTryAgainInMs = (ip: string) => {
} }
}; };
const getStatus = (ip: string) => { const getStatus = (ip: string, type: "text" | "image") => {
const now = Date.now(); const now = Date.now();
const attempts = lastAttempts.get(ip) || []; const attempts = lastAttempts.get(ip) || [];
const validAttempts = attempts.filter(isRecentAttempt(now)); const validAttempts = attempts.filter(isRecentAttempt(now));
const limit =
type === "text" ? config.textModelRateLimit : config.imageModelRateLimit;
return { return {
remaining: Math.max(0, RATE_LIMIT - validAttempts.length), remaining: Math.max(0, limit - validAttempts.length),
reset: validAttempts.length > 0 ? validAttempts[0] + ONE_MINUTE_MS : now, reset: validAttempts.length > 0 ? validAttempts[0] + ONE_MINUTE_MS : now,
}; };
}; };
@ -69,12 +74,26 @@ setInterval(clearOldExemptions, 10 * 1000);
export const getUniqueIps = () => lastAttempts.size; export const getUniqueIps = () => lastAttempts.size;
/**
* Can be used to manually remove the most recent attempt from an IP address,
* i.e. in case a prompt triggered OpenAI's content filter and therefore did not
* result in a generation.
*/
export const refundLastAttempt = (req: Request) => {
const key = req.user?.token || req.risuToken || req.ip;
const attempts = lastAttempts.get(key) || [];
attempts.pop();
};
export const ipLimiter = async ( export const ipLimiter = async (
req: Request, req: Request,
res: Response, res: Response,
next: NextFunction next: NextFunction
) => { ) => {
if (!RATE_LIMIT_ENABLED) return next(); const imageLimit = config.imageModelRateLimit;
const textLimit = config.textModelRateLimit;
if (!textLimit && !imageLimit) return next();
if (req.user?.type === "special") return next(); if (req.user?.type === "special") return next();
// Exempts Agnai.chat from IP-based rate limiting because its IPs are shared // Exempts Agnai.chat from IP-based rate limiting because its IPs are shared
@ -90,24 +109,25 @@ export const ipLimiter = async (
return next(); return next();
} }
// Image routes are mounted under paths containing "image"
// (/openai-image, /v1/images/generations).
const type = (req.baseUrl + req.path).includes("image") ? "image" : "text";
const limit = type === "image" ? imageLimit : textLimit;
// If user is authenticated, key rate limiting by their token. Otherwise, key // If user is authenticated, key rate limiting by their token. Otherwise, key
// rate limiting by their IP address. Mitigates key sharing. // rate limiting by their IP address. Mitigates key sharing.
const rateLimitKey = req.user?.token || req.risuToken || req.ip; const rateLimitKey = req.user?.token || req.risuToken || req.ip;
const { remaining, reset } = getStatus(rateLimitKey); const { remaining, reset } = getStatus(rateLimitKey, type);
res.set("X-RateLimit-Limit", config.modelRateLimit.toString()); res.set("X-RateLimit-Limit", limit.toString());
res.set("X-RateLimit-Remaining", remaining.toString()); res.set("X-RateLimit-Remaining", remaining.toString());
res.set("X-RateLimit-Reset", reset.toString()); res.set("X-RateLimit-Reset", reset.toString());
const tryAgainInMs = getTryAgainInMs(rateLimitKey); const tryAgainInMs = getTryAgainInMs(rateLimitKey, type);
if (tryAgainInMs > 0) { if (tryAgainInMs > 0) {
res.set("Retry-After", tryAgainInMs.toString()); res.set("Retry-After", tryAgainInMs.toString());
res.status(429).json({ res.status(429).json({
error: { error: {
type: "proxy_rate_limited", type: "proxy_rate_limited",
message: `This proxy is rate limited to ${ message: `This model type is rate limited to ${limit} prompts per minute. Please try again in ${Math.ceil(
config.modelRateLimit
} prompts per minute. Please try again in ${Math.ceil(
tryAgainInMs / 1000 tryAgainInMs / 1000
)} seconds.`, )} seconds.`,
}, },

View File

@ -2,6 +2,7 @@ import express, { Request, Response, NextFunction } from "express";
import { gatekeeper } from "./gatekeeper"; import { gatekeeper } from "./gatekeeper";
import { checkRisuToken } from "./check-risu-token"; import { checkRisuToken } from "./check-risu-token";
import { openai } from "./openai"; import { openai } from "./openai";
import { openaiImage } from "./openai-image";
import { anthropic } from "./anthropic"; import { anthropic } from "./anthropic";
import { googlePalm } from "./palm"; import { googlePalm } from "./palm";
import { aws } from "./aws"; import { aws } from "./aws";
@ -27,6 +28,7 @@ proxyRouter.use((req, _res, next) => {
next(); next();
}); });
proxyRouter.use("/openai", addV1, openai); proxyRouter.use("/openai", addV1, openai);
proxyRouter.use("/openai-image", addV1, openaiImage);
proxyRouter.use("/anthropic", addV1, anthropic); proxyRouter.use("/anthropic", addV1, anthropic);
proxyRouter.use("/google-palm", addV1, googlePalm); proxyRouter.use("/google-palm", addV1, googlePalm);
proxyRouter.use("/aws/claude", addV1, aws); proxyRouter.use("/aws/claude", addV1, aws);

View File

@ -1,11 +1,14 @@
import { assertConfigIsValid, config } from "./config"; import { assertConfigIsValid, config, USER_ASSETS_DIR } from "./config";
import "source-map-support/register"; import "source-map-support/register";
import checkDiskSpace from "check-disk-space";
import express from "express"; import express from "express";
import cors from "cors"; import cors from "cors";
import path from "path"; import path from "path";
import pinoHttp from "pino-http"; import pinoHttp from "pino-http";
import os from "os";
import childProcess from "child_process"; import childProcess from "child_process";
import { logger } from "./logger"; import { logger } from "./logger";
import { setupAssetsDir } from "./shared/file-storage/setup-assets-dir";
import { keyPool } from "./shared/key-management"; import { keyPool } from "./shared/key-management";
import { adminRouter } from "./admin/routes"; import { adminRouter } from "./admin/routes";
import { proxyRouter } from "./proxy/routes"; import { proxyRouter } from "./proxy/routes";
@ -58,6 +61,8 @@ app.set("views", [
path.join(__dirname, "shared/views"), path.join(__dirname, "shared/views"),
]); ]);
app.use("/user_content", express.static(USER_ASSETS_DIR));
app.get("/health", (_req, res) => res.sendStatus(200)); app.get("/health", (_req, res) => res.sendStatus(200));
app.use(cors()); app.use(cors());
app.use(checkOrigin); app.use(checkOrigin);
@ -99,13 +104,17 @@ async function start() {
await initTokenizers(); await initTokenizers();
if (config.allowedModelFamilies.includes("dall-e")) {
await setupAssetsDir();
}
if (config.gatekeeper === "user_token") { if (config.gatekeeper === "user_token") {
await initUserStore(); await initUserStore();
} }
if (config.promptLogging) { if (config.promptLogging) {
logger.info("Starting prompt logging..."); logger.info("Starting prompt logging...");
logQueue.start(); await logQueue.start();
} }
logger.info("Starting request queue..."); logger.info("Starting request queue...");
@ -116,8 +125,12 @@ async function start() {
registerUncaughtExceptionHandler(); registerUncaughtExceptionHandler();
}); });
const diskSpace = await checkDiskSpace(
__dirname.startsWith("/app") ? "/app" : os.homedir()
);
logger.info( logger.info(
{ build: process.env.BUILD_INFO, nodeEnv: process.env.NODE_ENV }, { build: process.env.BUILD_INFO, nodeEnv: process.env.NODE_ENV, diskSpace },
"Startup complete." "Startup complete."
); );
} }

View File

@ -0,0 +1,35 @@
type ImageHistory = {
url: string;
prompt: string;
}
const IMAGE_HISTORY_SIZE = 30;
const imageHistory = new Array<ImageHistory>(IMAGE_HISTORY_SIZE);
let imageHistoryIndex = 0;
export function getImageHistory() {
return imageHistory.filter((image) => image);
}
export function addToImageHistory(image: ImageHistory) {
imageHistory[imageHistoryIndex] = image;
imageHistoryIndex = (imageHistoryIndex + 1) % IMAGE_HISTORY_SIZE;
}
export function getLastNImages(n: number) {
const result: ImageHistory[] = [];
let currentIndex = (imageHistoryIndex - 1 + IMAGE_HISTORY_SIZE) % IMAGE_HISTORY_SIZE;
for (let i = 0; i < n; i++) {
// Skip slots that have not been written yet.
if (imageHistory[currentIndex]) {
result.unshift(imageHistory[currentIndex]);
}
// Move to the previous item, wrapping around if necessary.
currentIndex = (currentIndex - 1 + IMAGE_HISTORY_SIZE) % IMAGE_HISTORY_SIZE;
}
return result;
}
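// Usage sketch of the ring buffer above: with IMAGE_HISTORY_SIZE = 30,
// the 31st addition overwrites slot 0 and reads come back oldest-first.
function demoImageHistory() {
  for (let i = 0; i <= 30; i++) {
    addToImageHistory({ url: `https://proxy.example/user_content/${i}.png`, prompt: `p${i}` });
  }
  return getLastNImages(3).map((img) => img.prompt); // ["p28", "p29", "p30"]
}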

View File

@ -0,0 +1,75 @@
import axios from "axios";
import { promises as fs } from "fs";
import path from "path";
import { v4 } from "uuid";
import { USER_ASSETS_DIR } from "../../config";
import { logger } from "../../logger";
import { addToImageHistory } from "./image-history";
import sharp from "sharp";
const log = logger.child({ module: "file-storage" });
export type OpenAIImageGenerationResult = {
created: number;
data: {
revised_prompt?: string;
url: string;
b64_json: string;
}[];
};
async function downloadImage(url: string) {
const { data } = await axios.get(url, { responseType: "arraybuffer" });
const buffer = Buffer.from(data, "binary");
const newFilename = `${v4()}.png`;
const filepath = path.join(USER_ASSETS_DIR, newFilename);
await fs.writeFile(filepath, buffer);
return filepath;
}
async function saveB64Image(b64: string) {
const buffer = Buffer.from(b64, "base64");
const newFilename = `${v4()}.png`;
const filepath = path.join(USER_ASSETS_DIR, newFilename);
await fs.writeFile(filepath, buffer);
return filepath;
}
async function createThumbnail(filepath: string) {
const thumbnailPath = filepath.replace(/(\.[\wd_-]+)$/i, "_t.jpg");
await sharp(filepath)
.resize(150, 150, {
fit: "inside",
withoutEnlargement: true,
})
.toFormat("jpeg")
.toFile(thumbnailPath);
return thumbnailPath;
}
/**
* Downloads generated images and mirrors them to the user_content directory.
* Mutates the result object.
*/
export async function mirrorGeneratedImage(
host: string,
prompt: string,
result: OpenAIImageGenerationResult
): Promise<OpenAIImageGenerationResult> {
for (const item of result.data) {
let mirror: string;
if (item.b64_json) {
mirror = await saveB64Image(item.b64_json);
} else {
mirror = await downloadImage(item.url);
}
item.url = `${host}/user_content/${path.basename(mirror)}`;
await createThumbnail(mirror);
addToImageHistory({ url: item.url, prompt });
}
return result;
}
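// Effect sketch: each item's `url` is replaced in place, e.g.
//   before: { url: "https://<openai blob storage>/....png" }
//   after:  { url: "https://myproxy.example/user_content/<uuid>.png" }
// (hosts illustrative), so clients only ever see the proxy's mirror.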

View File

@ -0,0 +1,20 @@
import { promises as fs } from "fs";
import { logger } from "../../logger";
import { USER_ASSETS_DIR } from "../../config";
const log = logger.child({ module: "file-storage" });
export async function setupAssetsDir() {
try {
log.info({ dir: USER_ASSETS_DIR }, "Setting up user assets directory");
await fs.mkdir(USER_ASSETS_DIR, { recursive: true });
const stats = await fs.stat(USER_ASSETS_DIR);
const mode = stats.mode | 0o666;
if (stats.mode !== mode) {
await fs.chmod(USER_ASSETS_DIR, mode);
}
} catch (e) {
log.error(e);
throw new Error("Could not create user assets directory for DALL-E image generation. You may need to update your Dockerfile to `chown` the working directory to user 1000. See the proxy docs for more information.");
}
}

View File

@ -6,14 +6,12 @@ import type { AnthropicModelFamily } from "../../models";
import { AnthropicKeyChecker } from "./checker"; import { AnthropicKeyChecker } from "./checker";
// https://docs.anthropic.com/claude/reference/selecting-a-model // https://docs.anthropic.com/claude/reference/selecting-a-model
export const ANTHROPIC_SUPPORTED_MODELS = [ export type AnthropicModel =
"claude-instant-v1", | "claude-instant-v1"
"claude-instant-v1-100k", | "claude-instant-v1-100k"
"claude-v1", | "claude-v1"
"claude-v1-100k", | "claude-v1-100k"
"claude-2", | "claude-2";
] as const;
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
export type AnthropicKeyUpdate = Omit< export type AnthropicKeyUpdate = Omit<
Partial<AnthropicKey>, Partial<AnthropicKey>,
@ -180,7 +178,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
key.claudeTokens += tokens; key.claudeTokens += tokens;
} }
public getLockoutPeriod(_model: AnthropicModel) { public getLockoutPeriod() {
const activeKeys = this.keys.filter((k) => !k.isDisabled); const activeKeys = this.keys.filter((k) => !k.isDisabled);
// Don't lock out if there are no keys available or the queue will stall. // Don't lock out if there are no keys available or the queue will stall.
// Just let it through so the add-key middleware can throw an error. // Just let it through so the add-key middleware can throw an error.

View File

@ -6,12 +6,10 @@ import type { AwsBedrockModelFamily } from "../../models";
import { AwsKeyChecker } from "./checker"; import { AwsKeyChecker } from "./checker";
// https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html // https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids-arns.html
export const AWS_BEDROCK_SUPPORTED_MODELS = [ export type AwsBedrockModel =
"anthropic.claude-v1", | "anthropic.claude-v1"
"anthropic.claude-v2", | "anthropic.claude-v2"
"anthropic.claude-instant-v1", | "anthropic.claude-instant-v1";
] as const;
export type AwsBedrockModel = (typeof AWS_BEDROCK_SUPPORTED_MODELS)[number];
type AwsBedrockKeyUsage = { type AwsBedrockKeyUsage = {
[K in AwsBedrockModelFamily as `${K}Tokens`]: number; [K in AwsBedrockModelFamily as `${K}Tokens`]: number;
@ -158,7 +156,7 @@ export class AwsBedrockKeyProvider implements KeyProvider<AwsBedrockKey> {
key["aws-claudeTokens"] += tokens; key["aws-claudeTokens"] += tokens;
} }
public getLockoutPeriod(_model: AwsBedrockModel) { public getLockoutPeriod() {
// TODO: same exact behavior for three providers, should be refactored // TODO: same exact behavior for three providers, should be refactored
const activeKeys = this.keys.filter((k) => !k.isDisabled); const activeKeys = this.keys.filter((k) => !k.isDisabled);
// Don't lock out if there are no keys available or the queue will stall. // Don't lock out if there are no keys available or the queue will stall.

View File

@ -1,15 +1,17 @@
import { OPENAI_SUPPORTED_MODELS, OpenAIModel } from "./openai/provider"; import { OpenAIModel } from "./openai/provider";
import { import { AnthropicModel } from "./anthropic/provider";
ANTHROPIC_SUPPORTED_MODELS, import { GooglePalmModel } from "./palm/provider";
AnthropicModel, import { AwsBedrockModel } from "./aws/provider";
} from "./anthropic/provider";
import { GOOGLE_PALM_SUPPORTED_MODELS, GooglePalmModel } from "./palm/provider";
import { AWS_BEDROCK_SUPPORTED_MODELS, AwsBedrockModel } from "./aws/provider";
import { KeyPool } from "./key-pool"; import { KeyPool } from "./key-pool";
import type { ModelFamily } from "../models"; import type { ModelFamily } from "../models";
/** The request and response format used by a model's API. */ /** The request and response format used by a model's API. */
export type APIFormat = "openai" | "anthropic" | "google-palm" | "openai-text"; export type APIFormat =
| "openai"
| "anthropic"
| "google-palm"
| "openai-text"
| "openai-image";
/** The service that a model is hosted on; distinct because services like AWS provide multiple APIs, but have their own endpoints and authentication. */ /** The service that a model is hosted on; distinct because services like AWS provide multiple APIs, but have their own endpoints and authentication. */
export type LLMService = "openai" | "anthropic" | "google-palm" | "aws"; export type LLMService = "openai" | "anthropic" | "google-palm" | "aws";
export type Model = export type Model =
@ -60,23 +62,12 @@ export interface KeyProvider<T extends Key = Key> {
update(hash: string, update: Partial<T>): void; update(hash: string, update: Partial<T>): void;
available(): number; available(): number;
incrementUsage(hash: string, model: string, tokens: number): void; incrementUsage(hash: string, model: string, tokens: number): void;
getLockoutPeriod(model: Model): number; getLockoutPeriod(model: ModelFamily): number;
markRateLimited(hash: string): void; markRateLimited(hash: string): void;
recheck(): void; recheck(): void;
} }
export const keyPool = new KeyPool(); export const keyPool = new KeyPool();
export const SUPPORTED_MODELS = [
...OPENAI_SUPPORTED_MODELS,
...ANTHROPIC_SUPPORTED_MODELS,
] as const;
export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
export {
OPENAI_SUPPORTED_MODELS,
ANTHROPIC_SUPPORTED_MODELS,
GOOGLE_PALM_SUPPORTED_MODELS,
AWS_BEDROCK_SUPPORTED_MODELS,
};
export { AnthropicKey } from "./anthropic/provider"; export { AnthropicKey } from "./anthropic/provider";
export { OpenAIKey } from "./openai/provider"; export { OpenAIKey } from "./openai/provider";
export { GooglePalmKey } from "./palm/provider"; export { GooglePalmKey } from "./palm/provider";

View File

@ -9,6 +9,8 @@ import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider"; import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";
import { GooglePalmKeyProvider } from "./palm/provider"; import { GooglePalmKeyProvider } from "./palm/provider";
import { AwsBedrockKeyProvider } from "./aws/provider"; import { AwsBedrockKeyProvider } from "./aws/provider";
import { ModelFamily } from "../models";
import { assertNever } from "../utils";
type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate; type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
@ -37,7 +39,7 @@ export class KeyPool {
} }
public get(model: Model): Key { public get(model: Model): Key {
const service = this.getService(model); const service = this.getServiceForModel(model);
return this.getKeyProvider(service).get(model); return this.getKeyProvider(service).get(model);
} }
@ -67,7 +69,7 @@ export class KeyPool {
public available(model: Model | "all" = "all"): number { public available(model: Model | "all" = "all"): number {
return this.keyProviders.reduce((sum, provider) => { return this.keyProviders.reduce((sum, provider) => {
const includeProvider = const includeProvider =
model === "all" || this.getService(model) === provider.service; model === "all" || this.getServiceForModel(model) === provider.service;
return sum + (includeProvider ? provider.available() : 0); return sum + (includeProvider ? provider.available() : 0);
}, 0); }, 0);
} }
@ -77,9 +79,9 @@ export class KeyPool {
provider.incrementUsage(key.hash, model, tokens); provider.incrementUsage(key.hash, model, tokens);
} }
public getLockoutPeriod(model: Model): number { public getLockoutPeriod(family: ModelFamily): number {
const service = this.getService(model); const service = this.getServiceForModelFamily(family);
return this.getKeyProvider(service).getLockoutPeriod(model); return this.getKeyProvider(service).getLockoutPeriod(family);
} }
public markRateLimited(key: Key): void { public markRateLimited(key: Key): void {
@ -104,8 +106,12 @@ export class KeyPool {
provider.recheck(); provider.recheck();
} }
private getService(model: Model): LLMService { private getServiceForModel(model: Model): LLMService {
if (model.startsWith("gpt") || model.startsWith("text-embedding-ada")) { if (
model.startsWith("gpt") ||
model.startsWith("text-embedding-ada") ||
model.startsWith("dall-e")
) {
// https://platform.openai.com/docs/models/model-endpoint-compatibility // https://platform.openai.com/docs/models/model-endpoint-compatibility
return "openai"; return "openai";
} else if (model.startsWith("claude-")) { } else if (model.startsWith("claude-")) {
@ -122,6 +128,25 @@ export class KeyPool {
throw new Error(`Unknown service for model '${model}'`); throw new Error(`Unknown service for model '${model}'`);
} }
private getServiceForModelFamily(modelFamily: ModelFamily): LLMService {
switch (modelFamily) {
case "gpt4":
case "gpt4-32k":
case "gpt4-turbo":
case "turbo":
case "dall-e":
return "openai";
case "claude":
return "anthropic";
case "bison":
return "google-palm";
case "aws-claude":
return "aws";
default:
assertNever(modelFamily);
}
}
private getKeyProvider(service: LLMService): KeyProvider { private getKeyProvider(service: LLMService): KeyProvider {
return this.keyProviders.find((provider) => provider.service === service)!; return this.keyProviders.find((provider) => provider.service === service)!;
} }

View File

@ -95,10 +95,15 @@ export class OpenAIKeyChecker extends KeyCheckerBase<OpenAIKey> {
const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts); const { data } = await axios.get<GetModelsResponse>(GET_MODELS_URL, opts);
const models = data.data; const models = data.data;
const families = new Set<OpenAIModelFamily>(); const families = new Set<OpenAIModelFamily>();
models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo"))); models.forEach(({ id }) => families.add(getOpenAIModelFamily(id, "turbo")));
// For now we remove dall-e from the list of provisioned models if only
// dall-e-2 is available.
if (families.has("dall-e") && !models.find(({ id }) => id === "dall-e-3")) {
families.delete("dall-e");
}
// We want to update the key's model families here, but we don't want to // We want to update the key's model families here, but we don't want to
// update its `lastChecked` timestamp because we need to let the liveness // update its `lastChecked` timestamp because we need to let the liveness
// check run before we can consider the key checked. // check run before we can consider the key checked.

View File

@ -15,12 +15,9 @@ export type OpenAIModel =
| "gpt-4" | "gpt-4"
| "gpt-4-32k" | "gpt-4-32k"
| "gpt-4-1106" | "gpt-4-1106"
| "text-embedding-ada-002"; | "text-embedding-ada-002"
export const OPENAI_SUPPORTED_MODELS: readonly OpenAIModel[] = [ | "dall-e-2"
"gpt-3.5-turbo", | "dall-e-3"
"gpt-3.5-turbo-instruct",
"gpt-4",
] as const;
// Flattening model families instead of using a nested object for easier // Flattening model families instead of using a nested object for easier
// cloning. // cloning.
@ -127,6 +124,7 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
gpt4Tokens: 0, gpt4Tokens: 0,
"gpt4-32kTokens": 0, "gpt4-32kTokens": 0,
"gpt4-turboTokens": 0, "gpt4-turboTokens": 0,
"dall-eTokens": 0,
gpt4Rpm: 0, gpt4Rpm: 0,
}; };
this.keys.push(newKey); this.keys.push(newKey);
@ -284,10 +282,9 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
* Given a model, returns the period until a key will be available to service * Given a model, returns the period until a key will be available to service
* the request, or returns 0 if a key is ready immediately. * the request, or returns 0 if a key is ready immediately.
*/ */
public getLockoutPeriod(model: Model = "gpt-4"): number { public getLockoutPeriod(family: OpenAIModelFamily): number {
const neededFamily = getOpenAIModelFamily(model);
const activeKeys = this.keys.filter( const activeKeys = this.keys.filter(
(key) => !key.isDisabled && key.modelFamilies.includes(neededFamily) (key) => !key.isDisabled && key.modelFamilies.includes(family)
); );
if (activeKeys.length === 0) { if (activeKeys.length === 0) {
@ -335,6 +332,10 @@ export class OpenAIKeyProvider implements KeyProvider<OpenAIKey> {
this.log.debug({ key: keyHash }, "Key rate limited"); this.log.debug({ key: keyHash }, "Key rate limited");
const key = this.keys.find((k) => k.hash === keyHash)!; const key = this.keys.find((k) => k.hash === keyHash)!;
key.rateLimitedAt = Date.now(); key.rateLimitedAt = Date.now();
// DALL-E requests do not send headers telling us when the rate limit will
// be reset so we need to set a fallback value here. Other models will have
// this overwritten by the `updateRateLimits` method.
key.rateLimitRequestsReset = 5000;
} }
public incrementUsage(keyHash: string, model: string, tokens: number) { public incrementUsage(keyHash: string, model: string, tokens: number) {

View File

@ -5,11 +5,7 @@ import { logger } from "../../../logger";
import type { GooglePalmModelFamily } from "../../models"; import type { GooglePalmModelFamily } from "../../models";
// https://developers.generativeai.google.com/models/language // https://developers.generativeai.google.com/models/language
export const GOOGLE_PALM_SUPPORTED_MODELS = [ export type GooglePalmModel = "text-bison-001";
"text-bison-001",
// "chat-bison-001", no adjustable safety settings, so it's useless
] as const;
export type GooglePalmModel = (typeof GOOGLE_PALM_SUPPORTED_MODELS)[number];
export type GooglePalmKeyUpdate = Omit< export type GooglePalmKeyUpdate = Omit<
Partial<GooglePalmKey>, Partial<GooglePalmKey>,
@ -149,7 +145,7 @@ export class GooglePalmKeyProvider implements KeyProvider<GooglePalmKey> {
key.bisonTokens += tokens; key.bisonTokens += tokens;
} }
public getLockoutPeriod(_model: GooglePalmModel) { public getLockoutPeriod() {
const activeKeys = this.keys.filter((k) => !k.isDisabled); const activeKeys = this.keys.filter((k) => !k.isDisabled);
// Don't lock out if there are no keys available or the queue will stall. // Don't lock out if there are no keys available or the queue will stall.
// Just let it through so the add-key middleware can throw an error. // Just let it through so the add-key middleware can throw an error.

View File

@@ -1,6 +1,8 @@
-import { logger } from "../logger";
+// Don't import anything here, this is imported by config.ts
+import pino from "pino";

-export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k" | "gpt4-turbo";
+export type OpenAIModelFamily = "turbo" | "gpt4" | "gpt4-32k" | "gpt4-turbo" | "dall-e";
 export type AnthropicModelFamily = "claude";
 export type GooglePalmModelFamily = "bison";
 export type AwsBedrockModelFamily = "aws-claude";

@@ -17,6 +19,7 @@ export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
   "gpt4",
   "gpt4-32k",
   "gpt4-turbo",
+  "dall-e",
   "claude",
   "bison",
   "aws-claude",

@@ -30,8 +33,11 @@ export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
   "^gpt-4$": "gpt4",
   "^gpt-3.5-turbo": "turbo",
   "^text-embedding-ada-002$": "turbo",
+  "^dall-e-\\d{1}$": "dall-e",
 };

+const modelLogger = pino({ level: "debug" }).child({ module: "startup" });
+
 export function getOpenAIModelFamily(
   model: string,
   defaultFamily: OpenAIModelFamily = "gpt4"

@@ -42,14 +48,14 @@ export function getOpenAIModelFamily(
   return defaultFamily;
 }

-export function getClaudeModelFamily(_model: string): ModelFamily {
+export function getClaudeModelFamily(model: string): ModelFamily {
+  if (model.startsWith("anthropic.")) return getAwsBedrockModelFamily(model);
   return "claude";
 }

 export function getGooglePalmModelFamily(model: string): ModelFamily {
   if (model.match(/^\w+-bison-\d{3}$/)) return "bison";
-  const stack = new Error().stack;
-  logger.warn({ model, stack }, "Unmapped PaLM model family");
+  modelLogger.warn({ model }, "Could not determine Google PaLM model family");
   return "bison";
 }
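As a quick illustration of the new regex: "^dall-e-\\d{1}$" matches "dall-e-" followed by exactly one digit, so the two current image models map to the new family and anything else falls through to the default. A sketch of the expected behavior:

getOpenAIModelFamily("dall-e-3");  // => "dall-e"
getOpenAIModelFamily("dall-e-2");  // => "dall-e"
getOpenAIModelFamily("dall-e-10"); // no match; returns defaultFamily ("gpt4")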

View File

@@ -396,7 +396,7 @@ export const init = async (onStop: () => void) => {
     await loadIndexSheet(false);
     await writeIndexSheet();
   } catch (e) {
-    log.info("Creating new index sheet.");
+    log.warn(e, "Could not load index sheet. Creating a new one.");
     await createIndexSheet();
   }
 };

View File

@@ -17,6 +17,9 @@ export function getTokenCostUsd(model: ModelFamily, tokens: number) {
     case "turbo":
       cost = 0.000001;
       break;
+    case "dall-e":
+      cost = 0.00001;
+      break;
     case "aws-claude":
     case "claude":
       cost = 0.00001102;
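Assuming getTokenCostUsd multiplies this per-token rate by the token count, as it does for the other families, the new dall-e case is just the inverse of the tokenizer's 100,000-tokens-per-dollar convention:

getTokenCostUsd("dall-e", 10000); // => 0.1 (one $0.10 DALL-E 3 image)
getTokenCostUsd("dall-e", 4000);  // => 0.04 (one standard 1024x1024 image)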

View File

@@ -79,7 +79,8 @@ export function buildFakeSse(
       };
       break;
     case "google-palm":
-      throw new Error("PaLM not supported as an inbound API format");
+    case "openai-image":
+      throw new Error(`SSE not supported for ${req.inboundApi} requests`);
     default:
       assertNever(req.inboundApi);
   }

@@ -92,4 +93,4 @@
   }

   return `data: ${JSON.stringify(fakeEvent)}\n\n`;
 }

View File

@@ -78,3 +78,63 @@ export type OpenAIPromptMessage = {
   content: string;
   role: string;
 };
+
+// Model        Resolution             Price
+// DALL·E 3     1024×1024              $0.040 / image
+//              1024×1792, 1792×1024   $0.080 / image
+// DALL·E 3 HD  1024×1024              $0.080 / image
+//              1024×1792, 1792×1024   $0.120 / image
+// DALL·E 2     1024×1024              $0.020 / image
+//              512×512                $0.018 / image
+//              256×256                $0.016 / image
+export const DALLE_TOKENS_PER_DOLLAR = 100000;
+
+/**
+ * OpenAI image generation with DALL-E doesn't use tokens but everything else
+ * in the application does. There is a fixed cost for each image generation
+ * request depending on the model and selected quality/resolution parameters,
+ * which we convert to tokens at a rate of 100000 tokens per dollar.
+ */
+export function getOpenAIImageCost(params: {
+  model: "dall-e-2" | "dall-e-3";
+  quality: "standard" | "hd";
+  resolution: "512x512" | "256x256" | "1024x1024" | "1024x1792" | "1792x1024";
+  n: number | null;
+}) {
+  const { model, quality, resolution, n } = params;
+  const usd = (() => {
+    switch (model) {
+      case "dall-e-2":
+        switch (resolution) {
+          case "512x512":
+            return 0.018;
+          case "256x256":
+            return 0.016;
+          case "1024x1024":
+            return 0.02;
+          default:
+            throw new Error("Invalid resolution");
+        }
+      case "dall-e-3":
+        switch (resolution) {
+          case "1024x1024":
+            return quality === "standard" ? 0.04 : 0.08;
+          case "1024x1792":
+          case "1792x1024":
+            return quality === "standard" ? 0.08 : 0.12;
+          default:
+            throw new Error("Invalid resolution");
+        }
+      default:
+        throw new Error("Invalid image generation model");
+    }
+  })();
+
+  const tokens = (n ?? 1) * (usd * DALLE_TOKENS_PER_DOLLAR);
+
+  return {
+    tokenizer: `openai-image cost`,
+    token_count: Math.ceil(tokens),
+  };
+}
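A usage sketch of getOpenAIImageCost with the token math written out:

// DALL-E 3 HD at 1024x1792 is $0.12 per image, so two images cost
// 2 * 0.12 * 100000 = 24000 "tokens".
const cost = getOpenAIImageCost({
  model: "dall-e-3",
  quality: "hd",
  resolution: "1024x1792",
  n: 2,
});
// => { tokenizer: "openai-image cost", token_count: 24000 }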

View File

@@ -8,6 +8,7 @@ import {
   init as initOpenAi,
   getTokenCount as getOpenAITokenCount,
   OpenAIPromptMessage,
+  getOpenAIImageCost,
 } from "./openai";
 import { APIFormat } from "../key-management";

@@ -26,6 +27,7 @@ type TokenCountRequest = { req: Request } & (
       service: "openai-text" | "anthropic" | "google-palm";
     }
   | { prompt?: never; completion: string; service: APIFormat }
+  | { prompt?: never; completion?: never; service: "openai-image" }
 );

 type TokenCountResult = {

@@ -53,6 +55,16 @@ export async function countTokens({
         ...getOpenAITokenCount(prompt ?? completion, req.body.model),
         tokenization_duration_ms: getElapsedMs(time),
       };
+    case "openai-image":
+      return {
+        ...getOpenAIImageCost({
+          model: req.body.model,
+          quality: req.body.quality,
+          resolution: req.body.size,
+          n: parseInt(req.body.n, 10) || null,
+        }),
+        tokenization_duration_ms: getElapsedMs(time),
+      };
     case "google-palm":
       // TODO: Can't find a tokenization library for PaLM. There is an API
       // endpoint for it but it adds significant latency to the request.
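In other words, an image request whose body carries model "dall-e-3", quality "standard", size "1024x1024", and n "1" is billed as 4,000 tokens. Note that parseInt(req.body.n, 10) || null collapses a missing, zero, or non-numeric n to null, which getOpenAIImageCost then treats as a single image. A sketch of the call, assuming req is the usual Express request carrying that body:

const result = await countTokens({ req, service: "openai-image" });
// result.token_count              => 4000 (0.04 USD * 100000 tokens/USD)
// result.tokenization_duration_ms => time spent in the pricing lookup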

View File

@@ -7,6 +7,7 @@ export const tokenCountsSchema: ZodType<UserTokenCounts> = z.object({
   gpt4: z.number().optional().default(0),
   "gpt4-32k": z.number().optional().default(0),
   "gpt4-turbo": z.number().optional().default(0),
+  "dall-e": z.number().optional().default(0),
   claude: z.number().optional().default(0),
   bison: z.number().optional().default(0),
   "aws-claude": z.number().optional().default(0),

View File

@@ -11,9 +11,17 @@ import admin from "firebase-admin";
 import schedule from "node-schedule";
 import { v4 as uuid } from "uuid";
 import { config, getFirebaseApp } from "../../config";
-import { MODEL_FAMILIES, ModelFamily } from "../models";
+import {
+  getClaudeModelFamily,
+  getGooglePalmModelFamily,
+  getOpenAIModelFamily,
+  MODEL_FAMILIES,
+  ModelFamily,
+} from "../models";
 import { logger } from "../../logger";
 import { User, UserTokenCounts, UserUpdate } from "./schema";
+import { APIFormat } from "../key-management";
+import { assertNever } from "../utils";

 const log = logger.child({ module: "users" });

@@ -22,6 +30,7 @@ const INITIAL_TOKENS: Required<UserTokenCounts> = {
   gpt4: 0,
   "gpt4-32k": 0,
   "gpt4-turbo": 0,
+  "dall-e": 0,
   claude: 0,
   bison: 0,
   "aws-claude": 0,

@@ -166,11 +175,12 @@ export function incrementPromptCount(token: string) {
 export function incrementTokenCount(
   token: string,
   model: string,
+  api: APIFormat,
   consumption: number
 ) {
   const user = users.get(token);
   if (!user) return;
-  const modelFamily = getModelFamilyForQuotaUsage(model);
+  const modelFamily = getModelFamilyForQuotaUsage(model, api);
   const existing = user.tokenCounts[modelFamily] ?? 0;
   user.tokenCounts[modelFamily] = existing + consumption;
   usersToFlush.add(token);

@@ -181,9 +191,10 @@
  * to the user's list of IPs. Returns the user if they exist and are not
  * disabled, otherwise returns undefined.
  */
-export function authenticate(token: string, ip: string):
-  { user?: User; result: "success" | "disabled" | "not_found" | "limited" }
-{
+export function authenticate(
+  token: string,
+  ip: string
+): { user?: User; result: "success" | "disabled" | "not_found" | "limited" } {
   const user = users.get(token);
   if (!user) return { result: "not_found" };
   if (user.disabledAt) return { result: "disabled" };

@@ -210,16 +221,22 @@
   return { user, result: "success" };
 }

-export function hasAvailableQuota(
-  token: string,
-  model: string,
-  requested: number
-) {
-  const user = users.get(token);
+export function hasAvailableQuota({
+  userToken,
+  model,
+  api,
+  requested,
+}: {
+  userToken: string;
+  model: string;
+  api: APIFormat;
+  requested: number;
+}) {
+  const user = users.get(userToken);
   if (!user) return false;
   if (user.type === "special") return true;
-  const modelFamily = getModelFamilyForQuotaUsage(model);
+  const modelFamily = getModelFamilyForQuotaUsage(model, api);
   const { tokenCounts, tokenLimits } = user;
   const tokenLimit = tokenLimits[modelFamily];

@@ -361,30 +378,22 @@
   );
 }

-// TODO: use key-management/models.ts for family mapping
-function getModelFamilyForQuotaUsage(model: string): ModelFamily {
-  if (model.startsWith("gpt-4-1106")) {
-    return "gpt4-turbo";
-  }
-  if (model.includes("32k")) {
-    return "gpt4-32k";
-  }
-  if (model.startsWith("gpt-4")) {
-    return "gpt4";
-  }
-  if (model.startsWith("gpt-3.5")) {
-    return "turbo";
-  }
-  if (model.includes("bison")) {
-    return "bison";
-  }
-  if (model.startsWith("claude")) {
-    return "claude";
-  }
-  if (model.startsWith("anthropic.claude")) {
-    return "aws-claude";
-  }
-  throw new Error(`Unknown quota model family for model ${model}`);
+function getModelFamilyForQuotaUsage(
+  model: string,
+  api: APIFormat
+): ModelFamily {
+  switch (api) {
+    case "openai":
+    case "openai-text":
+    case "openai-image":
+      return getOpenAIModelFamily(model);
+    case "anthropic":
+      return getClaudeModelFamily(model);
+    case "google-palm":
+      return getGooglePalmModelFamily(model);
+    default:
+      assertNever(api);
+  }
 }
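The rewritten helper delegates to the same family-mapping functions used by key management, so quota attribution can no longer drift from routing. Assuming getAwsBedrockModelFamily maps Bedrock's anthropic.* model IDs to "aws-claude", the behavior works out to:

getModelFamilyForQuotaUsage("dall-e-3", "openai-image");         // => "dall-e"
getModelFamilyForQuotaUsage("claude-2", "anthropic");            // => "claude"
getModelFamilyForQuotaUsage("anthropic.claude-v2", "anthropic"); // => "aws-claude"
getModelFamilyForQuotaUsage("gpt-3.5-turbo", "openai");          // => "turbo"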
function getRefreshCrontab() { function getRefreshCrontab() {

View File

@@ -24,8 +24,7 @@ declare global {
       heartbeatInterval?: NodeJS.Timeout;
       promptTokens?: number;
       outputTokens?: number;
-      // TODO: remove later
-      debug: Record<string, any>;
+      tokenizerInfo: Record<string, any>;
       signedRequest: HttpRequest;
     }
   }