From 2bad644772b595ba43b56d29ce37b4f829cc23c7 Mon Sep 17 00:00:00 2001 From: nai-degen <44111-khanon@users.noreply.gitgud.io> Date: Fri, 19 May 2023 04:33:20 +0000 Subject: [PATCH] Prefer user tokens as rate-limit/queue keys when available (khanon/oai-reverse-proxy!10) --- src/info-page.ts | 2 +- src/proxy/queue.ts | 27 ++++++++++++++++++++++----- src/proxy/rate-limit.ts | 8 ++++++-- src/server.ts | 23 +++++++++++++---------- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/info-page.ts b/src/info-page.ts index b377e82..e96af81 100644 --- a/src/info-page.ts +++ b/src/info-page.ts @@ -81,7 +81,7 @@ function cacheInfoPageHtml(host: string) { ...getQueueInformation(), keys: keyInfo, config: listConfig(), - build: process.env.COMMIT_SHA || "dev", + build: process.env.BUILD_INFO || "dev", }; const title = getServerTitle(); diff --git a/src/proxy/queue.ts b/src/proxy/queue.ts index 5d5e0c9..9dee155 100644 --- a/src/proxy/queue.ts +++ b/src/proxy/queue.ts @@ -31,16 +31,33 @@ const AGNAI_CONCURRENCY_LIMIT = 15; /** Maximum number of queue slots for individual users. */ const USER_CONCURRENCY_LIMIT = 1; +const sameIpPredicate = (incoming: Request) => (queued: Request) => + queued.ip === incoming.ip; +const sameUserPredicate = (incoming: Request) => (queued: Request) => { + const incomingUser = incoming.user ?? { token: incoming.ip }; + const queuedUser = queued.user ?? { token: queued.ip }; + return queuedUser.token === incomingUser.token; +}; + export function enqueue(req: Request) { - // All agnai.chat requests come from the same IP, so we allow them to have + let enqueuedRequestCount = 0; + let isGuest = req.user?.token === undefined; + + if (isGuest) { + enqueuedRequestCount = queue.filter(sameIpPredicate(req)).length; + } else { + enqueuedRequestCount = queue.filter(sameUserPredicate(req)).length; + } + + // All Agnai.chat requests come from the same IP, so we allow them to have // more spots in the queue. Can't make it unlimited because people will // intentionally abuse it. + // Authenticated users always get a single spot in the queue. const maxConcurrentQueuedRequests = - req.ip === AGNAI_DOT_CHAT_IP + isGuest && req.ip === AGNAI_DOT_CHAT_IP ? AGNAI_CONCURRENCY_LIMIT : USER_CONCURRENCY_LIMIT; - const reqCount = queue.filter((r) => r.ip === req.ip).length; - if (reqCount >= maxConcurrentQueuedRequests) { + if (enqueuedRequestCount >= maxConcurrentQueuedRequests) { if (req.ip === AGNAI_DOT_CHAT_IP) { // Re-enqueued requests are not counted towards the limit since they // already made it through the queue once. @@ -48,7 +65,7 @@ export function enqueue(req: Request) { throw new Error("Too many agnai.chat requests are already queued"); } } else { - throw new Error("Request is already queued for this IP"); + throw new Error("Your IP or token already has a request in the queue"); } } diff --git a/src/proxy/rate-limit.ts b/src/proxy/rate-limit.ts index db751f5..c4f7eed 100644 --- a/src/proxy/rate-limit.ts +++ b/src/proxy/rate-limit.ts @@ -66,12 +66,16 @@ export const ipLimiter = (req: Request, res: Response, next: NextFunction) => { return; } - const { remaining, reset } = getStatus(req.ip); + // If user is authenticated, key rate limiting by their token. Otherwise, key + // rate limiting by their IP address. Mitigates key sharing. + const rateLimitKey = req.user?.token || req.ip; + + const { remaining, reset } = getStatus(rateLimitKey); res.set("X-RateLimit-Limit", config.modelRateLimit.toString()); res.set("X-RateLimit-Remaining", remaining.toString()); res.set("X-RateLimit-Reset", reset.toString()); - const tryAgainInMs = getTryAgainInMs(req.ip); + const tryAgainInMs = getTryAgainInMs(rateLimitKey); if (tryAgainInMs > 0) { res.set("Retry-After", tryAgainInMs.toString()); res.status(429).json({ diff --git a/src/server.ts b/src/server.ts index d368ab2..6e89363 100644 --- a/src/server.ts +++ b/src/server.ts @@ -35,6 +35,8 @@ app.use( "req.headers.authorization", 'req.headers["x-forwarded-for"]', 'req.headers["x-real-ip"]', + 'req.headers["true-client-ip"]', + 'req.headers["cf-connecting-ip"]', ], censor: "********", }, @@ -85,7 +87,7 @@ app.use((_req: unknown, res: express.Response) => { async function start() { logger.info("Server starting up..."); - setGitSha(); + setBuildInfo(); logger.info("Checking configs and external dependencies..."); await assertConfigIsValid(); @@ -112,7 +114,7 @@ async function start() { }); logger.info( - { sha: process.env.COMMIT_SHA, nodeEnv: process.env.NODE_ENV }, + { build: process.env.BUILD_INFO, nodeEnv: process.env.NODE_ENV }, "Startup complete." ); } @@ -132,15 +134,16 @@ function registerUncaughtExceptionHandler() { }); } -function setGitSha() { +function setBuildInfo() { // On Render, the .git directory isn't available in the docker build context // so we can't get the SHA directly, but they expose it as an env variable. if (process.env.RENDER) { - const shaString = `${process.env.RENDER_GIT_COMMIT?.slice(0, 7)} (${ - process.env.RENDER_GIT_REPO_SLUG - })`; - process.env.COMMIT_SHA = shaString; - logger.info({ sha: shaString }, "Got commit SHA via Render config."); + const sha = process.env.RENDER_GIT_COMMIT?.slice(0, 7) || "unknown SHA"; + const branch = process.env.RENDER_GIT_BRANCH || "unknown branch"; + const repo = process.env.RENDER_GIT_REPO_SLUG || "unknown repo"; + const buildInfo = `${sha} (${branch}@${repo})`; + process.env.BUILD_INFO = buildInfo; + logger.info({ build: buildInfo }, "Got build info from Render config."); return; } @@ -171,7 +174,7 @@ function setGitSha() { logger.info({ sha, status, changes }, "Got commit SHA and status."); - process.env.COMMIT_SHA = `${sha}${changes ? " (modified)" : ""}`; + process.env.BUILD_INFO = `${sha}${changes ? " (modified)" : ""}`; } catch (error: any) { logger.error( { @@ -182,7 +185,7 @@ function setGitSha() { "Failed to get commit SHA.", error ); - process.env.COMMIT_SHA = "unknown"; + process.env.BUILD_INFO = "unknown"; } }