removes QUEUE_MODE config (now always enabled)

nai-degen 2023-08-09 18:20:19 -05:00
parent 5d3fb6af3a
commit 6bb67281d9
7 changed files with 38 additions and 92 deletions
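Net effect of this commit: the QUEUE_MODE environment variable, the DequeueMode type, and the "random" / "none" strategies are removed, so the request queue is always on and always services the oldest waiting request first. A minimal standalone sketch of that remaining behavior (illustrative only; the startTime field and the reduce-based selection mirror the diff below, everything else is assumed):

// Illustrative sketch, not part of the commit.
interface QueuedRequest {
  startTime: number; // ms timestamp recorded when the request was enqueued
}

// Pick the request that has been waiting the longest (the old "fair" strategy,
// now the only strategy) and remove it from the waiting list.
function dequeueOldest(queue: QueuedRequest[]): QueuedRequest | undefined {
  if (queue.length === 0) return undefined;
  const oldest = queue.reduce((prev, curr) =>
    prev.startTime < curr.startTime ? prev : curr
  );
  queue.splice(queue.indexOf(oldest), 1);
  return oldest;
}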

View File

@@ -11,7 +11,6 @@
 # REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
 # CHECK_KEYS=true
 # QUOTA_DISPLAY_MODE=full
-# QUEUE_MODE=fair
 # BLOCKED_ORIGINS=reddit.com,9gag.com
 # BLOCK_MESSAGE="You must be over the age of majority in your country to use this service."
 # BLOCK_REDIRECT="https://roblox.com/"

View File

@@ -9,7 +9,6 @@ const startupLogger = pino({ level: "debug" }).child({ module: "startup" });
 const isDev = process.env.NODE_ENV !== "production";
 type PromptLoggingBackend = "google_sheets";
-export type DequeueMode = "fair" | "random" | "none";
 type Config = {
   /** The port the proxy server will listen on. */
@@ -107,16 +106,6 @@ type Config = {
    * `full`: Displays information about keys' quota limits
    */
   quotaDisplayMode: "none" | "full";
-  /**
-   * Which request queueing strategy to use when keys are over their rate limit.
-   *
-   * `fair`: Requests are serviced in the order they were received (default)
-   *
-   * `random`: Requests are serviced randomly
-   *
-   * `none`: Requests are not queued and users have to retry manually
-   */
-  queueMode: DequeueMode;
   /**
    * Comma-separated list of origins to block. Requests matching any of these
    * origins or referers will be rejected.
@@ -179,7 +168,6 @@ export const config: Config = {
     "GOOGLE_SHEETS_SPREADSHEET_ID",
     undefined
   ),
-  queueMode: getEnvWithDefault("QUEUE_MODE", "fair"),
   blockedOrigins: getEnvWithDefault("BLOCKED_ORIGINS", undefined),
   blockMessage: getEnvWithDefault(
     "BLOCK_MESSAGE",

View File

@@ -148,7 +148,6 @@ function getOpenAIInfo() {
     };
   }
-  if (config.queueMode !== "none") {
   const turboQueue = getQueueInformation("turbo");
   info.turbo.proomptersInQueue = turboQueue.proomptersInQueue;
@@ -159,7 +158,6 @@
     info.gpt4.proomptersInQueue = gpt4Queue.proomptersInQueue;
     info.gpt4.estimatedQueueTime = gpt4Queue.estimatedQueueTime;
   }
-  }
   return info;
 }
@@ -168,11 +166,9 @@
   const claudeInfo: Partial<ServiceInfo> = {};
   const keys = keyPool.list().filter((k) => k.service === "anthropic");
   claudeInfo.activeKeys = keys.filter((k) => !k.isDisabled).length;
-  if (config.queueMode !== "none") {
   const queue = getQueueInformation("claude");
   claudeInfo.proomptersInQueue = queue.proomptersInQueue;
   claudeInfo.estimatedQueueTime = queue.estimatedQueueTime;
-  }
   return { claude: claudeInfo };
 }
@@ -198,7 +194,6 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
 **If you are uncomfortable with this, don't send prompts to this proxy!**`;
   }
-  if (config.queueMode !== "none") {
   const waits: string[] = [];
   infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will processed when a slot frees up.`;
@@ -216,7 +211,6 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
     waits.push(`**Claude:** ${claudeWait}`);
   }
   infoBody += "\n\n" + waits.join(" / ");
-  }
   if (customGreeting) {
     infoBody += `\n## Server Greeting\n
@@ -227,9 +221,6 @@ ${customGreeting}`;
 /** Returns queue time in seconds, or minutes + seconds if over 60 seconds. */
 function getQueueInformation(partition: QueuePartition) {
-  if (config.queueMode === "none") {
-    return {};
-  }
   const waitMs = getEstimatedWaitTime(partition);
   const waitTime =
     waitMs < 60000

View File

@@ -33,12 +33,6 @@ const rewriteRequest = (
   req: Request,
   res: Response
 ) => {
-  if (config.queueMode !== "none") {
-    const msg = `Queueing is enabled on this proxy instance and is incompatible with the KoboldAI endpoint. Use the OpenAI endpoint instead.`;
-    proxyReq.destroy(new Error(msg));
-    return;
-  }
   req.body.stream = false;
   const rewriterPipeline = [
     addKey,

View File

@@ -341,11 +341,8 @@ function maybeHandleMissingPreambleError(
       "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
     );
     keyPool.update(req.key!, { requiresPreamble: true });
-    if (config.queueMode !== "none") {
     reenqueueRequest(req);
     throw new RetryableError("Claude request re-enqueued to add preamble.");
-    }
-    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
   } else {
     errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
   }
@@ -357,11 +354,8 @@ function handleAnthropicRateLimitError(
 ) {
   if (errorPayload.error?.type === "rate_limit_error") {
     keyPool.markRateLimited(req.key!);
-    if (config.queueMode !== "none") {
     reenqueueRequest(req);
     throw new RetryableError("Claude rate-limited request re-enqueued.");
-    }
-    errorPayload.proxy_note = `There are too many in-flight requests for this key. Try again later.`;
   } else {
     errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`;
   }
@@ -388,13 +382,11 @@ function handleOpenAIRateLimitError(
   } else if (type === "requests" || type === "tokens") {
     // Per-minute request or token rate limit is exceeded, which we can retry
     keyPool.markRateLimited(req.key!);
-    if (config.queueMode !== "none") {
+    // I'm aware this is confusing -- throwing this class of error will cause
+    // the proxy response handler to return without terminating the request,
+    // so that it can be placed back in the queue.
     reenqueueRequest(req);
-      // This is confusing, but it will bubble up to the top-level response
-      // handler and cause the request to go back into the request queue.
     throw new RetryableError("Rate-limited request re-enqueued.");
-    }
-    errorPayload.proxy_note = `Assigned key's '${type}' rate limit has been exceeded. Try again later.`;
   } else {
     // OpenAI probably overloaded
     errorPayload.proxy_note = `This is likely a temporary error with OpenAI. Try again in a few seconds.`;
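For context, the comments in this hunk describe the retry mechanism these handlers rely on. A hedged sketch of that pattern (only RetryableError and the re-enqueue idea come from the diff; the surrounding handler shape is an assumption, not the project's code):

// Illustrative sketch: per-service error handlers re-enqueue the request and
// then throw RetryableError; the top-level response handler treats that
// specific error as "stop here, the request will be retried from the queue".
class RetryableError extends Error {}

function runErrorHandlers(handlers: Array<() => void>): "retrying" | "done" {
  try {
    for (const handler of handlers) handler(); // a handler may throw RetryableError
  } catch (err) {
    if (err instanceof RetryableError) return "retrying"; // request is already back in the queue
    throw err; // anything unrecognized is a real failure and still propagates
  }
  return "done";
}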

View File

@@ -16,7 +16,6 @@
  */
 import type { Handler, Request } from "express";
-import { config, DequeueMode } from "../config";
 import { keyPool, SupportedModel } from "../key-management";
 import { logger } from "../logger";
 import { AGNAI_DOT_CHAT_IP } from "./rate-limit";
@@ -27,8 +26,6 @@ export type QueuePartition = "claude" | "turbo" | "gpt-4";
 const queue: Request[] = [];
 const log = logger.child({ module: "request-queue" });
-let dequeueMode: DequeueMode = "fair";
 /** Maximum number of queue slots for Agnai.chat requests. */
 const AGNAI_CONCURRENCY_LIMIT = 15;
 /** Maximum number of queue slots for individual users. */
@@ -160,18 +157,9 @@ export function dequeue(partition: QueuePartition): Request | undefined {
     return undefined;
   }
-  let req: Request;
-  if (dequeueMode === "fair") {
-    // Dequeue the request that has been waiting the longest
-    req = modelQueue.reduce((prev, curr) =>
+  const req = modelQueue.reduce((prev, curr) =>
     prev.startTime < curr.startTime ? prev : curr
   );
-  } else {
-    // Dequeue a random request
-    const index = Math.floor(Math.random() * modelQueue.length);
-    req = modelQueue[index];
-  }
   queue.splice(queue.indexOf(req), 1);
   if (req.onAborted) {
@@ -293,10 +281,6 @@ export function getQueueLength(partition: QueuePartition | "all" = "all") {
 export function createQueueMiddleware(proxyMiddleware: Handler): Handler {
   return (req, res, next) => {
-    if (config.queueMode === "none") {
-      return proxyMiddleware(req, res, next);
-    }
     req.proceed = () => {
       proxyMiddleware(req, res, next);
     };
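With the "none" bypass removed, this middleware always defers the proxy handler rather than ever calling it inline. A minimal sketch of that wiring (the proceed assignment comes from the diff above; the enqueue call and the type augmentation are assumptions about the rest of queue.ts):

// Illustrative sketch, assumed wiring: stash the proxy call on the request and
// enqueue it; the dequeue loop invokes proceed() later when a key frees up.
import type { Handler, NextFunction, Request, Response } from "express";

type QueueableRequest = Request & { proceed?: () => void };

// Assumed to exist elsewhere in queue.ts; declared here only to keep the sketch self-contained.
declare function enqueue(req: QueueableRequest): void;

export function createQueueMiddlewareSketch(proxyMiddleware: Handler): Handler {
  return (req: Request, res: Response, next: NextFunction) => {
    const queueable = req as QueueableRequest;
    queueable.proceed = () => {
      proxyMiddleware(req, res, next); // runs later, once dequeued
    };
    enqueue(queueable); // every request is queued; there is no bypass path anymore
  };
}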

View File

@@ -102,10 +102,8 @@ async function start() {
     logQueue.start();
   }
-  if (config.queueMode !== "none") {
   logger.info("Starting request queue...");
   startRequestQueue();
-  }
   app.listen(PORT, async () => {
     logger.info({ port: PORT }, "Now listening for connections.");