removes QUEUE_MODE config (now always enabled)

nai-degen 2023-08-09 18:20:19 -05:00
parent 5d3fb6af3a
commit 6bb67281d9
7 changed files with 38 additions and 92 deletions
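
In short: QUEUE_MODE previously selected the request-queue strategy (fair, random, or none, with none disabling queueing entirely); after this commit the queue is always active and always dequeues oldest-first, and every config.queueMode check is deleted. A minimal sketch of the change to the config's shape, based only on what the diff below shows (the DequeueMode type and the QUEUE_MODE variable are removed outright):

// Sketch for orientation only -- not the actual Config type.
// Before: the dequeue strategy was configurable and queueing could be disabled.
type DequeueMode = "fair" | "random" | "none";
interface ConfigBefore {
  quotaDisplayMode: "none" | "full";
  queueMode: DequeueMode; // read from QUEUE_MODE, default "fair"
}

// After: queueMode is gone; the queue always runs and always dequeues oldest-first.
interface ConfigAfter {
  quotaDisplayMode: "none" | "full";
}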

View File

@ -11,7 +11,6 @@
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
# CHECK_KEYS=true
# QUOTA_DISPLAY_MODE=full
-# QUEUE_MODE=fair
# BLOCKED_ORIGINS=reddit.com,9gag.com
# BLOCK_MESSAGE="You must be over the age of majority in your country to use this service."
# BLOCK_REDIRECT="https://roblox.com/"

View File

@ -9,7 +9,6 @@ const startupLogger = pino({ level: "debug" }).child({ module: "startup" });
const isDev = process.env.NODE_ENV !== "production";
type PromptLoggingBackend = "google_sheets";
-export type DequeueMode = "fair" | "random" | "none";
type Config = {
/** The port the proxy server will listen on. */
@ -107,16 +106,6 @@ type Config = {
* `full`: Displays information about keys' quota limits
*/
quotaDisplayMode: "none" | "full";
-  /**
-   * Which request queueing strategy to use when keys are over their rate limit.
-   *
-   * `fair`: Requests are serviced in the order they were received (default)
-   *
-   * `random`: Requests are serviced randomly
-   *
-   * `none`: Requests are not queued and users have to retry manually
-   */
-  queueMode: DequeueMode;
/**
* Comma-separated list of origins to block. Requests matching any of these
* origins or referers will be rejected.
@ -179,7 +168,6 @@ export const config: Config = {
"GOOGLE_SHEETS_SPREADSHEET_ID",
undefined
),
-  queueMode: getEnvWithDefault("QUEUE_MODE", "fair"),
blockedOrigins: getEnvWithDefault("BLOCKED_ORIGINS", undefined),
blockMessage: getEnvWithDefault(
"BLOCK_MESSAGE",

View File

@ -148,17 +148,15 @@ function getOpenAIInfo() {
};
}
-  if (config.queueMode !== "none") {
-    const turboQueue = getQueueInformation("turbo");
-    info.turbo.proomptersInQueue = turboQueue.proomptersInQueue;
-    info.turbo.estimatedQueueTime = turboQueue.estimatedQueueTime;
-    if (hasGpt4) {
-      const gpt4Queue = getQueueInformation("gpt-4");
-      info.gpt4.proomptersInQueue = gpt4Queue.proomptersInQueue;
-      info.gpt4.estimatedQueueTime = gpt4Queue.estimatedQueueTime;
-    }
-  }
+  const turboQueue = getQueueInformation("turbo");
+  info.turbo.proomptersInQueue = turboQueue.proomptersInQueue;
+  info.turbo.estimatedQueueTime = turboQueue.estimatedQueueTime;
+  if (hasGpt4) {
+    const gpt4Queue = getQueueInformation("gpt-4");
+    info.gpt4.proomptersInQueue = gpt4Queue.proomptersInQueue;
+    info.gpt4.estimatedQueueTime = gpt4Queue.estimatedQueueTime;
+  }
return info;
@ -168,11 +166,9 @@ function getAnthropicInfo() {
const claudeInfo: Partial<ServiceInfo> = {};
const keys = keyPool.list().filter((k) => k.service === "anthropic");
claudeInfo.activeKeys = keys.filter((k) => !k.isDisabled).length;
-  if (config.queueMode !== "none") {
-    const queue = getQueueInformation("claude");
-    claudeInfo.proomptersInQueue = queue.proomptersInQueue;
-    claudeInfo.estimatedQueueTime = queue.estimatedQueueTime;
-  }
+  const queue = getQueueInformation("claude");
+  claudeInfo.proomptersInQueue = queue.proomptersInQueue;
+  claudeInfo.estimatedQueueTime = queue.estimatedQueueTime;
return { claude: claudeInfo };
}
@ -198,26 +194,24 @@ Logs are anonymous and do not contain IP addresses or timestamps. [You can see t
**If you are uncomfortable with this, don't send prompts to this proxy!**`;
}
-  if (config.queueMode !== "none") {
-    const waits: string[] = [];
-    infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will processed when a slot frees up.`;
-    if (config.openaiKey) {
-      const turboWait = getQueueInformation("turbo").estimatedQueueTime;
-      const gpt4Wait = getQueueInformation("gpt-4").estimatedQueueTime;
-      waits.push(`**Turbo:** ${turboWait}`);
-      if (keyPool.list().some((k) => k.isGpt4) && !config.turboOnly) {
-        waits.push(`**GPT-4:** ${gpt4Wait}`);
-      }
-    }
-    if (config.anthropicKey) {
-      const claudeWait = getQueueInformation("claude").estimatedQueueTime;
-      waits.push(`**Claude:** ${claudeWait}`);
-    }
-    infoBody += "\n\n" + waits.join(" / ");
-  }
+  const waits: string[] = [];
+  infoBody += `\n## Estimated Wait Times\nIf the AI is busy, your prompt will processed when a slot frees up.`;
+  if (config.openaiKey) {
+    const turboWait = getQueueInformation("turbo").estimatedQueueTime;
+    const gpt4Wait = getQueueInformation("gpt-4").estimatedQueueTime;
+    waits.push(`**Turbo:** ${turboWait}`);
+    if (keyPool.list().some((k) => k.isGpt4) && !config.turboOnly) {
+      waits.push(`**GPT-4:** ${gpt4Wait}`);
+    }
+  }
+  if (config.anthropicKey) {
+    const claudeWait = getQueueInformation("claude").estimatedQueueTime;
+    waits.push(`**Claude:** ${claudeWait}`);
+  }
+  infoBody += "\n\n" + waits.join(" / ");
if (customGreeting) {
infoBody += `\n## Server Greeting\n
${customGreeting}`;
@ -227,9 +221,6 @@ ${customGreeting}`;
/** Returns queue time in seconds, or minutes + seconds if over 60 seconds. */
function getQueueInformation(partition: QueuePartition) {
-  if (config.queueMode === "none") {
-    return {};
-  }
const waitMs = getEstimatedWaitTime(partition);
const waitTime =
waitMs < 60000
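
The hunk above is cut off mid-expression, but the doc comment states the intent: report the wait in seconds, or in minutes plus seconds once it exceeds 60 seconds. A standalone sketch of formatting along those lines -- only waitMs and the 60000 ms threshold come from the diff; the function name and exact output strings are assumptions:

// Hypothetical helper illustrating the "seconds, or minutes + seconds" rule
// described by getQueueInformation's doc comment.
function formatWaitTime(waitMs: number): string {
  if (waitMs < 60000) {
    return `${Math.round(waitMs / 1000)}sec`;
  }
  const minutes = Math.floor(waitMs / 60000);
  const seconds = Math.round((waitMs % 60000) / 1000);
  return `${minutes}min ${seconds}sec`;
}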

View File

@ -33,12 +33,6 @@ const rewriteRequest = (
req: Request,
res: Response
) => {
-  if (config.queueMode !== "none") {
-    const msg = `Queueing is enabled on this proxy instance and is incompatible with the KoboldAI endpoint. Use the OpenAI endpoint instead.`;
-    proxyReq.destroy(new Error(msg));
-    return;
-  }
req.body.stream = false;
const rewriterPipeline = [
addKey,

View File

@ -341,11 +341,8 @@ function maybeHandleMissingPreambleError(
"Request failed due to missing preamble. Key will be marked as such for subsequent requests."
);
keyPool.update(req.key!, { requiresPreamble: true });
-    if (config.queueMode !== "none") {
-      reenqueueRequest(req);
-      throw new RetryableError("Claude request re-enqueued to add preamble.");
-    }
-    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
+    reenqueueRequest(req);
+    throw new RetryableError("Claude request re-enqueued to add preamble.");
} else {
errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
}
@ -357,11 +354,8 @@ function handleAnthropicRateLimitError(
) {
if (errorPayload.error?.type === "rate_limit_error") {
keyPool.markRateLimited(req.key!);
-    if (config.queueMode !== "none") {
-      reenqueueRequest(req);
-      throw new RetryableError("Claude rate-limited request re-enqueued.");
-    }
-    errorPayload.proxy_note = `There are too many in-flight requests for this key. Try again later.`;
+    reenqueueRequest(req);
+    throw new RetryableError("Claude rate-limited request re-enqueued.");
} else {
errorPayload.proxy_note = `Unrecognized rate limit error from Anthropic. Key may be over quota.`;
}
@ -388,13 +382,11 @@ function handleOpenAIRateLimitError(
} else if (type === "requests" || type === "tokens") {
// Per-minute request or token rate limit is exceeded, which we can retry
keyPool.markRateLimited(req.key!);
-    if (config.queueMode !== "none") {
-      reenqueueRequest(req);
-      // This is confusing, but it will bubble up to the top-level response
-      // handler and cause the request to go back into the request queue.
-      throw new RetryableError("Rate-limited request re-enqueued.");
-    }
-    errorPayload.proxy_note = `Assigned key's '${type}' rate limit has been exceeded. Try again later.`;
+    // I'm aware this is confusing -- throwing this class of error will cause
+    // the proxy response handler to return without terminating the request,
+    // so that it can be placed back in the queue.
+    reenqueueRequest(req);
+    throw new RetryableError("Rate-limited request re-enqueued.");
} else {
// OpenAI probably overloaded
errorPayload.proxy_note = `This is likely a temporary error with OpenAI. Try again in a few seconds.`;
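
The comments kept in this hunk describe the control flow that the queueMode check used to gate: on a retryable rate-limit error the handler calls reenqueueRequest(req) and then throws RetryableError, and the top-level proxy response handler treats that error as "stop here, the request is back in the queue" rather than as a terminal failure. A condensed, self-contained sketch of that pattern -- reenqueueRequest and RetryableError are the identifiers from the diff, while processError, respondWithError, and the request shape are placeholders:

// Sketch of the re-enqueue-and-throw pattern described above (placeholder names noted).
class RetryableError extends Error {}

type QueuedRequest = { id: number };
const queue: QueuedRequest[] = [];
const reenqueueRequest = (req: QueuedRequest) => queue.push(req);

function processError(req: QueuedRequest) {
  // ...error classification happens here; on a retryable rate limit:
  reenqueueRequest(req);
  throw new RetryableError("Rate-limited request re-enqueued.");
}

function handleProxyResponse(req: QueuedRequest, respondWithError: (r: QueuedRequest) => void) {
  try {
    processError(req);
  } catch (err) {
    if (err instanceof RetryableError) return; // request stays queued; no response is sent
    respondWithError(req); // any other error terminates the request
  }
}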

View File

@ -16,7 +16,6 @@
*/
import type { Handler, Request } from "express";
-import { config, DequeueMode } from "../config";
import { keyPool, SupportedModel } from "../key-management";
import { logger } from "../logger";
import { AGNAI_DOT_CHAT_IP } from "./rate-limit";
@ -27,8 +26,6 @@ export type QueuePartition = "claude" | "turbo" | "gpt-4";
const queue: Request[] = [];
const log = logger.child({ module: "request-queue" });
-let dequeueMode: DequeueMode = "fair";
/** Maximum number of queue slots for Agnai.chat requests. */
const AGNAI_CONCURRENCY_LIMIT = 15;
/** Maximum number of queue slots for individual users. */
@ -160,18 +157,9 @@ export function dequeue(partition: QueuePartition): Request | undefined {
return undefined;
}
-  let req: Request;
-  if (dequeueMode === "fair") {
-    // Dequeue the request that has been waiting the longest
-    req = modelQueue.reduce((prev, curr) =>
-      prev.startTime < curr.startTime ? prev : curr
-    );
-  } else {
-    // Dequeue a random request
-    const index = Math.floor(Math.random() * modelQueue.length);
-    req = modelQueue[index];
-  }
+  const req = modelQueue.reduce((prev, curr) =>
+    prev.startTime < curr.startTime ? prev : curr
+  );
queue.splice(queue.indexOf(req), 1);
if (req.onAborted) {
@ -293,10 +281,6 @@ export function getQueueLength(partition: QueuePartition | "all" = "all") {
export function createQueueMiddleware(proxyMiddleware: Handler): Handler {
return (req, res, next) => {
-    if (config.queueMode === "none") {
-      return proxyMiddleware(req, res, next);
-    }
req.proceed = () => {
proxyMiddleware(req, res, next);
};
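
Net effect in this file: the dequeueMode switch is gone, so dequeue always uses the former "fair" branch -- pick the queued request with the smallest startTime, i.e. the one that has waited longest -- and createQueueMiddleware no longer has a bypass path for when queueing is disabled. A standalone sketch of that oldest-first selection, using a minimal request shape in place of Express's Request:

// Oldest-first ("fair") selection, mirroring the reduce kept in the dequeue hunk above.
interface PendingRequest {
  startTime: number; // ms timestamp set when the request was enqueued
}

function pickOldest(modelQueue: PendingRequest[]): PendingRequest | undefined {
  if (modelQueue.length === 0) return undefined;
  return modelQueue.reduce((prev, curr) =>
    prev.startTime < curr.startTime ? prev : curr
  );
}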

View File

@ -102,10 +102,8 @@ async function start() {
logQueue.start();
}
-  if (config.queueMode !== "none") {
-    logger.info("Starting request queue...");
-    startRequestQueue();
-  }
+  logger.info("Starting request queue...");
+  startRequestQueue();
app.listen(PORT, async () => {
logger.info({ port: PORT }, "Now listening for connections.");