improves clarity of errors sent back to streaming clients

2023-10-03 19:45:15 -05:00 · 2023-10-03 19:45:15 -05:00 · 5033d00444
parent ba0b20617e
commit 5033d00444
6 changed files with 110 additions and 119 deletions
--- a/src/proxy/middleware/common.ts
+++ b/src/proxy/middleware/common.ts
@ -1,6 +1,8 @@
 import { Request, Response } from "express";
 import httpProxy from "http-proxy";
 import { ZodError } from "zod";
+import { generateErrorMessage } from "zod-error";
+import { buildFakeSse } from "../../shared/streaming";
 import { assertNever } from "../../shared/utils";
 import { QuotaExceededError } from "./request/apply-quota-limits";

@ -44,16 +46,9 @@ export function writeErrorResponse(
    res.headersSent ||
    String(res.getHeader("content-type")).startsWith("text/event-stream")
  ) {
-    const errorContent =
-      statusCode === 403
-        ? JSON.stringify(errorPayload)
-        : JSON.stringify(errorPayload, null, 2);
-
-    const msg = buildFakeSseMessage(
-      `${errorSource} error (${statusCode})`,
-      errorContent,
-      req
-    );
+    const errorTitle = `${errorSource} error (${statusCode})`;
+    const errorContent = JSON.stringify(errorPayload, null, 2);
+    const msg = buildFakeSse(errorTitle, errorContent, req);
    res.write(msg);
    res.write(`data: [DONE]\n\n`);
    res.end();
@ -66,110 +61,98 @@ export function writeErrorResponse(
 }

 export const handleProxyError: httpProxy.ErrorCallback = (err, req, res) => {
-  req.log.error({ err }, `Error during proxy request middleware`);
-  handleInternalError(err, req as Request, res as Response);
+  req.log.error(err, `Error during http-proxy-middleware request`);
+  classifyErrorAndSend(err, req as Request, res as Response);
 };

-export const handleInternalError = (
+export const classifyErrorAndSend = (
  err: Error,
  req: Request,
  res: Response
 ) => {
  try {
-    if (err instanceof ZodError) {
-      writeErrorResponse(req, res, 400, {
-        error: {
-          type: "proxy_validation_error",
-          proxy_note: `Reverse proxy couldn't validate your request when trying to transform it. Your client may be sending invalid data.`,
-          issues: err.issues,
-          stack: err.stack,
-          message: err.message,
-        },
-      });
-    } else if (err.name === "ForbiddenError") {
-      // Spoofs a vaguely threatening OpenAI error message. Only invoked by the
-      // block-zoomers rewriter to scare off tiktokers.
-      writeErrorResponse(req, res, 403, {
-        error: {
-          type: "organization_account_disabled",
-          code: "policy_violation",
-          param: null,
-          message: err.message,
-        },
-      });
-    } else if (err instanceof QuotaExceededError) {
-      writeErrorResponse(req, res, 429, {
-        error: {
-          type: "proxy_quota_exceeded",
-          code: "quota_exceeded",
-          message: `You've exceeded your token quota for this model type.`,
-          info: err.quotaInfo,
-          stack: err.stack,
-        },
-      });
-    } else {
-      writeErrorResponse(req, res, 500, {
-        error: {
-          type: "proxy_internal_error",
-          proxy_note: `Reverse proxy encountered an error before it could reach the upstream API.`,
-          message: err.message,
-          stack: err.stack,
-        },
-      });
-    }
-  } catch (e) {
-    req.log.error(
-      { error: e },
-      `Error writing error response headers, giving up.`
-    );
+    const { status, userMessage, ...errorDetails } = classifyError(err);
+    writeErrorResponse(req, res, status, {
+      error: { message: userMessage, ...errorDetails },
+    });
+  } catch (error) {
+    req.log.error(error, `Error writing error response headers, giving up.`);
  }
 };

-export function buildFakeSseMessage(
-  type: string,
-  string: string,
-  req: Request
-) {
-  let fakeEvent;
-  const content = `\`\`\`\n[${type}: ${string}]\n\`\`\`\n`;
+function classifyError(err: Error): {
+  /** HTTP status code returned to the client. */
+  status: number;
+  /** Message displayed to the user. */
+  userMessage: string;
+  /** Short error type, e.g. "proxy_validation_error". */
+  type: string;
+} & Record<string, any> {
+  const defaultError = {
+    status: 500,
+    userMessage: `Reverse proxy encountered an unexpected error. (${err.message})`,
+    type: "proxy_internal_error",
+    stack: err.stack,
+  };

-  switch (req.inboundApi) {
-    case "openai":
-      fakeEvent = {
-        id: "chatcmpl-" + req.id,
-        object: "chat.completion.chunk",
-        created: Date.now(),
-        model: req.body?.model,
-        choices: [{ delta: { content }, index: 0, finish_reason: type }],
+  switch (err.constructor.name) {
+    case "ZodError":
+      const userMessage = generateErrorMessage((err as ZodError).issues, {
+        prefix: "Request validation failed. ",
+        path: { enabled: true, label: null, type: "breadcrumbs" },
+        code: { enabled: false },
+        maxErrors: 3,
+        transform: ({ issue, ...rest }) => {
+          return `At '${rest.pathComponent}', ${issue.message}`;
+        },
+      });
+      return { status: 400, userMessage, type: "proxy_validation_error" };
+    case "ForbiddenError":
+      // Mimics a ban notice from OpenAI, thrown when blockZoomerOrigins blocks
+      // a request.
+      return {
+        status: 403,
+        userMessage: `Your account has been disabled for violating our terms of service.`,
+        type: "organization_account_disabled",
+        code: "policy_violation",
      };
-      break;
-    case "openai-text":
-      fakeEvent = {
-        id: "cmpl-" + req.id,
-        object: "text_completion",
-        created: Date.now(),
-        choices: [
-          { text: content, index: 0, logprobs: null, finish_reason: type },
-        ],
-        model: req.body?.model,
+    case "QuotaExceededError":
+      return {
+        status: 429,
+        userMessage: `You've exceeded your token quota for this model type.`,
+        type: "proxy_quota_exceeded",
+        info: (err as QuotaExceededError).quotaInfo,
      };
-      break;
-    case "anthropic":
-      fakeEvent = {
-        completion: content,
-        stop_reason: type,
-        truncated: false, // I've never seen this be true
-        stop: null,
-        model: req.body?.model,
-        log_id: "proxy-req-" + req.id,
-      };
-      break;
-    case "google-palm":
-      throw new Error("PaLM not supported as an inbound API format");
+    case "Error":
+      if ("code" in err) {
+        switch (err.code) {
+          case "ENOTFOUND":
+            return {
+              status: 502,
+              userMessage: `Reverse proxy encountered a DNS error while trying to connect to the upstream service.`,
+              type: "proxy_network_error",
+              code: err.code,
+            };
+          case "ECONNREFUSED":
+            return {
+              status: 502,
+              userMessage: `Reverse proxy couldn't connect to the upstream service.`,
+              type: "proxy_network_error",
+              code: err.code,
+            };
+          case "ECONNRESET":
+            return {
+              status: 504,
+              userMessage: `Reverse proxy timed out while waiting for the upstream service to respond.`,
+              type: "proxy_network_error",
+              code: err.code,
+            };
+        }
+      }
+      return defaultError;
    default:
-      assertNever(req.inboundApi);
+      return defaultError;
  }
-  return `data: ${JSON.stringify(fakeEvent)}\n\n`;
 }

 export function getCompletionFromBody(req: Request, body: Record<string, any>) {
--- a/src/proxy/middleware/request/preprocess.ts
+++ b/src/proxy/middleware/request/preprocess.ts
@ -1,5 +1,6 @@
 import { RequestHandler } from "express";
-import { handleInternalError } from "../common";
+import { initializeSseStream } from "../../../shared/streaming";
+import { classifyErrorAndSend } from "../common";
 import {
  RequestPreprocessor,
  validateContextSize,
@ -66,6 +67,13 @@ async function executePreprocessors(
    next();
  } catch (error) {
    req.log.error(error, "Error while executing request preprocessor");
-    handleInternalError(error as Error, req, res);
+
+    // If the requested has opted into streaming, the client probably won't
+    // handle a non-eventstream response, but we haven't initialized the SSE
+    // stream yet as that is typically done later by the request queue. We'll
+    // do that here and then call classifyErrorAndSend to use the streaming
+    // error handler.
+    initializeSseStream(res)
+    classifyErrorAndSend(error as Error, req, res);
  }
 }
--- a/src/proxy/middleware/request/validate-context-size.ts
+++ b/src/proxy/middleware/request/validate-context-size.ts
@ -67,12 +67,14 @@ export const validateContextSize: RequestPreprocessor = async (req) => {
  }

  const finalMax = Math.min(proxyMax, modelMax);
-  z.number()
-    .int()
-    .max(finalMax, {
-      message: `Your request exceeds the context size limit for this model or proxy. (max: ${finalMax} tokens, requested: ${promptTokens} prompt + ${outputTokens} output = ${contextTokens} context tokens)`,
-    })
-    .parse(contextTokens);
+  z.object({
+    tokens: z
+      .number()
+      .int()
+      .max(finalMax, {
+        message: `Your request exceeds the context size limit. (max: ${finalMax} tokens, requested: ${promptTokens} prompt + ${outputTokens} output = ${contextTokens} context tokens)`,
+      }),
+  }).parse({ tokens: contextTokens });

  req.log.debug(
    { promptTokens, outputTokens, contextTokens, modelMax, proxyMax },
--- a/src/proxy/middleware/response/handle-streamed-response.ts
+++ b/src/proxy/middleware/response/handle-streamed-response.ts
@ -1,14 +1,14 @@
 import { pipeline } from "stream";
 import { promisify } from "util";
-import { buildFakeSseMessage } from "../common";
+import {
+  buildFakeSse,
+  copySseResponseHeaders,
+  initializeSseStream
+} from "../../../shared/streaming";
 import { decodeResponseBody, RawResponseBodyHandler } from ".";
 import { SSEStreamAdapter } from "./streaming/sse-stream-adapter";
 import { SSEMessageTransformer } from "./streaming/sse-message-transformer";
 import { EventAggregator } from "./streaming/event-aggregator";
-import {
-  copySseResponseHeaders,
-  initializeSseStream,
-} from "../../../shared/streaming";

 const pipelineAsync = promisify(pipeline);

@ -79,7 +79,7 @@ export const handleStreamedResponse: RawResponseBodyHandler = async (
    res.end();
    return aggregator.getFinalResponse();
  } catch (err) {
-    const errorEvent = buildFakeSseMessage("stream-error", err.message, req);
+    const errorEvent = buildFakeSse("stream-error", err.message, req);
    res.write(`${errorEvent}data: [DONE]\n\n`);
    res.end();
    throw err;
--- a/src/proxy/middleware/response/streaming/index.ts
+++ b/src/proxy/middleware/response/streaming/index.ts
@ -28,4 +28,3 @@ export { anthropicV2ToOpenAI } from "./transformers/anthropic-v2-to-openai";
 export { mergeEventsForOpenAIChat } from "./aggregators/openai-chat";
 export { mergeEventsForOpenAIText } from "./aggregators/openai-text";
 export { mergeEventsForAnthropic } from "./aggregators/anthropic";
-
--- a/src/proxy/queue.ts
+++ b/src/proxy/queue.ts
@ -23,11 +23,10 @@ import {
  getOpenAIModelFamily,
  ModelFamily,
 } from "../shared/models";
-import { initializeSseStream } from "../shared/streaming";
+import { buildFakeSse, initializeSseStream } from "../shared/streaming";
 import { assertNever } from "../shared/utils";
 import { logger } from "../logger";
 import { AGNAI_DOT_CHAT_IP } from "./rate-limit";
-import { buildFakeSseMessage } from "./middleware/common";

 const queue: Request[] = [];
 const log = logger.child({ module: "request-queue" });
@ -109,7 +108,7 @@ export function enqueue(req: Request) {
        const avgWait = Math.round(getEstimatedWaitTime(partition) / 1000);
        const currentDuration = Math.round((Date.now() - req.startTime) / 1000);
        const debugMsg = `queue length: ${queue.length}; elapsed time: ${currentDuration}s; avg wait: ${avgWait}s`;
-        req.res!.write(buildFakeSseMessage("heartbeat", debugMsg, req));
+        req.res!.write(buildFakeSse("heartbeat", debugMsg, req));
      }
    }, 10000);
  }
@ -337,7 +336,7 @@ function killQueuedRequest(req: Request) {
  try {
    const message = `Your request has been terminated by the proxy because it has been in the queue for more than 5 minutes. The queue is currently ${queue.length} requests long.`;
    if (res.headersSent) {
-      const fakeErrorEvent = buildFakeSseMessage(
+      const fakeErrorEvent = buildFakeSse(
        "proxy queue error",
        message,
        req
@ -363,7 +362,7 @@ function initStreaming(req: Request) {
    return;
  }

-  res.write(": joining queue\n\n");
+  res.write(`: joining queue at position ${queue.length}\n\n`);
 }

 /**