Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)
This commit is contained in:
parent 4f2a12ef14
commit 96cf4a0e2d
@@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [
 ] as const;
 export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];

+export type AnthropicKeyUpdate = Omit<
+  Partial<AnthropicKey>,
+  | "key"
+  | "hash"
+  | "lastUsed"
+  | "promptCount"
+  | "rateLimitedAt"
+  | "rateLimitedUntil"
+>;
+
 export interface AnthropicKey extends Key {
   readonly service: "anthropic";
   /** The time at which this key was last rate limited. */
   rateLimitedAt: number;
   /** The time until which this key is rate limited. */
   rateLimitedUntil: number;
+  /**
+   * Whether this key requires a special preamble. For unclear reasons, some
+   * Anthropic keys will throw an error if the prompt does not begin with a
+   * message from the user, whereas others can be used without a preamble. This
+   * is despite using the same API endpoint, version, and model.
+   * When a key returns this particular error, we set this flag to true.
+   */
+  requiresPreamble: boolean;
 }

 /**
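For context, a minimal self-contained sketch (not part of the diff; types abbreviated to the fields shown in this hunk) of what the Omit-over-Partial pattern buys: callers can patch mutable flags such as requiresPreamble, while bookkeeping fields like hash or promptCount are rejected at compile time.

// Sketch only; the real Key interface has more fields than shown here.
interface Key {
  key: string;
  hash: string;
  service: string;
  lastUsed: number;
  promptCount: number;
}

interface AnthropicKey extends Key {
  readonly service: "anthropic";
  rateLimitedAt: number;
  rateLimitedUntil: number;
  requiresPreamble: boolean;
}

type AnthropicKeyUpdate = Omit<
  Partial<AnthropicKey>,
  "key" | "hash" | "lastUsed" | "promptCount" | "rateLimitedAt" | "rateLimitedUntil"
>;

// Hypothetical caller-facing signature, mirroring KeyPool.update below.
declare function update(hash: string, update: AnthropicKeyUpdate): void;

update("ant-1234567890", { requiresPreamble: true }); // OK: mutable flag
// update("ant-1234567890", { hash: "ant-0000000000" }); // compile error:
// "hash" is excluded from AnthropicKeyUpdate, so it cannot be patched.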
@@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
         lastUsed: 0,
         rateLimitedAt: 0,
         rateLimitedUntil: 0,
+        requiresPreamble: false,
         hash: `ant-${crypto
           .createHash("sha256")
           .update(key)
@@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
     this.log.warn({ key: key.hash }, "Key disabled");
   }

+  public update(hash: string, update: Partial<AnthropicKey>) {
+    const keyFromPool = this.keys.find((k) => k.hash === hash)!;
+    Object.assign(keyFromPool, update);
+  }
+
   public available() {
     return this.keys.filter((k) => !k.isDisabled).length;
   }
@@ -47,6 +47,7 @@ export interface KeyProvider<T extends Key = Key> {
   get(model: Model): T;
   list(): Omit<T, "key">[];
   disable(key: T): void;
+  update(hash: string, update: Partial<T>): void;
   available(): number;
   anyUnchecked(): boolean;
   incrementPrompt(hash: string): void;
@@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [
 ] as const;
 export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
 export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
+export { AnthropicKey } from "./anthropic/provider";
+export { OpenAIKey } from "./openai/provider";
@@ -1,7 +1,9 @@
 import type * as http from "http";
-import { AnthropicKeyProvider } from "./anthropic/provider";
-import { Key, AIService, Model, KeyProvider } from "./index";
-import { OpenAIKeyProvider } from "./openai/provider";
+import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
+import { Key, Model, KeyProvider, AIService } from "./index";
+import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";

+type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
+
 export class KeyPool {
   private keyProviders: KeyProvider[] = [];
@@ -35,6 +37,11 @@ export class KeyPool {
     service.disable(key);
   }

+  public update(key: Key, props: AllowedPartial): void {
+    const service = this.getKeyProvider(key.service);
+    service.update(key.hash, props);
+  }
+
   public available(service: AIService | "all" = "all"): number {
     return this.keyProviders.reduce((sum, provider) => {
       const includeProvider = service === "all" || service === provider.service;
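Routing the patch through KeyPool.update keeps callers agnostic of the key flavor: the pool resolves the owning provider from key.service and the provider applies the partial via Object.assign. The call the response middleware makes later in this diff is:

// From the 400-handler further down: flag the key so addAnthropicPreamble
// rewrites the next prompt sent with it.
keyPool.update(req.key!, { requiresPreamble: true });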
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
   addKey,
+  addAnthropicPreamble,
   createPreprocessorMiddleware,
   finalizeBody,
   languageFilter,
@@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
 ) => {
   const rewriterPipeline = [
     addKey,
+    addAnthropicPreamble,
     languageFilter,
     limitOutputTokens,
     finalizeBody,
@@ -0,0 +1,32 @@
+import { AnthropicKey, Key } from "../../../key-management";
+import { isCompletionRequest } from "../common";
+import { ProxyRequestMiddleware } from ".";
+
+/**
+ * Some keys require the prompt to start with `\n\nHuman:`. There is no way to
+ * know this without trying to send the request and seeing if it fails. If a
+ * key is marked as requiring a preamble, it will be added here.
+ */
+export const addAnthropicPreamble: ProxyRequestMiddleware = (
+  _proxyReq,
+  req
+) => {
+  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
+    return;
+  }
+
+  let preamble = "";
+  let prompt = req.body.prompt;
+  assertAnthropicKey(req.key);
+  if (req.key.requiresPreamble) {
+    preamble = prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
+    req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
+  }
+  req.body.prompt = preamble + prompt;
+};
+
+function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
+  if (key.service !== "anthropic") {
+    throw new Error(`Expected an Anthropic key, got '${key.service}'`);
+  }
+}
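A small standalone sketch (not part of the diff; the proxy request type is stubbed out) of the effect this middleware has on req.body.prompt for a key that has been flagged:

// Standalone sketch of addAnthropicPreamble's prompt rewriting.
type FakeReq = {
  key: { service: "anthropic"; requiresPreamble: boolean; hash: string };
  body: { prompt: string };
};

function sketchAddPreamble(req: FakeReq) {
  let preamble = "";
  const prompt = req.body.prompt;
  if (req.key.requiresPreamble) {
    // Only prepend when the prompt does not already begin with a Human turn.
    preamble = prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
  }
  req.body.prompt = preamble + prompt;
}

const req: FakeReq = {
  key: { service: "anthropic", requiresPreamble: true, hash: "ant-1234567890" },
  body: { prompt: "Assistant: Hello!\n\nHuman: Hi." },
};
sketchAddPreamble(req);
// req.body.prompt === "\n\nHuman:Assistant: Hello!\n\nHuman: Hi."
// A prompt that already starts with "\n\nHuman:" passes through unchanged, so
// the preamble is never duplicated.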
@@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";

 // HPM middleware (runs on onProxyReq, cannot be async)
 export { addKey } from "./add-key";
+export { addAnthropicPreamble } from "./add-anthropic-preamble";
 export { finalizeBody } from "./finalize-body";
 export { languageFilter } from "./language-filter";
 export { limitCompletions } from "./limit-completions";
@@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
   // Remove duplicates
   stops = [...new Set(stops)];

-  // TEMP: More shitty anthropic API hacks
-  // If you receive a 400 Bad Request error from Anthropic complaining about
-  // "prompt must start with a '\n\nHuman: ' turn", enable this setting.
-  // I will try to fix this when I can identify why it only happens sometimes.
-  let preamble = "";
-  if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
-    preamble = "\n\nHuman: Hello Claude.";
-  }
-
   return {
     ...rest,
     model,
-    prompt: preamble + prompt,
+    prompt: prompt,
     max_tokens_to_sample: rest.max_tokens,
     stop_sequences: stops,
   };
@@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
 function reenqueueRequest(req: Request) {
   req.log.info(
     { key: req.key?.hash, retryCount: req.retryCount },
-    `Re-enqueueing request due to rate-limit error`
+    `Re-enqueueing request due to retryable error`
   );
   req.retryCount++;
   enqueue(req);
@@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (

   if (statusCode === 400) {
     // Bad request (likely prompt is too long)
+    if (req.outboundApi === "openai") {
       errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
+    } else if (req.outboundApi === "anthropic") {
+      maybeHandleMissingPreambleError(req, errorPayload);
+    }
   } else if (statusCode === 401) {
     // Key is invalid or was revoked
     keyPool.disable(req.key!);
@@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     // OpenAI uses this for a bunch of different rate-limiting scenarios.
     if (req.outboundApi === "openai") {
       handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
-    } else {
+    } else if (req.outboundApi === "anthropic") {
       handleAnthropicRateLimitError(req, errorPayload);
     }
   } else if (statusCode === 404) {
@@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
   throw new Error(errorPayload.error?.message);
 };

+/**
+ * This is a workaround for a very strange issue where certain API keys seem to
+ * enforce more strict input validation than others -- specifically, they will
+ * require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from
+ * being used as a generic text completion service and to enforce the use of
+ * the chat RLHF. This is not documented anywhere, and it's not clear why some
+ * keys enforce this and others don't.
+ * This middleware checks for that specific error and marks the key as being
+ * one that requires the prefix, and then re-enqueues the request.
+ * The exact error is:
+ * ```
+ * {
+ *   "error": {
+ *     "type": "invalid_request_error",
+ *     "message": "prompt must start with \"\n\nHuman:\" turn"
+ *   }
+ * }
+ * ```
+ */
+function maybeHandleMissingPreambleError(
+  req: Request,
+  errorPayload: Record<string, any>
+) {
+  if (
+    errorPayload.error?.type === "invalid_request_error" &&
+    errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn'
+  ) {
+    req.log.warn(
+      { key: req.key?.hash },
+      "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
+    );
+    keyPool.update(req.key!, { requiresPreamble: true });
+    if (config.queueMode !== "none") {
+      reenqueueRequest(req);
+      throw new RetryableError("Claude request re-enqueued to add preamble.");
+    }
+    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
+  } else {
+    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
+  }
+}
+
 function handleAnthropicRateLimitError(
   req: Request,
   errorPayload: Record<string, any>
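A quick sanity check of the detection condition against the error shape quoted in the docblock above (standalone sketch, not part of the diff):

// The payload from the docblock satisfies the check in
// maybeHandleMissingPreambleError, so the key gets flagged and the request is
// re-enqueued (or, with the queue disabled, the user is told to retry).
const errorPayload = {
  error: {
    type: "invalid_request_error",
    message: 'prompt must start with "\n\nHuman:" turn',
  },
};

const isMissingPreambleError =
  errorPayload.error?.type === "invalid_request_error" &&
  errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn';

console.log(isMissingPreambleError); // true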