Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)

2023-06-09 07:16:40 +00:00 · 2023-06-09 07:16:40 +00:00 · 96cf4a0e2d
parent 4f2a12ef14
commit 96cf4a0e2d
8 changed files with 122 additions and 16 deletions
--- a/src/key-management/anthropic/provider.ts
+++ b/src/key-management/anthropic/provider.ts
@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [
 ] as const;
 export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
 /**
 * Shape of a partial update to an {@link AnthropicKey}: every field is
 * optional, and the `key`, `hash`, `lastUsed`, `promptCount`, `rateLimitedAt`
 * and `rateLimitedUntil` fields are omitted from the type entirely.
 */
 export type AnthropicKeyUpdate = Omit<
  Partial<AnthropicKey>,
  | "key"
  | "hash"
  | "lastUsed"
  | "promptCount"
  | "rateLimitedAt"
  | "rateLimitedUntil"
 >;
 /**
 * A pooled Anthropic API key: the shared `Key` fields plus Anthropic-specific
 * rate-limit bookkeeping and the preamble flag.
 */
 export interface AnthropicKey extends Key {
  /** Service discriminator; always `"anthropic"` for this key type. */
  readonly service: "anthropic";
  /**
   * The time at which this key was last rate limited. Starts at 0 when the
   * key is loaded.
   * NOTE(review): presumably a millisecond epoch timestamp -- confirm.
   */
  rateLimitedAt: number;
  /**
   * The time until which this key is rate limited. Starts at 0 when the key
   * is loaded.
   * NOTE(review): presumably a millisecond epoch timestamp -- confirm.
   */
  rateLimitedUntil: number;
  /**
   * Whether this key requires a special preamble.  For unclear reasons, some
   * Anthropic keys will throw an error if the prompt does not begin with a
   * message from the user, whereas others can be used without a preamble. This
   * is despite using the same API endpoint, version, and model.
   * Keys start with this flag set to false. When a key returns this
   * particular error, we set this flag to true.
   */
  requiresPreamble: boolean;
 }
 /**
@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
        lastUsed: 0,
        rateLimitedAt: 0,
        rateLimitedUntil: 0,
        requiresPreamble: false,
        hash: `ant-${crypto
          .createHash("sha256")
          .update(key)
@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
    this.log.warn({ key: key.hash }, "Key disabled");
  }
  /**
   * Merges `update` into the pooled key whose hash equals `hash`, mutating the
   * pooled key object in place.
   *
   * @param hash Hash identifying the key to modify.
   * @param update Fields to copy onto the pooled key.
   * @throws Error if no key with the given hash exists in the pool. The
   *   lookup result was previously force-unwrapped, so a missing key failed
   *   inside `Object.assign` with an opaque TypeError instead.
   */
  public update(hash: string, update: Partial<AnthropicKey>) {
    const keyFromPool = this.keys.find((k) => k.hash === hash);
    if (!keyFromPool) {
      throw new Error(`Cannot update unknown Anthropic key '${hash}'`);
    }
    Object.assign(keyFromPool, update);
  }
  /** Counts the keys in this pool that are not marked as disabled. */
  public available() {
    let enabledCount = 0;
    for (const candidate of this.keys) {
      if (!candidate.isDisabled) {
        enabledCount++;
      }
    }
    return enabledCount;
  }
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@ -47,6 +47,7 @@ export interface KeyProvider<T extends Key = Key> {
  get(model: Model): T;
  list(): Omit<T, "key">[];
  disable(key: T): void;
  update(hash: string, update: Partial<T>): void;
  available(): number;
  anyUnchecked(): boolean;
  incrementPrompt(hash: string): void;
@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [
 ] as const;
 export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
 export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
 export { AnthropicKey } from "./anthropic/provider";
 export { OpenAIKey } from "./openai/provider";
--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@ -1,7 +1,9 @@
 import type * as http from "http";
-import { AnthropicKeyProvider } from "./anthropic/provider";
+import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
-import { Key, AIService, Model, KeyProvider } from "./index";
+import { Key, Model, KeyProvider, AIService } from "./index";
-import { OpenAIKeyProvider } from "./openai/provider";
+import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";
 /** Update payload accepted by `KeyPool.update`: either provider's update shape. */
 type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
 export class KeyPool {
  private keyProviders: KeyProvider[] = [];
@ -35,6 +37,11 @@ export class KeyPool {
    service.disable(key);
  }
  /**
   * Forwards a partial update to the provider responsible for `key`. The
   * provider is looked up by the key's service and the key is addressed by
   * its hash.
   */
  public update(key: Key, props: AllowedPartial): void {
    const { service, hash } = key;
    this.getKeyProvider(service).update(hash, props);
  }
  public available(service: AIService | "all" = "all"): number {
    return this.keyProviders.reduce((sum, provider) => {
      const includeProvider = service === "all" || service === provider.service;
--- a/src/proxy/anthropic.ts
+++ b/src/proxy/anthropic.ts
@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
  addKey,
  addAnthropicPreamble,
  createPreprocessorMiddleware,
  finalizeBody,
  languageFilter,
@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
 ) => {
  const rewriterPipeline = [
    addKey,
    addAnthropicPreamble,
    languageFilter,
    limitOutputTokens,
    finalizeBody,
--- a/src/proxy/middleware/request/add-anthropic-preamble.ts
+++ b/src/proxy/middleware/request/add-anthropic-preamble.ts
@ -0,0 +1,32 @@
 import { AnthropicKey, Key } from "../../../key-management";
 import { isCompletionRequest } from "../common";
 import { ProxyRequestMiddleware } from ".";
 /**
 * Some keys require the prompt to start with `\n\nHuman:`. There is no way to
 * know this without trying to send the request and seeing if it fails. If the
 * key assigned to the request is marked as requiring a preamble, and the
 * prompt does not already start with it, the preamble is prepended here.
 *
 * The request is left untouched when it is not a completion request, when its
 * key is not an Anthropic key, when the key does not require the preamble, or
 * when the prompt is not a string.
 */
 export const addAnthropicPreamble: ProxyRequestMiddleware = (
  _proxyReq,
  req
 ) => {
  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
    return;
  }
  // Narrows `req.key` to `AnthropicKey` so `requiresPreamble` is accessible.
  assertAnthropicKey(req.key);
  if (!req.key.requiresPreamble) {
    return;
  }
  const preamble = "\n\nHuman:";
  const prompt: unknown = req.body.prompt;
  // Only string prompts can be prefixed. Anything else is passed through
  // unchanged rather than being coerced to a string or crashing on
  // `startsWith`.
  if (typeof prompt !== "string" || prompt.startsWith(preamble)) {
    return;
  }
  // Log only when the prompt is actually modified.
  req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
  req.body.prompt = preamble + prompt;
 };
 /**
 * Assertion guard that narrows a generic key to an Anthropic key.
 *
 * @param key Key to check.
 * @throws Error if the key's `service` is anything other than `"anthropic"`.
 */
 function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
  const { service } = key;
  if (service === "anthropic") {
    return;
  }
  throw new Error(`Expected an Anthropic key, got '${service}'`);
 }
--- a/src/proxy/middleware/request/index.ts
+++ b/src/proxy/middleware/request/index.ts
@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
 // HPM middleware (runs on onProxyReq, cannot be async)
 export { addKey } from "./add-key";
 export { addAnthropicPreamble } from "./add-anthropic-preamble";
 export { finalizeBody } from "./finalize-body";
 export { languageFilter } from "./language-filter";
 export { limitCompletions } from "./limit-completions";
--- a/src/proxy/middleware/request/transform-outbound-payload.ts
+++ b/src/proxy/middleware/request/transform-outbound-payload.ts
@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
  // Remove duplicates
  stops = [...new Set(stops)];
  // TEMP: More shitty anthropic API hacks
  // If you receive a 400 Bad Request error from Anthropic complaining about
  // "prompt must start with a '\n\nHuman: ' turn", enable this setting.
  // I will try to fix this when I can identify why it only happens sometimes.
  let preamble = "";
  if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
    preamble = "\n\nHuman: Hello Claude.";
  }
  return {
    ...rest,
    model,
-    prompt: preamble + prompt,
+    prompt: prompt,
    max_tokens_to_sample: rest.max_tokens,
    stop_sequences: stops,
  };
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
 function reenqueueRequest(req: Request) {
  req.log.info(
    { key: req.key?.hash, retryCount: req.retryCount },
-    `Re-enqueueing request due to rate-limit error`
+    `Re-enqueueing request due to retryable error`
  );
  req.retryCount++;
  enqueue(req);
@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
  if (statusCode === 400) {
    // Bad request (likely prompt is too long)
    if (req.outboundApi === "openai") {
      errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
    } else if (req.outboundApi === "anthropic") {
      maybeHandleMissingPreambleError(req, errorPayload);
    }
  } else if (statusCode === 401) {
    // Key is invalid or was revoked
    keyPool.disable(req.key!);
@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
    // OpenAI uses this for a bunch of different rate-limiting scenarios.
    if (req.outboundApi === "openai") {
      handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
-    } else {
+    } else if (req.outboundApi === "anthropic") {
      handleAnthropicRateLimitError(req, errorPayload);
    }
  } else if (statusCode === 404) {
@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
  throw new Error(errorPayload.error?.message);
 };
 /**
 * Workaround for Anthropic keys that validate input more strictly than
 * others: they reject any prompt that does not begin with `\n\nHuman:`. The
 * behaviour is undocumented and it is unclear why only some keys enforce it
 * (possibly to stop the key being used for generic text completion).
 *
 * When the upstream error matches that rejection exactly, the key is flagged
 * as requiring the preamble. If queueing is enabled the request is then
 * re-enqueued and a `RetryableError` is thrown; otherwise a note asking the
 * user to retry is attached to the payload. Any other error just gets a
 * generic note.
 *
 * The error being matched is:
 * ```
 * {
 *   "error": {
 *     "type": "invalid_request_error",
 *     "message": "prompt must start with \"\n\nHuman:\" turn"
 *   }
 * }
 * ```
 */
 function maybeHandleMissingPreambleError(
  req: Request,
  errorPayload: Record<string, any>
 ) {
  const upstreamError = errorPayload.error;
  const isMissingPreamble =
    upstreamError?.type === "invalid_request_error" &&
    upstreamError?.message === 'prompt must start with "\n\nHuman:" turn';
  if (!isMissingPreamble) {
    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
    return;
  }
  req.log.warn(
    { key: req.key?.hash },
    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
  );
  // Flag the key first so the retried request gets the preamble added.
  keyPool.update(req.key!, { requiresPreamble: true });
  const queueingEnabled = config.queueMode !== "none";
  if (queueingEnabled) {
    reenqueueRequest(req);
    throw new RetryableError("Claude request re-enqueued to add preamble.");
  }
  errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
 }
 function handleAnthropicRateLimitError(
  req: Request,
  errorPayload: Record<string, any>