From 96cf4a0e2d470dee97e6ec2fc7ffc2e30e94ee47 Mon Sep 17 00:00:00 2001 From: khanon Date: Fri, 9 Jun 2023 07:16:40 +0000 Subject: [PATCH] Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23) --- src/key-management/anthropic/provider.ts | 24 +++++++++ src/key-management/index.ts | 3 ++ src/key-management/key-pool.ts | 13 +++-- src/proxy/anthropic.ts | 2 + .../request/add-anthropic-preamble.ts | 32 ++++++++++++ src/proxy/middleware/request/index.ts | 1 + .../request/transform-outbound-payload.ts | 11 +--- src/proxy/middleware/response/index.ts | 52 +++++++++++++++++-- 8 files changed, 122 insertions(+), 16 deletions(-) create mode 100644 src/proxy/middleware/request/add-anthropic-preamble.ts diff --git a/src/key-management/anthropic/provider.ts b/src/key-management/anthropic/provider.ts index a077be5..28e8617 100644 --- a/src/key-management/anthropic/provider.ts +++ b/src/key-management/anthropic/provider.ts @@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [ ] as const; export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number]; +export type AnthropicKeyUpdate = Omit< + Partial, + | "key" + | "hash" + | "lastUsed" + | "promptCount" + | "rateLimitedAt" + | "rateLimitedUntil" +>; + export interface AnthropicKey extends Key { readonly service: "anthropic"; /** The time at which this key was last rate limited. */ rateLimitedAt: number; /** The time until which this key is rate limited. */ rateLimitedUntil: number; + /** + * Whether this key requires a special preamble. For unclear reasons, some + * Anthropic keys will throw an error if the prompt does not begin with a + * message from the user, whereas others can be used without a preamble. This + * is despite using the same API endpoint, version, and model. + * When a key returns this particular error, we set this flag to true. + */ + requiresPreamble: boolean; } /** @@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider { lastUsed: 0, rateLimitedAt: 0, rateLimitedUntil: 0, + requiresPreamble: false, hash: `ant-${crypto .createHash("sha256") .update(key) @@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider { this.log.warn({ key: key.hash }, "Key disabled"); } + public update(hash: string, update: Partial) { + const keyFromPool = this.keys.find((k) => k.hash === hash)!; + Object.assign(keyFromPool, update); + } + public available() { return this.keys.filter((k) => !k.isDisabled).length; } diff --git a/src/key-management/index.ts b/src/key-management/index.ts index b2916f3..13c73a5 100644 --- a/src/key-management/index.ts +++ b/src/key-management/index.ts @@ -47,6 +47,7 @@ export interface KeyProvider { get(model: Model): T; list(): Omit[]; disable(key: T): void; + update(hash: string, update: Partial): void; available(): number; anyUnchecked(): boolean; incrementPrompt(hash: string): void; @@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [ ] as const; export type SupportedModel = (typeof SUPPORTED_MODELS)[number]; export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS }; +export { AnthropicKey } from "./anthropic/provider"; +export { OpenAIKey } from "./openai/provider"; diff --git a/src/key-management/key-pool.ts b/src/key-management/key-pool.ts index acdd154..1f100a5 100644 --- a/src/key-management/key-pool.ts +++ b/src/key-management/key-pool.ts @@ -1,7 +1,9 @@ import type * as http from "http"; -import { AnthropicKeyProvider } from "./anthropic/provider"; -import { Key, AIService, Model, KeyProvider } from "./index"; -import { OpenAIKeyProvider } from "./openai/provider"; +import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider"; +import { Key, Model, KeyProvider, AIService } from "./index"; +import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider"; + +type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate; export class KeyPool { private keyProviders: KeyProvider[] = []; @@ -35,6 +37,11 @@ export class KeyPool { service.disable(key); } + public update(key: Key, props: AllowedPartial): void { + const service = this.getKeyProvider(key.service); + service.update(key.hash, props); + } + public available(service: AIService | "all" = "all"): number { return this.keyProviders.reduce((sum, provider) => { const includeProvider = service === "all" || service === provider.service; diff --git a/src/proxy/anthropic.ts b/src/proxy/anthropic.ts index fd06312..af7cd82 100644 --- a/src/proxy/anthropic.ts +++ b/src/proxy/anthropic.ts @@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit"; import { handleProxyError } from "./middleware/common"; import { addKey, + addAnthropicPreamble, createPreprocessorMiddleware, finalizeBody, languageFilter, @@ -69,6 +70,7 @@ const rewriteAnthropicRequest = ( ) => { const rewriterPipeline = [ addKey, + addAnthropicPreamble, languageFilter, limitOutputTokens, finalizeBody, diff --git a/src/proxy/middleware/request/add-anthropic-preamble.ts b/src/proxy/middleware/request/add-anthropic-preamble.ts new file mode 100644 index 0000000..7fedfa2 --- /dev/null +++ b/src/proxy/middleware/request/add-anthropic-preamble.ts @@ -0,0 +1,32 @@ +import { AnthropicKey, Key } from "../../../key-management"; +import { isCompletionRequest } from "../common"; +import { ProxyRequestMiddleware } from "."; + +/** + * Some keys require the prompt to start with `\n\nHuman:`. There is no way to + * know this without trying to send the request and seeing if it fails. If a + * key is marked as requiring a preamble, it will be added here. + */ +export const addAnthropicPreamble: ProxyRequestMiddleware = ( + _proxyReq, + req +) => { + if (!isCompletionRequest(req) || req.key?.service !== "anthropic") { + return; + } + + let preamble = ""; + let prompt = req.body.prompt; + assertAnthropicKey(req.key); + if (req.key.requiresPreamble) { + preamble = prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:"; + req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt"); + } + req.body.prompt = preamble + prompt; +}; + +function assertAnthropicKey(key: Key): asserts key is AnthropicKey { + if (key.service !== "anthropic") { + throw new Error(`Expected an Anthropic key, got '${key.service}'`); + } +} diff --git a/src/proxy/middleware/request/index.ts b/src/proxy/middleware/request/index.ts index eca9189..11f2c00 100644 --- a/src/proxy/middleware/request/index.ts +++ b/src/proxy/middleware/request/index.ts @@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload"; // HPM middleware (runs on onProxyReq, cannot be async) export { addKey } from "./add-key"; +export { addAnthropicPreamble } from "./add-anthropic-preamble"; export { finalizeBody } from "./finalize-body"; export { languageFilter } from "./language-filter"; export { limitCompletions } from "./limit-completions"; diff --git a/src/proxy/middleware/request/transform-outbound-payload.ts b/src/proxy/middleware/request/transform-outbound-payload.ts index c97aa62..6c07835 100644 --- a/src/proxy/middleware/request/transform-outbound-payload.ts +++ b/src/proxy/middleware/request/transform-outbound-payload.ts @@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) { // Remove duplicates stops = [...new Set(stops)]; - // TEMP: More shitty anthropic API hacks - // If you receive a 400 Bad Request error from Anthropic complaining about - // "prompt must start with a '\n\nHuman: ' turn", enable this setting. - // I will try to fix this when I can identify why it only happens sometimes. - let preamble = ""; - if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) { - preamble = "\n\nHuman: Hello Claude."; - } - return { ...rest, model, - prompt: preamble + prompt, + prompt: prompt, max_tokens_to_sample: rest.max_tokens, stop_sequences: stops, }; diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts index 9bd18dc..3580eb7 100644 --- a/src/proxy/middleware/response/index.ts +++ b/src/proxy/middleware/response/index.ts @@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => { function reenqueueRequest(req: Request) { req.log.info( { key: req.key?.hash, retryCount: req.retryCount }, - `Re-enqueueing request due to rate-limit error` + `Re-enqueueing request due to retryable error` ); req.retryCount++; enqueue(req); @@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( if (statusCode === 400) { // Bad request (likely prompt is too long) - errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`; + if (req.outboundApi === "openai") { + errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`; + } else if (req.outboundApi === "anthropic") { + maybeHandleMissingPreambleError(req, errorPayload); + } } else if (statusCode === 401) { // Key is invalid or was revoked keyPool.disable(req.key!); @@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( // OpenAI uses this for a bunch of different rate-limiting scenarios. if (req.outboundApi === "openai") { handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload); - } else { + } else if (req.outboundApi === "anthropic") { handleAnthropicRateLimitError(req, errorPayload); } } else if (statusCode === 404) { @@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async ( throw new Error(errorPayload.error?.message); }; +/** + * This is a workaround for a very strange issue where certain API keys seem to + * enforce more strict input validation than others -- specifically, they will + * require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from + * being used as a generic text completion service and to enforce the use of + * the chat RLHF. This is not documented anywhere, and it's not clear why some + * keys enforce this and others don't. + * This middleware checks for that specific error and marks the key as being + * one that requires the prefix, and then re-enqueues the request. + * The exact error is: + * ``` + * { + * "error": { + * "type": "invalid_request_error", + * "message": "prompt must start with \"\n\nHuman:\" turn" + * } + * } + * ``` + */ +function maybeHandleMissingPreambleError( + req: Request, + errorPayload: Record +) { + if ( + errorPayload.error?.type === "invalid_request_error" && + errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn' + ) { + req.log.warn( + { key: req.key?.hash }, + "Request failed due to missing preamble. Key will be marked as such for subsequent requests." + ); + keyPool.update(req.key!, { requiresPreamble: true }); + if (config.queueMode !== "none") { + reenqueueRequest(req); + throw new RetryableError("Claude request re-enqueued to add preamble."); + } + errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`; + } else { + errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`; + } +} + function handleAnthropicRateLimitError( req: Request, errorPayload: Record