Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)

This commit is contained in:
khanon 2023-06-09 07:16:40 +00:00
parent 4f2a12ef14
commit 96cf4a0e2d
8 changed files with 122 additions and 16 deletions

View File

@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [
] as const; ] as const;
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number]; export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
/**
 * Shape of a partial update to an `AnthropicKey`: every property is optional,
 * and the identity, usage-counter and rate-limit properties listed below are
 * excluded from the type entirely.
 */
export type AnthropicKeyUpdate = Partial<
  Omit<
    AnthropicKey,
    | "key"
    | "hash"
    | "lastUsed"
    | "promptCount"
    | "rateLimitedAt"
    | "rateLimitedUntil"
  >
>;
export interface AnthropicKey extends Key { export interface AnthropicKey extends Key {
readonly service: "anthropic"; readonly service: "anthropic";
/** The time at which this key was last rate limited. */ /** The time at which this key was last rate limited. */
rateLimitedAt: number; rateLimitedAt: number;
/** The time until which this key is rate limited. */ /** The time until which this key is rate limited. */
rateLimitedUntil: number; rateLimitedUntil: number;
/**
* Whether this key requires a special preamble. For unclear reasons, some
* Anthropic keys will throw an error if the prompt does not begin with a
* message from the user, whereas others can be used without a preamble. This
* is despite using the same API endpoint, version, and model.
* When a key returns this particular error, we set this flag to true.
*/
requiresPreamble: boolean;
} }
/** /**
@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
lastUsed: 0, lastUsed: 0,
rateLimitedAt: 0, rateLimitedAt: 0,
rateLimitedUntil: 0, rateLimitedUntil: 0,
requiresPreamble: false,
hash: `ant-${crypto hash: `ant-${crypto
.createHash("sha256") .createHash("sha256")
.update(key) .update(key)
@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
this.log.warn({ key: key.hash }, "Key disabled"); this.log.warn({ key: key.hash }, "Key disabled");
} }
/**
 * Applies a partial update to the pooled key identified by `hash`.
 *
 * The matching key object in `this.keys` is mutated in place: every property
 * present in `update` overwrites the current value on that key.
 *
 * @param hash Hash of the key to modify.
 * @param update Properties to copy onto the pooled key.
 * @throws {Error} If no key in the pool has the given hash.
 */
public update(hash: string, update: Partial<AnthropicKey>) {
  const keyFromPool = this.keys.find((k) => k.hash === hash);
  if (!keyFromPool) {
    // Fail with a descriptive error rather than letting Object.assign throw an
    // opaque TypeError when handed an undefined target.
    throw new Error(`Cannot update unknown Anthropic key '${hash}'`);
  }
  Object.assign(keyFromPool, update);
}
public available() { public available() {
return this.keys.filter((k) => !k.isDisabled).length; return this.keys.filter((k) => !k.isDisabled).length;
} }

View File

@ -47,6 +47,7 @@ export interface KeyProvider<T extends Key = Key> {
get(model: Model): T; get(model: Model): T;
list(): Omit<T, "key">[]; list(): Omit<T, "key">[];
disable(key: T): void; disable(key: T): void;
update(hash: string, update: Partial<T>): void;
available(): number; available(): number;
anyUnchecked(): boolean; anyUnchecked(): boolean;
incrementPrompt(hash: string): void; incrementPrompt(hash: string): void;
@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [
] as const; ] as const;
export type SupportedModel = (typeof SUPPORTED_MODELS)[number]; export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS }; export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
export { AnthropicKey } from "./anthropic/provider";
export { OpenAIKey } from "./openai/provider";

View File

@ -1,7 +1,9 @@
import type * as http from "http"; import type * as http from "http";
import { AnthropicKeyProvider } from "./anthropic/provider"; import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
import { Key, AIService, Model, KeyProvider } from "./index"; import { Key, Model, KeyProvider, AIService } from "./index";
import { OpenAIKeyProvider } from "./openai/provider"; import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";
type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
export class KeyPool { export class KeyPool {
private keyProviders: KeyProvider[] = []; private keyProviders: KeyProvider[] = [];
@ -35,6 +37,11 @@ export class KeyPool {
service.disable(key); service.disable(key);
} }
/**
 * Forwards a partial key update to the provider registered for the key's
 * service, identifying the key by its hash.
 *
 * @param key Key whose `service` selects the provider and whose `hash`
 *   identifies the key to update.
 * @param props Properties to pass to the provider's `update`.
 */
public update(key: Key, props: AllowedPartial): void {
  this.getKeyProvider(key.service).update(key.hash, props);
}
public available(service: AIService | "all" = "all"): number { public available(service: AIService | "all" = "all"): number {
return this.keyProviders.reduce((sum, provider) => { return this.keyProviders.reduce((sum, provider) => {
const includeProvider = service === "all" || service === provider.service; const includeProvider = service === "all" || service === provider.service;

View File

@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
import { handleProxyError } from "./middleware/common"; import { handleProxyError } from "./middleware/common";
import { import {
addKey, addKey,
addAnthropicPreamble,
createPreprocessorMiddleware, createPreprocessorMiddleware,
finalizeBody, finalizeBody,
languageFilter, languageFilter,
@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
) => { ) => {
const rewriterPipeline = [ const rewriterPipeline = [
addKey, addKey,
addAnthropicPreamble,
languageFilter, languageFilter,
limitOutputTokens, limitOutputTokens,
finalizeBody, finalizeBody,

View File

@ -0,0 +1,32 @@
import { AnthropicKey, Key } from "../../../key-management";
import { isCompletionRequest } from "../common";
import { ProxyRequestMiddleware } from ".";
/**
 * Prepends `\n\nHuman:` to the prompt of an Anthropic completion request when
 * the key assigned to the request is flagged with `requiresPreamble`.
 *
 * Some keys require the prompt to start with `\n\nHuman:`. There is no way to
 * know this without trying to send the request and seeing if it fails, so this
 * middleware only acts on keys that have already been marked.
 *
 * The request body is left untouched when the request is not an Anthropic
 * completion, when the key does not require the preamble, when the prompt is
 * not a string, or when the prompt already begins with the preamble.
 *
 * @param _proxyReq Outgoing proxy request (unused).
 * @param req Incoming request; `req.body.prompt` may be rewritten in place.
 */
export const addAnthropicPreamble: ProxyRequestMiddleware = (
  _proxyReq,
  req
) => {
  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
    return;
  }

  // Narrows `req.key` so that `requiresPreamble` is visible to the compiler.
  assertAnthropicKey(req.key);
  if (!req.key.requiresPreamble) {
    return;
  }

  const preamble = "\n\nHuman:";
  const prompt = req.body.prompt;
  // Only rewrite real string prompts that lack the preamble. Concatenating
  // unconditionally would coerce a non-string prompt into a string.
  if (typeof prompt !== "string" || prompt.startsWith(preamble)) {
    return;
  }

  req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
  req.body.prompt = preamble + prompt;
};
/**
 * Assertion function that narrows a generic `Key` to an `AnthropicKey`.
 *
 * @param key Key to check.
 * @throws {Error} If `key.service` is anything other than `"anthropic"`.
 */
function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
  const { service } = key;
  if (service === "anthropic") {
    return;
  }
  throw new Error(`Expected an Anthropic key, got '${service}'`);
}

View File

@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
// HPM middleware (runs on onProxyReq, cannot be async) // HPM middleware (runs on onProxyReq, cannot be async)
export { addKey } from "./add-key"; export { addKey } from "./add-key";
export { addAnthropicPreamble } from "./add-anthropic-preamble";
export { finalizeBody } from "./finalize-body"; export { finalizeBody } from "./finalize-body";
export { languageFilter } from "./language-filter"; export { languageFilter } from "./language-filter";
export { limitCompletions } from "./limit-completions"; export { limitCompletions } from "./limit-completions";

View File

@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
// Remove duplicates // Remove duplicates
stops = [...new Set(stops)]; stops = [...new Set(stops)];
// TEMP: More shitty anthropic API hacks
// If you receive a 400 Bad Request error from Anthropic complaining about
// "prompt must start with a '\n\nHuman: ' turn", enable this setting.
// I will try to fix this when I can identify why it only happens sometimes.
let preamble = "";
if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
preamble = "\n\nHuman: Hello Claude.";
}
return { return {
...rest, ...rest,
model, model,
prompt: preamble + prompt, prompt: prompt,
max_tokens_to_sample: rest.max_tokens, max_tokens_to_sample: rest.max_tokens,
stop_sequences: stops, stop_sequences: stops,
}; };

View File

@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
function reenqueueRequest(req: Request) { function reenqueueRequest(req: Request) {
req.log.info( req.log.info(
{ key: req.key?.hash, retryCount: req.retryCount }, { key: req.key?.hash, retryCount: req.retryCount },
`Re-enqueueing request due to rate-limit error` `Re-enqueueing request due to retryable error`
); );
req.retryCount++; req.retryCount++;
enqueue(req); enqueue(req);
@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
if (statusCode === 400) { if (statusCode === 400) {
// Bad request (likely prompt is too long) // Bad request (likely prompt is too long)
if (req.outboundApi === "openai") {
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`; errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
} else if (req.outboundApi === "anthropic") {
maybeHandleMissingPreambleError(req, errorPayload);
}
} else if (statusCode === 401) { } else if (statusCode === 401) {
// Key is invalid or was revoked // Key is invalid or was revoked
keyPool.disable(req.key!); keyPool.disable(req.key!);
@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
// OpenAI uses this for a bunch of different rate-limiting scenarios. // OpenAI uses this for a bunch of different rate-limiting scenarios.
if (req.outboundApi === "openai") { if (req.outboundApi === "openai") {
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload); handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
} else { } else if (req.outboundApi === "anthropic") {
handleAnthropicRateLimitError(req, errorPayload); handleAnthropicRateLimitError(req, errorPayload);
} }
} else if (statusCode === 404) { } else if (statusCode === 404) {
@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
throw new Error(errorPayload.error?.message); throw new Error(errorPayload.error?.message);
}; };
/**
 * Handles a 400 response from Anthropic, with special treatment for the
 * "missing preamble" rejection.
 *
 * Certain API keys appear to validate input more strictly than others and
 * insist that the prompt begin with `\n\nHuman:`. This is not documented
 * anywhere, and it is not clear why only some keys enforce it (perhaps to stop
 * the key being used as a generic text completion service).
 *
 * When the upstream error is exactly:
 * ```
 * {
 *   "error": {
 *     "type": "invalid_request_error",
 *     "message": "prompt must start with \"\n\nHuman:\" turn"
 *   }
 * }
 * ```
 * the key is marked with `requiresPreamble: true` through the key pool. Then,
 * if queueing is enabled (`config.queueMode` is not `"none"`), the request is
 * re-enqueued and a `RetryableError` is thrown; otherwise a `proxy_note`
 * asking the client to retry is attached to the payload.
 *
 * Any other error only gets a generic `proxy_note`.
 *
 * @param req The request that received the upstream error.
 * @param errorPayload Parsed upstream error body; `proxy_note` is set on it.
 * @throws {RetryableError} After re-enqueueing a request that lacked the
 *   preamble.
 */
function maybeHandleMissingPreambleError(
  req: Request,
  errorPayload: Record<string, any>
) {
  const upstreamError = errorPayload.error;
  const isMissingPreamble =
    upstreamError?.type === "invalid_request_error" &&
    upstreamError?.message === 'prompt must start with "\n\nHuman:" turn';

  if (!isMissingPreamble) {
    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
    return;
  }

  req.log.warn(
    { key: req.key?.hash },
    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
  );
  // Flag the key first so the retried request gets the preamble added.
  keyPool.update(req.key!, { requiresPreamble: true });

  if (config.queueMode === "none") {
    // No queue to retry through; ask the client to resend instead.
    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
    return;
  }

  reenqueueRequest(req);
  throw new RetryableError("Claude request re-enqueued to add preamble.");
}
function handleAnthropicRateLimitError( function handleAnthropicRateLimitError(
req: Request, req: Request,
errorPayload: Record<string, any> errorPayload: Record<string, any>