Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)
This commit is contained in:
parent 4f2a12ef14
commit 96cf4a0e2d
@@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [
 ] as const;
 export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];

+export type AnthropicKeyUpdate = Omit<
+  Partial<AnthropicKey>,
+  | "key"
+  | "hash"
+  | "lastUsed"
+  | "promptCount"
+  | "rateLimitedAt"
+  | "rateLimitedUntil"
+>;
+
 export interface AnthropicKey extends Key {
   readonly service: "anthropic";
   /** The time at which this key was last rate limited. */
   rateLimitedAt: number;
   /** The time until which this key is rate limited. */
   rateLimitedUntil: number;
+  /**
+   * Whether this key requires a special preamble. For unclear reasons, some
+   * Anthropic keys will throw an error if the prompt does not begin with a
+   * message from the user, whereas others can be used without a preamble. This
+   * is despite using the same API endpoint, version, and model.
+   * When a key returns this particular error, we set this flag to true.
+   */
+  requiresPreamble: boolean;
 }

 /**
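For context, a minimal self-contained sketch (not part of the diff; types abbreviated to the fields shown in this hunk) of what the Omit-over-Partial pattern buys: callers can patch mutable flags such as requiresPreamble, while bookkeeping fields like hash or promptCount are rejected at compile time.

// Sketch only; the real Key interface has more fields than shown here.
interface Key {
  key: string;
  hash: string;
  service: string;
  lastUsed: number;
  promptCount: number;
}

interface AnthropicKey extends Key {
  readonly service: "anthropic";
  rateLimitedAt: number;
  rateLimitedUntil: number;
  requiresPreamble: boolean;
}

type AnthropicKeyUpdate = Omit<
  Partial<AnthropicKey>,
  "key" | "hash" | "lastUsed" | "promptCount" | "rateLimitedAt" | "rateLimitedUntil"
>;

// Hypothetical caller-facing signature, mirroring KeyPool.update below.
declare function update(hash: string, update: AnthropicKeyUpdate): void;

update("ant-1234567890", { requiresPreamble: true }); // OK: mutable flag
// update("ant-1234567890", { hash: "ant-0000000000" }); // compile error:
// "hash" is excluded from AnthropicKeyUpdate, so it cannot be patched.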
@@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
         lastUsed: 0,
         rateLimitedAt: 0,
         rateLimitedUntil: 0,
+        requiresPreamble: false,
         hash: `ant-${crypto
           .createHash("sha256")
           .update(key)
@@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
     this.log.warn({ key: key.hash }, "Key disabled");
   }

+  public update(hash: string, update: Partial<AnthropicKey>) {
+    const keyFromPool = this.keys.find((k) => k.hash === hash)!;
+    Object.assign(keyFromPool, update);
+  }
+
   public available() {
     return this.keys.filter((k) => !k.isDisabled).length;
   }
@@ -47,6 +47,7 @@ export interface KeyProvider<T extends Key = Key> {
   get(model: Model): T;
   list(): Omit<T, "key">[];
   disable(key: T): void;
+  update(hash: string, update: Partial<T>): void;
   available(): number;
   anyUnchecked(): boolean;
   incrementPrompt(hash: string): void;
@@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [
 ] as const;
 export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
 export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
+export { AnthropicKey } from "./anthropic/provider";
+export { OpenAIKey } from "./openai/provider";
@@ -1,7 +1,9 @@
 import type * as http from "http";
-import { AnthropicKeyProvider } from "./anthropic/provider";
-import { Key, AIService, Model, KeyProvider } from "./index";
-import { OpenAIKeyProvider } from "./openai/provider";
+import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
+import { Key, Model, KeyProvider, AIService } from "./index";
+import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";

+type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
+
 export class KeyPool {
   private keyProviders: KeyProvider[] = [];
@@ -35,6 +37,11 @@ export class KeyPool {
     service.disable(key);
   }

+  public update(key: Key, props: AllowedPartial): void {
+    const service = this.getKeyProvider(key.service);
+    service.update(key.hash, props);
+  }
+
   public available(service: AIService | "all" = "all"): number {
     return this.keyProviders.reduce((sum, provider) => {
       const includeProvider = service === "all" || service === provider.service;
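Routing the patch through KeyPool.update keeps callers agnostic of the key flavor: the pool resolves the owning provider from key.service and the provider applies the partial via Object.assign. The call the response middleware makes later in this diff is:

// From the 400-handler further down: flag the key so addAnthropicPreamble
// rewrites the next prompt sent with it.
keyPool.update(req.key!, { requiresPreamble: true });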
@@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
 import { handleProxyError } from "./middleware/common";
 import {
   addKey,
+  addAnthropicPreamble,
   createPreprocessorMiddleware,
   finalizeBody,
   languageFilter,
@@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
 ) => {
   const rewriterPipeline = [
     addKey,
+    addAnthropicPreamble,
     languageFilter,
     limitOutputTokens,
     finalizeBody,
@@ -0,0 +1,32 @@
+import { AnthropicKey, Key } from "../../../key-management";
+import { isCompletionRequest } from "../common";
+import { ProxyRequestMiddleware } from ".";
+
+/**
+ * Some keys require the prompt to start with `\n\nHuman:`. There is no way to
+ * know this without trying to send the request and seeing if it fails. If a
+ * key is marked as requiring a preamble, it will be added here.
+ */
+export const addAnthropicPreamble: ProxyRequestMiddleware = (
+  _proxyReq,
+  req
+) => {
+  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
+    return;
+  }
+
+  let preamble = "";
+  let prompt = req.body.prompt;
+  assertAnthropicKey(req.key);
+  if (req.key.requiresPreamble) {
+    preamble = prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
+    req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
+  }
+  req.body.prompt = preamble + prompt;
+};
+
+function assertAnthropicKey(key: Key): asserts key is AnthropicKey {
+  if (key.service !== "anthropic") {
+    throw new Error(`Expected an Anthropic key, got '${key.service}'`);
+  }
+}
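A small standalone sketch (not part of the diff; the proxy request type is stubbed out) of the effect this middleware has on req.body.prompt for a key that has been flagged:

// Standalone sketch of addAnthropicPreamble's prompt rewriting.
type FakeReq = {
  key: { service: "anthropic"; requiresPreamble: boolean; hash: string };
  body: { prompt: string };
};

function sketchAddPreamble(req: FakeReq) {
  let preamble = "";
  const prompt = req.body.prompt;
  if (req.key.requiresPreamble) {
    // Only prepend when the prompt does not already begin with a Human turn.
    preamble = prompt.startsWith("\n\nHuman:") ? "" : "\n\nHuman:";
  }
  req.body.prompt = preamble + prompt;
}

const req: FakeReq = {
  key: { service: "anthropic", requiresPreamble: true, hash: "ant-1234567890" },
  body: { prompt: "Assistant: Hello!\n\nHuman: Hi." },
};
sketchAddPreamble(req);
// req.body.prompt === "\n\nHuman:Assistant: Hello!\n\nHuman: Hi."
// A prompt that already starts with "\n\nHuman:" passes through unchanged, so
// the preamble is never duplicated.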
@@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";

 // HPM middleware (runs on onProxyReq, cannot be async)
 export { addKey } from "./add-key";
+export { addAnthropicPreamble } from "./add-anthropic-preamble";
 export { finalizeBody } from "./finalize-body";
 export { languageFilter } from "./language-filter";
 export { limitCompletions } from "./limit-completions";
@@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
   // Remove duplicates
   stops = [...new Set(stops)];

-  // TEMP: More shitty anthropic API hacks
-  // If you receive a 400 Bad Request error from Anthropic complaining about
-  // "prompt must start with a '\n\nHuman: ' turn", enable this setting.
-  // I will try to fix this when I can identify why it only happens sometimes.
-  let preamble = "";
-  if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
-    preamble = "\n\nHuman: Hello Claude.";
-  }
-
   return {
     ...rest,
     model,
-    prompt: preamble + prompt,
+    prompt: prompt,
     max_tokens_to_sample: rest.max_tokens,
     stop_sequences: stops,
   };
@@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
 function reenqueueRequest(req: Request) {
   req.log.info(
     { key: req.key?.hash, retryCount: req.retryCount },
-    `Re-enqueueing request due to rate-limit error`
+    `Re-enqueueing request due to retryable error`
   );
   req.retryCount++;
   enqueue(req);
@@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (

   if (statusCode === 400) {
     // Bad request (likely prompt is too long)
+    if (req.outboundApi === "openai") {
       errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
+    } else if (req.outboundApi === "anthropic") {
+      maybeHandleMissingPreambleError(req, errorPayload);
+    }
   } else if (statusCode === 401) {
     // Key is invalid or was revoked
     keyPool.disable(req.key!);
@@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     // OpenAI uses this for a bunch of different rate-limiting scenarios.
     if (req.outboundApi === "openai") {
       handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
-    } else {
+    } else if (req.outboundApi === "anthropic") {
       handleAnthropicRateLimitError(req, errorPayload);
     }
   } else if (statusCode === 404) {
@@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
   throw new Error(errorPayload.error?.message);
 };

+/**
+ * This is a workaround for a very strange issue where certain API keys seem to
+ * enforce more strict input validation than others -- specifically, they will
+ * require a `\n\nHuman:` prefix on the prompt, perhaps to prevent the key from
+ * being used as a generic text completion service and to enforce the use of
+ * the chat RLHF. This is not documented anywhere, and it's not clear why some
+ * keys enforce this and others don't.
+ * This middleware checks for that specific error and marks the key as being
+ * one that requires the prefix, and then re-enqueues the request.
+ * The exact error is:
+ * ```
+ * {
+ *   "error": {
+ *     "type": "invalid_request_error",
+ *     "message": "prompt must start with \"\n\nHuman:\" turn"
+ *   }
+ * }
+ * ```
+ */
+function maybeHandleMissingPreambleError(
+  req: Request,
+  errorPayload: Record<string, any>
+) {
+  if (
+    errorPayload.error?.type === "invalid_request_error" &&
+    errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn'
+  ) {
+    req.log.warn(
+      { key: req.key?.hash },
+      "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
+    );
+    keyPool.update(req.key!, { requiresPreamble: true });
+    if (config.queueMode !== "none") {
+      reenqueueRequest(req);
+      throw new RetryableError("Claude request re-enqueued to add preamble.");
+    }
+    errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
+  } else {
+    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
+  }
+}
+
 function handleAnthropicRateLimitError(
   req: Request,
   errorPayload: Record<string, any>
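A quick sanity check of the detection condition against the error shape quoted in the docblock above (standalone sketch, not part of the diff):

// The payload from the docblock satisfies the check in
// maybeHandleMissingPreambleError, so the key gets flagged and the request is
// re-enqueued (or, with the queue disabled, the user is told to retry).
const errorPayload = {
  error: {
    type: "invalid_request_error",
    message: 'prompt must start with "\n\nHuman:" turn',
  },
};

const isMissingPreambleError =
  errorPayload.error?.type === "invalid_request_error" &&
  errorPayload.error?.message === 'prompt must start with "\n\nHuman:" turn';

console.log(isMissingPreambleError); // true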