Automatically add Anthropic "\n\nHuman:" preamble when necessary (khanon/oai-reverse-proxy!23)
This commit is contained in:
parent
4f2a12ef14
commit
96cf4a0e2d
|
@ -11,12 +11,30 @@ export const ANTHROPIC_SUPPORTED_MODELS = [
|
||||||
] as const;
|
] as const;
|
||||||
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
|
export type AnthropicModel = (typeof ANTHROPIC_SUPPORTED_MODELS)[number];
|
||||||
|
|
||||||
|
/**
 * Shape of a partial update that may be applied to an {@link AnthropicKey}.
 *
 * Every remaining property is optional. The key material, its hash, and the
 * usage / rate-limit bookkeeping fields (`lastUsed`, `promptCount`,
 * `rateLimitedAt`, `rateLimitedUntil`) are excluded from the update shape.
 */
export type AnthropicKeyUpdate = Partial<
  Omit<
    AnthropicKey,
    | "key"
    | "hash"
    | "lastUsed"
    | "promptCount"
    | "rateLimitedAt"
    | "rateLimitedUntil"
  >
>;
|
||||||
|
|
||||||
/** A pooled API key for the Anthropic service. */
export interface AnthropicKey extends Key {
  /** Discriminator identifying the service this key belongs to. */
  readonly service: "anthropic";
  /** When this key was most recently rate limited. */
  rateLimitedAt: number;
  /** The point in time until which this key is considered rate limited. */
  rateLimitedUntil: number;
  /**
   * True when this key needs the prompt to open with a message from the user.
   *
   * Some Anthropic keys reject prompts that do not begin with a user turn,
   * while others accept them, even though the API endpoint, version, and
   * model are the same; the reason is unclear. The flag is switched on once a
   * key has returned that specific error.
   */
  requiresPreamble: boolean;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -52,6 +70,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
lastUsed: 0,
|
lastUsed: 0,
|
||||||
rateLimitedAt: 0,
|
rateLimitedAt: 0,
|
||||||
rateLimitedUntil: 0,
|
rateLimitedUntil: 0,
|
||||||
|
requiresPreamble: false,
|
||||||
hash: `ant-${crypto
|
hash: `ant-${crypto
|
||||||
.createHash("sha256")
|
.createHash("sha256")
|
||||||
.update(key)
|
.update(key)
|
||||||
|
@ -119,6 +138,11 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
this.log.warn({ key: key.hash }, "Key disabled");
|
this.log.warn({ key: key.hash }, "Key disabled");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Merges `update` into the pooled key whose hash equals `hash`.
 *
 * The pooled key object is mutated in place via `Object.assign`.
 *
 * @param hash Hash of the key to update.
 * @param update Properties to copy onto the pooled key.
 * @throws Error if no key with the given hash exists in this provider.
 */
public update(hash: string, update: Partial<AnthropicKey>) {
  const keyFromPool = this.keys.find((k) => k.hash === hash);
  if (!keyFromPool) {
    // Without this guard Object.assign(undefined, ...) fails with an opaque
    // "Cannot convert undefined or null to object" TypeError.
    throw new Error(`Cannot update unknown Anthropic key '${hash}'`);
  }
  Object.assign(keyFromPool, update);
}
|
||||||
|
|
||||||
/** Returns how many keys in this provider are not disabled. */
public available() {
  const usableKeys = this.keys.filter((candidate) => !candidate.isDisabled);
  return usableKeys.length;
}
|
||||||
|
|
|
@ -47,6 +47,7 @@ export interface KeyProvider<T extends Key = Key> {
|
||||||
get(model: Model): T;
|
get(model: Model): T;
|
||||||
list(): Omit<T, "key">[];
|
list(): Omit<T, "key">[];
|
||||||
disable(key: T): void;
|
disable(key: T): void;
|
||||||
|
update(hash: string, update: Partial<T>): void;
|
||||||
available(): number;
|
available(): number;
|
||||||
anyUnchecked(): boolean;
|
anyUnchecked(): boolean;
|
||||||
incrementPrompt(hash: string): void;
|
incrementPrompt(hash: string): void;
|
||||||
|
@ -63,3 +64,5 @@ export const SUPPORTED_MODELS = [
|
||||||
] as const;
|
] as const;
|
||||||
export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
|
export type SupportedModel = (typeof SUPPORTED_MODELS)[number];
|
||||||
export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
|
export { OPENAI_SUPPORTED_MODELS, ANTHROPIC_SUPPORTED_MODELS };
|
||||||
|
export { AnthropicKey } from "./anthropic/provider";
|
||||||
|
export { OpenAIKey } from "./openai/provider";
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
import type * as http from "http";
|
import type * as http from "http";
|
||||||
import { AnthropicKeyProvider } from "./anthropic/provider";
|
import { AnthropicKeyProvider, AnthropicKeyUpdate } from "./anthropic/provider";
|
||||||
import { Key, AIService, Model, KeyProvider } from "./index";
|
import { Key, Model, KeyProvider, AIService } from "./index";
|
||||||
import { OpenAIKeyProvider } from "./openai/provider";
|
import { OpenAIKeyProvider, OpenAIKeyUpdate } from "./openai/provider";
|
||||||
|
|
||||||
|
type AllowedPartial = OpenAIKeyUpdate | AnthropicKeyUpdate;
|
||||||
|
|
||||||
export class KeyPool {
|
export class KeyPool {
|
||||||
private keyProviders: KeyProvider[] = [];
|
private keyProviders: KeyProvider[] = [];
|
||||||
|
@ -35,6 +37,11 @@ export class KeyPool {
|
||||||
service.disable(key);
|
service.disable(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Applies a partial update to `key` by delegating to the provider registered
 * for the key's service. The key is identified to the provider by its hash.
 *
 * @param key Key whose pooled record should be updated.
 * @param props Properties to apply.
 */
public update(key: Key, props: AllowedPartial): void {
  const owningProvider = this.getKeyProvider(key.service);
  owningProvider.update(key.hash, props);
}
|
||||||
|
|
||||||
public available(service: AIService | "all" = "all"): number {
|
public available(service: AIService | "all" = "all"): number {
|
||||||
return this.keyProviders.reduce((sum, provider) => {
|
return this.keyProviders.reduce((sum, provider) => {
|
||||||
const includeProvider = service === "all" || service === provider.service;
|
const includeProvider = service === "all" || service === provider.service;
|
||||||
|
|
|
@ -8,6 +8,7 @@ import { ipLimiter } from "./rate-limit";
|
||||||
import { handleProxyError } from "./middleware/common";
|
import { handleProxyError } from "./middleware/common";
|
||||||
import {
|
import {
|
||||||
addKey,
|
addKey,
|
||||||
|
addAnthropicPreamble,
|
||||||
createPreprocessorMiddleware,
|
createPreprocessorMiddleware,
|
||||||
finalizeBody,
|
finalizeBody,
|
||||||
languageFilter,
|
languageFilter,
|
||||||
|
@ -69,6 +70,7 @@ const rewriteAnthropicRequest = (
|
||||||
) => {
|
) => {
|
||||||
const rewriterPipeline = [
|
const rewriterPipeline = [
|
||||||
addKey,
|
addKey,
|
||||||
|
addAnthropicPreamble,
|
||||||
languageFilter,
|
languageFilter,
|
||||||
limitOutputTokens,
|
limitOutputTokens,
|
||||||
finalizeBody,
|
finalizeBody,
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
import { AnthropicKey, Key } from "../../../key-management";
|
||||||
|
import { isCompletionRequest } from "../common";
|
||||||
|
import { ProxyRequestMiddleware } from ".";
|
||||||
|
|
||||||
|
/**
 * Request rewriter that prefixes the prompt with `\n\nHuman:` when the
 * assigned Anthropic key is flagged with `requiresPreamble`.
 *
 * Some keys require the prompt to start with `\n\nHuman:`. There is no way to
 * know this without sending the request and seeing whether it fails, so this
 * step only reads the flag on the key.
 *
 * The request is left untouched when any of the following holds:
 * - it is not a completion request, or its key is not an Anthropic key;
 * - the key is not flagged as requiring the preamble;
 * - the prompt is not a string (it is never coerced to one here);
 * - the prompt already starts with the preamble.
 */
export const addAnthropicPreamble: ProxyRequestMiddleware = (
  _proxyReq,
  req
) => {
  if (!isCompletionRequest(req) || req.key?.service !== "anthropic") {
    return;
  }

  assertAnthropicKey(req.key);
  if (!req.key.requiresPreamble) {
    return;
  }

  const preamble = "\n\nHuman:";
  const prompt: unknown = req.body.prompt;
  if (typeof prompt !== "string" || prompt.startsWith(preamble)) {
    // Nothing to add; avoid rewriting (and string-coercing) the prompt.
    return;
  }

  // Log only when the prompt is actually modified.
  req.log.debug({ key: req.key.hash, preamble }, "Adding preamble to prompt");
  req.body.prompt = preamble + prompt;
};
|
||||||
|
|
||||||
|
/**
 * Assertion function that narrows a pool key to {@link AnthropicKey}.
 *
 * @param key Key to check; a missing key is treated as a failed assertion.
 * @throws Error if `key` is missing or its `service` is not `"anthropic"`.
 */
function assertAnthropicKey(
  key: Key | undefined
): asserts key is AnthropicKey {
  // Optional chaining so a missing key reports this assertion's own error
  // rather than a TypeError from reading `.service` of undefined.
  if (key?.service !== "anthropic") {
    throw new Error(`Expected an Anthropic key, got '${key?.service}'`);
  }
}
|
|
@ -9,6 +9,7 @@ export { transformOutboundPayload } from "./transform-outbound-payload";
|
||||||
|
|
||||||
// HPM middleware (runs on onProxyReq, cannot be async)
|
// HPM middleware (runs on onProxyReq, cannot be async)
|
||||||
export { addKey } from "./add-key";
|
export { addKey } from "./add-key";
|
||||||
|
export { addAnthropicPreamble } from "./add-anthropic-preamble";
|
||||||
export { finalizeBody } from "./finalize-body";
|
export { finalizeBody } from "./finalize-body";
|
||||||
export { languageFilter } from "./language-filter";
|
export { languageFilter } from "./language-filter";
|
||||||
export { limitCompletions } from "./limit-completions";
|
export { limitCompletions } from "./limit-completions";
|
||||||
|
|
|
@ -153,19 +153,10 @@ function openaiToAnthropic(body: any, req: Request) {
|
||||||
// Remove duplicates
|
// Remove duplicates
|
||||||
stops = [...new Set(stops)];
|
stops = [...new Set(stops)];
|
||||||
|
|
||||||
// TEMP: More shitty anthropic API hacks
|
|
||||||
// If you receive a 400 Bad Request error from Anthropic complaining about
|
|
||||||
// "prompt must start with a '\n\nHuman: ' turn", enable this setting.
|
|
||||||
// I will try to fix this when I can identify why it only happens sometimes.
|
|
||||||
let preamble = "";
|
|
||||||
if (process.env.CLAUDE_ADD_HUMAN_PREAMBLE) {
|
|
||||||
preamble = "\n\nHuman: Hello Claude.";
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...rest,
|
...rest,
|
||||||
model,
|
model,
|
||||||
prompt: preamble + prompt,
|
prompt: prompt,
|
||||||
max_tokens_to_sample: rest.max_tokens,
|
max_tokens_to_sample: rest.max_tokens,
|
||||||
stop_sequences: stops,
|
stop_sequences: stops,
|
||||||
};
|
};
|
||||||
|
|
|
@ -135,7 +135,7 @@ export const createOnProxyResHandler = (apiMiddleware: ProxyResMiddleware) => {
|
||||||
function reenqueueRequest(req: Request) {
|
function reenqueueRequest(req: Request) {
|
||||||
req.log.info(
|
req.log.info(
|
||||||
{ key: req.key?.hash, retryCount: req.retryCount },
|
{ key: req.key?.hash, retryCount: req.retryCount },
|
||||||
`Re-enqueueing request due to rate-limit error`
|
`Re-enqueueing request due to retryable error`
|
||||||
);
|
);
|
||||||
req.retryCount++;
|
req.retryCount++;
|
||||||
enqueue(req);
|
enqueue(req);
|
||||||
|
@ -262,7 +262,11 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
|
|
||||||
if (statusCode === 400) {
|
if (statusCode === 400) {
|
||||||
// Bad request (likely prompt is too long)
|
// Bad request (likely prompt is too long)
|
||||||
|
if (req.outboundApi === "openai") {
|
||||||
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
|
errorPayload.proxy_note = `Upstream service rejected the request as invalid. Your prompt may be too long for ${req.body?.model}.`;
|
||||||
|
} else if (req.outboundApi === "anthropic") {
|
||||||
|
maybeHandleMissingPreambleError(req, errorPayload);
|
||||||
|
}
|
||||||
} else if (statusCode === 401) {
|
} else if (statusCode === 401) {
|
||||||
// Key is invalid or was revoked
|
// Key is invalid or was revoked
|
||||||
keyPool.disable(req.key!);
|
keyPool.disable(req.key!);
|
||||||
|
@ -271,7 +275,7 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
// OpenAI uses this for a bunch of different rate-limiting scenarios.
|
||||||
if (req.outboundApi === "openai") {
|
if (req.outboundApi === "openai") {
|
||||||
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
|
handleOpenAIRateLimitError(req, tryAgainMessage, errorPayload);
|
||||||
} else {
|
} else if (req.outboundApi === "anthropic") {
|
||||||
handleAnthropicRateLimitError(req, errorPayload);
|
handleAnthropicRateLimitError(req, errorPayload);
|
||||||
}
|
}
|
||||||
} else if (statusCode === 404) {
|
} else if (statusCode === 404) {
|
||||||
|
@ -305,6 +309,48 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
||||||
throw new Error(errorPayload.error?.message);
|
throw new Error(errorPayload.error?.message);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Handles the upstream error returned by keys that insist on a `\n\nHuman:`
 * prefix on the prompt.
 *
 * Certain API keys appear to validate input more strictly than others and
 * reject prompts lacking that prefix, perhaps to stop the key from being used
 * as a generic text completion service and to enforce the chat RLHF. This is
 * undocumented, and it is not clear why only some keys behave this way.
 *
 * When the payload matches that exact error, the key is marked as requiring
 * the preamble. If queueing is enabled (`config.queueMode` is not `"none"`)
 * the request is re-enqueued and a `RetryableError` is thrown; otherwise a
 * `proxy_note` asking the user to retry is attached to the payload. Any other
 * payload just receives a generic `proxy_note`.
 *
 * The exact error is:
 * ```
 * {
 *   "error": {
 *     "type": "invalid_request_error",
 *     "message": "prompt must start with \"\n\nHuman:\" turn"
 *   }
 * }
 * ```
 *
 * @param req Request whose upstream call failed; its key is the one flagged.
 * @param errorPayload Parsed upstream error body; `proxy_note` is set on it.
 */
function maybeHandleMissingPreambleError(
  req: Request,
  errorPayload: Record<string, any>
) {
  const upstreamError = errorPayload.error;
  const isMissingPreamble =
    upstreamError?.type === "invalid_request_error" &&
    upstreamError?.message === 'prompt must start with "\n\nHuman:" turn';

  if (!isMissingPreamble) {
    errorPayload.proxy_note = `Proxy received unrecognized error from Anthropic. Check the specific error for more information.`;
    return;
  }

  req.log.warn(
    { key: req.key?.hash },
    "Request failed due to missing preamble. Key will be marked as such for subsequent requests."
  );
  keyPool.update(req.key!, { requiresPreamble: true });

  const queueingEnabled = config.queueMode !== "none";
  if (queueingEnabled) {
    // Retry through the queue now that the key has been flagged.
    reenqueueRequest(req);
    throw new RetryableError("Claude request re-enqueued to add preamble.");
  }

  errorPayload.proxy_note = `This Claude key requires special prompt formatting. Try again; the proxy will reformat your prompt next time.`;
}
|
||||||
|
|
||||||
function handleAnthropicRateLimitError(
|
function handleAnthropicRateLimitError(
|
||||||
req: Request,
|
req: Request,
|
||||||
errorPayload: Record<string, any>
|
errorPayload: Record<string, any>
|
||||||
|
|
Loading…
Reference in New Issue