handles Anthropic keys which cannot support multimodal requests
This commit is contained in:
parent
3f9fd25004
commit
32fea30c91
|
@ -54,13 +54,13 @@ export function sendProxyError(
|
|||
const msg =
|
||||
statusCode === 500
|
||||
? `The proxy encountered an error while trying to process your prompt.`
|
||||
: `The proxy encountered an error while trying to send your prompt to the upstream service.`;
|
||||
: `The proxy encountered an error while trying to send your prompt to the API.`;
|
||||
|
||||
sendErrorToClient({
|
||||
options: {
|
||||
format: req.inboundApi,
|
||||
title: `Proxy error (HTTP ${statusCode} ${statusMessage})`,
|
||||
message: `${msg} Further technical details are provided below.`,
|
||||
message: `${msg} Further details are provided below.`,
|
||||
obj: errorPayload,
|
||||
reqId: req.id,
|
||||
model: req.body?.model,
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import { AnthropicChatMessage } from "../../../../shared/api-schemas";
|
||||
import { Key, OpenAIKey, keyPool } from "../../../../shared/key-management";
|
||||
import { isEmbeddingsRequest } from "../../common";
|
||||
import { HPMRequestCallback } from "../index";
|
||||
|
@ -19,17 +20,24 @@ export const addKey: HPMRequestCallback = (proxyReq, req) => {
|
|||
throw new Error("You must specify a model with your request.");
|
||||
}
|
||||
|
||||
let needsMultimodal = false;
|
||||
if (outboundApi === "anthropic-chat") {
|
||||
needsMultimodal = needsMultimodalKey(
|
||||
body.messages as AnthropicChatMessage[]
|
||||
);
|
||||
}
|
||||
|
||||
if (inboundApi === outboundApi) {
|
||||
assignedKey = keyPool.get(body.model, service);
|
||||
assignedKey = keyPool.get(body.model, service, needsMultimodal);
|
||||
} else {
|
||||
switch (outboundApi) {
|
||||
// If we are translating between API formats we may need to select a model
|
||||
// for the user, because the provided model is for the inbound API.
|
||||
// TODO: This whole else condition is probably no longer needed since API
|
||||
// translation now reassigns the model earlier in the request pipeline.
|
||||
case "anthropic-chat":
|
||||
case "anthropic-text":
|
||||
assignedKey = keyPool.get("claude-v1", service);
|
||||
case "anthropic-chat":
|
||||
assignedKey = keyPool.get("claude-v1", service, needsMultimodal);
|
||||
break;
|
||||
case "openai-text":
|
||||
assignedKey = keyPool.get("gpt-3.5-turbo-instruct", service);
|
||||
|
@ -114,3 +122,10 @@ export const addKeyForEmbeddingsRequest: HPMRequestCallback = (
|
|||
proxyReq.setHeader("OpenAI-Organization", key.organizationId);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Reports whether an Anthropic chat request contains inline image content.
 *
 * The result is passed to `keyPool.get` so that a key which allows
 * multimodal prompts is selected for requests that include images.
 *
 * @param messages - Messages from the Anthropic chat request body.
 * @returns `true` if at least one message has a non-string `content` array
 *   containing a part whose `type` is `"image"`; otherwise `false`.
 */
function needsMultimodalKey(messages: AnthropicChatMessage[]): boolean {
  // `messages` is obtained by casting the request body, so its runtime shape
  // is not guaranteed. Treat anything that is not an array as "no images"
  // instead of throwing while selecting a key.
  if (!Array.isArray(messages)) return false;

  return messages.some((message) => {
    // String content is plain text; only array content can hold image parts.
    const content: unknown = message?.content;
    return (
      Array.isArray(content) && content.some((part) => part?.type === "image")
    );
  });
}
|
||||
|
|
|
@ -33,9 +33,13 @@ function getMessageContent({
|
|||
*/
|
||||
|
||||
const note = obj?.proxy_note || obj?.error?.message || "";
|
||||
const header = `**${title}**`;
|
||||
const friendlyMessage = note ? `${message}\n\n***\n\n*${note}*` : message;
|
||||
const serializedObj = obj ? "```" + JSON.stringify(obj, null, 2) + "```" : "";
|
||||
const header = `### **${title}**`;
|
||||
const friendlyMessage = note ? `${message}\n\n----\n\n*${note}*` : message;
|
||||
|
||||
const serializedObj = obj
|
||||
? ["```", JSON.stringify(obj, null, 2), "```"].join("\n")
|
||||
: "";
|
||||
|
||||
const { stack } = JSON.parse(JSON.stringify(obj ?? {}));
|
||||
let prettyTrace = "";
|
||||
if (stack && obj) {
|
||||
|
|
|
@ -191,6 +191,9 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
|||
{ statusCode, type: errorType, errorPayload, key: req.key?.hash },
|
||||
`Received error response from upstream. (${proxyRes.statusMessage})`
|
||||
);
|
||||
|
||||
// TODO: split upstream error handling into separate modules for each service,
|
||||
// this is out of control.
|
||||
|
||||
const service = req.key!.service;
|
||||
if (service === "aws") {
|
||||
|
@ -200,8 +203,6 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
|||
}
|
||||
|
||||
if (statusCode === 400) {
|
||||
// Bad request. For OpenAI, this is usually due to prompt length.
|
||||
// For Anthropic, this is usually due to missing preamble.
|
||||
switch (service) {
|
||||
case "openai":
|
||||
case "google-ai":
|
||||
|
@ -231,31 +232,46 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
|
|||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
|
||||
} else if (statusCode === 403) {
|
||||
if (service === "anthropic") {
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
|
||||
return;
|
||||
}
|
||||
switch (errorType) {
|
||||
case "UnrecognizedClientException":
|
||||
// Key is invalid.
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
|
||||
break;
|
||||
case "AccessDeniedException":
|
||||
const isModelAccessError =
|
||||
errorPayload.error?.message?.includes(`specified model ID`);
|
||||
if (!isModelAccessError) {
|
||||
req.log.error(
|
||||
{ key: req.key?.hash, model: req.body?.model },
|
||||
"Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
|
||||
switch (service) {
|
||||
case "anthropic":
|
||||
if (
|
||||
errorType === "permission_error" &&
|
||||
errorPayload.error?.message?.toLowerCase().includes("multimodal")
|
||||
) {
|
||||
req.log.warn(
|
||||
{ key: req.key?.hash },
|
||||
"This Anthropic key does not support multimodal prompts."
|
||||
);
|
||||
keyPool.update(req.key!, { allowsMultimodality: false });
|
||||
await reenqueueRequest(req);
|
||||
throw new RetryableError("Claude request re-enqueued because key does not support multimodality.");
|
||||
} else {
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
|
||||
}
|
||||
return;
|
||||
case "aws":
|
||||
switch (errorType) {
|
||||
case "UnrecognizedClientException":
|
||||
// Key is invalid.
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
errorPayload.proxy_note = `Assigned API key is invalid or revoked, please try again.`;
|
||||
break;
|
||||
case "AccessDeniedException":
|
||||
const isModelAccessError =
|
||||
errorPayload.error?.message?.includes(`specified model ID`);
|
||||
if (!isModelAccessError) {
|
||||
req.log.error(
|
||||
{ key: req.key?.hash, model: req.body?.model },
|
||||
"Disabling key due to AccessDeniedException when invoking model. If credentials are valid, check IAM permissions."
|
||||
);
|
||||
keyPool.disable(req.key!, "revoked");
|
||||
}
|
||||
errorPayload.proxy_note = `API key doesn't have access to the requested resource. Model ID: ${req.body?.model}`;
|
||||
break;
|
||||
default:
|
||||
errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
|
||||
}
|
||||
errorPayload.proxy_note = `API key doesn't have access to the requested resource. Model ID: ${req.body?.model}`;
|
||||
break;
|
||||
default:
|
||||
errorPayload.proxy_note = `Received 403 error. Key may be invalid.`;
|
||||
}
|
||||
} else if (statusCode === 429) {
|
||||
switch (service) {
|
||||
|
|
|
@ -45,10 +45,11 @@ export interface AnthropicKey extends Key, AnthropicKeyUsage {
|
|||
*/
|
||||
isPozzed: boolean;
|
||||
isOverQuota: boolean;
|
||||
allowsMultimodality: boolean;
|
||||
/**
|
||||
* Key billing tier (https://docs.anthropic.com/claude/reference/rate-limits)
|
||||
**/
|
||||
tier: typeof TIER_PRIORITY[number];
|
||||
tier: (typeof TIER_PRIORITY)[number];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -111,6 +112,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
isOverQuota: false,
|
||||
isRevoked: false,
|
||||
isPozzed: false,
|
||||
allowsMultimodality: true,
|
||||
promptCount: 0,
|
||||
lastUsed: 0,
|
||||
rateLimitedAt: 0,
|
||||
|
@ -142,12 +144,20 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
|||
return this.keys.map((k) => Object.freeze({ ...k, key: undefined }));
|
||||
}
|
||||
|
||||
public get(_model: string) {
|
||||
// Currently, all Anthropic keys have access to all models. This will almost
|
||||
// certainly change when they move out of beta later this year.
|
||||
const availableKeys = this.keys.filter((k) => !k.isDisabled);
|
||||
public get(rawModel: string) {
|
||||
this.log.debug({ model: rawModel }, "Selecting key");
|
||||
const needsMultimodal = rawModel.endsWith("-multimodal");
|
||||
|
||||
const availableKeys = this.keys.filter((k) => {
|
||||
return !k.isDisabled && (!needsMultimodal || k.allowsMultimodality);
|
||||
});
|
||||
|
||||
if (availableKeys.length === 0) {
|
||||
throw new PaymentRequiredError("No Anthropic keys available.");
|
||||
throw new PaymentRequiredError(
|
||||
needsMultimodal
|
||||
? "No multimodal Anthropic keys available. Please disable multimodal input (such as inline images) and try again."
|
||||
: "No Anthropic keys available."
|
||||
);
|
||||
}
|
||||
|
||||
// Select a key, from highest priority to lowest priority:
|
||||
|
|
|
@ -41,7 +41,13 @@ export class KeyPool {
|
|||
this.scheduleRecheck();
|
||||
}
|
||||
|
||||
/**
 * Selects a key for the given model from the matching key provider.
 *
 * @param model - Model name supplied by the caller.
 * @param service - Service to take the key from. When omitted, the service is
 *   looked up from `model` via `getServiceForModel`.
 * @param multimodal - Set when the request needs a key that permits
 *   multimodal prompts.
 * @returns The key returned by the selected provider's `get`.
 */
public get(model: string, service?: LLMService, multimodal?: boolean): Key {
  // Resolve the service from the caller's original model name so the
  // multimodal marker below can never affect service lookup.
  const queryService = service || this.getServiceForModel(model);

  // hack for some claude requests needing keys with particular permissions
  // even though they use the same models as the non-multimodal requests.
  // The "-multimodal" suffix is only interpreted by the Anthropic provider
  // (it checks `rawModel.endsWith("-multimodal")`), so every other provider
  // receives the model name untouched.
  const providerModel =
    multimodal && queryService === "anthropic" ? `${model}-multimodal` : model;

  return this.getKeyProvider(queryService).get(providerModel);
}
|
||||
|
|
Loading…
Reference in New Issue