allows enabling vision prompts on a per-service basis

nai-degen 2024-06-07 12:09:43 -05:00
parent 55f1bbed3b
commit 7660ed8b94
3 changed files with 47 additions and 15 deletions

View File

@@ -46,6 +46,14 @@ NODE_ENV=production
# 'azure-dall-e' to the list of allowed model families.
# ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,gpt4o,claude,claude-opus,gemini-pro,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o
# Which services can be used to process prompts containing images via multimodal
# models. The following services are recognized:
# openai | anthropic | aws | azure | google-ai | mistral-ai
# Do not enable this feature unless all users are trusted, as you will be liable
# for any user-submitted images containing illegal content.
# By default, no image services are allowed and image prompts are rejected.
# ALLOWED_IMAGE_SERVICES=
# IP addresses or CIDR blocks from which requests will be blocked.
# IP_BLACKLIST=10.0.0.1/24
# URLs from which requests will be blocked.
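
For example, the new ALLOWED_IMAGE_SERVICES option documented above could be set to ALLOWED_IMAGE_SERVICES=openai,anthropic (an illustrative value, not part of this commit) to indicate that image prompts should be accepted only for OpenAI and Anthropic, while image-bearing requests to the remaining services stay rejected.
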
@@ -60,7 +68,7 @@ NODE_ENV=production
# Avoid short or common phrases as this tests the entire prompt.
# REJECT_PHRASES="phrase one,phrase two,"phrase three, which has a comma",phrase four"
# Message to show when requests are rejected.
# REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
# REJECT_MESSAGE="You can't say that here."
# Whether prompts should be logged to Google Sheets.
# Requires additional setup. See `docs/google-sheets.md` for more information.
@@ -102,18 +110,19 @@ NODE_ENV=production
# ALLOW_NICKNAME_CHANGES=true
# Default token quotas for each model family. (0 for unlimited)
# DALL-E "tokens" are counted at a rate of 100000 tokens per US$1.00 generated,
# which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
# See `docs/dall-e-configuration.md` for more information.
# Specify as TOKEN_QUOTA_MODEL_FAMILY=value, replacing dashes with underscores.
# TOKEN_QUOTA_TURBO=0
# TOKEN_QUOTA_GPT4=0
# TOKEN_QUOTA_GPT4_32K=0
# TOKEN_QUOTA_GPT4_TURBO=0
# TOKEN_QUOTA_DALL_E=0
# TOKEN_QUOTA_CLAUDE=0
# TOKEN_QUOTA_GEMINI_PRO=0
# TOKEN_QUOTA_AWS_CLAUDE=0
# "Tokens" for image-generation models are counted at a rate of 100000 tokens
# per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
# DALL-E 3 costs around US$0.10 per image (10000 tokens).
# See `docs/dall-e-configuration.md` for more information.
# TOKEN_QUOTA_DALL_E=0
# How often to refresh token quotas. (hourly | daily)
# Leave unset to never automatically refresh quotas.
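
A rough worked example of the image-token rate quoted in the comments above; the quota value and the constant names are purely illustrative:

// 100000 "tokens" correspond to US$1.00 of image generation, and DALL-E 3
// costs roughly US$0.10 per image, per the comments in the .env template.
const TOKENS_PER_USD = 100_000;
const DALLE3_COST_USD = 0.1;
const tokensPerImage = DALLE3_COST_USD * TOKENS_PER_USD; // 10000 tokens

// A hypothetical quota of TOKEN_QUOTA_DALL_E=500000 would therefore cover
// about US$5.00 of generation, i.e. roughly 50 DALL-E 3 images.
const quota = 500_000;
const approxImages = Math.floor(quota / tokensPerImage); // 50
console.log({ tokensPerImage, approxImages });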

View File

@@ -3,7 +3,7 @@ import dotenv from "dotenv";
import type firebase from "firebase-admin";
import path from "path";
import pino from "pino";
import type { ModelFamily } from "./shared/models";
import type { LLMService, ModelFamily } from "./shared/models";
import { MODEL_FAMILIES } from "./shared/models";
dotenv.config();
@@ -340,13 +340,18 @@ type Config = {
*/
allowOpenAIToolUsage?: boolean;
/**
* Whether to allow prompts containing images, for use with multimodal models.
* Avoid giving this to untrusted users, as they can submit illegal content.
* Which services will accept prompts containing images, for use with
* multimodal models. Users with `special` role are exempt from this
* restriction.
*
* Applies to GPT-4 Vision and Claude Vision. Users with `special` role are
* exempt from this restriction.
* Do not enable this feature for untrusted users, as malicious users could
* send images which violate your provider's terms of service or local laws.
*
* Defaults to no services, meaning image prompts are disabled. Use a comma-
* separated list. Available services are:
* openai,anthropic,google-ai,mistral-ai,aws,azure
*/
allowImagePrompts?: boolean;
allowedVisionServices: LLMService[];
/**
* Allows overriding the default proxy endpoint route. Defaults to /proxy.
* A leading slash is required.
@@ -479,7 +484,9 @@ export const config: Config = {
staticServiceInfo: getEnvWithDefault("STATIC_SERVICE_INFO", false),
trustedProxies: getEnvWithDefault("TRUSTED_PROXIES", 1),
allowOpenAIToolUsage: getEnvWithDefault("ALLOW_OPENAI_TOOL_USAGE", false),
allowImagePrompts: getEnvWithDefault("ALLOW_IMAGE_PROMPTS", false),
allowedVisionServices: parseCsv(
getEnvWithDefault("ALLOWED_VISION_SERVICES", "")
) as LLMService[],
proxyEndpointRoute: getEnvWithDefault("PROXY_ENDPOINT_ROUTE", "/proxy"),
adminWhitelist: parseCsv(
getEnvWithDefault("ADMIN_WHITELIST", "0.0.0.0/0,::/0")
@@ -536,6 +543,17 @@ export async function assertConfigIsValid() {
);
}
if (process.env.ALLOW_IMAGE_PROMPTS === "true") {
const hasAllowedServices = config.allowedVisionServices.length > 0;
if (!hasAllowedServices) {
config.allowedVisionServices = ["openai", "anthropic"];
startupLogger.warn(
{ allowedVisionServices: config.allowedVisionServices },
"ALLOW_IMAGE_PROMPTS is deprecated. Use ALLOWED_VISION_SERVICES instead."
);
}
}
if (config.promptLogging && !config.promptLoggingBackend) {
throw new Error(
"Prompt logging is enabled but no backend is configured. Set PROMPT_LOGGING_BACKEND to 'google_sheets' or 'file'."

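A minimal standalone sketch of the behavior this config change describes. The VISION_SERVICES constant and the parseAllowedVisionServices and canUseVision helpers are names invented for this illustration so the snippet compiles on its own; in the actual code, LLMService is imported from ./shared/models and parsing is done by parseCsv(getEnvWithDefault(...)) as shown in the diff above.

const VISION_SERVICES = [
  "openai", "anthropic", "aws", "azure", "google-ai", "mistral-ai",
] as const;
type LLMService = (typeof VISION_SERVICES)[number];

// Turn a comma-separated env value into a typed allow-list, dropping any
// unrecognized entries.
function parseAllowedVisionServices(raw: string | undefined): LLMService[] {
  return (raw ?? "")
    .split(",")
    .map((s) => s.trim())
    .filter((s): s is LLMService =>
      (VISION_SERVICES as readonly string[]).includes(s)
    );
}

// e.g. ALLOWED_VISION_SERVICES=openai,anthropic in .env
const allowed = parseAllowedVisionServices(process.env.ALLOWED_VISION_SERVICES);

// Users with the special role bypass the restriction; everyone else may only
// send image prompts to services on the allow-list.
const canUseVision = (service: LLMService, userType?: string): boolean =>
  userType === "special" || allowed.includes(service);

The empty-string default keeps the list empty unless the operator opts in, matching the doc comment's default of no services and disabled image prompts; the legacy ALLOW_IMAGE_PROMPTS=true path falls back to openai and anthropic with a deprecation warning, as shown in assertConfigIsValid above.
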
View File

@@ -9,9 +9,14 @@ import { ForbiddenError } from "../../../../shared/errors";
* Rejects prompts containing images if multimodal prompts are disabled.
*/
export const validateVision: RequestPreprocessor = async (req) => {
if (config.allowImagePrompts) return;
if (req.user?.type === "special") return;
if (req.service === undefined) {
throw new Error("Request service must be set before validateVision");
}
if (req.user?.type === "special") return;
if (config.allowedVisionServices.includes(req.service)) return;
// vision not allowed for req's service, block prompts with images
let hasImage = false;
switch (req.outboundApi) {
case "openai":