Fix Kobold key selection (not fully verified)

2023-04-10 19:50:42 -07:00 · 2023-04-10 19:50:42 -07:00 · 885487a61d
parent f37a2fa7fa
commit 885487a61d
4 changed files with 68 additions and 18 deletions
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@ -1,4 +1,5 @@
 import { KeyPool } from "./key-pool";

-export type { Key } from "./key-pool";
+export type { Key, Model } from "./key-pool";
 export const keyPool = new KeyPool();
+export { SUPPORTED_MODELS } from "./key-pool";
--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@ -6,6 +6,27 @@ import { config } from "../config";
 import { logger } from "../logger";
 import { KeyChecker } from "./key-checker";

+// I made too many assumptions about OpenAI being the only provider and now this
+// is a mess with the addition of Anthropic. Server will have to be restricted
+// to operating on one provider at a time until I can refactor this to use
+// some KeyProvider interface.
+
+// TODO: Move the model type definitions below elsewhere; they are not key management.
+export type Model = OpenAIModel | AnthropicModel;
+export type OpenAIModel =
+| "gpt-3.5-turbo"
+| "gpt-4"
+export type AnthropicModel =
+| "claude-v1"
+| "claude-instant-v1"
+export const SUPPORTED_MODELS: readonly Model[] = [
+  "gpt-3.5-turbo",
+  "gpt-4",
+  "claude-v1",
+  "claude-instant-v1",
+] as const;
+  
+
 export type Key = {
  /** The OpenAI API key itself. */
  key: string;
@ -91,32 +112,29 @@ export class KeyPool {
    });
  }

-  public get(model: string) {
-    const needsGpt4Key = model.startsWith("gpt-4");
+  public get(model: Model) {
+    const needGpt4 = model.startsWith("gpt-4");
    const availableKeys = this.keys
-      .filter((key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4))
+      .filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4))
      .sort((a, b) => a.lastUsed - b.lastUsed);
    if (availableKeys.length === 0) {
      let message = "No keys available. Please add more keys.";
-      if (needsGpt4Key) {
+      if (needGpt4) {
        message =
-          "No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
+          "No GPT-4 keys available. Please add more keys or select a non-GPT-4 model.";
      }
-      this.log.error(message);
      throw new Error(message);
    }

    // Prioritize trial keys
    const trialKeys = availableKeys.filter((key) => key.isTrial);
    if (trialKeys.length > 0) {
-      this.log.info({ key: trialKeys[0].hash }, "Using trial key");
      trialKeys[0].lastUsed = Date.now();
      return trialKeys[0];
    }

    // Otherwise, return the oldest key
    const oldestKey = availableKeys[0];
-    this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
    oldestKey.lastUsed = Date.now();
    return { ...oldestKey };
  }
--- a/src/proxy/rewriters/add-key.ts
+++ b/src/proxy/rewriters/add-key.ts
@ -1,10 +1,45 @@
 import type { ExpressHttpProxyReqCallback } from ".";
-import { Key, keyPool } from "../../key-management";
+import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management";

 /** Add an OpenAI key from the pool to the request. */
 export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
  let assignedKey: Key;
-  assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!;
+
+  // Not all clients request a particular model.
+  // If they request a supported model, just use that.
+  // If they don't (or the model is not recognized), use a GPT-4 key if one
+  // is available, otherwise fall back to a GPT-3.5 key.
+
+  // TODO: Anthropic mode should prioritize Claude over Claude Instant.
+  // Each provider needs to define some priority order for their models.
+
+  if (bodyHasModel(req.body)) {
+    assignedKey = keyPool.get(req.body.model);
+  } else {
+    try {
+      assignedKey = keyPool.get("gpt-4");
+    } catch {
+      assignedKey = keyPool.get("gpt-3.5-turbo");
+    }
+  }
  req.key = assignedKey;
+  req.log.info(
+    {
+      key: assignedKey.hash,
+      model: req.body?.model,
+      isGpt4: assignedKey.isGpt4,
+    },
+    "Assigned key to request"
+  );
+
+  // TODO: Anthropic expects the key in an `X-API-Key` header, not `Authorization`.
  proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
 };
+
+function bodyHasModel(body: any): body is { model: Model } {
+  // Model names can carry a suffix pinning a frozen release version (OpenAI and
+  // Anthropic use the latest version when it is omitted), so match by prefix.
+  const isSupportedModel = (model: string) =>
+    SUPPORTED_MODELS.some((supported) => model.startsWith(supported));
+  return typeof body?.model === "string" && isSupportedModel(body.model);
+}
--- a/src/proxy/rewriters/transform-kobold-payload.ts
+++ b/src/proxy/rewriters/transform-kobold-payload.ts
@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
    { role: "user", content: lastLine },
  ];

-  // Kobold doesn't select a model. If we were assigned a key that supports
-  // gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
-  // assigned, we'll get an error from OpenAI but the key will be downgraded
-  // for the next request.
+  // Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4
+  // key, use that. Otherwise, use GPT-3.5-turbo.

-  // const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; //TODO: this is fucked, fix it later
-
-  const model = "gpt-3.5-turbo";
+  const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";
  const newBody = {
    model,
    temperature,