fixes kobold key selection, I think

2023-04-10 19:50:42 -07:00 · 2023-04-10 19:50:42 -07:00 · 885487a61d
parent f37a2fa7fa
commit 885487a61d
4 changed files with 68 additions and 18 deletions
--- a/src/key-management/index.ts
+++ b/src/key-management/index.ts
@ -1,4 +1,5 @@
 import { KeyPool } from "./key-pool";
-export type { Key } from "./key-pool";
+export type { Key, Model } from "./key-pool";
 export const keyPool = new KeyPool();
 export { SUPPORTED_MODELS } from "./key-pool";
--- a/src/key-management/key-pool.ts
+++ b/src/key-management/key-pool.ts
@ -6,6 +6,27 @@ import { config } from "../config";
 import { logger } from "../logger";
 import { KeyChecker } from "./key-checker";
 // I made too many assumptions about OpenAI being the only provider and now this
 // is a mess with the addition of Anthropic. Server will have to be restricted
 // to operating on one provider at a time until I can refactor this to use
 // some KeyProvider interface.
 // TODO: Move this stuff somewhere else, it's not key management.
 /** OpenAI model names recognized by the key pool. */
 export type OpenAIModel = "gpt-3.5-turbo" | "gpt-4";
 /** Anthropic model names recognized by the key pool. */
 export type AnthropicModel = "claude-v1" | "claude-instant-v1";
 /** Any model name the key pool accepts, regardless of provider. */
 export type Model = OpenAIModel | AnthropicModel;
 /**
  * Runtime list of every member of `Model`, OpenAI entries first. Useful for
  * checking a client-supplied string against the known model names.
  */
 export const SUPPORTED_MODELS: readonly Model[] = [
  // OpenAI
  "gpt-3.5-turbo",
  "gpt-4",
  // Anthropic
  "claude-v1",
  "claude-instant-v1",
 ] as const;
 export type Key = {
  /** The OpenAI API key itself. */
  key: string;
@ -91,32 +112,29 @@ export class KeyPool {
    });
  }
-  public get(model: string) {
+  public get(model: Model) {
    // Selects a key for the requested model and stamps it as just used.
    // Any model name beginning with "gpt-4" may only be served by keys flagged
    // `isGpt4`; every other model accepts any enabled key.
    // Throws an Error (after logging it) when no enabled key qualifies.
-    const needsGpt4Key = model.startsWith("gpt-4");
+    const needGpt4 = model.startsWith("gpt-4");
    // Ascending sort on `lastUsed` puts the least recently used key at index 0.
    // `filter` yields a new array, so the sort does not reorder `this.keys`.
    const availableKeys = this.keys
-      .filter((key) => !key.isDisabled && (!needsGpt4Key || key.isGpt4))
+      .filter((key) => !key.isDisabled && (!needGpt4 || key.isGpt4))
      .sort((a, b) => a.lastUsed - b.lastUsed);
    if (availableKeys.length === 0) {
      let message = "No keys available. Please add more keys.";
-      if (needsGpt4Key) {
+      if (needGpt4) {
        message =
-          "No GPT-4 keys available. Please add more keys or use a non-GPT-4 model.";
+          "No GPT-4 keys available. Please add more keys or select a non-GPT-4 model.";
      }
      this.log.error(message);
      throw new Error(message);
    }
    // Prioritize trial keys
    // The least recently used trial key wins, since the list is already sorted.
    // NOTE(review): this branch returns the pool's own key object, whereas the
    // fallback below returns a shallow copy — confirm the difference is intended.
    const trialKeys = availableKeys.filter((key) => key.isTrial);
    if (trialKeys.length > 0) {
      this.log.info({ key: trialKeys[0].hash }, "Using trial key");
      trialKeys[0].lastUsed = Date.now();
      return trialKeys[0];
    }
    // Otherwise, return the oldest key
    const oldestKey = availableKeys[0];
    this.log.info({ key: oldestKey.hash }, "Assigning key to request.");
    // `lastUsed` is updated on the pooled object before the copy is taken, so
    // the pool's ordering stays correct even though the caller gets a copy.
    oldestKey.lastUsed = Date.now();
    return { ...oldestKey };
  }
--- a/src/proxy/rewriters/add-key.ts
+++ b/src/proxy/rewriters/add-key.ts
@ -1,10 +1,45 @@
 import type { ExpressHttpProxyReqCallback } from ".";
-import { Key, keyPool } from "../../key-management";
+import { Key, Model, keyPool, SUPPORTED_MODELS } from "../../key-management";
 /** Add an OpenAI key from the pool to the request. */
 export const addKey: ExpressHttpProxyReqCallback = (proxyReq, req) => {
  let assignedKey: Key;
-  assignedKey = keyPool.get(req.body?.model || "gpt-3.5")!;
+
  // Not all clients request a particular model.
  // If they request a model, just use that.
  // If they don't request a model, use a GPT-4 key if there is an active one,
  // otherwise use a GPT-3.5 key.
  // TODO: Anthropic mode should prioritize Claude over Claude Instant.
  // Each provider needs to define some priority order for their models.
  if (bodyHasModel(req.body)) {
    // bodyHasModel matches on prefix, so a version-suffixed model name is
    // handed to the pool unchanged.
    assignedKey = keyPool.get(req.body.model);
  } else {
    // Also reached when the body names a model that is not a string or does
    // not start with any supported model name.
    try {
      assignedKey = keyPool.get("gpt-4");
    } catch {
      // The bare catch treats any error from the GPT-4 lookup as "no GPT-4
      // key". An error from this second lookup is not caught here and
      // propagates to the caller.
      assignedKey = keyPool.get("gpt-3.5-turbo");
    }
  }
  // Attach the chosen key to the request object for later use.
  req.key = assignedKey;
  req.log.info(
    {
      key: assignedKey.hash,
      // Logged exactly as the client sent it; undefined if no model was given.
      model: req.body?.model,
      isGpt4: assignedKey.isGpt4,
    },
    "Assigned key to request"
  );
  // TODO: Requests to Anthropic models use `X-API-Key`.
  proxyReq.setHeader("Authorization", `Bearer ${assignedKey.key}`);
 };
 /**
  * Type guard reporting whether a request body names a model we support.
  *
  * The match is by prefix: model names may carry a suffix identifying a frozen
  * release, and OpenAI and Anthropic serve the latest version when the suffix
  * is omitted. As a consequence a suffixed name is accepted and narrowed to
  * `Model` even though it is not literally one of the `Model` members.
  *
  * @param body Parsed request body; may be null, undefined, or lack `model`.
  * @returns true only when `body.model` is a string that starts with one of
  *   the entries in `SUPPORTED_MODELS`.
  */
 function bodyHasModel(body: any): body is { model: Model } {
  const requested = body?.model;
  if (typeof requested !== "string") {
    return false;
  }
  for (const known of SUPPORTED_MODELS) {
    if (requested.startsWith(known)) {
      return true;
    }
  }
  return false;
 }
--- a/src/proxy/rewriters/transform-kobold-payload.ts
+++ b/src/proxy/rewriters/transform-kobold-payload.ts
@ -75,14 +75,10 @@ export const transformKoboldPayload: ExpressHttpProxyReqCallback = (
    { role: "user", content: lastLine },
  ];
-  // Kobold doesn't select a model. If we were assigned a key that supports
+  // Kobold doesn't select a model. If the addKey rewriter assigned us a GPT-4
-  // gpt4, use it, otherwise use gpt3.5-turbo. If the key was incorrectly
+  // key, use that. Otherwise, use GPT-3.5-turbo. If the key was incorrectly
  // assigned, we'll get an error from OpenAI but the key will be downgraded
  // for the next request.
-  // const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo"; //TODO: this is fucked, fix it later
+  const model = req.key!.isGpt4 ? "gpt-4" : "gpt-3.5-turbo";
  const model = "gpt-3.5-turbo";
  const newBody = {
    model,
    temperature,