tries to detect Azure GPT4-Turbo deployments more reliably
parent 5599a83ae4
commit 3be2687793

@@ -36,34 +36,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
   protected async testKeyOrFail(key: AzureOpenAIKey) {
     const model = await this.testModel(key);
-    this.log.info(
-      { key: key.hash, deploymentModel: model },
-      "Checked key."
-    );
+    this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
     this.updateKey(key.hash, { modelFamilies: [model] });
   }
 
-  // provided api-key header isn't valid (401)
-  // {
-  //   "error": {
-  //     "code": "401",
-  //     "message": "Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource."
-  //   }
-  // }
-
-  // api key correct but deployment id is wrong (404)
-  // {
-  //   "error": {
-  //     "code": "DeploymentNotFound",
-  //     "message": "The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again."
-  //   }
-  // }
-
-  // resource name is wrong (node will throw ENOTFOUND)
-
-  // rate limited (429)
-  // TODO: try to reproduce this
-
   protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
     if (error.response && AzureOpenAIKeyChecker.errorIsAzureError(error)) {
       const data = error.response.data;
@@ -88,6 +64,20 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
           isDisabled: true,
           isRevoked: true,
         });
+      case "429":
+        this.log.warn(
+          { key: key.hash, errorType, error: error.response.data },
+          "Key is rate limited. Rechecking key in 1 minute."
+        );
+        this.updateKey(key.hash, { lastChecked: Date.now() });
+        setTimeout(async () => {
+          this.log.info(
+            { key: key.hash },
+            "Rechecking Azure key after rate limit."
+          );
+          await this.checkKey(key);
+        }, 1000 * 60);
+        return;
       default:
         this.log.error(
           { key: key.hash, errorType, error: error.response.data, status },
@@ -129,7 +119,32 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
headers: { "Content-Type": "application/json", "api-key": apiKey },
|
||||
});
|
||||
|
||||
return getAzureOpenAIModelFamily(data.model);
|
||||
const family = getAzureOpenAIModelFamily(data.model);
|
||||
|
||||
// Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
|
||||
// Otherwise we can use the model family Azure returned.
|
||||
if (family !== "azure-gpt4") {
|
||||
return family;
|
||||
}
|
||||
|
||||
// Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
|
||||
// GPT-4 will return a Bad Request error.
|
||||
const contextText = {
|
||||
max_tokens: 9000,
|
||||
stream: false,
|
||||
temperature: 0,
|
||||
seed: 0,
|
||||
messages: [{ role: "user", content: "" }],
|
||||
};
|
||||
const { data: contextTest, status } = await axios.post(url, contextText, {
|
||||
headers: { "Content-Type": "application/json", "api-key": apiKey },
|
||||
validateStatus: (status) => status === 400 || status === 200,
|
||||
});
|
||||
const code = contextTest.error?.code;
|
||||
this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
|
||||
|
||||
if (code === "context_length_exceeded") return "azure-gpt4";
|
||||
return "azure-gpt4-turbo";
|
||||
}
|
||||
|
||||
static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
|
||||
|
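
For reference only, not part of the commit: the added probe can be read as a small standalone function. The sketch below assumes axios and a fully formed Azure chat-completions endpoint URL; probeAzureGpt4Turbo, url, and apiKey are hypothetical names for illustration, while the real checker runs this logic inside the class shown in the diff.

import axios from "axios";

// Sketch of the context-size probe (assumed names). GPT-4 Turbo deployments
// accept a request asking for more completion tokens than classic GPT-4's 8k
// context allows; classic GPT-4 rejects it with a 400 "context_length_exceeded".
async function probeAzureGpt4Turbo(
  url: string, // assumed: the deployment's /chat/completions endpoint
  apiKey: string
): Promise<"azure-gpt4" | "azure-gpt4-turbo"> {
  const payload = {
    max_tokens: 9000, // deliberately larger than an 8k context window
    stream: false,
    temperature: 0,
    messages: [{ role: "user", content: "" }],
  };
  const { data } = await axios.post(url, payload, {
    headers: { "Content-Type": "application/json", "api-key": apiKey },
    // Both outcomes are expected: 200 suggests Turbo, 400 suggests classic GPT-4.
    validateStatus: (s) => s === 200 || s === 400,
  });
  const code = data?.error?.code;
  return code === "context_length_exceeded" ? "azure-gpt4" : "azure-gpt4-turbo";
}

Using validateStatus keeps axios from throwing on the expected 400, so a single request distinguishes the two model families without extra error handling.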