tries to detect Azure GPT4-Turbo deployments more reliably

nai-degen 2023-12-15 12:14:23 -06:00
parent 5599a83ae4
commit 3be2687793
1 changed file with 41 additions and 26 deletions


@@ -36,34 +36,10 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
   protected async testKeyOrFail(key: AzureOpenAIKey) {
     const model = await this.testModel(key);
-    this.log.info(
-      { key: key.hash, deploymentModel: model },
-      "Checked key."
-    );
+    this.log.info({ key: key.hash, deploymentModel: model }, "Checked key.");
     this.updateKey(key.hash, { modelFamilies: [model] });
   }
-  // provided api-key header isn't valid (401)
-  // {
-  //   "error": {
-  //     "code": "401",
-  //     "message": "Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource."
-  //   }
-  // }
-  // api key correct but deployment id is wrong (404)
-  // {
-  //   "error": {
-  //     "code": "DeploymentNotFound",
-  //     "message": "The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again."
-  //   }
-  // }
-  // resource name is wrong (node will throw ENOTFOUND)
-  // rate limited (429)
-  // TODO: try to reproduce this
   protected handleAxiosError(key: AzureOpenAIKey, error: AxiosError) {
     if (error.response && AzureOpenAIKeyChecker.errorIsAzureError(error)) {
       const data = error.response.data;
@@ -88,6 +64,20 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
             isDisabled: true,
             isRevoked: true,
           });
case "429":
this.log.warn(
{ key: key.hash, errorType, error: error.response.data },
"Key is rate limited. Rechecking key in 1 minute."
);
this.updateKey(key.hash, { lastChecked: Date.now() });
setTimeout(async () => {
this.log.info(
{ key: key.hash },
"Rechecking Azure key after rate limit."
);
await this.checkKey(key);
}, 1000 * 60);
return;
         default:
           this.log.error(
             { key: key.hash, errorType, error: error.response.data, status },
@@ -129,7 +119,32 @@ export class AzureOpenAIKeyChecker extends KeyCheckerBase<AzureOpenAIKey> {
       headers: { "Content-Type": "application/json", "api-key": apiKey },
     });
-    return getAzureOpenAIModelFamily(data.model);
+    const family = getAzureOpenAIModelFamily(data.model);
+    // Azure returns "gpt-4" even for GPT-4 Turbo, so we need further checks.
+    // Otherwise we can use the model family Azure returned.
+    if (family !== "azure-gpt4") {
+      return family;
+    }
+    // Try to send an oversized prompt. GPT-4 Turbo can handle this but regular
+    // GPT-4 will return a Bad Request error.
+    const contextText = {
+      max_tokens: 9000,
+      stream: false,
+      temperature: 0,
+      seed: 0,
+      messages: [{ role: "user", content: "" }],
+    };
+    const { data: contextTest, status } = await axios.post(url, contextText, {
+      headers: { "Content-Type": "application/json", "api-key": apiKey },
+      validateStatus: (status) => status === 400 || status === 200,
+    });
+    const code = contextTest.error?.code;
+    this.log.debug({ code, status }, "Performed Azure GPT4 context size test.");
+    if (code === "context_length_exceeded") return "azure-gpt4";
+    return "azure-gpt4-turbo";
   }
   static errorIsAzureError(error: AxiosError): error is AxiosError<AzureError> {
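
Note on the detection heuristic: the new code distinguishes `azure-gpt4` from `azure-gpt4-turbo` by requesting more completion tokens than a base GPT-4 deployment's context window allows. The sketch below isolates that probe outside the key checker; it is an illustrative reconstruction rather than the proxy's actual module, and `probeForGpt4Turbo`, `url`, and `apiKey` are assumed placeholder names.

```ts
// Standalone sketch of the oversized-prompt probe (assumed names, not the
// proxy's actual code). Requires axios.
import axios from "axios";

async function probeForGpt4Turbo(url: string, apiKey: string): Promise<boolean> {
  // Request more completion tokens than a base GPT-4 context can hold.
  // GPT-4 Turbo accepts this; base GPT-4 rejects it with a 400 error whose
  // code is "context_length_exceeded".
  const body = {
    max_tokens: 9000,
    stream: false,
    temperature: 0,
    seed: 0,
    messages: [{ role: "user", content: "" }],
  };
  const { data, status } = await axios.post(url, body, {
    headers: { "Content-Type": "application/json", "api-key": apiKey },
    // Accept both outcomes so axios does not throw on the 400 case.
    validateStatus: (s) => s === 200 || s === 400,
  });
  if (status === 200) return true; // oversized request accepted: Turbo
  return data?.error?.code !== "context_length_exceeded";
}
```

Treating any other 400 as Turbo mirrors the commit's behavior, which only falls back to `azure-gpt4` on an explicit `context_length_exceeded` error.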