diff --git a/docs/azure-configuration.md b/docs/azure-configuration.md
index 2ee172b..5f94581 100644
--- a/docs/azure-configuration.md
+++ b/docs/azure-configuration.md
@@ -17,9 +17,14 @@
 AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-
 ```
 
 ## Model assignment
-Note that each Azure deployment is assigned a model when you create it in the Microsoft Cognitive Services portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
+Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment and add a new credential to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
 
 ### Supported model IDs
 Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID. As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
+
+## On content filtering
+Be aware that all Azure OpenAI Service deployments have content filtering enabled by default at the Medium level. Prompts or responses that are deemed inappropriate will be rejected by the API. This is a feature of the Azure OpenAI Service, not the proxy.
+
+You can disable this from the deployment's settings within Azure, but you would need to request an exemption from Microsoft for your organization first. See [this page](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/content-filters) for more information.
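To make the "Supported model IDs" note above concrete, here is a minimal sketch of the ID conversion it describes. This is illustrative only, not the proxy's actual implementation; the function name and the simple string replacement are assumptions.

```ts
// Illustrative sketch only (not the proxy's real code). Azure deployment and
// model names cannot contain periods, so OpenAI-style "gpt-3.5-turbo..." IDs
// map to Azure's "gpt-35-turbo..." form; every other ID passes through as-is.
function toAzureModelId(openaiModelId: string): string {
  return openaiModelId.startsWith("gpt-3.5-turbo")
    ? openaiModelId.replace("gpt-3.5-turbo", "gpt-35-turbo")
    : openaiModelId;
}

// toAzureModelId("gpt-3.5-turbo") -> "gpt-35-turbo"
// toAzureModelId("gpt-4")         -> "gpt-4" (unchanged)
```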
diff --git a/scripts/test-concurrency.js b/scripts/test-concurrency.js
new file mode 100644
index 0000000..e5d1b33
--- /dev/null
+++ b/scripts/test-concurrency.js
@@ -0,0 +1,44 @@
+const axios = require("axios");
+
+const concurrentRequests = 5;
+const headers = {
+  Authorization: "Bearer test",
+  "Content-Type": "application/json",
+};
+
+const payload = {
+  model: "gpt-4",
+  max_tokens: 1,
+  stream: false,
+  messages: [{ role: "user", content: "Hi" }],
+};
+
+const makeRequest = async (i) => {
+  try {
+    const response = await axios.post(
+      "http://localhost:7860/proxy/azure/openai/v1/chat/completions",
+      payload,
+      { headers }
+    );
+    console.log(
+      `Req ${i} finished with status code ${response.status} and response:`,
+      response.data
+    );
+  } catch (error) {
+    console.error(`Error in req ${i}:`, error.message);
+  }
+};
+
+const executeRequestsConcurrently = () => {
+  const promises = [];
+  for (let i = 1; i <= concurrentRequests; i++) {
+    console.log(`Starting request ${i}`);
+    promises.push(makeRequest(i));
+  }
+
+  Promise.all(promises).then(() => {
+    console.log("All requests finished");
+  });
+};
+
+executeRequestsConcurrently();
diff --git a/scripts/test_concurrency.ps1 b/scripts/test_concurrency.ps1
deleted file mode 100644
index b802dbe..0000000
--- a/scripts/test_concurrency.ps1
+++ /dev/null
@@ -1,40 +0,0 @@
-$NumThreads = 10
-
-$runspacePool = [runspacefactory]::CreateRunspacePool(1, $NumThreads)
-$runspacePool.Open()
-$runspaces = @()
-
-$headers = @{
-    "Authorization" = "Bearer test"
-    "anthropic-version" = "2023-01-01"
-    "Content-Type" = "application/json"
-}
-
-$payload = @{
-    model = "claude-v2"
-    max_tokens_to_sample = 40
-    temperature = 0
-    stream = $true
-    prompt = "Test prompt, please reply with lorem ipsum`n`n:Assistant:"
-} | ConvertTo-Json
-
-for ($i = 1; $i -le $NumThreads; $i++) {
-    Write-Host "Starting thread $i"
-    $runspace = [powershell]::Create()
-    $runspace.AddScript({
-        param($i, $headers, $payload)
-        $response = Invoke-WebRequest -Uri "http://localhost:7860/proxy/aws/claude/v1/complete" -Method Post -Headers $headers -Body $payload
-        Write-Host "Response from server: $($response.StatusCode)"
-    }).AddArgument($i).AddArgument($headers).AddArgument($payload)
-
-    $runspace.RunspacePool = $runspacePool
-    $runspaces += [PSCustomObject]@{ Pipe = $runspace; Status = $runspace.BeginInvoke() }
-}
-
-$runspaces | ForEach-Object {
-    $_.Pipe.EndInvoke($_.Status)
-    $_.Pipe.Dispose()
-}
-
-$runspacePool.Close()
-$runspacePool.Dispose()
diff --git a/src/proxy/middleware/response/index.ts b/src/proxy/middleware/response/index.ts
index 1dbfb0e..4fea396 100644
--- a/src/proxy/middleware/response/index.ts
+++ b/src/proxy/middleware/response/index.ts
@@ -343,8 +343,10 @@ const handleUpstreamErrors: ProxyResHandlerWithBody = async (
     case "aws":
       handleAwsRateLimitError(req, errorPayload);
       break;
-    case "google-palm":
     case "azure":
+      handleAzureRateLimitError(req, errorPayload);
+      break;
+    case "google-palm":
      errorPayload.proxy_note = `Automatic rate limit retries are not supported for this service. Try again in a few seconds.`;
      break;
    default:
@@ -507,6 +509,22 @@ function handleOpenAIRateLimitError(
   return errorPayload;
 }
 
+function handleAzureRateLimitError(
+  req: Request,
+  errorPayload: ProxiedErrorPayload
+) {
+  const code = errorPayload.error?.code;
+  switch (code) {
+    case "429":
+      keyPool.markRateLimited(req.key!);
+      reenqueueRequest(req);
+      throw new RetryableError("Rate-limited request re-enqueued.");
+    default:
+      errorPayload.proxy_note = `Unrecognized rate limit error from Azure (${code}). Please report this.`;
+      break;
+  }
+}
+
 const incrementUsage: ProxyResHandlerWithBody = async (_proxyRes, req) => {
   if (isTextGenerationRequest(req) || isImageGenerationRequest(req)) {
     const model = req.body.model;
diff --git a/src/proxy/queue.ts b/src/proxy/queue.ts
index a971e30..abe128c 100644
--- a/src/proxy/queue.ts
+++ b/src/proxy/queue.ts
@@ -15,6 +15,8 @@ import crypto from "crypto";
 import type { Handler, Request } from "express";
 import { keyPool } from "../shared/key-management";
 import {
+  getAwsBedrockModelFamily,
+  getAzureOpenAIModelFamily,
   getClaudeModelFamily,
   getGooglePalmModelFamily,
   getOpenAIModelFamily,
@@ -136,11 +138,10 @@ function getPartitionForRequest(req: Request): ModelFamily {
   // they should be treated as separate queues.
   const model = req.body.model ?? "gpt-3.5-turbo";
 
-  // Weird special case for AWS because they serve multiple models from
+  // Weird special case for AWS/Azure because they serve multiple models from
   // different vendors, even if currently only one is supported.
-  if (req.service === "aws") {
-    return "aws-claude";
-  }
+  if (req.service === "aws") return getAwsBedrockModelFamily(model);
+  if (req.service === "azure") return getAzureOpenAIModelFamily(model);
 
   switch (req.outboundApi) {
     case "anthropic":
@@ -221,7 +222,11 @@ function processQueue() {
   reqs.filter(Boolean).forEach((req) => {
     if (req?.proceed) {
-      req.log.info({ retries: req.retryCount }, `Dequeuing request.`);
+      const modelFamily = getPartitionForRequest(req!);
+      req.log.info({
+        retries: req.retryCount,
+        partition: modelFamily,
+      }, `Dequeuing request.`);
       req.proceed();
     }
   });
 
@@ -415,6 +420,7 @@ function initStreaming(req: Request) {
 
     // Some clients have a broken SSE parser that doesn't handle comments
     // correctly. These clients can pass ?badSseParser=true to
    // disable comments in the SSE stream.
+    res.write(getHeartbeatPayload());
     return;
   }
diff --git a/src/shared/key-management/azure/provider.ts b/src/shared/key-management/azure/provider.ts
index 5256b7e..aac5380 100644
--- a/src/shared/key-management/azure/provider.ts
+++ b/src/shared/key-management/azure/provider.ts
@@ -157,8 +157,11 @@ export class AzureOpenAIKeyProvider implements KeyProvider {
 
   // TODO: all of this shit is duplicate code
 
-  public getLockoutPeriod() {
-    const activeKeys = this.keys.filter((k) => !k.isDisabled);
+  public getLockoutPeriod(family: AzureOpenAIModelFamily) {
+    const activeKeys = this.keys.filter(
+      (key) => !key.isDisabled && key.modelFamilies.includes(family)
+    );
+
     // Don't lock out if there are no keys available or the queue will stall.
     // Just let it through so the add-key middleware can throw an error.
     if (activeKeys.length === 0) return 0;
diff --git a/src/shared/key-management/openai/provider.ts b/src/shared/key-management/openai/provider.ts
index db2055d..214ab1e 100644
--- a/src/shared/key-management/openai/provider.ts
+++ b/src/shared/key-management/openai/provider.ts
@@ -276,12 +276,9 @@ export class OpenAIKeyProvider implements KeyProvider {
       (key) => !key.isDisabled && key.modelFamilies.includes(family)
     );
 
-    if (activeKeys.length === 0) {
-      // If there are no active keys for this model we can't fulfill requests.
-      // We'll return 0 to let the request through and return an error,
-      // otherwise the request will be stuck in the queue forever.
-      return 0;
-    }
+    // Don't lock out if there are no keys available or the queue will stall.
+    // Just let it through so the add-key middleware can throw an error.
+    if (activeKeys.length === 0) return 0;
 
     // A key is rate-limited if its `rateLimitedAt` plus the greater of its
     // `rateLimitRequestsReset` and `rateLimitTokensReset` is after the