From 0e98529365477a4f240b2ac67d94ff59235144c5 Mon Sep 17 00:00:00 2001
From: huchenlei
Date: Wed, 15 May 2024 15:46:53 -0400
Subject: [PATCH] Replace einops.rearrange with torch native

---
 modules/sd_hijack_optimizations.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 7f9e328d0..4c2dc56d4 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -486,7 +486,19 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
     k_in = self.to_k(context_k)
     v_in = self.to_v(context_v)
 
-    q, k, v = (rearrange(t, 'b n (h d) -> b n h d', h=h) for t in (q_in, k_in, v_in))
+    def _reshape(t):
+        """rearrange(t, 'b n (h d) -> b n h d', h=h).
+        Using torch native operations to avoid overhead as this function is
+        called frequently. (70 times/it for SDXL)
+        """
+        b, n, _ = t.shape  # Get the batch size (b) and sequence length (n)
+        d = t.shape[2] // h  # Determine the depth per head
+        return t.reshape(b, n, h, d)
+
+    q = _reshape(q_in)
+    k = _reshape(k_in)
+    v = _reshape(v_in)
+
     del q_in, k_in, v_in
 
     dtype = q.dtype
@@ -497,7 +509,9 @@ def xformers_attention_forward(self, x, context=None, mask=None, **kwargs):
 
     out = out.to(dtype)
 
-    out = rearrange(out, 'b n h d -> b n (h d)', h=h)
+    # out = rearrange(out, 'b n h d -> b n (h d)', h=h)
+    b, n, h, d = out.shape
+    out = out.reshape(b, n, h * d)
     return self.to_out(out)
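
Note (not part of the patch): the change relies on the fact that splitting or merging the last axis with einops.rearrange on a contiguous tensor is equivalent to a plain reshape. A minimal standalone sketch of that check follows; the tensor sizes (batch, tokens, heads, head dim) are arbitrary example values chosen for illustration, not taken from the model.

    # Hypothetical sanity check: torch-native reshape matches the einops
    # patterns being replaced ('b n (h d) -> b n h d' and its inverse).
    import torch
    from einops import rearrange

    b, n, h, d = 2, 77, 8, 40  # example sizes, assumed for this sketch

    t = torch.randn(b, n, h * d)
    # Splitting the last axis into (h, d) is just a reshape.
    assert torch.equal(rearrange(t, 'b n (h d) -> b n h d', h=h),
                       t.reshape(b, n, h, d))

    out = torch.randn(b, n, h, d)
    # Merging the last two axes back is also just a reshape.
    assert torch.equal(rearrange(out, 'b n h d -> b n (h d)', h=h),
                       out.reshape(b, n, h * d))

Because the reshape never permutes axes, no copy is made and the result is bit-identical to the einops version; the gain is simply avoiding einops' pattern-parsing overhead on a call made roughly 70 times per iteration for SDXL.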