Encode batches separately

Significantly reduces VRAM usage. This brings encoding more in line with how decoding currently functions.
2023-08-13 04:16:48 -04:00 · 2023-08-13 04:16:48 -04:00 · 822597db49
parent da80d649fd
commit 822597db49
1 changed file with 9 additions and 1 deletion
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -92,7 +92,15 @@ def images_tensor_to_samples(image, approximation=None, model=None):
            model = shared.sd_model
        image = image.to(shared.device, dtype=devices.dtype_vae)
        image = image * 2 - 1
-        x_latent = model.get_first_stage_encoding(model.encode_first_stage(image))
+        if len(image) > 1:
+            x_latent = torch.stack([
+                model.get_first_stage_encoding(
+                    model.encode_first_stage(torch.unsqueeze(img, 0))
+                )[0]
+                for img in image
+            ])
+        else:
+            x_latent = model.get_first_stage_encoding(model.encode_first_stage(image))

    return x_latent