From 822597db49218de17e105e62075096284dfcfd41 Mon Sep 17 00:00:00 2001
From: catboxanon <122327233+catboxanon@users.noreply.github.com>
Date: Sun, 13 Aug 2023 04:16:48 -0400
Subject: [PATCH] Encode batches separately

Significantly reduces VRAM. This makes encoding more in line with how
decoding currently functions.
---
 modules/sd_samplers_common.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 09d1e11e3..f9d034ca1 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -92,7 +92,15 @@ def images_tensor_to_samples(image, approximation=None, model=None):
             model = shared.sd_model
         image = image.to(shared.device, dtype=devices.dtype_vae)
         image = image * 2 - 1
-        x_latent = model.get_first_stage_encoding(model.encode_first_stage(image))
+        if len(image) > 1:
+            x_latent = torch.stack([
+                model.get_first_stage_encoding(
+                    model.encode_first_stage(torch.unsqueeze(img, 0))
+                )[0]
+                for img in image
+            ])
+        else:
+            x_latent = model.get_first_stage_encoding(model.encode_first_stage(image))

     return x_latent

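Note (not part of the patch): below is a minimal, self-contained sketch of
the technique the patch applies. The first-stage VAE encoder allocates
activation memory proportional to batch size, so encoding images one at a
time and stacking the resulting latents caps peak VRAM at the single-image
cost, in exchange for len(image) sequential encoder passes. DummyFirstStage
is a hypothetical stand-in for the real LDM wrapper; only the two methods
the patch calls are modeled, with latent shapes mimicked rather than
computed by a real encoder.

import torch


class DummyFirstStage:
    """Hypothetical stand-in for the model used in sd_samplers_common."""

    def encode_first_stage(self, image):
        # The real method runs the VAE encoder; here we only mimic the
        # output shape: 8x spatial downsample, 4 latent channels.
        b, _, h, w = image.shape
        return torch.randn(b, 4, h // 8, w // 8)

    def get_first_stage_encoding(self, z):
        # The real method samples/scales the latent distribution;
        # identity is enough for a shape-level sketch.
        return z


model = DummyFirstStage()
image = torch.rand(4, 3, 512, 512) * 2 - 1  # batch of 4 images in [-1, 1]

# Batched: one call, peak activation memory scales with len(image).
batched = model.get_first_stage_encoding(model.encode_first_stage(image))

# Per-image, as in the patch: peak memory scales with batch size 1.
separate = torch.stack([
    model.get_first_stage_encoding(
        model.encode_first_stage(torch.unsqueeze(img, 0))
    )[0]
    for img in image
])

assert batched.shape == separate.shape  # (4, 4, 64, 64)

The same trade-off already applies on the decode side, which is why the
commit message describes this as bringing encoding in line with how
decoding currently functions.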