performance: check for NaNs in unet only once, after all steps have been completed
This commit is contained in:
parent
41ee2db5a8
commit
6214aa7d2a
|
@ -625,6 +625,9 @@ class DecodedSamples(list):
|
|||
def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
|
||||
samples = DecodedSamples()
|
||||
|
||||
if check_for_nans:
|
||||
devices.test_for_nans(batch, "unet")
|
||||
|
||||
for i in range(batch.shape[0]):
|
||||
sample = decode_first_stage(model, batch[i:i + 1])[0]
|
||||
|
||||
|
@ -987,6 +990,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
|||
if getattr(samples_ddim, 'already_decoded', False):
|
||||
x_samples_ddim = samples_ddim
|
||||
else:
|
||||
devices.test_for_nans(samples_ddim, "unet")
|
||||
|
||||
if opts.sd_vae_decode_method != 'Full':
|
||||
p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
|
||||
x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
|
||||
|
|
|
@ -273,8 +273,6 @@ class CFGDenoiser(torch.nn.Module):
|
|||
denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model)
|
||||
cfg_denoised_callback(denoised_params)
|
||||
|
||||
devices.test_for_nans(x_out, "unet")
|
||||
|
||||
if is_edit_model:
|
||||
denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)
|
||||
elif skip_uncond:
|
||||
|
|
Loading…
Reference in New Issue