add CFG denoiser implementation for DDIM, PLMS and UniPC (this is the commit when you can run both old and new implementations to compare them)

This commit is contained in:
AUTOMATIC1111 2023-08-08 19:20:11 +03:00
parent 2d8e4a6544
commit 8285a149d8
6 changed files with 456 additions and 173 deletions

View File

@ -1,4 +1,4 @@
from modules import sd_samplers_compvis, sd_samplers_kdiffusion, shared from modules import sd_samplers_compvis, sd_samplers_kdiffusion, sd_samplers_timesteps, shared
# imports for functions that previously were here and are used by other modules # imports for functions that previously were here and are used by other modules
from modules.sd_samplers_common import samples_to_image_grid, sample_to_image # noqa: F401 from modules.sd_samplers_common import samples_to_image_grid, sample_to_image # noqa: F401
@ -6,6 +6,7 @@ from modules.sd_samplers_common import samples_to_image_grid, sample_to_image #
all_samplers = [ all_samplers = [
*sd_samplers_kdiffusion.samplers_data_k_diffusion, *sd_samplers_kdiffusion.samplers_data_k_diffusion,
*sd_samplers_compvis.samplers_data_compvis, *sd_samplers_compvis.samplers_data_compvis,
*sd_samplers_timesteps.samplers_data_timesteps,
] ]
all_samplers_map = {x.name: x for x in all_samplers} all_samplers_map = {x.name: x for x in all_samplers}

View File

@ -39,7 +39,7 @@ class CFGDenoiser(torch.nn.Module):
negative prompt. negative prompt.
""" """
def __init__(self, model): def __init__(self, model, sampler):
super().__init__() super().__init__()
self.inner_model = model self.inner_model = model
self.mask = None self.mask = None
@ -48,6 +48,7 @@ class CFGDenoiser(torch.nn.Module):
self.step = 0 self.step = 0
self.image_cfg_scale = None self.image_cfg_scale = None
self.padded_cond_uncond = False self.padded_cond_uncond = False
self.sampler = sampler
def combine_denoised(self, x_out, conds_list, uncond, cond_scale): def combine_denoised(self, x_out, conds_list, uncond, cond_scale):
denoised_uncond = x_out[-uncond.shape[0]:] denoised_uncond = x_out[-uncond.shape[0]:]
@ -65,6 +66,9 @@ class CFGDenoiser(torch.nn.Module):
return denoised return denoised
def get_pred_x0(self, x_in, x_out, sigma):
return x_out
def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
if state.interrupted or state.skipped: if state.interrupted or state.skipped:
raise sd_samplers_common.InterruptedException raise sd_samplers_common.InterruptedException
@ -78,6 +82,9 @@ class CFGDenoiser(torch.nn.Module):
assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
if self.mask is not None:
x = self.init_latent * self.mask + self.nmask * x
batch_size = len(conds_list) batch_size = len(conds_list)
repeats = [len(conds_list[i]) for i in range(batch_size)] repeats = [len(conds_list[i]) for i in range(batch_size)]
@ -170,11 +177,6 @@ class CFGDenoiser(torch.nn.Module):
devices.test_for_nans(x_out, "unet") devices.test_for_nans(x_out, "unet")
if opts.live_preview_content == "Prompt":
sd_samplers_common.store_latent(torch.cat([x_out[i:i+1] for i in denoised_image_indexes]))
elif opts.live_preview_content == "Negative prompt":
sd_samplers_common.store_latent(x_out[-uncond.shape[0]:])
if is_edit_model: if is_edit_model:
denoised = self.combine_denoised_for_edit_model(x_out, cond_scale) denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)
elif skip_uncond: elif skip_uncond:
@ -182,8 +184,16 @@ class CFGDenoiser(torch.nn.Module):
else: else:
denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
if self.mask is not None: self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
denoised = self.init_latent * self.mask + self.nmask * denoised
if opts.live_preview_content == "Prompt":
preview = self.sampler.last_latent
elif opts.live_preview_content == "Negative prompt":
preview = self.get_pred_x0(x_in[-uncond.shape[0]:], x_out[-uncond.shape[0]:], sigma)
else:
preview = self.get_pred_x0(torch.cat([x_in[i:i+1] for i in denoised_image_indexes]), torch.cat([denoised[i:i+1] for i in denoised_image_indexes]), sigma)
sd_samplers_common.store_latent(preview)
after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps) after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps)
cfg_after_cfg_callback(after_cfg_callback_params) cfg_after_cfg_callback(after_cfg_callback_params)
@ -192,27 +202,3 @@ class CFGDenoiser(torch.nn.Module):
self.step += 1 self.step += 1
return denoised return denoised
class TorchHijack:
def __init__(self, sampler_noises):
# Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
# implementation.
self.sampler_noises = deque(sampler_noises)
def __getattr__(self, item):
if item == 'randn_like':
return self.randn_like
if hasattr(torch, item):
return getattr(torch, item)
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
def randn_like(self, x):
if self.sampler_noises:
noise = self.sampler_noises.popleft()
if noise.shape == x.shape:
return noise
return devices.randn_like(x)

View File

@ -1,9 +1,11 @@
from collections import namedtuple import inspect
from collections import namedtuple, deque
import numpy as np import numpy as np
import torch import torch
from PIL import Image from PIL import Image
from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared
from modules.shared import opts, state from modules.shared import opts, state
import k_diffusion.sampling
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options']) SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
@ -127,3 +129,139 @@ def replace_torchsde_browinan():
replace_torchsde_browinan() replace_torchsde_browinan()
class TorchHijack:
def __init__(self, sampler_noises):
# Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
# implementation.
self.sampler_noises = deque(sampler_noises)
def __getattr__(self, item):
if item == 'randn_like':
return self.randn_like
if hasattr(torch, item):
return getattr(torch, item)
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
def randn_like(self, x):
if self.sampler_noises:
noise = self.sampler_noises.popleft()
if noise.shape == x.shape:
return noise
return devices.randn_like(x)
class Sampler:
def __init__(self, funcname):
self.funcname = funcname
self.func = funcname
self.extra_params = []
self.sampler_noises = None
self.stop_at = None
self.eta = None
self.config = None # set by the function calling the constructor
self.last_latent = None
self.s_min_uncond = None
self.s_churn = 0.0
self.s_tmin = 0.0
self.s_tmax = float('inf')
self.s_noise = 1.0
self.eta_option_field = 'eta_ancestral'
self.eta_infotext_field = 'Eta'
self.conditioning_key = shared.sd_model.model.conditioning_key
self.model_wrap = None
self.model_wrap_cfg = None
def callback_state(self, d):
step = d['i']
if self.stop_at is not None and step > self.stop_at:
raise InterruptedException
state.sampling_step = step
shared.total_tqdm.update()
def launch_sampling(self, steps, func):
state.sampling_steps = steps
state.sampling_step = 0
try:
return func()
except RecursionError:
print(
'Encountered RecursionError during sampling, returning last latent. '
'rho >5 with a polyexponential scheduler may cause this error. '
'You should try to use a smaller rho value instead.'
)
return self.last_latent
except InterruptedException:
return self.last_latent
def number_of_needed_noises(self, p):
return p.steps
def initialize(self, p) -> dict:
self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
self.model_wrap_cfg.step = 0
self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
self.eta = p.eta if p.eta is not None else getattr(opts, self.eta_option_field, 0.0)
self.s_min_uncond = getattr(p, 's_min_uncond', 0.0)
k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else [])
extra_params_kwargs = {}
for param_name in self.extra_params:
if hasattr(p, param_name) and param_name in inspect.signature(self.func).parameters:
extra_params_kwargs[param_name] = getattr(p, param_name)
if 'eta' in inspect.signature(self.func).parameters:
if self.eta != 1.0:
p.extra_generation_params[self.eta_infotext_field] = self.eta
extra_params_kwargs['eta'] = self.eta
if len(self.extra_params) > 0:
s_churn = getattr(opts, 's_churn', p.s_churn)
s_tmin = getattr(opts, 's_tmin', p.s_tmin)
s_tmax = getattr(opts, 's_tmax', p.s_tmax) or self.s_tmax # 0 = inf
s_noise = getattr(opts, 's_noise', p.s_noise)
if s_churn != self.s_churn:
extra_params_kwargs['s_churn'] = s_churn
p.s_churn = s_churn
p.extra_generation_params['Sigma churn'] = s_churn
if s_tmin != self.s_tmin:
extra_params_kwargs['s_tmin'] = s_tmin
p.s_tmin = s_tmin
p.extra_generation_params['Sigma tmin'] = s_tmin
if s_tmax != self.s_tmax:
extra_params_kwargs['s_tmax'] = s_tmax
p.s_tmax = s_tmax
p.extra_generation_params['Sigma tmax'] = s_tmax
if s_noise != self.s_noise:
extra_params_kwargs['s_noise'] = s_noise
p.s_noise = s_noise
p.extra_generation_params['Sigma noise'] = s_noise
return extra_params_kwargs
def create_noise_sampler(self, x, sigmas, p):
"""For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes"""
if shared.opts.no_dpmpp_sde_batch_determinism:
return None
from k_diffusion.sampling import BrownianTreeNoiseSampler
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
current_iter_seeds = p.all_seeds[p.iteration * p.batch_size:(p.iteration + 1) * p.batch_size]
return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds)

View File

@ -4,8 +4,7 @@ import inspect
import k_diffusion.sampling import k_diffusion.sampling
from modules import devices, sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser from modules import devices, sd_samplers_common, sd_samplers_extra, sd_samplers_cfg_denoiser
from modules.processing import StableDiffusionProcessing from modules.shared import opts
from modules.shared import opts, state
import modules.shared as shared import modules.shared as shared
samplers_k_diffusion = [ samplers_k_diffusion = [
@ -54,133 +53,17 @@ k_diffusion_scheduler = {
} }
class TorchHijack: class KDiffusionSampler(sd_samplers_common.Sampler):
def __init__(self, sampler_noises):
# Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
# implementation.
self.sampler_noises = deque(sampler_noises)
def __getattr__(self, item):
if item == 'randn_like':
return self.randn_like
if hasattr(torch, item):
return getattr(torch, item)
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
def randn_like(self, x):
if self.sampler_noises:
noise = self.sampler_noises.popleft()
if noise.shape == x.shape:
return noise
return devices.randn_like(x)
class KDiffusionSampler:
def __init__(self, funcname, sd_model): def __init__(self, funcname, sd_model):
denoiser = k_diffusion.external.CompVisVDenoiser if sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
self.model_wrap = denoiser(sd_model, quantize=shared.opts.enable_quantization) super().__init__(funcname)
self.funcname = funcname
self.func = funcname if callable(funcname) else getattr(k_diffusion.sampling, self.funcname)
self.extra_params = sampler_extra_params.get(funcname, []) self.extra_params = sampler_extra_params.get(funcname, [])
self.model_wrap_cfg = sd_samplers_cfg_denoiser.CFGDenoiser(self.model_wrap) self.func = funcname if callable(funcname) else getattr(k_diffusion.sampling, self.funcname)
self.sampler_noises = None
self.stop_at = None
self.eta = None
self.config = None # set by the function calling the constructor
self.last_latent = None
self.s_min_uncond = None
# NOTE: These are also defined in the StableDiffusionProcessing class. denoiser = k_diffusion.external.CompVisVDenoiser if sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
# They should have been here to begin with but we're going to self.model_wrap = denoiser(sd_model, quantize=shared.opts.enable_quantization)
# leave that class __init__ signature alone. self.model_wrap_cfg = sd_samplers_cfg_denoiser.CFGDenoiser(self.model_wrap, self)
self.s_churn = 0.0
self.s_tmin = 0.0
self.s_tmax = float('inf')
self.s_noise = 1.0
self.conditioning_key = sd_model.model.conditioning_key
def callback_state(self, d):
step = d['i']
latent = d["denoised"]
if opts.live_preview_content == "Combined":
sd_samplers_common.store_latent(latent)
self.last_latent = latent
if self.stop_at is not None and step > self.stop_at:
raise sd_samplers_common.InterruptedException
state.sampling_step = step
shared.total_tqdm.update()
def launch_sampling(self, steps, func):
state.sampling_steps = steps
state.sampling_step = 0
try:
return func()
except RecursionError:
print(
'Encountered RecursionError during sampling, returning last latent. '
'rho >5 with a polyexponential scheduler may cause this error. '
'You should try to use a smaller rho value instead.'
)
return self.last_latent
except sd_samplers_common.InterruptedException:
return self.last_latent
def number_of_needed_noises(self, p):
return p.steps
def initialize(self, p: StableDiffusionProcessing):
self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
self.model_wrap_cfg.step = 0
self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
self.eta = p.eta if p.eta is not None else opts.eta_ancestral
self.s_min_uncond = getattr(p, 's_min_uncond', 0.0)
k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else [])
extra_params_kwargs = {}
for param_name in self.extra_params:
if hasattr(p, param_name) and param_name in inspect.signature(self.func).parameters:
extra_params_kwargs[param_name] = getattr(p, param_name)
if 'eta' in inspect.signature(self.func).parameters:
if self.eta != 1.0:
p.extra_generation_params["Eta"] = self.eta
extra_params_kwargs['eta'] = self.eta
if len(self.extra_params) > 0:
s_churn = getattr(opts, 's_churn', p.s_churn)
s_tmin = getattr(opts, 's_tmin', p.s_tmin)
s_tmax = getattr(opts, 's_tmax', p.s_tmax) or self.s_tmax # 0 = inf
s_noise = getattr(opts, 's_noise', p.s_noise)
if s_churn != self.s_churn:
extra_params_kwargs['s_churn'] = s_churn
p.s_churn = s_churn
p.extra_generation_params['Sigma churn'] = s_churn
if s_tmin != self.s_tmin:
extra_params_kwargs['s_tmin'] = s_tmin
p.s_tmin = s_tmin
p.extra_generation_params['Sigma tmin'] = s_tmin
if s_tmax != self.s_tmax:
extra_params_kwargs['s_tmax'] = s_tmax
p.s_tmax = s_tmax
p.extra_generation_params['Sigma tmax'] = s_tmax
if s_noise != self.s_noise:
extra_params_kwargs['s_noise'] = s_noise
p.s_noise = s_noise
p.extra_generation_params['Sigma noise'] = s_noise
return extra_params_kwargs
def get_sigmas(self, p, steps): def get_sigmas(self, p, steps):
discard_next_to_last_sigma = self.config is not None and self.config.options.get('discard_next_to_last_sigma', False) discard_next_to_last_sigma = self.config is not None and self.config.options.get('discard_next_to_last_sigma', False)
@ -232,22 +115,12 @@ class KDiffusionSampler:
return sigmas return sigmas
def create_noise_sampler(self, x, sigmas, p):
"""For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes"""
if shared.opts.no_dpmpp_sde_batch_determinism:
return None
from k_diffusion.sampling import BrownianTreeNoiseSampler
sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
current_iter_seeds = p.all_seeds[p.iteration * p.batch_size:(p.iteration + 1) * p.batch_size]
return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds)
def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)
sigmas = self.get_sigmas(p, steps) sigmas = self.get_sigmas(p, steps)
sigma_sched = sigmas[steps - t_enc - 1:] sigma_sched = sigmas[steps - t_enc - 1:]
xi = x + noise * sigma_sched[0] xi = x + noise * sigma_sched[0]
extra_params_kwargs = self.initialize(p) extra_params_kwargs = self.initialize(p)
@ -296,12 +169,14 @@ class KDiffusionSampler:
extra_params_kwargs = self.initialize(p) extra_params_kwargs = self.initialize(p)
parameters = inspect.signature(self.func).parameters parameters = inspect.signature(self.func).parameters
if 'n' in parameters:
extra_params_kwargs['n'] = steps
if 'sigma_min' in parameters: if 'sigma_min' in parameters:
extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item() extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item()
extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item() extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item()
if 'n' in parameters:
extra_params_kwargs['n'] = steps if 'sigmas' in parameters:
else:
extra_params_kwargs['sigmas'] = sigmas extra_params_kwargs['sigmas'] = sigmas
if self.config.options.get('brownian_noise', False): if self.config.options.get('brownian_noise', False):
@ -322,3 +197,4 @@ class KDiffusionSampler:
return samples return samples

View File

@ -0,0 +1,147 @@
import torch
import inspect
from modules import devices, sd_samplers_common, sd_samplers_timesteps_impl
from modules.sd_samplers_cfg_denoiser import CFGDenoiser
from modules.shared import opts
import modules.shared as shared
samplers_timesteps = [
('k_DDIM', sd_samplers_timesteps_impl.ddim, ['k_ddim'], {}),
('k_PLMS', sd_samplers_timesteps_impl.plms, ['k_plms'], {}),
('k_UniPC', sd_samplers_timesteps_impl.unipc, ['k_unipc'], {}),
]
samplers_data_timesteps = [
sd_samplers_common.SamplerData(label, lambda model, funcname=funcname: CompVisSampler(funcname, model), aliases, options)
for label, funcname, aliases, options in samplers_timesteps
]
class CompVisTimestepsDenoiser(torch.nn.Module):
def __init__(self, model, *args, **kwargs):
super().__init__(*args, **kwargs)
self.inner_model = model
def forward(self, input, timesteps, **kwargs):
return self.inner_model.apply_model(input, timesteps, **kwargs)
class CompVisTimestepsVDenoiser(torch.nn.Module):
def __init__(self, model, *args, **kwargs):
super().__init__(*args, **kwargs)
self.inner_model = model
def predict_eps_from_z_and_v(self, x_t, t, v):
return self.inner_model.sqrt_alphas_cumprod[t.to(torch.int), None, None, None] * v + self.inner_model.sqrt_one_minus_alphas_cumprod[t.to(torch.int), None, None, None] * x_t
def forward(self, input, timesteps, **kwargs):
model_output = self.inner_model.apply_model(input, timesteps, **kwargs)
e_t = self.predict_eps_from_z_and_v(input, timesteps, model_output)
return e_t
class CFGDenoiserTimesteps(CFGDenoiser):
def __init__(self, model, sampler):
super().__init__(model, sampler)
self.alphas = model.inner_model.alphas_cumprod
def get_pred_x0(self, x_in, x_out, sigma):
ts = int(sigma.item())
s_in = x_in.new_ones([x_in.shape[0]])
a_t = self.alphas[ts].item() * s_in
sqrt_one_minus_at = (1 - a_t).sqrt()
pred_x0 = (x_in - sqrt_one_minus_at * x_out) / a_t.sqrt()
return pred_x0
class CompVisSampler(sd_samplers_common.Sampler):
def __init__(self, funcname, sd_model):
super().__init__(funcname)
self.eta_option_field = 'eta_ddim'
self.eta_infotext_field = 'Eta DDIM'
denoiser = CompVisTimestepsVDenoiser if sd_model.parameterization == "v" else CompVisTimestepsDenoiser
self.model_wrap = denoiser(sd_model)
self.model_wrap_cfg = CFGDenoiserTimesteps(self.model_wrap, self)
def get_timesteps(self, p, steps):
discard_next_to_last_sigma = self.config is not None and self.config.options.get('discard_next_to_last_sigma', False)
if opts.always_discard_next_to_last_sigma and not discard_next_to_last_sigma:
discard_next_to_last_sigma = True
p.extra_generation_params["Discard penultimate sigma"] = True
steps += 1 if discard_next_to_last_sigma else 0
timesteps = torch.clip(torch.asarray(list(range(0, 1000, 1000 // steps)), device=devices.device) + 1, 0, 999)
return timesteps
def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)
timesteps = self.get_timesteps(p, steps)
timesteps_sched = timesteps[:t_enc]
alphas_cumprod = shared.sd_model.alphas_cumprod
sqrt_alpha_cumprod = torch.sqrt(alphas_cumprod[timesteps[t_enc]])
sqrt_one_minus_alpha_cumprod = torch.sqrt(1 - alphas_cumprod[timesteps[t_enc]])
xi = x * sqrt_alpha_cumprod + noise * sqrt_one_minus_alpha_cumprod
extra_params_kwargs = self.initialize(p)
parameters = inspect.signature(self.func).parameters
if 'timesteps' in parameters:
extra_params_kwargs['timesteps'] = timesteps_sched
if 'is_img2img' in parameters:
extra_params_kwargs['is_img2img'] = True
self.model_wrap_cfg.init_latent = x
self.last_latent = x
extra_args = {
'cond': conditioning,
'image_cond': image_conditioning,
'uncond': unconditional_conditioning,
'cond_scale': p.cfg_scale,
's_min_uncond': self.s_min_uncond
}
samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))
if self.model_wrap_cfg.padded_cond_uncond:
p.extra_generation_params["Pad conds"] = True
return samples
def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps = steps or p.steps
timesteps = self.get_timesteps(p, steps)
extra_params_kwargs = self.initialize(p)
parameters = inspect.signature(self.func).parameters
if 'timesteps' in parameters:
extra_params_kwargs['timesteps'] = timesteps
self.last_latent = x
samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
'cond': conditioning,
'image_cond': image_conditioning,
'uncond': unconditional_conditioning,
'cond_scale': p.cfg_scale,
's_min_uncond': self.s_min_uncond
}, disable=False, callback=self.callback_state, **extra_params_kwargs))
if self.model_wrap_cfg.padded_cond_uncond:
p.extra_generation_params["Pad conds"] = True
return samples

View File

@ -0,0 +1,135 @@
import torch
import tqdm
import k_diffusion.sampling
import numpy as np
from modules import shared
from modules.models.diffusion.uni_pc import uni_pc
@torch.no_grad()
def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0):
alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
alphas = alphas_cumprod[timesteps]
alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64)
sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy()))
extra_args = {} if extra_args is None else extra_args
s_in = x.new_ones([x.shape[0]])
for i in tqdm.trange(len(timesteps) - 1, disable=disable):
index = len(timesteps) - 1 - i
e_t = model(x, timesteps[index].item() * s_in, **extra_args)
a_t = alphas[index].item() * s_in
a_prev = alphas_prev[index].item() * s_in
sigma_t = sigmas[index].item() * s_in
sqrt_one_minus_at = sqrt_one_minus_alphas[index].item() * s_in
pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
dir_xt = (1. - a_prev - sigma_t ** 2).sqrt() * e_t
noise = sigma_t * k_diffusion.sampling.torch.randn_like(x)
x = a_prev.sqrt() * pred_x0 + dir_xt + noise
if callback is not None:
callback({'x': x, 'i': i, 'sigma': 0, 'sigma_hat': 0, 'denoised': pred_x0})
return x
@torch.no_grad()
def plms(model, x, timesteps, extra_args=None, callback=None, disable=None):
alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
alphas = alphas_cumprod[timesteps]
alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64)
sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
extra_args = {} if extra_args is None else extra_args
s_in = x.new_ones([x.shape[0]])
old_eps = []
def get_x_prev_and_pred_x0(e_t, index):
# select parameters corresponding to the currently considered timestep
a_t = alphas[index].item() * s_in
a_prev = alphas_prev[index].item() * s_in
sqrt_one_minus_at = sqrt_one_minus_alphas[index].item() * s_in
# current prediction for x_0
pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
# direction pointing to x_t
dir_xt = (1. - a_prev).sqrt() * e_t
x_prev = a_prev.sqrt() * pred_x0 + dir_xt
return x_prev, pred_x0
for i in tqdm.trange(len(timesteps) - 1, disable=disable):
index = len(timesteps) - 1 - i
ts = timesteps[index].item() * s_in
t_next = timesteps[max(index - 1, 0)].item() * s_in
e_t = model(x, ts, **extra_args)
if len(old_eps) == 0:
# Pseudo Improved Euler (2nd order)
x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index)
e_t_next = model(x_prev, t_next, **extra_args)
e_t_prime = (e_t + e_t_next) / 2
elif len(old_eps) == 1:
# 2nd order Pseudo Linear Multistep (Adams-Bashforth)
e_t_prime = (3 * e_t - old_eps[-1]) / 2
elif len(old_eps) == 2:
# 3nd order Pseudo Linear Multistep (Adams-Bashforth)
e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
else:
# 4nd order Pseudo Linear Multistep (Adams-Bashforth)
e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24
x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index)
old_eps.append(e_t)
if len(old_eps) >= 4:
old_eps.pop(0)
x = x_prev
if callback is not None:
callback({'x': x, 'i': i, 'sigma': 0, 'sigma_hat': 0, 'denoised': pred_x0})
return x
class UniPCCFG(uni_pc.UniPC):
def __init__(self, cfg_model, extra_args, callback, *args, **kwargs):
super().__init__(None, *args, **kwargs)
def after_update(x, model_x):
callback({'x': x, 'i': self.index, 'sigma': 0, 'sigma_hat': 0, 'denoised': model_x})
self.index += 1
self.cfg_model = cfg_model
self.extra_args = extra_args
self.callback = callback
self.index = 0
self.after_update = after_update
def get_model_input_time(self, t_continuous):
return (t_continuous - 1. / self.noise_schedule.total_N) * 1000.
def model(self, x, t):
t_input = self.get_model_input_time(t)
res = self.cfg_model(x, t_input, **self.extra_args)
return res
def unipc(model, x, timesteps, extra_args=None, callback=None, disable=None, is_img2img=False):
alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
ns = uni_pc.NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod)
t_start = timesteps[-1] / 1000 + 1 / 1000 if is_img2img else None # this is likely off by a bit - if someone wants to fix it please by all means
unipc_sampler = UniPCCFG(model, extra_args, callback, ns, predict_x0=True, thresholding=False, variant=shared.opts.uni_pc_variant)
x = unipc_sampler.sample(x, steps=len(timesteps), t_start=t_start, skip_type=shared.opts.uni_pc_skip_type, method="multistep", order=shared.opts.uni_pc_order, lower_order_final=shared.opts.uni_pc_lower_order_final)
return x